From 7ef4063926f76f4ab3037227a9fa7a53e21e717f Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Tue, 10 Mar 2026 14:46:17 -0700 Subject: [PATCH 01/39] chore: Second round of 0.9.0 changelog update (#2225) ## Which issue does this PR close? - Related to #2213 ## What changes are included in this PR? ## Are these changes tested? --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 57f5f73bb4..54bb48a8ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,7 +24,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/) and this project adheres to [Semantic Versioning](https://semver.org/). -## [v0.9.0] - 2026-03-09 +## [v0.9.0] - 2026-03-10 ### Breaking Changes @@ -34,6 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/). * feat!(io): Implement Storage for OpenDal (#2080) by @CTTY in https://github.com/apache/iceberg-rust/pull/2080 * **Compatibility:** + * chore: Bumping MSRV to 1.92.0 (#2224) by @CTTY in https://github.com/apache/iceberg-rust/pull/2224 * chore: Update MSRV to 1.91.0 (#2204) by @xanderbailey in https://github.com/apache/iceberg-rust/pull/2204 * **Dependency Updates:** @@ -42,6 +43,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/). 
### All Changes +* feat(reader): support timestamp type in create_column (#2180) by @chenzl25 in https://github.com/apache/iceberg-rust/pull/2180 +* chore: Bumping MSRV to 1.92.0 (#2224) by @CTTY in https://github.com/apache/iceberg-rust/pull/2224 * perf(reader): Avoid second `create_parquet_record_batch_stream_builder()` call for migrated tables (#2176) by @mbutrovich in https://github.com/apache/iceberg-rust/pull/2176 * doc: Update IO feature flag and examples (#2214) by @CTTY in https://github.com/apache/iceberg-rust/pull/2214 * chore(deps): Bump uuid from 1.21.0 to 1.22.0 (#2221) by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/2221 From b6de5db7c632854adf3c75e72deedac565270f95 Mon Sep 17 00:00:00 2001 From: emkornfield Date: Tue, 10 Mar 2026 17:54:04 -0700 Subject: [PATCH 02/39] feat: Honor compression settings for metadata.json on write (#1876) ## Which issue does this PR close? Split off from https://github.com/apache/iceberg-rust/pull/1851 - Partially fixes #1731. ## What changes are included in this PR? This change honors the compression setting for metadata.json file (`write.metadata.compression-codec`). ## Are these changes tested? Add unit test to verify files are gzipped when the flag is enabled. 
BREAKING CHANGE: Make `write_to` take `MetadataLocation` --------- Co-authored-by: Kevin Liu Co-authored-by: Xuanwo --- crates/catalog/glue/src/catalog.rs | 14 +- crates/catalog/glue/src/utils.rs | 13 +- crates/catalog/hms/src/catalog.rs | 8 +- crates/catalog/hms/src/utils.rs | 18 +- crates/catalog/s3tables/src/catalog.rs | 16 +- crates/catalog/sql/src/catalog.rs | 13 +- crates/iceberg/src/catalog/memory/catalog.rs | 14 +- .../iceberg/src/catalog/metadata_location.rs | 210 ++++++++++++-- crates/iceberg/src/catalog/mod.rs | 7 +- crates/iceberg/src/compression.rs | 34 +++ crates/iceberg/src/spec/mod.rs | 1 + crates/iceberg/src/spec/table_metadata.rs | 116 +++++++- crates/iceberg/src/spec/table_properties.rs | 267 +++++++++++++++++- 13 files changed, 655 insertions(+), 76 deletions(-) diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs index bf2f392330..968b197169 100644 --- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -17,6 +17,7 @@ use std::collections::HashMap; use std::fmt::Debug; +use std::str::FromStr; use std::sync::Arc; use anyhow::anyhow; @@ -550,14 +551,14 @@ impl Catalog for GlueCatalog { let metadata = TableMetadataBuilder::from_table_creation(creation)? .build()? 
.metadata; - let metadata_location = - MetadataLocation::new_with_table_location(location.clone()).to_string(); + let metadata_location = MetadataLocation::new_with_metadata(location.clone(), &metadata); metadata.write_to(&self.file_io, &metadata_location).await?; + let metadata_location_str = metadata_location.to_string(); let glue_table = convert_to_glue_table( &table_name, - metadata_location.clone(), + metadata_location_str.clone(), &metadata, metadata.properties(), None, @@ -575,7 +576,7 @@ impl Catalog for GlueCatalog { Table::builder() .file_io(self.file_io()) - .metadata_location(metadata_location) + .metadata_location(metadata_location_str) .metadata(metadata) .identifier(TableIdent::new(NamespaceIdent::new(db_name), table_name)) .build() @@ -813,12 +814,13 @@ impl Catalog for GlueCatalog { let current_metadata_location = current_table.metadata_location_result()?.to_string(); let staged_table = commit.apply(current_table)?; - let staged_metadata_location = staged_table.metadata_location_result()?; + let staged_metadata_location_str = staged_table.metadata_location_result()?; + let staged_metadata_location = MetadataLocation::from_str(staged_metadata_location_str)?; // Write new metadata staged_table .metadata() - .write_to(staged_table.file_io(), staged_metadata_location) + .write_to(staged_table.file_io(), &staged_metadata_location) .await?; // Persist staged table to Glue with optimistic locking diff --git a/crates/catalog/glue/src/utils.rs b/crates/catalog/glue/src/utils.rs index f3be58381a..906e6fcc18 100644 --- a/crates/catalog/glue/src/utils.rs +++ b/crates/catalog/glue/src/utils.rs @@ -306,8 +306,6 @@ mod tests { fn test_convert_to_glue_table() -> Result<()> { let table_name = "my_table".to_string(); let location = "s3a://warehouse/hive".to_string(); - let metadata_location = MetadataLocation::new_with_table_location(location).to_string(); - let properties = HashMap::new(); let schema = Schema::builder() .with_schema_id(1) .with_fields(vec![ @@ 
-316,6 +314,8 @@ mod tests { .build()?; let metadata = create_metadata(schema)?; + let metadata_location = + MetadataLocation::new_with_metadata(location, &metadata).to_string(); let parameters = HashMap::from([ (ICEBERG_FIELD_ID.to_string(), "1".to_string()), @@ -336,8 +336,13 @@ mod tests { .location(metadata.location()) .build(); - let result = - convert_to_glue_table(&table_name, metadata_location, &metadata, &properties, None)?; + let result = convert_to_glue_table( + &table_name, + metadata_location, + &metadata, + metadata.properties(), + None, + )?; assert_eq!(result.name(), &table_name); assert_eq!(result.description(), None); diff --git a/crates/catalog/hms/src/catalog.rs b/crates/catalog/hms/src/catalog.rs index c508b9b1c7..69c01847a5 100644 --- a/crates/catalog/hms/src/catalog.rs +++ b/crates/catalog/hms/src/catalog.rs @@ -463,17 +463,17 @@ impl Catalog for HmsCatalog { .build()? .metadata; - let metadata_location = - MetadataLocation::new_with_table_location(location.clone()).to_string(); + let metadata_location = MetadataLocation::new_with_metadata(location.clone(), &metadata); metadata.write_to(&self.file_io, &metadata_location).await?; + let metadata_location_str = metadata_location.to_string(); let hive_table = convert_to_hive_table( db_name.clone(), metadata.current_schema(), table_name.clone(), location, - metadata_location.clone(), + metadata_location_str.clone(), metadata.properties(), )?; @@ -485,7 +485,7 @@ impl Catalog for HmsCatalog { Table::builder() .file_io(self.file_io()) - .metadata_location(metadata_location) + .metadata_location(metadata_location_str) .metadata(metadata) .identifier(TableIdent::new(NamespaceIdent::new(db_name), table_name)) .build() diff --git a/crates/catalog/hms/src/utils.rs b/crates/catalog/hms/src/utils.rs index 096e792f61..cd9b557397 100644 --- a/crates/catalog/hms/src/utils.rs +++ b/crates/catalog/hms/src/utils.rs @@ -311,8 +311,8 @@ fn get_current_time() -> Result { #[cfg(test)] mod tests { - use 
iceberg::spec::{NestedField, PrimitiveType, Type}; - use iceberg::{MetadataLocation, Namespace, NamespaceIdent}; + use iceberg::spec::{NestedField, PrimitiveType, TableMetadataBuilder, Type}; + use iceberg::{MetadataLocation, Namespace, NamespaceIdent, TableCreation}; use super::*; @@ -343,8 +343,6 @@ mod tests { let db_name = "my_db".to_string(); let table_name = "my_table".to_string(); let location = "s3a://warehouse/hms".to_string(); - let metadata_location = - MetadataLocation::new_with_table_location(location.clone()).to_string(); let properties = HashMap::new(); let schema = Schema::builder() .with_schema_id(1) @@ -354,6 +352,18 @@ mod tests { ]) .build()?; + let table_creation = TableCreation::builder() + .name(table_name.clone()) + .location(location.clone()) + .schema(schema.clone()) + .properties(properties.clone()) + .build(); + let metadata = TableMetadataBuilder::from_table_creation(table_creation)? + .build()? + .metadata; + let metadata_location = + MetadataLocation::new_with_metadata(location.clone(), &metadata).to_string(); + let result = convert_to_hive_table( db_name.clone(), &schema, diff --git a/crates/catalog/s3tables/src/catalog.rs b/crates/catalog/s3tables/src/catalog.rs index afe28ae453..e956937ce7 100644 --- a/crates/catalog/s3tables/src/catalog.rs +++ b/crates/catalog/s3tables/src/catalog.rs @@ -17,6 +17,7 @@ use std::collections::HashMap; use std::future::Future; +use std::str::FromStr; use std::sync::Arc; use async_trait::async_trait; @@ -501,17 +502,17 @@ impl Catalog for S3TablesCatalog { let metadata = TableMetadataBuilder::from_table_creation(creation)? .build()? 
.metadata; - let metadata_location = - MetadataLocation::new_with_table_location(table_location).to_string(); + let metadata_location = MetadataLocation::new_with_metadata(table_location, &metadata); metadata.write_to(&self.file_io, &metadata_location).await?; // update metadata location + let metadata_location_str = metadata_location.to_string(); self.s3tables_client .update_table_metadata_location() .table_bucket_arn(self.config.table_bucket_arn.clone()) .namespace(namespace.to_url_string()) .name(table_ident.name()) - .metadata_location(metadata_location.clone()) + .metadata_location(metadata_location_str.clone()) .version_token(create_resp.version_token()) .send() .await @@ -519,7 +520,7 @@ impl Catalog for S3TablesCatalog { let table = Table::builder() .identifier(table_ident) - .metadata_location(metadata_location) + .metadata_location(metadata_location_str) .metadata(metadata) .file_io(self.file_io.clone()) .build()?; @@ -630,11 +631,12 @@ impl Catalog for S3TablesCatalog { self.load_table_with_version_token(&table_ident).await?; let staged_table = commit.apply(current_table)?; - let staged_metadata_location = staged_table.metadata_location_result()?; + let staged_metadata_location_str = staged_table.metadata_location_result()?; + let staged_metadata_location = MetadataLocation::from_str(staged_metadata_location_str)?; staged_table .metadata() - .write_to(staged_table.file_io(), staged_metadata_location) + .write_to(staged_table.file_io(), &staged_metadata_location) .await?; let builder = self @@ -644,7 +646,7 @@ impl Catalog for S3TablesCatalog { .namespace(table_namespace.to_url_string()) .name(table_ident.name()) .version_token(version_token) - .metadata_location(staged_metadata_location); + .metadata_location(staged_metadata_location_str); let _ = builder.send().await.map_err(|e| { let error = e.into_service_error(); diff --git a/crates/catalog/sql/src/catalog.rs b/crates/catalog/sql/src/catalog.rs index 97a224aa9c..f2242dc407 100644 --- 
a/crates/catalog/sql/src/catalog.rs +++ b/crates/catalog/sql/src/catalog.rs @@ -851,21 +851,22 @@ impl Catalog for SqlCatalog { .build()? .metadata; let tbl_metadata_location = - MetadataLocation::new_with_table_location(location.clone()).to_string(); + MetadataLocation::new_with_metadata(location.clone(), &tbl_metadata); tbl_metadata .write_to(&self.fileio, &tbl_metadata_location) .await?; + let tbl_metadata_location_str = tbl_metadata_location.to_string(); self.execute(&format!( "INSERT INTO {CATALOG_TABLE_NAME} ({CATALOG_FIELD_CATALOG_NAME}, {CATALOG_FIELD_TABLE_NAMESPACE}, {CATALOG_FIELD_TABLE_NAME}, {CATALOG_FIELD_METADATA_LOCATION_PROP}, {CATALOG_FIELD_RECORD_TYPE}) VALUES (?, ?, ?, ?, ?) - "), vec![Some(&self.name), Some(&namespace.join(".")), Some(&tbl_name.clone()), Some(&tbl_metadata_location), Some(CATALOG_FIELD_TABLE_RECORD_TYPE)], None).await?; + "), vec![Some(&self.name), Some(&namespace.join(".")), Some(&tbl_name.clone()), Some(&tbl_metadata_location_str), Some(CATALOG_FIELD_TABLE_RECORD_TYPE)], None).await?; Ok(Table::builder() .file_io(self.fileio.clone()) - .metadata_location(tbl_metadata_location) + .metadata_location(tbl_metadata_location_str) .identifier(tbl_ident) .metadata(tbl_metadata) .build()?) @@ -949,13 +950,15 @@ impl Catalog for SqlCatalog { let current_metadata_location = current_table.metadata_location_result()?.to_string(); let staged_table = commit.apply(current_table)?; - let staged_metadata_location = staged_table.metadata_location_result()?; + let staged_metadata_location_str = staged_table.metadata_location_result()?; + let staged_metadata_location = MetadataLocation::from_str(staged_metadata_location_str)?; staged_table .metadata() .write_to(staged_table.file_io(), &staged_metadata_location) .await?; + let staged_metadata_location_str = staged_metadata_location.to_string(); let update_result = self .execute( &format!( @@ -971,7 +974,7 @@ impl Catalog for SqlCatalog { AND {CATALOG_FIELD_METADATA_LOCATION_PROP} = ?" 
), vec![ - Some(staged_metadata_location), + Some(&staged_metadata_location_str), Some(current_metadata_location.as_str()), Some(&self.name), Some(table_ident.name()), diff --git a/crates/iceberg/src/catalog/memory/catalog.rs b/crates/iceberg/src/catalog/memory/catalog.rs index e008de8050..992a0ed617 100644 --- a/crates/iceberg/src/catalog/memory/catalog.rs +++ b/crates/iceberg/src/catalog/memory/catalog.rs @@ -18,6 +18,7 @@ //! This module contains memory catalog implementation. use std::collections::HashMap; +use std::str::FromStr; use std::sync::Arc; use async_trait::async_trait; @@ -295,15 +296,15 @@ impl Catalog for MemoryCatalog { let metadata = TableMetadataBuilder::from_table_creation(table_creation)? .build()? .metadata; - let metadata_location = MetadataLocation::new_with_table_location(location).to_string(); + let metadata_location = MetadataLocation::new_with_metadata(location, &metadata); metadata.write_to(&self.file_io, &metadata_location).await?; - root_namespace_state.insert_new_table(&table_ident, metadata_location.clone())?; + root_namespace_state.insert_new_table(&table_ident, metadata_location.to_string())?; Table::builder() .file_io(self.file_io.clone()) - .metadata_location(metadata_location) + .metadata_location(metadata_location.to_string()) .metadata(metadata) .identifier(table_ident) .build() @@ -381,12 +382,11 @@ impl Catalog for MemoryCatalog { let staged_table = commit.apply(current_table)?; // Write table metadata to the new location + let metadata_location = + MetadataLocation::from_str(staged_table.metadata_location_result()?)?; staged_table .metadata() - .write_to( - staged_table.file_io(), - staged_table.metadata_location_result()?, - ) + .write_to(staged_table.file_io(), &metadata_location) .await?; // Flip the pointer to reference the new metadata file. 
diff --git a/crates/iceberg/src/catalog/metadata_location.rs b/crates/iceberg/src/catalog/metadata_location.rs index 3705ee42dc..ed28118879 100644 --- a/crates/iceberg/src/catalog/metadata_location.rs +++ b/crates/iceberg/src/catalog/metadata_location.rs @@ -15,41 +15,86 @@ // specific language governing permissions and limitations // under the License. +use std::collections::HashMap; use std::fmt::Display; use std::str::FromStr; use uuid::Uuid; +use crate::compression::CompressionCodec; +use crate::spec::{TableMetadata, parse_metadata_file_compression}; use crate::{Error, ErrorKind, Result}; /// Helper for parsing a location of the format: `/metadata/-.metadata.json` +/// or with compression: `/metadata/-.gz.metadata.json` #[derive(Clone, Debug, PartialEq)] pub struct MetadataLocation { table_location: String, version: i32, id: Uuid, + compression_codec: CompressionCodec, } impl MetadataLocation { + /// Determines the compression codec from table properties. + /// Parse errors result in CompressionCodec::None. + fn compression_from_properties(properties: &HashMap) -> CompressionCodec { + parse_metadata_file_compression(properties).unwrap_or(CompressionCodec::None) + } + /// Creates a completely new metadata location starting at version 0. - /// Only used for creating a new table. For updates, see `with_next_version`. + /// Only used for creating a new table. For updates, see `with_next_version`. + #[deprecated( + since = "0.8.0", + note = "Use new_with_metadata instead to properly handle compression settings" + )] pub fn new_with_table_location(table_location: impl ToString) -> Self { Self { table_location: table_location.to_string(), version: 0, id: Uuid::new_v4(), + compression_codec: CompressionCodec::None, + } + } + + /// Creates a completely new metadata location starting at version 0, + /// with compression settings from the table metadata. + /// Only used for creating a new table. For updates, see `with_next_version`.
+ pub fn new_with_metadata(table_location: impl ToString, metadata: &TableMetadata) -> Self { + Self { + table_location: table_location.to_string(), + version: 0, + id: Uuid::new_v4(), + compression_codec: Self::compression_from_properties(metadata.properties()), } } /// Creates a new metadata location for an updated metadata file. + /// Increments the version number and generates a new UUID. pub fn with_next_version(&self) -> Self { Self { table_location: self.table_location.clone(), version: self.version + 1, id: Uuid::new_v4(), + compression_codec: self.compression_codec, + } + } + + /// Updates the metadata location with compression settings from the new metadata. + pub fn with_new_metadata(&self, new_metadata: &TableMetadata) -> Self { + Self { + table_location: self.table_location.clone(), + version: self.version, + id: self.id, + compression_codec: Self::compression_from_properties(new_metadata.properties()), } } + /// Returns the compression codec used for this metadata location. + pub fn compression_codec(&self) -> CompressionCodec { + self.compression_codec + } + fn parse_metadata_path_prefix(path: &str) -> Result { let prefix = path.strip_suffix("/metadata").ok_or(Error::new( ErrorKind::Unexpected, @@ -59,30 +104,43 @@ impl MetadataLocation { Ok(prefix.to_string()) } - /// Parses a file name of the format `-.metadata.json`. - fn parse_file_name(file_name: &str) -> Result<(i32, Uuid)> { - let (version, id) = file_name - .strip_suffix(".metadata.json") - .ok_or(Error::new( - ErrorKind::Unexpected, - format!("Invalid metadata file ending: {file_name}"), - ))? - .split_once('-') - .ok_or(Error::new( - ErrorKind::Unexpected, - format!("Invalid metadata file name format: {file_name}"), - ))?; - - Ok((version.parse::()?, Uuid::parse_str(id)?)) + /// Parses a file name of the format `-.metadata.json` + /// or with compression: `-.gz.metadata.json`. + /// Parse errors for compression codec result in CompressionCodec::None. 
+ fn parse_file_name(file_name: &str) -> Result<(i32, Uuid, CompressionCodec)> { + let stripped = file_name.strip_suffix(".metadata.json").ok_or(Error::new( + ErrorKind::Unexpected, + format!("Invalid metadata file ending: {file_name}"), + ))?; + + // Check for compression suffix (e.g., .gz) + let gzip_suffix = CompressionCodec::Gzip.suffix()?; + let (stripped, compression_codec) = if let Some(s) = stripped.strip_suffix(gzip_suffix) { + (s, CompressionCodec::Gzip) + } else { + (stripped, CompressionCodec::None) + }; + + let (version, id) = stripped.split_once('-').ok_or(Error::new( + ErrorKind::Unexpected, + format!("Invalid metadata file name format: {file_name}"), + ))?; + + Ok(( + version.parse::()?, + Uuid::parse_str(id)?, + compression_codec, + )) } } impl Display for MetadataLocation { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let suffix = self.compression_codec.suffix().unwrap_or(""); write!( f, - "{}/metadata/{:0>5}-{}.metadata.json", - self.table_location, self.version, self.id + "{}/metadata/{:0>5}-{}{}.metadata.json", + self.table_location, self.version, self.id, suffix ) } } @@ -97,23 +155,41 @@ impl FromStr for MetadataLocation { ))?; let prefix = Self::parse_metadata_path_prefix(path)?; - let (version, id) = Self::parse_file_name(file_name)?; + let (version, id, compression_codec) = Self::parse_file_name(file_name)?; Ok(MetadataLocation { table_location: prefix, version, id, + compression_codec, }) } } #[cfg(test)] mod test { + use std::collections::HashMap; use std::str::FromStr; use uuid::Uuid; - use crate::MetadataLocation; + use crate::compression::CompressionCodec; + use crate::spec::{Schema, TableMetadata, TableMetadataBuilder}; + use crate::{MetadataLocation, TableCreation}; + + fn create_test_metadata(properties: HashMap) -> TableMetadata { + let table_creation = TableCreation::builder() + .name("test_table".to_string()) + .location("/test/table".to_string()) + .schema(Schema::builder().build().unwrap()) + 
.properties(properties) + .build(); + TableMetadataBuilder::from_table_creation(table_creation) + .unwrap() + .build() + .unwrap() + .metadata + } #[test] fn test_metadata_location_from_string() { @@ -125,6 +201,7 @@ mod test { table_location: "".to_string(), version: 1234567, id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), + compression_codec: CompressionCodec::None, }), ), // Some prefix @@ -134,6 +211,7 @@ mod test { table_location: "/abc".to_string(), version: 1234567, id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), + compression_codec: CompressionCodec::None, }), ), // Longer prefix @@ -143,6 +221,7 @@ mod test { table_location: "/abc/def".to_string(), version: 1234567, id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), + compression_codec: CompressionCodec::None, }), ), // Prefix with special characters @@ -152,6 +231,7 @@ mod test { table_location: "https://127.0.0.1".to_string(), version: 1234567, id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), + compression_codec: CompressionCodec::None, }), ), // Another id @@ -161,6 +241,7 @@ mod test { table_location: "/abc".to_string(), version: 1234567, id: Uuid::from_str("81056704-ce5b-41c4-bb83-eb6408081af6").unwrap(), + compression_codec: CompressionCodec::None, }), ), // Version 0 @@ -170,6 +251,17 @@ mod test { table_location: "/abc".to_string(), version: 0, id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), + compression_codec: CompressionCodec::None, + }), + ), + // With gzip compression + ( + "/abc/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.gz.metadata.json", + Ok(MetadataLocation { + table_location: "/abc".to_string(), + version: 1234567, + id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), + compression_codec: CompressionCodec::Gzip, }), ), // Negative version @@ -216,8 +308,9 @@ mod test { #[test] fn test_metadata_location_with_next_version() { + let metadata = 
create_test_metadata(HashMap::new()); let test_cases = vec![ - MetadataLocation::new_with_table_location("/abc"), + MetadataLocation::new_with_metadata("/abc", &metadata), MetadataLocation::from_str( "/abc/def/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", ) @@ -233,4 +326,79 @@ mod test { assert_ne!(next.id, input.id); } } + + #[test] + fn test_with_next_version_preserves_compression() { + // Start from a parsed location with no compression + let location_none = MetadataLocation::from_str( + "/test/table/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", + ) + .unwrap(); + assert_eq!(location_none.compression_codec, CompressionCodec::None); + + let next_none = location_none.with_next_version(); + assert_eq!(next_none.compression_codec, CompressionCodec::None); + assert_eq!(next_none.version, 1); + + // Start from a parsed location with gzip compression + let location_gzip = MetadataLocation::from_str( + "/test/table/metadata/00005-81056704-ce5b-41c4-bb83-eb6408081af6.gz.metadata.json", + ) + .unwrap(); + assert_eq!(location_gzip.compression_codec, CompressionCodec::Gzip); + + let next_gzip = location_gzip.with_next_version(); + assert_eq!(next_gzip.compression_codec, CompressionCodec::Gzip); + assert_eq!(next_gzip.version, 6); + } + + #[test] + fn test_with_new_metadata_updates_compression() { + // Start from a parsed location with no compression + let location = MetadataLocation::from_str( + "/test/table/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", + ) + .unwrap(); + assert_eq!(location.compression_codec, CompressionCodec::None); + + // Update to gzip compression + let mut props_gzip = HashMap::new(); + props_gzip.insert( + "write.metadata.compression-codec".to_string(), + "gzip".to_string(), + ); + let metadata_gzip = create_test_metadata(props_gzip); + let updated_gzip = location.with_new_metadata(&metadata_gzip); + assert_eq!(updated_gzip.compression_codec, CompressionCodec::Gzip); + 
assert_eq!(updated_gzip.version, 0); + assert_eq!( + updated_gzip.to_string(), + "/test/table/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.gz.metadata.json" + ); + + // Update back to no compression + let props_none = HashMap::new(); + let metadata_none = create_test_metadata(props_none); + let updated_none = updated_gzip.with_new_metadata(&metadata_none); + assert_eq!(updated_none.compression_codec, CompressionCodec::None); + assert_eq!(updated_none.version, 0); + assert_eq!( + updated_none.to_string(), + "/test/table/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json" + ); + + // Test explicit "none" codec + let mut props_explicit_none = HashMap::new(); + props_explicit_none.insert( + "write.metadata.compression-codec".to_string(), + "none".to_string(), + ); + let metadata_explicit_none = create_test_metadata(props_explicit_none); + let updated_explicit = updated_gzip.with_new_metadata(&metadata_explicit_none); + assert_eq!(updated_explicit.compression_codec, CompressionCodec::None); + assert_eq!( + updated_explicit.to_string(), + "/test/table/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json" + ); + } } diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index 8db9674ad9..06326917ec 100644 --- a/crates/iceberg/src/catalog/mod.rs +++ b/crates/iceberg/src/catalog/mod.rs @@ -382,13 +382,16 @@ impl TableCommit { metadata_builder = update.apply(metadata_builder)?; } - // Bump the version of metadata + // Build the new metadata + let new_metadata = metadata_builder.build()?.metadata; + let new_metadata_location = MetadataLocation::from_str(current_metadata_location)? 
.with_next_version() + .with_new_metadata(&new_metadata) .to_string(); Ok(table - .with_metadata(Arc::new(metadata_builder.build()?.metadata)) + .with_metadata(Arc::new(new_metadata)) .with_metadata_location(new_metadata_location)) } } diff --git a/crates/iceberg/src/compression.rs b/crates/iceberg/src/compression.rs index 1218d81df6..42f5298437 100644 --- a/crates/iceberg/src/compression.rs +++ b/crates/iceberg/src/compression.rs @@ -85,6 +85,23 @@ impl CompressionCodec { pub(crate) fn is_none(&self) -> bool { matches!(self, CompressionCodec::None) } + + /// Returns the file extension suffix for this compression codec. + /// Returns empty string for None, ".gz" for Gzip. + /// + /// # Errors + /// + /// Returns an error for Lz4 and Zstd as they are not fully supported. + pub fn suffix(&self) -> Result<&'static str> { + match self { + CompressionCodec::None => Ok(""), + CompressionCodec::Gzip => Ok(".gz"), + codec @ (CompressionCodec::Lz4 | CompressionCodec::Zstd) => Err(Error::new( + ErrorKind::FeatureUnsupported, + format!("suffix not defined for {codec:?}"), + )), + } + } } #[cfg(test)] @@ -133,4 +150,21 @@ mod tests { ); } } + + #[test] + fn test_suffix() { + // Test supported codecs + assert_eq!(CompressionCodec::None.suffix().unwrap(), ""); + assert_eq!(CompressionCodec::Gzip.suffix().unwrap(), ".gz"); + + // Test unsupported codecs return errors + assert!(CompressionCodec::Lz4.suffix().is_err()); + assert!(CompressionCodec::Zstd.suffix().is_err()); + + let lz4_err = CompressionCodec::Lz4.suffix().unwrap_err(); + assert!(lz4_err.to_string().contains("suffix not defined for Lz4")); + + let zstd_err = CompressionCodec::Zstd.suffix().unwrap_err(); + assert!(zstd_err.to_string().contains("suffix not defined for Zstd")); + } } diff --git a/crates/iceberg/src/spec/mod.rs b/crates/iceberg/src/spec/mod.rs index 707ebbb630..b23ca1eda0 100644 --- a/crates/iceberg/src/spec/mod.rs +++ b/crates/iceberg/src/spec/mod.rs @@ -50,6 +50,7 @@ pub use sort::*; pub use 
statistic_file::*; pub use table_metadata::*; pub(crate) use table_metadata_builder::FIRST_FIELD_ID; +pub(crate) use table_properties::parse_metadata_file_compression; pub use table_properties::*; pub use transform::*; pub(crate) use values::decimal_utils; diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index 3e6374d58d..b91599b74f 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -35,8 +35,9 @@ pub use super::table_metadata_builder::{TableMetadataBuildResult, TableMetadataB use super::{ DEFAULT_PARTITION_SPEC_ID, PartitionSpecRef, PartitionStatisticsFile, SchemaId, SchemaRef, SnapshotRef, SnapshotRetention, SortOrder, SortOrderRef, StatisticsFile, StructType, - TableProperties, + TableProperties, parse_metadata_file_compression, }; +use crate::catalog::MetadataLocation; use crate::compression::CompressionCodec; use crate::error::{Result, timestamp_ms_to_utc}; use crate::io::FileIO; @@ -360,6 +361,18 @@ impl TableMetadata { &self.properties } + /// Returns the metadata compression codec from table properties. + /// + /// Returns `CompressionCodec::None` if compression is disabled or not configured. + /// Returns `CompressionCodec::Gzip` if gzip compression is enabled. + /// + /// # Errors + /// + /// Returns an error if the compression codec property has an invalid value. + pub fn metadata_compression_codec(&self) -> Result { + parse_metadata_file_compression(&self.properties) + } + /// Returns typed table properties parsed from the raw properties map with defaults. 
pub fn table_properties(&self) -> Result { TableProperties::try_from(&self.properties).map_err(|e| { @@ -466,11 +479,39 @@ impl TableMetadata { pub async fn write_to( &self, file_io: &FileIO, - metadata_location: impl AsRef, + metadata_location: &MetadataLocation, ) -> Result<()> { + let json_data = serde_json::to_vec(self)?; + + // Check if compression codec from properties matches the one in metadata_location + let codec = parse_metadata_file_compression(&self.properties)?; + + if codec != metadata_location.compression_codec() { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Compression codec mismatch: metadata_location has {:?}, but table properties specify {:?}", + metadata_location.compression_codec(), + codec + ), + )); + } + + // Apply compression based on codec + let data_to_write = match codec { + CompressionCodec::Gzip => codec.compress(json_data)?, + CompressionCodec::None => json_data, + _ => { + return Err(Error::new( + ErrorKind::DataInvalid, + format!("Unsupported metadata compression codec: {codec:?}"), + )); + } + }; + file_io - .new_output(metadata_location)? - .write(serde_json::to_vec(self)?.into()) + .new_output(metadata_location.to_string())? 
+ .write(data_to_write.into()) .await } @@ -1567,6 +1608,7 @@ mod tests { use uuid::Uuid; use super::{FormatVersion, MetadataLog, SnapshotLog, TableMetadataBuilder}; + use crate::catalog::MetadataLocation; use crate::compression::CompressionCodec; use crate::io::FileIO; use crate::spec::table_metadata::TableMetadata; @@ -1574,7 +1616,7 @@ mod tests { BlobMetadata, EncryptedKey, INITIAL_ROW_ID, Literal, NestedField, NullOrder, Operation, PartitionSpec, PartitionStatisticsFile, PrimitiveLiteral, PrimitiveType, Schema, Snapshot, SnapshotReference, SnapshotRetention, SortDirection, SortField, SortOrder, StatisticsFile, - Summary, Transform, Type, UnboundPartitionField, + Summary, TableProperties, Transform, Type, UnboundPartitionField, }; use crate::{ErrorKind, TableCreation}; @@ -3547,7 +3589,8 @@ mod tests { let original_metadata: TableMetadata = get_test_table_metadata("TableMetadataV2Valid.json"); // Define the metadata location - let metadata_location = format!("{temp_path}/metadata.json"); + let metadata_location = MetadataLocation::new_with_metadata(temp_path, &original_metadata); + let metadata_location_str = metadata_location.to_string(); // Write the metadata original_metadata @@ -3556,10 +3599,10 @@ mod tests { .unwrap(); // Verify the file exists - assert!(fs::metadata(&metadata_location).is_ok()); + assert!(fs::metadata(&metadata_location_str).is_ok()); // Read the metadata back - let read_metadata = TableMetadata::read_from(&file_io, &metadata_location) + let read_metadata = TableMetadata::read_from(&file_io, &metadata_location_str) .await .unwrap(); @@ -3603,6 +3646,63 @@ mod tests { assert!(result.is_err()); } + #[tokio::test] + async fn test_table_metadata_write_with_gzip_compression() { + let temp_dir = TempDir::new().unwrap(); + let temp_path = temp_dir.path().to_str().unwrap(); + let file_io = FileIO::new_with_fs(); + + // Get a test metadata and add gzip compression property + let original_metadata: TableMetadata = 
get_test_table_metadata("TableMetadataV2Valid.json"); + + // Modify properties to enable gzip compression (using mixed case to test case-insensitive matching) + let mut props = original_metadata.properties.clone(); + props.insert( + TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC.to_string(), + "GziP".to_string(), + ); + // Use builder to create new metadata with updated properties + let compressed_metadata = + TableMetadataBuilder::new_from_metadata(original_metadata.clone(), None) + .assign_uuid(original_metadata.table_uuid) + .set_properties(props.clone()) + .unwrap() + .build() + .unwrap() + .metadata; + + // Create MetadataLocation with compression codec from metadata + let metadata_location = + MetadataLocation::new_with_metadata(temp_path, &compressed_metadata); + let metadata_location_str = metadata_location.to_string(); + + // Verify the location has the .gz extension + assert!(metadata_location_str.contains(".gz.metadata.json")); + + // Write the metadata with compression + compressed_metadata + .write_to(&file_io, &metadata_location) + .await + .unwrap(); + + // Verify the compressed file exists + assert!(std::path::Path::new(&metadata_location_str).exists()); + + // Read the raw file and check it's gzip compressed + let raw_content = std::fs::read(&metadata_location_str).unwrap(); + assert!(raw_content.len() > 2); + assert_eq!(raw_content[0], 0x1F); // gzip magic number + assert_eq!(raw_content[1], 0x8B); // gzip magic number + + // Read the metadata back using the compressed location + let read_metadata = TableMetadata::read_from(&file_io, &metadata_location_str) + .await + .unwrap(); + + // Verify the complete round-trip: read metadata should match what we wrote + assert_eq!(read_metadata, compressed_metadata); + } + #[test] fn test_partition_name_exists() { let schema = Schema::builder() diff --git a/crates/iceberg/src/spec/table_properties.rs b/crates/iceberg/src/spec/table_properties.rs index 413604f51c..6e08318479 100644 --- 
a/crates/iceberg/src/spec/table_properties.rs +++ b/crates/iceberg/src/spec/table_properties.rs @@ -16,24 +16,85 @@ // under the License. use std::collections::HashMap; +use std::fmt::Display; +use std::str::FromStr; + +use crate::compression::CompressionCodec; +use crate::error::{Error, ErrorKind, Result}; // Helper function to parse a property from a HashMap // If the property is not found, use the default value -fn parse_property( +fn parse_property( properties: &HashMap, key: &str, default: T, -) -> Result +) -> Result where - ::Err: std::fmt::Display, + ::Err: Display, { properties.get(key).map_or(Ok(default), |value| { - value - .parse::() - .map_err(|e| anyhow::anyhow!("Invalid value for {key}: {e}")) + value.parse::().map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!("Invalid value for {key}: {e}"), + ) + }) }) } +/// Parse compression codec for metadata files from table properties. +/// Retrieves the compression codec property, applies defaults, and parses the value. +/// Only "none" (or empty string) and "gzip" are supported for metadata compression. +/// +/// # Arguments +/// +/// * `properties` - HashMap containing table properties +/// +/// # Errors +/// +/// Returns an error if the codec is not "none", "", or "gzip" (case-insensitive). +/// Lz4 and Zstd are not supported for metadata file compression. 
+pub(crate) fn parse_metadata_file_compression( + properties: &HashMap, +) -> Result { + let value = properties + .get(TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC) + .map(|s| s.as_str()) + .unwrap_or(TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC_DEFAULT); + + // Handle empty string as None + if value.is_empty() { + return Ok(CompressionCodec::None); + } + + // Lowercase the value for case-insensitive parsing + let lowercase_value = value.to_lowercase(); + + // Use serde to parse the codec (which has rename_all = "lowercase") + let codec: CompressionCodec = serde_json::from_value(serde_json::Value::String( + lowercase_value, + )) + .map_err(|_| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid metadata compression codec: {value}. Only 'none' and 'gzip' are supported." + ), + ) + })?; + + // Validate that only None and Gzip are used for metadata + match codec { + CompressionCodec::None | CompressionCodec::Gzip => Ok(codec), + CompressionCodec::Lz4 | CompressionCodec::Zstd => Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid metadata compression codec: {value}. Only 'none' and 'gzip' are supported for metadata files." + ), + )), + } +} + /// TableProperties that contains the properties of a table. #[derive(Debug)] pub struct TableProperties { @@ -49,6 +110,8 @@ pub struct TableProperties { pub write_format_default: String, /// The target file size for files. pub write_target_file_size_bytes: usize, + /// Compression codec for metadata files (JSON) + pub metadata_compression_codec: CompressionCodec, /// Whether to use `FanoutWriter` for partitioned tables. 
pub write_datafusion_fanout_enabled: bool, } @@ -139,6 +202,11 @@ impl TableProperties { pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES: &str = "write.target-file-size-bytes"; /// Default target file size pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT: usize = 512 * 1024 * 1024; // 512 MB + + /// Compression codec for metadata files (JSON) + pub const PROPERTY_METADATA_COMPRESSION_CODEC: &str = "write.metadata.compression-codec"; + /// Default metadata compression codec - uncompressed + pub const PROPERTY_METADATA_COMPRESSION_CODEC_DEFAULT: &str = "none"; /// Whether to use `FanoutWriter` for partitioned tables (handles unsorted data). /// If false, uses `ClusteredWriter` (requires sorted data, more memory efficient). pub const PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED: &str = "write.datafusion.fanout.enabled"; @@ -148,9 +216,9 @@ impl TableProperties { impl TryFrom<&HashMap> for TableProperties { // parse by entry key or use default value - type Error = anyhow::Error; + type Error = Error; - fn try_from(props: &HashMap) -> Result { + fn try_from(props: &HashMap) -> Result { Ok(TableProperties { commit_num_retries: parse_property( props, @@ -182,6 +250,7 @@ impl TryFrom<&HashMap> for TableProperties { TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES, TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT, )?, + metadata_compression_codec: parse_metadata_file_compression(props)?, write_datafusion_fanout_enabled: parse_property( props, TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED, @@ -194,6 +263,7 @@ impl TryFrom<&HashMap> for TableProperties { #[cfg(test)] mod tests { use super::*; + use crate::compression::CompressionCodec; #[test] fn test_table_properties_default() { @@ -219,6 +289,73 @@ mod tests { table_properties.write_target_file_size_bytes, TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT ); + // Test compression defaults (none means CompressionCodec::None) + assert_eq!( + 
table_properties.metadata_compression_codec, + CompressionCodec::None + ); + } + + #[test] + fn test_table_properties_compression() { + let props = HashMap::from([( + TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC.to_string(), + "gzip".to_string(), + )]); + let table_properties = TableProperties::try_from(&props).unwrap(); + assert_eq!( + table_properties.metadata_compression_codec, + CompressionCodec::Gzip + ); + } + + #[test] + fn test_table_properties_compression_none() { + let props = HashMap::from([( + TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC.to_string(), + "none".to_string(), + )]); + let table_properties = TableProperties::try_from(&props).unwrap(); + assert_eq!( + table_properties.metadata_compression_codec, + CompressionCodec::None + ); + } + + #[test] + fn test_table_properties_compression_case_insensitive() { + // Test uppercase + let props_upper = HashMap::from([( + TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC.to_string(), + "GZIP".to_string(), + )]); + let table_properties = TableProperties::try_from(&props_upper).unwrap(); + assert_eq!( + table_properties.metadata_compression_codec, + CompressionCodec::Gzip + ); + + // Test mixed case + let props_mixed = HashMap::from([( + TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC.to_string(), + "GzIp".to_string(), + )]); + let table_properties = TableProperties::try_from(&props_mixed).unwrap(); + assert_eq!( + table_properties.metadata_compression_codec, + CompressionCodec::Gzip + ); + + // Test "NONE" should also be case-insensitive + let props_none_upper = HashMap::from([( + TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC.to_string(), + "NONE".to_string(), + )]); + let table_properties = TableProperties::try_from(&props_none_upper).unwrap(); + assert_eq!( + table_properties.metadata_compression_codec, + CompressionCodec::None + ); } #[test] @@ -293,4 +430,118 @@ mod tests { "Invalid value for write.target-file-size-bytes: invalid digit found in string" )); } + + #[test] + 
fn test_table_properties_compression_invalid_rejected() { + let invalid_codecs = ["lz4", "zstd", "snappy"]; + + for codec in invalid_codecs { + let props = HashMap::from([( + TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC.to_string(), + codec.to_string(), + )]); + let err = TableProperties::try_from(&props).unwrap_err(); + let err_msg = err.to_string(); + assert!( + err_msg.contains(&format!("Invalid metadata compression codec: {codec}")), + "Expected error message to contain codec '{codec}', got: {err_msg}" + ); + assert!( + err_msg.contains("Only 'none' and 'gzip' are supported"), + "Expected error message to contain supported codecs, got: {err_msg}" + ); + } + } + + #[test] + fn test_parse_metadata_file_compression_valid() { + // Test with "none" + let props = HashMap::from([( + TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC.to_string(), + "none".to_string(), + )]); + assert_eq!( + parse_metadata_file_compression(&props).unwrap(), + CompressionCodec::None + ); + + // Test with empty string + let props = HashMap::from([( + TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC.to_string(), + "".to_string(), + )]); + assert_eq!( + parse_metadata_file_compression(&props).unwrap(), + CompressionCodec::None + ); + + // Test with "gzip" + let props = HashMap::from([( + TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC.to_string(), + "gzip".to_string(), + )]); + assert_eq!( + parse_metadata_file_compression(&props).unwrap(), + CompressionCodec::Gzip + ); + + // Test case insensitivity - "NONE" + let props = HashMap::from([( + TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC.to_string(), + "NONE".to_string(), + )]); + assert_eq!( + parse_metadata_file_compression(&props).unwrap(), + CompressionCodec::None + ); + + // Test case insensitivity - "GZIP" + let props = HashMap::from([( + TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC.to_string(), + "GZIP".to_string(), + )]); + assert_eq!( + parse_metadata_file_compression(&props).unwrap(), + 
CompressionCodec::Gzip + ); + + // Test case insensitivity - "GzIp" + let props = HashMap::from([( + TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC.to_string(), + "GzIp".to_string(), + )]); + assert_eq!( + parse_metadata_file_compression(&props).unwrap(), + CompressionCodec::Gzip + ); + + // Test default when property is missing + let props = HashMap::new(); + assert_eq!( + parse_metadata_file_compression(&props).unwrap(), + CompressionCodec::None + ); + } + + #[test] + fn test_parse_metadata_file_compression_invalid() { + let invalid_codecs = ["lz4", "zstd", "snappy"]; + + for codec in invalid_codecs { + let props = HashMap::from([( + TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC.to_string(), + codec.to_string(), + )]); + let err = parse_metadata_file_compression(&props).unwrap_err(); + let err_msg = err.to_string(); + assert!( + err_msg.contains("Invalid metadata compression codec"), + "Expected error message to contain 'Invalid metadata compression codec', got: {err_msg}" + ); + assert!( + err_msg.contains("Only 'none' and 'gzip' are supported"), + "Expected error message to contain supported codecs, got: {err_msg}" + ); + } + } } From 619b98f8622b7a2d9d2c04fc331a079362ea7009 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 11 Mar 2026 08:54:50 +0800 Subject: [PATCH 03/39] chore(deps): Bump quinn-proto from 0.11.13 to 0.11.14 in /bindings/python (#2228) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [quinn-proto](https://github.com/quinn-rs/quinn) from 0.11.13 to 0.11.14.
Release notes

Sourced from quinn-proto's releases.

quinn-proto 0.11.14

@jxs reported a denial of service issue in quinn-proto 5 days ago:

We coordinated with them to release this version to patch the issue. Unfortunately the maintainers missed these issues during code review and we did not have enough fuzzing coverage -- we regret the oversight and have added an additional fuzzing target.

Organizations that want to participate in coordinated disclosure can contact us privately to discuss terms.

What's Changed

Commits
  • 2c315aa proto: bump version to 0.11.14
  • 8ad47f4 Use newer rustls-pki-types PEM parser API
  • c81c028 ci: fix workflow syntax
  • 0050172 ci: pin wasm-bindgen-cli version
  • 8a6f82c Take semver-compatible dependency updates
  • e52db4a Apply suggestions from clippy 1.91
  • 6df7275 chore: Fix unnecessary_unwrap clippy
  • c8eefa0 proto: avoid unwrapping varint decoding during parameters parsing
  • 9723a97 fuzz: add fuzzing target for parsing transport parameters
  • eaf0ef3 Fix over-permissive proto dependency edge (#2385)
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=quinn-proto&package-manager=cargo&previous-version=0.11.13&new-version=0.11.14)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/apache/iceberg-rust/network/alerts).
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- bindings/python/Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 7667a0b1a3..5dac677618 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -3329,9 +3329,9 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.13" +version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ "bytes", "getrandom 0.3.4", From 335961ae013b491667d8c815b26aa6a2c43d4de2 Mon Sep 17 00:00:00 2001 From: blackmwk Date: Thu, 12 Mar 2026 15:33:49 +0800 Subject: [PATCH 04/39] Add catalog test suite to unify catalog's behavior. (#2131) ## Which issue does this PR close? - Closes #2086 . ## What changes are included in this PR? In this pr we introduced catalog test suite in catalog-loader, which could unify the behavior of catalogs. ## Are these changes tested? Yes. 
--------- Co-authored-by: Ray Liu --- Cargo.lock | 47 +- Cargo.toml | 1 + crates/catalog/glue/Cargo.toml | 1 - crates/catalog/glue/src/catalog.rs | 37 +- .../catalog/glue/tests/glue_catalog_test.rs | 499 ------------------ crates/catalog/hms/src/catalog.rs | 65 ++- crates/catalog/hms/tests/hms_catalog_test.rs | 296 +---------- crates/catalog/loader/Cargo.toml | 4 + crates/catalog/loader/tests/common/mod.rs | 342 ++++++++++++ .../catalog/loader/tests/namespace_suite.rs | 355 +++++++++++++ .../loader/tests/table_register_suite.rs | 167 ++++++ .../loader/tests/table_rename_suite.rs | 163 ++++++ crates/catalog/loader/tests/table_suite.rs | 276 ++++++++++ crates/catalog/rest/Cargo.toml | 1 - crates/catalog/rest/src/catalog.rs | 22 +- .../catalog/rest/tests/rest_catalog_test.rs | 497 ----------------- crates/catalog/s3tables/src/catalog.rs | 21 + crates/catalog/sql/src/catalog.rs | 449 +--------------- crates/catalog/sql/src/error.rs | 6 +- crates/iceberg/src/catalog/memory/catalog.rs | 4 +- 20 files changed, 1496 insertions(+), 1757 deletions(-) delete mode 100644 crates/catalog/glue/tests/glue_catalog_test.rs create mode 100644 crates/catalog/loader/tests/common/mod.rs create mode 100644 crates/catalog/loader/tests/namespace_suite.rs create mode 100644 crates/catalog/loader/tests/table_register_suite.rs create mode 100644 crates/catalog/loader/tests/table_rename_suite.rs create mode 100644 crates/catalog/loader/tests/table_suite.rs delete mode 100644 crates/catalog/rest/tests/rest_catalog_test.rs diff --git a/Cargo.lock b/Cargo.lock index 43b46822cf..8419709a42 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2878,6 +2878,12 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.32" @@ -3386,7 +3392,6 @@ dependencies = [ "iceberg_test_utils", "serde_json", "tokio", - "tracing", ] [[package]] @@ -3423,6 +3428,10 @@ dependencies = [ "iceberg-catalog-rest", "iceberg-catalog-s3tables", "iceberg-catalog-sql", + "iceberg-storage-opendal", + "iceberg_test_utils", + "reqwest", + "rstest", "sqlx", "tempfile", "tokio", @@ -3444,7 +3453,6 @@ dependencies = [ "serde_derive", "serde_json", "tokio", - "tracing", "typed-builder", "uuid", ] @@ -5355,6 +5363,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "relative-path" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" + [[package]] name = "rend" version = "0.5.3" @@ -5510,6 +5524,35 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rstest" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5a3193c063baaa2a95a33f03035c8a72b83d97a54916055ba22d35ed3839d49" +dependencies = [ + "futures-timer", + "futures-util", + "rstest_macros", +] + +[[package]] +name = "rstest_macros" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c845311f0ff7951c5506121a9ad75aec44d083c31583b2ea5a30bcb0b0abba0" +dependencies = [ + "cfg-if", + "glob", + "proc-macro-crate", + "proc-macro2", + "quote", + "regex", + "relative-path", + "rustc_version", + "syn", + "unicode-ident", +] + [[package]] name = "rust-ini" version = "0.21.3" diff --git a/Cargo.toml b/Cargo.toml index b3290d564f..6a361ecbd8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -110,6 +110,7 @@ rand = "0.8.5" regex = "1.11.3" reqwest = { version = "0.12.12", default-features = false, features = ["json"] } roaring = { version = "0.11" } +rstest = "0.26" fastnum = { version = "0.7", default-features = false, features = ["std", "serde"] } 
serde = { version = "1.0.219", features = ["rc"] } serde_bytes = "0.11.17" diff --git a/crates/catalog/glue/Cargo.toml b/crates/catalog/glue/Cargo.toml index e41253de36..d8d0927a90 100644 --- a/crates/catalog/glue/Cargo.toml +++ b/crates/catalog/glue/Cargo.toml @@ -37,7 +37,6 @@ iceberg = { workspace = true } iceberg-storage-opendal = { workspace = true, features = ["opendal-s3"] } serde_json = { workspace = true } tokio = { workspace = true } -tracing = { workspace = true } [dev-dependencies] iceberg_test_utils = { path = "../../test_utils", features = ["tests"] } diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs index 968b197169..9e9d4580c3 100644 --- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -339,6 +339,13 @@ impl Catalog for GlueCatalog { namespace: &NamespaceIdent, properties: HashMap, ) -> Result { + if self.namespace_exists(namespace).await? { + return Err(Error::new( + ErrorKind::NamespaceAlreadyExists, + format!("Namespace {namespace:?} already exists"), + )); + } + let db_input = convert_to_database(namespace, &properties)?; let builder = self.client.0.create_database().database_input(db_input); @@ -365,7 +372,19 @@ impl Catalog for GlueCatalog { let builder = self.client.0.get_database().name(&db_name); let builder = with_catalog_id!(builder, self.config); - let resp = builder.send().await.map_err(from_aws_sdk_error)?; + let resp = builder.send().await.map_err(|err| { + if err + .as_service_error() + .map(|e| e.is_entity_not_found_exception()) + == Some(true) + { + return Error::new( + ErrorKind::NamespaceNotFound, + format!("Namespace {namespace:?} does not exist"), + ); + } + from_aws_sdk_error(err) + })?; match resp.database() { Some(db) => { @@ -373,7 +392,7 @@ impl Catalog for GlueCatalog { Ok(namespace) } None => Err(Error::new( - ErrorKind::DataInvalid, + ErrorKind::NamespaceNotFound, format!("Database with name: {db_name} does not exist"), )), } @@ -429,6 +448,13 @@ impl 
Catalog for GlueCatalog { namespace: &NamespaceIdent, properties: HashMap, ) -> Result<()> { + if !self.namespace_exists(namespace).await? { + return Err(Error::new( + ErrorKind::NamespaceNotFound, + format!("Namespace {namespace:?} does not exist"), + )); + } + let db_name = validate_namespace(namespace)?; let db_input = convert_to_database(namespace, &properties)?; @@ -456,6 +482,13 @@ impl Catalog for GlueCatalog { /// - `Err(...)` signifies failure to drop the namespace due to validation /// errors, connectivity issues, or Glue Catalog constraints. async fn drop_namespace(&self, namespace: &NamespaceIdent) -> Result<()> { + if !self.namespace_exists(namespace).await? { + return Err(Error::new( + ErrorKind::NamespaceNotFound, + format!("Namespace {namespace:?} does not exist"), + )); + } + let db_name = validate_namespace(namespace)?; let table_list = self.list_tables(namespace).await?; diff --git a/crates/catalog/glue/tests/glue_catalog_test.rs b/crates/catalog/glue/tests/glue_catalog_test.rs deleted file mode 100644 index 0b7dbe9f23..0000000000 --- a/crates/catalog/glue/tests/glue_catalog_test.rs +++ /dev/null @@ -1,499 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Integration tests for glue catalog. -//! -//! These tests assume Docker containers are started externally via `make docker-up`. -//! Each test uses unique namespaces based on module path to avoid conflicts. - -use std::collections::HashMap; -use std::sync::Arc; - -use iceberg::io::{FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY}; -use iceberg::spec::{NestedField, PrimitiveType, Schema, Type}; -use iceberg::transaction::{ApplyTransactionAction, Transaction}; -use iceberg::{ - Catalog, CatalogBuilder, Namespace, NamespaceIdent, Result, TableCreation, TableIdent, -}; -use iceberg_catalog_glue::{ - AWS_ACCESS_KEY_ID, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, GLUE_CATALOG_PROP_URI, - GLUE_CATALOG_PROP_WAREHOUSE, GlueCatalog, GlueCatalogBuilder, -}; -use iceberg_storage_opendal::OpenDalStorageFactory; -use iceberg_test_utils::{ - cleanup_namespace, get_glue_endpoint, get_minio_endpoint, normalize_test_name_with_parts, - set_up, -}; -use tokio::time::sleep; -use tracing::info; - -async fn get_catalog() -> GlueCatalog { - set_up(); - - let glue_endpoint = get_glue_endpoint(); - let minio_endpoint = get_minio_endpoint(); - - let props = HashMap::from([ - (AWS_ACCESS_KEY_ID.to_string(), "my_access_id".to_string()), - ( - AWS_SECRET_ACCESS_KEY.to_string(), - "my_secret_key".to_string(), - ), - (AWS_REGION_NAME.to_string(), "us-east-1".to_string()), - (S3_ENDPOINT.to_string(), minio_endpoint), - (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()), - (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()), - (S3_REGION.to_string(), "us-east-1".to_string()), - ]); - - // Wait for bucket to actually exist - let file_io = FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 { - configured_scheme: "s3a".to_string(), - customized_credential_load: None, - })) - .with_props(props.clone()) - .build(); - - let mut retries = 0; - while retries < 30 { - if file_io.exists("s3a://warehouse/").await.unwrap_or(false) { - info!("S3 bucket 'warehouse' is 
ready"); - break; - } - info!("Waiting for bucket creation... (attempt {})", retries + 1); - sleep(std::time::Duration::from_millis(1000)).await; - retries += 1; - } - - let mut glue_props = HashMap::from([ - (GLUE_CATALOG_PROP_URI.to_string(), glue_endpoint), - ( - GLUE_CATALOG_PROP_WAREHOUSE.to_string(), - "s3a://warehouse/hive".to_string(), - ), - ]); - glue_props.extend(props.clone()); - - GlueCatalogBuilder::default() - .load("glue", glue_props) - .await - .unwrap() -} - -async fn set_test_namespace(catalog: &GlueCatalog, namespace: &NamespaceIdent) -> Result<()> { - let properties = HashMap::new(); - catalog.create_namespace(namespace, properties).await?; - - Ok(()) -} - -fn set_table_creation(location: Option, name: impl ToString) -> Result { - let schema = Schema::builder() - .with_schema_id(0) - .with_fields(vec![ - NestedField::required(1, "foo", Type::Primitive(PrimitiveType::Int)).into(), - NestedField::required(2, "bar", Type::Primitive(PrimitiveType::String)).into(), - ]) - .build()?; - - let builder = TableCreation::builder() - .name(name.to_string()) - .properties(HashMap::new()) - .location_opt(location) - .schema(schema); - - Ok(builder.build()) -} - -#[tokio::test] -async fn test_rename_table() -> Result<()> { - let catalog = get_catalog().await; - let creation = set_table_creation(None, "my_table")?; - // Use unique namespace to avoid conflicts - let namespace = Namespace::new(NamespaceIdent::new(normalize_test_name_with_parts!( - "test_rename_table" - ))); - cleanup_namespace(&catalog, namespace.name()).await; - - catalog - .create_namespace(namespace.name(), HashMap::new()) - .await?; - - let table = catalog.create_table(namespace.name(), creation).await?; - - let dest = TableIdent::new(namespace.name().clone(), "my_table_rename".to_string()); - - catalog.rename_table(table.identifier(), &dest).await?; - - let table = catalog.load_table(&dest).await?; - assert_eq!(table.identifier(), &dest); - - let src = 
TableIdent::new(namespace.name().clone(), "my_table".to_string()); - - let src_table_exists = catalog.table_exists(&src).await?; - assert!(!src_table_exists); - - Ok(()) -} - -#[tokio::test] -async fn test_table_exists() -> Result<()> { - let catalog = get_catalog().await; - let creation = set_table_creation(None, "my_table")?; - // Use unique namespace to avoid conflicts - let namespace = Namespace::new(NamespaceIdent::new(normalize_test_name_with_parts!( - "test_table_exists" - ))); - cleanup_namespace(&catalog, namespace.name()).await; - - catalog - .create_namespace(namespace.name(), HashMap::new()) - .await?; - - let ident = TableIdent::new(namespace.name().clone(), "my_table".to_string()); - - let exists = catalog.table_exists(&ident).await?; - assert!(!exists); - - let table = catalog.create_table(namespace.name(), creation).await?; - - let exists = catalog.table_exists(table.identifier()).await?; - - assert!(exists); - - Ok(()) -} - -#[tokio::test] -async fn test_drop_table() -> Result<()> { - let catalog = get_catalog().await; - let creation = set_table_creation(None, "my_table")?; - // Use unique namespace to avoid conflicts - let namespace = Namespace::new(NamespaceIdent::new(normalize_test_name_with_parts!( - "test_drop_table" - ))); - cleanup_namespace(&catalog, namespace.name()).await; - - catalog - .create_namespace(namespace.name(), HashMap::new()) - .await?; - - let table = catalog.create_table(namespace.name(), creation).await?; - - catalog.drop_table(table.identifier()).await?; - - let result = catalog.table_exists(table.identifier()).await?; - - assert!(!result); - - Ok(()) -} - -#[tokio::test] -async fn test_load_table() -> Result<()> { - let catalog = get_catalog().await; - let creation = set_table_creation(None, "my_table")?; - // Use unique namespace to avoid conflicts - let namespace = Namespace::new(NamespaceIdent::new(normalize_test_name_with_parts!( - "test_load_table" - ))); - cleanup_namespace(&catalog, namespace.name()).await; - - 
catalog - .create_namespace(namespace.name(), HashMap::new()) - .await?; - - let expected = catalog.create_table(namespace.name(), creation).await?; - - let result = catalog - .load_table(&TableIdent::new( - namespace.name().clone(), - "my_table".to_string(), - )) - .await?; - - assert_eq!(result.identifier(), expected.identifier()); - assert_eq!(result.metadata_location(), expected.metadata_location()); - assert_eq!(result.metadata(), expected.metadata()); - - Ok(()) -} - -#[tokio::test] -async fn test_create_table() -> Result<()> { - let catalog = get_catalog().await; - // Use unique namespace to avoid conflicts - let namespace = NamespaceIdent::new(normalize_test_name_with_parts!("test_create_table")); - cleanup_namespace(&catalog, &namespace).await; - set_test_namespace(&catalog, &namespace).await?; - // inject custom location, ignore the namespace prefix - let creation = set_table_creation(Some("s3a://warehouse/hive".into()), "my_table")?; - let result = catalog.create_table(&namespace, creation).await?; - - assert_eq!(result.identifier().name(), "my_table"); - assert!( - result - .metadata_location() - .is_some_and(|location| location.starts_with("s3a://warehouse/hive/metadata/00000-")) - ); - assert!( - catalog - .file_io() - .exists("s3a://warehouse/hive/metadata/") - .await? 
- ); - - Ok(()) -} - -#[tokio::test] -async fn test_list_tables() -> Result<()> { - let catalog = get_catalog().await; - // Use unique namespace to avoid conflicts - let namespace = NamespaceIdent::new(normalize_test_name_with_parts!("test_list_tables")); - cleanup_namespace(&catalog, &namespace).await; - set_test_namespace(&catalog, &namespace).await?; - - let expected = vec![]; - let result = catalog.list_tables(&namespace).await?; - - assert_eq!(result, expected); - - Ok(()) -} - -#[tokio::test] -async fn test_drop_namespace() -> Result<()> { - let catalog = get_catalog().await; - // Use unique namespace to avoid conflicts - let namespace = NamespaceIdent::new(normalize_test_name_with_parts!("test_drop_namespace")); - cleanup_namespace(&catalog, &namespace).await; - set_test_namespace(&catalog, &namespace).await?; - - let exists = catalog.namespace_exists(&namespace).await?; - assert!(exists); - - catalog.drop_namespace(&namespace).await?; - - let exists = catalog.namespace_exists(&namespace).await?; - assert!(!exists); - - Ok(()) -} - -#[tokio::test] -async fn test_update_namespace() -> Result<()> { - let catalog = get_catalog().await; - // Use unique namespace to avoid conflicts - let namespace = NamespaceIdent::new(normalize_test_name_with_parts!("test_update_namespace")); - cleanup_namespace(&catalog, &namespace).await; - set_test_namespace(&catalog, &namespace).await?; - - let before_update = catalog.get_namespace(&namespace).await?; - let before_update = before_update.properties().get("description"); - - assert_eq!(before_update, None); - - let properties = HashMap::from([("description".to_string(), "my_update".to_string())]); - - catalog.update_namespace(&namespace, properties).await?; - - let after_update = catalog.get_namespace(&namespace).await?; - let after_update = after_update.properties().get("description"); - - assert_eq!(after_update, Some("my_update".to_string()).as_ref()); - - Ok(()) -} - -#[tokio::test] -async fn test_namespace_exists() -> 
Result<()> { - let catalog = get_catalog().await; - - // Use unique namespace to avoid conflicts - let namespace = NamespaceIdent::new(normalize_test_name_with_parts!("test_namespace_exists")); - cleanup_namespace(&catalog, &namespace).await; - - let exists = catalog.namespace_exists(&namespace).await?; - assert!(!exists); - - set_test_namespace(&catalog, &namespace).await?; - - let exists = catalog.namespace_exists(&namespace).await?; - assert!(exists); - - Ok(()) -} - -#[tokio::test] -async fn test_get_namespace() -> Result<()> { - let catalog = get_catalog().await; - - // Use unique namespace to avoid conflicts - let namespace = NamespaceIdent::new(normalize_test_name_with_parts!("test_get_namespace")); - cleanup_namespace(&catalog, &namespace).await; - - let does_not_exist = catalog.get_namespace(&namespace).await; - assert!(does_not_exist.is_err()); - - set_test_namespace(&catalog, &namespace).await?; - - let result = catalog.get_namespace(&namespace).await?; - let expected = Namespace::new(namespace); - - assert_eq!(result, expected); - - Ok(()) -} - -#[tokio::test] -async fn test_create_namespace() -> Result<()> { - let catalog = get_catalog().await; - - let properties = HashMap::new(); - // Use unique namespace to avoid conflicts - let namespace = NamespaceIdent::new(normalize_test_name_with_parts!("test_create_namespace")); - cleanup_namespace(&catalog, &namespace).await; - - let expected = Namespace::new(namespace.clone()); - - let result = catalog.create_namespace(&namespace, properties).await?; - - assert_eq!(result, expected); - - Ok(()) -} - -#[tokio::test] -async fn test_list_namespace() -> Result<()> { - let catalog = get_catalog().await; - - // Use unique namespace to avoid conflicts - let namespace = NamespaceIdent::new(normalize_test_name_with_parts!("test_list_namespace")); - cleanup_namespace(&catalog, &namespace).await; - set_test_namespace(&catalog, &namespace).await?; - - let result = catalog.list_namespaces(None).await?; - 
assert!(result.contains(&namespace)); - - let empty_result = catalog.list_namespaces(Some(&namespace)).await?; - assert!(empty_result.is_empty()); - - Ok(()) -} - -#[tokio::test] -async fn test_update_table() -> Result<()> { - let catalog = get_catalog().await; - let creation = set_table_creation(None, "my_table")?; - // Use unique namespace to avoid conflicts - let namespace = Namespace::new(NamespaceIdent::new(normalize_test_name_with_parts!( - "test_update_table" - ))); - cleanup_namespace(&catalog, namespace.name()).await; - - catalog - .create_namespace(namespace.name(), HashMap::new()) - .await?; - - let expected = catalog.create_table(namespace.name(), creation).await?; - - let table = catalog - .load_table(&TableIdent::new( - namespace.name().clone(), - "my_table".to_string(), - )) - .await?; - - assert_eq!(table.identifier(), expected.identifier()); - assert_eq!(table.metadata_location(), expected.metadata_location()); - assert_eq!(table.metadata(), expected.metadata()); - - // Store the original metadata location for comparison - let original_metadata_location = table.metadata_location(); - - // Update table properties using the transaction - let tx = Transaction::new(&table); - let tx = tx - .update_table_properties() - .set("test_property".to_string(), "test_value".to_string()) - .apply(tx)?; - - // Commit the transaction to the catalog - let updated_table = tx.commit(&catalog).await?; - - // Verify the update was successful - assert_eq!( - updated_table.metadata().properties().get("test_property"), - Some(&"test_value".to_string()) - ); - - // Verify the metadata location has been updated - assert_ne!( - updated_table.metadata_location(), - original_metadata_location, - "Metadata location should be updated after commit" - ); - - // Load the table again from the catalog to verify changes were persisted - let reloaded_table = catalog.load_table(table.identifier()).await?; - - // Verify the reloaded table matches the updated table - assert_eq!( - 
reloaded_table.metadata().properties().get("test_property"), - Some(&"test_value".to_string()) - ); - assert_eq!( - reloaded_table.metadata_location(), - updated_table.metadata_location(), - "Reloaded table should have the same metadata location as the updated table" - ); - - Ok(()) -} - -#[tokio::test] -async fn test_register_table() -> Result<()> { - let catalog = get_catalog().await; - // Use unique namespace to avoid conflicts - let namespace = NamespaceIdent::new(normalize_test_name_with_parts!("test_register_table")); - cleanup_namespace(&catalog, &namespace).await; - set_test_namespace(&catalog, &namespace).await?; - - let location = format!("s3a://warehouse/hive/{namespace}"); - let creation = set_table_creation(Some(location), "my_table")?; - let table = catalog.create_table(&namespace, creation).await?; - let metadata_location = table - .metadata_location() - .expect("Expected metadata location to be set") - .to_string(); - - catalog.drop_table(table.identifier()).await?; - let ident = TableIdent::new(namespace.clone(), "my_table".to_string()); - - let registered = catalog - .register_table(&ident, metadata_location.clone()) - .await?; - - assert_eq!(registered.identifier(), &ident); - assert_eq!( - registered.metadata_location(), - Some(metadata_location.as_str()) - ); - - Ok(()) -} diff --git a/crates/catalog/hms/src/catalog.rs b/crates/catalog/hms/src/catalog.rs index 69c01847a5..bd78193732 100644 --- a/crates/catalog/hms/src/catalog.rs +++ b/crates/catalog/hms/src/catalog.rs @@ -279,6 +279,12 @@ impl Catalog for HmsCatalog { namespace: &NamespaceIdent, properties: HashMap, ) -> Result { + if self.namespace_exists(namespace).await? 
{ + return Err(Error::new( + ErrorKind::NamespaceAlreadyExists, + format!("Namespace {namespace:?} already exists"), + )); + } let database = convert_to_database(namespace, &properties)?; self.client @@ -303,13 +309,29 @@ impl Catalog for HmsCatalog { async fn get_namespace(&self, namespace: &NamespaceIdent) -> Result { let name = validate_namespace(namespace)?; - let db = self + let resp = self .client .0 .get_database(name.into()) .await - .map(from_thrift_exception) - .map_err(from_thrift_error)??; + .map_err(from_thrift_error)?; + + let db = match resp { + MaybeException::Ok(db) => db, + MaybeException::Exception(ThriftHiveMetastoreGetDatabaseException::O1(_)) => { + return Err(Error::new( + ErrorKind::NamespaceNotFound, + format!("Namespace {namespace:?} not found"), + )); + } + MaybeException::Exception(exception) => { + return Err(Error::new( + ErrorKind::Unexpected, + "Operation failed for hitting thrift error".to_string(), + ) + .with_source(anyhow!("thrift error: {exception:?}"))); + } + }; let ns = convert_to_namespace(&db)?; @@ -362,6 +384,12 @@ impl Catalog for HmsCatalog { namespace: &NamespaceIdent, properties: HashMap, ) -> Result<()> { + if !self.namespace_exists(namespace).await? { + return Err(Error::new( + ErrorKind::NamespaceNotFound, + format!("Namespace {namespace:?} does not exist"), + )); + } let db = convert_to_database(namespace, &properties)?; let name = match &db.name { @@ -393,6 +421,13 @@ impl Catalog for HmsCatalog { async fn drop_namespace(&self, namespace: &NamespaceIdent) -> Result<()> { let name = validate_namespace(namespace)?; + if !self.namespace_exists(namespace).await? { + return Err(Error::new( + ErrorKind::NamespaceNotFound, + format!("Namespace {namespace:?} does not exist"), + )); + } + self.client .0 .drop_database(name.into(), false, false) @@ -413,6 +448,12 @@ impl Catalog for HmsCatalog { /// querying the database. 
async fn list_tables(&self, namespace: &NamespaceIdent) -> Result> { let name = validate_namespace(namespace)?; + if !self.namespace_exists(namespace).await? { + return Err(Error::new( + ErrorKind::NamespaceNotFound, + format!("Namespace {namespace:?} does not exist"), + )); + } let tables = self .client @@ -541,6 +582,18 @@ impl Catalog for HmsCatalog { /// - Any network or communication error occurs with the database backend. async fn drop_table(&self, table: &TableIdent) -> Result<()> { let db_name = validate_namespace(table.namespace())?; + if !self.namespace_exists(table.namespace()).await? { + return Err(Error::new( + ErrorKind::NamespaceNotFound, + format!("Namespace {:?} does not exist", table.namespace()), + )); + } + if !self.table_exists(table).await? { + return Err(Error::new( + ErrorKind::TableNotFound, + format!("Table {table:?} does not exist"), + )); + } self.client .0 @@ -589,6 +642,12 @@ impl Catalog for HmsCatalog { async fn rename_table(&self, src: &TableIdent, dest: &TableIdent) -> Result<()> { let src_dbname = validate_namespace(src.namespace())?; let dest_dbname = validate_namespace(dest.namespace())?; + if self.table_exists(dest).await? 
{ + return Err(Error::new( + ErrorKind::TableAlreadyExists, + format!("Destination table {dest:?} already exists"), + )); + } let src_tbl_name = src.name.clone(); let dest_tbl_name = dest.name.clone(); diff --git a/crates/catalog/hms/tests/hms_catalog_test.rs b/crates/catalog/hms/tests/hms_catalog_test.rs index 74c9e52e92..f19cf7bff4 100644 --- a/crates/catalog/hms/tests/hms_catalog_test.rs +++ b/crates/catalog/hms/tests/hms_catalog_test.rs @@ -24,16 +24,13 @@ use std::collections::HashMap; use std::sync::Arc; use iceberg::io::{FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY}; -use iceberg::spec::{NestedField, PrimitiveType, Schema, Type}; -use iceberg::{Catalog, CatalogBuilder, Namespace, NamespaceIdent, TableCreation, TableIdent}; +use iceberg::{Catalog, CatalogBuilder, Namespace, NamespaceIdent}; use iceberg_catalog_hms::{ HMS_CATALOG_PROP_THRIFT_TRANSPORT, HMS_CATALOG_PROP_URI, HMS_CATALOG_PROP_WAREHOUSE, HmsCatalog, HmsCatalogBuilder, THRIFT_TRANSPORT_BUFFERED, }; use iceberg_storage_opendal::OpenDalStorageFactory; -use iceberg_test_utils::{ - cleanup_namespace, get_hms_endpoint, get_minio_endpoint, normalize_test_name_with_parts, set_up, -}; +use iceberg_test_utils::{get_hms_endpoint, get_minio_endpoint, set_up}; use tokio::time::sleep; use tracing::info; @@ -90,230 +87,6 @@ async fn get_catalog() -> HmsCatalog { .unwrap() } -async fn set_test_namespace(catalog: &HmsCatalog, namespace: &NamespaceIdent) -> Result<()> { - let properties = HashMap::new(); - - catalog.create_namespace(namespace, properties).await?; - - Ok(()) -} - -fn set_table_creation(location: Option, name: impl ToString) -> Result { - let schema = Schema::builder() - .with_schema_id(0) - .with_fields(vec![ - NestedField::required(1, "foo", Type::Primitive(PrimitiveType::Int)).into(), - NestedField::required(2, "bar", Type::Primitive(PrimitiveType::String)).into(), - ]) - .build()?; - - let builder = TableCreation::builder() - .name(name.to_string()) - 
.properties(HashMap::new()) - .location_opt(location) - .schema(schema); - - Ok(builder.build()) -} - -#[tokio::test] -async fn test_rename_table() -> Result<()> { - let catalog = get_catalog().await; - let creation: TableCreation = set_table_creation(None, "my_table")?; - // Use unique namespace to avoid conflicts - let namespace = Namespace::new(NamespaceIdent::new(normalize_test_name_with_parts!( - "test_rename_table" - ))); - // Clean up from any previous test runs - cleanup_namespace(&catalog, namespace.name()).await; - set_test_namespace(&catalog, namespace.name()).await?; - - let table: iceberg::table::Table = catalog.create_table(namespace.name(), creation).await?; - - let dest = TableIdent::new(namespace.name().clone(), "my_table_rename".to_string()); - - catalog.rename_table(table.identifier(), &dest).await?; - - let result = catalog.table_exists(&dest).await?; - - assert!(result); - - Ok(()) -} - -#[tokio::test] -async fn test_table_exists() -> Result<()> { - let catalog = get_catalog().await; - let creation = set_table_creation(None, "my_table")?; - // Use unique namespace to avoid conflicts - let namespace = Namespace::new(NamespaceIdent::new(normalize_test_name_with_parts!( - "test_table_exists" - ))); - cleanup_namespace(&catalog, namespace.name()).await; - set_test_namespace(&catalog, namespace.name()).await?; - - let table = catalog.create_table(namespace.name(), creation).await?; - - let result = catalog.table_exists(table.identifier()).await?; - - assert!(result); - - Ok(()) -} - -#[tokio::test] -async fn test_drop_table() -> Result<()> { - let catalog = get_catalog().await; - let creation = set_table_creation(None, "my_table")?; - // Use unique namespace to avoid conflicts - let namespace = Namespace::new(NamespaceIdent::new(normalize_test_name_with_parts!( - "test_drop_table" - ))); - cleanup_namespace(&catalog, namespace.name()).await; - set_test_namespace(&catalog, namespace.name()).await?; - - let table = 
catalog.create_table(namespace.name(), creation).await?; - - catalog.drop_table(table.identifier()).await?; - - let result = catalog.table_exists(table.identifier()).await?; - - assert!(!result); - - Ok(()) -} - -#[tokio::test] -async fn test_load_table() -> Result<()> { - let catalog = get_catalog().await; - let creation = set_table_creation(None, "my_table")?; - // Use unique namespace to avoid conflicts - let namespace = Namespace::new(NamespaceIdent::new(normalize_test_name_with_parts!( - "test_load_table" - ))); - cleanup_namespace(&catalog, namespace.name()).await; - set_test_namespace(&catalog, namespace.name()).await?; - - let expected = catalog.create_table(namespace.name(), creation).await?; - - let result = catalog - .load_table(&TableIdent::new( - namespace.name().clone(), - "my_table".to_string(), - )) - .await?; - - assert_eq!(result.identifier(), expected.identifier()); - assert_eq!(result.metadata_location(), expected.metadata_location()); - assert_eq!(result.metadata(), expected.metadata()); - - Ok(()) -} - -#[tokio::test] -async fn test_create_table() -> Result<()> { - let catalog = get_catalog().await; - // inject custom location, ignore the namespace prefix - let creation = set_table_creation(Some("s3a://warehouse/hive".into()), "my_table")?; - // Use unique namespace to avoid conflicts - let namespace = Namespace::new(NamespaceIdent::new(normalize_test_name_with_parts!( - "test_create_table" - ))); - cleanup_namespace(&catalog, namespace.name()).await; - set_test_namespace(&catalog, namespace.name()).await?; - - let result = catalog.create_table(namespace.name(), creation).await?; - - assert_eq!(result.identifier().name(), "my_table"); - assert!( - result - .metadata_location() - .is_some_and(|location| location.starts_with("s3a://warehouse/hive/metadata/00000-")) - ); - assert!( - catalog - .file_io() - .exists("s3a://warehouse/hive/metadata/") - .await? 
- ); - - Ok(()) -} - -#[tokio::test] -async fn test_list_tables() -> Result<()> { - let catalog = get_catalog().await; - // Use unique namespace to avoid conflicts - let ns = Namespace::new(NamespaceIdent::new(normalize_test_name_with_parts!( - "test_list_tables" - ))); - // Clean up and create namespace, then verify it's empty - cleanup_namespace(&catalog, ns.name()).await; - set_test_namespace(&catalog, ns.name()).await?; - let result = catalog.list_tables(ns.name()).await?; - - assert_eq!(result, vec![]); - - let creation = set_table_creation(None, "my_table")?; - catalog.create_table(ns.name(), creation).await?; - let result = catalog.list_tables(ns.name()).await?; - - assert_eq!(result, vec![TableIdent::new( - ns.name().clone(), - "my_table".to_string() - )]); - - Ok(()) -} - -#[tokio::test] -async fn test_list_namespace() -> Result<()> { - let catalog = get_catalog().await; - - let result_no_parent = catalog.list_namespaces(None).await?; - - let result_with_parent = catalog - .list_namespaces(Some(&NamespaceIdent::new("parent".into()))) - .await?; - - assert!(result_no_parent.contains(&NamespaceIdent::new("default".into()))); - assert!(result_with_parent.is_empty()); - - Ok(()) -} - -#[tokio::test] -async fn test_create_namespace() -> Result<()> { - let catalog = get_catalog().await; - - let properties = HashMap::from([ - ("comment".to_string(), "my_description".to_string()), - ("location".to_string(), "my_location".to_string()), - ( - "hive.metastore.database.owner".to_string(), - "apache".to_string(), - ), - ( - "hive.metastore.database.owner-type".to_string(), - "user".to_string(), - ), - ("key1".to_string(), "value1".to_string()), - ]); - - // Use unique namespace to avoid conflicts - let ns = Namespace::with_properties( - NamespaceIdent::new(normalize_test_name_with_parts!("test_create_namespace")), - properties.clone(), - ); - cleanup_namespace(&catalog, ns.name()).await; - - let result = catalog.create_namespace(ns.name(), properties).await?; - - 
assert_eq!(result, ns); - - Ok(()) -} - #[tokio::test] async fn test_get_default_namespace() -> Result<()> { let catalog = get_catalog().await; @@ -340,68 +113,3 @@ async fn test_get_default_namespace() -> Result<()> { Ok(()) } - -#[tokio::test] -async fn test_namespace_exists() -> Result<()> { - let catalog = get_catalog().await; - - let ns_exists = Namespace::new(NamespaceIdent::new("default".into())); - // Use unique namespace to ensure it doesn't exist - let ns_not_exists = Namespace::new(NamespaceIdent::new(normalize_test_name_with_parts!( - "test_namespace_exists" - ))); - cleanup_namespace(&catalog, ns_not_exists.name()).await; - - let result_exists = catalog.namespace_exists(ns_exists.name()).await?; - let result_not_exists = catalog.namespace_exists(ns_not_exists.name()).await?; - - assert!(result_exists); - assert!(!result_not_exists); - - Ok(()) -} - -#[tokio::test] -async fn test_update_namespace() -> Result<()> { - let catalog = get_catalog().await; - - // Use unique namespace to avoid conflicts - let ns = NamespaceIdent::new(normalize_test_name_with_parts!("test_update_namespace")); - cleanup_namespace(&catalog, &ns).await; - set_test_namespace(&catalog, &ns).await?; - let properties = HashMap::from([("comment".to_string(), "my_update".to_string())]); - - catalog.update_namespace(&ns, properties).await?; - - let db = catalog.get_namespace(&ns).await?; - - assert_eq!( - db.properties().get("comment"), - Some(&"my_update".to_string()) - ); - - Ok(()) -} - -#[tokio::test] -async fn test_drop_namespace() -> Result<()> { - let catalog = get_catalog().await; - - // Use unique namespace to avoid conflicts - let ns = Namespace::new(NamespaceIdent::new(normalize_test_name_with_parts!( - "test_drop_namespace" - ))); - cleanup_namespace(&catalog, ns.name()).await; - - catalog.create_namespace(ns.name(), HashMap::new()).await?; - - let result = catalog.namespace_exists(ns.name()).await?; - assert!(result); - - catalog.drop_namespace(ns.name()).await?; - - let 
result = catalog.namespace_exists(ns.name()).await?; - assert!(!result); - - Ok(()) -} diff --git a/crates/catalog/loader/Cargo.toml b/crates/catalog/loader/Cargo.toml index d4b925fb94..20daf84f50 100644 --- a/crates/catalog/loader/Cargo.toml +++ b/crates/catalog/loader/Cargo.toml @@ -39,5 +39,9 @@ tokio = { workspace = true } async-trait = { workspace = true } [dev-dependencies] +iceberg_test_utils = { path = "../../test_utils", features = ["tests"] } +iceberg-storage-opendal = { workspace = true } +reqwest = { workspace = true } +rstest = { workspace = true } sqlx = { workspace = true, features = ["runtime-tokio", "sqlite", "migrate"] } tempfile = { workspace = true } diff --git a/crates/catalog/loader/tests/common/mod.rs b/crates/catalog/loader/tests/common/mod.rs new file mode 100644 index 0000000000..90b72df8ab --- /dev/null +++ b/crates/catalog/loader/tests/common/mod.rs @@ -0,0 +1,342 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Shared helpers for catalog integration suites. 
+ +#![allow(dead_code)] + +use std::collections::HashMap; +use std::fmt; +use std::sync::Arc; + +use iceberg::io::{ + FileIOBuilder, LocalFsStorageFactory, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, + S3_SECRET_ACCESS_KEY, +}; +use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder}; +use iceberg::spec::{NestedField, PrimitiveType, Schema, Type}; +use iceberg::{Catalog, CatalogBuilder, NamespaceIdent, TableCreation}; +use iceberg_catalog_glue::{ + AWS_ACCESS_KEY_ID, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, GLUE_CATALOG_PROP_URI, + GLUE_CATALOG_PROP_WAREHOUSE, GlueCatalog, GlueCatalogBuilder, +}; +use iceberg_catalog_hms::{ + HMS_CATALOG_PROP_THRIFT_TRANSPORT, HMS_CATALOG_PROP_URI, HMS_CATALOG_PROP_WAREHOUSE, + HmsCatalog, HmsCatalogBuilder, THRIFT_TRANSPORT_BUFFERED, +}; +use iceberg_catalog_rest::{REST_CATALOG_PROP_URI, RestCatalog, RestCatalogBuilder}; +use iceberg_catalog_s3tables::{ + S3TABLES_CATALOG_PROP_ENDPOINT_URL, S3TABLES_CATALOG_PROP_TABLE_BUCKET_ARN, + S3TablesCatalogBuilder, +}; +use iceberg_catalog_sql::{ + SQL_CATALOG_PROP_BIND_STYLE, SQL_CATALOG_PROP_URI, SQL_CATALOG_PROP_WAREHOUSE, SqlBindStyle, + SqlCatalogBuilder, +}; +use iceberg_storage_opendal::OpenDalStorageFactory; +use iceberg_test_utils::{ + get_glue_endpoint, get_hms_endpoint, get_minio_endpoint, get_rest_catalog_endpoint, set_up, +}; +use sqlx::migrate::MigrateDatabase; +use tempfile::TempDir; +use tokio::time::sleep; + +#[derive(Debug, Clone, Copy)] +pub enum CatalogKind { + Rest, + Glue, + Hms, + Sql, + S3Tables, + Memory, +} + +impl fmt::Display for CatalogKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let name = match self { + CatalogKind::Rest => "rest_catalog", + CatalogKind::Glue => "glue_catalog", + CatalogKind::Hms => "hms_catalog", + CatalogKind::Sql => "sql_catalog", + CatalogKind::S3Tables => "s3tables_catalog", + CatalogKind::Memory => "memory_catalog", + }; + f.write_str(name) + } +} + +pub struct CatalogHarness { + pub catalog: Arc, + 
pub label: &'static str, + _tempdirs: Vec, +} + +// Shared setup for each catalog implementation so the suites exercise +// the same behavior against all backends. +pub async fn load_catalog(kind: CatalogKind) -> Option { + set_up(); + match kind { + CatalogKind::Rest => Some(CatalogHarness { + catalog: Arc::new(rest_catalog().await) as Arc, + label: "rest", + _tempdirs: Vec::new(), + }), + CatalogKind::Glue => Some(CatalogHarness { + catalog: Arc::new(glue_catalog().await) as Arc, + label: "glue", + _tempdirs: Vec::new(), + }), + CatalogKind::Hms => Some(CatalogHarness { + catalog: Arc::new(hms_catalog().await) as Arc, + label: "hms", + _tempdirs: Vec::new(), + }), + CatalogKind::Sql => { + let warehouse_dir = TempDir::new().unwrap(); + let db_dir = TempDir::new().unwrap(); + let db_path = db_dir.path().join("catalog.db"); + let db_uri = format!("sqlite:{}", db_path.to_str().unwrap()); + sqlx::Sqlite::create_database(&db_uri).await.unwrap(); + + let catalog = SqlCatalogBuilder::default() + .with_storage_factory(Arc::new(LocalFsStorageFactory)) + .load( + "sql", + HashMap::from([ + (SQL_CATALOG_PROP_URI.to_string(), db_uri), + ( + SQL_CATALOG_PROP_WAREHOUSE.to_string(), + warehouse_dir.path().to_str().unwrap().to_string(), + ), + ( + SQL_CATALOG_PROP_BIND_STYLE.to_string(), + SqlBindStyle::QMark.to_string(), + ), + ]), + ) + .await + .unwrap(); + + Some(CatalogHarness { + catalog: Arc::new(catalog) as Arc, + label: "sql", + _tempdirs: vec![warehouse_dir, db_dir], + }) + } + CatalogKind::S3Tables => { + let table_bucket_arn = match std::env::var("TABLE_BUCKET_ARN").ok() { + Some(value) => value, + None => return None, + }; + + let mut props = HashMap::from([( + S3TABLES_CATALOG_PROP_TABLE_BUCKET_ARN.to_string(), + table_bucket_arn, + )]); + + if let Ok(endpoint_url) = std::env::var("S3TABLES_ENDPOINT_URL") { + props.insert(S3TABLES_CATALOG_PROP_ENDPOINT_URL.to_string(), endpoint_url); + } + + let catalog = S3TablesCatalogBuilder::default() + .load("s3tables", props) 
+ .await + .unwrap(); + + Some(CatalogHarness { + catalog: Arc::new(catalog) as Arc, + label: "s3tables", + _tempdirs: Vec::new(), + }) + } + CatalogKind::Memory => { + let warehouse_dir = TempDir::new().unwrap(); + let props = HashMap::from([( + MEMORY_CATALOG_WAREHOUSE.to_string(), + warehouse_dir.path().to_str().unwrap().to_string(), + )]); + let catalog = MemoryCatalogBuilder::default() + .load("memory", props) + .await + .unwrap(); + + Some(CatalogHarness { + catalog: Arc::new(catalog) as Arc, + label: "memory", + _tempdirs: vec![warehouse_dir], + }) + } + } +} + +// Catalog-specific setup is intentionally isolated here so the suites +// remain implementation-agnostic. +async fn rest_catalog() -> RestCatalog { + let rest_endpoint = get_rest_catalog_endpoint(); + + let client = reqwest::Client::new(); + let mut retries = 0; + while retries < 30 { + if client + .get(format!("{rest_endpoint}/v1/config")) + .send() + .await + .map(|resp| resp.status().is_success()) + .unwrap_or(false) + { + break; + } + sleep(std::time::Duration::from_millis(1000)).await; + retries += 1; + } + + RestCatalogBuilder::default() + .with_storage_factory(Arc::new(LocalFsStorageFactory)) + .load( + "rest", + HashMap::from([(REST_CATALOG_PROP_URI.to_string(), rest_endpoint)]), + ) + .await + .unwrap() +} + +async fn glue_catalog() -> GlueCatalog { + let glue_endpoint = get_glue_endpoint(); + let minio_endpoint = get_minio_endpoint(); + + let props = HashMap::from([ + (AWS_ACCESS_KEY_ID.to_string(), "my_access_id".to_string()), + ( + AWS_SECRET_ACCESS_KEY.to_string(), + "my_secret_key".to_string(), + ), + (AWS_REGION_NAME.to_string(), "us-east-1".to_string()), + (S3_ENDPOINT.to_string(), minio_endpoint), + (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()), + (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()), + (S3_REGION.to_string(), "us-east-1".to_string()), + ]); + + let file_io = FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 { + configured_scheme: "s3a".to_string(), + 
customized_credential_load: None, + })) + .with_props(props.clone()) + .build(); + + let mut retries = 0; + while retries < 30 { + if file_io.exists("s3a://warehouse/").await.unwrap_or(false) { + break; + } + sleep(std::time::Duration::from_millis(1000)).await; + retries += 1; + } + + let mut glue_props = HashMap::from([ + (GLUE_CATALOG_PROP_URI.to_string(), glue_endpoint), + ( + GLUE_CATALOG_PROP_WAREHOUSE.to_string(), + "s3a://warehouse/hive".to_string(), + ), + ]); + glue_props.extend(props); + + GlueCatalogBuilder::default() + .load("glue", glue_props) + .await + .unwrap() +} + +async fn hms_catalog() -> HmsCatalog { + let hms_endpoint = get_hms_endpoint(); + let minio_endpoint = get_minio_endpoint(); + + let props = HashMap::from([ + (HMS_CATALOG_PROP_URI.to_string(), hms_endpoint), + ( + HMS_CATALOG_PROP_THRIFT_TRANSPORT.to_string(), + THRIFT_TRANSPORT_BUFFERED.to_string(), + ), + ( + HMS_CATALOG_PROP_WAREHOUSE.to_string(), + "s3a://warehouse/hive".to_string(), + ), + (S3_ENDPOINT.to_string(), minio_endpoint), + (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()), + (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()), + (S3_REGION.to_string(), "us-east-1".to_string()), + ]); + + let file_io = FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 { + configured_scheme: "s3a".to_string(), + customized_credential_load: None, + })) + .with_props(props.clone()) + .build(); + + let mut retries = 0; + while retries < 30 { + if file_io.exists("s3a://warehouse/").await.unwrap_or(false) { + break; + } + sleep(std::time::Duration::from_millis(1000)).await; + retries += 1; + } + + HmsCatalogBuilder::default() + .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 { + configured_scheme: "s3a".to_string(), + customized_credential_load: None, + })) + .load("hms", props) + .await + .unwrap() +} + +// Common table schema used across suites to validate shared behavior. 
+pub fn table_creation(name: impl ToString) -> TableCreation { + let schema = Schema::builder() + .with_schema_id(0) + .with_fields(vec![ + NestedField::required(1, "foo", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::required(2, "bar", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build() + .unwrap(); + + TableCreation::builder() + .name(name.to_string()) + .properties(HashMap::new()) + .schema(schema) + .build() +} + +pub fn assert_map_contains(expected: &HashMap, actual: &HashMap) { + for (key, value) in expected { + assert_eq!(actual.get(key), Some(value)); + } +} + +pub async fn cleanup_namespace_dyn(catalog: &dyn Catalog, namespace: &NamespaceIdent) { + if let Ok(tables) = catalog.list_tables(namespace).await { + for table in tables { + let _ = catalog.drop_table(&table).await; + } + } + let _ = catalog.drop_namespace(namespace).await; +} diff --git a/crates/catalog/loader/tests/namespace_suite.rs b/crates/catalog/loader/tests/namespace_suite.rs new file mode 100644 index 0000000000..024e8a62b8 --- /dev/null +++ b/crates/catalog/loader/tests/namespace_suite.rs @@ -0,0 +1,355 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Common namespace behavior across catalogs. +//! +//! 
These tests assume Docker containers are started externally via `make docker-up`. + +mod common; + +use std::collections::HashMap; + +use common::{CatalogKind, assert_map_contains, cleanup_namespace_dyn, load_catalog}; +use iceberg::{ErrorKind, NamespaceIdent, Result}; +use iceberg_test_utils::normalize_test_name_with_parts; +use rstest::rstest; + +// Common behavior: querying a missing namespace should error. +#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_namespace_missing_returns_error(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_namespace_missing_returns_error", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + + let err = catalog.get_namespace(&namespace).await.unwrap_err(); + assert_eq!(err.kind(), ErrorKind::NamespaceNotFound); + + Ok(()) +} + +// Common behavior: namespace lifecycle CRUD. 
+#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_namespace_lifecycle(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_namespace_lifecycle", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + + assert!(!catalog.namespace_exists(&namespace).await?); + + let props = HashMap::from([ + ("owner".to_string(), "rust".to_string()), + ("purpose".to_string(), "catalog_suite".to_string()), + ]); + let created = catalog.create_namespace(&namespace, props.clone()).await?; + assert_eq!(created.name(), &namespace); + assert_map_contains(&props, created.properties()); + + let fetched = catalog.get_namespace(&namespace).await?; + assert_eq!(fetched.name(), &namespace); + assert_map_contains(&props, fetched.properties()); + + let namespaces = catalog.list_namespaces(None).await?; + assert!(namespaces.contains(&namespace)); + + catalog.drop_namespace(&namespace).await?; + assert!(!catalog.namespace_exists(&namespace).await?); + + Ok(()) +} + +// Common behavior: update_namespace persists changes when supported. 
+#[rstest] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_update_namespace_supported(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_update_namespace_supported", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let updated_props = HashMap::from([("owner".to_string(), "updated".to_string())]); + catalog + .update_namespace(&namespace, updated_props.clone()) + .await?; + + let updated = catalog.get_namespace(&namespace).await?; + assert_map_contains(&updated_props, updated.properties()); + + Ok(()) +} + +// Common behavior: update_namespace returns FeatureUnsupported when not implemented. +#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[tokio::test] +async fn test_catalog_update_namespace_unsupported(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_update_namespace_unsupported", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let err = catalog + .update_namespace( + &namespace, + HashMap::from([("key".to_string(), "value".to_string())]), + ) + .await + .unwrap_err(); + assert_eq!(err.kind(), ErrorKind::FeatureUnsupported); + + Ok(()) +} + +// Common behavior: listing namespaces under a parent returns its children. 
+#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_namespace_listing_with_parent(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let parent_name = + normalize_test_name_with_parts!("catalog_namespace_listing_with_parent", harness.label); + let parent = NamespaceIdent::new(parent_name.clone()); + let child1 = NamespaceIdent::from_strs([&parent_name, "child1"]).unwrap(); + let child2 = NamespaceIdent::from_strs([&parent_name, "child2"]).unwrap(); + + cleanup_namespace_dyn(catalog.as_ref(), &child1).await; + cleanup_namespace_dyn(catalog.as_ref(), &child2).await; + cleanup_namespace_dyn(catalog.as_ref(), &parent).await; + + catalog.create_namespace(&parent, HashMap::new()).await?; + + catalog.create_namespace(&child1, HashMap::new()).await?; + catalog.create_namespace(&child2, HashMap::new()).await?; + + let top_level = catalog.list_namespaces(None).await?; + assert!(top_level.contains(&parent)); + + let children = catalog.list_namespaces(Some(&parent)).await?; + assert!(children.contains(&child1)); + assert!(children.contains(&child2)); + + Ok(()) +} + +// Common behavior: hierarchical namespaces are rejected when unsupported. 
+#[rstest] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[tokio::test] +async fn test_catalog_namespace_listing_with_parent_unsupported( + #[case] kind: CatalogKind, +) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let parent_name = normalize_test_name_with_parts!( + "catalog_namespace_listing_with_parent_unsupported", + harness.label + ); + let parent = NamespaceIdent::new(parent_name.clone()); + let child = NamespaceIdent::from_strs([&parent_name, "child"]).unwrap(); + + cleanup_namespace_dyn(catalog.as_ref(), &child).await; + cleanup_namespace_dyn(catalog.as_ref(), &parent).await; + + catalog.create_namespace(&parent, HashMap::new()).await?; + + let err = catalog + .create_namespace(&child, HashMap::new()) + .await + .unwrap_err(); + assert_eq!(err.kind(), ErrorKind::DataInvalid); + + Ok(()) +} + +// Common behavior: listing top-level namespaces includes created namespaces. 
+#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_list_namespaces_contains_created(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let ns_one = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_list_namespaces_contains_created", + harness.label, + "one" + )); + let ns_two = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_list_namespaces_contains_created", + harness.label, + "two" + )); + + cleanup_namespace_dyn(catalog.as_ref(), &ns_one).await; + cleanup_namespace_dyn(catalog.as_ref(), &ns_two).await; + + catalog.create_namespace(&ns_one, HashMap::new()).await?; + catalog.create_namespace(&ns_two, HashMap::new()).await?; + + let namespaces = catalog.list_namespaces(None).await?; + assert!(namespaces.contains(&ns_one)); + assert!(namespaces.contains(&ns_two)); + + Ok(()) +} + +// Common behavior: creating an existing namespace should error. 
+#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_create_namespace_duplicate_fails(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_create_namespace_duplicate_fails", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let err = catalog + .create_namespace(&namespace, HashMap::new()) + .await + .unwrap_err(); + assert_eq!(err.kind(), ErrorKind::NamespaceAlreadyExists); + Ok(()) +} + +// Common behavior: update on a missing namespace should return NamespaceNotFound. +#[rstest] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_update_namespace_missing_errors(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_update_namespace_missing_errors", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + + let err = catalog + .update_namespace( + &namespace, + HashMap::from([("key".to_string(), "value".to_string())]), + ) + .await + .unwrap_err(); + assert_eq!(err.kind(), ErrorKind::NamespaceNotFound); + + Ok(()) +} + +// Common behavior: dropping a missing namespace should error. 
+#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_drop_namespace_missing_errors(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_drop_namespace_missing_errors", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + + let err = catalog.drop_namespace(&namespace).await.unwrap_err(); + assert_eq!(err.kind(), ErrorKind::NamespaceNotFound); + Ok(()) +} diff --git a/crates/catalog/loader/tests/table_register_suite.rs b/crates/catalog/loader/tests/table_register_suite.rs new file mode 100644 index 0000000000..b43054b833 --- /dev/null +++ b/crates/catalog/loader/tests/table_register_suite.rs @@ -0,0 +1,167 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Common register-table behavior across catalogs. +//! +//! 
These tests assume Docker containers are started externally via `make docker-up`. + +mod common; + +use std::collections::HashMap; + +use common::{CatalogKind, cleanup_namespace_dyn, load_catalog, table_creation}; +use iceberg::{ErrorKind, NamespaceIdent, Result, TableIdent}; +use iceberg_test_utils::normalize_test_name_with_parts; +use rstest::rstest; + +// Common behavior: register_table rehydrates a dropped table. +#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_register_table_roundtrip(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_register_table_roundtrip", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let table = catalog + .create_table( + &namespace, + table_creation(normalize_test_name_with_parts!( + "catalog_register_table_roundtrip", + harness.label, + "table" + )), + ) + .await?; + let table_ident = table.identifier().clone(); + let metadata_location = table + .metadata_location() + .ok_or_else(|| iceberg::Error::new(ErrorKind::Unexpected, "Missing metadata location"))? + .to_string(); + + catalog.drop_table(&table_ident).await?; + + let registered = catalog + .register_table(&table_ident, metadata_location.clone()) + .await?; + assert_eq!(registered.identifier(), &table_ident); + assert_eq!( + registered.metadata_location(), + Some(metadata_location.as_str()) + ); + + Ok(()) +} + +// HMS and S3Tables do not support register_table yet. 
+#[rstest] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[tokio::test] +async fn test_catalog_register_table_unsupported(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_register_table_unsupported", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let table = catalog + .create_table( + &namespace, + table_creation(normalize_test_name_with_parts!( + "catalog_register_table_unsupported", + harness.label, + "table" + )), + ) + .await?; + let table_ident = table.identifier().clone(); + let metadata_location = table + .metadata_location() + .ok_or_else(|| iceberg::Error::new(ErrorKind::Unexpected, "Missing metadata location"))? + .to_string(); + + let err = catalog + .register_table(&table_ident, metadata_location) + .await + .unwrap_err(); + assert_eq!(err.kind(), ErrorKind::FeatureUnsupported); + + Ok(()) +} + +// Common behavior: registering a table with an existing name should error. 
+#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_register_table_conflict_errors(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_register_table_conflict_errors", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let table_ident = TableIdent::new( + namespace.clone(), + normalize_test_name_with_parts!( + "catalog_register_table_conflict_errors", + harness.label, + "table" + ), + ); + let table = catalog + .create_table(&namespace, table_creation(table_ident.name.clone())) + .await?; + let metadata_location = table + .metadata_location() + .ok_or_else(|| iceberg::Error::new(ErrorKind::Unexpected, "Missing metadata location"))? + .to_string(); + + assert!( + catalog + .register_table(&table_ident, metadata_location) + .await + .is_err() + ); + Ok(()) +} diff --git a/crates/catalog/loader/tests/table_rename_suite.rs b/crates/catalog/loader/tests/table_rename_suite.rs new file mode 100644 index 0000000000..757d1cd044 --- /dev/null +++ b/crates/catalog/loader/tests/table_rename_suite.rs @@ -0,0 +1,163 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Common rename behavior across catalogs. +//! +//! These tests assume Docker containers are started externally via `make docker-up`. + +mod common; + +use std::collections::HashMap; + +use common::{CatalogKind, cleanup_namespace_dyn, load_catalog, table_creation}; +use iceberg::{NamespaceIdent, Result, TableIdent}; +use iceberg_test_utils::normalize_test_name_with_parts; +use rstest::rstest; + +// Common behavior: renaming across namespaces moves the table. +#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_rename_table_across_namespaces(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let src_namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_rename_table_across_namespaces", + harness.label, + "src" + )); + let dst_namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_rename_table_across_namespaces", + harness.label, + "dst" + )); + + cleanup_namespace_dyn(catalog.as_ref(), &src_namespace).await; + cleanup_namespace_dyn(catalog.as_ref(), &dst_namespace).await; + catalog + .create_namespace(&src_namespace, HashMap::new()) + .await?; + catalog + .create_namespace(&dst_namespace, HashMap::new()) + .await?; + + let table = catalog + 
.create_table( + &src_namespace, + table_creation(normalize_test_name_with_parts!( + "catalog_rename_table_across_namespaces", + harness.label, + "table" + )), + ) + .await?; + let src_ident = table.identifier().clone(); + let dst_ident = TableIdent::new(dst_namespace.clone(), src_ident.name.clone()); + + catalog.rename_table(&src_ident, &dst_ident).await?; + assert!(catalog.table_exists(&dst_ident).await?); + assert!(!catalog.table_exists(&src_ident).await?); + + Ok(()) +} + +// Common behavior: renaming a missing table should error. +#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_rename_table_missing_source_errors(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_rename_table_missing_source_errors", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let src_ident = TableIdent::new(namespace.clone(), "missing".to_string()); + let dst_ident = TableIdent::new(namespace.clone(), "dest".to_string()); + + assert!(catalog.rename_table(&src_ident, &dst_ident).await.is_err()); + Ok(()) +} + +// Common behavior: renaming to an existing destination should error. 
+#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_rename_table_dest_exists_errors(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_rename_table_dest_exists_errors", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let src = catalog + .create_table( + &namespace, + table_creation(normalize_test_name_with_parts!( + "catalog_rename_table_dest_exists_errors", + harness.label, + "src" + )), + ) + .await? + .identifier() + .clone(); + let dst = catalog + .create_table( + &namespace, + table_creation(normalize_test_name_with_parts!( + "catalog_rename_table_dest_exists_errors", + harness.label, + "dst" + )), + ) + .await? + .identifier() + .clone(); + + assert!(catalog.rename_table(&src, &dst).await.is_err()); + Ok(()) +} diff --git a/crates/catalog/loader/tests/table_suite.rs b/crates/catalog/loader/tests/table_suite.rs new file mode 100644 index 0000000000..6b7a3a822c --- /dev/null +++ b/crates/catalog/loader/tests/table_suite.rs @@ -0,0 +1,276 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Common table behavior across catalogs. +//! +//! These tests assume Docker containers are started externally via `make docker-up`. + +mod common; + +use std::collections::HashMap; + +use common::{CatalogKind, cleanup_namespace_dyn, load_catalog, table_creation}; +use iceberg::transaction::{ApplyTransactionAction, Transaction}; +use iceberg::{ErrorKind, NamespaceIdent, Result, TableIdent}; +use iceberg_test_utils::normalize_test_name_with_parts; +use rstest::rstest; + +// Common behavior: table lifecycle CRUD. +#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_table_lifecycle(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_table_lifecycle", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let table_name = + normalize_test_name_with_parts!("catalog_table_lifecycle", harness.label, "table"); + let table = catalog + .create_table(&namespace, table_creation(table_name)) + .await?; + let ident = table.identifier().clone(); + + assert!(catalog.table_exists(&ident).await?); + let loaded = 
catalog.load_table(&ident).await?; + assert_eq!(loaded.identifier(), &ident); + + let tables = catalog.list_tables(&namespace).await?; + assert!(tables.contains(&ident)); + + let dest = TableIdent::new(ident.namespace.clone(), format!("{}_renamed", ident.name)); + catalog.rename_table(&ident, &dest).await?; + assert!(catalog.table_exists(&dest).await?); + assert!(!catalog.table_exists(&ident).await?); + + catalog.drop_table(&dest).await?; + assert!(!catalog.table_exists(&dest).await?); + + catalog.drop_namespace(&namespace).await?; + + Ok(()) +} + +// Common behavior: listing tables for a missing namespace should error. +#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_list_tables_missing_namespace_errors( + #[case] kind: CatalogKind, +) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_list_tables_missing_namespace_errors", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + + assert!(catalog.list_tables(&namespace).await.is_err()); + Ok(()) +} + +// Common behavior: listing tables in an empty namespace returns empty. 
+#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_list_tables_empty_namespace(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_list_tables_empty_namespace", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let tables = catalog.list_tables(&namespace).await?; + assert!(tables.is_empty()); + + Ok(()) +} + +// Common behavior: created tables expose the requested schema. +#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_create_table_schema(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_create_table_schema", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let table_name = + normalize_test_name_with_parts!("catalog_create_table_schema", harness.label, "table"); + let creation = table_creation(table_name); + let expected_schema = creation.schema.clone(); + + let table = catalog.create_table(&namespace, creation).await?; + assert_eq!(table.identifier().namespace, namespace); + 
assert_eq!(table.metadata().current_schema().as_ref(), &expected_schema); + + Ok(()) +} + +// Common behavior: updating table properties persists through the catalog. +// HMS is excluded because update_table is not supported yet. +#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_update_table_properties(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_update_table_properties", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let table_name = + normalize_test_name_with_parts!("catalog_update_table_properties", harness.label, "table"); + let table = catalog + .create_table(&namespace, table_creation(table_name)) + .await?; + + let tx = Transaction::new(&table); + let tx = tx + .update_table_properties() + .set("test_property".to_string(), "test_value".to_string()) + .apply(tx)?; + let updated = tx.commit(catalog.as_ref()).await?; + + assert_eq!( + updated.metadata().properties().get("test_property"), + Some(&"test_value".to_string()) + ); + + Ok(()) +} + +// Common behavior: update_table_properties is rejected when unsupported. 
+#[rstest] +#[case::hms_catalog(CatalogKind::Hms)] +#[tokio::test] +async fn test_catalog_update_table_properties_unsupported(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_update_table_properties_unsupported", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let table_name = normalize_test_name_with_parts!( + "catalog_update_table_properties_unsupported", + harness.label, + "table" + ); + let table = catalog + .create_table(&namespace, table_creation(table_name)) + .await?; + + let tx = Transaction::new(&table); + let tx = tx + .update_table_properties() + .set("test_property".to_string(), "test_value".to_string()) + .apply(tx)?; + + let err = tx.commit(catalog.as_ref()).await.unwrap_err(); + assert_eq!(err.kind(), ErrorKind::FeatureUnsupported); + + Ok(()) +} + +// Common behavior: dropping a missing table should error. 
+#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_drop_table_missing_errors(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_drop_table_missing_errors", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let table_ident = TableIdent::new(namespace.clone(), "missing".to_string()); + assert!(catalog.drop_table(&table_ident).await.is_err()); + Ok(()) +} diff --git a/crates/catalog/rest/Cargo.toml b/crates/catalog/rest/Cargo.toml index de72b6c61b..40dd70a952 100644 --- a/crates/catalog/rest/Cargo.toml +++ b/crates/catalog/rest/Cargo.toml @@ -39,7 +39,6 @@ serde = { workspace = true } serde_derive = { workspace = true } serde_json = { workspace = true } tokio = { workspace = true, features = ["sync"] } -tracing = { workspace = true } typed-builder = { workspace = true } uuid = { workspace = true, features = ["v4"] } diff --git a/crates/catalog/rest/src/catalog.rs b/crates/catalog/rest/src/catalog.rs index 871643b36e..3551b05160 100644 --- a/crates/catalog/rest/src/catalog.rs +++ b/crates/catalog/rest/src/catalog.rs @@ -504,7 +504,7 @@ impl Catalog for RestCatalog { } StatusCode::NOT_FOUND => { return Err(Error::new( - ErrorKind::Unexpected, + ErrorKind::NamespaceNotFound, "The parent parameter of the namespace provided does not exist", )); } @@ -546,7 +546,7 @@ impl Catalog for RestCatalog { Ok(Namespace::from(response)) } StatusCode::CONFLICT => Err(Error::new( - ErrorKind::Unexpected, + ErrorKind::NamespaceAlreadyExists, "Tried to 
create a namespace that already exists", )), _ => Err(deserialize_unexpected_catalog_error( @@ -574,7 +574,7 @@ impl Catalog for RestCatalog { Ok(Namespace::from(response)) } StatusCode::NOT_FOUND => Err(Error::new( - ErrorKind::Unexpected, + ErrorKind::NamespaceNotFound, "Tried to get a namespace that does not exist", )), _ => Err(deserialize_unexpected_catalog_error( @@ -630,7 +630,7 @@ impl Catalog for RestCatalog { match http_response.status() { StatusCode::NO_CONTENT | StatusCode::OK => Ok(()), StatusCode::NOT_FOUND => Err(Error::new( - ErrorKind::Unexpected, + ErrorKind::NamespaceNotFound, "Tried to drop a namespace that does not exist", )), _ => Err(deserialize_unexpected_catalog_error( @@ -670,7 +670,7 @@ impl Catalog for RestCatalog { } StatusCode::NOT_FOUND => { return Err(Error::new( - ErrorKind::Unexpected, + ErrorKind::NamespaceNotFound, "Tried to list tables of a namespace that does not exist", )); } @@ -724,13 +724,13 @@ impl Catalog for RestCatalog { } StatusCode::NOT_FOUND => { return Err(Error::new( - ErrorKind::Unexpected, + ErrorKind::NamespaceNotFound, "Tried to create a table under a namespace that does not exist", )); } StatusCode::CONFLICT => { return Err(Error::new( - ErrorKind::Unexpected, + ErrorKind::TableAlreadyExists, "The table already exists", )); } @@ -791,7 +791,7 @@ impl Catalog for RestCatalog { } StatusCode::NOT_FOUND => { return Err(Error::new( - ErrorKind::Unexpected, + ErrorKind::TableNotFound, "Tried to load a table that does not exist", )); } @@ -840,7 +840,7 @@ impl Catalog for RestCatalog { match http_response.status() { StatusCode::NO_CONTENT | StatusCode::OK => Ok(()), StatusCode::NOT_FOUND => Err(Error::new( - ErrorKind::Unexpected, + ErrorKind::TableNotFound, "Tried to drop a table that does not exist", )), _ => Err(deserialize_unexpected_catalog_error( @@ -891,11 +891,11 @@ impl Catalog for RestCatalog { match http_response.status() { StatusCode::NO_CONTENT | StatusCode::OK => Ok(()), StatusCode::NOT_FOUND => 
Err(Error::new( - ErrorKind::Unexpected, + ErrorKind::TableNotFound, "Tried to rename a table that does not exist (is the namespace correct?)", )), StatusCode::CONFLICT => Err(Error::new( - ErrorKind::Unexpected, + ErrorKind::TableAlreadyExists, "Tried to rename a table to a name that already exists", )), _ => Err(deserialize_unexpected_catalog_error( diff --git a/crates/catalog/rest/tests/rest_catalog_test.rs b/crates/catalog/rest/tests/rest_catalog_test.rs deleted file mode 100644 index 98d8ee9b70..0000000000 --- a/crates/catalog/rest/tests/rest_catalog_test.rs +++ /dev/null @@ -1,497 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Integration tests for rest catalog. -//! -//! These tests assume Docker containers are started externally via `make docker-up`. -//! Each test uses unique namespaces based on module path to avoid conflicts. 
- -use std::collections::HashMap; -use std::sync::Arc; - -use iceberg::io::LocalFsStorageFactory; -use iceberg::spec::{FormatVersion, NestedField, PrimitiveType, Schema, Type}; -use iceberg::transaction::{ApplyTransactionAction, Transaction}; -use iceberg::{Catalog, CatalogBuilder, Namespace, NamespaceIdent, TableCreation, TableIdent}; -use iceberg_catalog_rest::{REST_CATALOG_PROP_URI, RestCatalog, RestCatalogBuilder}; -use iceberg_test_utils::{ - cleanup_namespace, get_rest_catalog_endpoint, normalize_test_name_with_parts, set_up, -}; -use tokio::time::sleep; -use tracing::info; - -async fn get_catalog() -> RestCatalog { - set_up(); - - let rest_endpoint = get_rest_catalog_endpoint(); - - // Wait for catalog to be ready - let client = reqwest::Client::new(); - let mut retries = 0; - while retries < 30 { - match client - .get(format!("{rest_endpoint}/v1/config")) - .send() - .await - { - Ok(resp) if resp.status().is_success() => { - info!("REST catalog is ready at {}", rest_endpoint); - break; - } - _ => { - info!( - "Waiting for REST catalog to be ready... 
(attempt {})", - retries + 1 - ); - sleep(std::time::Duration::from_millis(1000)).await; - retries += 1; - } - } - } - - RestCatalogBuilder::default() - .with_storage_factory(Arc::new(LocalFsStorageFactory)) - .load( - "rest", - HashMap::from([(REST_CATALOG_PROP_URI.to_string(), rest_endpoint)]), - ) - .await - .unwrap() -} - -#[tokio::test] -async fn test_get_non_exist_namespace() { - let catalog = get_catalog().await; - - // Use unique namespace name to ensure it doesn't exist - let ns_ident = NamespaceIdent::new(normalize_test_name_with_parts!( - "test_get_non_exist_namespace" - )); - // Clean up from any previous test runs - cleanup_namespace(&catalog, &ns_ident).await; - - let result = catalog.get_namespace(&ns_ident).await; - - assert!(result.is_err()); - assert!(result.unwrap_err().to_string().contains("does not exist")); -} - -#[tokio::test] -async fn test_get_namespace() { - let catalog = get_catalog().await; - - // Use unique namespace to avoid conflicts with other tests - let ns = Namespace::with_properties( - NamespaceIdent::from_strs([ - "apple", - "ios", - &normalize_test_name_with_parts!("test_get_namespace"), - ]) - .unwrap(), - HashMap::from([ - ("owner".to_string(), "ray".to_string()), - ("community".to_string(), "apache".to_string()), - ]), - ); - - // Clean up from any previous test runs - cleanup_namespace(&catalog, ns.name()).await; - - // Verify that namespace doesn't exist - assert!(catalog.get_namespace(ns.name()).await.is_err()); - - // Create this namespace - let created_ns = catalog - .create_namespace(ns.name(), ns.properties().clone()) - .await - .unwrap(); - - assert_eq!(ns.name(), created_ns.name()); - assert_map_contains(ns.properties(), created_ns.properties()); - - // Check that this namespace already exists - let get_ns = catalog.get_namespace(ns.name()).await.unwrap(); - assert_eq!(ns.name(), get_ns.name()); - assert_map_contains(ns.properties(), created_ns.properties()); -} - -#[tokio::test] -async fn test_list_namespace() { - 
let catalog = get_catalog().await; - - // Use unique parent namespace to avoid conflicts - let parent_ns_name = normalize_test_name_with_parts!("test_list_namespace"); - let parent_ident = NamespaceIdent::from_strs([&parent_ns_name]).unwrap(); - - let ns1 = Namespace::with_properties( - NamespaceIdent::from_strs([&parent_ns_name, "ios"]).unwrap(), - HashMap::from([ - ("owner".to_string(), "ray".to_string()), - ("community".to_string(), "apache".to_string()), - ]), - ); - - let ns2 = Namespace::with_properties( - NamespaceIdent::from_strs([&parent_ns_name, "macos"]).unwrap(), - HashMap::from([ - ("owner".to_string(), "xuanwo".to_string()), - ("community".to_string(), "apache".to_string()), - ]), - ); - - // Clean up from any previous test runs - cleanup_namespace(&catalog, ns1.name()).await; - cleanup_namespace(&catalog, ns2.name()).await; - cleanup_namespace(&catalog, &parent_ident).await; - - // Currently this namespace doesn't exist, so it should return error. - assert!(catalog.list_namespaces(Some(&parent_ident)).await.is_err()); - - // Create namespaces - catalog - .create_namespace(ns1.name(), ns1.properties().clone()) - .await - .unwrap(); - catalog - .create_namespace(ns2.name(), ns1.properties().clone()) - .await - .unwrap(); - - // List namespace - let nss = catalog.list_namespaces(Some(&parent_ident)).await.unwrap(); - - assert!(nss.contains(ns1.name())); - assert!(nss.contains(ns2.name())); -} - -#[tokio::test] -async fn test_list_empty_namespace() { - let catalog = get_catalog().await; - - // Use unique namespace to avoid conflicts - let ns_apple = Namespace::with_properties( - NamespaceIdent::from_strs([ - "list_empty", - "apple", - &normalize_test_name_with_parts!("test_list_empty_namespace"), - ]) - .unwrap(), - HashMap::from([ - ("owner".to_string(), "ray".to_string()), - ("community".to_string(), "apache".to_string()), - ]), - ); - - // Clean up from any previous test runs - cleanup_namespace(&catalog, ns_apple.name()).await; - - // Currently this 
namespace doesn't exist, so it should return error. - assert!( - catalog - .list_namespaces(Some(ns_apple.name())) - .await - .is_err() - ); - - // Create namespaces - catalog - .create_namespace(ns_apple.name(), ns_apple.properties().clone()) - .await - .unwrap(); - - // List namespace - let nss = catalog - .list_namespaces(Some(ns_apple.name())) - .await - .unwrap(); - assert!(nss.is_empty()); -} - -#[tokio::test] -async fn test_list_root_namespace() { - let catalog = get_catalog().await; - - // Use unique root namespace to avoid conflicts - let root_ns_name = normalize_test_name_with_parts!("test_list_root_namespace"); - let root_ident = NamespaceIdent::from_strs([&root_ns_name]).unwrap(); - - let ns1 = Namespace::with_properties( - NamespaceIdent::from_strs([&root_ns_name, "apple", "ios"]).unwrap(), - HashMap::from([ - ("owner".to_string(), "ray".to_string()), - ("community".to_string(), "apache".to_string()), - ]), - ); - - let ns2 = Namespace::with_properties( - NamespaceIdent::from_strs([&root_ns_name, "google", "android"]).unwrap(), - HashMap::from([ - ("owner".to_string(), "xuanwo".to_string()), - ("community".to_string(), "apache".to_string()), - ]), - ); - - // Clean up from any previous test runs - cleanup_namespace(&catalog, ns1.name()).await; - cleanup_namespace(&catalog, ns2.name()).await; - cleanup_namespace(&catalog, &root_ident).await; - - // Currently this namespace doesn't exist, so it should return error. 
- assert!(catalog.list_namespaces(Some(&root_ident)).await.is_err()); - - // Create namespaces - catalog - .create_namespace(ns1.name(), ns1.properties().clone()) - .await - .unwrap(); - catalog - .create_namespace(ns2.name(), ns1.properties().clone()) - .await - .unwrap(); - - // List namespace - let nss = catalog.list_namespaces(None).await.unwrap(); - assert!(nss.contains(&root_ident)); -} - -#[tokio::test] -async fn test_create_table() { - let catalog = get_catalog().await; - - // Use unique namespace to avoid conflicts - let ns = Namespace::with_properties( - NamespaceIdent::from_strs([ - "create_table", - "apple", - "ios", - &normalize_test_name_with_parts!("test_create_table"), - ]) - .unwrap(), - HashMap::from([ - ("owner".to_string(), "ray".to_string()), - ("community".to_string(), "apache".to_string()), - ]), - ); - - // Clean up from any previous test runs - cleanup_namespace(&catalog, ns.name()).await; - - // Create namespaces - catalog - .create_namespace(ns.name(), ns.properties().clone()) - .await - .unwrap(); - - let schema = Schema::builder() - .with_schema_id(1) - .with_identifier_field_ids(vec![2]) - .with_fields(vec![ - NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(), - NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(), - NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(), - ]) - .build() - .unwrap(); - - let table_creation = TableCreation::builder() - .name("t1".to_string()) - .schema(schema.clone()) - .build(); - - let table = catalog - .create_table(ns.name(), table_creation) - .await - .unwrap(); - - assert_eq!( - table.identifier(), - &TableIdent::new(ns.name().clone(), "t1".to_string()) - ); - - assert_eq!( - table.metadata().current_schema().as_struct(), - schema.as_struct() - ); - assert_eq!(table.metadata().format_version(), FormatVersion::V2); - assert!(table.metadata().current_snapshot().is_none()); - assert!(table.metadata().history().is_empty()); - 
assert!(table.metadata().default_sort_order().is_unsorted()); - assert!(table.metadata().default_partition_spec().is_unpartitioned()); -} - -#[tokio::test] -async fn test_update_table() { - let catalog = get_catalog().await; - - // Use unique namespace to avoid conflicts - let ns = Namespace::with_properties( - NamespaceIdent::from_strs([ - "update_table", - "apple", - "ios", - &normalize_test_name_with_parts!("test_update_table"), - ]) - .unwrap(), - HashMap::from([ - ("owner".to_string(), "ray".to_string()), - ("community".to_string(), "apache".to_string()), - ]), - ); - - // Clean up from any previous test runs - cleanup_namespace(&catalog, ns.name()).await; - - // Create namespaces - catalog - .create_namespace(ns.name(), ns.properties().clone()) - .await - .unwrap(); - - let schema = Schema::builder() - .with_schema_id(1) - .with_identifier_field_ids(vec![2]) - .with_fields(vec![ - NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(), - NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(), - NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(), - ]) - .build() - .unwrap(); - - // Now we create a table - let table_creation = TableCreation::builder() - .name("t1".to_string()) - .schema(schema.clone()) - .build(); - - let table = catalog - .create_table(ns.name(), table_creation) - .await - .unwrap(); - - assert_eq!( - table.identifier(), - &TableIdent::new(ns.name().clone(), "t1".to_string()) - ); - - let tx = Transaction::new(&table); - // Update table by committing transaction - let table2 = tx - .update_table_properties() - .set("prop1".to_string(), "v1".to_string()) - .apply(tx) - .unwrap() - .commit(&catalog) - .await - .unwrap(); - - assert_map_contains( - &HashMap::from([("prop1".to_string(), "v1".to_string())]), - table2.metadata().properties(), - ); -} - -fn assert_map_contains(map1: &HashMap<String, String>, map2: &HashMap<String, String>) { - for (k, v) in map1 { - assert!(map2.contains_key(k)); - 
assert_eq!(map2.get(k).unwrap(), v); - } -} - -#[tokio::test] -async fn test_list_empty_multi_level_namespace() { - let catalog = get_catalog().await; - - // Use unique namespace to avoid conflicts - let ns_apple = Namespace::with_properties( - NamespaceIdent::from_strs([ - "multi_level", - "a_a", - "apple", - &normalize_test_name_with_parts!("test_list_empty_multi_level_namespace"), - ]) - .unwrap(), - HashMap::from([ - ("owner".to_string(), "ray".to_string()), - ("community".to_string(), "apache".to_string()), - ]), - ); - - // Clean up from any previous test runs - cleanup_namespace(&catalog, ns_apple.name()).await; - - // Currently this namespace doesn't exist, so it should return error. - assert!( - catalog - .list_namespaces(Some(ns_apple.name())) - .await - .is_err() - ); - - // Create namespaces - catalog - .create_namespace(ns_apple.name(), ns_apple.properties().clone()) - .await - .unwrap(); - - // List namespace - let nss = catalog - .list_namespaces(Some(ns_apple.name())) - .await - .unwrap(); - assert!(nss.is_empty()); -} - -#[tokio::test] -async fn test_register_table() { - let catalog = get_catalog().await; - - // Create unique namespace to avoid conflicts - let ns = NamespaceIdent::new(normalize_test_name_with_parts!("test_register_table")); - - // Clean up from any previous test runs - cleanup_namespace(&catalog, &ns).await; - - catalog.create_namespace(&ns, HashMap::new()).await.unwrap(); - - // Create the table, store the metadata location, drop the table - let empty_schema = Schema::builder().build().unwrap(); - let table_creation = TableCreation::builder() - .name("t1".to_string()) - .schema(empty_schema) - .build(); - - let table = catalog.create_table(&ns, table_creation).await.unwrap(); - - let metadata_location = table.metadata_location().unwrap(); - catalog.drop_table(table.identifier()).await.unwrap(); - - let new_table_identifier = TableIdent::new(ns.clone(), "t2".to_string()); - let table_registered = catalog - 
.register_table(&new_table_identifier, metadata_location.to_string()) - .await - .unwrap(); - - assert_eq!( - table.metadata_location(), - table_registered.metadata_location() - ); - assert_ne!( - table.identifier().to_string(), - table_registered.identifier().to_string() - ); -} diff --git a/crates/catalog/s3tables/src/catalog.rs b/crates/catalog/s3tables/src/catalog.rs index e956937ce7..a416c38f22 100644 --- a/crates/catalog/s3tables/src/catalog.rs +++ b/crates/catalog/s3tables/src/catalog.rs @@ -308,6 +308,13 @@ impl Catalog for S3TablesCatalog { namespace: &NamespaceIdent, _properties: HashMap<String, String>, ) -> Result<Namespace> { + if self.namespace_exists(namespace).await? { + return Err(Error::new( + ErrorKind::NamespaceAlreadyExists, + format!("Namespace {namespace:?} already exists"), + )); + } + let req = self .s3tables_client .create_namespace() @@ -330,6 +337,13 @@ impl Catalog for S3TablesCatalog { /// - If there is an error querying the database, returned by /// `from_aws_sdk_error`. async fn get_namespace(&self, namespace: &NamespaceIdent) -> Result<Namespace> { + if !self.namespace_exists(namespace).await? { + return Err(Error::new( + ErrorKind::NamespaceNotFound, + format!("Namespace {namespace:?} does not exist"), + )); + } + let req = self .s3tables_client .get_namespace() @@ -397,6 +411,13 @@ impl Catalog for S3TablesCatalog { /// - Errors from the underlying database deletion process, converted using /// `from_aws_sdk_error`. async fn drop_namespace(&self, namespace: &NamespaceIdent) -> Result<()> { + if !self.namespace_exists(namespace).await? 
{ + return Err(Error::new( + ErrorKind::NamespaceNotFound, + format!("Namespace {namespace:?} does not exist"), + )); + } + let req = self .s3tables_client .delete_namespace() diff --git a/crates/catalog/sql/src/catalog.rs b/crates/catalog/sql/src/catalog.rs index f2242dc407..195f6c9de4 100644 --- a/crates/catalog/sql/src/catalog.rs +++ b/crates/catalog/sql/src/catalog.rs @@ -434,7 +434,7 @@ impl Catalog for SqlCatalog { if exists { return Err(Error::new( - iceberg::ErrorKind::Unexpected, + iceberg::ErrorKind::NamespaceAlreadyExists, format!("Namespace {namespace:?} already exists"), )); } @@ -1006,7 +1006,6 @@ mod tests { use iceberg::io::LocalFsStorageFactory; use iceberg::spec::{NestedField, PartitionSpec, PrimitiveType, Schema, SortOrder, Type}; use iceberg::table::Table; - use iceberg::transaction::{ApplyTransactionAction, Transaction}; use iceberg::{Catalog, CatalogBuilder, Namespace, NamespaceIdent, TableCreation, TableIdent}; use itertools::Itertools; use regex::Regex; @@ -1386,20 +1385,6 @@ mod tests { assert_eq!(catalog2.list_namespaces(None).await.unwrap(), vec![]); } - #[tokio::test] - async fn test_list_namespaces_returns_multiple_namespaces() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc, Some("iceberg")).await; - let namespace_ident_1 = NamespaceIdent::new("a".into()); - let namespace_ident_2 = NamespaceIdent::new("b".into()); - create_namespaces(&catalog, &vec![&namespace_ident_1, &namespace_ident_2]).await; - - assert_eq!( - to_set(catalog.list_namespaces(None).await.unwrap()), - to_set(vec![namespace_ident_1, namespace_ident_2]) - ); - } - #[tokio::test] async fn test_list_namespaces_returns_only_top_level_namespaces() { let warehouse_loc = temp_path(); @@ -1546,31 +1531,6 @@ mod tests { ); } - #[tokio::test] - async fn test_create_namespace_throws_error_if_namespace_already_exists() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc, Some("iceberg")).await; - let namespace_ident 
= NamespaceIdent::new("a".into()); - create_namespace(&catalog, &namespace_ident).await; - - assert_eq!( - catalog - .create_namespace(&namespace_ident, HashMap::new()) - .await - .unwrap_err() - .to_string(), - format!( - "Unexpected => Namespace {:?} already exists", - &namespace_ident - ) - ); - - assert_eq!( - catalog.get_namespace(&namespace_ident).await.unwrap(), - Namespace::with_properties(namespace_ident, default_properties()) - ); - } - #[tokio::test] async fn test_create_nested_namespace() { let warehouse_loc = temp_path(); @@ -1640,35 +1600,6 @@ mod tests { ) } - #[tokio::test] - async fn test_update_namespace() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc, Some("iceberg")).await; - let namespace_ident = NamespaceIdent::new("a".into()); - create_namespace(&catalog, &namespace_ident).await; - - let mut props = HashMap::from_iter([ - ("prop1".to_string(), "val1".to_string()), - ("prop2".into(), "val2".into()), - ]); - - catalog - .update_namespace(&namespace_ident, props.clone()) - .await - .unwrap(); - - props.insert("exists".into(), "true".into()); - - assert_eq!( - *catalog - .get_namespace(&namespace_ident) - .await - .unwrap() - .properties(), - props - ) - } - #[tokio::test] async fn test_update_nested_namespace() { let warehouse_loc = temp_path(); @@ -1698,28 +1629,6 @@ mod tests { ) } - #[tokio::test] - async fn test_update_namespace_errors_if_namespace_doesnt_exist() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc, Some("iceberg")).await; - let namespace_ident = NamespaceIdent::new("a".into()); - - let props = HashMap::from_iter([ - ("prop1".to_string(), "val1".to_string()), - ("prop2".into(), "val2".into()), - ]); - - let err = catalog - .update_namespace(&namespace_ident, props) - .await - .unwrap_err(); - - assert_eq!( - err.message(), - format!("No such namespace: {namespace_ident:?}") - ); - } - #[tokio::test] async fn 
test_update_namespace_errors_if_nested_namespace_doesnt_exist() { let warehouse_loc = temp_path(); @@ -1742,18 +1651,6 @@ mod tests { ); } - #[tokio::test] - async fn test_drop_namespace() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc, Some("iceberg")).await; - let namespace_ident = NamespaceIdent::new("abc".into()); - create_namespace(&catalog, &namespace_ident).await; - - catalog.drop_namespace(&namespace_ident).await.unwrap(); - - assert!(!catalog.namespace_exists(&namespace_ident).await.unwrap()) - } - #[tokio::test] async fn test_drop_nested_namespace() { let warehouse_loc = temp_path(); @@ -1810,22 +1707,6 @@ mod tests { assert!(catalog.namespace_exists(&namespace_ident_a).await.unwrap()); } - #[tokio::test] - async fn test_drop_namespace_throws_error_if_namespace_doesnt_exist() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc, Some("iceberg")).await; - - let non_existent_namespace_ident = NamespaceIdent::new("abc".into()); - assert_eq!( - catalog - .drop_namespace(&non_existent_namespace_ident) - .await - .unwrap_err() - .to_string(), - format!("Unexpected => No such namespace: {non_existent_namespace_ident:?}") - ) - } - #[tokio::test] async fn test_drop_namespace_throws_error_if_nested_namespace_doesnt_exist() { let warehouse_loc = temp_path(); @@ -1840,7 +1721,7 @@ mod tests { .await .unwrap_err() .to_string(), - format!("Unexpected => No such namespace: {non_existent_namespace_ident:?}") + format!("NamespaceNotFound => No such namespace: {non_existent_namespace_ident:?}") ) } @@ -1864,72 +1745,6 @@ mod tests { ); } - #[tokio::test] - async fn test_list_tables_returns_empty_vector() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc, Some("iceberg")).await; - let namespace_ident = NamespaceIdent::new("a".into()); - create_namespace(&catalog, &namespace_ident).await; - - assert_eq!(catalog.list_tables(&namespace_ident).await.unwrap(), vec![]); - } - - 
#[tokio::test] - async fn test_list_tables_throws_error_if_namespace_doesnt_exist() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc, Some("iceberg")).await; - - let non_existent_namespace_ident = NamespaceIdent::new("n1".into()); - - assert_eq!( - catalog - .list_tables(&non_existent_namespace_ident) - .await - .unwrap_err() - .to_string(), - format!("Unexpected => No such namespace: {non_existent_namespace_ident:?}"), - ); - } - - #[tokio::test] - async fn test_create_table_with_location() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc.clone(), Some("iceberg")).await; - let namespace_ident = NamespaceIdent::new("a".into()); - create_namespace(&catalog, &namespace_ident).await; - - let table_name = "abc"; - let location = warehouse_loc.clone(); - let table_creation = TableCreation::builder() - .name(table_name.into()) - .location(location.clone()) - .schema(simple_table_schema()) - .build(); - - let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into()); - - assert_table_eq( - &catalog - .create_table(&namespace_ident, table_creation) - .await - .unwrap(), - &expected_table_ident, - &simple_table_schema(), - ); - - let table = catalog.load_table(&expected_table_ident).await.unwrap(); - - assert_table_eq(&table, &expected_table_ident, &simple_table_schema()); - - assert!( - table - .metadata_location() - .unwrap() - .to_string() - .starts_with(&location) - ) - } - #[tokio::test] async fn test_create_table_falls_back_to_namespace_location_if_table_location_is_missing() { let warehouse_loc = temp_path(); @@ -2128,54 +1943,10 @@ mod tests { .await .unwrap_err() .to_string(), - format!("Unexpected => Table {:?} already exists.", &table_ident) - ); - } - - #[tokio::test] - async fn test_rename_table_in_same_namespace() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc, Some("iceberg")).await; - let namespace_ident = 
NamespaceIdent::new("n1".into()); - create_namespace(&catalog, &namespace_ident).await; - let src_table_ident = TableIdent::new(namespace_ident.clone(), "tbl1".into()); - let dst_table_ident = TableIdent::new(namespace_ident.clone(), "tbl2".into()); - create_table(&catalog, &src_table_ident).await; - - catalog - .rename_table(&src_table_ident, &dst_table_ident) - .await - .unwrap(); - - assert_eq!(catalog.list_tables(&namespace_ident).await.unwrap(), vec![ - dst_table_ident - ],); - } - - #[tokio::test] - async fn test_rename_table_across_namespaces() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc, Some("iceberg")).await; - let src_namespace_ident = NamespaceIdent::new("a".into()); - let dst_namespace_ident = NamespaceIdent::new("b".into()); - create_namespaces(&catalog, &vec![&src_namespace_ident, &dst_namespace_ident]).await; - let src_table_ident = TableIdent::new(src_namespace_ident.clone(), "tbl1".into()); - let dst_table_ident = TableIdent::new(dst_namespace_ident.clone(), "tbl2".into()); - create_table(&catalog, &src_table_ident).await; - - catalog - .rename_table(&src_table_ident, &dst_table_ident) - .await - .unwrap(); - - assert_eq!( - catalog.list_tables(&src_namespace_ident).await.unwrap(), - vec![], - ); - - assert_eq!( - catalog.list_tables(&dst_namespace_ident).await.unwrap(), - vec![dst_table_ident], + format!( + "TableAlreadyExists => Table {:?} already exists.", + &table_ident + ) ); } @@ -2244,213 +2015,7 @@ mod tests { .await .unwrap_err() .to_string(), - format!("Unexpected => No such namespace: {non_existent_dst_namespace_ident:?}"), - ); - } - - #[tokio::test] - async fn test_rename_table_throws_error_if_src_table_doesnt_exist() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc, Some("iceberg")).await; - let namespace_ident = NamespaceIdent::new("n1".into()); - create_namespace(&catalog, &namespace_ident).await; - let src_table_ident = 
TableIdent::new(namespace_ident.clone(), "tbl1".into()); - let dst_table_ident = TableIdent::new(namespace_ident.clone(), "tbl2".into()); - - assert_eq!( - catalog - .rename_table(&src_table_ident, &dst_table_ident) - .await - .unwrap_err() - .to_string(), - format!("Unexpected => No such table: {src_table_ident:?}"), - ); - } - - #[tokio::test] - async fn test_rename_table_throws_error_if_dst_table_already_exists() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc, Some("iceberg")).await; - let namespace_ident = NamespaceIdent::new("n1".into()); - create_namespace(&catalog, &namespace_ident).await; - let src_table_ident = TableIdent::new(namespace_ident.clone(), "tbl1".into()); - let dst_table_ident = TableIdent::new(namespace_ident.clone(), "tbl2".into()); - create_tables(&catalog, vec![&src_table_ident, &dst_table_ident]).await; - - assert_eq!( - catalog - .rename_table(&src_table_ident, &dst_table_ident) - .await - .unwrap_err() - .to_string(), - format!("Unexpected => Table {:?} already exists.", &dst_table_ident), - ); - } - - #[tokio::test] - async fn test_drop_table_throws_error_if_table_not_exist() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc.clone(), Some("iceberg")).await; - let namespace_ident = NamespaceIdent::new("a".into()); - let table_name = "tbl1"; - let table_ident = TableIdent::new(namespace_ident.clone(), table_name.into()); - create_namespace(&catalog, &namespace_ident).await; - - let err = catalog - .drop_table(&table_ident) - .await - .unwrap_err() - .to_string(); - assert_eq!( - err, - "Unexpected => No such table: TableIdent { namespace: NamespaceIdent([\"a\"]), name: \"tbl1\" }" - ); - } - - #[tokio::test] - async fn test_drop_table() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc.clone(), Some("iceberg")).await; - let namespace_ident = NamespaceIdent::new("a".into()); - let table_name = "tbl1"; - let table_ident = 
TableIdent::new(namespace_ident.clone(), table_name.into()); - create_namespace(&catalog, &namespace_ident).await; - - let location = warehouse_loc.clone(); - let table_creation = TableCreation::builder() - .name(table_name.into()) - .location(location.clone()) - .schema(simple_table_schema()) - .build(); - - catalog - .create_table(&namespace_ident, table_creation) - .await - .unwrap(); - - let table = catalog.load_table(&table_ident).await.unwrap(); - assert_table_eq(&table, &table_ident, &simple_table_schema()); - - catalog.drop_table(&table_ident).await.unwrap(); - let err = catalog - .load_table(&table_ident) - .await - .unwrap_err() - .to_string(); - assert_eq!( - err, - "Unexpected => No such table: TableIdent { namespace: NamespaceIdent([\"a\"]), name: \"tbl1\" }" - ); - } - - #[tokio::test] - async fn test_register_table_throws_error_if_table_with_same_name_already_exists() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc.clone(), Some("iceberg")).await; - let namespace_ident = NamespaceIdent::new("a".into()); - create_namespace(&catalog, &namespace_ident).await; - let table_name = "tbl1"; - let table_ident = TableIdent::new(namespace_ident.clone(), table_name.into()); - create_table(&catalog, &table_ident).await; - - assert_eq!( - catalog - .register_table(&table_ident, warehouse_loc) - .await - .unwrap_err() - .to_string(), - format!("Unexpected => Table {:?} already exists.", &table_ident) - ); - } - - #[tokio::test] - async fn test_register_table() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc.clone(), Some("iceberg")).await; - let namespace_ident = NamespaceIdent::new("a".into()); - create_namespace(&catalog, &namespace_ident).await; - - let table_name = "abc"; - let location = warehouse_loc.clone(); - let table_creation = TableCreation::builder() - .name(table_name.into()) - .location(location.clone()) - .schema(simple_table_schema()) - .build(); - - let table_ident = 
TableIdent::new(namespace_ident.clone(), table_name.into()); - let expected_table = catalog - .create_table(&namespace_ident, table_creation) - .await - .unwrap(); - - let metadata_location = expected_table - .metadata_location() - .expect("Expected metadata location to be set") - .to_string(); - - assert_table_eq(&expected_table, &table_ident, &simple_table_schema()); - - let _ = catalog.drop_table(&table_ident).await; - - let table = catalog - .register_table(&table_ident, metadata_location.clone()) - .await - .unwrap(); - - assert_eq!(table.identifier(), expected_table.identifier()); - assert_eq!(table.metadata_location(), Some(metadata_location.as_str())); - } - - #[tokio::test] - async fn test_update_table() { - let warehouse_loc = temp_path(); - let catalog = new_sql_catalog(warehouse_loc, Some("iceberg")).await; - - // Create a test namespace and table - let namespace_ident = NamespaceIdent::new("ns1".into()); - create_namespace(&catalog, &namespace_ident).await; - let table_ident = TableIdent::new(namespace_ident.clone(), "tbl1".into()); - create_table(&catalog, &table_ident).await; - - let table = catalog.load_table(&table_ident).await.unwrap(); - - // Store the original metadata location for comparison - let original_metadata_location = table.metadata_location().unwrap().to_string(); - - // Create a transaction to update the table - let tx = Transaction::new(&table); - let tx = tx - .update_table_properties() - .set("test_property".to_string(), "test_value".to_string()) - .apply(tx) - .unwrap(); - - // Commit the transaction to the catalog - let updated_table = tx.commit(&catalog).await.unwrap(); - - // Verify the update was successful - assert_eq!( - updated_table.metadata().properties().get("test_property"), - Some(&"test_value".to_string()) - ); - // Verify the metadata location has been updated - assert_ne!( - updated_table.metadata_location().unwrap(), - original_metadata_location.as_str() - ); - - // Load the table again from the catalog to verify 
changes were persisted - let reloaded = catalog.load_table(&table_ident).await.unwrap(); - - // Verify the reloaded table matches the updated table - assert_eq!( - reloaded.metadata().properties().get("test_property"), - Some(&"test_value".to_string()) - ); - assert_eq!( - reloaded.metadata_location(), - updated_table.metadata_location() + format!("NamespaceNotFound => No such namespace: {non_existent_dst_namespace_ident:?}"), ); } } diff --git a/crates/catalog/sql/src/error.rs b/crates/catalog/sql/src/error.rs index a08f755596..55e0e0a368 100644 --- a/crates/catalog/sql/src/error.rs +++ b/crates/catalog/sql/src/error.rs @@ -28,21 +28,21 @@ pub fn from_sqlx_error(error: sqlx::Error) -> Error { pub fn no_such_namespace_err<T>(namespace: &NamespaceIdent) -> Result<T> { Err(Error::new( - ErrorKind::Unexpected, + ErrorKind::NamespaceNotFound, format!("No such namespace: {namespace:?}"), )) } pub fn no_such_table_err<T>(table_ident: &TableIdent) -> Result<T> { Err(Error::new( - ErrorKind::Unexpected, + ErrorKind::TableNotFound, format!("No such table: {table_ident:?}"), )) } pub fn table_already_exists_err<T>(table_ident: &TableIdent) -> Result<T> { Err(Error::new( - ErrorKind::Unexpected, + ErrorKind::TableAlreadyExists, format!("Table {table_ident:?} already exists."), )) } diff --git a/crates/iceberg/src/catalog/memory/catalog.rs b/crates/iceberg/src/catalog/memory/catalog.rs index 992a0ed617..25ae004417 100644 --- a/crates/iceberg/src/catalog/memory/catalog.rs +++ b/crates/iceberg/src/catalog/memory/catalog.rs @@ -322,8 +322,8 @@ impl Catalog for MemoryCatalog { async fn drop_table(&self, table_ident: &TableIdent) -> Result<()> { let mut root_namespace_state = self.root_namespace_state.lock().await; - let metadata_location = root_namespace_state.remove_existing_table(table_ident)?; - self.file_io.delete(&metadata_location).await + root_namespace_state.remove_existing_table(table_ident)?; + Ok(()) } /// Check if a table exists in the catalog. 
From c2b24dba8db9f4c389dcd8423d85edb76d98e05f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Mar 2026 08:43:13 +0800 Subject: [PATCH 05/39] chore(deps): Bump datafusion from 52.2.0 to 52.3.0 (#2235) Bumps [datafusion](https://github.com/apache/datafusion) from 52.2.0 to 52.3.0.
Commits
  • 28d012a [branch-52] Bump to 52.3.0 and changelog (#20790)
  • 1bd7082 [branch-52] Fix repartition from dropping data when spilling (#20672) (#20777)
  • 9797095 [branch-52] perf: sort replace free()->try_grow() pattern with try_resize() t...
  • afc1c72 [branch-52] FFI_TableOptions are using default values only (#20705)
  • d317d00 [branch-52] fix: HashJoin panic with String dictionary keys (don't flatten ...
  • 72ea8ec [branch-52] Fix constant value from stats (#20042) (#20709)
  • 9a67de5 [branch-52] Fix Arrow Spill Underrun (#20159) (#20684)
  • 19a0fca [branch-52] SortMergeJoin don't wait for all input before emitting (#20699)
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=datafusion&package-manager=cargo&previous-version=52.2.0&new-version=52.3.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 178 ++++++++++++++++++++++------------------------------- 1 file changed, 72 insertions(+), 106 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8419709a42..f9f6b42b38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1044,7 +1044,7 @@ version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d314cc62af2b6b0c65780555abb4d02a03dd3b799cd42419044f0c38d99738c0" dependencies = [ - "darling 0.23.0", + "darling 0.20.11", "ident_case", "prettyplease", "proc-macro2", @@ -1257,7 +1257,7 @@ version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] @@ -1493,16 +1493,6 @@ dependencies = [ "darling_macro 0.21.3", ] -[[package]] -name = "darling" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" -dependencies = [ - "darling_core 0.23.0", - "darling_macro 0.23.0", -] - [[package]] name = "darling_core" version = "0.20.11" @@ -1531,19 +1521,6 @@ dependencies = [ "syn", ] -[[package]] -name = "darling_core" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" -dependencies = [ - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn", -] - [[package]] name = "darling_macro" version = "0.20.11" @@ -1566,17 +1543,6 @@ dependencies = [ "syn", ] -[[package]] -name = "darling_macro" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" -dependencies = [ - "darling_core 0.23.0", - "quote", - "syn", -] - 
[[package]] name = "dashmap" version = "6.1.0" @@ -1593,9 +1559,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "503f1f4a9060ae6e650d3dff5dc7a21266fea1302d890768d45b4b28586e830f" +checksum = "ea28305c211e3541c9cfcf06a23d0d8c7c824b4502ed1fdf0a6ff4ad24ee531c" dependencies = [ "arrow", "arrow-schema", @@ -1649,9 +1615,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14417a3ee4ae3d092b56cd6c1d32e8ff3e2c9ec130ecb2276ec91c89fd599399" +checksum = "78ab99b6df5f60a6ddbc515e4c05caee1192d395cf3cb67ce5d1c17e3c9b9b74" dependencies = [ "arrow", "async-trait", @@ -1674,9 +1640,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d0eba824adb45a4b3ac6f0251d40df3f6a9382371cad136f4f14ac9ebc6bc10" +checksum = "77ae3d14912c0d779ada98d30dc60f3244f3c26c2446b87394629ea5c076a31c" dependencies = [ "arrow", "async-trait", @@ -1725,9 +1691,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0039deefbd00c56adf5168b7ca58568fb058e4ba4c5a03b09f8be371b4e434b6" +checksum = "ea2df29b9592a5d55b8238eaf67d2f21963d5a08cd1a8b7670134405206caabd" dependencies = [ "ahash", "apache-avro", @@ -1751,9 +1717,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec7e3e60b813048331f8fb9673583173e5d2dd8fef862834ee871fc98b57ca7" +checksum = "42639baa0049d5fffd7e283504b9b5e7b9b2e7a2dea476eed60ab0d40d999b85" dependencies = [ "futures", "log", @@ -1762,9 +1728,9 @@ dependencies = 
[ [[package]] name = "datafusion-datasource" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "802068957f620302ecf05f84ff4019601aeafd36f5f3f1334984af2e34265129" +checksum = "25951b617bb22a9619e1520450590cb2004bfcad10bcb396b961f4a1a10dcec5" dependencies = [ "arrow", "async-compression", @@ -1797,9 +1763,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fc387d5067c62d494a6647d29c5ad4fcdd5a6e50ab4ea1d2568caa2d66f2cc" +checksum = "dc0b28226960ba99c50d78ac6f736ebe09eb5cb3bb9bb58194266278000ca41f" dependencies = [ "arrow", "arrow-ipc", @@ -1821,9 +1787,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69ce35d9df5c672747f79df4b8f4967b39a3514c3af30b9a7b5426f83d4be814" +checksum = "18de2e914c2c9ed4b31a4920940b181b0957bc164eec4fc04c294533219bf0a7" dependencies = [ "apache-avro", "arrow", @@ -1841,9 +1807,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd5e20579bb6c8bd4e6c620253972fb723822030c280dd6aa047f660d09eeba" +checksum = "f538b57b052a678b1ce860181c65d3ace5a8486312dc50b41c01dd585a773a51" dependencies = [ "arrow", "async-trait", @@ -1864,9 +1830,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0788b0d48fcef31880a02013ea3cc18e5a4e0eacc3b0abdd2cd0597b99dc96e" +checksum = "89fbc1d32b1b03c9734e27c0c5f041232b68621c8455f22769838634750a196c" dependencies = [ "arrow", "async-trait", @@ -1886,9 +1852,9 @@ dependencies = [ [[package]] name = 
"datafusion-datasource-parquet" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66639b70f1f363f5f0950733170100e588f1acfacac90c1894e231194aa35957" +checksum = "203271d31fe5613a5943181db70ec98162121d1de94a9a300d5e5f19f9500a32" dependencies = [ "arrow", "async-trait", @@ -1916,15 +1882,15 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e44b41f3e8267c6cf3eec982d63f34db9f1dd5f30abfd2e1f124f0871708952e" +checksum = "5b6450dc702b3d39e8ced54c3356abb453bd2f3cea86d90d555a4b92f7a38462" [[package]] name = "datafusion-execution" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e456f60e5d38db45335e84617006d90af14a8c8c5b8e959add708b2daaa0e2c" +checksum = "e66a02fa601de49da5181dbdcf904a18b16a184db2b31f5e5534552ea2d5e660" dependencies = [ "arrow", "async-trait", @@ -1944,9 +1910,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6507c719804265a58043134580c1c20767e7c23ba450724393f03ec982769ad9" +checksum = "cdf59a9b308a1a07dc2eb2f85e6366bc0226dc390b40f3aa0a72d79f1cfe2465" dependencies = [ "arrow", "async-trait", @@ -1967,9 +1933,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a413caa9c5885072b539337aed68488f0291653e8edd7d676c92df2480f6cab0" +checksum = "bd99eac4c6538c708638db43e7a3bd88e0e57955ddb722d420fb9a6d38dfc28f" dependencies = [ "arrow", "datafusion-common", @@ -1980,9 +1946,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"189256495dc9cbbb8e20dbcf161f60422e628d201a78df8207e44bd4baefadb6" +checksum = "11aa2c492ac046397b36d57c62a72982aad306495bbcbcdbcabd424d4a2fe245" dependencies = [ "arrow", "arrow-buffer", @@ -2011,9 +1977,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12e73dfee4cd67c4a507ffff4c5a711d39983adf544adbc09c09bf06f789f413" +checksum = "325a00081898945d48d6194d9ca26120e523c993be3bb7c084061a5a2a72e787" dependencies = [ "ahash", "arrow", @@ -2032,9 +1998,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87727bd9e65f4f9ac6d608c9810b7da9eaa3b18b26a4a4b76520592d49020acf" +checksum = "809bbcb1e0dbec5d0ce30d493d135aea7564f1ba4550395f7f94321223df2dae" dependencies = [ "ahash", "arrow", @@ -2045,9 +2011,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e5ef761359224b7c2b5a1bfad6296ac63225f8583d08ad18af9ba1a89ac3887" +checksum = "29ebaa5d7024ef45973e0a7db1e9aeaa647936496f4d4061c0448f23d77d6320" dependencies = [ "arrow", "arrow-ord", @@ -2068,9 +2034,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b17dac25dfda2d2a90ff0ad1c054a11fb1523766226bec6e9bd8c410daee2ae" +checksum = "60eab6f39df9ee49a2c7fa38eddc01fa0086ee31b29c7d19f38e72f479609752" dependencies = [ "arrow", "async-trait", @@ -2084,9 +2050,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c594a29ddb22cbdbce500e4d99b5b2392c5cecb4c1086298b41d1ffec14dbb77" +checksum = "e00b2c15e342a90e65a846199c9e49293dd09fe1bcd63d8be2544604892f7eb8" dependencies = [ "arrow", "datafusion-common", @@ -2102,9 +2068,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aa1b15ed81c7543f62264a30dd49dec4b1b0b698053b968f53be32dfba4f729" +checksum = "493e2e1d1f4753dfc139a5213f1b5d0b97eea46a82d9bda3c7908aa96981b74b" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2112,9 +2078,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c00c31c4795597aa25b74cab5174ac07a53051f27ce1e011ecaffa9eaeecef81" +checksum = "ba01c55ade8278a791b429f7bf5cb1de64de587a342d084b18245edfae7096e2" dependencies = [ "datafusion-doc", "quote", @@ -2123,9 +2089,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80ccf60767c09302b2e0fc3afebb3761a6d508d07316fab8c5e93312728a21bb" +checksum = "a80c6dfbba6a2163a9507f6353ac78c69d8deb26232c9e419160e58ff7c3e047" dependencies = [ "arrow", "chrono", @@ -2143,9 +2109,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c64b7f277556944e4edd3558da01d9e9ff9f5416f1c0aa7fee088e57bd141a7e" +checksum = "5d3a86264bb9163e7360b6622e789bc7fcbb43672e78a8493f0bc369a41a57c6" dependencies = [ "ahash", "arrow", @@ -2167,9 +2133,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b7abaee372ea2d19c016ee9ef8629c4415257d291cdd152bc7f0b75f28af1b63" +checksum = "3f5e00e524ac33500be6c5eeac940bd3f6b984ba9b7df0cd5f6c34a8a2cc4d6b" dependencies = [ "arrow", "datafusion-common", @@ -2182,9 +2148,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42237efe621f92adc22d111b531fdbc2cc38ca9b5e02327535628fb103ae2157" +checksum = "2ae769ea5d688b4e74e9be5cad6f9d9f295b540825355868a3ab942380dd97ce" dependencies = [ "ahash", "arrow", @@ -2199,9 +2165,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd093498bd1319c6e5c76e9dfa905e78486f01b34579ce97f2e3a49f84c37fac" +checksum = "f3588753ab2b47b0e43cd823fe5e7944df6734dabd6dafb72e2cc1c2a22f1944" dependencies = [ "arrow", "datafusion-common", @@ -2218,9 +2184,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cbe61b12daf81a9f20ba03bd3541165d51f86e004ef37426b11881330eed261" +checksum = "79949cbb109c2a45c527bfe0d956b9f2916807c05d4d2e66f3fd0af827ac2b61" dependencies = [ "ahash", "arrow", @@ -2249,9 +2215,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0124331116db7f79df92ebfd2c3b11a8f90240f253555c9bb084f10b6fecf1dd" +checksum = "6434e2ee8a39d04b95fed688ff34dc251af6e4a0c2e1714716b6e3846690d589" dependencies = [ "arrow", "datafusion-common", @@ -2266,9 +2232,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1673e3c58ba618a6ea0568672f00664087b8982c581e9afd5aa6c3c79c9b431f" +checksum = "c91efb8302b4877d499c37e9a71886b90236ab27d9cc42fd51112febf341abd6" dependencies = [ "async-trait", "datafusion-common", @@ -2303,9 +2269,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5272d256dab5347bb39d2040589f45d8c6b715b27edcb5fffe88cc8b9c3909cb" +checksum = "3f01eef7bcf4d00e87305b55f1b75792384e130fe0258bac02cd48378ae5ff87" dependencies = [ "arrow", "bigdecimal", @@ -3283,7 +3249,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.2", + "socket2 0.5.10", "tokio", "tower-service", "tracing", @@ -5009,7 +4975,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", - "itertools 0.14.0", + "itertools 0.13.0", "log", "multimap", "petgraph", @@ -5028,7 +4994,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.13.0", "proc-macro2", "quote", "syn", @@ -5112,7 +5078,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.37", - "socket2 0.6.2", + "socket2 0.5.10", "thiserror 2.0.18", "tokio", "tracing", @@ -5149,7 +5115,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.2", + "socket2 0.5.10", "tracing", "windows-sys 0.60.2", ] @@ -7475,7 +7441,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] From dde0aea1568dc8178dd2ff49274e51462e9672f2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: 
Mon, 16 Mar 2026 09:34:57 +0800 Subject: [PATCH 06/39] chore(deps): Bump serde_with from 3.17.0 to 3.18.0 (#2233) Bumps [serde_with](https://github.com/jonasbb/serde_with) from 3.17.0 to 3.18.0.
Release notes

Sourced from serde_with's releases.

serde_with v3.18.0

Added

  • Support OneOrMany with more sequence and set types (#929)

Changed

  • Bump MSRV to 1.88 due to the darling dependency
Commits
  • d50ec96 Bump version to 3.18.0 (#931)
  • 984fe32 Bump version to 3.18.0
  • 4ba41c7 Bump actions/upload-artifact from 6 to 7 in the github-actions group (#927)
  • 8fb2468 Bump actions/upload-artifact from 6 to 7 in the github-actions group
  • aec0a23 Bump MSRV to 1.88 (#930)
  • 25c15a2 Update time dependency to 0.3.47
  • 93bd3f4 Update test output after darling update
  • f825dbf Upgrade darling to 0.23.0
  • 65cbd73 Bump MSRV to 1.88
  • daff02e Extend OneOrMany implementation to more collection types (#929)
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=serde_with&package-manager=cargo&previous-version=3.17.0&new-version=3.18.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f9f6b42b38..d5cbcd4e50 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1485,12 +1485,12 @@ dependencies = [ [[package]] name = "darling" -version = "0.21.3" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" dependencies = [ - "darling_core 0.21.3", - "darling_macro 0.21.3", + "darling_core 0.23.0", + "darling_macro 0.23.0", ] [[package]] @@ -1509,11 +1509,10 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.21.3" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" dependencies = [ - "fnv", "ident_case", "proc-macro2", "quote", @@ -1534,11 +1533,11 @@ dependencies = [ [[package]] name = "darling_macro" -version = "0.21.3" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ - "darling_core 0.21.3", + "darling_core 0.23.0", "quote", "syn", ] @@ -5934,9 +5933,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.17.0" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "381b283ce7bc6b476d903296fb59d0d36633652b633b27f64db4fb46dcbfc3b9" +checksum = "dd5414fad8e6907dbdd5bc441a50ae8d6e26151a03b1de04d89a5576de61d01f" dependencies = [ "base64", "chrono", @@ -5953,11 
+5952,11 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.17.0" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6d4e30573c8cb306ed6ab1dca8423eec9a463ea0e155f45399455e0368b27e0" +checksum = "d3db8978e608f1fe7357e211969fd9abdcae80bac1ba7a3369bb7eb6b404eb65" dependencies = [ - "darling 0.21.3", + "darling 0.23.0", "proc-macro2", "quote", "syn", From d16a9bb720d27446acdd89303eef1a42f0c2f576 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Mar 2026 10:16:49 +0800 Subject: [PATCH 07/39] chore(deps): Bump tempfile from 3.26.0 to 3.27.0 (#2234) --- Cargo.lock | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d5cbcd4e50..ebb51dfb7a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,7 +109,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -120,7 +120,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -2418,7 +2418,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -2571,7 +2571,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -3810,7 +3810,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -4337,7 +4337,7 @@ version = "0.50.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -5556,7 +5556,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -6537,15 +6537,15 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" [[package]] name = "tempfile" -version = "3.26.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.4.1", + "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] From 16c21266e07b156a7e1cef69b2567a014ba3f520 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 17 Mar 2026 09:20:35 +0800 Subject: [PATCH 08/39] chore(deps): Bump lz4_flex from 0.12.0 to 0.12.1 (#2239) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [lz4_flex](https://github.com/pseitz/lz4_flex) from 0.12.0 to 0.12.1.
Changelog

Sourced from lz4_flex's changelog.

0.12.1 (2026-03-14)

Security Fix

Invalid match offsets (offset == 0) during decompression were
not properly
handled, which could lead to invalid memory reads on untrusted input.
Users on 0.12.x should upgrade to 0.12.1.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=lz4_flex&package-manager=cargo&previous-version=0.12.0&new-version=0.12.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/apache/iceberg-rust/network/alerts).
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ebb51dfb7a..908b5a58a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4081,9 +4081,9 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" +checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" dependencies = [ "twox-hash", ] From cc256f567e198a8a941f90bf91cf7ed23e15b76c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 17 Mar 2026 09:49:33 +0800 Subject: [PATCH 09/39] chore(deps): Bump lz4_flex from 0.12.0 to 0.12.1 in /bindings/python (#2238) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [lz4_flex](https://github.com/pseitz/lz4_flex) from 0.12.0 to 0.12.1.
Changelog

Sourced from lz4_flex's changelog.

0.12.1 (2026-03-14)

Security Fix

Invalid match offsets (offset == 0) during decompression were
not properly
handled, which could lead to invalid memory reads on untrusted input.
Users on 0.12.x should upgrade to 0.12.1.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=lz4_flex&package-manager=cargo&previous-version=0.12.0&new-version=0.12.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/apache/iceberg-rust/network/alerts).
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: blackmwk --- bindings/python/Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 5dac677618..60442de906 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -2769,9 +2769,9 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" +checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" dependencies = [ "twox-hash", ] From a54e44291935c42bfd3da254f7fb521b3f7f3c05 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Mon, 16 Mar 2026 18:57:37 -0700 Subject: [PATCH 10/39] feat(io): Add delete_stream to Storage trait (#2216) ## Which issue does this PR close? - Closes #2065 ## What changes are included in this PR? - Add `delete_stream` to `Storage` trait to support batch delete - Expose `delete_stream` in `FileIO` as well ## Are these changes tested? 
Added unit tests. Added integration tests for OpenDAL. --- Cargo.lock | 1 + crates/iceberg/src/io/file_io.rs | 13 + crates/iceberg/src/io/storage/local_fs.rs | 66 ++++ crates/iceberg/src/io/storage/memory.rs | 61 ++++ crates/iceberg/src/io/storage/mod.rs | 4 + crates/storage/opendal/Cargo.toml | 1 + crates/storage/opendal/src/azdls.rs | 6 +- crates/storage/opendal/src/lib.rs | 311 ++++++++++++++++++ .../storage/opendal/tests/file_io_s3_test.rs | 43 +++ 9 files changed, 503 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 908b5a58a7..39812b010f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3548,6 +3548,7 @@ dependencies = [ "async-trait", "bytes", "cfg-if", + "futures", "iceberg", "iceberg_test_utils", "opendal", diff --git a/crates/iceberg/src/io/file_io.rs b/crates/iceberg/src/io/file_io.rs index 341b19d090..594b070e03 100644 --- a/crates/iceberg/src/io/file_io.rs +++ b/crates/iceberg/src/io/file_io.rs @@ -19,6 +19,7 @@ use std::ops::Range; use std::sync::{Arc, OnceLock}; use bytes::Bytes; +use futures::{Stream, StreamExt}; use super::storage::{ LocalFsStorageFactory, MemoryStorageFactory, Storage, StorageConfig, StorageFactory, @@ -140,6 +141,18 @@ impl FileIO { self.get_storage()?.delete_prefix(path.as_ref()).await } + /// Delete multiple files from a stream of paths. + /// + /// # Arguments + /// + /// * paths: A stream of absolute paths starting with the scheme string used to construct [`FileIO`]. + pub async fn delete_stream( + &self, + paths: impl Stream + Send + 'static, + ) -> Result<()> { + self.get_storage()?.delete_stream(paths.boxed()).await + } + /// Check file exists. 
/// /// # Arguments diff --git a/crates/iceberg/src/io/storage/local_fs.rs b/crates/iceberg/src/io/storage/local_fs.rs index d6dd5b433b..e96e951baa 100644 --- a/crates/iceberg/src/io/storage/local_fs.rs +++ b/crates/iceberg/src/io/storage/local_fs.rs @@ -29,6 +29,8 @@ use std::sync::Arc; use async_trait::async_trait; use bytes::Bytes; +use futures::StreamExt; +use futures::stream::BoxStream; use serde::{Deserialize, Serialize}; use crate::io::{ @@ -200,6 +202,13 @@ impl Storage for LocalFsStorage { Ok(()) } + async fn delete_stream(&self, mut paths: BoxStream<'static, String>) -> Result<()> { + while let Some(path) = paths.next().await { + self.delete(&path).await?; + } + Ok(()) + } + fn new_input(&self, path: &str) -> Result { Ok(InputFile::new(Arc::new(self.clone()), path.to_string())) } @@ -534,4 +543,61 @@ mod tests { assert!(path.exists()); } + + #[tokio::test] + async fn test_local_fs_storage_delete_stream() { + use futures::stream; + + let tmp_dir = TempDir::new().unwrap(); + let storage = LocalFsStorage::new(); + + // Create multiple files + let file1 = tmp_dir.path().join("file1.txt"); + let file2 = tmp_dir.path().join("file2.txt"); + let file3 = tmp_dir.path().join("file3.txt"); + + storage + .write(file1.to_str().unwrap(), Bytes::from("1")) + .await + .unwrap(); + storage + .write(file2.to_str().unwrap(), Bytes::from("2")) + .await + .unwrap(); + storage + .write(file3.to_str().unwrap(), Bytes::from("3")) + .await + .unwrap(); + + // Verify files exist + assert!(storage.exists(file1.to_str().unwrap()).await.unwrap()); + assert!(storage.exists(file2.to_str().unwrap()).await.unwrap()); + assert!(storage.exists(file3.to_str().unwrap()).await.unwrap()); + + // Delete multiple files using stream + let paths = vec![ + file1.to_str().unwrap().to_string(), + file2.to_str().unwrap().to_string(), + ]; + let path_stream = stream::iter(paths).boxed(); + storage.delete_stream(path_stream).await.unwrap(); + + // Verify deleted files no longer exist + 
assert!(!storage.exists(file1.to_str().unwrap()).await.unwrap()); + assert!(!storage.exists(file2.to_str().unwrap()).await.unwrap()); + + // Verify file3 still exists + assert!(storage.exists(file3.to_str().unwrap()).await.unwrap()); + } + + #[tokio::test] + async fn test_local_fs_storage_delete_stream_empty() { + use futures::stream; + + let storage = LocalFsStorage::new(); + + // Delete with empty stream should succeed + let path_stream = stream::iter(Vec::::new()).boxed(); + storage.delete_stream(path_stream).await.unwrap(); + } } diff --git a/crates/iceberg/src/io/storage/memory.rs b/crates/iceberg/src/io/storage/memory.rs index cb01ee4709..f33dbd07b1 100644 --- a/crates/iceberg/src/io/storage/memory.rs +++ b/crates/iceberg/src/io/storage/memory.rs @@ -28,6 +28,8 @@ use std::sync::{Arc, RwLock}; use async_trait::async_trait; use bytes::Bytes; +use futures::StreamExt; +use futures::stream::BoxStream; use serde::{Deserialize, Serialize}; use crate::io::{ @@ -220,6 +222,13 @@ impl Storage for MemoryStorage { Ok(()) } + async fn delete_stream(&self, mut paths: BoxStream<'static, String>) -> Result<()> { + while let Some(path) = paths.next().await { + self.delete(&path).await?; + } + Ok(()) + } + fn new_input(&self, path: &str) -> Result { Ok(InputFile::new(Arc::new(self.clone()), path.to_string())) } @@ -594,4 +603,56 @@ mod tests { assert_eq!(storage.read("/path/to/file").await.unwrap(), content); assert_eq!(storage.read("path/to/file").await.unwrap(), content); } + + #[tokio::test] + async fn test_memory_storage_delete_stream() { + use futures::stream; + + let storage = MemoryStorage::new(); + + // Create multiple files + storage + .write("memory://file1.txt", Bytes::from("1")) + .await + .unwrap(); + storage + .write("memory://file2.txt", Bytes::from("2")) + .await + .unwrap(); + storage + .write("memory://file3.txt", Bytes::from("3")) + .await + .unwrap(); + + // Verify files exist + assert!(storage.exists("memory://file1.txt").await.unwrap()); + 
assert!(storage.exists("memory://file2.txt").await.unwrap()); + assert!(storage.exists("memory://file3.txt").await.unwrap()); + + // Delete multiple files using stream + let paths = vec![ + "memory://file1.txt".to_string(), + "memory://file2.txt".to_string(), + ]; + let path_stream = stream::iter(paths).boxed(); + storage.delete_stream(path_stream).await.unwrap(); + + // Verify deleted files no longer exist + assert!(!storage.exists("memory://file1.txt").await.unwrap()); + assert!(!storage.exists("memory://file2.txt").await.unwrap()); + + // Verify file3 still exists + assert!(storage.exists("memory://file3.txt").await.unwrap()); + } + + #[tokio::test] + async fn test_memory_storage_delete_stream_empty() { + use futures::stream; + + let storage = MemoryStorage::new(); + + // Delete with empty stream should succeed + let path_stream = stream::iter(Vec::::new()).boxed(); + storage.delete_stream(path_stream).await.unwrap(); + } } diff --git a/crates/iceberg/src/io/storage/mod.rs b/crates/iceberg/src/io/storage/mod.rs index 3c7c555a55..5276c7771f 100644 --- a/crates/iceberg/src/io/storage/mod.rs +++ b/crates/iceberg/src/io/storage/mod.rs @@ -27,6 +27,7 @@ use std::sync::Arc; use async_trait::async_trait; use bytes::Bytes; pub use config::*; +use futures::stream::BoxStream; pub use local_fs::{LocalFsStorage, LocalFsStorageFactory}; pub use memory::{MemoryStorage, MemoryStorageFactory}; @@ -93,6 +94,9 @@ pub trait Storage: Debug + Send + Sync { /// Delete all files with the given prefix async fn delete_prefix(&self, path: &str) -> Result<()>; + /// Delete multiple files from a stream of paths. 
+ async fn delete_stream(&self, paths: BoxStream<'static, String>) -> Result<()>; + /// Create a new input file for reading fn new_input(&self, path: &str) -> Result; diff --git a/crates/storage/opendal/Cargo.toml b/crates/storage/opendal/Cargo.toml index e0a3cf8ed6..84f7e1147a 100644 --- a/crates/storage/opendal/Cargo.toml +++ b/crates/storage/opendal/Cargo.toml @@ -49,6 +49,7 @@ reqwest = { workspace = true } serde = { workspace = true } typetag = { workspace = true } url = { workspace = true } +futures = { workspace = true } [dev-dependencies] async-trait = { workspace = true } diff --git a/crates/storage/opendal/src/azdls.rs b/crates/storage/opendal/src/azdls.rs index 70caae7c4e..6251f8cdaa 100644 --- a/crates/storage/opendal/src/azdls.rs +++ b/crates/storage/opendal/src/azdls.rs @@ -160,7 +160,7 @@ impl FromStr for AzureStorageScheme { } /// Validates whether the given path matches what's configured for the backend. -fn match_path_with_config( +pub(crate) fn match_path_with_config( path: &AzureStoragePath, config: &AzdlsConfig, configured_scheme: &AzureStorageScheme, @@ -220,7 +220,7 @@ fn azdls_config_build(config: &AzdlsConfig, path: &AzureStoragePath) -> Result(&self, path: &'a str) -> Result<&'a str> { + match self { + #[cfg(feature = "opendal-memory")] + OpenDalStorage::Memory(_) => Ok(path.strip_prefix("memory:/").unwrap_or(&path[1..])), + #[cfg(feature = "opendal-fs")] + OpenDalStorage::LocalFs => Ok(path.strip_prefix("file:/").unwrap_or(&path[1..])), + #[cfg(feature = "opendal-s3")] + OpenDalStorage::S3 { + configured_scheme, .. 
+ } => { + let url = url::Url::parse(path)?; + let bucket = url.host_str().ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + format!("Invalid s3 url: {path}, missing bucket"), + ) + })?; + let prefix = format!("{}://{}/", configured_scheme, bucket); + if path.starts_with(&prefix) { + Ok(&path[prefix.len()..]) + } else { + Err(Error::new( + ErrorKind::DataInvalid, + format!("Invalid s3 url: {path}, should start with {prefix}"), + )) + } + } + #[cfg(feature = "opendal-gcs")] + OpenDalStorage::Gcs { .. } => { + let url = url::Url::parse(path)?; + let bucket = url.host_str().ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + format!("Invalid gcs url: {path}, missing bucket"), + ) + })?; + let prefix = format!("gs://{}/", bucket); + if path.starts_with(&prefix) { + Ok(&path[prefix.len()..]) + } else { + Err(Error::new( + ErrorKind::DataInvalid, + format!("Invalid gcs url: {path}, should start with {prefix}"), + )) + } + } + #[cfg(feature = "opendal-oss")] + OpenDalStorage::Oss { .. 
} => { + let url = url::Url::parse(path)?; + let bucket = url.host_str().ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + format!("Invalid oss url: {path}, missing bucket"), + ) + })?; + let prefix = format!("oss://{}/", bucket); + if path.starts_with(&prefix) { + Ok(&path[prefix.len()..]) + } else { + Err(Error::new( + ErrorKind::DataInvalid, + format!("Invalid oss url: {path}, should start with {prefix}"), + )) + } + } + #[cfg(feature = "opendal-azdls")] + OpenDalStorage::Azdls { + configured_scheme, + config, + } => { + let azure_path = path.parse::()?; + match_path_with_config(&azure_path, config, configured_scheme)?; + let relative_path_len = azure_path.path.len(); + Ok(&path[path.len() - relative_path_len..]) + } + #[cfg(all( + not(feature = "opendal-s3"), + not(feature = "opendal-fs"), + not(feature = "opendal-gcs"), + not(feature = "opendal-oss"), + not(feature = "opendal-azdls"), + ))] + _ => Err(Error::new( + ErrorKind::FeatureUnsupported, + "No storage service has been enabled", + )), + } + } } #[typetag::serde(name = "OpenDalStorage")] @@ -400,6 +499,40 @@ impl Storage for OpenDalStorage { Ok(op.remove_all(&path).await.map_err(from_opendal_error)?) 
} + async fn delete_stream(&self, mut paths: BoxStream<'static, String>) -> Result<()> { + let mut deleters: HashMap = HashMap::new(); + + while let Some(path) = paths.next().await { + let bucket = url::Url::parse(&path) + .ok() + .and_then(|u| u.host_str().map(|s| s.to_string())) + .unwrap_or_default(); + + let (relative_path, deleter) = match deleters.entry(bucket) { + Entry::Occupied(entry) => { + (self.relativize_path(&path)?.to_string(), entry.into_mut()) + } + Entry::Vacant(entry) => { + let (op, rel) = self.create_operator(&path)?; + let rel = rel.to_string(); + let deleter = op.deleter().await.map_err(from_opendal_error)?; + (rel, entry.insert(deleter)) + } + }; + + deleter + .delete(relative_path) + .await + .map_err(from_opendal_error)?; + } + + for (_, mut deleter) in deleters { + deleter.close().await.map_err(from_opendal_error)?; + } + + Ok(()) + } + #[allow(unreachable_code, unused_variables)] fn new_input(&self, path: &str) -> Result { Ok(InputFile::new(Arc::new(self.clone()), path.to_string())) @@ -457,4 +590,182 @@ mod tests { let op = default_memory_operator(); assert_eq!(op.info().scheme().to_string(), "memory"); } + + #[cfg(feature = "opendal-memory")] + #[test] + fn test_relativize_path_memory() { + let storage = OpenDalStorage::Memory(default_memory_operator()); + + assert_eq!( + storage.relativize_path("memory:/path/to/file").unwrap(), + "path/to/file" + ); + // Without the scheme prefix, falls back to stripping the leading slash + assert_eq!( + storage.relativize_path("/path/to/file").unwrap(), + "path/to/file" + ); + } + + #[cfg(feature = "opendal-fs")] + #[test] + fn test_relativize_path_fs() { + let storage = OpenDalStorage::LocalFs; + + assert_eq!( + storage + .relativize_path("file:/tmp/data/file.parquet") + .unwrap(), + "tmp/data/file.parquet" + ); + assert_eq!( + storage.relativize_path("/tmp/data/file.parquet").unwrap(), + "tmp/data/file.parquet" + ); + } + + #[cfg(feature = "opendal-s3")] + #[test] + fn test_relativize_path_s3() { + 
let storage = OpenDalStorage::S3 { + configured_scheme: "s3".to_string(), + config: Arc::new(S3Config::default()), + customized_credential_load: None, + }; + + assert_eq!( + storage + .relativize_path("s3://my-bucket/path/to/file.parquet") + .unwrap(), + "path/to/file.parquet" + ); + + // s3a scheme + let storage_s3a = OpenDalStorage::S3 { + configured_scheme: "s3a".to_string(), + config: Arc::new(S3Config::default()), + customized_credential_load: None, + }; + assert_eq!( + storage_s3a + .relativize_path("s3a://my-bucket/path/to/file.parquet") + .unwrap(), + "path/to/file.parquet" + ); + } + + #[cfg(feature = "opendal-s3")] + #[test] + fn test_relativize_path_s3_scheme_mismatch() { + let storage = OpenDalStorage::S3 { + configured_scheme: "s3".to_string(), + config: Arc::new(S3Config::default()), + customized_credential_load: None, + }; + + // Scheme mismatch should error + assert!( + storage + .relativize_path("s3a://my-bucket/path/to/file.parquet") + .is_err() + ); + } + + #[cfg(feature = "opendal-gcs")] + #[test] + fn test_relativize_path_gcs() { + let storage = OpenDalStorage::Gcs { + config: Arc::new(GcsConfig::default()), + }; + + assert_eq!( + storage + .relativize_path("gs://my-bucket/path/to/file.parquet") + .unwrap(), + "path/to/file.parquet" + ); + } + + #[cfg(feature = "opendal-gcs")] + #[test] + fn test_relativize_path_gcs_invalid_scheme() { + let storage = OpenDalStorage::Gcs { + config: Arc::new(GcsConfig::default()), + }; + + assert!( + storage + .relativize_path("s3://my-bucket/path/to/file.parquet") + .is_err() + ); + } + + #[cfg(feature = "opendal-oss")] + #[test] + fn test_relativize_path_oss() { + let storage = OpenDalStorage::Oss { + config: Arc::new(OssConfig::default()), + }; + + assert_eq!( + storage + .relativize_path("oss://my-bucket/path/to/file.parquet") + .unwrap(), + "path/to/file.parquet" + ); + } + + #[cfg(feature = "opendal-oss")] + #[test] + fn test_relativize_path_oss_invalid_scheme() { + let storage = OpenDalStorage::Oss { + 
config: Arc::new(OssConfig::default()), + }; + + assert!( + storage + .relativize_path("s3://my-bucket/path/to/file.parquet") + .is_err() + ); + } + + #[cfg(feature = "opendal-azdls")] + #[test] + fn test_relativize_path_azdls() { + let storage = OpenDalStorage::Azdls { + configured_scheme: AzureStorageScheme::Abfss, + config: Arc::new(AzdlsConfig { + account_name: Some("myaccount".to_string()), + endpoint: Some("https://myaccount.dfs.core.windows.net".to_string()), + ..Default::default() + }), + }; + + assert_eq!( + storage + .relativize_path("abfss://myfs@myaccount.dfs.core.windows.net/path/to/file.parquet") + .unwrap(), + "/path/to/file.parquet" + ); + } + + #[cfg(feature = "opendal-azdls")] + #[test] + fn test_relativize_path_azdls_scheme_mismatch() { + let storage = OpenDalStorage::Azdls { + configured_scheme: AzureStorageScheme::Abfss, + config: Arc::new(AzdlsConfig { + account_name: Some("myaccount".to_string()), + endpoint: Some("https://myaccount.dfs.core.windows.net".to_string()), + ..Default::default() + }), + }; + + // wasbs scheme doesn't match configured abfss + assert!( + storage + .relativize_path("wasbs://myfs@myaccount.dfs.core.windows.net/path/to/file.parquet") + .is_err() + ); + } } diff --git a/crates/storage/opendal/tests/file_io_s3_test.rs b/crates/storage/opendal/tests/file_io_s3_test.rs index 5801af0606..207a4454d7 100644 --- a/crates/storage/opendal/tests/file_io_s3_test.rs +++ b/crates/storage/opendal/tests/file_io_s3_test.rs @@ -24,6 +24,7 @@ mod tests { use std::sync::Arc; use async_trait::async_trait; + use futures::StreamExt; use iceberg::io::{ FileIO, FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY, }; @@ -203,4 +204,46 @@ mod tests { } } } + + #[tokio::test] + async fn test_file_io_s3_delete_stream() { + let file_io = get_file_io().await; + + // Write multiple files + let paths: Vec = (0..5) + .map(|i| { + format!( + "s3://bucket1/{}/file-{i}", + 
normalize_test_name_with_parts!("test_file_io_s3_delete_stream") + ) + }) + .collect(); + for path in &paths { + let _ = file_io.delete(path).await; + file_io + .new_output(path) + .unwrap() + .write("delete-me".into()) + .await + .unwrap(); + assert!(file_io.exists(path).await.unwrap()); + } + + // Delete via delete_stream + let stream = futures::stream::iter(paths.clone()).boxed(); + file_io.delete_stream(stream).await.unwrap(); + + // Verify all files are gone + for path in &paths { + assert!(!file_io.exists(path).await.unwrap()); + } + } + + #[tokio::test] + async fn test_file_io_s3_delete_stream_empty() { + let file_io = get_file_io().await; + let stream = futures::stream::empty().boxed(); + // Should succeed with no-op + file_io.delete_stream(stream).await.unwrap(); + } } From ffd6454fe283e0815593d2fb93169ea36b690174 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Mon, 16 Mar 2026 20:46:31 -0700 Subject: [PATCH 11/39] feat(storage): implement opendal resolving storage (#2231) ## Which issue does this PR close? - Closes #2210 ## What changes are included in this PR? - Add OpenDalResolvingStorage ## Are these changes tested? Added a new test --- crates/storage/opendal/src/lib.rs | 3 + crates/storage/opendal/src/resolving.rs | 319 ++++++++++++++++++ .../opendal/tests/resolving_storage_test.rs | 297 ++++++++++++++++ 3 files changed, 619 insertions(+) create mode 100644 crates/storage/opendal/src/resolving.rs create mode 100644 crates/storage/opendal/tests/resolving_storage_test.rs diff --git a/crates/storage/opendal/src/lib.rs b/crates/storage/opendal/src/lib.rs index 7c11f80add..8160680523 100644 --- a/crates/storage/opendal/src/lib.rs +++ b/crates/storage/opendal/src/lib.rs @@ -90,6 +90,9 @@ cfg_if! { } } +mod resolving; +pub use resolving::{OpenDalResolvingStorage, OpenDalResolvingStorageFactory}; + /// OpenDAL-based storage factory. /// /// Maps scheme to the corresponding OpenDalStorage storage variant. 
diff --git a/crates/storage/opendal/src/resolving.rs b/crates/storage/opendal/src/resolving.rs new file mode 100644 index 0000000000..7c06cf96a5 --- /dev/null +++ b/crates/storage/opendal/src/resolving.rs @@ -0,0 +1,319 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Resolving storage that auto-detects the scheme from a path and delegates +//! to the appropriate [`OpenDalStorage`] variant. 
+ +use std::collections::HashMap; +use std::sync::{Arc, RwLock}; + +use async_trait::async_trait; +use bytes::Bytes; +use futures::StreamExt; +use futures::stream::BoxStream; +use iceberg::io::{ + FileMetadata, FileRead, FileWrite, InputFile, OutputFile, Storage, StorageConfig, + StorageFactory, +}; +use iceberg::{Error, ErrorKind, Result}; +use opendal::Scheme; +use serde::{Deserialize, Serialize}; +use url::Url; + +use crate::OpenDalStorage; +#[cfg(feature = "opendal-s3")] +use crate::s3::CustomAwsCredentialLoader; + +/// Schemes supported by OpenDalResolvingStorage +pub const SCHEME_MEMORY: &str = "memory"; +pub const SCHEME_FILE: &str = "file"; +pub const SCHEME_S3: &str = "s3"; +pub const SCHEME_S3A: &str = "s3a"; +pub const SCHEME_S3N: &str = "s3n"; +pub const SCHEME_GS: &str = "gs"; +pub const SCHEME_GCS: &str = "gcs"; +pub const SCHEME_OSS: &str = "oss"; +pub const SCHEME_ABFSS: &str = "abfss"; +pub const SCHEME_ABFS: &str = "abfs"; +pub const SCHEME_WASBS: &str = "wasbs"; +pub const SCHEME_WASB: &str = "wasb"; + +/// Parse a URL scheme string into an [`opendal::Scheme`]. +fn parse_scheme(scheme: &str) -> Result { + match scheme { + SCHEME_MEMORY => Ok(Scheme::Memory), + SCHEME_FILE | "" => Ok(Scheme::Fs), + SCHEME_S3 | SCHEME_S3A | SCHEME_S3N => Ok(Scheme::S3), + SCHEME_GS | SCHEME_GCS => Ok(Scheme::Gcs), + SCHEME_OSS => Ok(Scheme::Oss), + SCHEME_ABFSS | SCHEME_ABFS | SCHEME_WASBS | SCHEME_WASB => Ok(Scheme::Azdls), + s => s.parse::().map_err(|e| { + Error::new( + ErrorKind::FeatureUnsupported, + format!("Unsupported storage scheme: {s}: {e}"), + ) + }), + } +} + +/// Extract the scheme string from a path URL. +fn extract_scheme(path: &str) -> Result { + let url = Url::parse(path).map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!("Invalid path: {path}, failed to parse URL: {e}"), + ) + })?; + Ok(url.scheme().to_string()) +} + +/// Build an [`OpenDalStorage`] variant for the given scheme and config properties. 
+fn build_storage_for_scheme( + scheme: &str, + props: &HashMap, + #[cfg(feature = "opendal-s3")] customized_credential_load: &Option, +) -> Result { + match parse_scheme(scheme)? { + #[cfg(feature = "opendal-s3")] + Scheme::S3 => { + let config = crate::s3::s3_config_parse(props.clone())?; + Ok(OpenDalStorage::S3 { + configured_scheme: scheme.to_string(), + config: Arc::new(config), + customized_credential_load: customized_credential_load.clone(), + }) + } + #[cfg(feature = "opendal-gcs")] + Scheme::Gcs => { + let config = crate::gcs::gcs_config_parse(props.clone())?; + Ok(OpenDalStorage::Gcs { + config: Arc::new(config), + }) + } + #[cfg(feature = "opendal-oss")] + Scheme::Oss => { + let config = crate::oss::oss_config_parse(props.clone())?; + Ok(OpenDalStorage::Oss { + config: Arc::new(config), + }) + } + #[cfg(feature = "opendal-azdls")] + Scheme::Azdls => { + let configured_scheme: crate::azdls::AzureStorageScheme = scheme.parse()?; + let config = crate::azdls::azdls_config_parse(props.clone())?; + Ok(OpenDalStorage::Azdls { + configured_scheme, + config: Arc::new(config), + }) + } + #[cfg(feature = "opendal-fs")] + Scheme::Fs => Ok(OpenDalStorage::LocalFs), + #[cfg(feature = "opendal-memory")] + Scheme::Memory => Ok(OpenDalStorage::Memory(crate::memory::memory_config_build()?)), + unsupported => Err(Error::new( + ErrorKind::FeatureUnsupported, + format!("Unsupported storage scheme: {unsupported}"), + )), + } +} + +/// A resolving storage factory that creates [`OpenDalResolvingStorage`] instances. +/// +/// This factory accepts paths from any supported storage system and dynamically +/// delegates operations to the appropriate [`OpenDalStorage`] variant based on +/// the path scheme. 
+/// +/// # Example +/// +/// ```rust,ignore +/// use std::sync::Arc; +/// use iceberg::io::FileIOBuilder; +/// use iceberg_storage_opendal::OpenDalResolvingStorageFactory; +/// +/// let factory = OpenDalResolvingStorageFactory::new(); +/// let file_io = FileIOBuilder::new(Arc::new(factory)) +/// .with_prop("s3.region", "us-east-1") +/// .build(); +/// ``` +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct OpenDalResolvingStorageFactory { + /// Custom AWS credential loader for S3 storage. + #[cfg(feature = "opendal-s3")] + #[serde(skip)] + customized_credential_load: Option, +} + +impl Default for OpenDalResolvingStorageFactory { + fn default() -> Self { + Self::new() + } +} + +impl OpenDalResolvingStorageFactory { + /// Create a new resolving storage factory. + pub fn new() -> Self { + Self { + #[cfg(feature = "opendal-s3")] + customized_credential_load: None, + } + } + + /// Set a custom AWS credential loader for S3 storage. + #[cfg(feature = "opendal-s3")] + pub fn with_s3_credential_loader(mut self, loader: CustomAwsCredentialLoader) -> Self { + self.customized_credential_load = Some(loader); + self + } +} + +#[typetag::serde] +impl StorageFactory for OpenDalResolvingStorageFactory { + fn build(&self, config: &StorageConfig) -> Result> { + Ok(Arc::new(OpenDalResolvingStorage { + props: config.props().clone(), + storages: RwLock::new(HashMap::new()), + #[cfg(feature = "opendal-s3")] + customized_credential_load: self.customized_credential_load.clone(), + })) + } +} + +/// A resolving storage that auto-detects the scheme from a path and delegates +/// to the appropriate [`OpenDalStorage`] variant. +/// +/// Sub-storages are lazily created on first use for each scheme and cached +/// for subsequent operations. +#[derive(Debug, Serialize, Deserialize)] +pub struct OpenDalResolvingStorage { + /// Configuration properties shared across all backends. + props: HashMap, + /// Cache of scheme → storage mappings. 
+ #[serde(skip, default)] + storages: RwLock>>, + /// Custom AWS credential loader for S3 storage. + #[cfg(feature = "opendal-s3")] + #[serde(skip)] + customized_credential_load: Option, +} + +impl OpenDalResolvingStorage { + /// Resolve the storage for the given path by extracting the scheme and + /// returning the cached or newly-created [`OpenDalStorage`]. + fn resolve(&self, path: &str) -> Result> { + let scheme = extract_scheme(path)?; + + // Fast path: check read lock first. + { + let cache = self + .storages + .read() + .map_err(|_| Error::new(ErrorKind::Unexpected, "Storage cache lock poisoned"))?; + if let Some(storage) = cache.get(&scheme) { + return Ok(storage.clone()); + } + } + + // Slow path: build and insert under write lock. + let mut cache = self + .storages + .write() + .map_err(|_| Error::new(ErrorKind::Unexpected, "Storage cache lock poisoned"))?; + + // Double-check after acquiring write lock. + if let Some(storage) = cache.get(&scheme) { + return Ok(storage.clone()); + } + + let storage = build_storage_for_scheme( + &scheme, + &self.props, + #[cfg(feature = "opendal-s3")] + &self.customized_credential_load, + )?; + let storage = Arc::new(storage); + cache.insert(scheme, storage.clone()); + Ok(storage) + } +} + +#[async_trait] +#[typetag::serde] +impl Storage for OpenDalResolvingStorage { + async fn exists(&self, path: &str) -> Result { + self.resolve(path)?.exists(path).await + } + + async fn metadata(&self, path: &str) -> Result { + self.resolve(path)?.metadata(path).await + } + + async fn read(&self, path: &str) -> Result { + self.resolve(path)?.read(path).await + } + + async fn reader(&self, path: &str) -> Result> { + self.resolve(path)?.reader(path).await + } + + async fn write(&self, path: &str, bs: Bytes) -> Result<()> { + self.resolve(path)?.write(path, bs).await + } + + async fn writer(&self, path: &str) -> Result> { + self.resolve(path)?.writer(path).await + } + + async fn delete(&self, path: &str) -> Result<()> { + 
self.resolve(path)?.delete(path).await + } + + async fn delete_prefix(&self, path: &str) -> Result<()> { + self.resolve(path)?.delete_prefix(path).await + } + + async fn delete_stream(&self, mut paths: BoxStream<'static, String>) -> Result<()> { + // Group paths by scheme so each resolved storage receives a batch, + // avoiding repeated operator creation per path. + let mut grouped: HashMap> = HashMap::new(); + while let Some(path) = paths.next().await { + let scheme = extract_scheme(&path)?; + grouped.entry(scheme).or_default().push(path); + } + + for (_, paths) in grouped { + let storage = self.resolve(&paths[0])?; + storage + .delete_stream(futures::stream::iter(paths).boxed()) + .await?; + } + Ok(()) + } + + fn new_input(&self, path: &str) -> Result { + Ok(InputFile::new( + Arc::new(self.resolve(path)?.as_ref().clone()), + path.to_string(), + )) + } + + fn new_output(&self, path: &str) -> Result { + Ok(OutputFile::new( + Arc::new(self.resolve(path)?.as_ref().clone()), + path.to_string(), + )) + } +} diff --git a/crates/storage/opendal/tests/resolving_storage_test.rs b/crates/storage/opendal/tests/resolving_storage_test.rs new file mode 100644 index 0000000000..4572ad2c2d --- /dev/null +++ b/crates/storage/opendal/tests/resolving_storage_test.rs @@ -0,0 +1,297 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Integration tests for OpenDalResolvingStorage. +//! +//! These tests assume Docker containers are started externally via `make docker-up`. +//! Each test uses unique file paths based on module path to avoid conflicts. + +#[cfg(all( + feature = "opendal-s3", + feature = "opendal-fs", + feature = "opendal-memory" +))] +mod tests { + use std::sync::Arc; + + use iceberg::io::{ + FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY, + }; + use iceberg_storage_opendal::OpenDalResolvingStorageFactory; + use iceberg_test_utils::{get_minio_endpoint, normalize_test_name_with_parts, set_up}; + + fn get_resolving_file_io() -> iceberg::io::FileIO { + set_up(); + + let minio_endpoint = get_minio_endpoint(); + + FileIOBuilder::new(Arc::new(OpenDalResolvingStorageFactory::new())) + .with_props(vec![ + (S3_ENDPOINT, minio_endpoint), + (S3_ACCESS_KEY_ID, "admin".to_string()), + (S3_SECRET_ACCESS_KEY, "password".to_string()), + (S3_REGION, "us-east-1".to_string()), + ]) + .build() + } + + fn temp_fs_path(name: &str) -> String { + let dir = std::env::temp_dir().join("iceberg_resolving_tests"); + std::fs::create_dir_all(&dir).unwrap(); + let path = dir.join(name); + // Clean up from previous runs + let _ = std::fs::remove_file(&path); + format!("file:/{}", path.display()) + } + + #[tokio::test] + async fn test_mixed_scheme_write_and_read() { + let file_io = get_resolving_file_io(); + + let s3_path = format!( + "s3://bucket1/{}", + normalize_test_name_with_parts!("test_mixed_scheme_write_and_read") + ); + let fs_path = temp_fs_path("mixed_write_and_read.txt"); + let mem_path = "memory://test_mixed_scheme_write_and_read"; + + // Write to all three schemes + file_io + .new_output(&s3_path) + .unwrap() + .write("from_s3".into()) + .await + .unwrap(); + file_io + .new_output(&fs_path) + .unwrap() + .write("from_fs".into()) + .await + .unwrap(); + file_io 
+ .new_output(mem_path) + .unwrap() + .write("from_memory".into()) + .await + .unwrap(); + + // Read back from all three + assert_eq!( + file_io.new_input(&s3_path).unwrap().read().await.unwrap(), + bytes::Bytes::from("from_s3") + ); + assert_eq!( + file_io.new_input(&fs_path).unwrap().read().await.unwrap(), + bytes::Bytes::from("from_fs") + ); + assert_eq!( + file_io.new_input(mem_path).unwrap().read().await.unwrap(), + bytes::Bytes::from("from_memory") + ); + } + + #[tokio::test] + async fn test_mixed_scheme_exists_independently() { + let file_io = get_resolving_file_io(); + + let s3_path = format!( + "s3://bucket1/{}", + normalize_test_name_with_parts!("test_mixed_scheme_exists_independently") + ); + let fs_path = temp_fs_path("mixed_exists_independently.txt"); + let mem_path = "memory://test_mixed_scheme_exists_independently"; + + // Clean up S3 from previous runs + let _ = file_io.delete(&s3_path).await; + + // None exist initially + assert!(!file_io.exists(&s3_path).await.unwrap()); + assert!(!file_io.exists(&fs_path).await.unwrap()); + assert!(!file_io.exists(mem_path).await.unwrap()); + + // Write only to fs + file_io + .new_output(&fs_path) + .unwrap() + .write("fs_only".into()) + .await + .unwrap(); + + // Only fs exists + assert!(!file_io.exists(&s3_path).await.unwrap()); + assert!(file_io.exists(&fs_path).await.unwrap()); + assert!(!file_io.exists(mem_path).await.unwrap()); + } + + #[tokio::test] + async fn test_mixed_scheme_delete_one_keeps_others() { + let file_io = get_resolving_file_io(); + + let s3_path = format!( + "s3://bucket1/{}", + normalize_test_name_with_parts!("test_mixed_scheme_delete_one_keeps_others") + ); + let fs_path = temp_fs_path("mixed_delete_one_keeps_others.txt"); + let mem_path = "memory://test_mixed_scheme_delete_one_keeps_others"; + + // Write to all three + file_io + .new_output(&s3_path) + .unwrap() + .write("s3".into()) + .await + .unwrap(); + file_io + .new_output(&fs_path) + .unwrap() + .write("fs".into()) + .await + 
.unwrap(); + file_io + .new_output(mem_path) + .unwrap() + .write("mem".into()) + .await + .unwrap(); + + // Delete only the fs file + file_io.delete(&fs_path).await.unwrap(); + + // fs gone, S3 and memory still there + assert!(file_io.exists(&s3_path).await.unwrap()); + assert!(!file_io.exists(&fs_path).await.unwrap()); + assert!(file_io.exists(mem_path).await.unwrap()); + + assert_eq!( + file_io.new_input(&s3_path).unwrap().read().await.unwrap(), + bytes::Bytes::from("s3") + ); + assert_eq!( + file_io.new_input(mem_path).unwrap().read().await.unwrap(), + bytes::Bytes::from("mem") + ); + } + + #[tokio::test] + async fn test_mixed_scheme_interleaved_operations() { + let file_io = get_resolving_file_io(); + + let s3_path = format!( + "s3://bucket1/{}", + normalize_test_name_with_parts!("test_mixed_scheme_interleaved") + ); + let fs_path = temp_fs_path("mixed_interleaved.txt"); + let mem_path = "memory://test_mixed_scheme_interleaved"; + + // Interleave: write fs, write memory, write s3 + file_io + .new_output(&fs_path) + .unwrap() + .write("fs_data".into()) + .await + .unwrap(); + file_io + .new_output(mem_path) + .unwrap() + .write("mem_data".into()) + .await + .unwrap(); + file_io + .new_output(&s3_path) + .unwrap() + .write("s3_data".into()) + .await + .unwrap(); + + // Read in reverse order: s3, memory, fs + assert_eq!( + file_io.new_input(&s3_path).unwrap().read().await.unwrap(), + bytes::Bytes::from("s3_data") + ); + assert_eq!( + file_io.new_input(mem_path).unwrap().read().await.unwrap(), + bytes::Bytes::from("mem_data") + ); + assert_eq!( + file_io.new_input(&fs_path).unwrap().read().await.unwrap(), + bytes::Bytes::from("fs_data") + ); + } + + #[tokio::test] + async fn test_invalid_scheme() { + let file_io = get_resolving_file_io(); + let result = file_io.exists("unknown://bucket/key").await; + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Unsupported storage scheme"), + ); + } + + #[tokio::test] + async fn 
test_missing_scheme() { + let file_io = get_resolving_file_io(); + let result = file_io.exists("no-scheme-path").await; + assert!(result.is_err()); + } + + #[cfg(feature = "opendal-s3")] + #[tokio::test] + async fn test_with_custom_credential_loader() { + use async_trait::async_trait; + use iceberg_storage_opendal::CustomAwsCredentialLoader; + use reqsign::{AwsCredential, AwsCredentialLoad}; + use reqwest::Client; + + struct MinioCredentialLoader; + + #[async_trait] + impl AwsCredentialLoad for MinioCredentialLoader { + async fn load_credential( + &self, + _client: Client, + ) -> anyhow::Result<Option<AwsCredential>> { + Ok(Some(AwsCredential { + access_key_id: "admin".to_string(), + secret_access_key: "password".to_string(), + session_token: None, + expires_in: None, + })) + } + } + + set_up(); + let minio_endpoint = get_minio_endpoint(); + + let factory = OpenDalResolvingStorageFactory::new().with_s3_credential_loader( + CustomAwsCredentialLoader::new(Arc::new(MinioCredentialLoader)), + ); + + let file_io = FileIOBuilder::new(Arc::new(factory)) + .with_props(vec![ + (S3_ENDPOINT, minio_endpoint), + (S3_REGION, "us-east-1".to_string()), + ]) + .build(); + + // Should be able to access S3 using the custom credential loader + assert!(file_io.exists("s3://bucket1/").await.unwrap()); + } +} From c1497a9762b54df62343ab0239fbd07118078794 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Tue, 17 Mar 2026 17:57:21 -0700 Subject: [PATCH 12/39] doc: Update README.md to include more components (#2248) ## Which issue does this PR close? - Closes #. ## What changes are included in this PR? ## Are these changes tested? --- README.md | 50 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 693180c6df..20d3b11d87 100644 --- a/README.md +++ b/README.md @@ -27,13 +27,18 @@ Rust implementation of [Apache Iceberg™](https://iceberg.apache.org/). 
The Apache Iceberg Rust project is composed of the following components: -| Name | Release | Docs | -|--------------------------|-----------------------------------------------------------------|-------------------------------------------------------------------------------------------------------| -| [iceberg] | [![iceberg image]][iceberg link] | [![docs release]][iceberg release docs] [![docs dev]][iceberg dev docs] | -| [iceberg-datafusion] | [![iceberg-datafusion image]][iceberg-datafusion link] | [![docs release]][iceberg-datafusion release docs] [![docs dev]][iceberg-datafusion dev docs] | -| [iceberg-catalog-glue] | [![iceberg-catalog-glue image]][iceberg-catalog-glue link] | [![docs release]][iceberg-catalog-glue release docs] [![docs dev]][iceberg-catalog-glue dev docs] | -| [iceberg-catalog-hms] | [![iceberg-catalog-hms image]][iceberg-catalog-hms link] | [![docs release]][iceberg-catalog-hms release docs] [![docs dev]][iceberg-catalog-hms dev docs] | -| [iceberg-catalog-rest] | [![iceberg-catalog-rest image]][iceberg-catalog-rest link] | [![docs release]][iceberg-catalog-rest release docs] [![docs dev]][iceberg-catalog-rest dev docs] | +| Name | Release | Docs | +|-------------------------------|--------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------| +| [iceberg] | [![iceberg image]][iceberg link] | [![docs release]][iceberg release docs] [![docs dev]][iceberg dev docs] | +| [iceberg-catalog-loader] | [![iceberg-catalog-loader image]][iceberg-catalog-loader link] | [![docs release]][iceberg-catalog-loader release docs] [![docs dev]][iceberg-catalog-loader dev docs] | +| [iceberg-catalog-glue] | [![iceberg-catalog-glue image]][iceberg-catalog-glue link] | [![docs release]][iceberg-catalog-glue release docs] [![docs dev]][iceberg-catalog-glue dev docs] | +| [iceberg-catalog-hms] | [![iceberg-catalog-hms 
image]][iceberg-catalog-hms link] | [![docs release]][iceberg-catalog-hms release docs] [![docs dev]][iceberg-catalog-hms dev docs] | +| [iceberg-catalog-rest] | [![iceberg-catalog-rest image]][iceberg-catalog-rest link] | [![docs release]][iceberg-catalog-rest release docs] [![docs dev]][iceberg-catalog-rest dev docs] | +| [iceberg-catalog-s3tables] | [![iceberg-catalog-s3tables image]][iceberg-catalog-s3tables link] | [![docs release]][iceberg-catalog-s3tables release docs] [![docs dev]][iceberg-catalog-s3tables dev docs] | +| [iceberg-catalog-sql] | [![iceberg-catalog-sql image]][iceberg-catalog-sql link] | [![docs release]][iceberg-catalog-sql release docs] [![docs dev]][iceberg-catalog-sql dev docs] | +| [iceberg-cache-moka] | [![iceberg-cache-moka image]][iceberg-cache-moka link] | [![docs release]][iceberg-cache-moka release docs] [![docs dev]][iceberg-cache-moka dev docs] | +| [iceberg-datafusion] | [![iceberg-datafusion image]][iceberg-datafusion link] | [![docs release]][iceberg-datafusion release docs] [![docs dev]][iceberg-datafusion dev docs] | +| [iceberg-storage-opendal] | [![iceberg-storage-opendal image]][iceberg-storage-opendal link] | [![docs release]][iceberg-storage-opendal release docs] [![docs dev]][iceberg-storage-opendal dev docs] | [docs release]: https://img.shields.io/badge/docs-release-blue [docs dev]: https://img.shields.io/badge/docs-dev-blue @@ -61,13 +66,42 @@ The Apache Iceberg Rust project is composed of the following components: [iceberg-catalog-hms release docs]: https://docs.rs/iceberg-catalog-hms [iceberg-catalog-hms dev docs]: https://rust.iceberg.apache.org/api/iceberg_catalog_hms/ - [iceberg-catalog-rest]: crates/catalog/rest/README.md [iceberg-catalog-rest image]: https://img.shields.io/crates/v/iceberg-catalog-rest.svg [iceberg-catalog-rest link]: https://crates.io/crates/iceberg-catalog-rest [iceberg-catalog-rest release docs]: https://docs.rs/iceberg-catalog-rest [iceberg-catalog-rest dev docs]: 
https://rust.iceberg.apache.org/api/iceberg_catalog_rest/ +[iceberg-catalog-sql]: crates/catalog/sql +[iceberg-catalog-sql image]: https://img.shields.io/crates/v/iceberg-catalog-sql.svg +[iceberg-catalog-sql link]: https://crates.io/crates/iceberg-catalog-sql +[iceberg-catalog-sql release docs]: https://docs.rs/iceberg-catalog-sql +[iceberg-catalog-sql dev docs]: https://rust.iceberg.apache.org/api/iceberg_catalog_sql/ + +[iceberg-catalog-s3tables]: crates/catalog/s3tables/README.md +[iceberg-catalog-s3tables image]: https://img.shields.io/crates/v/iceberg-catalog-s3tables.svg +[iceberg-catalog-s3tables link]: https://crates.io/crates/iceberg-catalog-s3tables +[iceberg-catalog-s3tables release docs]: https://docs.rs/iceberg-catalog-s3tables +[iceberg-catalog-s3tables dev docs]: https://rust.iceberg.apache.org/api/iceberg_catalog_s3tables/ + +[iceberg-storage-opendal]: crates/storage/opendal/README.md +[iceberg-storage-opendal image]: https://img.shields.io/crates/v/iceberg-storage-opendal.svg +[iceberg-storage-opendal link]: https://crates.io/crates/iceberg-storage-opendal +[iceberg-storage-opendal release docs]: https://docs.rs/iceberg-storage-opendal +[iceberg-storage-opendal dev docs]: https://rust.iceberg.apache.org/api/iceberg_storage_opendal/ + +[iceberg-catalog-loader]: crates/catalog/loader +[iceberg-catalog-loader image]: https://img.shields.io/crates/v/iceberg-catalog-loader.svg +[iceberg-catalog-loader link]: https://crates.io/crates/iceberg-catalog-loader +[iceberg-catalog-loader release docs]: https://docs.rs/iceberg-catalog-loader +[iceberg-catalog-loader dev docs]: https://rust.iceberg.apache.org/api/iceberg_catalog_loader/ + +[iceberg-cache-moka]: crates/integrations/cache-moka +[iceberg-cache-moka image]: https://img.shields.io/crates/v/iceberg-cache-moka.svg +[iceberg-cache-moka link]: https://crates.io/crates/iceberg-cache-moka +[iceberg-cache-moka release docs]: https://docs.rs/iceberg-cache-moka +[iceberg-cache-moka dev docs]: 
https://rust.iceberg.apache.org/api/iceberg_cache_moka/ + ## Iceberg Rust Implementation Status The features that Iceberg Rust currently supports can be found [here](https://iceberg.apache.org/status/). From 0196cb1fbda064936611f72bb1762e8747ee6c99 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Wed, 18 Mar 2026 12:33:07 -0700 Subject: [PATCH 13/39] fix(python): use resolving storage for python binding (#2246) --- bindings/python/Cargo.toml | 2 +- .../python/src/datafusion_table_provider.rs | 30 ++----------------- 2 files changed, 4 insertions(+), 28 deletions(-) diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 4813c72db7..9b551ea205 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -33,7 +33,7 @@ crate-type = ["cdylib"] [dependencies] arrow = { version = "57.1", features = ["pyarrow", "chrono-tz"] } iceberg = { path = "../../crates/iceberg" } -iceberg-storage-opendal = { path = "../../crates/storage/opendal", features = ["opendal-s3", "opendal-fs", "opendal-memory"] } +iceberg-storage-opendal = { path = "../../crates/storage/opendal", features = ["opendal-all"] } pyo3 = { version = "0.26", features = ["extension-module", "abi3-py310"] } iceberg-datafusion = { path = "../../crates/integrations/datafusion" } datafusion-ffi = { version = "52.1" } diff --git a/bindings/python/src/datafusion_table_provider.rs b/bindings/python/src/datafusion_table_provider.rs index 7fa9f53dbd..95b3eb90d0 100644 --- a/bindings/python/src/datafusion_table_provider.rs +++ b/bindings/python/src/datafusion_table_provider.rs @@ -22,40 +22,16 @@ use std::sync::Arc; use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; use datafusion_ffi::table_provider::FFI_TableProvider; use iceberg::TableIdent; -use iceberg::io::{FileIOBuilder, StorageFactory}; +use iceberg::io::FileIOBuilder; use iceberg::table::StaticTable; use iceberg_datafusion::table::IcebergStaticTableProvider; -use 
iceberg_storage_opendal::OpenDalStorageFactory; +use iceberg_storage_opendal::OpenDalResolvingStorageFactory; use pyo3::exceptions::{PyRuntimeError, PyValueError}; use pyo3::prelude::{PyAnyMethods, PyCapsuleMethods, *}; use pyo3::types::{PyAny, PyCapsule}; use crate::runtime::runtime; -/// Parse the scheme from a URL and return the appropriate StorageFactory. -fn storage_factory_from_path(path: &str) -> PyResult<Arc<dyn StorageFactory>> { - let scheme = path - .split("://") - .next() - .ok_or_else(|| PyRuntimeError::new_err(format!("Invalid path, missing scheme: {path}")))?; - - let factory: Arc<dyn StorageFactory> = match scheme { - "file" | "" => Arc::new(OpenDalStorageFactory::Fs), - "s3" | "s3a" => Arc::new(OpenDalStorageFactory::S3 { - configured_scheme: scheme.to_string(), - customized_credential_load: None, - }), - "memory" => Arc::new(OpenDalStorageFactory::Memory), - _ => { - return Err(PyRuntimeError::new_err(format!( - "Unsupported storage scheme: {scheme}" - ))); - } - }; - - Ok(factory) -} - pub(crate) fn validate_pycapsule(capsule: &Bound<PyCapsule>, name: &str) -> PyResult<()> { let capsule_name = capsule.name()?; if capsule_name.is_none() { @@ -110,7 +86,7 @@ impl PyIcebergDataFusionTable { let table_ident = TableIdent::from_strs(identifier) .map_err(|e| PyRuntimeError::new_err(format!("Invalid table identifier: {e}")))?; - let factory = storage_factory_from_path(&metadata_location)?; + let factory = Arc::new(OpenDalResolvingStorageFactory::new()); let mut builder = FileIOBuilder::new(factory); From 7db4b018153f1eaefd10e1b62720c5f8cb292692 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Wed, 18 Mar 2026 18:11:47 -0700 Subject: [PATCH 14/39] chore: Change publish parallism back to 1 (#2254) ## Which issue does this PR close? - publish has to be done one by one, otherwise we may see failure like this: https://github.com/apache/iceberg-rust/actions/runs/23260056698 ## What changes are included in this PR? - Change publish parallism back to 1 ## Are these changes tested? 
--- .github/workflows/publish.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index c1c9046154..26f61118b7 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -33,8 +33,7 @@ jobs: publish: runs-on: ubuntu-latest strategy: - # Publish package one by one instead of flooding the registry - max-parallel: 15 + max-parallel: 1 # Publish package one by one instead of flooding the registry matrix: # Order here is sensitive, as it will be used to determine the order of publishing package: From cb7f78a25cf6c13a2af7fd295aee31d0560eee39 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Sun, 22 Mar 2026 18:12:34 -0700 Subject: [PATCH 15/39] Infra: Remove GitHub Actions from Dependabot configuration (#2267) Removed GitHub Actions dependency update configuration. ## Which issue does this PR close? - Closes #. ## What changes are included in this PR? Related to https://github.com/apache/iceberg-python/issues/3186 Dont auto update since we now depend on github action being allowlisted by asf-infra first, https://github.com/apache/infrastructure-actions/blob/main/approved_patterns.yml ## Are these changes tested? --- .github/dependabot.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index a94ae0151b..23c9b239ee 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -17,13 +17,6 @@ version: 2 updates: - # Maintain dependencies for GitHub Actions - - package-ecosystem: "github-actions" - directory: "/" - schedule: - interval: "weekly" - day: "sunday" - # Maintain dependencies for iceberg - package-ecosystem: "cargo" directory: "/" From 99dbb168eccfb42d67d3b302fbae532ab1acc21e Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Sun, 22 Mar 2026 18:18:21 -0700 Subject: [PATCH 16/39] ci: pin third-party actions to Apache-approved SHAs (#2266) ## Which issue does this PR close? - Closes #. 
## What changes are included in this PR? Pin `astral-sh/setup-uv` to commit SHAs from Apache's [infrastructure-actions allowlist](https://github.com/apache/infrastructure-actions/blob/07f5f9d2b05fe0ec9886e3ef0a9d79797817f0cb/approved_patterns.yml#L9) Fixes https://github.com/apache/infrastructure-actions/issues/550 ## Are these changes tested? --- .github/workflows/bindings_python_ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/bindings_python_ci.yml b/.github/workflows/bindings_python_ci.yml index 78e1a9615c..1177073f1e 100644 --- a/.github/workflows/bindings_python_ci.yml +++ b/.github/workflows/bindings_python_ci.yml @@ -59,7 +59,7 @@ jobs: runs-on: ubuntu-slim steps: - uses: actions/checkout@v6 - - uses: astral-sh/setup-uv@v7 + - uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 with: version: "0.9.3" enable-cache: true @@ -94,7 +94,7 @@ jobs: working-directory: "bindings/python" command: build args: --out dist - - uses: astral-sh/setup-uv@v7 + - uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 with: version: "0.9.3" enable-cache: true From 2820d47a549b38c56a453788ff5bff3296003eb2 Mon Sep 17 00:00:00 2001 From: maybe-vibe <268501654+maybe-vibe@users.noreply.github.com> Date: Mon, 23 Mar 2026 10:54:04 +0800 Subject: [PATCH 17/39] chore(deps): fix audit check and disable inherited aws sdk default features (#2274) ## Which issue does this PR close? - Fix the audit check by updating `aws-lc-sys` and `rustls-webpki`. - Avoid pulling both the legacy `rustls` / Hyper 0.14 stack and the newer `default-https-client` stack through inherited AWS SDK defaults. ([AWS SDK announcement](https://github.com/awslabs/aws-sdk-rust/discussions/1257)) ## What changes are included in this PR? - Bump to `aws-lc-sys>=0.39.0` and `rustls-webpki>=0.103.10` to pass security audit. 
- Disable inherited AWS SDK default features for `aws-sdk-glue` and `aws-sdk-s3tables` - Explicitly enable `default-https-client` and `rt-tokio` - Bump the minimum `aws-sdk-glue` version to `1.85`, the first version that provides `default-https-client` ## Are these changes tested? --------- Co-authored-by: blackmwk --- Cargo.lock | 172 ++++++++++------------------------------------------- Cargo.toml | 4 +- 2 files changed, 35 insertions(+), 141 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 39812b010f..83c28aa512 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -546,9 +546,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.16.1" +version = "1.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94bffc006df10ac2a68c83692d734a465f8ee6c5b384d8545a636f81d858f4bf" +checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc" dependencies = [ "aws-lc-sys", "zeroize", @@ -556,9 +556,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.38.0" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4321e568ed89bb5a7d291a7f37997c2c0df89809d7b6d12062c81ddb54aa782e" +checksum = "1fa7e52a4c5c547c741610a2c6f123f3881e409b714cd27e6798ef020c514f0a" dependencies = [ "cc", "cmake", @@ -775,23 +775,17 @@ dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", "aws-smithy-types", - "h2 0.3.27", - "h2 0.4.13", - "http 0.2.12", + "h2", "http 1.4.0", - "http-body 0.4.6", - "hyper 0.14.32", - "hyper 1.8.1", - "hyper-rustls 0.24.2", - "hyper-rustls 0.27.7", + "hyper", + "hyper-rustls", "hyper-util", "pin-project-lite", - "rustls 0.21.12", - "rustls 0.23.37", + "rustls", "rustls-native-certs", "rustls-pki-types", "tokio", - "tokio-rustls 0.26.4", + "tokio-rustls", "tower", "tracing", ] @@ -2571,7 +2565,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = 
[ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2934,25 +2928,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "h2" -version = "0.3.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http 0.2.12", - "indexmap 2.13.0", - "slab", - "tokio", - "tokio-util", - "tracing", -] - [[package]] name = "h2" version = "0.4.13" @@ -3151,30 +3126,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" -[[package]] -name = "hyper" -version = "0.14.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2 0.3.27", - "http 0.2.12", - "http-body 0.4.6", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "socket2 0.5.10", - "tokio", - "tower-service", - "tracing", - "want", -] - [[package]] name = "hyper" version = "1.8.1" @@ -3185,7 +3136,7 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2 0.4.13", + "h2", "http 1.4.0", "http-body 1.0.1", "httparse", @@ -3198,21 +3149,6 @@ dependencies = [ "want", ] -[[package]] -name = "hyper-rustls" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" -dependencies = [ - "futures-util", - "http 0.2.12", - "hyper 0.14.32", - "log", - "rustls 0.21.12", - "tokio", - "tokio-rustls 0.24.1", -] - [[package]] name = "hyper-rustls" version = "0.27.7" @@ -3220,13 +3156,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ "http 1.4.0", - "hyper 1.8.1", + "hyper", "hyper-util", - "rustls 0.23.37", + "rustls", "rustls-native-certs", "rustls-pki-types", "tokio", - "tokio-rustls 0.26.4", + "tokio-rustls", "tower-service", "webpki-roots 1.0.6", ] @@ -3243,7 +3179,7 @@ dependencies = [ "futures-util", "http 1.4.0", "http-body 1.0.1", - "hyper 1.8.1", + "hyper", "ipnet", "libc", "percent-encoding", @@ -3811,7 +3747,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -4205,7 +4141,7 @@ dependencies = [ "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.8.1", + "hyper", "hyper-util", "log", "pin-project-lite", @@ -4459,7 +4395,7 @@ dependencies = [ "http 1.4.0", "http-body-util", "humantime", - "hyper 1.8.1", + "hyper", "itertools 0.14.0", "md-5", "parking_lot", @@ -5077,7 +5013,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.37", + "rustls", "socket2 0.5.10", "thiserror 2.0.18", "tokio", @@ -5097,7 +5033,7 @@ dependencies = [ "rand 0.9.2", "ring", "rustc-hash", - "rustls 0.23.37", + "rustls", "rustls-pki-types", "slab", "thiserror 2.0.18", @@ -5383,19 +5319,19 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "h2 0.4.13", + "h2", "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.8.1", - "hyper-rustls 0.27.7", + "hyper", + "hyper-rustls", "hyper-util", "js-sys", "log", "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.37", + "rustls", "rustls-native-certs", "rustls-pki-types", "serde", @@ -5403,7 +5339,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls 0.26.4", + "tokio-rustls", "tokio-util", "tower", "tower-http", @@ -5557,19 +5493,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", -] - -[[package]] -name = "rustls" -version = "0.21.12" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" -dependencies = [ - "log", - "ring", - "rustls-webpki 0.101.7", - "sct", + "windows-sys 0.59.0", ] [[package]] @@ -5582,7 +5506,7 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.9", + "rustls-webpki", "subtle", "zeroize", ] @@ -5620,19 +5544,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.101.7" +version = "0.103.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" -dependencies = [ - "ring", - "untrusted", -] - -[[package]] -name = "rustls-webpki" -version = "0.103.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" +checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" dependencies = [ "aws-lc-rs", "ring", @@ -5766,16 +5680,6 @@ dependencies = [ "sha2", ] -[[package]] -name = "sct" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "security-framework" version = "3.7.0" @@ -6250,7 +6154,7 @@ dependencies = [ "memchr", "once_cell", "percent-encoding", - "rustls 0.23.37", + "rustls", "serde", "serde_json", "sha2", @@ -6543,10 +6447,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.3.4", + "getrandom 0.4.1", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -6708,23 +6612,13 @@ dependencies = [ "syn", ] -[[package]] -name = "tokio-rustls" -version = "0.24.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" -dependencies = [ - "rustls 0.21.12", - "tokio", -] - [[package]] name = "tokio-rustls" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls 0.23.37", + "rustls", "tokio", ] diff --git a/Cargo.toml b/Cargo.toml index 6a361ecbd8..eee1e6dc7d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,8 +54,8 @@ arrow-string = "57.1" as-any = "0.3.2" async-trait = "0.1.89" aws-config = "1.8.7" -aws-sdk-glue = "1.39" -aws-sdk-s3tables = "1.28.0" +aws-sdk-glue = { version = "1.85", default-features = false, features = ["default-https-client", "rt-tokio"] } +aws-sdk-s3tables = { version = "1.28", default-features = false, features = ["default-https-client", "rt-tokio"] } backon = "1.5.1" base64 = "0.22.1" bimap = "0.6" From dc0a3fab9f5a009f5447ec9f6f7d3fd22a41ea3a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Mar 2026 11:19:58 +0800 Subject: [PATCH 18/39] chore(deps): Bump minijinja from 2.17.1 to 2.18.0 (#2273) Bumps [minijinja](https://github.com/mitsuhiko/minijinja) from 2.17.1 to 2.18.0.
Changelog

Sourced from minijinja's changelog.

2.18.0

  • Added keyword argument support (width, first, blank) to the indent filter for Jinja2 compatibility in Rust and Go. #864
  • Added support for dotted integer lookup (for example foo.0) in Rust and Go for Jinja compatibility. #881
  • Added support for dotted filter and test names (including foo . bar . baz) for Jinja compatibility. #879
  • Fixed string escape handling to preserve unknown escapes (such as \s) for Jinja compatibility in Rust and Go. #880
  • Improved generic performance across template parsing, compilation, and rendering.
  • Fixed minijinja-cabi ownership and pointer-safety issues that could leak mj_value values on error paths.
  • Added high-priority minijinja-cabi APIs for callback-based functions/filters/tests, globals, loaders, path joining, auto-escape configuration, and fuel limits.
  • Switched minijinja-cabi header maintenance to manual source-based syncing and removed cbindgen-based generation tooling.
  • Added lightweight C smoke tests for minijinja-cabi (via make -C minijinja-cabi test) with coverage across all exported C ABI functions, and wired them into top-level testing and CI.
  • Added render_captured and render_captured_to methods on Template which return a Captured type holding the rendered output and the template state.
  • Added into_output method on Captured to consume and return the output string.
  • Deprecated render_and_return_state, eval_to_state, and render_to_write in favor of the new render_captured / render_captured_to / Captured API.
Commits
  • 92f114d release 2.18.0
  • 80d30a7 refactor(vendor): prune unused self_cell API surface
  • 50ce37a fix: typos
  • 24891e1 feat(filters): add kwargs support to indent filter for Jinja2 parity
  • 4cca670 refactor: deprecate render_to_write in favor of render_captured_to
  • ac88f8e fix: correct typo render_capturedd_to -> render_captured_to
  • 710137b chore: remove dead_code allow and unused MutBorrow from vendored self_cell
  • 39d00e6 feat: Added new capture methods for state
  • 42b0d08 feat: vendor self_cell and make loader default
  • cc12ae0 fix: make cabi compatible with older rustc
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=minijinja&package-manager=cargo&previous-version=2.17.1&new-version=2.18.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: blackmwk --- Cargo.lock | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 83c28aa512..8856dd61fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4041,6 +4041,12 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "memo-map" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d1115007560874e373613744c6fba374c17688327a71c1476d1a5954cc857b" + [[package]] name = "memoffset" version = "0.9.1" @@ -4074,10 +4080,11 @@ dependencies = [ [[package]] name = "minijinja" -version = "2.17.1" +version = "2.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea5ea1e90055f200af6b8e52a4a34e05e77e7fee953a9fb40c631efdc43cab1" +checksum = "328251e58ad8e415be6198888fc207502727dc77945806421ab34f35bf012e7d" dependencies = [ + "memo-map", "serde", ] From 958ae65b9e2a8b3f7b0b8a61e87b203888036e2d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Mar 2026 11:47:38 +0800 Subject: [PATCH 19/39] chore(deps): Bump datafusion from 52.3.0 to 52.4.0 (#2271) Bumps [datafusion](https://github.com/apache/datafusion) from 52.3.0 to 52.4.0.
Commits
  • e5bad58 [branch-52] Update version to 52.4.0 and update changelog (#21004)
  • e034c6b [branch-52] Update to use lz4_flex 0.12.1 and quinn-proto 0.11.14 (#21009)
  • 664099b [branch-52] fix: InList Dictionary filter pushdown type mismatch (#20962) (#2...
  • 74aaa65 [branch-52] chore: Ignore RUSTSEC-2024-0014 (#20862) (#21020)
  • 5881ede [branch-52] fix: SanityCheckPlan error with window functions and NVL filter (...
  • 7e20eb7 [branch-52] perf: Cache num_output_rows in sort merge join to avoid O(n) reco...
  • e5547e2 [branch-52] Fix duplicate group keys after hash aggregation spill (#20724) (#...
  • 2947378 [branch-52] fix: disable dynamic filter pushdown for non min/max aggregates (...
  • 41acbf8 [branch-52] fix: Return probe_side.len() for RightMark/Anti count(*) querie...
  • a5f6fbb [branch-52] fix: interval analysis error when have two filterexec that inner ...
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=datafusion&package-manager=cargo&previous-version=52.3.0&new-version=52.4.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: blackmwk --- Cargo.lock | 156 ++++++++++++++++++++++++++--------------------------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8856dd61fa..6a221c903a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,7 +109,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -120,7 +120,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -1038,7 +1038,7 @@ version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d314cc62af2b6b0c65780555abb4d02a03dd3b799cd42419044f0c38d99738c0" dependencies = [ - "darling 0.20.11", + "darling 0.23.0", "ident_case", "prettyplease", "proc-macro2", @@ -1251,7 +1251,7 @@ version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] @@ -1552,9 +1552,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea28305c211e3541c9cfcf06a23d0d8c7c824b4502ed1fdf0a6ff4ad24ee531c" +checksum = "43c18ba387f9c05ac1f3be32a73f8f3cc6c1cfc43e5d4b7a8e5b0d3a5eb48dc7" dependencies = [ "arrow", "arrow-schema", @@ -1608,9 +1608,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"78ab99b6df5f60a6ddbc515e4c05caee1192d395cf3cb67ce5d1c17e3c9b9b74" +checksum = "3c75a4ce672b27fb8423810efb92a3600027717a1664d06a2c307eeeabcec694" dependencies = [ "arrow", "async-trait", @@ -1633,9 +1633,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ae3d14912c0d779ada98d30dc60f3244f3c26c2446b87394629ea5c076a31c" +checksum = "2c8b9a3795ffb46bf4957a34c67d89a67558b311ae455c8d4295ff2115eeea50" dependencies = [ "arrow", "async-trait", @@ -1684,9 +1684,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2df29b9592a5d55b8238eaf67d2f21963d5a08cd1a8b7670134405206caabd" +checksum = "205dc1e20441973f470e6b7ef87626a3b9187970e5106058fef1b713047f770c" dependencies = [ "ahash", "apache-avro", @@ -1710,9 +1710,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42639baa0049d5fffd7e283504b9b5e7b9b2e7a2dea476eed60ab0d40d999b85" +checksum = "8cf5880c02ff6f5f11fb5bc19211789fb32fd3c53d79b7d6cb2b12e401312ba0" dependencies = [ "futures", "log", @@ -1721,9 +1721,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25951b617bb22a9619e1520450590cb2004bfcad10bcb396b961f4a1a10dcec5" +checksum = "bc614d6e709450e29b7b032a42c1bdb705f166a6b2edef7bed7c7897eb905499" dependencies = [ "arrow", "async-compression", @@ -1756,9 +1756,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"dc0b28226960ba99c50d78ac6f736ebe09eb5cb3bb9bb58194266278000ca41f" +checksum = "6e497d5fc48dac7ce86f6b4fb09a3a494385774af301ff20ec91aebfae9b05b4" dependencies = [ "arrow", "arrow-ipc", @@ -1780,9 +1780,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18de2e914c2c9ed4b31a4920940b181b0957bc164eec4fc04c294533219bf0a7" +checksum = "474d9b26f185b57f549a0f7ce9183428dd0042014a2e0d093f5430fdc9dae289" dependencies = [ "apache-avro", "arrow", @@ -1800,9 +1800,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f538b57b052a678b1ce860181c65d3ace5a8486312dc50b41c01dd585a773a51" +checksum = "0dfc250cad940d0327ca2e9109dc98830892d17a3d6b2ca11d68570e872cf379" dependencies = [ "arrow", "async-trait", @@ -1823,9 +1823,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89fbc1d32b1b03c9734e27c0c5f041232b68621c8455f22769838634750a196c" +checksum = "c91e9677ed62833b0e8129dec0d1a8f3c9bb7590bd6dd714a43e4c3b663e4aa0" dependencies = [ "arrow", "async-trait", @@ -1845,9 +1845,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "203271d31fe5613a5943181db70ec98162121d1de94a9a300d5e5f19f9500a32" +checksum = "23798383465e0c569bd442d1453b50691261f8ad6511d840c48457b3bf51ae21" dependencies = [ "arrow", "async-trait", @@ -1875,15 +1875,15 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5b6450dc702b3d39e8ced54c3356abb453bd2f3cea86d90d555a4b92f7a38462" +checksum = "3e13e5fe3447baa0584b61ee8644086e007e1ef6e58f4be48bc8a72417854729" [[package]] name = "datafusion-execution" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e66a02fa601de49da5181dbdcf904a18b16a184db2b31f5e5534552ea2d5e660" +checksum = "48a6cc03e34899a54546b229235f7b192634c8e832f78a267f0989b18216c56d" dependencies = [ "arrow", "async-trait", @@ -1903,9 +1903,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdf59a9b308a1a07dc2eb2f85e6366bc0226dc390b40f3aa0a72d79f1cfe2465" +checksum = "ee3315d87eca7a7df58e52a1fb43b4c4171b545fd30ffc3102945c162a9f6ddb" dependencies = [ "arrow", "async-trait", @@ -1926,9 +1926,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd99eac4c6538c708638db43e7a3bd88e0e57955ddb722d420fb9a6d38dfc28f" +checksum = "98c6d83feae0753799f933a2c47dfd15980c6947960cb95ed60f5c1f885548b3" dependencies = [ "arrow", "datafusion-common", @@ -1939,9 +1939,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11aa2c492ac046397b36d57c62a72982aad306495bbcbcdbcabd424d4a2fe245" +checksum = "49b82962015cc3db4d7662459c9f7fcda0591b5edacb8af1cf3bc3031f274800" dependencies = [ "arrow", "arrow-buffer", @@ -1970,9 +1970,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "325a00081898945d48d6194d9ca26120e523c993be3bb7c084061a5a2a72e787" +checksum = 
"4e42c227d9e55a6c8041785d4a8a117e4de531033d480aae10984247ac62e27e" dependencies = [ "ahash", "arrow", @@ -1991,9 +1991,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809bbcb1e0dbec5d0ce30d493d135aea7564f1ba4550395f7f94321223df2dae" +checksum = "cead3cfed825b0b688700f4338d281cd7857e4907775a5b9554c083edd5f3f95" dependencies = [ "ahash", "arrow", @@ -2004,9 +2004,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29ebaa5d7024ef45973e0a7db1e9aeaa647936496f4d4061c0448f23d77d6320" +checksum = "62ea99612970aebab8cf864d02eb3d296bbab7f4881e1023d282b57fe431b201" dependencies = [ "arrow", "arrow-ord", @@ -2027,9 +2027,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60eab6f39df9ee49a2c7fa38eddc01fa0086ee31b29c7d19f38e72f479609752" +checksum = "d83dbf3ab8b9af6f209b068825a7adbd3b88bf276f2a1ec14ba09567b97f5674" dependencies = [ "arrow", "async-trait", @@ -2043,9 +2043,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e00b2c15e342a90e65a846199c9e49293dd09fe1bcd63d8be2544604892f7eb8" +checksum = "732edabe07496e2fc5a1e57a284d7a36edcea445a2821119770a0dea624b472c" dependencies = [ "arrow", "datafusion-common", @@ -2061,9 +2061,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "493e2e1d1f4753dfc139a5213f1b5d0b97eea46a82d9bda3c7908aa96981b74b" +checksum = 
"e0c6e30e09700799bd52adce8c377ab03dda96e73a623e4803a31ad94fe7ce14" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2071,9 +2071,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba01c55ade8278a791b429f7bf5cb1de64de587a342d084b18245edfae7096e2" +checksum = "402f2a8ed70fb99a18f71580a1fe338604222a3d32ddeac6e72c5b34feea2d4d" dependencies = [ "datafusion-doc", "quote", @@ -2082,9 +2082,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a80c6dfbba6a2163a9507f6353ac78c69d8deb26232c9e419160e58ff7c3e047" +checksum = "99f32edb8ba12f08138f86c09b80fae3d4a320551262fa06b91d8a8cb3065a5b" dependencies = [ "arrow", "chrono", @@ -2102,9 +2102,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d3a86264bb9163e7360b6622e789bc7fcbb43672e78a8493f0bc369a41a57c6" +checksum = "987c5e29e96186589301b42e25aa7d11bbe319a73eb02ef8d755edc55b5b89fc" dependencies = [ "ahash", "arrow", @@ -2126,9 +2126,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f5e00e524ac33500be6c5eeac940bd3f6b984ba9b7df0cd5f6c34a8a2cc4d6b" +checksum = "1de89d0afa08b6686697bd8a6bac4ba2cd44c7003356e1bce6114d5a93f94b5c" dependencies = [ "arrow", "datafusion-common", @@ -2141,9 +2141,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ae769ea5d688b4e74e9be5cad6f9d9f295b540825355868a3ab942380dd97ce" +checksum = 
"602d1970c0fe87f1c3a36665d131fbfe1c4379d35f8fc5ec43a362229ad2954d" dependencies = [ "ahash", "arrow", @@ -2158,9 +2158,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3588753ab2b47b0e43cd823fe5e7944df6734dabd6dafb72e2cc1c2a22f1944" +checksum = "b24d704b6385ebe27c756a12e5ba15684576d3b47aeca79cc9fb09480236dc32" dependencies = [ "arrow", "datafusion-common", @@ -2177,9 +2177,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79949cbb109c2a45c527bfe0d956b9f2916807c05d4d2e66f3fd0af827ac2b61" +checksum = "c21d94141ea5043e98793f170798e9c1887095813b8291c5260599341e383a38" dependencies = [ "ahash", "arrow", @@ -2208,9 +2208,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6434e2ee8a39d04b95fed688ff34dc251af6e4a0c2e1714716b6e3846690d589" +checksum = "1a68cce43d18c0dfac95cacd74e70565f7e2fb12b9ed41e2d312f0fa837626b1" dependencies = [ "arrow", "datafusion-common", @@ -2225,9 +2225,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91efb8302b4877d499c37e9a71886b90236ab27d9cc42fd51112febf341abd6" +checksum = "6b4e1c40a0b1896aed4a4504145c2eb7fa9b9da13c2d04b40a4767a09f076199" dependencies = [ "async-trait", "datafusion-common", @@ -2262,9 +2262,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f01eef7bcf4d00e87305b55f1b75792384e130fe0258bac02cd48378ae5ff87" +checksum = 
"2f1891e5b106d1d73c7fe403bd8a265d19c3977edc17f60808daf26c2fe65ffb" dependencies = [ "arrow", "bigdecimal", @@ -2412,7 +2412,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2565,7 +2565,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3184,7 +3184,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.2", "tokio", "tower-service", "tracing", @@ -3747,7 +3747,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4281,7 +4281,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -5021,7 +5021,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2 0.5.10", + "socket2 0.6.2", "thiserror 2.0.18", "tokio", "tracing", @@ -5058,7 +5058,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.2", "tracing", "windows-sys 0.60.2", ] @@ -5500,7 +5500,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -6457,7 +6457,7 @@ dependencies = [ "getrandom 0.4.1", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -7342,7 +7342,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] From 1e2ffb9331127cacf8b94286588317f0f7e4ced2 Mon Sep 17 00:00:00 2001 
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Mar 2026 12:32:25 +0800 Subject: [PATCH 20/39] chore(deps): Bump datafusion-cli from 52.2.0 to 52.3.0 (#2272) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6a221c903a..330e15822a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1656,9 +1656,9 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8af8e5117e7bcac98fdbf4eb981562986f578b9d8a3c7eb91192dc955d450ee" +checksum = "d6cc57c2a8889e722be7913bb3c053c554f23abafa2e99005ad6fe84c765f7ce" dependencies = [ "arrow", "async-trait", From f17663b037752b4fd684457ba5f7d3a96ea97d0c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Mar 2026 12:33:18 +0800 Subject: [PATCH 21/39] chore(deps): Bump datafusion-sqllogictest from 52.2.0 to 52.3.0 (#2270) --- Cargo.lock | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 330e15822a..970c4bad09 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2239,9 +2239,9 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15d28510abfc85709578fcf9065325d43ee3303012c0ccec2dce351bdc577d00" +checksum = "25f2e5519037772210eee5bb87a95dc953e1bd94bc2f9c9d6bb14b0c7fb9ab0a" dependencies = [ "arrow", "bigdecimal", @@ -2280,9 +2280,9 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccb859e97759dcbff66b484bdf4f251f9a76784d3dd7883c124de57510b1e1c2" +checksum = "74e697441492ce35353b07842181f0f92765c5d6ac1daaead4974ecf20058247" dependencies = [ "arrow", "async-trait", 
@@ -2306,9 +2306,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "199790fd96e852997b30da4ff11109378c944841757d93875ea85fc69587ec91" +checksum = "fe00df31ca03a167d3e40054120930fe5fb689e66bc625b602fac7153b222aea" dependencies = [ "async-recursion", "async-trait", @@ -4918,7 +4918,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", - "itertools 0.13.0", + "itertools 0.14.0", "log", "multimap", "petgraph", @@ -4937,7 +4937,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn", From 4c08d35cdc337d2428bb9ef4e14c0ed12caaec35 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Mon, 23 Mar 2026 12:11:19 -0700 Subject: [PATCH 22/39] fix: explicitly set Python interpreter in maturin build to prevent using wrong version (#2277) --- .github/workflows/bindings_python_ci.yml | 2 +- .github/workflows/release_python.yml | 2 +- .github/workflows/release_python_nightly.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/bindings_python_ci.yml b/.github/workflows/bindings_python_ci.yml index 1177073f1e..efd4a78098 100644 --- a/.github/workflows/bindings_python_ci.yml +++ b/.github/workflows/bindings_python_ci.yml @@ -93,7 +93,7 @@ jobs: with: working-directory: "bindings/python" command: build - args: --out dist + args: --out dist -i python3.12 # Explicitly set interpreter; manylinux containers have multiple Pythons and maturin may pick an older one - uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 with: version: "0.9.3" diff --git a/.github/workflows/release_python.yml 
b/.github/workflows/release_python.yml index 5d97e23a2e..abf8b52b6d 100644 --- a/.github/workflows/release_python.yml +++ b/.github/workflows/release_python.yml @@ -191,7 +191,7 @@ jobs: manylinux: ${{ matrix.manylinux || 'auto' }} working-directory: "bindings/python" command: build - args: --release -o dist + args: --release -o dist -i python3.12 # Explicitly set interpreter; manylinux containers have multiple Pythons and maturin may pick an older one - name: Upload wheels uses: actions/upload-artifact@v7 with: diff --git a/.github/workflows/release_python_nightly.yml b/.github/workflows/release_python_nightly.yml index 6f9655e29c..595cb42d05 100644 --- a/.github/workflows/release_python_nightly.yml +++ b/.github/workflows/release_python_nightly.yml @@ -101,7 +101,7 @@ jobs: manylinux: ${{ matrix.manylinux || 'auto' }} working-directory: "bindings/python" command: build - args: --release -o dist + args: --release -o dist -i python3.12 # Explicitly set interpreter; manylinux containers have multiple Pythons and maturin may pick an older one - name: Upload wheels uses: actions/upload-artifact@v7 From 1f1ba537bc9abad2214e1850e664e24d051bfa75 Mon Sep 17 00:00:00 2001 From: blackmwk Date: Tue, 24 Mar 2026 23:36:30 +0800 Subject: [PATCH 23/39] chore(deps): Bump rustls-webpki from 0.103.7 to 0.103.10 in bindings/python (#2278) Addresses the security advisory GHSA-pwjx-qhcg-rvj4 for rustls-webpki < 0.103.10 in the Python bindings lockfile. This is a rebase of #2268 onto main which already includes the root Cargo.lock audit fix from #2274 (aws-lc-sys >= 0.39.0). ## Which issue does this PR close? - Closes #. ## What changes are included in this PR? ## Are these changes tested? ci. 
--- bindings/python/Cargo.lock | 271 ++++++++++++++++++++++++++++++++++++- 1 file changed, 269 insertions(+), 2 deletions(-) diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 60442de906..e4be380322 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -56,6 +56,17 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + [[package]] name = "ahash" version = "0.8.12" @@ -504,6 +515,12 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + [[package]] name = "bigdecimal" version = "0.4.9" @@ -561,6 +578,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-padding" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" +dependencies = [ + "generic-array", +] + [[package]] name = "bnum" version = "0.12.1" @@ -650,6 +676,15 @@ dependencies = [ "libbz2-rs-sys", ] +[[package]] +name = "cbc" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" +dependencies = [ + "cipher", +] + [[package]] name = "cc" version = "1.2.43" @@ -698,6 +733,16 @@ dependencies = [ "phf", ] +[[package]] +name = "cipher" +version = "0.4.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + [[package]] name = "comfy-table" version = "7.1.2" @@ -1694,6 +1739,17 @@ dependencies = [ "sqlparser", ] +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + [[package]] name = "deranged" version = "0.5.5" @@ -2355,6 +2411,7 @@ dependencies = [ "async-trait", "bytes", "cfg-if", + "futures", "iceberg", "opendal", "reqsign", @@ -2504,6 +2561,16 @@ dependencies = [ "rustversion", ] +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "block-padding", + "generic-array", +] + [[package]] name = "integer-encoding" version = "3.0.4" @@ -2620,6 +2687,30 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "jsonwebtoken" +version = "9.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" +dependencies = [ + "base64", + "js-sys", + "pem", + "ring", + "serde", + "serde_json", + "simple_asn1", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin", +] + [[package]] name = "lexical-core" version = "1.0.6" @@ -2860,6 +2951,22 @@ dependencies = [ "serde", ] +[[package]] +name = "num-bigint-dig" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" 
+dependencies = [ + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.5", + "smallvec", + "zeroize", +] + [[package]] name = "num-complex" version = "0.4.6" @@ -2884,6 +2991,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -3062,6 +3180,35 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pbkdf2" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" +dependencies = [ + "digest", + "hmac", +] + +[[package]] +name = "pem" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" +dependencies = [ + "base64", + "serde_core", +] + +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -3110,6 +3257,44 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs5" +version = 
"0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" +dependencies = [ + "aes", + "cbc", + "der", + "pbkdf2", + "scrypt", + "sha2", + "spki", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "pkcs5", + "rand_core 0.6.4", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.32" @@ -3545,11 +3730,14 @@ dependencies = [ "hmac", "home", "http", + "jsonwebtoken", "log", + "once_cell", "percent-encoding", "quick-xml 0.37.5", "rand 0.8.5", "reqwest", + "rsa", "rust-ini", "serde", "serde_json", @@ -3623,6 +3811,27 @@ dependencies = [ "byteorder", ] +[[package]] +name = "rsa" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" +dependencies = [ + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core 0.6.4", + "sha2", + "signature", + "spki", + "subtle", + "zeroize", +] + [[package]] name = "rust-ini" version = "0.21.3" @@ -3687,9 +3896,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.7" +version = "0.103.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10b3f4191e8a80e6b43eebabfac91e5dcecebb27a71f04e820c47ec41d314bf" +checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" dependencies = [ "ring", "rustls-pki-types", @@ -3708,6 +3917,15 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "salsa20" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" +dependencies = [ + "cipher", +] + [[package]] name = "same-file" version = "1.0.6" @@ -3747,6 +3965,17 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scrypt" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" +dependencies = [ + "pbkdf2", + "salsa20", + "sha2", +] + [[package]] name = "semver" version = "1.0.27" @@ -3903,6 +4132,16 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + [[package]] name = "simd-adler32" version = "0.3.7" @@ -3915,6 +4154,18 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" +[[package]] +name = "simple_asn1" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" +dependencies = [ + "num-bigint", + "num-traits", + "thiserror", + "time", +] + [[package]] name = "siphasher" version = "1.0.1" @@ -3949,6 +4200,22 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + +[[package]] +name = "spki" +version = "0.7.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + [[package]] name = "sqlparser" version = "0.59.0" From 44e73f6f3167e9c10c3793eeb4c04b4a6946220a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 24 Mar 2026 09:22:41 -0700 Subject: [PATCH 24/39] chore(deps): Bump bytes from 1.11.0 to 1.11.1 in /bindings/python (#2281) Bumps [bytes](https://github.com/tokio-rs/bytes) from 1.11.0 to 1.11.1.
Release notes

Sourced from bytes's releases.

Bytes v1.11.1

1.11.1 (February 3rd, 2026)

  • Fix integer overflow in BytesMut::reserve
Changelog

Sourced from bytes's changelog.

1.11.1 (February 3rd, 2026)

  • Fix integer overflow in BytesMut::reserve
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=bytes&package-manager=cargo&previous-version=1.11.0&new-version=1.11.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/apache/iceberg-rust/network/alerts).
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- bindings/python/Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index e4be380322..842a9260b3 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -663,9 +663,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "bzip2" From 9e9a2b1312f4f7f3d071464c08e0606a61ff512e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 24 Mar 2026 09:22:59 -0700 Subject: [PATCH 25/39] chore(deps): Bump time from 0.3.44 to 0.3.47 in /bindings/python (#2282) Bumps [time](https://github.com/time-rs/time) from 0.3.44 to 0.3.47.
Release notes

Sourced from time's releases.

v0.3.47

See the changelog for details.

v0.3.46

See the changelog for details.

v0.3.45

See the changelog for details.

Changelog

Sourced from time's changelog.

0.3.47 [2026-02-05]

Security

  • The possibility of a stack exhaustion denial of service attack when parsing RFC 2822 has been eliminated. Previously, it was possible to craft input that would cause unbounded recursion. Now, the depth of the recursion is tracked, causing an error to be returned if it exceeds a reasonable limit.

    This attack vector requires parsing user-provided input, with any type, using the RFC 2822 format.

Compatibility

  • Attempting to format a value with a well-known format (i.e. RFC 3339, RFC 2822, or ISO 8601) will error at compile time if the type being formatted does not provide sufficient information. This would previously fail at runtime. Similarly, attempting to format a value with ISO 8601 that is only configured for parsing (i.e. Iso8601::PARSING) will error at compile time.

Added

  • Builder methods for format description modifiers, eliminating the need for verbose initialization when done manually.
  • date!(2026-W01-2) is now supported. Previously, a space was required between W and 01.
  • [end] now has a trailing_input modifier which can either be prohibit (the default) or discard. When it is discard, all remaining input is ignored. Note that if there are components after [end], they will still attempt to be parsed, likely resulting in an error.

Changed

  • More performance gains when parsing.

Fixed

  • If manually formatting a value, the number of bytes written was one short for some components. This has been fixed such that the number of bytes written is always correct.
  • The possibility of integer overflow when parsing an owned format description has been effectively eliminated. This would previously wrap when overflow checks were disabled. Instead of storing the depth as u8, it is stored as u32. This would require multiple gigabytes of nested input to overflow, at which point we've got other problems and trivial mitigations are available by downstream users.

0.3.46 [2026-01-23]

Added

  • All possible panics are now documented for the relevant methods.
  • The need to use #[serde(default)] when using custom serde formats is documented. This applies only when deserializing an Option<T>.
  • Duration::nanoseconds_i128 has been made public, mirroring std::time::Duration::from_nanos_u128.

... (truncated)

Commits
  • d5144cd v0.3.47 release
  • f6206b0 Guard against integer overflow in release mode
  • 1c63dc7 Avoid denial of service when parsing Rfc2822
  • 5940df6 Add builder methods to avoid verbose construction
  • 00881a4 Manually format macros everywhere
  • bb723b6 Add trailing_input modifier to end
  • 31c4f8e Permit W12 in date! macro
  • 490a17b Mark error paths in well-known formats as cold
  • 6cb1896 Optimize Rfc2822 parsing
  • 6d264d5 Remove erroneous #[inline(never)] attributes
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=time&package-manager=cargo&previous-version=0.3.44&new-version=0.3.47)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/apache/iceberg-rust/network/alerts).
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- bindings/python/Cargo.lock | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 842a9260b3..3ce0df6e41 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -2978,9 +2978,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" [[package]] name = "num-integer" @@ -4409,30 +4409,30 @@ dependencies = [ [[package]] name = "time" -version = "0.3.44" +version = "0.3.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" dependencies = [ "deranged", "itoa", "num-conv", "powerfmt", - "serde", + "serde_core", "time-core", "time-macros", ] [[package]] name = "time-core" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" [[package]] name = "time-macros" -version = "0.2.24" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" dependencies = [ "num-conv", "time-core", From 89a9c9087d9c506e064f818abd37d4bcc9bc8a70 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 24 Mar 2026 14:25:44 -0400 Subject: [PATCH 26/39] docs: Add 
DataFusion Comet to Users section of README (#2283) ## Which issue does this PR close? - N/A. ## What changes are included in this PR? - Add DataFusion Comet to the list of users with a description. ## Are these changes tested? - N/A. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 20d3b11d87..4839a855c0 100644 --- a/README.md +++ b/README.md @@ -133,6 +133,7 @@ The Apache Iceberg community is built on the principles described in the [Apache - [RisingWave](https://github.com/risingwavelabs/risingwave): A Postgres-compatible SQL database designed for real-time event streaming data processing, analysis, and management. - [Wrappers](https://github.com/supabase/wrappers): Postgres Foreign Data Wrapper development framework in Rust. - [ETL](https://github.com/supabase/etl): Stream your Postgres data anywhere in real-time. +- [Apache DataFusion Comet](https://github.com/apache/datafusion-comet): High-performance accelerator for Apache Spark, built on top of the powerful Apache DataFusion query engine. ## License From fea5906cbceead1f6993c408d50bb2b18eea7cf0 Mon Sep 17 00:00:00 2001 From: Xander Date: Wed, 25 Mar 2026 00:58:29 +0000 Subject: [PATCH 27/39] feat(encryption) [1/N] Support encryption: Add crypto for AES-GCM (#2026) Add Core Encryption Primitives for Iceberg Encryption Support. Part of https://github.com/apache/iceberg-rust/issues/2034 ## Summary This PR introduces the foundational cryptographic primitives needed for implementing encryption in iceberg-rust, providing AES-GCM encryption operations that match the Java implementation's behavior and data format. ## Motivation Iceberg's Java implementation supports table-level encryption to protect sensitive data at rest. To achieve feature parity and ensure interoperability between Java and Rust implementations, we need to build encryption support from the ground up. 
This PR provides the core cryptographic operations that will serve as the foundation for the complete encryption feature. ## Changes New Module: encryption Added a new encryption module with core AES-GCM cryptographic operations: - encryption/crypto.rs - Core encryption implementation - EncryptionAlgorithm enum supporting AES-128-GCM, as this is the only algorithm currently supported in arrow parquet - SecureKey struct with automatic memory zeroization for security - AesGcmEncryptor providing encrypt/decrypt operations with AAD support Key Features 1. Java-Compatible Format: Ciphertext format matches Java's implementation exactly: [12-byte nonce][encrypted data][16-byte GCM authentication tag]. This ensures files encrypted by Java can be decrypted by Rust and vice versa. 2. Secure Key Handling: Uses the zeroize crate to automatically clear encryption keys from memory when dropped, preventing key material from lingering in memory. 3. Additional Authenticated Data (AAD): Full support for AAD to ensure integrity of associated metadata that isn't encrypted. 4. Comprehensive Testing: 8 tests covering: - Round-trip encryption/decryption for both AES-128 and AES-256 - AAD validation - Empty plaintext handling - Tamper detection - Format compatibility verification Dependencies Added - aes-gcm = "0.10" - Industry-standard AES-GCM implementation - zeroize = "1.7" - Secure memory cleanup for encryption keys Compatibility This implementation directly corresponds to Java's https://github.com/apache/iceberg/blob/main/core/src/main/java/org/apache/iceberg/encryption/Ciphers.java: | Java Class | Rust Implementation | |-----------------------------|------------------------------------------| | Ciphers.AesGcmEncryptor | AesGcmEncryptor::encrypt() | | Ciphers.AesGcmDecryptor | AesGcmEncryptor::decrypt() | | EncryptionAlgorithm.AES_GCM | EncryptionAlgorithm::Aes128Gcm | Testing Future Work This PR is the first in a series to implement full encryption support. Upcoming PRs will add: 1. 
Table properties for encryption configuration 2. Key management interfaces (KeyManagementClient trait) 3. EncryptionManager implementation 4. Native Parquet encryption integration 5. AWS KMS support 6. Integration with Table and FileIO Review Notes - This PR is intentionally minimal and self-contained - No existing code paths are modified - this is purely additive - The module is public but won't be used until future PRs wire it up - Format compatibility with Java has been prioritized to ensure interoperability ## Which issue does this PR close? - Closes #. https://github.com/apache/iceberg-rust/issues/2035 ## What changes are included in this PR? ## Are these changes tested? Yes --- Cargo.lock | 356 ++++++++++------ Cargo.toml | 3 + crates/iceberg/Cargo.toml | 2 + crates/iceberg/src/encryption/crypto.rs | 523 ++++++++++++++++++++++++ crates/iceberg/src/encryption/mod.rs | 25 ++ crates/iceberg/src/lib.rs | 1 + 6 files changed, 788 insertions(+), 122 deletions(-) create mode 100644 crates/iceberg/src/encryption/crypto.rs create mode 100644 crates/iceberg/src/encryption/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 970c4bad09..8171f28386 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,16 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common", + "generic-array", +] + [[package]] name = "aes" version = "0.8.4" @@ -19,6 +29,20 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "aes-gcm" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831010a0f742e1209b3bcea8fab6a8e149051ba6099432c8cb2cc117dec3ead1" +dependencies = [ + "aead", + "aes", + "cipher", + "ctr", + "ghash", + 
"subtle", +] + [[package]] name = "ahash" version = "0.8.12" @@ -80,7 +104,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" dependencies = [ "anstyle", - "anstyle-parse", + "anstyle-parse 0.2.7", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse 1.0.0", "anstyle-query", "anstyle-wincon", "colorchoice", @@ -90,9 +129,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anstyle-parse" @@ -103,6 +142,15 @@ dependencies = [ "utf8parse", ] +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + [[package]] name = "anstyle-query" version = "1.1.5" @@ -504,9 +552,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.14" +version = "1.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a8fc176d53d6fe85017f230405e3255cedb4a02221cb55ed6d76dccbbb099b2" +checksum = "11493b0bad143270fb8ad284a096dd529ba91924c5409adeac856cc1bf047dbc" dependencies = [ "aws-credential-types", "aws-runtime", @@ -524,7 +572,7 @@ dependencies = [ "fastrand", "hex", "http 1.4.0", - "ring", + "sha1", "time", "tokio", "tracing", @@ -534,9 +582,9 @@ 
dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.13" +version = "1.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d203b0bf2626dcba8665f5cd0871d7c2c0930223d6b6be9097592fea21242d0" +checksum = "8f20799b373a1be121fe3005fba0c2090af9411573878f224df44b42727fcaf7" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -568,9 +616,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.7.1" +version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede2ddc593e6c8acc6ce3358c28d6677a6dc49b65ba4b37a2befe14a11297e75" +checksum = "5fc0651c57e384202e47153c1260b84a9936e19803d747615edf199dc3b98d17" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -593,9 +641,9 @@ dependencies = [ [[package]] name = "aws-sdk-glue" -version = "1.139.0" +version = "1.142.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af3da2f5cf74983a60a7d5a182d76db1609ee4401057c98732ed8be973cb30ee" +checksum = "3962675ec1f2012ae6439814e784557550fa239a4a291bd4f33d8f514d4fdb5b" dependencies = [ "aws-credential-types", "aws-runtime", @@ -617,9 +665,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3tables" -version = "1.51.0" +version = "1.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c7f1b4eb404522622f5489fc649ba193c1e3ce4416bfcfbbcb008ad0cbfe4f" +checksum = "c91febb29f5287a7b723dbacca6d81b1086b8ac0af6b35b873539ee19c74827f" dependencies = [ "aws-credential-types", "aws-runtime", @@ -641,9 +689,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.95.0" +version = "1.97.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00c5ff27c6ba2cbd95e6e26e2e736676fdf6bcf96495b187733f521cfe4ce448" +checksum = "9aadc669e184501caaa6beafb28c6267fc1baef0810fb58f9b205485ca3f2567" dependencies = [ "aws-credential-types", "aws-runtime", @@ -665,9 +713,9 @@ dependencies = [ [[package]] name 
= "aws-sdk-ssooidc" -version = "1.97.0" +version = "1.99.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d186f1e5a3694a188e5a0640b3115ccc6e084d104e16fd6ba968dca072ffef8" +checksum = "1342a7db8f358d3de0aed2007a0b54e875458e39848d54cc1d46700b2bfcb0a8" dependencies = [ "aws-credential-types", "aws-runtime", @@ -689,9 +737,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.99.0" +version = "1.101.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9acba7c62f3d4e2408fa998a3a8caacd8b9a5b5549cf36e2372fbdae329d5449" +checksum = "ab41ad64e4051ecabeea802d6a17845a91e83287e1dd249e6963ea1ba78c428a" dependencies = [ "aws-credential-types", "aws-runtime", @@ -714,9 +762,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.4.1" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37411f8e0f4bea0c3ca0958ce7f18f6439db24d555dbd809787262cd00926aa9" +checksum = "b0b660013a6683ab23797778e21f1f854744fdf05f68204b4cca4c8c04b5d1f4" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -862,9 +910,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.4.6" +version = "1.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b1117b3b2bbe166d11199b540ceed0d0f7676e36e7b962b5a437a9971eac75" +checksum = "9d73dbfbaa8e4bc57b9045137680b958d274823509a360abfd8e1d514d40c95c" dependencies = [ "base64-simd", "bytes", @@ -897,9 +945,9 @@ dependencies = [ [[package]] name = "aws-types" -version = "1.3.13" +version = "1.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0470cc047657c6e286346bdf10a8719d26efd6a91626992e0e64481e44323e96" +checksum = "47c8323699dd9b3c8d5b3c13051ae9cdef58fd179957c882f8374dd8725962d9" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1024,9 +1072,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.9.0" +version = "3.9.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d13a61f2963b88eef9c1be03df65d42f6996dfeac1054870d950fcf66686f83" +checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" dependencies = [ "bon-macros", "rustversion", @@ -1034,9 +1082,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.9.0" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d314cc62af2b6b0c65780555abb4d02a03dd3b799cd42419044f0c38d99738c0" +checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" dependencies = [ "darling 0.23.0", "ident_case", @@ -1125,9 +1173,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.56" +version = "1.2.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" dependencies = [ "find-msvc-tools", "jobserver", @@ -1183,9 +1231,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.60" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" dependencies = [ "clap_builder", "clap_derive", @@ -1193,11 +1241,11 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.60" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ - "anstream", + "anstream 1.0.0", "anstyle", "clap_lex", "strsim", @@ -1205,9 +1253,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.55" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a" dependencies = [ "heck", "proc-macro2", @@ -1217,9 +1265,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "clipboard-win" @@ -1241,9 +1289,9 @@ dependencies = [ [[package]] name = "colorchoice" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" [[package]] name = "colored" @@ -1296,13 +1344,12 @@ dependencies = [ [[package]] name = "console" -version = "0.16.2" +version = "0.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03e45a4a8926227e4197636ba97a9fc9b00477e9f4bd711395687c5f0734bec4" +checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87" dependencies = [ "encode_unicode", "libc", - "once_cell", "unicode-width 0.2.2", "windows-sys 0.61.2", ] @@ -1443,6 +1490,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", + "rand_core 0.6.4", "typenum", ] @@ -1467,6 +1515,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "ctr" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" +dependencies = [ + "cipher", +] + [[package]] name = "darling" version = "0.20.11" @@ -1656,9 +1713,9 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = 
"52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6cc57c2a8889e722be7913bb3c053c554f23abafa2e99005ad6fe84c765f7ce" +checksum = "46a0b3ed9bfda5f234c62e179bbc1258fc89452a89cd3d652da73efcb994ecf5" dependencies = [ "arrow", "async-trait", @@ -2239,9 +2296,9 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25f2e5519037772210eee5bb87a95dc953e1bd94bc2f9c9d6bb14b0c7fb9ab0a" +checksum = "8e53604bca77d4544426a425e2a50d7b911bbe35d3c8193de24093b445f23856" dependencies = [ "arrow", "bigdecimal", @@ -2280,9 +2337,9 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74e697441492ce35353b07842181f0f92765c5d6ac1daaead4974ecf20058247" +checksum = "3929b7067193345bc345a5ea5f231cccde36fe58fb055d8caef7247ad7566fd5" dependencies = [ "arrow", "async-trait", @@ -2306,9 +2363,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "52.3.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe00df31ca03a167d3e40054120930fe5fb689e66bc625b602fac7153b222aea" +checksum = "2379388ecab67079eeb1185c953fb9c5ed4b283fa3cb81417538378a30545957" dependencies = [ "async-recursion", "async-trait", @@ -2428,9 +2485,9 @@ dependencies = [ [[package]] name = "dissimilar" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8975ffdaa0ef3661bfe02dbdcc06c9f829dfafe6a3c474de366a8d5e44276921" +checksum = "aeda16ab4059c5fd2a83f2b9c9e9c981327b18aa8e3b313f7e6563799d4f093e" [[package]] name = "dlv-list" @@ -2534,7 +2591,7 @@ version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b2daee4ea451f429a58296525ddf28b45a3b64f1acf6587e2067437bb11e218d" dependencies = [ - "anstream", + "anstream 0.6.21", "anstyle", "env_filter", "jiff", @@ -2892,24 +2949,34 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "r-efi", + "r-efi 5.3.0", "wasip2", "wasm-bindgen", ] [[package]] name = "getrandom" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ "cfg-if", "libc", - "r-efi", + "r-efi 6.0.0", "wasip2", "wasip3", ] +[[package]] +name = "ghash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0d8a4362ccb29cb0b265253fb0a2728f592895ee6854fd9bc13f2ffda266ff1" +dependencies = [ + "opaque-debug", + "polyval", +] + [[package]] name = "glob" version = "0.3.3" @@ -3184,7 +3251,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.2", + "socket2 0.6.3", "tokio", "tower-service", "tracing", @@ -3218,6 +3285,7 @@ dependencies = [ name = "iceberg" version = "0.9.0" dependencies = [ + "aes-gcm", "anyhow", "apache-avro", "array-init", @@ -3269,6 +3337,7 @@ dependencies = [ "typetag", "url", "uuid", + "zeroize", "zstd", ] @@ -3697,9 +3766,9 @@ checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" -version = "0.7.10" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +checksum = "d8e7418f59cc01c88316161279a7f665217ae316b388e58a0d10e29f54f1e5eb" dependencies = [ "memchr", "serde", @@ -3731,15 +3800,15 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "jiff" -version = "0.2.22" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819b44bc7c87d9117eb522f14d46e918add69ff12713c475946b0a29363ed1c2" +checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -3752,9 +3821,9 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.22" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "470252db18ecc35fd766c0891b1e3ec6cbbcd62507e85276c01bf75d8e94d4a1" +checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" dependencies = [ "proc-macro2", "quote", @@ -3763,9 +3832,9 @@ dependencies = [ [[package]] name = "jiff-tzdb" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" +checksum = "c900ef84826f1338a557697dc8fc601df9ca9af4ac137c7fb61d4c6f2dfd3076" [[package]] name = "jiff-tzdb-platform" @@ -3891,9 +3960,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.182" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "liblzma" @@ -3956,11 +4025,11 @@ dependencies = [ [[package]] name = "libtest-mimic" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33" +checksum = "14e6ba06f0ade6e504aff834d7c34298e5155c6baca353cc6a4aaff2f9fd7f33" 
dependencies = [ - "anstream", + "anstream 1.0.0", "anstyle", "clap", "escape8259", @@ -4162,9 +4231,9 @@ dependencies = [ [[package]] name = "moka" -version = "0.12.14" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85f8024e1c8e71c778968af91d43700ce1d11b219d127d79fb2934153b82b42b" +checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046" dependencies = [ "async-lock", "crossbeam-channel", @@ -4358,9 +4427,9 @@ dependencies = [ [[package]] name = "num_enum" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +checksum = "5d0bca838442ec211fa11de3a8b0e0e8f3a4522575b5c4c06ed722e005036f26" dependencies = [ "num_enum_derive", "rustversion", @@ -4368,9 +4437,9 @@ dependencies = [ [[package]] name = "num_enum_derive" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +checksum = "680998035259dcfcafe653688bf2aa6d3e2dc05e98be6ab46afb089dc84f1df8" dependencies = [ "proc-macro-crate", "proc-macro2", @@ -4426,9 +4495,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "once_cell_polyfill" @@ -4436,6 +4505,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + 
[[package]] name = "opendal" version = "0.55.0" @@ -4798,6 +4873,18 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" +[[package]] +name = "polyval" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" +dependencies = [ + "cfg-if", + "cpufeatures", + "opaque-debug", + "universal-hash", +] + [[package]] name = "portable-atomic" version = "1.13.1" @@ -4806,9 +4893,9 @@ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" +checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" dependencies = [ "portable-atomic", ] @@ -4885,11 +4972,11 @@ dependencies = [ [[package]] name = "proc-macro-crate" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" dependencies = [ - "toml_edit 0.23.10+spec-1.0.0", + "toml_edit 0.25.5+spec-1.1.0", ] [[package]] @@ -5021,7 +5108,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2 0.6.2", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tracing", @@ -5058,16 +5145,16 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.2", + "socket2 0.6.3", "tracing", "windows-sys 0.60.2", ] [[package]] name = "quote" -version = "1.0.44" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +checksum = 
"41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -5078,6 +5165,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "radix_trie" version = "0.2.1" @@ -5615,9 +5708,9 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.28" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" dependencies = [ "windows-sys 0.61.2", ] @@ -6013,12 +6106,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -6454,7 +6547,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.4.1", + "getrandom 0.4.2", "once_cell", "rustix", "windows-sys 0.61.2", @@ -6578,9 +6671,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" dependencies = [ "tinyvec_macros", ] @@ -6603,7 +6696,7 @@ 
dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.2", + "socket2 0.6.3", "tokio-macros", "windows-sys 0.61.2", ] @@ -6676,9 +6769,9 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.7.5+spec-1.1.0" +version = "1.0.1+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +checksum = "9b320e741db58cac564e26c607d3cc1fdc4a88fd36c879568c07856ed83ff3e9" dependencies = [ "serde_core", ] @@ -6694,28 +6787,28 @@ dependencies = [ "serde_spanned", "toml_datetime 0.6.11", "toml_write", - "winnow", + "winnow 0.7.15", ] [[package]] name = "toml_edit" -version = "0.23.10+spec-1.0.0" +version = "0.25.5+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" +checksum = "8ca1a40644a28bce036923f6a431df0b34236949d111cc07cb6dca830c9ef2e1" dependencies = [ "indexmap 2.13.0", - "toml_datetime 0.7.5+spec-1.1.0", + "toml_datetime 1.0.1+spec-1.1.0", "toml_parser", - "winnow", + "winnow 1.0.0", ] [[package]] name = "toml_parser" -version = "1.0.9+spec-1.1.0" +version = "1.0.10+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" +checksum = "7df25b4befd31c4816df190124375d5a20c6b6921e2cad937316de3fccd63420" dependencies = [ - "winnow", + "winnow 1.0.0", ] [[package]] @@ -6815,9 +6908,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.22" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" dependencies = [ "nu-ansi-term", "sharded-slab", @@ -6999,6 +7092,16 @@ version = "0.5.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common", + "subtle", +] + [[package]] name = "unsafe-libyaml" version = "0.2.11" @@ -7047,7 +7150,7 @@ version = "1.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" dependencies = [ - "getrandom 0.4.1", + "getrandom 0.4.2", "js-sys", "serde_core", "wasm-bindgen", @@ -7637,9 +7740,18 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" -version = "0.7.14" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" +dependencies = [ + "memchr", +] + +[[package]] +name = "winnow" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +checksum = "a90e88e4667264a994d34e6d1ab2d26d398dcdca8b7f52bec8668957517fc7d8" dependencies = [ "memchr", ] @@ -7775,18 +7887,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.40" +version = "0.8.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a789c6e490b576db9f7e6b6d661bcc9799f7c0ac8352f56ea20193b2681532e5" +checksum = "efbb2a062be311f2ba113ce66f697a4dc589f85e78a4aea276200804cea0ed87" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.40" +version = "0.8.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f65c489a7071a749c849713807783f70672b28094011623e200cb86dcb835953" +checksum = 
"0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index eee1e6dc7d..1f3eec4ace 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,8 @@ repository = "https://github.com/apache/iceberg-rust" rust-version = "1.92" [workspace.dependencies] +aes = { version = "0.8", features = ["zeroize"] } +aes-gcm = "0.10" anyhow = "1.0.72" apache-avro = { version = "0.21", features = ["zstandard"] } array-init = "2" @@ -134,4 +136,5 @@ url = "2.5.7" uuid = { version = "1.18", features = ["v7"] } volo = "0.10.6" volo-thrift = "0.10.8" +zeroize = "1.7" zstd = "0.13.3" diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 41ee771617..aa1d0cd4a5 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -33,6 +33,7 @@ default = [] [dependencies] +aes-gcm = { workspace = true } anyhow = { workspace = true } apache-avro = { workspace = true } array-init = { workspace = true } @@ -78,6 +79,7 @@ typed-builder = { workspace = true } typetag = { workspace = true } url = { workspace = true } uuid = { workspace = true } +zeroize = { workspace = true } zstd = { workspace = true } [dev-dependencies] diff --git a/crates/iceberg/src/encryption/crypto.rs b/crates/iceberg/src/encryption/crypto.rs new file mode 100644 index 0000000000..0b34580db8 --- /dev/null +++ b/crates/iceberg/src/encryption/crypto.rs @@ -0,0 +1,523 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Core cryptographic operations for Iceberg encryption. + +use std::fmt; +use std::str::FromStr; + +use aes_gcm::aead::generic_array::typenum::U12; +use aes_gcm::aead::rand_core::RngCore; +use aes_gcm::aead::{Aead, AeadCore, KeyInit, OsRng, Payload}; +use aes_gcm::{Aes128Gcm, Aes256Gcm, AesGcm, Nonce}; +use zeroize::Zeroizing; + +/// AES-192-GCM with 96-bit nonce. Not provided by `aes-gcm` but constructible +/// from the underlying primitives, same as `Aes128Gcm` and `Aes256Gcm`. +type Aes192Gcm = AesGcm; + +use crate::{Error, ErrorKind, Result}; + +/// Wrapper for sensitive byte data (encryption keys, DEKs, etc.) that: +/// - Zeroizes memory on drop +/// - Redacts content in [`Debug`] and [`Display`] output +/// - Provides only `&[u8]` access via [`as_bytes()`](Self::as_bytes) +/// - Uses `Box<[u8]>` (immutable boxed slice) since key bytes never grow +/// +/// Use this type for any struct field that holds plaintext key material. +/// Because its [`Debug`] impl always prints `[N bytes REDACTED]`, structs +/// containing `SensitiveBytes` can safely derive or implement `Debug` +/// without risk of leaking key material. +#[derive(Clone, PartialEq, Eq)] +struct SensitiveBytes(Zeroizing>); + +impl SensitiveBytes { + /// Wraps the given bytes as sensitive material. + pub fn new(bytes: impl Into>) -> Self { + Self(Zeroizing::new(bytes.into())) + } + + /// Returns the underlying bytes. + pub fn as_bytes(&self) -> &[u8] { + &self.0 + } + + /// Returns the number of bytes. 
+ #[allow(dead_code)] // Encryption work is ongoing so currently unused + pub fn len(&self) -> usize { + self.0.len() + } + + /// Returns `true` if the byte slice is empty. + #[allow(dead_code)] // Encryption work is ongoing so currently unused + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } +} + +impl fmt::Debug for SensitiveBytes { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[{} bytes REDACTED]", self.0.len()) + } +} + +impl fmt::Display for SensitiveBytes { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[{} bytes REDACTED]", self.0.len()) + } +} + +/// Supported AES key sizes for AES-GCM encryption. +/// +/// The Iceberg spec supports 128, 192, and 256-bit keys for AES-GCM. +/// See: +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum AesKeySize { + /// 128-bit AES key (16 bytes) + Bits128 = 128, + /// 192-bit AES key (24 bytes) + Bits192 = 192, + /// 256-bit AES key (32 bytes) + Bits256 = 256, +} + +impl AesKeySize { + /// Returns the key length in bytes for this key size. + pub fn key_length(&self) -> usize { + match self { + Self::Bits128 => 16, + Self::Bits192 => 24, + Self::Bits256 => 32, + } + } + + /// Returns the key size for a given DEK length in bytes. + /// + /// Matches Java's `encryption.data-key-length` property semantics: + /// 16 → 128-bit, 24 → 192-bit, 32 → 256-bit. 
+ pub fn from_key_length(len: usize) -> Result { + match len { + 16 => Ok(Self::Bits128), + 24 => Ok(Self::Bits192), + 32 => Ok(Self::Bits256), + _ => Err(Error::new( + ErrorKind::FeatureUnsupported, + format!("Unsupported data key length: {len} (must be 16, 24, or 32)"), + )), + } + } +} + +impl FromStr for AesKeySize { + type Err = Error; + + fn from_str(s: &str) -> Result { + match s { + "128" | "AES_GCM_128" | "AES128_GCM" => Ok(Self::Bits128), + "192" | "AES_GCM_192" | "AES192_GCM" => Ok(Self::Bits192), + "256" | "AES_GCM_256" | "AES256_GCM" => Ok(Self::Bits256), + _ => Err(Error::new( + ErrorKind::FeatureUnsupported, + format!("Unsupported AES key size: {s}"), + )), + } + } +} + +/// A secure encryption key that zeroes its memory on drop. +pub struct SecureKey { + key: SensitiveBytes, + key_size: AesKeySize, +} + +impl SecureKey { + /// Creates a new secure key with the specified key size. + /// + /// # Errors + /// Returns an error if the key length doesn't match the key size requirements. + pub fn new(key: &[u8]) -> Result { + let key_size = AesKeySize::from_key_length(key.len())?; + Ok(Self { + key: SensitiveBytes::new(key), + key_size, + }) + } + + /// Generates a new random key for the specified key size. + pub fn generate(key_size: AesKeySize) -> Self { + let mut key = vec![0u8; key_size.key_length()]; + OsRng.fill_bytes(&mut key); + Self { + key: SensitiveBytes::new(key), + key_size, + } + } + + /// Returns the AES key size. + pub fn key_size(&self) -> AesKeySize { + self.key_size + } + + /// Returns the key bytes. + pub fn as_bytes(&self) -> &[u8] { + self.key.as_bytes() + } +} + +/// AES-GCM cipher for encrypting and decrypting data. +pub struct AesGcmCipher { + key: SensitiveBytes, + key_size: AesKeySize, +} + +impl AesGcmCipher { + /// AES-GCM nonce length in bytes (96 bits). + pub const NONCE_LEN: usize = 12; + /// AES-GCM authentication tag length in bytes (128 bits). 
+ pub const TAG_LEN: usize = 16; + + /// Creates a new cipher with the specified key. + pub fn new(key: SecureKey) -> Self { + Self { + key: SensitiveBytes::new(key.as_bytes()), + key_size: key.key_size(), + } + } + + /// Encrypts data using AES-GCM. + /// + /// # Arguments + /// * `plaintext` - The data to encrypt + /// * `aad` - Additional authenticated data (optional) + /// + /// # Returns + /// The encrypted data in the format: [12-byte nonce][ciphertext][16-byte auth tag] + /// This matches the Java implementation format for compatibility. + pub fn encrypt(&self, plaintext: &[u8], aad: Option<&[u8]>) -> Result> { + match self.key_size { + AesKeySize::Bits128 => { + encrypt_aes_gcm::(self.key.as_bytes(), plaintext, aad) + } + AesKeySize::Bits192 => { + encrypt_aes_gcm::(self.key.as_bytes(), plaintext, aad) + } + AesKeySize::Bits256 => { + encrypt_aes_gcm::(self.key.as_bytes(), plaintext, aad) + } + } + } + + /// Decrypts data using AES-GCM. + /// + /// # Arguments + /// * `ciphertext` - The encrypted data with format: [12-byte nonce][encrypted data][16-byte auth tag] + /// * `aad` - Additional authenticated data (must match encryption) + /// + /// # Returns + /// The decrypted plaintext. 
+ pub fn decrypt(&self, ciphertext: &[u8], aad: Option<&[u8]>) -> Result> { + if ciphertext.len() < Self::NONCE_LEN + Self::TAG_LEN { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Ciphertext too short: expected at least {} bytes, got {}", + Self::NONCE_LEN + Self::TAG_LEN, + ciphertext.len() + ), + )); + } + + match self.key_size { + AesKeySize::Bits128 => { + decrypt_aes_gcm::(self.key.as_bytes(), ciphertext, aad) + } + AesKeySize::Bits192 => { + decrypt_aes_gcm::(self.key.as_bytes(), ciphertext, aad) + } + AesKeySize::Bits256 => { + decrypt_aes_gcm::(self.key.as_bytes(), ciphertext, aad) + } + } + } +} + +fn encrypt_aes_gcm(key_bytes: &[u8], plaintext: &[u8], aad: Option<&[u8]>) -> Result> +where C: Aead + AeadCore + KeyInit { + let cipher = C::new_from_slice(key_bytes).map_err(|e| { + Error::new(ErrorKind::DataInvalid, "Invalid AES key").with_source(anyhow::anyhow!(e)) + })?; + let nonce = C::generate_nonce(&mut OsRng); + + let ciphertext = if let Some(aad) = aad { + cipher.encrypt(&nonce, Payload { + msg: plaintext, + aad, + }) + } else { + cipher.encrypt(&nonce, plaintext.as_ref()) + } + .map_err(|e| { + Error::new(ErrorKind::Unexpected, "AES-GCM encryption failed") + .with_source(anyhow::anyhow!(e)) + })?; + + // Prepend nonce to ciphertext (Java compatible format) + let mut result = Vec::with_capacity(nonce.len() + ciphertext.len()); + result.extend_from_slice(&nonce); + result.extend_from_slice(&ciphertext); + Ok(result) +} + +fn decrypt_aes_gcm(key_bytes: &[u8], ciphertext: &[u8], aad: Option<&[u8]>) -> Result> +where C: Aead + AeadCore + KeyInit { + let cipher = C::new_from_slice(key_bytes).map_err(|e| { + Error::new(ErrorKind::DataInvalid, "Invalid AES key").with_source(anyhow::anyhow!(e)) + })?; + + let nonce = Nonce::from_slice(&ciphertext[..AesGcmCipher::NONCE_LEN]); + let encrypted_data = &ciphertext[AesGcmCipher::NONCE_LEN..]; + + let plaintext = if let Some(aad) = aad { + cipher.decrypt(nonce, Payload { + msg: encrypted_data, + 
aad, + }) + } else { + cipher.decrypt(nonce, encrypted_data) + } + .map_err(|e| { + Error::new(ErrorKind::Unexpected, "AES-GCM decryption failed") + .with_source(anyhow::anyhow!(e)) + })?; + + Ok(plaintext) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_aes_key_size() { + assert_eq!(AesKeySize::Bits128.key_length(), 16); + assert_eq!(AesKeySize::Bits192.key_length(), 24); + assert_eq!(AesKeySize::Bits256.key_length(), 32); + + assert_eq!( + AesKeySize::from_key_length(16).unwrap(), + AesKeySize::Bits128 + ); + assert_eq!( + AesKeySize::from_key_length(24).unwrap(), + AesKeySize::Bits192 + ); + assert_eq!( + AesKeySize::from_key_length(32).unwrap(), + AesKeySize::Bits256 + ); + assert!(AesKeySize::from_key_length(8).is_err()); + + assert_eq!(AesKeySize::from_str("128").unwrap(), AesKeySize::Bits128); + assert_eq!( + AesKeySize::from_str("AES_GCM_128").unwrap(), + AesKeySize::Bits128 + ); + assert_eq!( + AesKeySize::from_str("AES_GCM_256").unwrap(), + AesKeySize::Bits256 + ); + assert!(AesKeySize::from_str("INVALID").is_err()); + } + + #[test] + fn test_secure_key() { + // Test key generation + let key1 = SecureKey::generate(AesKeySize::Bits128); + assert_eq!(key1.as_bytes().len(), 16); + assert_eq!(key1.key_size(), AesKeySize::Bits128); + + // Test key creation with validation + let valid_key = [0u8; 16]; + assert!(SecureKey::new(valid_key.as_slice()).is_ok()); + + let invalid_key = [0u8; 33]; + assert!(SecureKey::new(invalid_key.as_slice()).is_err()); + } + + #[test] + fn test_aes128_gcm_encryption_roundtrip() { + let key = SecureKey::generate(AesKeySize::Bits128); + let cipher = AesGcmCipher::new(key); + + let plaintext = b"Hello, Iceberg encryption!"; + let aad = b"additional authenticated data"; + + // Test without AAD + let ciphertext = cipher.encrypt(plaintext, None).unwrap(); + assert!(ciphertext.len() > plaintext.len() + 12); // nonce + tag + assert_ne!(&ciphertext[12..], plaintext); // encrypted portion differs + + let decrypted = 
cipher.decrypt(&ciphertext, None).unwrap(); + assert_eq!(decrypted, plaintext); + + // Test with AAD + let ciphertext = cipher.encrypt(plaintext, Some(aad)).unwrap(); + let decrypted = cipher.decrypt(&ciphertext, Some(aad)).unwrap(); + assert_eq!(decrypted, plaintext); + + // Test with wrong AAD fails + assert!(cipher.decrypt(&ciphertext, Some(b"wrong aad")).is_err()); + } + + #[test] + fn test_aes192_gcm_encryption_roundtrip() { + let key = SecureKey::generate(AesKeySize::Bits192); + let cipher = AesGcmCipher::new(key); + + let plaintext = b"Hello, Iceberg encryption!"; + let aad = b"additional authenticated data"; + + // Test without AAD + let ciphertext = cipher.encrypt(plaintext, None).unwrap(); + let decrypted = cipher.decrypt(&ciphertext, None).unwrap(); + assert_eq!(decrypted, plaintext); + + // Test with AAD + let ciphertext = cipher.encrypt(plaintext, Some(aad)).unwrap(); + let decrypted = cipher.decrypt(&ciphertext, Some(aad)).unwrap(); + assert_eq!(decrypted, plaintext); + + // Test with wrong AAD fails + assert!(cipher.decrypt(&ciphertext, Some(b"wrong aad")).is_err()); + } + + #[test] + fn test_aes256_gcm_encryption_roundtrip() { + let key = SecureKey::generate(AesKeySize::Bits256); + let cipher = AesGcmCipher::new(key); + + let plaintext = b"Hello, Iceberg encryption!"; + let aad = b"additional authenticated data"; + + // Test without AAD + let ciphertext = cipher.encrypt(plaintext, None).unwrap(); + let decrypted = cipher.decrypt(&ciphertext, None).unwrap(); + assert_eq!(decrypted, plaintext); + + // Test with AAD + let ciphertext = cipher.encrypt(plaintext, Some(aad)).unwrap(); + let decrypted = cipher.decrypt(&ciphertext, Some(aad)).unwrap(); + assert_eq!(decrypted, plaintext); + + // Test with wrong AAD fails + assert!(cipher.decrypt(&ciphertext, Some(b"wrong aad")).is_err()); + } + + #[test] + fn test_cross_key_size_incompatibility() { + let plaintext = b"Cross-key test"; + + let key128 = SecureKey::generate(AesKeySize::Bits128); + let key256 = 
SecureKey::generate(AesKeySize::Bits256); + + let cipher128 = AesGcmCipher::new(key128); + let cipher256 = AesGcmCipher::new(key256); + + // Ciphertext from 128-bit key should not decrypt with 256-bit key + let ciphertext = cipher128.encrypt(plaintext, None).unwrap(); + assert!(cipher256.decrypt(&ciphertext, None).is_err()); + } + + #[test] + fn test_encryption_with_empty_plaintext() { + let key = SecureKey::generate(AesKeySize::Bits128); + let cipher = AesGcmCipher::new(key); + + let plaintext = b""; + let ciphertext = cipher.encrypt(plaintext, None).unwrap(); + + // Even empty plaintext produces nonce + tag + assert_eq!(ciphertext.len(), 12 + 16); // 12-byte nonce + 16-byte tag + + let decrypted = cipher.decrypt(&ciphertext, None).unwrap(); + assert_eq!(decrypted, plaintext); + } + + #[test] + fn test_decryption_with_tampered_ciphertext() { + let key = SecureKey::generate(AesKeySize::Bits128); + let cipher = AesGcmCipher::new(key); + + let plaintext = b"Sensitive data"; + let mut ciphertext = cipher.encrypt(plaintext, None).unwrap(); + + // Tamper with the encrypted portion (after the nonce) + if ciphertext.len() > 12 { + ciphertext[12] ^= 0xFF; + } + + // Decryption should fail due to authentication tag mismatch + assert!(cipher.decrypt(&ciphertext, None).is_err()); + } + + #[test] + fn test_different_keys_produce_different_ciphertexts() { + let key1 = SecureKey::generate(AesKeySize::Bits128); + let key2 = SecureKey::generate(AesKeySize::Bits128); + + let cipher1 = AesGcmCipher::new(key1); + let cipher2 = AesGcmCipher::new(key2); + + let plaintext = b"Same plaintext"; + + let ciphertext1 = cipher1.encrypt(plaintext, None).unwrap(); + let ciphertext2 = cipher2.encrypt(plaintext, None).unwrap(); + + // Different keys should produce different ciphertexts (comparing the encrypted portion) + // Note: The nonces will also be different, but we're mainly interested in the encrypted data + assert_ne!(&ciphertext1[12..], &ciphertext2[12..]); + } + + #[test] + fn 
test_ciphertext_format_java_compatible() { + // Test that our ciphertext format matches Java's: [12-byte nonce][ciphertext][16-byte tag] + let key = SecureKey::generate(AesKeySize::Bits128); + let cipher = AesGcmCipher::new(key); + + let plaintext = b"Test data"; + let ciphertext = cipher.encrypt(plaintext, None).unwrap(); + + // Format should be: [12-byte nonce][encrypted_data + 16-byte GCM tag] + assert_eq!( + ciphertext.len(), + 12 + plaintext.len() + 16, + "Ciphertext should be nonce + plaintext + tag length" + ); + + // Verify we can decrypt by extracting nonce from the beginning + let nonce = &ciphertext[..12]; + assert_eq!(nonce.len(), 12, "Nonce should be 12 bytes"); + + // The rest is encrypted data + tag + let encrypted_with_tag = &ciphertext[12..]; + assert_eq!( + encrypted_with_tag.len(), + plaintext.len() + 16, + "Encrypted portion should be plaintext length + 16-byte tag" + ); + } +} diff --git a/crates/iceberg/src/encryption/mod.rs b/crates/iceberg/src/encryption/mod.rs new file mode 100644 index 0000000000..097f4f24e3 --- /dev/null +++ b/crates/iceberg/src/encryption/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Encryption module for Apache Iceberg. +//! +//! 
This module provides core cryptographic primitives for encrypting +//! and decrypting data in Iceberg tables. + +mod crypto; + +pub use crypto::{AesGcmCipher, AesKeySize, SecureKey}; diff --git a/crates/iceberg/src/lib.rs b/crates/iceberg/src/lib.rs index 8b345deb6e..0b138d2818 100644 --- a/crates/iceberg/src/lib.rs +++ b/crates/iceberg/src/lib.rs @@ -92,6 +92,7 @@ mod runtime; pub mod arrow; pub(crate) mod delete_file_index; +pub mod encryption; pub mod test_utils; mod utils; pub mod writer; From 56fda82f3ad68f3231ece7150f52b78df71bb06e Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Tue, 24 Mar 2026 18:00:35 -0700 Subject: [PATCH 28/39] feat!(catalog): adding support for purge_table (#2232) ## Which issue does this PR close? - Closes #2133 ## What changes are included in this PR? - Add catalog/utils.rs to provide helpers to delete table data using file_io and table_metadata - Add new API `purge_table` to `Catalog` trait and add default implementation - Implement purge_table for S3TableCatalog and RestCatalog ## Are these changes tested? 
Added new tests in table_suite --- crates/catalog/glue/src/catalog.rs | 11 ++ crates/catalog/hms/src/catalog.rs | 11 ++ crates/catalog/loader/tests/common/mod.rs | 2 +- crates/catalog/loader/tests/table_suite.rs | 77 ++++++++++++ crates/catalog/rest/src/catalog.rs | 55 +++++--- crates/catalog/s3tables/src/catalog.rs | 19 +-- crates/catalog/sql/src/catalog.rs | 11 ++ crates/iceberg/src/catalog/memory/catalog.rs | 11 ++ crates/iceberg/src/catalog/mod.rs | 9 ++ crates/iceberg/src/catalog/utils.rs | 124 +++++++++++++++++++ crates/iceberg/src/lib.rs | 1 + crates/iceberg/src/spec/table_properties.rs | 35 ++++++ 12 files changed, 337 insertions(+), 29 deletions(-) create mode 100644 crates/iceberg/src/catalog/utils.rs diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs index 9e9d4580c3..a7e0171337 100644 --- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -659,6 +659,17 @@ impl Catalog for GlueCatalog { Ok(()) } + async fn purge_table(&self, table: &TableIdent) -> Result<()> { + let table_info = self.load_table(table).await?; + self.drop_table(table).await?; + iceberg::drop_table_data( + table_info.file_io(), + table_info.metadata(), + table_info.metadata_location(), + ) + .await + } + /// Asynchronously checks the existence of a specified table /// in the database. /// diff --git a/crates/catalog/hms/src/catalog.rs b/crates/catalog/hms/src/catalog.rs index bd78193732..4a030c1104 100644 --- a/crates/catalog/hms/src/catalog.rs +++ b/crates/catalog/hms/src/catalog.rs @@ -604,6 +604,17 @@ impl Catalog for HmsCatalog { Ok(()) } + async fn purge_table(&self, table: &TableIdent) -> Result<()> { + let table_info = self.load_table(table).await?; + self.drop_table(table).await?; + iceberg::drop_table_data( + table_info.file_io(), + table_info.metadata(), + table_info.metadata_location(), + ) + .await + } + /// Asynchronously checks the existence of a specified table /// in the database. 
/// diff --git a/crates/catalog/loader/tests/common/mod.rs b/crates/catalog/loader/tests/common/mod.rs index 90b72df8ab..600cd9b6f4 100644 --- a/crates/catalog/loader/tests/common/mod.rs +++ b/crates/catalog/loader/tests/common/mod.rs @@ -335,7 +335,7 @@ pub fn assert_map_contains(expected: &HashMap, actual: &HashMap< pub async fn cleanup_namespace_dyn(catalog: &dyn Catalog, namespace: &NamespaceIdent) { if let Ok(tables) = catalog.list_tables(namespace).await { for table in tables { - let _ = catalog.drop_table(&table).await; + let _ = catalog.purge_table(&table).await; } } let _ = catalog.drop_namespace(namespace).await; diff --git a/crates/catalog/loader/tests/table_suite.rs b/crates/catalog/loader/tests/table_suite.rs index 6b7a3a822c..cdc9b11043 100644 --- a/crates/catalog/loader/tests/table_suite.rs +++ b/crates/catalog/loader/tests/table_suite.rs @@ -274,3 +274,80 @@ async fn test_catalog_drop_table_missing_errors(#[case] kind: CatalogKind) -> Re assert!(catalog.drop_table(&table_ident).await.is_err()); Ok(()) } + +// Common behavior: purge_table removes the table from the catalog. 
+#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_purge_table(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_purge_table", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let table_name = normalize_test_name_with_parts!("catalog_purge_table", harness.label, "table"); + let table = catalog + .create_table(&namespace, table_creation(table_name)) + .await?; + let ident = table.identifier().clone(); + + assert!(catalog.table_exists(&ident).await?); + + // Capture metadata location and file_io before purge so we can verify + // that the underlying files are actually deleted. + let metadata_location = table.metadata_location().map(|s| s.to_string()); + let file_io = table.file_io().clone(); + + catalog.purge_table(&ident).await?; + assert!(!catalog.table_exists(&ident).await?); + + if let Some(location) = &metadata_location { + assert!( + !file_io.exists(location).await?, + "Metadata file should have been deleted after purge" + ); + } + + catalog.drop_namespace(&namespace).await?; + + Ok(()) +} + +// Common behavior: purging a missing table should error. 
+#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::hms_catalog(CatalogKind::Hms)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_purge_table_missing_errors(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_purge_table_missing_errors", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let table_ident = TableIdent::new(namespace.clone(), "missing".to_string()); + assert!(catalog.purge_table(&table_ident).await.is_err()); + Ok(()) +} diff --git a/crates/catalog/rest/src/catalog.rs b/crates/catalog/rest/src/catalog.rs index 3551b05160..7d5df24d52 100644 --- a/crates/catalog/rest/src/catalog.rs +++ b/crates/catalog/rest/src/catalog.rs @@ -363,6 +363,35 @@ impl RestCatalog { } } + /// Sends a DELETE request for the given table, optionally requesting purge. 
+ async fn delete_table(&self, table: &TableIdent, purge: bool) -> Result<()> { + let context = self.context().await?; + + let mut request_builder = context + .client + .request(Method::DELETE, context.config.table_endpoint(table)); + + if purge { + request_builder = request_builder.query(&[("purgeRequested", "true")]); + } + + let request = request_builder.build()?; + let http_response = context.client.query_catalog(request).await?; + + match http_response.status() { + StatusCode::NO_CONTENT | StatusCode::OK => Ok(()), + StatusCode::NOT_FOUND => Err(Error::new( + ErrorKind::TableNotFound, + "Tried to drop a table that does not exist", + )), + _ => Err(deserialize_unexpected_catalog_error( + http_response, + context.client.disable_header_redaction(), + ) + .await), + } + } + /// Gets the [`RestContext`] from the catalog. async fn context(&self) -> Result<&RestContext> { self.ctx @@ -828,27 +857,13 @@ impl Catalog for RestCatalog { /// Drop a table from the catalog. async fn drop_table(&self, table: &TableIdent) -> Result<()> { - let context = self.context().await?; - - let request = context - .client - .request(Method::DELETE, context.config.table_endpoint(table)) - .build()?; - - let http_response = context.client.query_catalog(request).await?; + self.delete_table(table, false).await + } - match http_response.status() { - StatusCode::NO_CONTENT | StatusCode::OK => Ok(()), - StatusCode::NOT_FOUND => Err(Error::new( - ErrorKind::TableNotFound, - "Tried to drop a table that does not exist", - )), - _ => Err(deserialize_unexpected_catalog_error( - http_response, - context.client.disable_header_redaction(), - ) - .await), - } + /// Drop a table from the catalog and purge its data by sending + /// `purgeRequested=true` to the REST server. + async fn purge_table(&self, table: &TableIdent) -> Result<()> { + self.delete_table(table, true).await } /// Check if a table exists in the catalog. 
diff --git a/crates/catalog/s3tables/src/catalog.rs b/crates/catalog/s3tables/src/catalog.rs index a416c38f22..b88bd77d29 100644 --- a/crates/catalog/s3tables/src/catalog.rs +++ b/crates/catalog/s3tables/src/catalog.rs @@ -562,15 +562,18 @@ impl Catalog for S3TablesCatalog { Ok(self.load_table_with_version_token(table_ident).await?.0) } - /// Drops an existing table from the s3tables catalog. + /// Not supported for S3Tables. Use `purge_table` instead. /// - /// Validates the table identifier and then deletes the corresponding - /// table from the s3tables catalog. - /// - /// This function can return an error in the following situations: - /// - Errors from the underlying database deletion process, converted using - /// `from_aws_sdk_error`. - async fn drop_table(&self, table: &TableIdent) -> Result<()> { + /// S3 Tables doesn't support soft delete, so dropping a table will permanently remove it from the catalog. + async fn drop_table(&self, _table: &TableIdent) -> Result<()> { + Err(Error::new( + ErrorKind::FeatureUnsupported, + "drop_table is not supported for S3Tables; use purge_table instead", + )) + } + + /// Purge a table from the S3 Tables catalog. + async fn purge_table(&self, table: &TableIdent) -> Result<()> { let req = self .s3tables_client .delete_table() diff --git a/crates/catalog/sql/src/catalog.rs b/crates/catalog/sql/src/catalog.rs index 195f6c9de4..7e468e7e37 100644 --- a/crates/catalog/sql/src/catalog.rs +++ b/crates/catalog/sql/src/catalog.rs @@ -757,6 +757,17 @@ impl Catalog for SqlCatalog { Ok(()) } + async fn purge_table(&self, table: &TableIdent) -> Result<()> { + let table_info = self.load_table(table).await?; + self.drop_table(table).await?; + iceberg::drop_table_data( + table_info.file_io(), + table_info.metadata(), + table_info.metadata_location(), + ) + .await + } + async fn load_table(&self, identifier: &TableIdent) -> Result { if !self.table_exists(identifier).await? 
{ return no_such_table_err(identifier); diff --git a/crates/iceberg/src/catalog/memory/catalog.rs b/crates/iceberg/src/catalog/memory/catalog.rs index 25ae004417..8fa5c479c3 100644 --- a/crates/iceberg/src/catalog/memory/catalog.rs +++ b/crates/iceberg/src/catalog/memory/catalog.rs @@ -326,6 +326,17 @@ impl Catalog for MemoryCatalog { Ok(()) } + async fn purge_table(&self, table_ident: &TableIdent) -> Result<()> { + let table_info = self.load_table(table_ident).await?; + self.drop_table(table_ident).await?; + crate::catalog::utils::drop_table_data( + table_info.file_io(), + table_info.metadata(), + table_info.metadata_location(), + ) + .await + } + /// Check if a table exists in the catalog. async fn table_exists(&self, table_ident: &TableIdent) -> Result { let root_namespace_state = self.root_namespace_state.lock().await; diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index 06326917ec..f296cf2260 100644 --- a/crates/iceberg/src/catalog/mod.rs +++ b/crates/iceberg/src/catalog/mod.rs @@ -19,6 +19,7 @@ pub mod memory; mod metadata_location; +pub(crate) mod utils; use std::collections::HashMap; use std::fmt::{Debug, Display}; @@ -98,6 +99,14 @@ pub trait Catalog: Debug + Sync + Send { /// Drop a table from the catalog, or returns error if it doesn't exist. async fn drop_table(&self, table: &TableIdent) -> Result<()>; + /// Drop a table from the catalog and delete the underlying table data. + /// + /// Implementations should load the table metadata, drop the table + /// from the catalog, then delete all associated data and metadata files. + /// The [`drop_table_data`](utils::drop_table_data) utility function can + /// be used for the file cleanup step. + async fn purge_table(&self, table: &TableIdent) -> Result<()>; + /// Check if a table exists in the catalog. 
async fn table_exists(&self, table: &TableIdent) -> Result; diff --git a/crates/iceberg/src/catalog/utils.rs b/crates/iceberg/src/catalog/utils.rs new file mode 100644 index 0000000000..d450f9df80 --- /dev/null +++ b/crates/iceberg/src/catalog/utils.rs @@ -0,0 +1,124 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Utility functions for catalog operations. + +use std::collections::HashSet; + +use futures::{TryStreamExt, stream}; + +use crate::Result; +use crate::io::FileIO; +use crate::spec::TableMetadata; + +const DELETE_CONCURRENCY: usize = 10; + +/// Deletes all data and metadata files referenced by the given table metadata. +/// +/// This mirrors the Java implementation's `CatalogUtil.dropTableData`. +/// It collects all manifest files, manifest lists, previous metadata files, +/// statistics files, and partition statistics files, then deletes them. +/// +/// Data files within manifests are only deleted if the `gc.enabled` table +/// property is `true` (the default), to avoid corrupting other tables that +/// may share the same data files. 
+pub async fn drop_table_data( + io: &FileIO, + metadata: &TableMetadata, + metadata_location: Option<&str>, +) -> Result<()> { + let mut manifest_lists_to_delete: HashSet = HashSet::new(); + let mut manifests_to_delete: HashSet = HashSet::new(); + + // Load all manifest lists concurrently + let results: Vec<_> = + futures::future::try_join_all(metadata.snapshots().map(|snapshot| async { + let manifest_list = snapshot.load_manifest_list(io, metadata).await?; + Ok::<_, crate::Error>((snapshot.manifest_list().to_string(), manifest_list)) + })) + .await?; + + for (manifest_list_location, manifest_list) in results { + if !manifest_list_location.is_empty() { + manifest_lists_to_delete.insert(manifest_list_location); + } + for manifest_file in manifest_list.entries() { + manifests_to_delete.insert(manifest_file.manifest_path.clone()); + } + } + + // Delete data files only if gc.enabled is true, to avoid corrupting shared tables + if metadata.table_properties()?.gc_enabled { + delete_data_files(io, &manifests_to_delete).await?; + } + + // Delete manifest files + io.delete_stream(stream::iter(manifests_to_delete)).await?; + + // Delete manifest lists + io.delete_stream(stream::iter(manifest_lists_to_delete)) + .await?; + + // Delete previous metadata files + let prev_metadata_paths: Vec = metadata + .metadata_log() + .iter() + .map(|m| m.metadata_file.clone()) + .collect(); + io.delete_stream(stream::iter(prev_metadata_paths)).await?; + + // Delete statistics files + let stats_paths: Vec = metadata + .statistics_iter() + .map(|s| s.statistics_path.clone()) + .collect(); + io.delete_stream(stream::iter(stats_paths)).await?; + + // Delete partition statistics files + let partition_stats_paths: Vec = metadata + .partition_statistics_iter() + .map(|s| s.statistics_path.clone()) + .collect(); + io.delete_stream(stream::iter(partition_stats_paths)) + .await?; + + // Delete the current metadata file + if let Some(location) = metadata_location { + io.delete(location).await?; + } + 
+ Ok(()) +} + +/// Reads manifests concurrently and deletes the data files referenced within. +async fn delete_data_files(io: &FileIO, manifest_paths: &HashSet) -> Result<()> { + stream::iter(manifest_paths.iter().map(Ok)) + .try_for_each_concurrent(DELETE_CONCURRENCY, |manifest_path| async move { + let input = io.new_input(manifest_path)?; + let manifest_content = input.read().await?; + let manifest = crate::spec::Manifest::parse_avro(&manifest_content)?; + + let data_file_paths = manifest + .entries() + .iter() + .map(|entry| entry.data_file.file_path().to_string()) + .collect::>(); + + io.delete_stream(stream::iter(data_file_paths)).await + }) + .await +} diff --git a/crates/iceberg/src/lib.rs b/crates/iceberg/src/lib.rs index 0b138d2818..44a3601428 100644 --- a/crates/iceberg/src/lib.rs +++ b/crates/iceberg/src/lib.rs @@ -71,6 +71,7 @@ pub use error::{Error, ErrorKind, Result}; mod catalog; +pub use catalog::utils::drop_table_data; pub use catalog::*; pub mod table; diff --git a/crates/iceberg/src/spec/table_properties.rs b/crates/iceberg/src/spec/table_properties.rs index 6e08318479..07c157304e 100644 --- a/crates/iceberg/src/spec/table_properties.rs +++ b/crates/iceberg/src/spec/table_properties.rs @@ -114,6 +114,9 @@ pub struct TableProperties { pub metadata_compression_codec: CompressionCodec, /// Whether to use `FanoutWriter` for partitioned tables. pub write_datafusion_fanout_enabled: bool, + /// Whether garbage collection is enabled on drop. + /// When `false`, data files will not be deleted when a table is dropped. + pub gc_enabled: bool, } impl TableProperties { @@ -212,6 +215,13 @@ impl TableProperties { pub const PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED: &str = "write.datafusion.fanout.enabled"; /// Default value for fanout writer enabled pub const PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT: bool = true; + + /// Property key for enabling garbage collection on drop. 
+ /// When set to `false`, data files will not be deleted when a table is dropped. + /// Defaults to `true`. + pub const PROPERTY_GC_ENABLED: &str = "gc.enabled"; + /// Default value for gc.enabled + pub const PROPERTY_GC_ENABLED_DEFAULT: bool = true; } impl TryFrom<&HashMap> for TableProperties { @@ -256,6 +266,11 @@ impl TryFrom<&HashMap> for TableProperties { TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED, TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT, )?, + gc_enabled: parse_property( + props, + TableProperties::PROPERTY_GC_ENABLED, + TableProperties::PROPERTY_GC_ENABLED_DEFAULT, + )?, }) } } @@ -294,6 +309,10 @@ mod tests { table_properties.metadata_compression_codec, CompressionCodec::None ); + assert_eq!( + table_properties.gc_enabled, + TableProperties::PROPERTY_GC_ENABLED_DEFAULT + ); } #[test] @@ -377,12 +396,17 @@ mod tests { TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES.to_string(), "512".to_string(), ), + ( + TableProperties::PROPERTY_GC_ENABLED.to_string(), + "false".to_string(), + ), ]); let table_properties = TableProperties::try_from(&props).unwrap(); assert_eq!(table_properties.commit_num_retries, 10); assert_eq!(table_properties.commit_max_retry_wait_ms, 20); assert_eq!(table_properties.write_format_default, "avro".to_string()); assert_eq!(table_properties.write_target_file_size_bytes, 512); + assert!(!table_properties.gc_enabled); } #[test] @@ -429,6 +453,17 @@ mod tests { assert!(table_properties.to_string().contains( "Invalid value for write.target-file-size-bytes: invalid digit found in string" )); + + let invalid_gc_enabled = HashMap::from([( + TableProperties::PROPERTY_GC_ENABLED.to_string(), + "notabool".to_string(), + )]); + let table_properties = TableProperties::try_from(&invalid_gc_enabled).unwrap_err(); + assert!( + table_properties + .to_string() + .contains("Invalid value for gc.enabled") + ); } #[test] From 477a1e525b4915895388a4f45557b825ea541ef2 Mon Sep 17 00:00:00 2001 From: Matt Butrovich 
Date: Wed, 25 Mar 2026 13:43:06 -0400 Subject: [PATCH 29/39] deps: upgrade DataFusion to 53.0, Arrow to 58 (#2206) ## Which issue does this PR close? - Closes #. ## What changes are included in this PR? - Bump DataFusion to 53.0.0, Arrow/Parquet to 58, sqllogictest to 0.29, pyo3 to 0.28. - Adapt to DataFusion 53 API changes in physical plan executors and python bindings. - Update SLT expected test output. ## Are these changes tested? Existing tests. --------- Co-authored-by: Xander --- Cargo.lock | 318 +++-- Cargo.toml | 26 +- bindings/python/Cargo.lock | 1223 ++++++++++------- bindings/python/Cargo.toml | 6 +- .../python/src/datafusion_table_provider.rs | 37 +- bindings/python/src/transform.rs | 4 +- .../tests/test_datafusion_table_provider.py | 4 +- crates/iceberg/src/arrow/reader.rs | 10 +- .../src/expr/visitors/page_index_evaluator.rs | 6 +- crates/iceberg/src/transform/temporal.rs | 6 +- .../src/writer/file_writer/parquet_writer.rs | 2 +- .../datafusion/src/physical_plan/commit.rs | 18 +- .../src/physical_plan/metadata_scan.rs | 10 +- .../datafusion/src/physical_plan/scan.rs | 10 +- .../datafusion/src/physical_plan/write.rs | 18 +- .../tests/integration_datafusion_test.rs | 10 +- .../df_test/binary_predicate_pushdown.slt | 3 +- .../df_test/boolean_predicate_pushdown.slt | 9 +- .../slts/df_test/like_predicate_pushdown.slt | 6 +- .../df_test/timestamp_predicate_pushdown.slt | 12 +- 20 files changed, 996 insertions(+), 742 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8171f28386..a24ef04626 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,7 +26,7 @@ checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" dependencies = [ "cfg-if", "cipher", - "cpufeatures", + "cpufeatures 0.2.17", ] [[package]] @@ -235,9 +235,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" +checksum = "d441fdda254b65f3e9025910eb2c2066b6295d9c8ed409522b8d2ace1ff8574c" dependencies = [ "arrow-arith", "arrow-array", @@ -256,9 +256,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" +checksum = "ced5406f8b720cc0bc3aa9cf5758f93e8593cda5490677aa194e4b4b383f9a59" dependencies = [ "arrow-array", "arrow-buffer", @@ -270,9 +270,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" +checksum = "772bd34cacdda8baec9418d80d23d0fb4d50ef0735685bd45158b83dfeb6e62d" dependencies = [ "ahash", "arrow-buffer", @@ -289,9 +289,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" +checksum = "898f4cf1e9598fdb77f356fdf2134feedfd0ee8d5a4e0a5f573e7d0aec16baa4" dependencies = [ "bytes", "half", @@ -301,9 +301,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" +checksum = "b0127816c96533d20fc938729f48c52d3e48f99717e7a0b5ade77d742510736d" dependencies = [ "arrow-array", "arrow-buffer", @@ -323,9 +323,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" +checksum = 
"ca025bd0f38eeecb57c2153c0123b960494138e6a957bbda10da2b25415209fe" dependencies = [ "arrow-array", "arrow-cast", @@ -338,9 +338,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" +checksum = "42d10beeab2b1c3bb0b53a00f7c944a178b622173a5c7bcabc3cb45d90238df4" dependencies = [ "arrow-buffer", "arrow-schema", @@ -351,9 +351,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" +checksum = "609a441080e338147a84e8e6904b6da482cefb957c5cdc0f3398872f69a315d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -367,9 +367,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" +checksum = "6ead0914e4861a531be48fe05858265cf854a4880b9ed12618b1d08cba9bebc8" dependencies = [ "arrow-array", "arrow-buffer", @@ -391,9 +391,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" +checksum = "763a7ba279b20b52dad300e68cfc37c17efa65e68623169076855b3a9e941ca5" dependencies = [ "arrow-array", "arrow-buffer", @@ -404,9 +404,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" +checksum = "e14fe367802f16d7668163ff647830258e6e0aeea9a4d79aaedf273af3bdcd3e" dependencies = [ "arrow-array", 
"arrow-buffer", @@ -417,9 +417,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" +checksum = "c30a1365d7a7dc50cc847e54154e6af49e4c4b0fddc9f607b687f29212082743" dependencies = [ "serde_core", "serde_json", @@ -427,9 +427,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" +checksum = "78694888660a9e8ac949853db393af2a8b8fc82c19ce333132dfa2e72cc1a7fe" dependencies = [ "ahash", "arrow-array", @@ -441,9 +441,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" +checksum = "61e04a01f8bb73ce54437514c5fd3ee2aa3e8abe4c777ee5cc55853b1652f79e" dependencies = [ "arrow-array", "arrow-buffer", @@ -1039,7 +1039,7 @@ dependencies = [ "cc", "cfg-if", "constant_time_eq", - "cpufeatures", + "cpufeatures 0.2.17", ] [[package]] @@ -1195,6 +1195,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chacha20" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "rand_core 0.10.0", +] + [[package]] name = "chrono" version = "0.4.44" @@ -1411,6 +1422,15 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "crc" version = "3.4.0" @@ -1609,9 +1629,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43c18ba387f9c05ac1f3be32a73f8f3cc6c1cfc43e5d4b7a8e5b0d3a5eb48dc7" +checksum = "de9f8117889ba9503440f1dd79ebab32ba52ccf1720bb83cd718a29d4edc0d16" dependencies = [ "arrow", "arrow-schema", @@ -1665,9 +1685,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c75a4ce672b27fb8423810efb92a3600027717a1664d06a2c307eeeabcec694" +checksum = "be893b73a13671f310ffcc8da2c546b81efcc54c22e0382c0a28aa3537017137" dependencies = [ "arrow", "async-trait", @@ -1690,9 +1710,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c8b9a3795ffb46bf4957a34c67d89a67558b311ae455c8d4295ff2115eeea50" +checksum = "830487b51ed83807d6b32d6325f349c3144ae0c9bf772cf2a712db180c31d5e6" dependencies = [ "arrow", "async-trait", @@ -1713,9 +1733,9 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46a0b3ed9bfda5f234c62e179bbc1258fc89452a89cd3d652da73efcb994ecf5" +checksum = "8735220c84a731c3917dce75ec837a8376eddf5462b0c5dbaf5a2e354c9b6e05" dependencies = [ "arrow", "async-trait", @@ -1741,9 +1761,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "205dc1e20441973f470e6b7ef87626a3b9187970e5106058fef1b713047f770c" +checksum = 
"0d7663f3af955292f8004e74bcaf8f7ea3d66cc38438749615bb84815b61a293" dependencies = [ "ahash", "apache-avro", @@ -1754,6 +1774,7 @@ dependencies = [ "hashbrown 0.16.1", "hex", "indexmap 2.13.0", + "itertools 0.14.0", "libc", "log", "object_store", @@ -1767,9 +1788,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cf5880c02ff6f5f11fb5bc19211789fb32fd3c53d79b7d6cb2b12e401312ba0" +checksum = "5f590205c7e32fe1fea48dd53ffb406e56ae0e7a062213a3ac848db8771641bd" dependencies = [ "futures", "log", @@ -1778,9 +1799,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc614d6e709450e29b7b032a42c1bdb705f166a6b2edef7bed7c7897eb905499" +checksum = "fde1e030a9dc87b743c806fbd631f5ecfa2ccaa4ffb61fa19144a07fea406b79" dependencies = [ "arrow", "async-compression", @@ -1813,9 +1834,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e497d5fc48dac7ce86f6b4fb09a3a494385774af301ff20ec91aebfae9b05b4" +checksum = "331ebae7055dc108f9b54994b93dff91f3a17445539efe5b74e89264f7b36e15" dependencies = [ "arrow", "arrow-ipc", @@ -1837,9 +1858,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "474d9b26f185b57f549a0f7ce9183428dd0042014a2e0d093f5430fdc9dae289" +checksum = "49dda81c79b6ba57b1853a9158abc66eb85a3aa1cede0c517dabec6d8a4ed3aa" dependencies = [ "apache-avro", "arrow", @@ -1857,9 +1878,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "0dfc250cad940d0327ca2e9109dc98830892d17a3d6b2ca11d68570e872cf379" +checksum = "9e0d475088325e2986876aa27bb30d0574f72a22955a527d202f454681d55c5c" dependencies = [ "arrow", "async-trait", @@ -1880,9 +1901,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91e9677ed62833b0e8129dec0d1a8f3c9bb7590bd6dd714a43e4c3b663e4aa0" +checksum = "ea1520d81f31770f3ad6ee98b391e75e87a68a5bb90de70064ace5e0a7182fe8" dependencies = [ "arrow", "async-trait", @@ -1897,14 +1918,16 @@ dependencies = [ "datafusion-session", "futures", "object_store", + "serde_json", "tokio", + "tokio-stream", ] [[package]] name = "datafusion-datasource-parquet" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23798383465e0c569bd442d1453b50691261f8ad6511d840c48457b3bf51ae21" +checksum = "95be805d0742ab129720f4c51ad9242cd872599cdb076098b03f061fcdc7f946" dependencies = [ "arrow", "async-trait", @@ -1932,22 +1955,24 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e13e5fe3447baa0584b61ee8644086e007e1ef6e58f4be48bc8a72417854729" +checksum = "5c93ad9e37730d2c7196e68616f3f2dd3b04c892e03acd3a8eeca6e177f3c06a" [[package]] name = "datafusion-execution" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48a6cc03e34899a54546b229235f7b192634c8e832f78a267f0989b18216c56d" +checksum = "9437d3cd5d363f9319f8122182d4d233427de79c7eb748f23054c9aaa0fdd8df" dependencies = [ "arrow", + "arrow-buffer", "async-trait", "chrono", "dashmap", "datafusion-common", "datafusion-expr", + "datafusion-physical-expr-common", "futures", "log", "object_store", @@ -1960,9 +1985,9 @@ dependencies = [ [[package]] name = "datafusion-expr" 
-version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee3315d87eca7a7df58e52a1fb43b4c4171b545fd30ffc3102945c162a9f6ddb" +checksum = "67164333342b86521d6d93fa54081ee39839894fb10f7a700c099af96d7552cf" dependencies = [ "arrow", "async-trait", @@ -1983,9 +2008,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c6d83feae0753799f933a2c47dfd15980c6947960cb95ed60f5c1f885548b3" +checksum = "ab05fdd00e05d5a6ee362882546d29d6d3df43a6c55355164a7fbee12d163bc9" dependencies = [ "arrow", "datafusion-common", @@ -1996,9 +2021,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b82962015cc3db4d7662459c9f7fcda0591b5edacb8af1cf3bc3031f274800" +checksum = "04fb863482d987cf938db2079e07ab0d3bb64595f28907a6c2f8671ad71cca7e" dependencies = [ "arrow", "arrow-buffer", @@ -2017,6 +2042,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "memchr", "num-traits", "rand 0.9.2", "regex", @@ -2027,9 +2053,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e42c227d9e55a6c8041785d4a8a117e4de531033d480aae10984247ac62e27e" +checksum = "829856f4e14275fb376c104f27cbf3c3b57a9cfe24885d98677525f5e43ce8d6" dependencies = [ "ahash", "arrow", @@ -2043,14 +2069,15 @@ dependencies = [ "datafusion-physical-expr-common", "half", "log", + "num-traits", "paste", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cead3cfed825b0b688700f4338d281cd7857e4907775a5b9554c083edd5f3f95" +checksum = 
"08af79cc3d2aa874a362fb97decfcbd73d687190cb096f16a6c85a7780cce311" dependencies = [ "ahash", "arrow", @@ -2061,9 +2088,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62ea99612970aebab8cf864d02eb3d296bbab7f4881e1023d282b57fe431b201" +checksum = "465ae3368146d49c2eda3e2c0ef114424c87e8a6b509ab34c1026ace6497e790" dependencies = [ "arrow", "arrow-ord", @@ -2077,16 +2104,18 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", + "hashbrown 0.16.1", "itertools 0.14.0", + "itoa", "log", "paste", ] [[package]] name = "datafusion-functions-table" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d83dbf3ab8b9af6f209b068825a7adbd3b88bf276f2a1ec14ba09567b97f5674" +checksum = "6156e6b22fcf1784112fc0173f3ae6e78c8fdb4d3ed0eace9543873b437e2af6" dependencies = [ "arrow", "async-trait", @@ -2100,9 +2129,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "732edabe07496e2fc5a1e57a284d7a36edcea445a2821119770a0dea624b472c" +checksum = "ca7baec14f866729012efb89011a6973f3a346dc8090c567bfcd328deff551c1" dependencies = [ "arrow", "datafusion-common", @@ -2118,9 +2147,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c6e30e09700799bd52adce8c377ab03dda96e73a623e4803a31ad94fe7ce14" +checksum = "159228c3280d342658466bb556dc24de30047fe1d7e559dc5d16ccc5324166f9" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2128,9 +2157,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.4.0" +version = "53.0.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402f2a8ed70fb99a18f71580a1fe338604222a3d32ddeac6e72c5b34feea2d4d" +checksum = "e5427e5da5edca4d21ea1c7f50e1c9421775fe33d7d5726e5641a833566e7578" dependencies = [ "datafusion-doc", "quote", @@ -2139,9 +2168,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99f32edb8ba12f08138f86c09b80fae3d4a320551262fa06b91d8a8cb3065a5b" +checksum = "89099eefcd5b223ec685c36a41d35c69239236310d71d339f2af0fa4383f3f46" dependencies = [ "arrow", "chrono", @@ -2159,9 +2188,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "987c5e29e96186589301b42e25aa7d11bbe319a73eb02ef8d755edc55b5b89fc" +checksum = "0f222df5195d605d79098ef37bdd5323bff0131c9d877a24da6ec98dfca9fe36" dependencies = [ "ahash", "arrow", @@ -2183,9 +2212,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1de89d0afa08b6686697bd8a6bac4ba2cd44c7003356e1bce6114d5a93f94b5c" +checksum = "40838625d63d9c12549d81979db3dd675d159055eb9135009ba272ab0e8d0f64" dependencies = [ "arrow", "datafusion-common", @@ -2198,9 +2227,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "602d1970c0fe87f1c3a36665d131fbfe1c4379d35f8fc5ec43a362229ad2954d" +checksum = "eacbcc4cfd502558184ed58fa3c72e775ec65bf077eef5fd2b3453db676f893c" dependencies = [ "ahash", "arrow", @@ -2215,9 +2244,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.4.0" +version = "53.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b24d704b6385ebe27c756a12e5ba15684576d3b47aeca79cc9fb09480236dc32" +checksum = "d501d0e1d0910f015677121601ac177ec59272ef5c9324d1147b394988f40941" dependencies = [ "arrow", "datafusion-common", @@ -2234,9 +2263,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c21d94141ea5043e98793f170798e9c1887095813b8291c5260599341e383a38" +checksum = "463c88ad6f1ecab1810f4c9f046898bee035b370137eb79b2b2db925e270631d" dependencies = [ "ahash", "arrow", @@ -2258,6 +2287,7 @@ dependencies = [ "indexmap 2.13.0", "itertools 0.14.0", "log", + "num-traits", "parking_lot", "pin-project-lite", "tokio", @@ -2265,9 +2295,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a68cce43d18c0dfac95cacd74e70565f7e2fb12b9ed41e2d312f0fa837626b1" +checksum = "2857618a0ecbd8cd0cf29826889edd3a25774ec26b2995fc3862095c95d88fc6" dependencies = [ "arrow", "datafusion-common", @@ -2282,9 +2312,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4e1c40a0b1896aed4a4504145c2eb7fa9b9da13c2d04b40a4767a09f076199" +checksum = "ef8637e35022c5c775003b3ab1debc6b4a8f0eb41b069bdd5475dd3aa93f6eba" dependencies = [ "async-trait", "datafusion-common", @@ -2296,38 +2326,43 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e53604bca77d4544426a425e2a50d7b911bbe35d3c8193de24093b445f23856" +checksum = "923a8b871962a9d860f036f743a20af50ff04729f1da2468ed220dab4f61c97d" dependencies = [ "arrow", "bigdecimal", "chrono", "crc32fast", + "datafusion", 
"datafusion-catalog", "datafusion-common", "datafusion-execution", "datafusion-expr", "datafusion-functions", + "datafusion-functions-aggregate", "datafusion-functions-nested", "log", "percent-encoding", "rand 0.9.2", + "serde_json", "sha1", + "sha2", "url", ] [[package]] name = "datafusion-sql" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f1891e5b106d1d73c7fe403bd8a265d19c3977edc17f60808daf26c2fe65ffb" +checksum = "12d9e9f16a1692a11c94bcc418191fa15fd2b4d72a0c1a0c607db93c0b84dd81" dependencies = [ "arrow", "bigdecimal", "chrono", "datafusion-common", "datafusion-expr", + "datafusion-functions-nested", "indexmap 2.13.0", "log", "recursive", @@ -2337,9 +2372,9 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3929b7067193345bc345a5ea5f231cccde36fe58fb055d8caef7247ad7566fd5" +checksum = "a43746bd59e7f2655be4c5553ede4a1ceb1cd34005932fa9e2bd0641c714c46e" dependencies = [ "arrow", "async-trait", @@ -2363,9 +2398,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2379388ecab67079eeb1185c953fb9c5ed4b283fa3cb81417538378a30545957" +checksum = "d5e5656a7e63d51dd3e5af3dbd347ea83bbe993a77c66b854b74961570d16490" dependencies = [ "async-recursion", "async-trait", @@ -2963,6 +2998,7 @@ dependencies = [ "cfg-if", "libc", "r-efi 6.0.0", + "rand_core 0.10.0", "wasip2", "wasip3", ] @@ -4087,9 +4123,9 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" +checksum = 
"db9a0d582c2874f68138a16ce1867e0ffde6c0bb0a0df85e1f36d04146db488a" dependencies = [ "twox-hash", ] @@ -4458,16 +4494,18 @@ dependencies = [ [[package]] name = "object_store" -version = "0.12.5" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" +checksum = "622acbc9100d3c10e2ee15804b0caa40e55c933d5aa53814cd520805b7958a49" dependencies = [ "async-trait", "base64", "bytes", "chrono", "form_urlencoded", - "futures", + "futures-channel", + "futures-core", + "futures-util", "http 1.4.0", "http-body-util", "humantime", @@ -4476,11 +4514,11 @@ dependencies = [ "md-5", "parking_lot", "percent-encoding", - "quick-xml 0.38.4", - "rand 0.9.2", + "quick-xml 0.39.2", + "rand 0.10.0", "reqwest", "ring", - "rustls-pemfile", + "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", @@ -4625,14 +4663,13 @@ dependencies = [ [[package]] name = "parquet" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" +checksum = "7d3f9f2205199603564127932b89695f52b62322f541d0fc7179d57c2e1c9877" dependencies = [ "ahash", "arrow-array", "arrow-buffer", - "arrow-cast", "arrow-data", "arrow-ipc", "arrow-schema", @@ -4880,7 +4917,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "opaque-debug", "universal-hash", ] @@ -5095,6 +5132,16 @@ dependencies = [ "serde", ] +[[package]] +name = "quick-xml" +version = "0.39.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "958f21e8e7ceb5a1aa7fa87fab28e7c75976e0bfe7e23ff069e0a260f894067d" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quinn" version = "0.11.9" @@ -5212,6 +5259,17 @@ dependencies = [ 
"rand_core 0.9.5", ] +[[package]] +name = "rand" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8" +dependencies = [ + "chacha20", + "getrandom 0.4.2", + "rand_core 0.10.0", +] + [[package]] name = "rand_chacha" version = "0.3.1" @@ -5251,6 +5309,12 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rand_core" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba" + [[package]] name = "recursive" version = "0.1.1" @@ -5623,15 +5687,6 @@ dependencies = [ "security-framework", ] -[[package]] -name = "rustls-pemfile" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" -dependencies = [ - "rustls-pki-types", -] - [[package]] name = "rustls-pki-types" version = "1.14.0" @@ -5987,7 +6042,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "digest", ] @@ -5998,7 +6053,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "digest", ] @@ -6173,9 +6228,9 @@ dependencies = [ [[package]] name = "sqllogictest" -version = "0.28.4" +version = "0.29.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3566426f72a13e393aa34ca3d542c5b0eb86da4c0db137ee9b5cfccc6179e52d" +checksum = "d03b2262a244037b0b510edbd25a8e6c9fb8d73ee0237fc6cc95a54c16f94a82" dependencies = [ "async-trait", "educe", @@ -6198,9 +6253,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.59.0" +version = "0.61.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" dependencies = [ "log", "recursive", @@ -6209,9 +6264,9 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +checksum = "a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289" dependencies = [ "proc-macro2", "quote", @@ -6731,6 +6786,7 @@ dependencies = [ "futures-core", "pin-project-lite", "tokio", + "tokio-util", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 1f3eec4ace..778e69c9d9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,14 +45,14 @@ aes-gcm = "0.10" anyhow = "1.0.72" apache-avro = { version = "0.21", features = ["zstandard"] } array-init = "2" -arrow-arith = "57.1" -arrow-array = "57.1" -arrow-buffer = "57.1" -arrow-cast = "57.1" -arrow-ord = "57.1" -arrow-schema = "57.1" -arrow-select = "57.1" -arrow-string = "57.1" +arrow-arith = "58" +arrow-array = "58" +arrow-buffer = "58" +arrow-cast = "58" +arrow-ord = "58" +arrow-schema = "58" +arrow-select = "58" +arrow-string = "58" as-any = "0.3.2" async-trait = "0.1.89" aws-config = "1.8.7" @@ -66,9 +66,9 @@ cfg-if = "1" chrono = "0.4.41" clap = { version = "4.5.48", features = ["derive", "cargo"] } dashmap = "6" -datafusion = "52.2" -datafusion-cli = "52.2" -datafusion-sqllogictest = "52.2" +datafusion = "53.0.0" +datafusion-cli = "53.0.0" +datafusion-sqllogictest = "53.0.0" derive_builder = "0.20" dirs = "6" enum-ordinalize = "4.3.0" @@ -105,7 +105,7 @@ murmur3 = "0.5.2" once_cell = "1.20" opendal = "0.55.0" ordered-float = "4" -parquet = "57.1" +parquet = "58" pilota = "0.11.10" pretty_assertions = "1.4" rand = "0.8.5" @@ -120,7 +120,7 @@ serde_derive = "1.0.219" serde_json = 
"1.0.142" serde_repr = "0.1.16" serde_with = "3.4" -sqllogictest = "0.28.3" +sqllogictest = "0.29" sqlx = { version = "0.8.1", default-features = false } stacker = "0.1.20" strum = "0.27.2" diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 3ce0df6e41..1b5c06f492 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -56,6 +56,16 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common", + "generic-array", +] + [[package]] name = "aes" version = "0.8.4" @@ -67,6 +77,20 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "aes-gcm" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831010a0f742e1209b3bcea8fab6a8e149051ba6099432c8cb2cc117dec3ead1" +dependencies = [ + "aead", + "aes", + "cipher", + "ctr", + "ghash", + "subtle", +] + [[package]] name = "ahash" version = "0.8.12" @@ -83,9 +107,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] @@ -122,9 +146,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "apache-avro" @@ -144,8 +168,8 @@ dependencies = [ "serde", 
"serde_bytes", "serde_json", - "strum 0.27.2", - "strum_macros 0.27.2", + "strum", + "strum_macros", "thiserror", "uuid", "zstd", @@ -153,9 +177,9 @@ dependencies = [ [[package]] name = "ar_archive_writer" -version = "0.2.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" dependencies = [ "object", ] @@ -180,9 +204,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.1.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3" +checksum = "d441fdda254b65f3e9025910eb2c2066b6295d9c8ed409522b8d2ace1ff8574c" dependencies = [ "arrow-arith", "arrow-array", @@ -202,9 +226,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.1.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049" +checksum = "ced5406f8b720cc0bc3aa9cf5758f93e8593cda5490677aa194e4b4b383f9a59" dependencies = [ "arrow-array", "arrow-buffer", @@ -216,9 +240,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.1.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" +checksum = "772bd34cacdda8baec9418d80d23d0fb4d50ef0735685bd45158b83dfeb6e62d" dependencies = [ "ahash", "arrow-buffer", @@ -235,9 +259,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.1.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626" +checksum = 
"898f4cf1e9598fdb77f356fdf2134feedfd0ee8d5a4e0a5f573e7d0aec16baa4" dependencies = [ "bytes", "half", @@ -247,9 +271,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.1.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271" +checksum = "b0127816c96533d20fc938729f48c52d3e48f99717e7a0b5ade77d742510736d" dependencies = [ "arrow-array", "arrow-buffer", @@ -269,9 +293,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.1.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b" +checksum = "ca025bd0f38eeecb57c2153c0123b960494138e6a957bbda10da2b25415209fe" dependencies = [ "arrow-array", "arrow-cast", @@ -284,9 +308,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.1.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3" +checksum = "42d10beeab2b1c3bb0b53a00f7c944a178b622173a5c7bcabc3cb45d90238df4" dependencies = [ "arrow-buffer", "arrow-schema", @@ -297,9 +321,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.1.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3" +checksum = "609a441080e338147a84e8e6904b6da482cefb957c5cdc0f3398872f69a315d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -313,9 +337,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.1.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0" +checksum = "6ead0914e4861a531be48fe05858265cf854a4880b9ed12618b1d08cba9bebc8" dependencies = [ "arrow-array", "arrow-buffer", 
@@ -324,7 +348,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.12.1", + "indexmap 2.13.0", "itoa", "lexical-core", "memchr", @@ -337,9 +361,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.1.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521" +checksum = "763a7ba279b20b52dad300e68cfc37c17efa65e68623169076855b3a9e941ca5" dependencies = [ "arrow-array", "arrow-buffer", @@ -350,9 +374,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "57.1.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbd810e3997bae72f58cda57231ccb0a2fda07911ca1b0a5718cbf9379abb297" +checksum = "e63351dc11981a316c828a6032a5021345bba882f68bc4a36c36825a50725089" dependencies = [ "arrow-array", "arrow-data", @@ -362,9 +386,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.1.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f" +checksum = "e14fe367802f16d7668163ff647830258e6e0aeea9a4d79aaedf273af3bdcd3e" dependencies = [ "arrow-array", "arrow-buffer", @@ -375,9 +399,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.1.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0" +checksum = "c30a1365d7a7dc50cc847e54154e6af49e4c4b0fddc9f607b687f29212082743" dependencies = [ "bitflags", "serde_core", @@ -386,9 +410,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.1.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010" +checksum = "78694888660a9e8ac949853db393af2a8b8fc82c19ce333132dfa2e72cc1a7fe" 
dependencies = [ "ahash", "arrow-array", @@ -400,9 +424,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.1.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534" +checksum = "61e04a01f8bb73ce54437514c5fd3ee2aa3e8abe4c777ee5cc55853b1652f79e" dependencies = [ "arrow-array", "arrow-buffer", @@ -435,13 +459,12 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.36" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98ec5f6c2f8bc326c994cb9e241cc257ddaba9afa8555a43cffbb5dd86efaa37" +checksum = "d0f9ee0f6e02ffd7ad5816e9464499fba7b3effd01123b515c41d1697c43dad1" dependencies = [ "compression-codecs", "compression-core", - "futures-core", "pin-project-lite", "tokio", ] @@ -457,9 +480,9 @@ dependencies = [ [[package]] name = "async-lock" -version = "3.4.1" +version = "3.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd03604047cee9b6ce9de9f70c6cd540a0520c813cbd49bae61f33ab80ed1dc" +checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" dependencies = [ "event-listener", "event-listener-strategy", @@ -474,7 +497,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -523,9 +546,9 @@ checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" [[package]] name = "bigdecimal" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934" +checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695" dependencies = [ "autocfg", "libm", @@ -543,9 +566,9 @@ checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" [[package]] name = 
"bitflags" -version = "2.10.0" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" [[package]] name = "blake2" @@ -558,15 +581,16 @@ dependencies = [ [[package]] name = "blake3" -version = "1.8.2" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", + "cpufeatures", ] [[package]] @@ -599,9 +623,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.8.1" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebeb9aaf9329dff6ceb65c689ca3db33dbf15f324909c60e4e5eef5701ce31b1" +checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" dependencies = [ "bon-macros", "rustversion", @@ -609,17 +633,17 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.8.1" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77e9d642a7e3a318e37c2c9427b5a6a48aa1ad55dcd986f3034ab2239045a645" +checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" dependencies = [ - "darling 0.21.3", + "darling 0.23.0", "ident_case", "prettyplease", "proc-macro2", "quote", "rustversion", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -645,15 +669,15 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.0" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "bytemuck" -version = "1.24.0" 
+version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" [[package]] name = "byteorder" @@ -687,9 +711,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.43" +version = "1.2.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "739eb0f94557554b3ca9a86d2d37bebd49c5e6d0c1d2bda35ba5bdac830befc2" +checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" dependencies = [ "find-msvc-tools", "jobserver", @@ -711,9 +735,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ "iana-time-zone", "js-sys", @@ -745,20 +769,19 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.1.2" +version = "7.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" +checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" dependencies = [ - "strum 0.26.3", - "strum_macros 0.26.4", + "unicode-segmentation", "unicode-width", ] [[package]] name = "compression-codecs" -version = "0.4.35" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0f7ac3e5b97fdce45e8922fb05cae2c37f7bbd63d30dd94821dacfd8f3f2bf2" +checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" dependencies = [ "bzip2", "compression-core", @@ -805,7 +828,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "once_cell", "tiny-keccak", ] @@ -821,9 +844,9 @@ dependencies = [ [[package]] name = "constant_time_eq" -version = "0.3.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" [[package]] name = "core-foundation-sys" @@ -905,11 +928,12 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", + "rand_core 0.6.4", "typenum", ] @@ -934,6 +958,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "ctr" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" +dependencies = [ + "cipher", +] + [[package]] name = "darling" version = "0.20.11" @@ -946,12 +979,12 @@ dependencies = [ [[package]] name = "darling" -version = "0.21.3" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" dependencies = [ - "darling_core 0.21.3", - "darling_macro 0.21.3", + "darling_core 0.23.0", + "darling_macro 0.23.0", ] [[package]] @@ -965,21 +998,20 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] name = "darling_core" -version = "0.21.3" +version = "0.23.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" dependencies = [ - "fnv", "ident_case", "proc-macro2", "quote", "strsim", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -990,18 +1022,18 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] name = "darling_macro" -version = "0.21.3" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ - "darling_core 0.21.3", + "darling_core 0.23.0", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -1020,9 +1052,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "503f1f4a9060ae6e650d3dff5dc7a21266fea1302d890768d45b4b28586e830f" +checksum = "de9f8117889ba9503440f1dd79ebab32ba52ccf1720bb83cd718a29d4edc0d16" dependencies = [ "arrow", "arrow-schema", @@ -1075,9 +1107,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14417a3ee4ae3d092b56cd6c1d32e8ff3e2c9ec130ecb2276ec91c89fd599399" +checksum = "be893b73a13671f310ffcc8da2c546b81efcc54c22e0382c0a28aa3537017137" dependencies = [ "arrow", "async-trait", @@ -1100,9 +1132,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d0eba824adb45a4b3ac6f0251d40df3f6a9382371cad136f4f14ac9ebc6bc10" +checksum = 
"830487b51ed83807d6b32d6325f349c3144ae0c9bf772cf2a712db180c31d5e6" dependencies = [ "arrow", "async-trait", @@ -1123,9 +1155,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0039deefbd00c56adf5168b7ca58568fb058e4ba4c5a03b09f8be371b4e434b6" +checksum = "0d7663f3af955292f8004e74bcaf8f7ea3d66cc38438749615bb84815b61a293" dependencies = [ "ahash", "arrow", @@ -1133,7 +1165,8 @@ dependencies = [ "chrono", "half", "hashbrown 0.16.1", - "indexmap 2.12.1", + "indexmap 2.13.0", + "itertools 0.14.0", "libc", "log", "object_store", @@ -1147,9 +1180,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec7e3e60b813048331f8fb9673583173e5d2dd8fef862834ee871fc98b57ca7" +checksum = "5f590205c7e32fe1fea48dd53ffb406e56ae0e7a062213a3ac848db8771641bd" dependencies = [ "futures", "log", @@ -1158,9 +1191,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "802068957f620302ecf05f84ff4019601aeafd36f5f3f1334984af2e34265129" +checksum = "fde1e030a9dc87b743c806fbd631f5ecfa2ccaa4ffb61fa19144a07fea406b79" dependencies = [ "arrow", "async-compression", @@ -1193,9 +1226,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fc387d5067c62d494a6647d29c5ad4fcdd5a6e50ab4ea1d2568caa2d66f2cc" +checksum = "331ebae7055dc108f9b54994b93dff91f3a17445539efe5b74e89264f7b36e15" dependencies = [ "arrow", "arrow-ipc", @@ -1217,9 +1250,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "52.2.0" +version = "53.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd5e20579bb6c8bd4e6c620253972fb723822030c280dd6aa047f660d09eeba" +checksum = "9e0d475088325e2986876aa27bb30d0574f72a22955a527d202f454681d55c5c" dependencies = [ "arrow", "async-trait", @@ -1240,9 +1273,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0788b0d48fcef31880a02013ea3cc18e5a4e0eacc3b0abdd2cd0597b99dc96e" +checksum = "ea1520d81f31770f3ad6ee98b391e75e87a68a5bb90de70064ace5e0a7182fe8" dependencies = [ "arrow", "async-trait", @@ -1257,14 +1290,16 @@ dependencies = [ "datafusion-session", "futures", "object_store", + "serde_json", "tokio", + "tokio-stream", ] [[package]] name = "datafusion-datasource-parquet" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66639b70f1f363f5f0950733170100e588f1acfacac90c1894e231194aa35957" +checksum = "95be805d0742ab129720f4c51ad9242cd872599cdb076098b03f061fcdc7f946" dependencies = [ "arrow", "async-trait", @@ -1292,22 +1327,24 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e44b41f3e8267c6cf3eec982d63f34db9f1dd5f30abfd2e1f124f0871708952e" +checksum = "5c93ad9e37730d2c7196e68616f3f2dd3b04c892e03acd3a8eeca6e177f3c06a" [[package]] name = "datafusion-execution" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e456f60e5d38db45335e84617006d90af14a8c8c5b8e959add708b2daaa0e2c" +checksum = "9437d3cd5d363f9319f8122182d4d233427de79c7eb748f23054c9aaa0fdd8df" dependencies = [ "arrow", + "arrow-buffer", "async-trait", "chrono", "dashmap", "datafusion-common", "datafusion-expr", + "datafusion-physical-expr-common", "futures", "log", "object_store", @@ -1319,9 +1356,9 @@ 
dependencies = [ [[package]] name = "datafusion-expr" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6507c719804265a58043134580c1c20767e7c23ba450724393f03ec982769ad9" +checksum = "67164333342b86521d6d93fa54081ee39839894fb10f7a700c099af96d7552cf" dependencies = [ "arrow", "async-trait", @@ -1332,7 +1369,7 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap 2.12.1", + "indexmap 2.13.0", "itertools 0.14.0", "paste", "recursive", @@ -1342,22 +1379,22 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a413caa9c5885072b539337aed68488f0291653e8edd7d676c92df2480f6cab0" +checksum = "ab05fdd00e05d5a6ee362882546d29d6d3df43a6c55355164a7fbee12d163bc9" dependencies = [ "arrow", "datafusion-common", - "indexmap 2.12.1", + "indexmap 2.13.0", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-ffi" -version = "52.1.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30f57f7f63a25a0b78b3f2a5e18c0ecbd54851b64064ac0d5a9eb05efd5586d2" +checksum = "4b8250f7cdf463a0ad145f41d7508bcfa54c9b9f027317e599f0331097e3cc38" dependencies = [ "abi_stable", "arrow", @@ -1385,9 +1422,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "189256495dc9cbbb8e20dbcf161f60422e628d201a78df8207e44bd4baefadb6" +checksum = "04fb863482d987cf938db2079e07ab0d3bb64595f28907a6c2f8671ad71cca7e" dependencies = [ "arrow", "arrow-buffer", @@ -1406,6 +1443,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "memchr", "num-traits", "rand 0.9.2", "regex", @@ -1416,9 +1454,9 @@ dependencies = [ [[package]] name = 
"datafusion-functions-aggregate" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12e73dfee4cd67c4a507ffff4c5a711d39983adf544adbc09c09bf06f789f413" +checksum = "829856f4e14275fb376c104f27cbf3c3b57a9cfe24885d98677525f5e43ce8d6" dependencies = [ "ahash", "arrow", @@ -1432,14 +1470,15 @@ dependencies = [ "datafusion-physical-expr-common", "half", "log", + "num-traits", "paste", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87727bd9e65f4f9ac6d608c9810b7da9eaa3b18b26a4a4b76520592d49020acf" +checksum = "08af79cc3d2aa874a362fb97decfcbd73d687190cb096f16a6c85a7780cce311" dependencies = [ "ahash", "arrow", @@ -1450,9 +1489,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e5ef761359224b7c2b5a1bfad6296ac63225f8583d08ad18af9ba1a89ac3887" +checksum = "465ae3368146d49c2eda3e2c0ef114424c87e8a6b509ab34c1026ace6497e790" dependencies = [ "arrow", "arrow-ord", @@ -1466,16 +1505,18 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", + "hashbrown 0.16.1", "itertools 0.14.0", + "itoa", "log", "paste", ] [[package]] name = "datafusion-functions-table" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b17dac25dfda2d2a90ff0ad1c054a11fb1523766226bec6e9bd8c410daee2ae" +checksum = "6156e6b22fcf1784112fc0173f3ae6e78c8fdb4d3ed0eace9543873b437e2af6" dependencies = [ "arrow", "async-trait", @@ -1489,9 +1530,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c594a29ddb22cbdbce500e4d99b5b2392c5cecb4c1086298b41d1ffec14dbb77" +checksum = "ca7baec14f866729012efb89011a6973f3a346dc8090c567bfcd328deff551c1" dependencies = [ "arrow", "datafusion-common", @@ -1507,9 +1548,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aa1b15ed81c7543f62264a30dd49dec4b1b0b698053b968f53be32dfba4f729" +checksum = "159228c3280d342658466bb556dc24de30047fe1d7e559dc5d16ccc5324166f9" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1517,20 +1558,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c00c31c4795597aa25b74cab5174ac07a53051f27ce1e011ecaffa9eaeecef81" +checksum = "e5427e5da5edca4d21ea1c7f50e1c9421775fe33d7d5726e5641a833566e7578" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] name = "datafusion-optimizer" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80ccf60767c09302b2e0fc3afebb3761a6d508d07316fab8c5e93312728a21bb" +checksum = "89099eefcd5b223ec685c36a41d35c69239236310d71d339f2af0fa4383f3f46" dependencies = [ "arrow", "chrono", @@ -1538,7 +1579,7 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", - "indexmap 2.12.1", + "indexmap 2.13.0", "itertools 0.14.0", "log", "recursive", @@ -1548,9 +1589,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c64b7f277556944e4edd3558da01d9e9ff9f5416f1c0aa7fee088e57bd141a7e" +checksum = "0f222df5195d605d79098ef37bdd5323bff0131c9d877a24da6ec98dfca9fe36" dependencies = [ "ahash", "arrow", @@ -1561,7 +1602,7 @@ dependencies 
= [ "datafusion-physical-expr-common", "half", "hashbrown 0.16.1", - "indexmap 2.12.1", + "indexmap 2.13.0", "itertools 0.14.0", "parking_lot", "paste", @@ -1572,9 +1613,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7abaee372ea2d19c016ee9ef8629c4415257d291cdd152bc7f0b75f28af1b63" +checksum = "40838625d63d9c12549d81979db3dd675d159055eb9135009ba272ab0e8d0f64" dependencies = [ "arrow", "datafusion-common", @@ -1587,9 +1628,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42237efe621f92adc22d111b531fdbc2cc38ca9b5e02327535628fb103ae2157" +checksum = "eacbcc4cfd502558184ed58fa3c72e775ec65bf077eef5fd2b3453db676f893c" dependencies = [ "ahash", "arrow", @@ -1597,16 +1638,16 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "hashbrown 0.16.1", - "indexmap 2.12.1", + "indexmap 2.13.0", "itertools 0.14.0", "parking_lot", ] [[package]] name = "datafusion-physical-optimizer" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd093498bd1319c6e5c76e9dfa905e78486f01b34579ce97f2e3a49f84c37fac" +checksum = "d501d0e1d0910f015677121601ac177ec59272ef5c9324d1147b394988f40941" dependencies = [ "arrow", "datafusion-common", @@ -1623,9 +1664,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cbe61b12daf81a9f20ba03bd3541165d51f86e004ef37426b11881330eed261" +checksum = "463c88ad6f1ecab1810f4c9f046898bee035b370137eb79b2b2db925e270631d" dependencies = [ "ahash", "arrow", @@ -1644,9 +1685,10 @@ dependencies = [ "futures", "half", "hashbrown 0.16.1", - "indexmap 2.12.1", + "indexmap 2.13.0", 
"itertools 0.14.0", "log", + "num-traits", "parking_lot", "pin-project-lite", "tokio", @@ -1654,9 +1696,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "52.1.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cf75daf56aa6b1c6867cc33ff0fb035d517d6d06737fd355a3e1ef67cba6e7a" +checksum = "677ee4448a010ed5faeff8d73ff78972c2ace59eff3cd7bd15833a1dafa00492" dependencies = [ "arrow", "chrono", @@ -1677,13 +1719,14 @@ dependencies = [ "datafusion-proto-common", "object_store", "prost", + "rand 0.9.2", ] [[package]] name = "datafusion-proto-common" -version = "52.1.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12a0cb3cce232a3de0d14ef44b58a6537aeb1362cfb6cf4d808691ddbb918956" +checksum = "965eca01edc8259edbbd95883a00b6d81e329fd44a019cfac3a03b026a83eade" dependencies = [ "arrow", "datafusion-common", @@ -1692,9 +1735,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0124331116db7f79df92ebfd2c3b11a8f90240f253555c9bb084f10b6fecf1dd" +checksum = "2857618a0ecbd8cd0cf29826889edd3a25774ec26b2995fc3862095c95d88fc6" dependencies = [ "arrow", "datafusion-common", @@ -1709,9 +1752,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1673e3c58ba618a6ea0568672f00664087b8982c581e9afd5aa6c3c79c9b431f" +checksum = "ef8637e35022c5c775003b3ab1debc6b4a8f0eb41b069bdd5475dd3aa93f6eba" dependencies = [ "async-trait", "datafusion-common", @@ -1723,16 +1766,17 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5272d256dab5347bb39d2040589f45d8c6b715b27edcb5fffe88cc8b9c3909cb" +checksum = 
"12d9e9f16a1692a11c94bcc418191fa15fd2b4d72a0c1a0c607db93c0b84dd81" dependencies = [ "arrow", "bigdecimal", "chrono", "datafusion-common", "datafusion-expr", - "indexmap 2.12.1", + "datafusion-functions-nested", + "indexmap 2.13.0", "log", "recursive", "regex", @@ -1752,9 +1796,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.5.5" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" dependencies = [ "powerfmt", "serde_core", @@ -1778,7 +1822,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -1788,7 +1832,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -1811,14 +1855,14 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] name = "dissimilar" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8975ffdaa0ef3661bfe02dbdcc06c9f829dfafe6a3c474de366a8d5e44276921" +checksum = "aeda16ab4059c5fd2a83f2b9c9e9c981327b18aa8e3b313f7e6563799d4f093e" [[package]] name = "dlv-list" @@ -1849,9 +1893,9 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "erased-serde" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e8918065695684b2b0702da20382d5ae6065cf3327bc2d6436bd49a71ce9f3" +checksum = "d2add8a07dd6a8d93ff627029c51de145e12686fbc36ecb298ac22e74cf02dec" dependencies = [ "serde", "serde_core", @@ -1919,9 +1963,9 @@ checksum = 
"37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "find-msvc-tools" -version = "0.1.4" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "fixedbitset" @@ -1931,9 +1975,9 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flatbuffers" -version = "25.9.23" +version = "25.12.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5" +checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3" dependencies = [ "bitflags", "rustc_version", @@ -1941,13 +1985,13 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.5" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", - "libz-rs-sys", "miniz_oxide", + "zlib-rs", ] [[package]] @@ -1979,9 +2023,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", @@ -1994,9 +2038,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = 
[ "futures-core", "futures-sink", @@ -2004,15 +2048,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-executor" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" dependencies = [ "futures-core", "futures-task", @@ -2021,38 +2065,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-macro" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = 
"037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-channel", "futures-core", @@ -2062,7 +2106,6 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - "pin-utils", "slab", ] @@ -2077,9 +2120,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.9" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", @@ -2087,9 +2130,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "js-sys", @@ -2107,11 +2150,34 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "r-efi", + "r-efi 5.3.0", "wasip2", "wasm-bindgen", ] +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "ghash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0d8a4362ccb29cb0b265253fb0a2728f592895ee6854fd9bc13f2ffda266ff1" +dependencies = [ + "opaque-debug", + "polyval", +] + [[package]] name = "glob" version = "0.3.3" @@ -2197,21 +2263,20 
@@ dependencies = [ [[package]] name = "home" -version = "0.5.11" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] name = "http" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] @@ -2252,9 +2317,9 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "hyper" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" dependencies = [ "atomic-waker", "bytes", @@ -2290,14 +2355,13 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.17" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ "base64", "bytes", "futures-channel", - "futures-core", "futures-util", "http", "http-body", @@ -2314,9 +2378,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.64" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" dependencies = [ "android_system_properties", "core-foundation-sys", @@ 
-2340,6 +2404,7 @@ dependencies = [ name = "iceberg" version = "0.9.0" dependencies = [ + "aes-gcm", "anyhow", "apache-avro", "array-init", @@ -2379,12 +2444,13 @@ dependencies = [ "serde_json", "serde_repr", "serde_with", - "strum 0.27.2", + "strum", "tokio", "typed-builder", "typetag", "url", "uuid", + "zeroize", "zstd", ] @@ -2423,9 +2489,9 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f578a71f2bfaf7ceb30b519a645ae48024b45f9eecbe060a31a004d7b4ba9462" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" dependencies = [ "displaydoc", "potential_utf", @@ -2436,9 +2502,9 @@ dependencies = [ [[package]] name = "icu_locale_core" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c219b62bf5a06801012446193fdfcbd7970e876823aba4c62def2ce957dcb44" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" dependencies = [ "displaydoc", "litemap", @@ -2449,9 +2515,9 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33747cecc725eebb47ac503fab725e395d50cb7889ae490a1359f130611d4cc5" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" dependencies = [ "icu_collections", "icu_normalizer_data", @@ -2463,15 +2529,15 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6ce2d23e1b3c45624ba6a23e2c767e01c9680e0c0800b39c7abfff9565175d8" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" [[package]] name = "icu_properties" -version = "2.1.0" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6d70f9b6574c79f7a83ea5ce72cc88d271a3e77355c5f7748a107e751d8617fb" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" dependencies = [ "icu_collections", "icu_locale_core", @@ -2483,15 +2549,15 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "2.1.0" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17fa55bf868e28e638ed132bcee1e5c21ba2c1e52c15e7c78b781858e7b54342" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" [[package]] name = "icu_provider" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f64958e359123591ae1f17a27b5fc9ebdb50c98b04e0401146154de1d8fe3e44" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" dependencies = [ "displaydoc", "icu_locale_core", @@ -2502,6 +2568,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "ident_case" version = "1.0.1" @@ -2542,9 +2614,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", "hashbrown 0.16.1", @@ -2552,15 +2624,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "indoc" -version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" -dependencies = [ - "rustversion", -] - [[package]] name = "inout" version = "0.1.4" @@ -2588,15 +2651,15 @@ dependencies = [ [[package]] name = "ipnet" -version = "2.11.0" 
+version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" -version = "0.7.8" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +checksum = "d8e7418f59cc01c88316161279a7f665217ae316b388e58a0d10e29f54f1e5eb" dependencies = [ "memchr", "serde", @@ -2622,15 +2685,15 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "jiff" -version = "0.2.16" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" +checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -2643,20 +2706,20 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.16" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" +checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] name = "jiff-tzdb" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" +checksum = "c900ef84826f1338a557697dc8fc601df9ca9af4ac137c7fb61d4c6f2dfd3076" [[package]] name = "jiff-tzdb-platform" @@ 
-2679,9 +2742,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.82" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" dependencies = [ "once_cell", "wasm-bindgen", @@ -2711,6 +2774,12 @@ dependencies = [ "spin", ] +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "lexical-core" version = "1.0.6" @@ -2776,9 +2845,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.177" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "libloading" @@ -2792,18 +2861,18 @@ dependencies = [ [[package]] name = "liblzma" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73c36d08cad03a3fbe2c4e7bb3a9e84c57e4ee4135ed0b065cade3d98480c648" +checksum = "b6033b77c21d1f56deeae8014eb9fbe7bdf1765185a6c508b5ca82eeaed7f899" dependencies = [ "liblzma-sys", ] [[package]] name = "liblzma-sys" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b9596486f6d60c3bbe644c0e1be1aa6ccc472ad630fe8927b456973d7cb736" +checksum = "9f2db66f3268487b5033077f266da6777d057949b8f93c8ad82e441df25e6186" dependencies = [ "cc", "libc", @@ -2812,24 +2881,15 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" - -[[package]] -name = "libz-rs-sys" -version = "0.5.2" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "840db8cf39d9ec4dd794376f38acc40d0fc65eec2a8f484f7fd375b84602becd" -dependencies = [ - "zlib-rs", -] +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" @@ -2848,9 +2908,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.28" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "lru-slab" @@ -2860,9 +2920,9 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" +checksum = "db9a0d582c2874f68138a16ce1867e0ffde6c0bb0a0df85e1f36d04146db488a" dependencies = [ "twox-hash", ] @@ -2879,18 +2939,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" - -[[package]] -name = "memoffset" -version = "0.9.1" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] 
+checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "miniz_oxide" @@ -2904,9 +2955,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", "wasi", @@ -2915,9 +2966,9 @@ dependencies = [ [[package]] name = "moka" -version = "0.12.11" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8261cd88c312e0004c1d51baad2980c66528dfdb2bee62003e643a4d8f86b077" +checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046" dependencies = [ "async-lock", "crossbeam-channel", @@ -2928,7 +2979,6 @@ dependencies = [ "futures-util", "parking_lot", "portable-atomic", - "rustc_version", "smallvec", "tagptr", "uuid", @@ -3014,23 +3064,25 @@ dependencies = [ [[package]] name = "object" -version = "0.32.2" +version = "0.37.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" dependencies = [ "memchr", ] [[package]] name = "object_store" -version = "0.12.4" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" +checksum = "622acbc9100d3c10e2ee15804b0caa40e55c933d5aa53814cd520805b7958a49" dependencies = [ "async-trait", "bytes", "chrono", - "futures", + "futures-channel", + "futures-core", + "futures-util", "http", "humantime", "itertools 0.14.0", @@ -3047,9 +3099,15 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "opaque-debug" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" [[package]] name = "opendal" @@ -3063,14 +3121,14 @@ dependencies = [ "bytes", "crc32c", "futures", - "getrandom 0.2.16", + "getrandom 0.2.17", "http", "http-body", "jiff", "log", "md-5", "percent-encoding", - "quick-xml 0.38.3", + "quick-xml 0.38.4", "reqsign", "reqwest", "serde", @@ -3139,14 +3197,13 @@ dependencies = [ [[package]] name = "parquet" -version = "57.1.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89" +checksum = "7d3f9f2205199603564127932b89695f52b62322f541d0fc7179d57c2e1c9877" dependencies = [ "ahash", "arrow-array", "arrow-buffer", - "arrow-cast", "arrow-data", "arrow-ipc", "arrow-schema", @@ -3223,7 +3280,7 @@ checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", "hashbrown 0.15.5", - "indexmap 2.12.1", + "indexmap 2.13.0", "serde", ] @@ -3247,9 +3304,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] name = "pin-utils" @@ -3301,17 +3358,29 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "polyval" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" +dependencies = [ + "cfg-if", + "cpufeatures", + "opaque-debug", + "universal-hash", +] + [[package]] name = "portable-atomic" -version = "1.11.1" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.4" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" dependencies = [ "portable-atomic", ] @@ -3347,23 +3416,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] name = "proc-macro2" -version = "1.0.103" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "prost" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", "prost-derive", @@ -3371,22 +3440,22 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" +checksum = 
"27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] name = "psm" -version = "0.1.28" +version = "0.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" +checksum = "3852766467df634d74f0b2d7819bf8dc483a0eb2e3b0f50f756f9cfe8b0d18d8" dependencies = [ "ar_archive_writer", "cc", @@ -3407,35 +3476,32 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.26.0" +version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" +checksum = "cf85e27e86080aafd5a22eae58a162e133a589551542b3e5cee4beb27e54f8e1" dependencies = [ - "indoc", "libc", - "memoffset", "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", - "unindent", ] [[package]] name = "pyo3-build-config" -version = "0.26.0" +version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" +checksum = "8bf94ee265674bf76c09fa430b0e99c26e319c945d96ca0d5a8215f31bf81cf7" dependencies = [ "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.26.0" +version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" +checksum = "491aa5fc66d8059dd44a75f4580a2962c1862a1c2945359db36f6c2818b748dc" dependencies = [ "libc", "pyo3-build-config", @@ -3443,27 +3509,27 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.26.0" +version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" +checksum = "f5d671734e9d7a43449f8480f8b38115df67bef8d21f76837fa75ee7aaa5e52e" 
dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] name = "pyo3-macros-backend" -version = "0.26.0" +version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" +checksum = "22faaa1ce6c430a1f71658760497291065e6450d7b5dc2bcf254d49f66ee700a" dependencies = [ "heck", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -3484,9 +3550,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.38.3" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89" +checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" dependencies = [ "memchr", "serde", @@ -3549,9 +3615,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.41" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -3562,6 +3628,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "rand" version = "0.8.5" @@ -3580,7 +3652,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha 0.9.0", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -3600,7 +3672,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -3609,14 +3681,14 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", ] [[package]] name = "rand_core" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ "getrandom 0.3.4", ] @@ -3638,7 +3710,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -3667,14 +3739,14 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -3684,9 +3756,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -3695,15 +3767,15 @@ dependencies = [ [[package]] name = 
"regex-lite" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "repr_offset" @@ -3725,7 +3797,7 @@ dependencies = [ "base64", "chrono", "form_urlencoded", - "getrandom 0.2.16", + "getrandom 0.2.17", "hex", "hmac", "home", @@ -3748,9 +3820,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.24" +version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ "base64", "bytes", @@ -3795,7 +3867,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.17", "libc", "untrusted", "windows-sys 0.52.0", @@ -3803,9 +3875,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f08d6a905edb32d74a5d5737a0c9d7e950c312f3c46cb0ca0a2ca09ea11878a0" +checksum = "8ba9ce64a8f45d7fc86358410bb1a82e8c987504c0d4900e9141d69a9f26c885" dependencies = [ "bytemuck", "byteorder", @@ -3859,9 +3931,9 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.2" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +checksum = 
"b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ "bitflags", "errno", @@ -3872,9 +3944,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.34" +version = "0.23.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9586e9ee2b4f8fab52a0048ca7334d7024eef48e2cb9407e3497bb7cab7fa7" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ "once_cell", "ring", @@ -3886,9 +3958,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.13.0" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94182ad936a0c91c324cd46c6511b9510ed16af436d7b5bab34beab0afd55f7a" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ "web-time", "zeroize", @@ -3913,9 +3985,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.20" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "salsa20" @@ -3949,9 +4021,9 @@ dependencies = [ [[package]] name = "schemars" -version = "1.0.4" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" dependencies = [ "dyn-clone", "ref-cast", @@ -4034,20 +4106,20 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] name = "serde_json" -version = "1.0.145" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", - "ryu", "serde", "serde_core", + "zmij", ] [[package]] @@ -4058,7 +4130,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -4075,17 +4147,17 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.15.1" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa66c845eee442168b2c8134fec70ac50dc20e760769c8ba0ad1319ca1959b04" +checksum = "dd5414fad8e6907dbdd5bc441a50ae8d6e26151a03b1de04d89a5576de61d01f" dependencies = [ "base64", "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.12.1", + "indexmap 2.13.0", "schemars 0.9.0", - "schemars 1.0.4", + "schemars 1.2.1", "serde_core", "serde_json", "serde_with_macros", @@ -4094,14 +4166,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.15.1" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91a903660542fced4e99881aa481bdbaec1634568ee02e0b8bd57c64cb38955" +checksum = "d3db8978e608f1fe7357e211969fd9abdcae80bac1ba7a3369bb7eb6b404eb65" dependencies = [ - "darling 0.21.3", + "darling 0.23.0", "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -4144,9 +4216,9 @@ dependencies = [ [[package]] name = "simd-adler32" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" [[package]] name = "simdutf8" @@ -4156,9 +4228,9 @@ checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "simple_asn1" -version = "0.6.3" +version = "0.6.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" +checksum = "0d585997b0ac10be3c5ee635f1bab02d512760d14b7c468801ac8a01d9ae5f1d" dependencies = [ "num-bigint", "num-traits", @@ -4168,15 +4240,15 @@ dependencies = [ [[package]] name = "siphasher" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "smallvec" @@ -4192,12 +4264,12 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.6.1" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -4218,9 +4290,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.59.0" +version = "0.61.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" dependencies = [ "log", "recursive", @@ -4229,13 +4301,13 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +checksum = "a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -4246,9 +4318,9 @@ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "stacker" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +checksum = "08d74a23609d509411d10e2176dc2a4346e3b4aea2e7b1869f19fdedbc71c013" dependencies = [ "cc", "cfg-if", @@ -4263,32 +4335,13 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" - [[package]] name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" dependencies = [ - "strum_macros 0.27.2", -] - -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.113", + "strum_macros", ] [[package]] @@ -4300,7 +4353,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -4322,9 +4375,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.113" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678faa00651c9eb72dd2020cbdf275d92eccb2400d568e419efdd64838145cb4" +checksum = 
"e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -4348,7 +4401,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -4359,18 +4412,18 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" [[package]] name = "target-lexicon" -version = "0.13.3" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df7f62577c25e07834649fc3b39fafdc597c0a3527dc1c60129201ccfcbaa50c" +checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" [[package]] name = "tempfile" -version = "3.23.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.3.4", + "getrandom 0.4.2", "once_cell", "rustix", "windows-sys 0.61.2", @@ -4378,22 +4431,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -4459,9 +4512,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.10.0" +version = "1.11.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" dependencies = [ "tinyvec_macros", ] @@ -4474,9 +4527,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.48.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" dependencies = [ "bytes", "libc", @@ -4489,13 +4542,13 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -4508,6 +4561,18 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", + "tokio-util", +] + [[package]] name = "tokio-util" version = "0.7.18" @@ -4523,9 +4588,9 @@ dependencies = [ [[package]] name = "tower" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", "futures-util", @@ -4538,9 +4603,9 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.6" +version = "0.6.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ "bitflags", "bytes", @@ -4568,9 +4633,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -4579,20 +4644,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", ] @@ -4647,7 +4712,7 @@ checksum = "3c36781cc0e46a83726d9879608e4cf6c2505237e263a8eb8c24502989cfdb28" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -4683,7 +4748,7 @@ checksum = "27a7a9b72ba121f6f1f6c3632b85604cac41aedb5ddc70accbebb6cac83de846" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -4694,15 +4759,15 @@ checksum = "f8c1ae7cc0fdb8b842d65d127cb981574b0d2b249b74d1c7a2986863dc134f71" [[package]] name = "unicode-ident" -version = "1.0.20" +version = 
"1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "a559e63b5d8004e12f9bce88af5c6d939c58de839b7532cfe9653846cedd2a9e" [[package]] name = "unicode-width" @@ -4711,10 +4776,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] -name = "unindent" -version = "0.2.4" +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "universal-hash" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common", + "subtle", +] [[package]] name = "untrusted" @@ -4724,9 +4799,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.7" +version = "2.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" dependencies = [ "form_urlencoded", "idna", @@ -4742,11 +4817,11 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.19.0" +version = "1.22.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" +checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" dependencies = [ - "getrandom 0.3.4", + "getrandom 0.4.2", "js-sys", "serde_core", "wasm-bindgen", @@ -4785,18 +4860,27 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.105" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" dependencies = [ "cfg-if", "once_cell", @@ -4807,11 +4891,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.55" +version = "0.4.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" +checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" dependencies = [ "cfg-if", + "futures-util", "js-sys", "once_cell", "wasm-bindgen", @@ -4820,9 +4905,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.105" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4830,26 +4915,48 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.105" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.105" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap 2.13.0", + "wasm-encoder", + "wasmparser", +] + [[package]] name = "wasm-streams" version = "0.4.2" @@ -4863,11 +4970,23 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap 2.13.0", + "semver", +] + [[package]] name = "web-sys" -version = 
"0.3.82" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" dependencies = [ "js-sys", "wasm-bindgen", @@ -4885,9 +5004,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "1.0.3" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b130c0d2d49f8b6889abc456e795e82525204f27c42cf767cf0d7734e089b8" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" dependencies = [ "rustls-pki-types", ] @@ -4944,7 +5063,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -4955,7 +5074,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -5149,9 +5268,91 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap 2.13.0", + "prettyplease", + "syn 2.0.117", + 
"wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap 2.13.0", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap 2.13.0", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] [[package]] name = "writeable" @@ -5178,28 +5379,28 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.27" +version = "0.8.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +checksum = "efbb2a062be311f2ba113ce66f697a4dc589f85e78a4aea276200804cea0ed87" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.27" +version = "0.8.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +checksum = "0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] @@ -5219,7 +5420,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", "synstructure", ] @@ -5259,14 +5460,20 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.117", ] [[package]] name = "zlib-rs" -version = "0.5.2" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513" + +[[package]] +name = "zmij" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f06ae92f42f5e5c42443fd094f245eb656abf56dd7cce9b8b263236565e00f2" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" [[package]] name = "zstd" diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 9b551ea205..251f96d169 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -31,12 +31,12 @@ license = "Apache-2.0" crate-type = ["cdylib"] [dependencies] -arrow = { version = "57.1", features = ["pyarrow", "chrono-tz"] } +arrow = { version = "58", features = ["pyarrow", "chrono-tz"] } iceberg = { path = "../../crates/iceberg" } iceberg-storage-opendal = { path = "../../crates/storage/opendal", features = ["opendal-all"] } -pyo3 = { version = "0.26", features = ["extension-module", "abi3-py310"] } +pyo3 = { version = "0.28", features = ["extension-module", "abi3-py310"] } iceberg-datafusion = { path = "../../crates/integrations/datafusion" } -datafusion-ffi = { version = "52.1" } +datafusion-ffi = "53.0.0" tokio = { version = "1.46.1", default-features = false } 
[profile.release] diff --git a/bindings/python/src/datafusion_table_provider.rs b/bindings/python/src/datafusion_table_provider.rs index 95b3eb90d0..2f4745fa7a 100644 --- a/bindings/python/src/datafusion_table_provider.rs +++ b/bindings/python/src/datafusion_table_provider.rs @@ -16,7 +16,7 @@ // under the License. use std::collections::HashMap; -use std::ffi::CString; +use std::ffi::{CStr, CString}; use std::sync::Arc; use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; @@ -27,27 +27,24 @@ use iceberg::table::StaticTable; use iceberg_datafusion::table::IcebergStaticTableProvider; use iceberg_storage_opendal::OpenDalResolvingStorageFactory; use pyo3::exceptions::{PyRuntimeError, PyValueError}; -use pyo3::prelude::{PyAnyMethods, PyCapsuleMethods, *}; +use pyo3::prelude::*; use pyo3::types::{PyAny, PyCapsule}; use crate::runtime::runtime; -pub(crate) fn validate_pycapsule(capsule: &Bound, name: &str) -> PyResult<()> { +// pyo3 0.28's CapsuleName only exposes `unsafe fn as_cstr() -> &CStr`, +// so we accept &CStr to allow direct comparison without UTF-8 validation. +pub(crate) fn validate_pycapsule(capsule: &Bound, name: &CStr) -> PyResult<()> { let capsule_name = capsule.name()?; - if capsule_name.is_none() { - return Err(PyValueError::new_err(format!( - "Expected {name} PyCapsule to have name set." 
- ))); + match capsule_name { + None => Err(PyValueError::new_err( + "Expected PyCapsule to have name set.", + )), + Some(capsule_name) if unsafe { capsule_name.as_cstr() } != name => { + Err(PyValueError::new_err("PyCapsule name mismatch")) + } + _ => Ok(()), } - - let capsule_name = capsule_name.unwrap().to_str()?; - if capsule_name != name { - return Err(PyValueError::new_err(format!( - "Expected name '{name}' in PyCapsule, instead got '{capsule_name}'" - ))); - } - - Ok(()) } pub(crate) fn ffi_logical_codec_from_pycapsule( @@ -60,10 +57,12 @@ pub(crate) fn ffi_logical_codec_from_pycapsule( obj }; - let capsule = capsule.downcast::()?; - validate_pycapsule(capsule, "datafusion_logical_extension_codec")?; + let capsule_name = c"datafusion_logical_extension_codec"; + let capsule = capsule.cast::()?; + validate_pycapsule(capsule, capsule_name)?; - let codec = unsafe { capsule.reference::() }; + let ptr = capsule.pointer_checked(Some(capsule_name))?; + let codec = unsafe { &*(ptr.as_ptr() as *const FFI_LogicalExtensionCodec) }; Ok(codec.clone()) } diff --git a/bindings/python/src/transform.rs b/bindings/python/src/transform.rs index c159d573fc..b50a0fa84d 100644 --- a/bindings/python/src/transform.rs +++ b/bindings/python/src/transform.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-use arrow::array::{Array, ArrayData, make_array}; +use arrow::array::{ArrayData, make_array}; use arrow::pyarrow::{FromPyArrow, ToPyArrow}; use iceberg::spec::Transform; use iceberg::transform::create_transform_function; @@ -70,7 +70,7 @@ fn apply(py: Python, array: Py, transform: Transform) -> PyResult=52 for FFI compatibility", + "Iceberg table provider requires datafusion>=53 for FFI compatibility", allow_module_level=True, ) diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs index 1de006de74..042a730e19 100644 --- a/crates/iceberg/src/arrow/reader.rs +++ b/crates/iceberg/src/arrow/reader.rs @@ -2546,7 +2546,7 @@ message schema { let props = WriterProperties::builder() .set_compression(Compression::SNAPPY) - .set_max_row_group_size(100) + .set_max_row_group_row_count(Some(100)) .build(); let file = File::create(&file_path).unwrap(); @@ -2861,7 +2861,7 @@ message schema { // Force each batch into its own row group let props = WriterProperties::builder() .set_compression(Compression::SNAPPY) - .set_max_row_group_size(100) + .set_max_row_group_row_count(Some(100)) .build(); let file = File::create(&data_file_path).unwrap(); @@ -3057,7 +3057,7 @@ message schema { // Force each batch into its own row group let props = WriterProperties::builder() .set_compression(Compression::SNAPPY) - .set_max_row_group_size(100) + .set_max_row_group_row_count(Some(100)) .build(); let file = File::create(&data_file_path).unwrap(); @@ -3281,7 +3281,7 @@ message schema { // Force each batch into its own row group let props = WriterProperties::builder() .set_compression(Compression::SNAPPY) - .set_max_row_group_size(100) + .set_max_row_group_row_count(Some(100)) .build(); let file = File::create(&data_file_path).unwrap(); @@ -3729,7 +3729,7 @@ message schema { let props = WriterProperties::builder() .set_compression(Compression::SNAPPY) .set_write_batch_size(2) - .set_max_row_group_size(2) + .set_max_row_group_row_count(Some(2)) .build(); let file = 
File::create(format!("{table_location}/1.parquet")).unwrap(); diff --git a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs index 66e2898532..96d1c651cd 100644 --- a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs @@ -791,7 +791,7 @@ mod tests { use parquet::arrow::arrow_reader::{ ArrowReaderOptions, ParquetRecordBatchReaderBuilder, RowSelector, }; - use parquet::file::metadata::ParquetMetaData; + use parquet::file::metadata::{PageIndexPolicy, ParquetMetaData}; use parquet::file::properties::WriterProperties; use rand::{Rng, thread_rng}; use tempfile::NamedTempFile; @@ -895,7 +895,7 @@ mod tests { writer.close().unwrap(); let file = temp_file.reopen().unwrap(); - let options = ArrowReaderOptions::new().with_page_index(true); + let options = ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required); let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); let metadata = reader.metadata().clone(); @@ -936,7 +936,7 @@ mod tests { writer.close().unwrap(); let file = temp_file.reopen().unwrap(); - let options = ArrowReaderOptions::new().with_page_index(true); + let options = ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required); let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); let metadata = reader.metadata(); diff --git a/crates/iceberg/src/transform/temporal.rs b/crates/iceberg/src/transform/temporal.rs index d0a0da249b..1cd4d6a436 100644 --- a/crates/iceberg/src/transform/temporal.rs +++ b/crates/iceberg/src/transform/temporal.rs @@ -81,7 +81,7 @@ impl TransformFunction for Year { fn transform_literal(&self, input: &crate::spec::Datum) -> Result> { let val = match (input.data_type(), input.literal()) { (PrimitiveType::Date, PrimitiveLiteral::Int(v)) => { - Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR + 
Date32Type::to_naive_date_opt(*v).unwrap().year() - UNIX_EPOCH_YEAR } (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => { Self::timestamp_to_year_micros(*v)? @@ -178,8 +178,8 @@ impl TransformFunction for Month { fn transform_literal(&self, input: &crate::spec::Datum) -> Result> { let val = match (input.data_type(), input.literal()) { (PrimitiveType::Date, PrimitiveLiteral::Int(v)) => { - (Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR) * 12 - + Date32Type::to_naive_date(*v).month0() as i32 + (Date32Type::to_naive_date_opt(*v).unwrap().year() - UNIX_EPOCH_YEAR) * 12 + + Date32Type::to_naive_date_opt(*v).unwrap().month0() as i32 } (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => { Self::timestamp_to_month_micros(*v)? diff --git a/crates/iceberg/src/writer/file_writer/parquet_writer.rs b/crates/iceberg/src/writer/file_writer/parquet_writer.rs index 0984d8fc64..840d1a5f16 100644 --- a/crates/iceberg/src/writer/file_writer/parquet_writer.rs +++ b/crates/iceberg/src/writer/file_writer/parquet_writer.rs @@ -820,7 +820,7 @@ mod tests { // write data let mut pw = ParquetWriterBuilder::new( WriterProperties::builder() - .set_max_row_group_size(128) + .set_max_row_group_row_count(Some(128)) .build(), Arc::new(to_write.schema().as_ref().try_into().unwrap()), ) diff --git a/crates/integrations/datafusion/src/physical_plan/commit.rs b/crates/integrations/datafusion/src/physical_plan/commit.rs index f876908ae6..3b3ff3d6b3 100644 --- a/crates/integrations/datafusion/src/physical_plan/commit.rs +++ b/crates/integrations/datafusion/src/physical_plan/commit.rs @@ -47,7 +47,7 @@ pub(crate) struct IcebergCommitExec { input: Arc, schema: ArrowSchemaRef, count_schema: ArrowSchemaRef, - plan_properties: PlanProperties, + plan_properties: Arc, } impl IcebergCommitExec { @@ -72,13 +72,13 @@ impl IcebergCommitExec { } // Compute the plan properties for this execution plan - fn compute_properties(schema: ArrowSchemaRef) -> PlanProperties { - PlanProperties::new( + fn 
compute_properties(schema: ArrowSchemaRef) -> Arc { + Arc::new(PlanProperties::new( EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(1), EmissionType::Final, Boundedness::Bounded, - ) + )) } // Create a record batch with just the count of rows written @@ -133,7 +133,7 @@ impl ExecutionPlan for IcebergCommitExec { self } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.plan_properties } @@ -301,7 +301,7 @@ mod tests { struct MockWriteExec { schema: Arc, data_files_json: Vec, - plan_properties: PlanProperties, + plan_properties: Arc, } impl MockWriteExec { @@ -312,12 +312,12 @@ mod tests { false, )])); - let plan_properties = PlanProperties::new( + let plan_properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(schema.clone()), Partitioning::UnknownPartitioning(1), EmissionType::Final, Boundedness::Bounded, - ); + )); Self { schema, @@ -340,7 +340,7 @@ mod tests { self.schema.clone() } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.plan_properties } diff --git a/crates/integrations/datafusion/src/physical_plan/metadata_scan.rs b/crates/integrations/datafusion/src/physical_plan/metadata_scan.rs index 9a9d0aa0d9..a1a65dec1f 100644 --- a/crates/integrations/datafusion/src/physical_plan/metadata_scan.rs +++ b/crates/integrations/datafusion/src/physical_plan/metadata_scan.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. 
+use std::sync::Arc; + use datafusion::catalog::TableProvider; use datafusion::physical_expr::EquivalenceProperties; use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; @@ -27,17 +29,17 @@ use crate::metadata_table::IcebergMetadataTableProvider; #[derive(Debug)] pub struct IcebergMetadataScan { provider: IcebergMetadataTableProvider, - properties: PlanProperties, + properties: Arc, } impl IcebergMetadataScan { pub fn new(provider: IcebergMetadataTableProvider) -> Self { - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(provider.schema()), Partitioning::UnknownPartitioning(1), EmissionType::Incremental, Boundedness::Bounded, - ); + )); Self { provider, properties, @@ -64,7 +66,7 @@ impl ExecutionPlan for IcebergMetadataScan { self } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/crates/integrations/datafusion/src/physical_plan/scan.rs b/crates/integrations/datafusion/src/physical_plan/scan.rs index 7bb4e7ed0e..234ab26470 100644 --- a/crates/integrations/datafusion/src/physical_plan/scan.rs +++ b/crates/integrations/datafusion/src/physical_plan/scan.rs @@ -46,7 +46,7 @@ pub struct IcebergTableScan { snapshot_id: Option, /// Stores certain, often expensive to compute, /// plan properties used in query optimization. - plan_properties: PlanProperties, + plan_properties: Arc, /// Projection column names, None means all columns projection: Option>, /// Filters to apply to the table scan @@ -104,16 +104,16 @@ impl IcebergTableScan { } /// Computes [`PlanProperties`] used in query optimization. 
- fn compute_properties(schema: ArrowSchemaRef) -> PlanProperties { + fn compute_properties(schema: ArrowSchemaRef) -> Arc { // TODO: // This is more or less a placeholder, to be replaced // once we support output-partitioning - PlanProperties::new( + Arc::new(PlanProperties::new( EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(1), EmissionType::Incremental, Boundedness::Bounded, - ) + )) } } @@ -137,7 +137,7 @@ impl ExecutionPlan for IcebergTableScan { Ok(self) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.plan_properties } diff --git a/crates/integrations/datafusion/src/physical_plan/write.rs b/crates/integrations/datafusion/src/physical_plan/write.rs index 0dea150d31..3b227e20fa 100644 --- a/crates/integrations/datafusion/src/physical_plan/write.rs +++ b/crates/integrations/datafusion/src/physical_plan/write.rs @@ -64,7 +64,7 @@ pub(crate) struct IcebergWriteExec { table: Table, input: Arc, result_schema: ArrowSchemaRef, - plan_properties: PlanProperties, + plan_properties: Arc, } impl IcebergWriteExec { @@ -82,13 +82,13 @@ impl IcebergWriteExec { fn compute_properties( input: &Arc, schema: ArrowSchemaRef, - ) -> PlanProperties { - PlanProperties::new( + ) -> Arc { + Arc::new(PlanProperties::new( EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(input.output_partitioning().partition_count()), EmissionType::Final, Boundedness::Bounded, - ) + )) } // Create a record batch with serialized data files @@ -153,7 +153,7 @@ impl ExecutionPlan for IcebergWriteExec { vec![true; self.children().len()] } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.plan_properties } @@ -336,17 +336,17 @@ mod tests { struct MockExecutionPlan { schema: ArrowSchemaRef, batches: Vec, - properties: PlanProperties, + properties: Arc, } impl MockExecutionPlan { fn new(schema: ArrowSchemaRef, batches: Vec) -> Self { - let properties = PlanProperties::new( + let properties = 
Arc::new(PlanProperties::new( EquivalenceProperties::new(schema.clone()), Partitioning::UnknownPartitioning(1), EmissionType::Final, Boundedness::Bounded, - ); + )); Self { schema, @@ -383,7 +383,7 @@ mod tests { self } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/crates/integrations/datafusion/tests/integration_datafusion_test.rs b/crates/integrations/datafusion/tests/integration_datafusion_test.rs index b99c0875c8..cebac75dd9 100644 --- a/crates/integrations/datafusion/tests/integration_datafusion_test.rs +++ b/crates/integrations/datafusion/tests/integration_datafusion_test.rs @@ -535,9 +535,9 @@ fn get_nested_struct_type() -> StructType { 10, "address", Type::Struct(StructType::new(vec![ - NestedField::required(11, "street", Type::Primitive(PrimitiveType::String)).into(), - NestedField::required(12, "city", Type::Primitive(PrimitiveType::String)).into(), - NestedField::required(13, "zip", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::optional(11, "street", Type::Primitive(PrimitiveType::String)).into(), + NestedField::optional(12, "city", Type::Primitive(PrimitiveType::String)).into(), + NestedField::optional(13, "zip", Type::Primitive(PrimitiveType::Int)).into(), ])), ) .into(), @@ -662,7 +662,7 @@ async fn test_insert_into_nested() -> Result<()> { expect![[r#" Field { "id": Int32, metadata: {"PARQUET:field_id": "1"} }, Field { "name": Utf8, metadata: {"PARQUET:field_id": "2"} }, - Field { "profile": nullable Struct("address": Struct("street": non-null Utf8, metadata: {"PARQUET:field_id": "6"}, "city": non-null Utf8, metadata: {"PARQUET:field_id": "7"}, "zip": non-null Int32, metadata: {"PARQUET:field_id": "8"}), metadata: {"PARQUET:field_id": "4"}, "contact": Struct("email": Utf8, metadata: {"PARQUET:field_id": "9"}, "phone": Utf8, metadata: {"PARQUET:field_id": "10"}), metadata: {"PARQUET:field_id": "5"}), metadata: {"PARQUET:field_id": "3"} }"#]], + Field { "profile": nullable 
Struct("address": Struct("street": Utf8, metadata: {"PARQUET:field_id": "6"}, "city": Utf8, metadata: {"PARQUET:field_id": "7"}, "zip": Int32, metadata: {"PARQUET:field_id": "8"}), metadata: {"PARQUET:field_id": "4"}, "contact": Struct("email": Utf8, metadata: {"PARQUET:field_id": "9"}, "phone": Utf8, metadata: {"PARQUET:field_id": "10"}), metadata: {"PARQUET:field_id": "5"}), metadata: {"PARQUET:field_id": "3"} }"#]], expect![[r#" id: PrimitiveArray [ @@ -681,7 +681,7 @@ async fn test_insert_into_nested() -> Result<()> { valid, ] [ - -- child 0: "address" (Struct([Field { name: "street", data_type: Utf8, metadata: {"PARQUET:field_id": "6"} }, Field { name: "city", data_type: Utf8, metadata: {"PARQUET:field_id": "7"} }, Field { name: "zip", data_type: Int32, metadata: {"PARQUET:field_id": "8"} }])) + -- child 0: "address" (Struct([Field { name: "street", data_type: Utf8, nullable: true, metadata: {"PARQUET:field_id": "6"} }, Field { name: "city", data_type: Utf8, nullable: true, metadata: {"PARQUET:field_id": "7"} }, Field { name: "zip", data_type: Int32, nullable: true, metadata: {"PARQUET:field_id": "8"} }])) StructArray -- validity: [ diff --git a/crates/sqllogictest/testdata/slts/df_test/binary_predicate_pushdown.slt b/crates/sqllogictest/testdata/slts/df_test/binary_predicate_pushdown.slt index 54d74f5e70..aa68ab2762 100644 --- a/crates/sqllogictest/testdata/slts/df_test/binary_predicate_pushdown.slt +++ b/crates/sqllogictest/testdata/slts/df_test/binary_predicate_pushdown.slt @@ -28,8 +28,7 @@ logical_plan physical_plan 01)FilterExec: data@1 = 0102 02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -03)----CooperativeExec -04)------IcebergTableScan projection:[id,data] predicate:[data = 0102] +03)----IcebergTableScan projection:[id,data] predicate:[data = 0102] # Verify empty result from empty table query I? 
diff --git a/crates/sqllogictest/testdata/slts/df_test/boolean_predicate_pushdown.slt b/crates/sqllogictest/testdata/slts/df_test/boolean_predicate_pushdown.slt index 466a45f8c6..496f719261 100644 --- a/crates/sqllogictest/testdata/slts/df_test/boolean_predicate_pushdown.slt +++ b/crates/sqllogictest/testdata/slts/df_test/boolean_predicate_pushdown.slt @@ -39,8 +39,7 @@ logical_plan physical_plan 01)FilterExec: is_active@1 02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -03)----CooperativeExec -04)------IcebergTableScan projection:[id,is_active,description] predicate:[is_active = true] +03)----IcebergTableScan projection:[id,is_active,description] predicate:[is_active = true] # Query with is_active = true query ITT rowsort @@ -60,8 +59,7 @@ logical_plan physical_plan 01)FilterExec: NOT is_active@1 02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -03)----CooperativeExec -04)------IcebergTableScan projection:[id,is_active,description] predicate:[is_active = false] +03)----IcebergTableScan projection:[id,is_active,description] predicate:[is_active = false] # Query with is_active = false query ITT rowsort @@ -80,8 +78,7 @@ logical_plan physical_plan 01)FilterExec: NOT is_active@1 02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -03)----CooperativeExec -04)------IcebergTableScan projection:[id,is_active,description] predicate:[is_active = false] +03)----IcebergTableScan projection:[id,is_active,description] predicate:[is_active = false] # Query with is_active != true (includes false and NULL) query ITT rowsort diff --git a/crates/sqllogictest/testdata/slts/df_test/like_predicate_pushdown.slt b/crates/sqllogictest/testdata/slts/df_test/like_predicate_pushdown.slt index a160ca77f4..3d8b151aa9 100644 --- a/crates/sqllogictest/testdata/slts/df_test/like_predicate_pushdown.slt +++ b/crates/sqllogictest/testdata/slts/df_test/like_predicate_pushdown.slt @@ -37,8 +37,7 @@ logical_plan physical_plan 
01)FilterExec: name@1 LIKE Al% 02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -03)----CooperativeExec -04)------IcebergTableScan projection:[id,name] predicate:[name STARTS WITH "Al"] +03)----IcebergTableScan projection:[id,name] predicate:[name STARTS WITH "Al"] # Test LIKE filtering with case-sensitive match query IT rowsort @@ -57,8 +56,7 @@ logical_plan physical_plan 01)FilterExec: name@1 NOT LIKE Al% 02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -03)----CooperativeExec -04)------IcebergTableScan projection:[id,name] predicate:[name NOT STARTS WITH "Al"] +03)----IcebergTableScan projection:[id,name] predicate:[name NOT STARTS WITH "Al"] # Test NOT LIKE filtering query IT rowsort diff --git a/crates/sqllogictest/testdata/slts/df_test/timestamp_predicate_pushdown.slt b/crates/sqllogictest/testdata/slts/df_test/timestamp_predicate_pushdown.slt index 3427625291..ffa74173dc 100644 --- a/crates/sqllogictest/testdata/slts/df_test/timestamp_predicate_pushdown.slt +++ b/crates/sqllogictest/testdata/slts/df_test/timestamp_predicate_pushdown.slt @@ -50,8 +50,7 @@ logical_plan physical_plan 01)FilterExec: ts@1 = 1672921800000000000 02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -03)----CooperativeExec -04)------IcebergTableScan projection:[id,ts] predicate:[ts = 2023-01-05 12:30:00] +03)----IcebergTableScan projection:[id,ts] predicate:[ts = 2023-01-05 12:30:00] # Verify timestamp equality filtering works query I? @@ -69,8 +68,7 @@ logical_plan physical_plan 01)FilterExec: ts@1 > 1673308800000000000 02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -03)----CooperativeExec -04)------IcebergTableScan projection:[id,ts] predicate:[ts > 2023-01-10 00:00:00] +03)----IcebergTableScan projection:[id,ts] predicate:[ts > 2023-01-10 00:00:00] # Verify timestamp greater than filtering query I? 
rowsort @@ -99,8 +97,7 @@ logical_plan physical_plan 01)FilterExec: ts@1 >= 1672876800000000000 AND ts@1 <= 1673827199000000000 02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -03)----CooperativeExec -04)------IcebergTableScan projection:[id,ts] predicate:[(ts >= 2023-01-05 00:00:00) AND (ts <= 2023-01-15 23:59:59)] +03)----IcebergTableScan projection:[id,ts] predicate:[(ts >= 2023-01-05 00:00:00) AND (ts <= 2023-01-15 23:59:59)] # Test timestamp range predicate filtering query I? rowsort @@ -165,8 +162,7 @@ logical_plan physical_plan 01)FilterExec: ts@1 > 1672531200000000 02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -03)----CooperativeExec -04)------IcebergTableScan projection:[id,ts] predicate:[ts > 2023-01-01 00:00:00] +03)----IcebergTableScan projection:[id,ts] predicate:[ts > 2023-01-01 00:00:00] query I? SELECT * FROM default.default.test_timestamp_micros WHERE ts > CAST('2023-01-01 00:00:00' AS TIMESTAMP) From 39eca833a7304d7e14af4b8ee16f04a3e3d37ad5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 30 Mar 2026 09:09:32 +0800 Subject: [PATCH 30/39] chore(deps): Bump uuid from 1.22.0 to 1.23.0 (#2291) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [uuid](https://github.com/uuid-rs/uuid) from 1.22.0 to 1.23.0.
Release notes

Sourced from uuid's releases.

v1.23.0

What's Changed

New Contributors

Special thanks

@meng-xu-cs raised a series of bugs against the timestamp logic in uuid using automated tooling. The issues themselves were reasonably and responsibly presented and the end result is a better uuid library for everyone. Thanks!

Deprecations

This release includes the following deprecations:

  • Context: Renamed to ContextV1
  • Timestamp::from_gregorian: Renamed to Timestamp::from_gregorian_time

Change to Version::Max

Version::Max's u8 representation has changed from 0xff to 0x0f to match the value returned by Uuid::get_version_num.

Change to Uuid::get_version for the max UUID

Uuid::get_version will only return Some(Version::Max) if the UUID is actually the max UUID (all bytes are 0xff). Previously it would return Some if only the version field was 0x0f. This change matches the behaviour of the nil UUID, which only returns Some(Version::Nil) if the UUID is the nil UUID (all bytes are 0x00).

Full Changelog: https://github.com/uuid-rs/uuid/compare/v1.22.0...v1.23.0

Commits
  • 00ab922 Merge pull request #876 from uuid-rs/cargo/v1.23.0
  • 726ba45 prepare for 1.23.0 release
  • 996dade Merge pull request #875 from uuid-rs/fix/context-ordering
  • e140479 simplify a use stmt
  • 8ed9142 reorganize and document more v7 context internals
  • e09a322 use LazyLock to synchronize v1/v6 context initialization
  • 0f260cc Merge pull request #874 from uuid-rs/chore/impl-cleanups
  • 1419e91 clean up and refactor main lib tests
  • ceeaf4b ensure we don't overflow on counters less than 12
  • 63bc8f5 Merge pull request #873 from uuid-rs/fix/error-msg
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=uuid&package-manager=cargo&previous-version=1.22.0&new-version=1.23.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a24ef04626..5110d5a480 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6602,7 +6602,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.4.2", + "getrandom 0.3.4", "once_cell", "rustix", "windows-sys 0.61.2", @@ -7202,9 +7202,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.22.0" +version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" +checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9" dependencies = [ "getrandom 0.4.2", "js-sys", From aff502df62c40c977b28ccce83f63f8fb8fccef5 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Sun, 29 Mar 2026 18:23:57 -0700 Subject: [PATCH 31/39] ci: improve github workflows (#2289) ## Which issue does this PR close? - Closes #. ## What changes are included in this PR? Relates to https://github.com/apache/iceberg/issues/15742 This PR - Add "ASF allowlist check" - Pin commit for codeql.yml (zizmor recommended) - Add back Github Action auto-update for dependabot (reverts #2267) - Add cooldown to dependabot (zizmor recommended) - `Swatinem/rust-cache@v2` -> `swatinem/rust-cache@v2` (fix case sensitivity) [asf infra allowlist uses lowercase](https://github.com/apache/infrastructure-actions/blob/fae466bc0d9821859a623cbc7648c750ff359ec6/approved_patterns.yml#L271) We can add back dependabot for github action because the "ASF allowlist check" will now alert when an action is not allowed (failures will no longer be silent) ## Are these changes tested? 
--- .github/dependabot.yml | 11 ++++++ .github/workflows/asf-allowlist-check.yml | 47 +++++++++++++++++++++++ .github/workflows/ci.yml | 6 +-- .github/workflows/codeql.yml | 8 ++-- 4 files changed, 66 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/asf-allowlist-check.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 23c9b239ee..03235972dd 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -17,6 +17,15 @@ version: 2 updates: + # Maintain dependencies for GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + day: "sunday" + cooldown: + default-days: 7 + # Maintain dependencies for iceberg - package-ecosystem: "cargo" directory: "/" @@ -35,3 +44,5 @@ updates: patterns: - "arrow*" - "parquet" + cooldown: + default-days: 7 diff --git a/.github/workflows/asf-allowlist-check.yml b/.github/workflows/asf-allowlist-check.yml new file mode 100644 index 0000000000..d4e84c5922 --- /dev/null +++ b/.github/workflows/asf-allowlist-check.yml @@ -0,0 +1,47 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Verifies all GitHub Actions refs are on the ASF allowlist. 
+# Actions not on the allowlist silently fail with "Startup failure" — no logs, +# no notifications, and PRs may appear green because no checks ran. +# See https://github.com/apache/infrastructure-actions/issues/574 +name: "ASF Allowlist Check" + +on: + pull_request: + paths: + - ".github/**" + push: + branches: + - main + paths: + - ".github/**" + +permissions: + contents: read + +jobs: + asf-allowlist-check: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false + # Intentionally unpinned to always use the latest allowlist from the ASF. + - uses: apache/infrastructure-actions/allowlist-check@main # zizmor: ignore[unpinned-uses] diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ea2257b676..af96f9ad66 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -97,7 +97,7 @@ jobs: uses: ./.github/actions/setup-builder - name: Cache Rust artifacts - uses: Swatinem/rust-cache@v2 + uses: swatinem/rust-cache@v2 - name: Install protoc uses: arduino/setup-protoc@v3 @@ -123,7 +123,7 @@ jobs: uses: ./.github/actions/setup-builder - name: Cache Rust artifacts - uses: Swatinem/rust-cache@v2 + uses: swatinem/rust-cache@v2 - name: Build run: cargo build -p iceberg --no-default-features @@ -149,7 +149,7 @@ jobs: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Cache Rust artifacts - uses: Swatinem/rust-cache@v2 + uses: swatinem/rust-cache@v2 with: key: ${{ matrix.test-suite.name }} diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 17bfd8bf3d..75c10c7eaf 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -41,14 +41,16 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Initialize CodeQL - uses: github/codeql-action/init@v4 + uses:
github/codeql-action/init@d4b3ca9fa7f69d38bfcd667bdc45bc373d16277e # v4 with: languages: actions - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v4 + uses: github/codeql-action/analyze@d4b3ca9fa7f69d38bfcd667bdc45bc373d16277e # v4 with: category: "/language:actions" From 3212c31e206c89c956e221566fd434e04e4f5b56 Mon Sep 17 00:00:00 2001 From: Den Date: Mon, 30 Mar 2026 12:11:37 +0300 Subject: [PATCH 32/39] Make `convert_filters_to_predicate` public (#2118) ## What changes are included in this PR? - Make `convert_filters_to_predicate` public in the DataFusion integration to allow external usage of the filter conversion logic. ## Are these changes tested? - This is a visibility change (`pub use`) and does not introduce new logic. Co-authored-by: Denis Semenov --- crates/integrations/datafusion/src/physical_plan/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/integrations/datafusion/src/physical_plan/mod.rs b/crates/integrations/datafusion/src/physical_plan/mod.rs index 5a9845cde0..aeac30de32 100644 --- a/crates/integrations/datafusion/src/physical_plan/mod.rs +++ b/crates/integrations/datafusion/src/physical_plan/mod.rs @@ -26,5 +26,6 @@ pub(crate) mod write; pub(crate) const DATA_FILES_COL_NAME: &str = "data_files"; +pub use expr_to_predicate::convert_filters_to_predicate; pub use project::project_with_partition; pub use scan::IcebergTableScan; From 6ee5e71b04dad06183655d4fae4f28635541a072 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Tue, 31 Mar 2026 02:43:39 -0700 Subject: [PATCH 33/39] ci: fix zizmor security findings (#2290) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Which issue does this PR close? Relates to https://github.com/apache/iceberg/issues/15742 Follow up to #2289 ## What changes are included in this PR? Fix github workflow based on zizmor recommendation for security best practice ## Are these changes tested? 
Yes ``` ➜ iceberg-rust git:(kevinjqliu/zizmor-fix) uvx --from zizmor zizmor --offline .github/ 🌈 zizmor v1.23.1 INFO audit: zizmor: 🌈 completed .github/actions/get-msrv/action.yml INFO audit: zizmor: 🌈 completed .github/actions/overwrite-package-version/action.yml INFO audit: zizmor: 🌈 completed .github/actions/setup-builder/action.yml INFO audit: zizmor: 🌈 completed .github/dependabot.yml INFO audit: zizmor: 🌈 completed .github/workflows/audit.yml INFO audit: zizmor: 🌈 completed .github/workflows/bindings_python_ci.yml INFO audit: zizmor: 🌈 completed .github/workflows/ci.yml INFO audit: zizmor: 🌈 completed .github/workflows/ci_typos.yml INFO audit: zizmor: 🌈 completed .github/workflows/codeql.yml INFO audit: zizmor: 🌈 completed .github/workflows/publish.yml INFO audit: zizmor: 🌈 completed .github/workflows/release_python.yml INFO audit: zizmor: 🌈 completed .github/workflows/release_python_nightly.yml INFO audit: zizmor: 🌈 completed .github/workflows/stale.yml INFO audit: zizmor: 🌈 completed .github/workflows/website.yml No findings to report. Good job!
(1 ignored, 37 suppressed) ``` --- .../overwrite-package-version/action.yml | 2 +- .github/workflows/audit.yml | 6 +- .github/workflows/bindings_python_ci.yml | 20 ++++-- .github/workflows/ci.yml | 38 +++++++---- .github/workflows/ci_typos.yml | 6 +- .github/workflows/codeql.yml | 4 +- .github/workflows/publish.yml | 19 +++++- .github/workflows/release_python.yml | 66 +++++++++---------- .github/workflows/release_python_nightly.yml | 22 ++++--- .github/workflows/stale.yml | 2 +- .github/workflows/website.yml | 10 +-- .github/workflows/zizmor.yml | 44 +++++++++++++ 12 files changed, 160 insertions(+), 79 deletions(-) create mode 100644 .github/workflows/zizmor.yml diff --git a/.github/actions/overwrite-package-version/action.yml b/.github/actions/overwrite-package-version/action.yml index 8a2739456e..aed736ecf9 100644 --- a/.github/actions/overwrite-package-version/action.yml +++ b/.github/actions/overwrite-package-version/action.yml @@ -25,7 +25,7 @@ runs: using: "composite" steps: - name: Setup Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: '3.12' diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index abe0c377c5..68731cbed3 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -44,11 +44,13 @@ jobs: runs-on: ubuntu-latest if: github.repository == 'apache/iceberg-rust' steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: rust-version: stable - - uses: rustsec/audit-check@v2.0.0 + - uses: rustsec/audit-check@69366f33c96575abad1ee0dba8212993eecbe998 # v2.0.0 with: token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/bindings_python_ci.yml b/.github/workflows/bindings_python_ci.yml index efd4a78098..a7abfcbeed 100644 --- 
a/.github/workflows/bindings_python_ci.yml +++ b/.github/workflows/bindings_python_ci.yml @@ -47,7 +47,9 @@ jobs: check-rust: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Check format working-directory: "bindings/python" run: cargo fmt --all -- --check @@ -58,8 +60,10 @@ jobs: check-python: runs-on: ubuntu-slim steps: - - uses: actions/checkout@v6 - - uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false + - uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1 with: version: "0.9.3" enable-cache: true @@ -85,16 +89,18 @@ jobs: - macos-latest - windows-latest steps: - - uses: actions/checkout@v6 - - uses: actions/setup-python@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 with: python-version: 3.12 - - uses: PyO3/maturin-action@v1 + - uses: PyO3/maturin-action@04ac600d27cdf7a9a280dadf7147097c42b757ad # v1 with: working-directory: "bindings/python" command: build args: --out dist -i python3.12 # Explicitly set interpreter; manylinux containers have multiple Pythons and maturin may pick an older one - - uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 + - uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1 with: version: "0.9.3" enable-cache: true diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index af96f9ad66..63ee893fa2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -53,19 +53,21 @@ jobs: - ubuntu-latest - macos-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: 
Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Check License Header - uses: apache/skywalking-eyes/header@v0.8.0 + uses: apache/skywalking-eyes/header@61275cc80d0798a405cb070f7d3a8aaf7cf2c2c1 # v0.8.0 - name: Check toml format run: make check-toml - name: Install protoc - uses: arduino/setup-protoc@v3 + uses: arduino/setup-protoc@c65c819552d16ad3c9b72d9dfd5ba5237b9c906b # v3 with: repo-token: ${{ secrets.GITHUB_TOKEN }} @@ -91,16 +93,18 @@ jobs: - macos-latest - windows-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Cache Rust artifacts - uses: swatinem/rust-cache@v2 + uses: swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2 - name: Install protoc - uses: arduino/setup-protoc@v3 + uses: arduino/setup-protoc@c65c819552d16ad3c9b72d9dfd5ba5237b9c906b # v3 with: repo-token: ${{ secrets.GITHUB_TOKEN }} @@ -117,13 +121,15 @@ jobs: - macos-latest - windows-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Cache Rust artifacts - uses: swatinem/rust-cache@v2 + uses: swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2 - name: Build run: cargo build -p iceberg --no-default-features @@ -138,24 +144,26 @@ jobs: - { name: "doc", args: "--doc --all-features --workspace" } name: Tests (${{ matrix.test-suite.name }}) steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Install protoc - uses: arduino/setup-protoc@v3 + uses: arduino/setup-protoc@c65c819552d16ad3c9b72d9dfd5ba5237b9c906b # v3 with: repo-token: ${{ 
secrets.GITHUB_TOKEN }} - name: Cache Rust artifacts - uses: swatinem/rust-cache@v2 + uses: swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2 with: key: ${{ matrix.test-suite.name }} - name: Install cargo-nextest if: matrix.test-suite.name == 'default' - uses: taiki-e/install-action@v2 + uses: taiki-e/install-action@0fde6d128a3d980ceac30be8c8b8739abd963b81 # v2.70.0 with: tool: cargo-nextest @@ -182,9 +190,11 @@ jobs: name: Verify MSRV runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Install protoc - uses: arduino/setup-protoc@v3 + uses: arduino/setup-protoc@c65c819552d16ad3c9b72d9dfd5ba5237b9c906b # v3 with: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Get MSRV diff --git a/.github/workflows/ci_typos.yml b/.github/workflows/ci_typos.yml index 8031cd8ca9..9373c7295d 100644 --- a/.github/workflows/ci_typos.yml +++ b/.github/workflows/ci_typos.yml @@ -43,6 +43,8 @@ jobs: env: FORCE_COLOR: 1 steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Check typos - uses: crate-ci/typos@v1.44.0 + uses: crate-ci/typos@631208b7aac2daa8b707f55e7331f9112b0e062d # v1.44.0 diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 75c10c7eaf..81bc6b16f8 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -46,11 +46,11 @@ jobs: persist-credentials: false - name: Initialize CodeQL - uses: github/codeql-action/init@d4b3ca9fa7f69d38bfcd667bdc45bc373d16277e # v4 + uses: github/codeql-action/init@c10b8064de6f491fea524254123dbe5e09572f13 # v4 with: languages: actions - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@d4b3ca9fa7f69d38bfcd667bdc45bc373d16277e # v4 + uses: github/codeql-action/analyze@c10b8064de6f491fea524254123dbe5e09572f13 # v4 with: category: "/language:actions" 
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 26f61118b7..d6ba35d9f4 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -32,6 +32,7 @@ permissions: jobs: publish: runs-on: ubuntu-latest + environment: publish strategy: max-parallel: 1 # Publish package one by one instead of flooding the registry matrix: @@ -46,7 +47,9 @@ jobs: - "crates/catalog/sql" - "crates/integrations/datafusion" steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Get MSRV id: get-msrv @@ -61,6 +64,18 @@ jobs: working-directory: ${{ matrix.package }} # Only publish if it's a tag and the tag is not a pre-release if: ${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '-') }} - run: cargo publish --all-features + run: cargo publish --all-features # zizmor: ignore[use-trusted-publishing] -- https://github.com/apache/iceberg-rust/issues/1539 env: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} + + # Trigger Python release after crate publishing completes. + # Only runs for tag pushes; for manual Python releases, use workflow_dispatch on release_python.yml directly. 
+ release-python: + needs: [publish] + if: ${{ startsWith(github.ref, 'refs/tags/') }} + permissions: + contents: read + id-token: write # Required for PyPI trusted publishing in the called workflow + uses: ./.github/workflows/release_python.yml + with: + release_tag: ${{ github.ref_name }} diff --git a/.github/workflows/release_python.yml b/.github/workflows/release_python.yml index abf8b52b6d..d9fcdd406b 100644 --- a/.github/workflows/release_python.yml +++ b/.github/workflows/release_python.yml @@ -18,10 +18,12 @@ name: Publish Python 🐍 distribution 📦 to PyPI on: - workflow_run: - workflows: ["Publish"] # Trigger this workflow after the "publish.yml" workflow completes - types: - - completed + workflow_call: + inputs: + release_tag: + description: 'Release tag (e.g., v0.4.0 or v0.4.0-rc.1)' + required: true + type: string workflow_dispatch: inputs: release_tag: @@ -33,37 +35,23 @@ permissions: contents: read jobs: - check-cargo-publish: - runs-on: ubuntu-latest - # Only run if the triggering workflow succeeded OR if manually triggered - if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }} - steps: - - run: echo 'The Publish workflow passed or was manually triggered' - validate-release-tag: runs-on: ubuntu-latest - needs: [check-cargo-publish] outputs: cargo-version: ${{ steps.validate.outputs.cargo-version }} is-rc: ${{ steps.validate.outputs.is-rc }} steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 if: ${{ github.event_name == 'workflow_dispatch' }} + with: + persist-credentials: false - name: Validate release tag format id: validate - # Use input for workflow_dispatch, otherwise use `workflow_run.head_branch` - # Note, `workflow_run.head_branch` does not contain `refs/tags/` prefix, just the tag name, i.e. `v0.4.0` or `v0.4.0-rc.1` # Valid formats: v.. OR v..-rc.
env: - DISPATCH_RELEASE_TAG: ${{ github.event.inputs.release_tag }} - RUN_HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }} + RELEASE_TAG: ${{ inputs.release_tag }} run: | - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - RELEASE_TAG="$DISPATCH_RELEASE_TAG" - else - RELEASE_TAG="$RUN_HEAD_BRANCH" - fi echo "Validating release tag: $RELEASE_TAG" if [[ ! "$RELEASE_TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)?$ ]]; then echo "❌ Invalid release tag format: $RELEASE_TAG" @@ -114,7 +102,9 @@ jobs: runs-on: ubuntu-latest needs: [validate-release-tag] steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Install toml-cli if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }} @@ -124,19 +114,21 @@ jobs: if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }} working-directory: "bindings/python" run: | - echo "Setting cargo version to: ${{ needs.validate-release-tag.outputs.cargo-version }}" - toml set Cargo.toml package.version ${{ needs.validate-release-tag.outputs.cargo-version }} > Cargo.toml.tmp + echo "Setting cargo version to: ${NEEDS_VALIDATE_RELEASE_TAG_OUTPUTS_CARGO_VERSION}" + toml set Cargo.toml package.version "${NEEDS_VALIDATE_RELEASE_TAG_OUTPUTS_CARGO_VERSION}" > Cargo.toml.tmp # doing this explicitly to avoid issue in Windows where `mv` does not overwrite existing file rm Cargo.toml mv Cargo.toml.tmp Cargo.toml + env: + NEEDS_VALIDATE_RELEASE_TAG_OUTPUTS_CARGO_VERSION: ${{ needs.validate-release-tag.outputs.cargo-version }} - - uses: PyO3/maturin-action@v1 + - uses: PyO3/maturin-action@04ac600d27cdf7a9a280dadf7147097c42b757ad # v1 with: working-directory: "bindings/python" command: sdist args: -o dist - name: Upload sdist - uses: actions/upload-artifact@v7 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 with: name: wheels-sdist path: bindings/python/dist @@ -158,7 +150,9 @@ jobs: } - { os:
ubuntu-latest, target: "armv7l" } steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Install toml-cli if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }} @@ -167,14 +161,16 @@ jobs: - name: Set cargo version for RC if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }} working-directory: "bindings/python" + env: + CARGO_VERSION: ${{ needs.validate-release-tag.outputs.cargo-version }} run: | - echo "Setting cargo version to: ${{ needs.validate-release-tag.outputs.cargo-version }}" - toml set Cargo.toml package.version ${{ needs.validate-release-tag.outputs.cargo-version }} > Cargo.toml.tmp + echo "Setting cargo version to: $CARGO_VERSION" + toml set Cargo.toml package.version "$CARGO_VERSION" > Cargo.toml.tmp # doing this explicitly to avoid issue in Windows where `mv` does not overwrite existing file rm Cargo.toml mv Cargo.toml.tmp Cargo.toml - - uses: actions/setup-python@v6 + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 with: python-version: 3.12 - name: Get MSRV @@ -185,7 +181,7 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: ${{ steps.get-msrv.outputs.msrv }} - - uses: PyO3/maturin-action@v1 + - uses: PyO3/maturin-action@04ac600d27cdf7a9a280dadf7147097c42b757ad # v1 with: target: ${{ matrix.target }} manylinux: ${{ matrix.manylinux || 'auto' }} @@ -193,7 +189,7 @@ jobs: command: build args: --release -o dist -i python3.12 # Explicitly set interpreter; manylinux containers have multiple Pythons and maturin may pick an older one - name: Upload wheels - uses: actions/upload-artifact@v7 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 with: name: wheels-${{ matrix.os }}-${{ matrix.target }} path: bindings/python/dist @@ -212,13 +208,13 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v8 + uses: 
actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8 with: pattern: wheels-* merge-multiple: true path: bindings/python/dist - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: skip-existing: true packages-dir: bindings/python/dist diff --git a/.github/workflows/release_python_nightly.yml b/.github/workflows/release_python_nightly.yml index 595cb42d05..86e589acd6 100644 --- a/.github/workflows/release_python_nightly.yml +++ b/.github/workflows/release_python_nightly.yml @@ -40,20 +40,22 @@ jobs: if: github.repository == 'apache/iceberg-rust' # Only run for apache repo runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - uses: ./.github/actions/overwrite-package-version # Overwrite package version with timestamp with: timestamp: ${{ needs.set-version.outputs.TIMESTAMP }} - - uses: PyO3/maturin-action@v1 + - uses: PyO3/maturin-action@04ac600d27cdf7a9a280dadf7147097c42b757ad # v1 with: working-directory: "bindings/python" command: sdist args: -o dist - name: Upload sdist - uses: actions/upload-artifact@v7 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 with: name: wheels-sdist path: bindings/python/dist @@ -76,13 +78,15 @@ jobs: } - { os: ubuntu-latest, target: "armv7l" } steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - uses: ./.github/actions/overwrite-package-version # Overwrite package version with timestamp with: timestamp: ${{ needs.set-version.outputs.TIMESTAMP }} - - uses: actions/setup-python@v6 + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 with: python-version: 3.12 @@ -95,7 +99,7 @@ jobs: with: rust-version: ${{ steps.get-msrv.outputs.msrv }} - - uses: 
PyO3/maturin-action@v1 + - uses: PyO3/maturin-action@04ac600d27cdf7a9a280dadf7147097c42b757ad # v1 with: target: ${{ matrix.target }} manylinux: ${{ matrix.manylinux || 'auto' }} @@ -104,7 +108,7 @@ jobs: args: --release -o dist -i python3.12 # Explicitly set interpreter; manylinux containers have multiple Pythons and maturin may pick an older one - name: Upload wheels - uses: actions/upload-artifact@v7 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 with: name: wheels-${{ matrix.os }}-${{ matrix.target }} path: bindings/python/dist @@ -122,7 +126,7 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v8 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8 with: pattern: wheels-* merge-multiple: true @@ -132,7 +136,7 @@ jobs: - name: Publish to TestPyPI id: publish-testpypi continue-on-error: true - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: repository-url: https://test.pypi.org/legacy/ skip-existing: true diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index e2afce4c71..c3d3f18294 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -32,7 +32,7 @@ jobs: if: github.repository_owner == 'apache' runs-on: ubuntu-24.04 steps: - - uses: actions/stale@v10.2.0 + - uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f # v10.2.0 with: # stale issues stale-issue-label: 'stale,security' diff --git a/.github/workflows/website.yml b/.github/workflows/website.yml index 59bd2c6f2c..71fb9503c9 100644 --- a/.github/workflows/website.yml +++ b/.github/workflows/website.yml @@ -39,15 +39,17 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Setup mdBook - uses: peaceiris/actions-mdbook@v2 + uses: 
peaceiris/actions-mdbook@ee69d230fe19748b7abf22df32acaa93833fad08 # v2 with: mdbook-version: "0.4.36" - name: Install protoc - uses: arduino/setup-protoc@v3 + uses: arduino/setup-protoc@c65c819552d16ad3c9b72d9dfd5ba5237b9c906b # v3 with: repo-token: ${{ secrets.GITHUB_TOKEN }} @@ -64,7 +66,7 @@ jobs: cp -r target/doc ./website/book/api - name: Deploy to gh-pages - uses: peaceiris/actions-gh-pages@v4.0.0 + uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 if: github.event_name == 'push' && github.ref_name == 'main' with: github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/zizmor.yml b/.github/workflows/zizmor.yml new file mode 100644 index 0000000000..313835fcbe --- /dev/null +++ b/.github/workflows/zizmor.yml @@ -0,0 +1,44 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +name: GitHub Actions Security Analysis with zizmor 🌈 + +on: + push: + branches: ["main"] + pull_request: + branches: ["**"] + +permissions: {} + +jobs: + zizmor: + name: Run zizmor 🌈 + runs-on: ubuntu-latest + permissions: {} + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Run zizmor 🌈 + uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2 + with: + advanced-security: false From 14f2e1439cc765c5ae666e0e028c9cb3d089660b Mon Sep 17 00:00:00 2001 From: Geser Dugarov Date: Tue, 31 Mar 2026 16:46:04 +0700 Subject: [PATCH 34/39] fix(spec): clean up -1 snapshot ID sentinel usage and add deserialization test (#2294) ## Which issue does this PR close? - Closes #352. ## What changes are included in this PR? - Replaces hardcoded `-1` with `EMPTY_SNAPSHOT_ID` constant in table metadata deserialization. - Adds `test_empty_snapshot_id_is_normalized_to_none` to verify that the Java-style `-1` sentinel for `current-snapshot-id` is normalized to `None` during deserialization. - Removes the public `UNASSIGNED_SNAPSHOT_ID` constant and moves it to a private constant scoped to the manifest writer module. ## Are these changes tested? Adds a test `test_empty_snapshot_id_is_normalized_to_none` verifying the deserialization normalization.
--- crates/iceberg/src/spec/manifest/writer.rs | 6 ++++- crates/iceberg/src/spec/snapshot.rs | 2 -- crates/iceberg/src/spec/table_metadata.rs | 25 +++++++++++++++---- .../src/spec/table_metadata_builder.rs | 2 +- 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/crates/iceberg/src/spec/manifest/writer.rs b/crates/iceberg/src/spec/manifest/writer.rs index cc5ef737fb..1b3b605fd8 100644 --- a/crates/iceberg/src/spec/manifest/writer.rs +++ b/crates/iceberg/src/spec/manifest/writer.rs @@ -32,10 +32,14 @@ use crate::spec::manifest::_serde::{ManifestEntryV1, ManifestEntryV2}; use crate::spec::manifest::{manifest_schema_v1, manifest_schema_v2}; use crate::spec::{ DataContentType, DataFile, FieldSummary, ManifestEntry, ManifestFile, ManifestMetadata, - ManifestStatus, PrimitiveLiteral, SchemaRef, StructType, UNASSIGNED_SNAPSHOT_ID, + ManifestStatus, PrimitiveLiteral, SchemaRef, StructType, }; use crate::{Error, ErrorKind}; +/// Placeholder for snapshot ID. The field with this value must be replaced +/// with the actual snapshot ID before it is committed. +const UNASSIGNED_SNAPSHOT_ID: i64 = -1; + /// The builder used to create a [`ManifestWriter`]. pub struct ManifestWriterBuilder { output: OutputFile, diff --git a/crates/iceberg/src/spec/snapshot.rs b/crates/iceberg/src/spec/snapshot.rs index f60579e014..72b5417c47 100644 --- a/crates/iceberg/src/spec/snapshot.rs +++ b/crates/iceberg/src/spec/snapshot.rs @@ -33,8 +33,6 @@ use crate::{Error, ErrorKind}; /// The ref name of the main branch of the table. pub const MAIN_BRANCH: &str = "main"; -/// Placeholder for snapshot ID. The field with this value must be replaced with the actual snapshot ID before it is committed. -pub const UNASSIGNED_SNAPSHOT_ID: i64 = -1; /// Reference to [`Snapshot`]. 
pub type SnapshotRef = Arc; diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index b91599b74f..f16ee1a6ae 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -47,6 +47,9 @@ use crate::{Error, ErrorKind}; static MAIN_BRANCH: &str = "main"; pub(crate) static ONE_MINUTE_MS: i64 = 60_000; +/// Sentinel value used by the Java implementation and older metadata files +/// to represent a missing/empty current snapshot ID. During deserialization, +/// this value is normalized to `None`. pub(crate) static EMPTY_SNAPSHOT_ID: i64 = -1; pub(crate) static INITIAL_SEQUENCE_NUMBER: i64 = 0; @@ -765,8 +768,8 @@ pub(super) mod _serde { use uuid::Uuid; use super::{ - DEFAULT_PARTITION_SPEC_ID, FormatVersion, MAIN_BRANCH, MetadataLog, SnapshotLog, - TableMetadata, + DEFAULT_PARTITION_SPEC_ID, EMPTY_SNAPSHOT_ID, FormatVersion, MAIN_BRANCH, MetadataLog, + SnapshotLog, TableMetadata, }; use crate::spec::schema::_serde::{SchemaV1, SchemaV2}; use crate::spec::snapshot::_serde::{SnapshotV1, SnapshotV2, SnapshotV3}; @@ -950,7 +953,7 @@ pub(super) mod _serde { encryption_keys, snapshots, } = value; - let current_snapshot_id = if let &Some(-1) = &value.current_snapshot_id { + let current_snapshot_id = if value.current_snapshot_id == Some(EMPTY_SNAPSHOT_ID) { None } else { value.current_snapshot_id @@ -1063,7 +1066,7 @@ pub(super) mod _serde { fn try_from(value: TableMetadataV2) -> Result { let snapshots = value.snapshots; let value = value.shared; - let current_snapshot_id = if let &Some(-1) = &value.current_snapshot_id { + let current_snapshot_id = if value.current_snapshot_id == Some(EMPTY_SNAPSHOT_ID) { None } else { value.current_snapshot_id @@ -1170,7 +1173,7 @@ pub(super) mod _serde { impl TryFrom for TableMetadata { type Error = Error; fn try_from(value: TableMetadataV1) -> Result { - let current_snapshot_id = if let &Some(-1) = &value.current_snapshot_id { + let current_snapshot_id = 
if value.current_snapshot_id == Some(EMPTY_SNAPSHOT_ID) { None } else { value.current_snapshot_id @@ -3300,6 +3303,18 @@ mod tests { check_table_metadata_serde(&metadata, expected); } + #[test] + fn test_empty_snapshot_id_is_normalized_to_none() { + let metadata = + fs::read_to_string("testdata/table_metadata/TableMetadataV1Valid.json").unwrap(); + let deserialized: TableMetadata = serde_json::from_str(&metadata).unwrap(); + assert_eq!( + deserialized.current_snapshot_id(), + None, + "current_snapshot_id of -1 should be deserialized as None" + ); + } + #[test] fn test_table_metadata_v1_compat() { let metadata = diff --git a/crates/iceberg/src/spec/table_metadata_builder.rs b/crates/iceberg/src/spec/table_metadata_builder.rs index 62311a15a2..65dbae1bfc 100644 --- a/crates/iceberg/src/spec/table_metadata_builder.rs +++ b/crates/iceberg/src/spec/table_metadata_builder.rs @@ -570,7 +570,7 @@ impl TableMetadataBuilder { /// Remove a reference /// - /// If `ref_name='main'` the current snapshot id is set to -1. + /// If `ref_name='main'` the current snapshot id is set to `None`. pub fn remove_ref(mut self, ref_name: &str) -> Self { if ref_name == MAIN_BRANCH { self.metadata.current_snapshot_id = None; From d8011a0c75e84711f54608c7f13cc79e8ec2fa65 Mon Sep 17 00:00:00 2001 From: emkornfield Date: Tue, 31 Mar 2026 02:58:13 -0700 Subject: [PATCH 35/39] feat!: Enhance compression codec enum. (#2288) ## Which issue does this PR close? This is an intermediate PR for #1731 I'm splitting out changes from https://github.com/apache/iceberg-rust/pull/1851 to the compression codec to make it easier to review. Once we decide on approach here and merge it I'll update https://github.com/apache/iceberg-rust/pull/1851 accordingly. ## What changes are included in this PR? - Add optional compression level to gzip and zstd (needed for when avro compression usage). 
- Add Snappy as a compression codec (also will be used for Avro) - Manually code up some previously auto-generated methods as a result. AI helped with an initial version of this PR. ## Are these changes tested? Additional unit tests --- .../iceberg/src/catalog/metadata_location.rs | 21 ++- crates/iceberg/src/compression.rs | 150 +++++++++++++++--- crates/iceberg/src/puffin/metadata.rs | 5 +- crates/iceberg/src/puffin/mod.rs | 39 ++--- crates/iceberg/src/puffin/reader.rs | 4 +- crates/iceberg/src/puffin/test_utils.rs | 4 +- crates/iceberg/src/puffin/writer.rs | 11 +- crates/iceberg/src/spec/table_metadata.rs | 6 +- crates/iceberg/src/spec/table_properties.rs | 24 +-- 9 files changed, 191 insertions(+), 73 deletions(-) diff --git a/crates/iceberg/src/catalog/metadata_location.rs b/crates/iceberg/src/catalog/metadata_location.rs index ed28118879..acd041d5e1 100644 --- a/crates/iceberg/src/catalog/metadata_location.rs +++ b/crates/iceberg/src/catalog/metadata_location.rs @@ -114,9 +114,9 @@ impl MetadataLocation { ))?; // Check for compression suffix (e.g., .gz) - let gzip_suffix = CompressionCodec::Gzip.suffix()?; + let gzip_suffix = CompressionCodec::gzip_default().suffix()?; let (stripped, compression_codec) = if let Some(s) = stripped.strip_suffix(gzip_suffix) { - (s, CompressionCodec::Gzip) + (s, CompressionCodec::gzip_default()) } else { (stripped, CompressionCodec::None) }; @@ -261,7 +261,7 @@ mod test { table_location: "/abc".to_string(), version: 1234567, id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), - compression_codec: CompressionCodec::Gzip, + compression_codec: CompressionCodec::gzip_default(), }), ), // Negative version @@ -345,10 +345,16 @@ mod test { "/test/table/metadata/00005-81056704-ce5b-41c4-bb83-eb6408081af6.gz.metadata.json", ) .unwrap(); - assert_eq!(location_gzip.compression_codec, CompressionCodec::Gzip); + assert_eq!( + location_gzip.compression_codec, + CompressionCodec::gzip_default() + ); let next_gzip = 
location_gzip.with_next_version(); - assert_eq!(next_gzip.compression_codec, CompressionCodec::Gzip); + assert_eq!( + next_gzip.compression_codec, + CompressionCodec::gzip_default() + ); assert_eq!(next_gzip.version, 6); } @@ -369,7 +375,10 @@ mod test { ); let metadata_gzip = create_test_metadata(props_gzip); let updated_gzip = location.with_new_metadata(&metadata_gzip); - assert_eq!(updated_gzip.compression_codec, CompressionCodec::Gzip); + assert_eq!( + updated_gzip.compression_codec, + CompressionCodec::gzip_default() + ); assert_eq!(updated_gzip.version, 0); assert_eq!( updated_gzip.to_string(), diff --git a/crates/iceberg/src/compression.rs b/crates/iceberg/src/compression.rs index 42f5298437..929d9226e7 100644 --- a/crates/iceberg/src/compression.rs +++ b/crates/iceberg/src/compression.rs @@ -17,28 +17,101 @@ //! Compression codec support for data compression and decompression. +use std::fmt; use std::io::{Read, Write}; use flate2::Compression; use flate2::read::GzDecoder; use flate2::write::GzEncoder; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::{Error, ErrorKind, Result}; +/// Default compression level for Zstandard (zstd). +const ZSTD_DEFAULT_LEVEL: u8 = 3; +/// Default compression level for Gzip. +const GZIP_DEFAULT_LEVEL: u8 = 6; +/// Maximum compression level for Gzip. +const GZIP_MAX_LEVEL: u8 = 9; + /// Data compression formats -#[derive(Debug, PartialEq, Eq, Clone, Copy, Default, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] +#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)] pub enum CompressionCodec { #[default] /// No compression None, /// LZ4 single compression frame with content size present Lz4, - /// Zstandard single compression frame with content size present - Zstd, - /// Gzip compression - Gzip, + /// Zstandard single compression frame with content size present. + /// Level range is 0–22, where 0 means default compression level (not no compression).
+ /// Use [`CompressionCodec::zstd_default`] to construct with the default level. + Zstd(u8), + /// Gzip compression. Level range is 0–9, where 0 means no compression. + /// Use [`CompressionCodec::gzip_default`] to construct with the default level. + Gzip(u8), + /// Snappy compression + Snappy, +} + +impl CompressionCodec { + /// Returns a Zstd codec with the default compression level. + pub const fn zstd_default() -> Self { + CompressionCodec::Zstd(ZSTD_DEFAULT_LEVEL) + } + + /// Returns a Gzip codec with the default compression level. + pub const fn gzip_default() -> Self { + CompressionCodec::Gzip(GZIP_DEFAULT_LEVEL) + } + + /// Returns the codec name as used in serialization and error messages. + pub fn name(&self) -> &'static str { + match self { + CompressionCodec::None => "none", + CompressionCodec::Lz4 => "lz4", + CompressionCodec::Zstd(_) => "zstd", + CompressionCodec::Gzip(_) => "gzip", + CompressionCodec::Snappy => "snappy", + } + } +} + +// Note: serialize/deserialize do not round-trip the compression level. Iceberg configuration +// stores only the codec name (e.g. "zstd"), not the level, so deserialization always produces the +// default level. A `Zstd(5)` written to metadata will be read back as `Zstd(3)`. Some +// compression configuration (e.g. Avro metadata) has a separate level field alongside the codec name.
+impl Serialize for CompressionCodec { + fn serialize(&self, serializer: S) -> std::result::Result { + serializer.serialize_str(self.name()) + } +} + +impl<'de> Deserialize<'de> for CompressionCodec { + fn deserialize>(deserializer: D) -> std::result::Result { + let s = String::deserialize(deserializer)?; + match s.to_lowercase().as_str() { + "none" => Ok(CompressionCodec::None), + "lz4" => Ok(CompressionCodec::Lz4), + "zstd" => Ok(CompressionCodec::zstd_default()), + "gzip" => Ok(CompressionCodec::gzip_default()), + "snappy" => Ok(CompressionCodec::Snappy), + other => Err(serde::de::Error::unknown_variant(other, &[ + "none", "lz4", "zstd", "gzip", "snappy", + ])), + } + } +} + +impl fmt::Display for CompressionCodec { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CompressionCodec::None => write!(f, "None"), + CompressionCodec::Lz4 => write!(f, "Lz4"), + CompressionCodec::Zstd(level) => write!(f, "Zstd(level={level})"), + CompressionCodec::Gzip(level) => write!(f, "Gzip(level={level})"), + CompressionCodec::Snappy => write!(f, "Snappy"), + } + } } impl CompressionCodec { @@ -49,13 +122,17 @@ impl CompressionCodec { ErrorKind::FeatureUnsupported, "LZ4 decompression is not supported currently", )), - CompressionCodec::Zstd => Ok(zstd::stream::decode_all(&bytes[..])?), - CompressionCodec::Gzip => { + CompressionCodec::Zstd(_) => Ok(zstd::stream::decode_all(&bytes[..])?), + CompressionCodec::Gzip(_) => { let mut decoder = GzDecoder::new(&bytes[..]); let mut decompressed = Vec::new(); decoder.read_to_end(&mut decompressed)?; Ok(decompressed) } + CompressionCodec::Snappy => Err(Error::new( + ErrorKind::FeatureUnsupported, + "Snappy decompression is not supported currently", + )), } } @@ -66,19 +143,24 @@ impl CompressionCodec { ErrorKind::FeatureUnsupported, "LZ4 compression is not supported currently", )), - CompressionCodec::Zstd => { + CompressionCodec::Zstd(level) => { let writer = Vec::::new(); - let mut encoder = 
zstd::stream::Encoder::new(writer, 3)?; + let mut encoder = zstd::stream::Encoder::new(writer, *level as i32)?; encoder.include_checksum(true)?; encoder.set_pledged_src_size(Some(bytes.len().try_into()?))?; std::io::copy(&mut &bytes[..], &mut encoder)?; Ok(encoder.finish()?) } - CompressionCodec::Gzip => { - let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + CompressionCodec::Gzip(level) => { + let compression = Compression::new((*level).min(GZIP_MAX_LEVEL) as u32); + let mut encoder = GzEncoder::new(Vec::new(), compression); encoder.write_all(&bytes)?; Ok(encoder.finish()?) } + CompressionCodec::Snappy => Err(Error::new( + ErrorKind::FeatureUnsupported, + "Snappy compression is not supported currently", + )), } } @@ -95,8 +177,10 @@ impl CompressionCodec { pub fn suffix(&self) -> Result<&'static str> { match self { CompressionCodec::None => Ok(""), - CompressionCodec::Gzip => Ok(".gz"), - codec @ (CompressionCodec::Lz4 | CompressionCodec::Zstd) => Err(Error::new( + CompressionCodec::Gzip(_) => Ok(".gz"), + codec @ (CompressionCodec::Lz4 + | CompressionCodec::Zstd(_) + | CompressionCodec::Snappy) => Err(Error::new( ErrorKind::FeatureUnsupported, format!("suffix not defined for {codec:?}"), )), @@ -123,7 +207,10 @@ mod tests { async fn test_compression_codec_compress() { let bytes_vec = [0_u8; 100].to_vec(); - let compression_codecs = [CompressionCodec::Zstd, CompressionCodec::Gzip]; + let compression_codecs = [ + CompressionCodec::zstd_default(), + CompressionCodec::gzip_default(), + ]; for codec in compression_codecs { let compressed = codec.compress(bytes_vec.clone()).unwrap(); @@ -135,7 +222,10 @@ mod tests { #[tokio::test] async fn test_compression_codec_unsupported() { - let unsupported_codecs = [(CompressionCodec::Lz4, "LZ4")]; + let unsupported_codecs = [ + (CompressionCodec::Lz4, "LZ4"), + (CompressionCodec::Snappy, "Snappy"), + ]; let bytes_vec = [0_u8; 100].to_vec(); for (codec, name) in unsupported_codecs { @@ -153,18 +243,34 @@ mod 
tests { #[test] fn test_suffix() { - // Test supported codecs assert_eq!(CompressionCodec::None.suffix().unwrap(), ""); - assert_eq!(CompressionCodec::Gzip.suffix().unwrap(), ".gz"); + assert_eq!(CompressionCodec::gzip_default().suffix().unwrap(), ".gz"); - // Test unsupported codecs return errors assert!(CompressionCodec::Lz4.suffix().is_err()); - assert!(CompressionCodec::Zstd.suffix().is_err()); + assert!(CompressionCodec::zstd_default().suffix().is_err()); + assert!(CompressionCodec::Snappy.suffix().is_err()); let lz4_err = CompressionCodec::Lz4.suffix().unwrap_err(); assert!(lz4_err.to_string().contains("suffix not defined for Lz4")); - let zstd_err = CompressionCodec::Zstd.suffix().unwrap_err(); + let zstd_err = CompressionCodec::zstd_default().suffix().unwrap_err(); assert!(zstd_err.to_string().contains("suffix not defined for Zstd")); } + + #[test] + fn test_display() { + assert_eq!(CompressionCodec::None.to_string(), "None"); + assert_eq!(CompressionCodec::Lz4.to_string(), "Lz4"); + assert_eq!( + CompressionCodec::zstd_default().to_string(), + "Zstd(level=3)" + ); + assert_eq!(CompressionCodec::Zstd(5).to_string(), "Zstd(level=5)"); + assert_eq!( + CompressionCodec::gzip_default().to_string(), + "Gzip(level=6)" + ); + assert_eq!(CompressionCodec::Gzip(9).to_string(), "Gzip(level=9)"); + assert_eq!(CompressionCodec::Snappy.to_string(), "Snappy"); + } } diff --git a/crates/iceberg/src/puffin/metadata.rs b/crates/iceberg/src/puffin/metadata.rs index 1d39cf249b..e2dfc10c23 100644 --- a/crates/iceberg/src/puffin/metadata.rs +++ b/crates/iceberg/src/puffin/metadata.rs @@ -985,6 +985,9 @@ mod tests { assert!(result.is_ok()); let metadata = result.unwrap(); assert_eq!(metadata.blobs.len(), 1); - assert_eq!(metadata.blobs[0].compression_codec, CompressionCodec::Gzip); + assert_eq!( + metadata.blobs[0].compression_codec, + CompressionCodec::gzip_default() + ); } } diff --git a/crates/iceberg/src/puffin/mod.rs b/crates/iceberg/src/puffin/mod.rs index 
854d4070ff..0e054cac51 100644 --- a/crates/iceberg/src/puffin/mod.rs +++ b/crates/iceberg/src/puffin/mod.rs @@ -26,30 +26,22 @@ pub use blob::{APACHE_DATASKETCHES_THETA_V1, Blob, DELETION_VECTOR_V1}; pub use crate::compression::CompressionCodec; -/// Compression codecs supported by the Puffin spec. -const SUPPORTED_PUFFIN_CODECS: &[CompressionCodec] = &[ - CompressionCodec::None, - CompressionCodec::Lz4, - CompressionCodec::Zstd, -]; - /// Validates that the compression codec is supported for Puffin files. /// Returns an error if the codec is not supported. fn validate_puffin_compression(codec: CompressionCodec) -> Result<()> { - if !SUPPORTED_PUFFIN_CODECS.contains(&codec) { - let supported_names: Vec = SUPPORTED_PUFFIN_CODECS - .iter() - .map(|c| format!("{c:?}")) - .collect(); - return Err(Error::new( + match codec { + CompressionCodec::None | CompressionCodec::Lz4 | CompressionCodec::Zstd(_) => Ok(()), + other => Err(Error::new( ErrorKind::DataInvalid, format!( - "Compression codec {codec:?} is not supported for Puffin files. Only {} are supported.", - supported_names.join(", ") + "Compression codec {} is not supported for Puffin files. 
Only {}, {}, and {} are supported.", + other.name(), + CompressionCodec::None.name(), + CompressionCodec::Lz4.name(), + CompressionCodec::zstd_default().name() ), - )); + )), } - Ok(()) } mod metadata; @@ -70,12 +62,13 @@ mod tests { #[test] fn test_puffin_codec_validation() { - // All codecs in SUPPORTED_PUFFIN_CODECS should be valid - for codec in SUPPORTED_PUFFIN_CODECS { - assert!(validate_puffin_compression(*codec).is_ok()); - } + // Supported codecs + assert!(validate_puffin_compression(CompressionCodec::None).is_ok()); + assert!(validate_puffin_compression(CompressionCodec::Lz4).is_ok()); + assert!(validate_puffin_compression(CompressionCodec::zstd_default()).is_ok()); + assert!(validate_puffin_compression(CompressionCodec::Zstd(5)).is_ok()); - // Gzip should not be supported for Puffin files - assert!(validate_puffin_compression(CompressionCodec::Gzip).is_err()); + // Unsupported codecs + assert!(validate_puffin_compression(CompressionCodec::gzip_default()).is_err()); } } diff --git a/crates/iceberg/src/puffin/reader.rs b/crates/iceberg/src/puffin/reader.rs index d272f02d41..0aced4186f 100644 --- a/crates/iceberg/src/puffin/reader.rs +++ b/crates/iceberg/src/puffin/reader.rs @@ -144,7 +144,7 @@ mod tests { sequence_number: 1, offset: 4, length: 10, - compression_codec: CompressionCodec::Gzip, + compression_codec: CompressionCodec::gzip_default(), properties: HashMap::new(), }; @@ -153,7 +153,7 @@ mod tests { assert!(result.is_err()); let err = result.unwrap_err(); assert_eq!(err.kind(), ErrorKind::DataInvalid); - assert!(err.to_string().contains("Gzip")); + assert!(err.to_string().contains("gzip")); assert!( err.to_string() .contains("is not supported for Puffin files") diff --git a/crates/iceberg/src/puffin/test_utils.rs b/crates/iceberg/src/puffin/test_utils.rs index 39fecc6f80..e0844e2002 100644 --- a/crates/iceberg/src/puffin/test_utils.rs +++ b/crates/iceberg/src/puffin/test_utils.rs @@ -77,7 +77,7 @@ pub(crate) fn 
zstd_compressed_metric_blob_0_metadata() -> BlobMetadata { sequence_number: METRIC_BLOB_0_SEQUENCE_NUMBER, offset: 4, length: 22, - compression_codec: CompressionCodec::Zstd, + compression_codec: CompressionCodec::zstd_default(), properties: HashMap::new(), } } @@ -134,7 +134,7 @@ pub(crate) fn zstd_compressed_metric_blob_1_metadata() -> BlobMetadata { sequence_number: METRIC_BLOB_1_SEQUENCE_NUMBER, offset: 26, length: 77, - compression_codec: CompressionCodec::Zstd, + compression_codec: CompressionCodec::zstd_default(), properties: HashMap::new(), } } diff --git a/crates/iceberg/src/puffin/writer.rs b/crates/iceberg/src/puffin/writer.rs index 30b97f09dd..4af4970b04 100644 --- a/crates/iceberg/src/puffin/writer.rs +++ b/crates/iceberg/src/puffin/writer.rs @@ -251,7 +251,8 @@ mod tests { async fn test_write_zstd_compressed_metric_data() { let temp_dir = TempDir::new().unwrap(); let blobs = vec![blob_0(), blob_1()]; - let blobs_with_compression = blobs_with_compression(blobs.clone(), CompressionCodec::Zstd); + let blobs_with_compression = + blobs_with_compression(blobs.clone(), CompressionCodec::zstd_default()); let input_file = write_puffin_file(&temp_dir, blobs_with_compression, file_properties()) .await @@ -323,7 +324,8 @@ mod tests { async fn test_zstd_compressed_metric_data_is_bit_identical_to_java_generated_file() { let temp_dir = TempDir::new().unwrap(); let blobs = vec![blob_0(), blob_1()]; - let blobs_with_compression = blobs_with_compression(blobs, CompressionCodec::Zstd); + let blobs_with_compression = + blobs_with_compression(blobs, CompressionCodec::zstd_default()); assert_files_are_bit_identical( write_puffin_file(&temp_dir, blobs_with_compression, file_properties()) @@ -338,14 +340,15 @@ mod tests { async fn test_gzip_compression_rejected() { let temp_dir = TempDir::new().unwrap(); let blobs = vec![blob_0()]; - let blobs_with_compression = blobs_with_compression(blobs, CompressionCodec::Gzip); + let blobs_with_compression = + 
blobs_with_compression(blobs, CompressionCodec::gzip_default()); let result = write_puffin_file(&temp_dir, blobs_with_compression, file_properties()).await; assert!(result.is_err()); let err = result.unwrap_err(); assert_eq!(err.kind(), ErrorKind::DataInvalid); - assert!(err.to_string().contains("Gzip")); + assert!(err.to_string().contains("gzip")); assert!( err.to_string() .contains("is not supported for Puffin files") diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index f16ee1a6ae..607fd98350 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -460,7 +460,7 @@ impl TableMetadata { && metadata_content[0] == 0x1F && metadata_content[1] == 0x8B { - let decompressed_data = CompressionCodec::Gzip + let decompressed_data = CompressionCodec::gzip_default() .decompress(metadata_content.to_vec()) .map_err(|e| { Error::new( @@ -502,7 +502,7 @@ impl TableMetadata { // Apply compression based on codec let data_to_write = match codec { - CompressionCodec::Gzip => codec.compress(json_data)?, + CompressionCodec::Gzip(_) => codec.compress(json_data)?, CompressionCodec::None => json_data, _ => { return Err(Error::new( @@ -3633,7 +3633,7 @@ mod tests { let original_metadata: TableMetadata = get_test_table_metadata("TableMetadataV2Valid.json"); let json = serde_json::to_string(&original_metadata).unwrap(); - let compressed = CompressionCodec::Gzip + let compressed = CompressionCodec::gzip_default() .compress(json.into_bytes()) .expect("failed to compress metadata"); std::fs::write(&metadata_location, &compressed).expect("failed to write metadata"); diff --git a/crates/iceberg/src/spec/table_properties.rs b/crates/iceberg/src/spec/table_properties.rs index 07c157304e..a3d4e7fdaa 100644 --- a/crates/iceberg/src/spec/table_properties.rs +++ b/crates/iceberg/src/spec/table_properties.rs @@ -78,18 +78,22 @@ pub(crate) fn parse_metadata_file_compression( Error::new( 
ErrorKind::DataInvalid, format!( - "Invalid metadata compression codec: {value}. Only 'none' and 'gzip' are supported." + "Invalid metadata compression codec: {value}. Only '{}' and '{}' are supported.", + CompressionCodec::None.name(), + CompressionCodec::gzip_default().name() ), ) })?; // Validate that only None and Gzip are used for metadata match codec { - CompressionCodec::None | CompressionCodec::Gzip => Ok(codec), - CompressionCodec::Lz4 | CompressionCodec::Zstd => Err(Error::new( + CompressionCodec::None | CompressionCodec::Gzip(_) => Ok(codec), + _ => Err(Error::new( ErrorKind::DataInvalid, format!( - "Invalid metadata compression codec: {value}. Only 'none' and 'gzip' are supported for metadata files." + "Invalid metadata compression codec: {value}. Only '{}' and '{}' are supported for metadata files.", + CompressionCodec::None.name(), + CompressionCodec::gzip_default().name() ), )), } @@ -324,7 +328,7 @@ mod tests { let table_properties = TableProperties::try_from(&props).unwrap(); assert_eq!( table_properties.metadata_compression_codec, - CompressionCodec::Gzip + CompressionCodec::gzip_default() ); } @@ -351,7 +355,7 @@ mod tests { let table_properties = TableProperties::try_from(&props_upper).unwrap(); assert_eq!( table_properties.metadata_compression_codec, - CompressionCodec::Gzip + CompressionCodec::gzip_default() ); // Test mixed case @@ -362,7 +366,7 @@ mod tests { let table_properties = TableProperties::try_from(&props_mixed).unwrap(); assert_eq!( table_properties.metadata_compression_codec, - CompressionCodec::Gzip + CompressionCodec::gzip_default() ); // Test "NONE" should also be case-insensitive @@ -517,7 +521,7 @@ mod tests { )]); assert_eq!( parse_metadata_file_compression(&props).unwrap(), - CompressionCodec::Gzip + CompressionCodec::gzip_default() ); // Test case insensitivity - "NONE" @@ -537,7 +541,7 @@ mod tests { )]); assert_eq!( parse_metadata_file_compression(&props).unwrap(), - CompressionCodec::Gzip + 
CompressionCodec::gzip_default() ); // Test case insensitivity - "GzIp" @@ -547,7 +551,7 @@ mod tests { )]); assert_eq!( parse_metadata_file_compression(&props).unwrap(), - CompressionCodec::Gzip + CompressionCodec::gzip_default() ); // Test default when property is missing From ff6c7006205c64966402207217a6e3650868ce0c Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Tue, 31 Mar 2026 15:04:00 -0700 Subject: [PATCH 36/39] ci: allows nightly to run on workflow dispatch (#2304) --- .github/workflows/release_python_nightly.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release_python_nightly.yml b/.github/workflows/release_python_nightly.yml index 86e589acd6..66ae0e1db2 100644 --- a/.github/workflows/release_python_nightly.yml +++ b/.github/workflows/release_python_nightly.yml @@ -27,6 +27,7 @@ permissions: jobs: set-version: + if: github.repository == 'apache/iceberg-rust' || github.event_name == 'workflow_dispatch' # Run on schedule for apache repo, or on manual dispatch from any repo runs-on: ubuntu-latest outputs: timestamp: ${{ steps.set-ts.outputs.TIMESTAMP }} @@ -37,7 +38,6 @@ jobs: sdist: needs: set-version - if: github.repository == 'apache/iceberg-rust' # Only run for apache repo runs-on: ubuntu-latest steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 @@ -62,7 +62,6 @@ jobs: wheels: needs: set-version - if: github.repository == 'apache/iceberg-rust' # Only run for apache repo runs-on: "${{ matrix.os }}" strategy: max-parallel: 15 @@ -114,6 +113,7 @@ jobs: path: bindings/python/dist testpypi-publish: + if: github.repository == 'apache/iceberg-rust' # Only run for apache repo needs: [sdist, wheels] runs-on: ubuntu-latest From dc3a2d573b666b5bea2b0f33fc2719e7bfe35dd8 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Tue, 31 Mar 2026 16:44:59 -0700 Subject: [PATCH 37/39] ci: ensure use bash shell for env (#2305) --- .github/actions/setup-builder/action.yml | 2 +- .github/workflows/ci.yml | 1 + 
.github/workflows/publish.yml | 1 + .github/workflows/release_python.yml | 3 +++ 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/actions/setup-builder/action.yml b/.github/actions/setup-builder/action.yml index 532174590f..e961ed6335 100644 --- a/.github/actions/setup-builder/action.yml +++ b/.github/actions/setup-builder/action.yml @@ -26,8 +26,8 @@ runs: using: "composite" steps: - name: Setup specified Rust toolchain - shell: bash if: ${{ inputs.rust-version != '' }} + shell: bash env: RUST_VERSION: ${{ inputs.rust-version }} run: | diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 63ee893fa2..071d6dbcbf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -172,6 +172,7 @@ jobs: run: make docker-up - name: Run tests + shell: bash env: # Disable debug info to speed up compilation and reduce artifact size RUSTFLAGS: "-C debuginfo=0" diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index d6ba35d9f4..83e1031d17 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -65,6 +65,7 @@ jobs: # Only publish if it's a tag and the tag is not a pre-release if: ${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '-') }} run: cargo publish --all-features # zizmor: ignore[use-trusted-publishing] -- https://github.com/apache/iceberg-rust/issues/1539 + shell: bash env: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} diff --git a/.github/workflows/release_python.yml b/.github/workflows/release_python.yml index d9fcdd406b..b19fa165dc 100644 --- a/.github/workflows/release_python.yml +++ b/.github/workflows/release_python.yml @@ -49,6 +49,7 @@ jobs: - name: Validate release tag format id: validate # Valid formats: v.. OR v..-rc. 
+ shell: bash env: RELEASE_TAG: ${{ inputs.release_tag }} run: | @@ -119,6 +120,7 @@ jobs: # doing this explicitly to avoid issue in Windows where `mv` does not overwrite existing file rm Cargo.toml mv Cargo.toml.tmp Cargo.toml + shell: bash env: NEEDS_VALIDATE_RELEASE_TAG_OUTPUTS_CARGO_VERSION: ${{ needs.validate-release-tag.outputs.cargo-version }} @@ -161,6 +163,7 @@ jobs: - name: Set cargo version for RC if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }} working-directory: "bindings/python" + shell: bash env: CARGO_VERSION: ${{ needs.validate-release-tag.outputs.cargo-version }} run: | From 626de2e965c289110a09cd394e229d94596d53bf Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Wed, 1 Apr 2026 02:43:00 -0700 Subject: [PATCH 38/39] feat(iceberg): Add snapshot utils to scan ancestors (#2285) ## Which issue does this PR close? - Closes #2241 ## What changes are included in this PR? - Add `Ancestors` to help scan past snapshots - Moved existing util to the new utils mod ## Are these changes tested? 
Yes --- crates/iceberg/src/arrow/reader.rs | 2 +- crates/iceberg/src/lib.rs | 3 +- crates/iceberg/src/scan/mod.rs | 35 +++- crates/iceberg/src/{utils.rs => util/mod.rs} | 3 + crates/iceberg/src/util/snapshot.rs | 185 ++++++++++++++++++ ...xample_table_metadata_v2_deep_history.json | 104 ++++++++++ 6 files changed, 329 insertions(+), 3 deletions(-) rename crates/iceberg/src/{utils.rs => util/mod.rs} (96%) create mode 100644 crates/iceberg/src/util/snapshot.rs create mode 100644 crates/iceberg/testdata/example_table_metadata_v2_deep_history.json diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs index 042a730e19..700ba69262 100644 --- a/crates/iceberg/src/arrow/reader.rs +++ b/crates/iceberg/src/arrow/reader.rs @@ -58,7 +58,7 @@ use crate::io::{FileIO, FileMetadata, FileRead}; use crate::metadata_columns::{RESERVED_FIELD_ID_FILE, is_metadata_field}; use crate::scan::{ArrowRecordBatchStream, FileScanTask, FileScanTaskStream}; use crate::spec::{Datum, NameMapping, NestedField, PrimitiveType, Schema, Type}; -use crate::utils::available_parallelism; +use crate::util::available_parallelism; use crate::{Error, ErrorKind}; /// Default gap between byte ranges below which they are coalesced into a diff --git a/crates/iceberg/src/lib.rs b/crates/iceberg/src/lib.rs index 44a3601428..ae0708146b 100644 --- a/crates/iceberg/src/lib.rs +++ b/crates/iceberg/src/lib.rs @@ -95,9 +95,10 @@ pub mod arrow; pub(crate) mod delete_file_index; pub mod encryption; pub mod test_utils; -mod utils; pub mod writer; mod delete_vector; pub mod metadata_columns; pub mod puffin; +/// Utility functions and modules. 
+pub mod util; diff --git a/crates/iceberg/src/scan/mod.rs b/crates/iceberg/src/scan/mod.rs index e52b3bdeae..4a1e27bdc1 100644 --- a/crates/iceberg/src/scan/mod.rs +++ b/crates/iceberg/src/scan/mod.rs @@ -40,7 +40,7 @@ use crate::metadata_columns::{get_metadata_field_id, is_metadata_column_name}; use crate::runtime::spawn; use crate::spec::{DataContentType, SnapshotRef}; use crate::table::Table; -use crate::utils::available_parallelism; +use crate::util::available_parallelism; use crate::{Error, ErrorKind, Result}; /// A stream of arrow [`RecordBatch`]es. @@ -683,6 +683,39 @@ pub mod tests { } } + /// Creates a fixture with 5 snapshots chained as: + /// S1 (root) -> S2 -> S3 -> S4 -> S5 (current) + /// Useful for testing snapshot history traversal. + pub fn new_with_deep_history() -> Self { + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().join("table1"); + let table_metadata1_location = table_location.join("metadata/v1.json"); + + let file_io = FileIO::new_with_fs(); + + let table_metadata = { + let json_str = fs::read_to_string(format!( + "{}/testdata/example_table_metadata_v2_deep_history.json", + env!("CARGO_MANIFEST_DIR") + )) + .unwrap(); + serde_json::from_str::(&json_str).unwrap() + }; + + let table = Table::builder() + .metadata(table_metadata) + .identifier(TableIdent::from_strs(["db", "table1"]).unwrap()) + .file_io(file_io.clone()) + .metadata_location(table_metadata1_location.as_os_str().to_str().unwrap()) + .build() + .unwrap(); + + Self { + table_location: table_location.to_str().unwrap().to_string(), + table, + } + } + pub fn new_unpartitioned() -> Self { let tmp_dir = TempDir::new().unwrap(); let table_location = tmp_dir.path().join("table1"); diff --git a/crates/iceberg/src/utils.rs b/crates/iceberg/src/util/mod.rs similarity index 96% rename from crates/iceberg/src/utils.rs rename to crates/iceberg/src/util/mod.rs index 00d3e69bd3..28eda66d49 100644 --- a/crates/iceberg/src/utils.rs +++ 
b/crates/iceberg/src/util/mod.rs @@ -17,6 +17,9 @@ use std::num::NonZeroUsize; +/// Utilities for working with snapshots. +pub mod snapshot; + // Use a default value of 1 as the safest option. // See https://doc.rust-lang.org/std/thread/fn.available_parallelism.html#limitations // for more details. diff --git a/crates/iceberg/src/util/snapshot.rs b/crates/iceberg/src/util/snapshot.rs new file mode 100644 index 0000000000..98997ae815 --- /dev/null +++ b/crates/iceberg/src/util/snapshot.rs @@ -0,0 +1,185 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::spec::{SnapshotRef, TableMetadataRef}; + +struct Ancestors { + next: Option, + get_snapshot: Box Option + Send>, +} + +impl Iterator for Ancestors { + type Item = SnapshotRef; + + fn next(&mut self) -> Option { + let snapshot = self.next.take()?; + self.next = snapshot + .parent_snapshot_id() + .and_then(|id| (self.get_snapshot)(id)); + Some(snapshot) + } +} + +/// Iterate starting from `snapshot_id` (inclusive) to the root snapshot. 
+pub fn ancestors_of( + table_metadata: &TableMetadataRef, + snapshot_id: i64, +) -> impl Iterator + Send { + let initial = table_metadata.snapshot_by_id(snapshot_id).cloned(); + let table_metadata = table_metadata.clone(); + Ancestors { + next: initial, + get_snapshot: Box::new(move |id| table_metadata.snapshot_by_id(id).cloned()), + } +} + +/// Iterate starting from `latest_snapshot_id` (inclusive) to `oldest_snapshot_id` (exclusive). +pub fn ancestors_between( + table_metadata: &TableMetadataRef, + latest_snapshot_id: i64, + oldest_snapshot_id: Option, +) -> impl Iterator + Send { + ancestors_of(table_metadata, latest_snapshot_id).take_while(move |snapshot| { + oldest_snapshot_id + .map(|id| snapshot.snapshot_id() != id) + .unwrap_or(true) + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::scan::tests::TableTestFixture; + + // Five snapshots chained as: S1 (root) -> S2 -> S3 -> S4 -> S5 (current) + const S1: i64 = 3051729675574597004; + const S2: i64 = 3055729675574597004; + const S3: i64 = 3056729675574597004; + const S4: i64 = 3057729675574597004; + const S5: i64 = 3059729675574597004; + + fn metadata() -> TableMetadataRef { + let fixture = TableTestFixture::new_with_deep_history(); + std::sync::Arc::new(fixture.table.metadata().clone()) + } + + // --- ancestors_of --- + + #[test] + fn test_ancestors_of_nonexistent_snapshot_returns_empty() { + let meta = metadata(); + let ids: Vec = ancestors_of(&meta, 999).map(|s| s.snapshot_id()).collect(); + assert!(ids.is_empty()); + } + + #[test] + fn test_ancestors_of_root_returns_only_root() { + let meta = metadata(); + let ids: Vec = ancestors_of(&meta, S1).map(|s| s.snapshot_id()).collect(); + assert_eq!(ids, vec![S1]); + } + + #[test] + fn test_ancestors_of_leaf_returns_full_chain() { + let meta = metadata(); + let ids: Vec = ancestors_of(&meta, S5).map(|s| s.snapshot_id()).collect(); + assert_eq!(ids, vec![S5, S4, S3, S2, S1]); + } + + #[test] + fn test_ancestors_of_mid_chain_returns_partial_chain() { 
+ let meta = metadata(); + let ids: Vec = ancestors_of(&meta, S3).map(|s| s.snapshot_id()).collect(); + assert_eq!(ids, vec![S3, S2, S1]); + } + + #[test] + fn test_ancestors_of_second_snapshot() { + let meta = metadata(); + let ids: Vec = ancestors_of(&meta, S2).map(|s| s.snapshot_id()).collect(); + assert_eq!(ids, vec![S2, S1]); + } + + // --- ancestors_between --- + + #[test] + fn test_ancestors_between_same_id_returns_empty() { + let meta = metadata(); + let ids: Vec = ancestors_between(&meta, S3, Some(S3)) + .map(|s| s.snapshot_id()) + .collect(); + assert!(ids.is_empty()); + } + + #[test] + fn test_ancestors_between_no_oldest_returns_all_ancestors() { + let meta = metadata(); + let ids: Vec = ancestors_between(&meta, S5, None) + .map(|s| s.snapshot_id()) + .collect(); + assert_eq!(ids, vec![S5, S4, S3, S2, S1]); + } + + #[test] + fn test_ancestors_between_excludes_oldest_snapshot() { + let meta = metadata(); + // S5 down to (but not including) S2 + let ids: Vec = ancestors_between(&meta, S5, Some(S2)) + .map(|s| s.snapshot_id()) + .collect(); + assert_eq!(ids, vec![S5, S4, S3]); + } + + #[test] + fn test_ancestors_between_adjacent_snapshots() { + let meta = metadata(); + // S3 down to (but not including) S2 — only S3 itself + let ids: Vec = ancestors_between(&meta, S3, Some(S2)) + .map(|s| s.snapshot_id()) + .collect(); + assert_eq!(ids, vec![S3]); + } + + #[test] + fn test_ancestors_between_leaf_and_root() { + let meta = metadata(); + // S5 down to (but not including) S1 + let ids: Vec = ancestors_between(&meta, S5, Some(S1)) + .map(|s| s.snapshot_id()) + .collect(); + assert_eq!(ids, vec![S5, S4, S3, S2]); + } + + #[test] + fn test_ancestors_between_nonexistent_oldest_returns_full_chain() { + let meta = metadata(); + // oldest_snapshot_id doesn't exist in the chain, so take_while never stops + let ids: Vec = ancestors_between(&meta, S5, Some(999)) + .map(|s| s.snapshot_id()) + .collect(); + assert_eq!(ids, vec![S5, S4, S3, S2, S1]); + } + + #[test] + fn
test_ancestors_between_nonexistent_latest_returns_empty() { + let meta = metadata(); + let ids: Vec = ancestors_between(&meta, 999, Some(S1)) + .map(|s| s.snapshot_id()) + .collect(); + assert!(ids.is_empty()); + } +} diff --git a/crates/iceberg/testdata/example_table_metadata_v2_deep_history.json b/crates/iceberg/testdata/example_table_metadata_v2_deep_history.json new file mode 100644 index 0000000000..a354958697 --- /dev/null +++ b/crates/iceberg/testdata/example_table_metadata_v2_deep_history.json @@ -0,0 +1,104 @@ +{ + "format-version": 2, + "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "last-updated-ms": 1602638573590, + "last-column-id": 3, + "current-schema-id": 1, + "schemas": [ + { + "type": "struct", + "schema-id": 0, + "fields": [ + {"id": 1, "name": "x", "required": true, "type": "long"} + ] + }, + { + "type": "struct", + "schema-id": 1, + "identifier-field-ids": [1, 2], + "fields": [ + {"id": 1, "name": "x", "required": true, "type": "long"}, + {"id": 2, "name": "y", "required": true, "type": "long", "doc": "comment"}, + {"id": 3, "name": "z", "required": true, "type": "long"} + ] + } + ], + "default-spec-id": 0, + "partition-specs": [ + { + "spec-id": 0, + "fields": [ + {"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000} + ] + } + ], + "last-partition-id": 1000, + "default-sort-order-id": 3, + "sort-orders": [ + { + "order-id": 3, + "fields": [ + {"transform": "identity", "source-id": 2, "direction": "asc", "null-order": "nulls-first"}, + {"transform": "bucket[4]", "source-id": 3, "direction": "desc", "null-order": "nulls-last"} + ] + } + ], + "properties": {}, + "current-snapshot-id": 3059729675574597004, + "snapshots": [ + { + "snapshot-id": 3051729675574597004, + "timestamp-ms": 1515100955770, + "sequence-number": 0, + "summary": {"operation": "append"}, + "manifest-list": "s3://bucket/metadata/snap-3051729675574597004.avro" + }, + { + 
"snapshot-id": 3055729675574597004, + "parent-snapshot-id": 3051729675574597004, + "timestamp-ms": 1555100955770, + "sequence-number": 1, + "summary": {"operation": "append"}, + "manifest-list": "s3://bucket/metadata/snap-3055729675574597004.avro", + "schema-id": 1 + }, + { + "snapshot-id": 3056729675574597004, + "parent-snapshot-id": 3055729675574597004, + "timestamp-ms": 1575100955770, + "sequence-number": 2, + "summary": {"operation": "append"}, + "manifest-list": "s3://bucket/metadata/snap-3056729675574597004.avro", + "schema-id": 1 + }, + { + "snapshot-id": 3057729675574597004, + "parent-snapshot-id": 3056729675574597004, + "timestamp-ms": 1595100955770, + "sequence-number": 3, + "summary": {"operation": "overwrite"}, + "manifest-list": "s3://bucket/metadata/snap-3057729675574597004.avro", + "schema-id": 1 + }, + { + "snapshot-id": 3059729675574597004, + "parent-snapshot-id": 3057729675574597004, + "timestamp-ms": 1602638573590, + "sequence-number": 4, + "summary": {"operation": "append"}, + "manifest-list": "s3://bucket/metadata/snap-3059729675574597004.avro", + "schema-id": 1 + } + ], + "snapshot-log": [ + {"snapshot-id": 3051729675574597004, "timestamp-ms": 1515100955770}, + {"snapshot-id": 3055729675574597004, "timestamp-ms": 1555100955770}, + {"snapshot-id": 3056729675574597004, "timestamp-ms": 1575100955770}, + {"snapshot-id": 3057729675574597004, "timestamp-ms": 1595100955770}, + {"snapshot-id": 3059729675574597004, "timestamp-ms": 1602638573590} + ], + "metadata-log": [], + "refs": {"main": {"snapshot-id": 3059729675574597004, "type": "branch"}} +} From b2fdeeda7025827ae612bb8e93da8ec51dc8d1fa Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 2 Apr 2026 09:30:26 +0200 Subject: [PATCH 39/39] Fix build errors --- crates/iceberg/src/io/storage/opendal/mod.rs | 9 +++++++++ crates/iceberg/src/scan/incremental/mod.rs | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/crates/iceberg/src/io/storage/opendal/mod.rs 
b/crates/iceberg/src/io/storage/opendal/mod.rs index 2bc56d31c4..ca0ce8a245 100644 --- a/crates/iceberg/src/io/storage/opendal/mod.rs +++ b/crates/iceberg/src/io/storage/opendal/mod.rs @@ -25,6 +25,8 @@ use async_trait::async_trait; #[cfg(feature = "storage-azdls")] use azdls::AzureStorageScheme; use bytes::Bytes; +use futures::StreamExt; +use futures::stream::BoxStream; use opendal::Operator; use opendal::layers::RetryLayer; #[cfg(feature = "storage-azdls")] @@ -438,6 +440,13 @@ impl Storage for OpenDalStorage { Ok(op.delete(relative_path).await?) } + async fn delete_stream(&self, mut paths: BoxStream<'static, String>) -> Result<()> { + while let Some(path) = paths.next().await { + self.delete(&path).await?; + } + Ok(()) + } + async fn delete_prefix(&self, path: &str) -> Result<()> { let (op, relative_path) = self.create_operator(&path)?; let path = if relative_path.ends_with('/') { diff --git a/crates/iceberg/src/scan/incremental/mod.rs b/crates/iceberg/src/scan/incremental/mod.rs index 59c5d75811..74b9090f4e 100644 --- a/crates/iceberg/src/scan/incremental/mod.rs +++ b/crates/iceberg/src/scan/incremental/mod.rs @@ -40,8 +40,8 @@ use crate::spec::{ DataContentType, ManifestEntryRef, ManifestStatus, Snapshot, SnapshotRef, TableMetadataRef, }; use crate::table::Table; +use crate::util::available_parallelism; use crate::util::snapshot::ancestors_between; -use crate::utils::available_parallelism; use crate::{Error, ErrorKind, Result}; mod context;