From 0436357fb61908fd13dbff7dea429a3bcf5e5220 Mon Sep 17 00:00:00 2001 From: Hongyue Zhang Date: Wed, 20 May 2026 10:58:24 -0700 Subject: [PATCH 01/14] All: Remove deprecated methods for 1.12.0 Co-Authored-By: Claude Sonnet 4.6 --- .palantir/revapi.yml | 174 ++++++++ .../iceberg/aws/s3/signer/S3ObjectMapper.java | 123 ------ .../iceberg/aws/s3/signer/S3SignRequest.java | 30 -- .../aws/s3/signer/S3SignRequestParser.java | 67 ---- .../iceberg/aws/s3/signer/S3SignResponse.java | 30 -- .../aws/s3/signer/S3SignResponseParser.java | 57 --- .../aws/s3/signer/S3V4RestSignerClient.java | 70 +--- .../s3/signer/TestS3V4RestSignerClient.java | 31 +- .../bigquery/BigQueryMetastoreCatalog.java | 18 - .../java/org/apache/iceberg/BaseScan.java | 4 - .../org/apache/iceberg/ManifestFiles.java | 42 -- .../org/apache/iceberg/MetricsConfig.java | 30 -- .../org/apache/iceberg/SystemConfigs.java | 15 - .../org/apache/iceberg/SystemProperties.java | 57 --- .../org/apache/iceberg/TableProperties.java | 10 - .../apache/iceberg/avro/AvroSchemaUtil.java | 10 - .../apache/iceberg/data/avro/DataReader.java | 193 --------- .../apache/iceberg/data/avro/RawDecoder.java | 22 - .../iceberg/deletes/PositionDelete.java | 11 - .../encryption/StandardEncryptionManager.java | 25 -- .../apache/iceberg/hadoop/HadoopFileIO.java | 15 +- .../org/apache/iceberg/io/ContentCache.java | 11 - .../apache/iceberg/rest/CatalogHandlers.java | 9 - .../iceberg/rest/RESTSessionCatalog.java | 6 - .../org/apache/iceberg/rest/RESTUtil.java | 57 +-- .../rest/responses/BaseScanTaskResponse.java | 9 - .../org/apache/iceberg/util/SnapshotUtil.java | 66 --- .../apache/iceberg/util/TableScanUtil.java | 25 -- .../org/apache/iceberg/util/ThreadPools.java | 42 -- .../apache/iceberg/TestManifestReader.java | 5 +- .../iceberg/data/avro/TestDataReader.java | 208 ---------- .../org/apache/iceberg/rest/TestRESTUtil.java | 8 +- .../iceberg/rest/TestResourcePaths.java | 8 +- ...TestFetchPlanningResultResponseParser.java | 12 - 
.../TestFetchScanTasksResponseParser.java | 10 - .../TestPlanTableScanResponseParser.java | 12 - .../iceberg/data/BaseFileWriterFactory.java | 375 ------------------ .../iceberg/data/GenericAppenderFactory.java | 350 ---------------- .../data/GenericFileWriterFactory.java | 5 +- .../data/RegistryBasedFileWriterFactory.java | 3 +- .../iceberg/TestGenericAppenderFactory.java | 139 ------- .../maintenance/api/RewriteDataFiles.java | 14 - .../flink/sink/FlinkAppenderFactory.java | 2 +- .../maintenance/api/TestRewriteDataFiles.java | 2 +- .../maintenance/api/RewriteDataFiles.java | 14 - .../flink/sink/FlinkAppenderFactory.java | 2 +- .../maintenance/api/TestRewriteDataFiles.java | 2 +- .../maintenance/api/RewriteDataFiles.java | 14 - .../flink/sink/FlinkAppenderFactory.java | 2 +- .../maintenance/api/TestRewriteDataFiles.java | 2 +- .../connect/events/TableReference.java | 25 -- .../connect/data/IcebergWriterResult.java | 26 -- .../iceberg/connect/channel/TestWorker.java | 3 +- .../iceberg/connect/data/TestSinkWriter.java | 5 +- .../data/parquet/BaseParquetReaders.java | 22 +- .../org/apache/iceberg/parquet/Parquet.java | 5 +- .../spark/source/SparkFileWriterFactory.java | 5 +- .../spark/source/TestSparkDVWriters.java | 1 - .../source/TestSparkPartitioningWriters.java | 1 - .../source/TestSparkRollingFileWriters.java | 1 - .../spark/source/TestSparkWriterMetrics.java | 1 - .../apache/iceberg/spark/SparkSchemaUtil.java | 45 --- .../apache/iceberg/spark/SparkTableUtil.java | 304 -------------- .../spark/source/SparkFileWriterFactory.java | 5 +- .../spark/source/TestSparkDVWriters.java | 1 - .../source/TestSparkPartitioningWriters.java | 1 - .../source/TestSparkRollingFileWriters.java | 1 - .../spark/source/TestSparkWriterMetrics.java | 1 - .../apache/iceberg/spark/SparkReadConf.java | 10 - .../apache/iceberg/spark/SparkSchemaUtil.java | 45 --- .../apache/iceberg/spark/SparkTableUtil.java | 304 -------------- .../apache/iceberg/spark/SparkWriteConf.java | 12 +- 
.../spark/source/SparkFileWriterFactory.java | 5 +- .../spark/source/TestSparkDVWriters.java | 1 - .../source/TestSparkPartitioningWriters.java | 1 - .../source/TestSparkRollingFileWriters.java | 1 - .../spark/source/TestSparkWriterMetrics.java | 1 - 77 files changed, 223 insertions(+), 3058 deletions(-) delete mode 100644 aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3ObjectMapper.java delete mode 100644 aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3SignRequest.java delete mode 100644 aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3SignRequestParser.java delete mode 100644 aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3SignResponse.java delete mode 100644 aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3SignResponseParser.java delete mode 100644 core/src/main/java/org/apache/iceberg/SystemProperties.java delete mode 100644 core/src/main/java/org/apache/iceberg/data/avro/DataReader.java delete mode 100644 core/src/test/java/org/apache/iceberg/data/avro/TestDataReader.java delete mode 100644 data/src/main/java/org/apache/iceberg/data/BaseFileWriterFactory.java delete mode 100644 data/src/main/java/org/apache/iceberg/data/GenericAppenderFactory.java delete mode 100644 data/src/test/java/org/apache/iceberg/TestGenericAppenderFactory.java diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index 80fa8f15f168..11e7948e72f4 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -517,6 +517,180 @@ acceptedBreaks: - code: "java.class.removed" old: "class org.apache.iceberg.data.PartitionStatsHandler" justification: "Removing deprecated code for 1.11.0" + "1.12.0": + org.apache.iceberg:iceberg-core: + - code: "java.class.removed" + old: "class org.apache.iceberg.SystemProperties" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.class.removed" + old: "class org.apache.iceberg.data.avro.DataReader" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: 
"java.field.removed" + old: "field org.apache.iceberg.SystemConfigs.NETFLIX_UNSAFE_PARQUET_ID_FALLBACK_ENABLED" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.field.removed" + old: "field org.apache.iceberg.rest.RESTUtil.NAMESPACE_JOINER" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.field.removed" + old: "field org.apache.iceberg.rest.RESTUtil.NAMESPACE_SPLITTER" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.field.removedWithConstant" + old: "field org.apache.iceberg.TableProperties.MANIFEST_LISTS_ENABLED" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.field.removedWithConstant" + old: "field org.apache.iceberg.TableProperties.MANIFEST_LISTS_ENABLED_DEFAULT" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.field.removedWithConstant" + old: "field org.apache.iceberg.rest.RESTSessionCatalog.REST_PAGE_SIZE" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.noLongerDefault" + old: "method org.apache.iceberg.deletes.PositionDeleteWriter\ + \ org.apache.iceberg.RewriteTablePathUtil.PositionDeleteReaderWriter::writer(org.apache.iceberg.io.OutputFile,\ + \ org.apache.iceberg.FileFormat, org.apache.iceberg.PartitionSpec, org.apache.iceberg.StructLike)\ + \ throws java.io.IOException" + new: "method org.apache.iceberg.deletes.PositionDeleteWriter\ + \ org.apache.iceberg.RewriteTablePathUtil.PositionDeleteReaderWriter::writer(org.apache.iceberg.io.OutputFile,\ + \ org.apache.iceberg.FileFormat, org.apache.iceberg.PartitionSpec, org.apache.iceberg.StructLike)\ + \ throws java.io.IOException" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.nowAbstract" + old: "method org.apache.iceberg.deletes.PositionDeleteWriter\ + \ 
org.apache.iceberg.RewriteTablePathUtil.PositionDeleteReaderWriter::writer(org.apache.iceberg.io.OutputFile,\ + \ org.apache.iceberg.FileFormat, org.apache.iceberg.PartitionSpec, org.apache.iceberg.StructLike)\ + \ throws java.io.IOException" + new: "method org.apache.iceberg.deletes.PositionDeleteWriter\ + \ org.apache.iceberg.RewriteTablePathUtil.PositionDeleteReaderWriter::writer(org.apache.iceberg.io.OutputFile,\ + \ org.apache.iceberg.FileFormat, org.apache.iceberg.PartitionSpec, org.apache.iceberg.StructLike)\ + \ throws java.io.IOException" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method B org.apache.iceberg.rest.responses.BaseScanTaskResponse.Builder, R>, R\ + \ extends org.apache.iceberg.rest.responses.BaseScanTaskResponse>::withDeleteFiles(java.util.List)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method boolean org.apache.iceberg.util.TableScanUtil::hasDeletes(org.apache.iceberg.CombinedScanTask)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method boolean org.apache.iceberg.util.TableScanUtil::hasEqDeletes(org.apache.iceberg.CombinedScanTask)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method java.lang.String org.apache.iceberg.rest.RESTUtil::encodeNamespace(org.apache.iceberg.catalog.Namespace)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method java.nio.ByteBuffer org.apache.iceberg.encryption.StandardEncryptionManager::unwrapKey(java.nio.ByteBuffer)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method java.nio.ByteBuffer org.apache.iceberg.encryption.StandardEncryptionManager::wrapKey(java.nio.ByteBuffer)" + justification: 
"Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method java.util.List org.apache.iceberg.util.SnapshotUtil::newFiles(java.lang.Long,\ + \ long, java.util.function.Function,\ + \ org.apache.iceberg.io.FileIO)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method java.util.concurrent.ExecutorService org.apache.iceberg.util.ThreadPools::newWorkerPool(java.lang.String)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method java.util.concurrent.ExecutorService org.apache.iceberg.util.ThreadPools::newWorkerPool(java.lang.String,\ + \ int)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.avro.Schema org.apache.iceberg.avro.AvroSchemaUtil::pruneColumns(org.apache.avro.Schema,\ + \ java.util.Set, org.apache.iceberg.mapping.NameMapping)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.ManifestReader\ + \ org.apache.iceberg.ManifestFiles::read(org.apache.iceberg.ManifestFile,\ + \ org.apache.iceberg.io.FileIO)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.MetricsConfig org.apache.iceberg.MetricsConfig::forPositionDelete(org.apache.iceberg.Table)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.catalog.Namespace org.apache.iceberg.rest.RESTUtil::decodeNamespace(java.lang.String)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.deletes.PositionDeleteWriter\ + \ 
org.apache.iceberg.RewriteTablePathUtil.PositionDeleteReaderWriter::writer(org.apache.iceberg.io.OutputFile,\ + \ org.apache.iceberg.FileFormat, org.apache.iceberg.PartitionSpec, org.apache.iceberg.StructLike,\ + \ org.apache.iceberg.Schema) throws java.io.IOException" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.io.CloseableIterable org.apache.iceberg.ManifestFiles::readPaths(org.apache.iceberg.ManifestFile,\ + \ org.apache.iceberg.io.FileIO)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.io.CloseableIterable\ + \ org.apache.iceberg.util.SnapshotUtil::newFilesBetween(java.lang.Long, long,\ + \ java.util.function.Function,\ + \ org.apache.iceberg.io.FileIO)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.rest.responses.LoadTableResponse org.apache.iceberg.rest.CatalogHandlers::loadTable(org.apache.iceberg.catalog.Catalog,\ + \ org.apache.iceberg.catalog.TableIdentifier)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method void org.apache.iceberg.data.avro.RawDecoder::(org.apache.iceberg.Schema,\ + \ java.util.function.Function>,\ + \ org.apache.avro.Schema)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method void org.apache.iceberg.encryption.StandardEncryptionManager::(java.lang.String,\ + \ int, org.apache.iceberg.encryption.KeyManagementClient)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method void org.apache.iceberg.hadoop.HadoopFileIO::(org.apache.iceberg.util.SerializableSupplier)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: 
"java.method.removed" + old: "method void org.apache.iceberg.io.ContentCache::invalidateAll()" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + org.apache.iceberg:iceberg-data: + - code: "java.class.removed" + old: "class org.apache.iceberg.data.BaseFileWriterFactory" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.class.removed" + old: "class org.apache.iceberg.data.GenericAppenderFactory" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method void org.apache.iceberg.data.GenericFileWriterFactory::configureDataWrite(org.apache.iceberg.avro.Avro.DataWriteBuilder)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method void org.apache.iceberg.data.GenericFileWriterFactory::configureDataWrite(org.apache.iceberg.orc.ORC.DataWriteBuilder)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method void org.apache.iceberg.data.GenericFileWriterFactory::configureDataWrite(org.apache.iceberg.parquet.Parquet.DataWriteBuilder)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method void org.apache.iceberg.data.GenericFileWriterFactory::configureEqualityDelete(org.apache.iceberg.avro.Avro.DeleteWriteBuilder)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method void org.apache.iceberg.data.GenericFileWriterFactory::configureEqualityDelete(org.apache.iceberg.orc.ORC.DeleteWriteBuilder)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method void org.apache.iceberg.data.GenericFileWriterFactory::configureEqualityDelete(org.apache.iceberg.parquet.Parquet.DeleteWriteBuilder)" + justification: "Removing deprecated API 
scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method void org.apache.iceberg.data.GenericFileWriterFactory::configurePositionDelete(org.apache.iceberg.avro.Avro.DeleteWriteBuilder)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method void org.apache.iceberg.data.GenericFileWriterFactory::configurePositionDelete(org.apache.iceberg.orc.ORC.DeleteWriteBuilder)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method void org.apache.iceberg.data.GenericFileWriterFactory::configurePositionDelete(org.apache.iceberg.parquet.Parquet.DeleteWriteBuilder)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + org.apache.iceberg:iceberg-parquet: + - code: "java.method.removed" + old: "method org.apache.iceberg.parquet.ParquetValueReader org.apache.iceberg.data.parquet.BaseParquetReaders::createStructReader(java.util.List>,\ + \ org.apache.iceberg.types.Types.StructType) @ org.apache.iceberg.data.parquet.GenericParquetReaders" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.parquet.ParquetValueReader org.apache.iceberg.data.parquet.BaseParquetReaders::createStructReader(java.util.List>,\ + \ org.apache.iceberg.types.Types.StructType) @ org.apache.iceberg.data.parquet.InternalReader" + justification: "Removing deprecated API scheduled for removal in 1.12.0" "1.11.0": org.apache.iceberg:iceberg-core: - code: "java.class.removed" diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3ObjectMapper.java b/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3ObjectMapper.java deleted file mode 100644 index 7f1d6c3cc848..000000000000 --- a/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3ObjectMapper.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) 
under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.aws.s3.signer; - -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.PropertyAccessor; -import com.fasterxml.jackson.core.JsonFactory; -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.JsonSerializer; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.PropertyNamingStrategies; -import com.fasterxml.jackson.databind.SerializerProvider; -import com.fasterxml.jackson.databind.module.SimpleModule; -import java.io.IOException; -import org.apache.iceberg.rest.RESTSerializers.ErrorResponseDeserializer; -import org.apache.iceberg.rest.RESTSerializers.ErrorResponseSerializer; -import org.apache.iceberg.rest.RESTSerializers.OAuthTokenResponseDeserializer; -import org.apache.iceberg.rest.RESTSerializers.OAuthTokenResponseSerializer; -import org.apache.iceberg.rest.responses.ErrorResponse; -import 
org.apache.iceberg.rest.responses.OAuthTokenResponse; - -/** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@code RESTObjectMapper} instead. - */ -@Deprecated -public class S3ObjectMapper { - - private static final JsonFactory FACTORY = new JsonFactory(); - private static final ObjectMapper MAPPER = new ObjectMapper(FACTORY); - private static volatile boolean isInitialized = false; - - private S3ObjectMapper() {} - - static ObjectMapper mapper() { - if (!isInitialized) { - synchronized (S3ObjectMapper.class) { - if (!isInitialized) { - MAPPER.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY); - MAPPER.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - // even though using new PropertyNamingStrategy.KebabCaseStrategy() is deprecated - // and PropertyNamingStrategies.KebabCaseStrategy.INSTANCE (introduced in jackson 2.14) is - // recommended, we can't use it because Spark still relies on jackson 2.13.x stuff - MAPPER.setPropertyNamingStrategy(new PropertyNamingStrategies.KebabCaseStrategy()); - MAPPER.registerModule(initModule()); - isInitialized = true; - } - } - } - - return MAPPER; - } - - public static SimpleModule initModule() { - return new SimpleModule() - .addSerializer(ErrorResponse.class, new ErrorResponseSerializer()) - .addDeserializer(ErrorResponse.class, new ErrorResponseDeserializer()) - .addSerializer(OAuthTokenResponse.class, new OAuthTokenResponseSerializer()) - .addDeserializer(OAuthTokenResponse.class, new OAuthTokenResponseDeserializer()) - .addSerializer(S3SignRequest.class, new S3SignRequestSerializer<>()) - .addSerializer(ImmutableS3SignRequest.class, new S3SignRequestSerializer<>()) - .addDeserializer(S3SignRequest.class, new S3SignRequestDeserializer<>()) - .addDeserializer(ImmutableS3SignRequest.class, new S3SignRequestDeserializer<>()) - .addSerializer(S3SignResponse.class, new S3SignResponseSerializer<>()) - .addSerializer(ImmutableS3SignResponse.class, new 
S3SignResponseSerializer<>()) - .addDeserializer(S3SignResponse.class, new S3SignResponseDeserializer<>()) - .addDeserializer(ImmutableS3SignResponse.class, new S3SignResponseDeserializer<>()); - } - - public static class S3SignRequestSerializer<T extends S3SignRequest> extends JsonSerializer<T> { - @Override - public void serialize(T request, JsonGenerator gen, SerializerProvider serializers) - throws IOException { - S3SignRequestParser.toJson(request, gen); - } - } - - public static class S3SignRequestDeserializer<T extends S3SignRequest> - extends JsonDeserializer<T> { - @Override - public T deserialize(JsonParser p, DeserializationContext context) throws IOException { - JsonNode jsonNode = p.getCodec().readTree(p); - return (T) S3SignRequestParser.fromJson(jsonNode); - } - } - - public static class S3SignResponseSerializer<T extends S3SignResponse> extends JsonSerializer<T> { - @Override - public void serialize(T request, JsonGenerator gen, SerializerProvider serializers) - throws IOException { - S3SignResponseParser.toJson(request, gen); - } - } - - public static class S3SignResponseDeserializer<T extends S3SignResponse> - extends JsonDeserializer<T> { - @Override - public T deserialize(JsonParser p, DeserializationContext context) throws IOException { - JsonNode jsonNode = p.getCodec().readTree(p); - return (T) S3SignResponseParser.fromJson(jsonNode); - } - } -} diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3SignRequest.java b/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3SignRequest.java deleted file mode 100644 index 995f6e7e4860..000000000000 --- a/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3SignRequest.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.aws.s3.signer; - -import org.apache.iceberg.rest.requests.RemoteSignRequest; -import org.immutables.value.Value; - -/** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link RemoteSignRequest} instead. - */ -@Deprecated -@Value.Immutable -@SuppressWarnings("immutables:subtype") -public interface S3SignRequest extends RemoteSignRequest {} diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3SignRequestParser.java b/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3SignRequestParser.java deleted file mode 100644 index 5d2a7d684460..000000000000 --- a/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3SignRequestParser.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.aws.s3.signer; - -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.databind.JsonNode; -import java.io.IOException; -import java.util.List; -import java.util.Map; -import org.apache.iceberg.rest.requests.RemoteSignRequest; -import org.apache.iceberg.rest.requests.RemoteSignRequestParser; - -/** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link RemoteSignRequestParser} instead. - */ -@Deprecated -public class S3SignRequestParser { - - private S3SignRequestParser() {} - - public static String toJson(S3SignRequest request) { - return RemoteSignRequestParser.toJson(request, false); - } - - public static String toJson(S3SignRequest request, boolean pretty) { - return RemoteSignRequestParser.toJson(request, pretty); - } - - public static void toJson(S3SignRequest request, JsonGenerator gen) throws IOException { - RemoteSignRequestParser.toJson(request, gen); - } - - public static S3SignRequest fromJson(String json) { - RemoteSignRequest request = RemoteSignRequestParser.fromJson(json); - return ImmutableS3SignRequest.builder().from(request).build(); - } - - public static S3SignRequest fromJson(JsonNode json) { - RemoteSignRequest request = RemoteSignRequestParser.fromJson(json); - return ImmutableS3SignRequest.builder().from(request).build(); - } - - static void headersToJson(String property, Map<String, List<String>> headers, JsonGenerator gen) - throws IOException { - RemoteSignRequestParser.headersToJson(property, headers, gen); - } - - static Map<String, List<String>> headersFromJson(String property, JsonNode json) { - return RemoteSignRequestParser.headersFromJson(property, json); - } -} diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3SignResponse.java b/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3SignResponse.java deleted file mode 100644 index 6fbaa90fe7af..000000000000 --- a/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3SignResponse.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the
Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.aws.s3.signer; - -import org.apache.iceberg.rest.responses.RemoteSignResponse; -import org.immutables.value.Value; - -/** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link RemoteSignResponse} instead. - */ -@Deprecated -@Value.Immutable -@SuppressWarnings("immutables:subtype") -public interface S3SignResponse extends RemoteSignResponse {} diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3SignResponseParser.java b/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3SignResponseParser.java deleted file mode 100644 index be63a51b38fb..000000000000 --- a/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3SignResponseParser.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.aws.s3.signer; - -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.databind.JsonNode; -import java.io.IOException; -import org.apache.iceberg.rest.responses.RemoteSignResponse; -import org.apache.iceberg.rest.responses.RemoteSignResponseParser; - -/** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link RemoteSignResponseParser} - * instead. - */ -@Deprecated -public class S3SignResponseParser { - - private S3SignResponseParser() {} - - public static String toJson(S3SignResponse response) { - return RemoteSignResponseParser.toJson(response, false); - } - - public static String toJson(S3SignResponse response, boolean pretty) { - return RemoteSignResponseParser.toJson(response, pretty); - } - - public static void toJson(S3SignResponse response, JsonGenerator gen) throws IOException { - RemoteSignResponseParser.toJson(response, gen); - } - - public static S3SignResponse fromJson(String json) { - RemoteSignResponse result = RemoteSignResponseParser.fromJson(json); - return ImmutableS3SignResponse.builder().from(result).build(); - } - - public static S3SignResponse fromJson(JsonNode json) { - RemoteSignResponse result = RemoteSignResponseParser.fromJson(json); - return ImmutableS3SignResponse.builder().from(result).build(); - } -} diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3V4RestSignerClient.java b/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3V4RestSignerClient.java index 7a463abd3d2d..20e12406cb5e 100644 --- 
a/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3V4RestSignerClient.java +++ b/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3V4RestSignerClient.java @@ -71,23 +71,6 @@ public abstract class S3V4RestSignerClient public static final String S3_PROVIDER = "s3"; - /** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link - * RESTCatalogProperties#SIGNER_URI} instead. - */ - @Deprecated public static final String S3_SIGNER_URI = "s3.signer.uri"; - - /** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link - * RESTCatalogProperties#SIGNER_URI} instead. - */ - @Deprecated public static final String S3_SIGNER_ENDPOINT = "s3.signer.endpoint"; - - /** - * @deprecated since 1.11.0, will be removed in 1.12.0; there is no replacement. - */ - @Deprecated static final String S3_SIGNER_DEFAULT_ENDPOINT = "v1/aws/s3/sign"; - @VisibleForTesting static final String UNSIGNED_PAYLOAD = "UNSIGNED-PAYLOAD"; private static final String CACHE_CONTROL = "Cache-Control"; @@ -115,27 +98,17 @@ public Supplier<Map<String, String>> requestPropertiesSupplier() { @Value.Lazy public String baseSignerUri() { - // TODO remove in 1.12.0 - if (properties().containsKey(S3_SIGNER_URI)) { - return properties().get(S3_SIGNER_URI); - } - return properties() .getOrDefault(RESTCatalogProperties.SIGNER_URI, properties().get(CatalogProperties.URI)); } @Value.Lazy public String endpoint() { - // TODO remove in 1.12.0 - String endpointPath; - if (properties().containsKey(S3_SIGNER_ENDPOINT)) { - endpointPath = properties().get(S3_SIGNER_ENDPOINT); - } else { - endpointPath = - properties() - .getOrDefault(RESTCatalogProperties.SIGNER_ENDPOINT, S3_SIGNER_DEFAULT_ENDPOINT); - } - + String endpointPath = properties().get(RESTCatalogProperties.SIGNER_ENDPOINT); + Preconditions.checkArgument( + endpointPath != null, + "S3 signer endpoint (%s) is required", + RESTCatalogProperties.SIGNER_ENDPOINT); return RESTUtil.resolveEndpoint(baseSignerUri(), endpointPath); } @@ -232,36 +205,13 @@ private
boolean credentialProvided() { @Value.Check protected void check() { Preconditions.checkArgument( - properties().containsKey(S3_SIGNER_URI) - || properties().containsKey(RESTCatalogProperties.SIGNER_URI) + properties().containsKey(RESTCatalogProperties.SIGNER_URI) || properties().containsKey(CatalogProperties.URI), "S3 signer service URI is required"); - - if (properties().containsKey(S3_SIGNER_URI) - && !properties().containsKey(RESTCatalogProperties.SIGNER_URI)) { - LOG.warn( - "S3 signer URI is configured via deprecated property {}, this won't be supported in future releases. " - + "Please use {} instead.", - S3_SIGNER_URI, - RESTCatalogProperties.SIGNER_URI); - } - - if (properties().containsKey(S3_SIGNER_ENDPOINT) - && !properties().containsKey(RESTCatalogProperties.SIGNER_ENDPOINT)) { - LOG.warn( - "Signer endpoint is configured via deprecated property {}, this won't be supported in future releases. " - + "Please use {} instead.", - S3_SIGNER_ENDPOINT, - RESTCatalogProperties.SIGNER_ENDPOINT); - } - - // TODO change to required in 1.12.0 - if (!properties().containsKey(S3_SIGNER_ENDPOINT) - && !properties().containsKey(RESTCatalogProperties.SIGNER_ENDPOINT)) { - LOG.warn( - "Signer endpoint is not set, this won't be supported in future releases. 
Using deprecated default: {}", - S3_SIGNER_DEFAULT_ENDPOINT); - } + Preconditions.checkArgument( + properties().containsKey(RESTCatalogProperties.SIGNER_ENDPOINT), + "S3 signer endpoint (%s) is required", + RESTCatalogProperties.SIGNER_ENDPOINT); } @Override diff --git a/aws/src/test/java/org/apache/iceberg/aws/s3/signer/TestS3V4RestSignerClient.java b/aws/src/test/java/org/apache/iceberg/aws/s3/signer/TestS3V4RestSignerClient.java index aadbf036b567..6ffc99e85a86 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/s3/signer/TestS3V4RestSignerClient.java +++ b/aws/src/test/java/org/apache/iceberg/aws/s3/signer/TestS3V4RestSignerClient.java @@ -198,21 +198,9 @@ void legacySignerProperties( } } - @SuppressWarnings("deprecation") public static Stream legacySignerProperties() { return Stream.of( - // Only legacy properties - Arguments.of( - Map.of( - CatalogProperties.URI, - "https://catalog.com", - S3V4RestSignerClient.S3_SIGNER_URI, - "https://legacy-signer.com", - S3V4RestSignerClient.S3_SIGNER_ENDPOINT, - "v1/legacy/sign"), - "https://legacy-signer.com", - "https://legacy-signer.com/v1/legacy/sign"), - // Only new properties + // Signer URI + endpoint Arguments.of( Map.of( CatalogProperties.URI, @@ -223,25 +211,14 @@ public static Stream legacySignerProperties() { "v1/new/sign"), "https://new-signer.com", "https://new-signer.com/v1/new/sign"), - // Mixed properties: legacy properties take precedence + // No signer URI: the catalog URI is used as base Arguments.of( Map.of( CatalogProperties.URI, "https://catalog.com", - RESTCatalogProperties.SIGNER_URI, - "https://new-signer.com", RESTCatalogProperties.SIGNER_ENDPOINT, - "v1/new/sign", - S3V4RestSignerClient.S3_SIGNER_URI, - "https://legacy-signer.com", - S3V4RestSignerClient.S3_SIGNER_ENDPOINT, - "v1/legacy/sign"), - "https://legacy-signer.com", - "https://legacy-signer.com/v1/legacy/sign"), - // No signer properties: the catalog URI and the deprecated default endpoint are used - Arguments.of( - 
Map.of(CatalogProperties.URI, "https://catalog.com"), + "v1/tables/t/sign"), "https://catalog.com", - "https://catalog.com/" + S3V4RestSignerClient.S3_SIGNER_DEFAULT_ENDPOINT)); + "https://catalog.com/v1/tables/t/sign")); } } diff --git a/bigquery/src/main/java/org/apache/iceberg/gcp/bigquery/BigQueryMetastoreCatalog.java b/bigquery/src/main/java/org/apache/iceberg/gcp/bigquery/BigQueryMetastoreCatalog.java index dd01246cb01f..df81d5b8ac84 100644 --- a/bigquery/src/main/java/org/apache/iceberg/gcp/bigquery/BigQueryMetastoreCatalog.java +++ b/bigquery/src/main/java/org/apache/iceberg/gcp/bigquery/BigQueryMetastoreCatalog.java @@ -57,24 +57,6 @@ public class BigQueryMetastoreCatalog extends BaseMetastoreCatalog implements SupportsNamespaces, Configurable { - /** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link BigQueryProperties#PROJECT_ID} - * instead. - */ - @Deprecated public static final String PROJECT_ID = "gcp.bigquery.project-id"; - - /** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link - * BigQueryProperties#GCP_LOCATION} instead. - */ - @Deprecated public static final String GCP_LOCATION = "gcp.bigquery.location"; - - /** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link - * BigQueryProperties#LIST_ALL_TABLES} instead. - */ - @Deprecated public static final String LIST_ALL_TABLES = "gcp.bigquery.list-all-tables"; - private static final Logger LOG = LoggerFactory.getLogger(BigQueryMetastoreCatalog.class); private String catalogName; diff --git a/core/src/main/java/org/apache/iceberg/BaseScan.java b/core/src/main/java/org/apache/iceberg/BaseScan.java index 242a5aaacc09..fd0a6986121a 100644 --- a/core/src/main/java/org/apache/iceberg/BaseScan.java +++ b/core/src/main/java/org/apache/iceberg/BaseScan.java @@ -103,10 +103,6 @@ public Table table() { return table; } - /** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link BaseScan#fileIO()} instead. 
- */ - @Deprecated protected FileIO io() { return table.io(); } diff --git a/core/src/main/java/org/apache/iceberg/ManifestFiles.java b/core/src/main/java/org/apache/iceberg/ManifestFiles.java index 5ac55f0cf41f..008f60caac62 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestFiles.java +++ b/core/src/main/java/org/apache/iceberg/ManifestFiles.java @@ -108,39 +108,6 @@ public static CloseableIterable readPaths( entry -> entry.file().location()); } - /** - * Returns a {@link CloseableIterable} of file paths in the {@link ManifestFile}. - * - * @param manifest a ManifestFile - * @param io a FileIO - * @return a manifest reader - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link #readPaths(ManifestFile, - * FileIO, Map)} instead. - */ - @Deprecated - public static CloseableIterable readPaths(ManifestFile manifest, FileIO io) { - return readPaths(manifest, io, null); - } - - /** - * Returns a new {@link ManifestReader} for a {@link ManifestFile}. - * - *

Note: Callers should use {@link ManifestFiles#read(ManifestFile, FileIO, Map)} to - * ensure the schema used by filters is the latest table schema. This should be used only when - * reading a manifest without filters. - * - * @param manifest a ManifestFile - * @param io a FileIO - * @return a manifest reader - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link #read(ManifestFile, FileIO, - * Map)} instead. Reading partition specs from manifest file metadata will not be supported - * for non-Avro manifest formats. - */ - @Deprecated - public static ManifestReader read(ManifestFile manifest, FileIO io) { - return read(manifest, io, null); - } - /** * Returns a new {@link ManifestReader} for a {@link ManifestFile}. * @@ -437,15 +404,6 @@ public static ManifestFile decode(byte[] manifestData) throws IOException { return AvroEncoderUtil.decode(manifestData); } - /** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link #open(ManifestFile, FileIO, - * Map)} instead. 
- */ - @Deprecated - static ManifestReader open(ManifestFile manifest, FileIO io) { - return open(manifest, io, null); - } - static ManifestReader open( ManifestFile manifest, FileIO io, Map specsById) { switch (manifest.content()) { diff --git a/core/src/main/java/org/apache/iceberg/MetricsConfig.java b/core/src/main/java/org/apache/iceberg/MetricsConfig.java index 2b55bcbeab22..0de04596c29a 100644 --- a/core/src/main/java/org/apache/iceberg/MetricsConfig.java +++ b/core/src/main/java/org/apache/iceberg/MetricsConfig.java @@ -32,7 +32,6 @@ import javax.annotation.concurrent.Immutable; import org.apache.iceberg.MetricsModes.MetricsMode; import org.apache.iceberg.exceptions.ValidationException; -import org.apache.iceberg.relocated.com.google.common.base.Joiner; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Sets; @@ -49,7 +48,6 @@ public final class MetricsConfig implements Serializable { private static final Logger LOG = LoggerFactory.getLogger(MetricsConfig.class); - private static final Joiner DOT = Joiner.on('.'); // Disable metrics by default for wide tables to prevent excessive metadata private static final MetricsMode DEFAULT_MODE = @@ -100,34 +98,6 @@ public static MetricsConfig forTable(Table table) { return from(table.properties(), table.schema(), table.sortOrder()); } - /** - * Creates a metrics config for a position delete file. - * - * @param table an Iceberg table - * @deprecated This method is deprecated as of version 1.11.0 and will be removed in 1.12.0. - * Position deletes that include row data are no longer supported. Use {@link - * #forPositionDelete()} instead. 
- */ - @Deprecated - public static MetricsConfig forPositionDelete(Table table) { - ImmutableMap.Builder columnModes = ImmutableMap.builder(); - - columnModes.put(MetadataColumns.DELETE_FILE_PATH.name(), MetricsModes.Full.get()); - columnModes.put(MetadataColumns.DELETE_FILE_POS.name(), MetricsModes.Full.get()); - - MetricsConfig tableConfig = forTable(table); - - MetricsMode defaultMode = tableConfig.defaultMode; - tableConfig.columnModes.forEach( - (columnAlias, mode) -> { - String positionDeleteColumnAlias = - DOT.join(MetadataColumns.DELETE_FILE_ROW_FIELD_NAME, columnAlias); - columnModes.put(positionDeleteColumnAlias, mode); - }); - - return new MetricsConfig(columnModes.build(), defaultMode); - } - static Set limitFieldIds(Schema schema, int limit) { return TypeUtil.visit( schema, diff --git a/core/src/main/java/org/apache/iceberg/SystemConfigs.java b/core/src/main/java/org/apache/iceberg/SystemConfigs.java index be59424992ee..66b122003c3b 100644 --- a/core/src/main/java/org/apache/iceberg/SystemConfigs.java +++ b/core/src/main/java/org/apache/iceberg/SystemConfigs.java @@ -80,21 +80,6 @@ private SystemConfigs() {} 8, Integer::parseUnsignedInt); - /** - * @deprecated will be removed in 1.12.0; use name mapping instead - */ - @Deprecated - public static final ConfigEntry NETFLIX_UNSAFE_PARQUET_ID_FALLBACK_ENABLED = - new ConfigEntry<>( - "iceberg.netflix.unsafe-parquet-id-fallback.enabled", - "ICEBERG_NETFLIX_UNSAFE_PARQUET_ID_FALLBACK_ENABLED", - true, - s -> { - LOG.warn( - "Fallback ID assignment in Parquet is UNSAFE and will be removed in 1.12.0. 
Use name mapping instead."); - return Boolean.parseBoolean(s); - }); - public static class ConfigEntry { private final String propertyKey; private final String envKey; diff --git a/core/src/main/java/org/apache/iceberg/SystemProperties.java b/core/src/main/java/org/apache/iceberg/SystemProperties.java deleted file mode 100644 index 484879bb21b1..000000000000 --- a/core/src/main/java/org/apache/iceberg/SystemProperties.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg; - -/** - * Configuration properties that are controlled by Java system properties. - * - * @deprecated Use {@link SystemConfigs} instead; will be removed in 1.12.0 - */ -@Deprecated -public class SystemProperties { - - private SystemProperties() {} - - /** - * Sets the size of the worker pool. The worker pool limits the number of tasks concurrently - * processing manifests in the base table implementation across all concurrent planning or commit - * operations. - */ - public static final String WORKER_THREAD_POOL_SIZE_PROP = "iceberg.worker.num-threads"; - - /** Whether to use the shared worker pool when planning table scans. 
*/ - public static final String SCAN_THREAD_POOL_ENABLED = "iceberg.scan.plan-in-worker-pool"; - - /** - * Maximum number of distinct {@link org.apache.iceberg.io.FileIO} that is allowed to have - * associated {@link org.apache.iceberg.io.ContentCache} in memory at a time. - */ - public static final String IO_MANIFEST_CACHE_MAX_FILEIO = "iceberg.io.manifest.cache.fileio-max"; - - public static final int IO_MANIFEST_CACHE_MAX_FILEIO_DEFAULT = 8; - - static boolean getBoolean(String systemProperty, boolean defaultValue) { - String value = System.getProperty(systemProperty); - if (value != null) { - return Boolean.parseBoolean(value); - } - - return defaultValue; - } -} diff --git a/core/src/main/java/org/apache/iceberg/TableProperties.java b/core/src/main/java/org/apache/iceberg/TableProperties.java index 021ef95d9122..dbd9295a064e 100644 --- a/core/src/main/java/org/apache/iceberg/TableProperties.java +++ b/core/src/main/java/org/apache/iceberg/TableProperties.java @@ -309,16 +309,6 @@ private TableProperties() {} public static final String WRITE_PARTITION_SUMMARY_LIMIT = "write.summary.partition-limit"; public static final int WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT = 0; - /** - * @deprecated will be removed in 1.12.0, writing manifest lists is always enabled - */ - @Deprecated public static final String MANIFEST_LISTS_ENABLED = "write.manifest-lists.enabled"; - - /** - * @deprecated will be removed in 1.12.0, writing manifest lists is always enabled - */ - @Deprecated public static final boolean MANIFEST_LISTS_ENABLED_DEFAULT = true; - public static final String METADATA_COMPRESSION = "write.metadata.compression-codec"; public static final String METADATA_COMPRESSION_DEFAULT = "none"; diff --git a/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java b/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java index c67a3089a6bf..e0e940622342 100644 --- a/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java +++ 
b/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java @@ -128,16 +128,6 @@ public static Schema pruneColumns(Schema schema, Set selectedIds) { return new PruneColumns(selectedIds, null).rootSchema(schema); } - /** - * @deprecated will be removed in 1.12.0; use applyNameMapping and pruneColumns(Schema, Set) - * instead. - */ - @Deprecated - public static Schema pruneColumns( - Schema schema, Set selectedIds, NameMapping nameMapping) { - return new PruneColumns(selectedIds, nameMapping).rootSchema(schema); - } - public static Schema buildAvroProjection( Schema schema, org.apache.iceberg.Schema expected, Map renames) { return AvroCustomOrderSchemaVisitor.visit(schema, new BuildAvroProjection(expected, renames)); diff --git a/core/src/main/java/org/apache/iceberg/data/avro/DataReader.java b/core/src/main/java/org/apache/iceberg/data/avro/DataReader.java deleted file mode 100644 index 5f813f8db576..000000000000 --- a/core/src/main/java/org/apache/iceberg/data/avro/DataReader.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.data.avro; - -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.function.Supplier; -import org.apache.avro.LogicalType; -import org.apache.avro.LogicalTypes; -import org.apache.avro.Schema; -import org.apache.avro.io.DatumReader; -import org.apache.avro.io.Decoder; -import org.apache.iceberg.avro.AvroSchemaUtil; -import org.apache.iceberg.avro.AvroSchemaWithTypeVisitor; -import org.apache.iceberg.avro.SupportsRowPosition; -import org.apache.iceberg.avro.ValueReader; -import org.apache.iceberg.avro.ValueReaders; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; -import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.Types; - -/** - * @deprecated will be removed in 1.12.0; use {@link PlannedDataReader} instead. - */ -@Deprecated -public class DataReader implements DatumReader, SupportsRowPosition { - - public static DataReader create( - org.apache.iceberg.Schema expectedSchema, Schema readSchema) { - return create(expectedSchema, readSchema, ImmutableMap.of()); - } - - public static DataReader create( - org.apache.iceberg.Schema expectedSchema, Schema readSchema, Map idToConstant) { - return new DataReader<>(expectedSchema, readSchema, idToConstant); - } - - private final Schema readSchema; - private final ValueReader reader; - private Schema fileSchema = null; - - @SuppressWarnings("unchecked") - protected DataReader( - org.apache.iceberg.Schema expectedSchema, Schema readSchema, Map idToConstant) { - this.readSchema = readSchema; - this.reader = - (ValueReader) - AvroSchemaWithTypeVisitor.visit( - expectedSchema, readSchema, new ReadBuilder(idToConstant)); - } - - @Override - public void setSchema(Schema newFileSchema) { - this.fileSchema = Schema.applyAliases(newFileSchema, readSchema); - } - - @Override - public T read(T reuse, Decoder decoder) throws IOException { - return DecoderResolver.resolveAndRead(decoder, readSchema, fileSchema, reader, 
reuse); - } - - @Override - public void setRowPositionSupplier(Supplier posSupplier) { - if (reader instanceof SupportsRowPosition) { - ((SupportsRowPosition) reader).setRowPositionSupplier(posSupplier); - } - } - - protected ValueReader createStructReader( - Types.StructType struct, List> fields, Map idToConstant) { - return GenericReaders.struct(struct, fields, idToConstant); - } - - private class ReadBuilder extends AvroSchemaWithTypeVisitor> { - private final Map idToConstant; - - private ReadBuilder(Map idToConstant) { - this.idToConstant = idToConstant; - } - - @Override - public ValueReader record( - Types.StructType struct, Schema record, List names, List> fields) { - return createStructReader(struct, fields, idToConstant); - } - - @Override - public ValueReader union(Type ignored, Schema union, List> options) { - return ValueReaders.union(options); - } - - @Override - public ValueReader array( - Types.ListType ignored, Schema array, ValueReader elementReader) { - return ValueReaders.array(elementReader); - } - - @Override - public ValueReader map( - Types.MapType iMap, Schema map, ValueReader keyReader, ValueReader valueReader) { - return ValueReaders.arrayMap(keyReader, valueReader); - } - - @Override - public ValueReader map(Types.MapType ignored, Schema map, ValueReader valueReader) { - return ValueReaders.map(ValueReaders.strings(), valueReader); - } - - @Override - public ValueReader primitive(Type.PrimitiveType ignored, Schema primitive) { - LogicalType logicalType = primitive.getLogicalType(); - if (logicalType != null) { - switch (logicalType.getName()) { - case "date": - return GenericReaders.dates(); - - case "time-micros": - return GenericReaders.times(); - - case "timestamp-micros": - if (AvroSchemaUtil.isTimestamptz(primitive)) { - return GenericReaders.timestamptz(); - } - return GenericReaders.timestamps(); - - case "timestamp-nanos": - if (AvroSchemaUtil.isTimestamptz(primitive)) { - return GenericReaders.timestamptzNanos(); - } - return 
GenericReaders.timestampNanos(); - - case "timestamp-millis": - if (AvroSchemaUtil.isTimestamptz(primitive)) { - return GenericReaders.timestamptzMillis(); - } - return GenericReaders.timestampMillis(); - - case "decimal": - return ValueReaders.decimal( - ValueReaders.decimalBytesReader(primitive), - ((LogicalTypes.Decimal) logicalType).getScale()); - - case "uuid": - return ValueReaders.uuids(); - - default: - throw new IllegalArgumentException("Unknown logical type: " + logicalType); - } - } - - switch (primitive.getType()) { - case NULL: - return ValueReaders.nulls(); - case BOOLEAN: - return ValueReaders.booleans(); - case INT: - return ValueReaders.ints(); - case LONG: - return ValueReaders.longs(); - case FLOAT: - return ValueReaders.floats(); - case DOUBLE: - return ValueReaders.doubles(); - case STRING: - // might want to use a binary-backed container like Utf8 - return ValueReaders.strings(); - case FIXED: - return ValueReaders.fixed(primitive.getFixedSize()); - case BYTES: - return ValueReaders.byteBuffers(); - default: - throw new IllegalArgumentException("Unsupported type: " + primitive); - } - } - } -} diff --git a/core/src/main/java/org/apache/iceberg/data/avro/RawDecoder.java b/core/src/main/java/org/apache/iceberg/data/avro/RawDecoder.java index 9f3cdfdad5f4..eba06489d9a4 100644 --- a/core/src/main/java/org/apache/iceberg/data/avro/RawDecoder.java +++ b/core/src/main/java/org/apache/iceberg/data/avro/RawDecoder.java @@ -27,8 +27,6 @@ import org.apache.avro.io.DatumReader; import org.apache.avro.io.DecoderFactory; import org.apache.avro.message.MessageDecoder; -import org.apache.iceberg.avro.ProjectionDatumReader; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; public class RawDecoder extends MessageDecoder.BaseDecoder { private static final ThreadLocal DECODER = new ThreadLocal<>(); @@ -56,26 +54,6 @@ public static RawDecoder create( private final DatumReader reader; - /** - * Creates a new {@link MessageDecoder} that 
constructs datum instances described by the {@link - * Schema readSchema}. - * - *

The {@code readSchema} is used for the expected schema and the {@code writeSchema} is the - * schema used to decode buffers. The {@code writeSchema} must be the schema that was used to - * encode all buffers decoded by this class. - * - * @deprecated will be removed in 1.12.0; use {@link #create(org.apache.iceberg.Schema, Function, - * Schema)} instead - */ - @Deprecated - public RawDecoder( - org.apache.iceberg.Schema readSchema, - Function> readerFunction, - Schema writeSchema) { - this.reader = new ProjectionDatumReader<>(readerFunction, readSchema, ImmutableMap.of(), null); - this.reader.setSchema(writeSchema); - } - /** * Creates a new {@link MessageDecoder} that constructs datum instances using the {@code reader}. */ diff --git a/core/src/main/java/org/apache/iceberg/deletes/PositionDelete.java b/core/src/main/java/org/apache/iceberg/deletes/PositionDelete.java index c3b6cbaa9bff..48a4aa635c79 100644 --- a/core/src/main/java/org/apache/iceberg/deletes/PositionDelete.java +++ b/core/src/main/java/org/apache/iceberg/deletes/PositionDelete.java @@ -43,12 +43,6 @@ public PositionDelete set(CharSequence newPath, long newPos) { return this; } - /** - * @deprecated This method is deprecated as of version 1.11.0 and will be removed in 1.12.0. - * Position deletes that include row data are no longer supported. Use {@link - * #set(CharSequence, long)} instead. - */ - @Deprecated public PositionDelete set(CharSequence newPath, long newPos, R newRow) { this.path = newPath; this.pos = newPos; @@ -69,11 +63,6 @@ public long pos() { return pos; } - /** - * @deprecated This method is deprecated as of version 1.11.0 and will be removed in 1.12.0. - * Position deletes that include row data are no longer supported. 
- */ - @Deprecated public R row() { return row; } diff --git a/core/src/main/java/org/apache/iceberg/encryption/StandardEncryptionManager.java b/core/src/main/java/org/apache/iceberg/encryption/StandardEncryptionManager.java index bb5126d23e3f..f1b19fa30489 100644 --- a/core/src/main/java/org/apache/iceberg/encryption/StandardEncryptionManager.java +++ b/core/src/main/java/org/apache/iceberg/encryption/StandardEncryptionManager.java @@ -53,15 +53,6 @@ public class StandardEncryptionManager implements EncryptionManager { private transient volatile LoadingCache unwrappedKeyCache; private transient volatile SecureRandom lazyRNG = null; - /** - * @deprecated will be removed in 1.12.0. - */ - @Deprecated - public StandardEncryptionManager( - String tableKeyId, int dataKeyLength, KeyManagementClient kmsClient) { - this(List.of(), tableKeyId, dataKeyLength, kmsClient); - } - /** * @param keys encryption keys from table metadata * @param tableKeyId table encryption key id @@ -137,22 +128,6 @@ private SecureRandom workerRNG() { return lazyRNG; } - /** - * @deprecated will be removed in 1.12.0. - */ - @Deprecated - public ByteBuffer wrapKey(ByteBuffer secretKey) { - return kmsClient.wrapKey(secretKey, tableKeyId); - } - - /** - * @deprecated will be removed in 1.12.0. 
- */ - @Deprecated - public ByteBuffer unwrapKey(ByteBuffer wrappedSecretKey) { - return kmsClient.unwrapKey(wrappedSecretKey, tableKeyId); - } - Map encryptionKeys() { return encryptionKeys; } diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopFileIO.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopFileIO.java index 877290f48e3f..fbb50abb85a7 100644 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopFileIO.java +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopFileIO.java @@ -66,16 +66,7 @@ public class HadoopFileIO implements HadoopConfigurable, DelegateFileIO { public HadoopFileIO() {} public HadoopFileIO(Configuration hadoopConf) { - this(new SerializableConfiguration(hadoopConf)); - } - - /** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link - * HadoopFileIO#HadoopFileIO(Configuration)} instead. - */ - @Deprecated - public HadoopFileIO(SerializableSupplier hadoopConf) { - this.hadoopConf = hadoopConf; + this.hadoopConf = new SerializableConfiguration(hadoopConf); } public Configuration conf() { @@ -138,10 +129,6 @@ public Configuration getConf() { return hadoopConf.get(); } - /** - * @deprecated since 1.11.0, will be removed in 1.12.0. - */ - @Deprecated @Override public void serializeConfWith( Function> confSerializer) { diff --git a/core/src/main/java/org/apache/iceberg/io/ContentCache.java b/core/src/main/java/org/apache/iceberg/io/ContentCache.java index 484306690c67..22bb8f1e455b 100644 --- a/core/src/main/java/org/apache/iceberg/io/ContentCache.java +++ b/core/src/main/java/org/apache/iceberg/io/ContentCache.java @@ -139,17 +139,6 @@ public void invalidate(String key) { cache.invalidate(key); } - /** - * @deprecated since 1.7.0, will be removed in 1.12.0; This method does only best-effort - * invalidation and is susceptible to a race condition. 
If the caller changed the state that - * could be cached (perhaps files on the storage) and calls this method, there is no guarantee - * that the cache will not contain stale entries some time after this method returns. - */ - @Deprecated - public void invalidateAll() { - cache.invalidateAll(); - } - public void cleanUp() { cache.cleanUp(); } diff --git a/core/src/main/java/org/apache/iceberg/rest/CatalogHandlers.java b/core/src/main/java/org/apache/iceberg/rest/CatalogHandlers.java index 3a1e62260aae..226ea67d6a21 100644 --- a/core/src/main/java/org/apache/iceberg/rest/CatalogHandlers.java +++ b/core/src/main/java/org/apache/iceberg/rest/CatalogHandlers.java @@ -502,15 +502,6 @@ public static void tableExists(Catalog catalog, TableIdentifier ident) { } } - /** - * @deprecated since 1.11.0, will be removed in 1.12.0. Use {@link #loadTable(Catalog, - * TableIdentifier, SnapshotMode)} instead. - */ - @Deprecated - public static LoadTableResponse loadTable(Catalog catalog, TableIdentifier ident) { - return loadTable(catalog, ident, SnapshotMode.ALL); - } - public static LoadTableResponse loadTable( Catalog catalog, TableIdentifier ident, SnapshotMode mode) { Table table = catalog.loadTable(ident); diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java index 9effb875e05f..57f93829e9bb 100644 --- a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java @@ -116,12 +116,6 @@ public class RESTSessionCatalog extends BaseViewSessionCatalog private static final Logger LOG = LoggerFactory.getLogger(RESTSessionCatalog.class); private static final String DEFAULT_FILE_IO_IMPL = "org.apache.iceberg.io.ResolvingFileIO"; - /** - * @deprecated will be removed in 1.12.0. Use {@link - * org.apache.iceberg.rest.RESTCatalogProperties#PAGE_SIZE} instead. 
- */ - @Deprecated public static final String REST_PAGE_SIZE = "rest-page-size"; - // these default endpoints must not be updated in order to maintain backwards compatibility with // legacy servers private static final Set DEFAULT_ENDPOINTS = diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTUtil.java b/core/src/main/java/org/apache/iceberg/rest/RESTUtil.java index f4fdf3af26e7..3686fae0b237 100644 --- a/core/src/main/java/org/apache/iceberg/rest/RESTUtil.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTUtil.java @@ -40,20 +40,6 @@ public class RESTUtil { /** The namespace separator as url encoded UTF-8 character */ static final String NAMESPACE_SEPARATOR_URLENCODED_UTF_8 = "%1F"; - /** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link - * RESTUtil#namespaceToQueryParam(Namespace)}} instead. - */ - @Deprecated - public static final Joiner NAMESPACE_JOINER = Joiner.on(NAMESPACE_SEPARATOR_AS_UNICODE); - - /** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link - * RESTUtil#namespaceFromQueryParam(String)} instead. - */ - @Deprecated - public static final Splitter NAMESPACE_SPLITTER = Splitter.on(NAMESPACE_SEPARATOR_AS_UNICODE); - public static final String IDEMPOTENCY_KEY_HEADER = "Idempotency-Key"; private RESTUtil() {} @@ -171,9 +157,8 @@ public static String decodeString(String encoded) { /** * This converts the given namespace to a string and separates each part in a multipart namespace - * using the unicode character '\u001f'. Note that this method is different from {@link - * RESTUtil#encodeNamespace(Namespace)}, which uses the UTF-8 escaped version of '\u001f', which - * is '0x1F'. + * using the unicode character '\u001f'. Note that this method uses the raw unicode separator, + * unlike the percent-encoded form used by the REST catalog path encoding. * *

{@link #namespaceFromQueryParam(String)} should be used to convert the namespace string back * to a {@link Namespace} instance. @@ -188,8 +173,8 @@ public static String namespaceToQueryParam(Namespace namespace) { /** * This converts the given namespace to a string and separates each part in a multipart namespace - * using the provided unicode separator. Note that this method is different from {@link - * RESTUtil#encodeNamespace(Namespace)}, which uses a UTF-8 escaped separator. + * using the provided unicode separator. Note that this method uses the raw unicode separator, + * unlike the percent-encoded form used by the REST catalog path encoding. * *

{@link #namespaceFromQueryParam(String, String)} should be used to convert the namespace * string back to a {@link Namespace} instance. @@ -253,24 +238,6 @@ public static Namespace namespaceFromQueryParam( return Namespace.of(splitter.splitToStream(namespace).toArray(String[]::new)); } - /** - * Returns a String representation of a namespace that is suitable for use in a URL / URI. - * - *

This function needs to be called when a namespace is used as a path variable (or query - * parameter etc.), to format the namespace per the spec. - * - *

{@link #decodeNamespace} should be used to parse the namespace from a URL parameter. - * - * @param ns namespace to encode - * @return UTF-8 encoded string representing the namespace, suitable for use as a URL parameter - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link - * RESTUtil#encodeNamespace(Namespace, String)} instead. - */ - @Deprecated - public static String encodeNamespace(Namespace ns) { - return encodeNamespace(ns, NAMESPACE_SEPARATOR_URLENCODED_UTF_8); - } - /** * Returns a String representation of a namespace that is suitable for use in a URL / URI. * @@ -299,22 +266,6 @@ public static String encodeNamespace(Namespace namespace, String separator) { return Joiner.on(separator).join(encodedLevels); } - /** - * Takes in a string representation of a namespace as used for a URL parameter and returns the - * corresponding namespace. - * - *

See also {@link #encodeNamespace} for generating correctly formatted URLs. - * - * @param encodedNs a namespace to decode - * @return a namespace - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link - * RESTUtil#decodeNamespace(String, String)} instead. - */ - @Deprecated - public static Namespace decodeNamespace(String encodedNs) { - return decodeNamespace(encodedNs, NAMESPACE_SEPARATOR_URLENCODED_UTF_8); - } - /** * Takes in a string representation of a namespace as used for a URL parameter and returns the * corresponding namespace. diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanTaskResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanTaskResponse.java index e83cacc48459..3d42ffce4726 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanTaskResponse.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanTaskResponse.java @@ -93,15 +93,6 @@ public B withFileScanTasks(List tasks) { return self(); } - /** - * @deprecated since 1.11.0, will be removed in 1.12.0. - */ - @Deprecated - public B withDeleteFiles(List deleteFilesList) { - this.deleteFiles = DeleteFileSet.of(deleteFilesList); - return self(); - } - /** * @deprecated since 1.11.0, visibility will be reduced in 1.12.0. 
*/ diff --git a/core/src/main/java/org/apache/iceberg/util/SnapshotUtil.java b/core/src/main/java/org/apache/iceberg/util/SnapshotUtil.java index 370bbfed336e..e34cd34dd8b8 100644 --- a/core/src/main/java/org/apache/iceberg/util/SnapshotUtil.java +++ b/core/src/main/java/org/apache/iceberg/util/SnapshotUtil.java @@ -21,20 +21,14 @@ import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; -import java.util.Objects; import java.util.function.Function; import org.apache.iceberg.BaseMetadataTable; -import org.apache.iceberg.DataFile; import org.apache.iceberg.HistoryEntry; import org.apache.iceberg.Schema; import org.apache.iceberg.Snapshot; -import org.apache.iceberg.SnapshotChanges; import org.apache.iceberg.SnapshotRef; import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; -import org.apache.iceberg.exceptions.ValidationException; -import org.apache.iceberg.io.CloseableIterable; -import org.apache.iceberg.io.FileIO; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; @@ -284,66 +278,6 @@ private static Iterable toIds(Iterable snapshots) { return Iterables.transform(snapshots, Snapshot::snapshotId); } - /** - * @deprecated will be removed in 1.12.0, use {@link SnapshotChanges} with {@link - * #ancestorsBetween(long, Long, Function)} instead. 
- */ - @Deprecated - public static List newFiles( - Long baseSnapshotId, long latestSnapshotId, Function lookup, FileIO io) { - List newFiles = Lists.newArrayList(); - Snapshot lastSnapshot = null; - for (Snapshot currentSnapshot : ancestorsOf(latestSnapshotId, lookup)) { - lastSnapshot = currentSnapshot; - if (Objects.equals(currentSnapshot.snapshotId(), baseSnapshotId)) { - return newFiles; - } - - Iterables.addAll(newFiles, currentSnapshot.addedDataFiles(io)); - } - - ValidationException.check( - Objects.equals(lastSnapshot.parentId(), baseSnapshotId), - "Cannot determine history between read snapshot %s and the last known ancestor %s", - baseSnapshotId, - lastSnapshot.snapshotId()); - - return newFiles; - } - - /** - * @deprecated will be removed in 1.12.0, use {@link SnapshotChanges} with {@link - * #ancestorsBetween(long, Long, Function)} instead. - */ - @Deprecated - public static CloseableIterable newFilesBetween( - Long startSnapshotId, long endSnapshotId, Function lookup, FileIO io) { - - List snapshots = Lists.newArrayList(); - Snapshot lastSnapshot = null; - for (Snapshot currentSnapshot : ancestorsOf(endSnapshotId, lookup)) { - lastSnapshot = currentSnapshot; - if (Objects.equals(currentSnapshot.snapshotId(), startSnapshotId)) { - break; - } - - snapshots.add(currentSnapshot); - } - - if (lastSnapshot != null) { - ValidationException.check( - Objects.equals(lastSnapshot.snapshotId(), startSnapshotId) - || Objects.equals(lastSnapshot.parentId(), startSnapshotId), - "Cannot determine history between read snapshot %s and the last known ancestor %s", - startSnapshotId, - lastSnapshot.snapshotId()); - } - - return new ParallelIterable<>( - Iterables.transform(snapshots, snapshot -> snapshot.addedDataFiles(io)), - ThreadPools.getWorkerPool()); - } - /** * Traverses the history of the table's current snapshot and finds the snapshot with the given * snapshot id as its parent. 
diff --git a/core/src/main/java/org/apache/iceberg/util/TableScanUtil.java b/core/src/main/java/org/apache/iceberg/util/TableScanUtil.java index 6291acbf9c73..f143a300b87c 100644 --- a/core/src/main/java/org/apache/iceberg/util/TableScanUtil.java +++ b/core/src/main/java/org/apache/iceberg/util/TableScanUtil.java @@ -25,7 +25,6 @@ import org.apache.iceberg.BaseCombinedScanTask; import org.apache.iceberg.BaseScanTaskGroup; import org.apache.iceberg.CombinedScanTask; -import org.apache.iceberg.FileContent; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.MergeableScanTask; import org.apache.iceberg.PartitionData; @@ -51,30 +50,6 @@ public class TableScanUtil { private TableScanUtil() {} - /** - * @deprecated since 1.11.0 and will be removed in 1.12.0 - */ - @Deprecated - public static boolean hasDeletes(CombinedScanTask task) { - return task.files().stream().anyMatch(TableScanUtil::hasDeletes); - } - - /** - * This is temporarily introduced since we plan to support pos-delete vectorized read first, then - * get to the equality-delete support. We will remove this method once both are supported. 
- * - * @deprecated since 1.11.0 and will be removed in 1.12.0 - */ - @Deprecated - public static boolean hasEqDeletes(CombinedScanTask task) { - return task.files().stream() - .anyMatch( - t -> - t.deletes().stream() - .anyMatch( - deleteFile -> deleteFile.content().equals(FileContent.EQUALITY_DELETES))); - } - public static boolean hasDeletes(FileScanTask task) { return !task.deletes().isEmpty(); } diff --git a/core/src/main/java/org/apache/iceberg/util/ThreadPools.java b/core/src/main/java/org/apache/iceberg/util/ThreadPools.java index bb508295ecfa..7167d14e7d7e 100644 --- a/core/src/main/java/org/apache/iceberg/util/ThreadPools.java +++ b/core/src/main/java/org/apache/iceberg/util/ThreadPools.java @@ -101,48 +101,6 @@ private static class AuthRefreshPoolHolder { "auth-session-refresh", AUTH_REFRESH_THREAD_POOL_SIZE, Duration.ZERO); } - /** - * Creates a fixed-size thread pool that uses daemon threads. The pool is wrapped with {@link - * MoreExecutors#getExitingExecutorService(ThreadPoolExecutor)}, which registers a shutdown hook - * to ensure the pool terminates when the JVM exits. Important: Even if the pool is - * explicitly shut down using {@link ExecutorService#shutdown()}, the shutdown hook is not - * removed. This can lead to accumulation of shutdown hooks if this method is used repeatedly for - * short-lived thread pools. - * - *

For clarity and to avoid potential issues with shutdown hook accumulation, prefer using - * either {@link #newExitingWorkerPool(String, int)} or {@link #newFixedThreadPool(String, int)}, - * depending on the intended lifecycle of the thread pool. - * - * @deprecated will be removed in 1.12.0. Use {@link #newExitingWorkerPool(String, int)} for - * long-lived thread pools that require a shutdown hook, or {@link #newFixedThreadPool(String, - * int)} for short-lived thread pools where you manage the lifecycle. - */ - @Deprecated - public static ExecutorService newWorkerPool(String namePrefix) { - return newExitingWorkerPool(namePrefix, WORKER_THREAD_POOL_SIZE); - } - - /** - * Creates a fixed-size thread pool that uses daemon threads. The pool is wrapped with {@link - * MoreExecutors#getExitingExecutorService(ThreadPoolExecutor)}, which registers a shutdown hook - * to ensure the pool terminates when the JVM exits. Important: Even if the pool is - * explicitly shut down using {@link ExecutorService#shutdown()}, the shutdown hook is not - * removed. This can lead to accumulation of shutdown hooks if this method is used repeatedly for - * short-lived thread pools. - * - *

For clarity and to avoid potential issues with shutdown hook accumulation, prefer using - * either {@link #newExitingWorkerPool(String, int)} or {@link #newFixedThreadPool(String, int)}, - * depending on the intended lifecycle of the thread pool. - * - * @deprecated will be removed in 1.12.0. Use {@link #newExitingWorkerPool(String, int)} for - * long-lived thread pools that require a shutdown hook, or {@link #newFixedThreadPool(String, - * int)} for short-lived thread pools where you manage the lifecycle. - */ - @Deprecated - public static ExecutorService newWorkerPool(String namePrefix, int poolSize) { - return newExitingWorkerPool(namePrefix, poolSize); - } - /** * Creates a fixed-size thread pool that uses daemon threads and registers a shutdown hook to * ensure the pool terminates when the JVM exits. This is suitable for long-lived thread pools diff --git a/core/src/test/java/org/apache/iceberg/TestManifestReader.java b/core/src/test/java/org/apache/iceberg/TestManifestReader.java index de2b7fd859e6..75551bafce94 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestReader.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestReader.java @@ -270,15 +270,14 @@ public void testDataFileSplitOffsetsNullWhenInvalid() throws IOException { } } - @SuppressWarnings("deprecation") @TestTemplate - public void testDeprecatedReadWithoutSpecsById() throws IOException { + public void testReadWithoutSpecsById() throws IOException { assumeThat(formatVersion) .as("Deprecated read without specsById requires Avro metadata; V4 uses Parquet") .isLessThan(TableMetadata.MIN_FORMAT_VERSION_PARQUET_MANIFESTS); ManifestFile manifest = writeManifest(1000L, manifestEntry(Status.EXISTING, 1000L, FILE_A)); - try (ManifestReader reader = ManifestFiles.read(manifest, FILE_IO)) { + try (ManifestReader reader = ManifestFiles.read(manifest, FILE_IO, null)) { ManifestEntry entry = Iterables.getOnlyElement(reader.entries()); assertThat(entry.status()).isEqualTo(Status.EXISTING); 
assertThat(entry.file().location()).isEqualTo(FILE_A.location()); diff --git a/core/src/test/java/org/apache/iceberg/data/avro/TestDataReader.java b/core/src/test/java/org/apache/iceberg/data/avro/TestDataReader.java deleted file mode 100644 index 966f01267d67..000000000000 --- a/core/src/test/java/org/apache/iceberg/data/avro/TestDataReader.java +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.data.avro; - -import static org.apache.iceberg.avro.AvroSchemaUtil.ADJUST_TO_UTC_PROP; -import static org.assertj.core.api.Assertions.assertThat; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.time.LocalDateTime; -import java.time.OffsetDateTime; -import java.time.ZoneOffset; -import org.apache.avro.LogicalTypes; -import org.apache.avro.Schema; -import org.apache.avro.SchemaBuilder; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.BinaryEncoder; -import org.apache.avro.io.DecoderFactory; -import org.apache.avro.io.EncoderFactory; -import org.apache.iceberg.data.Record; -import org.apache.iceberg.types.Types; -import org.apache.iceberg.util.DateTimeUtil; -import org.junit.jupiter.api.Test; - -class TestDataReader { - - @Test - public void timestampDataReader() throws IOException { - org.apache.iceberg.Schema icebergSchema = - new org.apache.iceberg.Schema( - Types.NestedField.required(1, "timestamp_nanos", Types.TimestampType.withoutZone()), - Types.NestedField.required(2, "timestamp_micros", Types.TimestampType.withoutZone()), - Types.NestedField.required(3, "timestamp_millis", Types.TimestampType.withoutZone())); - - Schema avroSchema = - SchemaBuilder.record("test_programmatic") - .fields() - .name("timestamp_nanos") - .type(LogicalTypes.timestampNanos().addToSchema(Schema.create(Schema.Type.LONG))) - .noDefault() - .name("timestamp_micros") - .type(LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG))) - .noDefault() - .name("timestamp_millis") - .type(LogicalTypes.timestampMillis().addToSchema(Schema.create(Schema.Type.LONG))) - .noDefault() - .endRecord(); - - avroSchema.getField("timestamp_nanos").addProp("field-id", 1); - avroSchema.getField("timestamp_micros").addProp("field-id", 2); - 
avroSchema.getField("timestamp_millis").addProp("field-id", 3); - - DataReader reader = DataReader.create(icebergSchema, avroSchema); - reader.setSchema(avroSchema); - - // post-epoch timestamps - GenericRecord avroRecord = new GenericData.Record(avroSchema); - LocalDateTime timestampNanos = LocalDateTime.of(2023, 10, 15, 14, 30, 45, 123456789); - LocalDateTime timestampMicros = LocalDateTime.of(2023, 10, 15, 14, 30, 45, 123456000); - LocalDateTime timestampMillis = LocalDateTime.of(2023, 10, 15, 14, 30, 45, 123000000); - - avroRecord.put("timestamp_nanos", DateTimeUtil.nanosFromTimestamp(timestampNanos)); - avroRecord.put("timestamp_micros", DateTimeUtil.microsFromTimestamp(timestampMicros)); - avroRecord.put("timestamp_millis", DateTimeUtil.millisFromTimestamp(timestampMillis)); - - Record result = readRecord(reader, avroSchema, avroRecord); - - assertThat(result.getField("timestamp_nanos")).isEqualTo(timestampNanos); - assertThat(result.getField("timestamp_micros")).isEqualTo(timestampMicros); - assertThat(result.getField("timestamp_millis")).isEqualTo(timestampMillis); - - // pre-epoch timestamps - GenericRecord preEpochRecord = new GenericData.Record(avroSchema); - LocalDateTime preEpochNanos = LocalDateTime.of(1969, 1, 1, 10, 11, 12, 123456789); - LocalDateTime preEpochMicros = LocalDateTime.of(1968, 1, 1, 10, 11, 12, 123456000); - LocalDateTime preEpochMillis = LocalDateTime.of(1967, 1, 1, 10, 11, 12, 123000000); - - preEpochRecord.put("timestamp_nanos", DateTimeUtil.nanosFromTimestamp(preEpochNanos)); - preEpochRecord.put("timestamp_micros", DateTimeUtil.microsFromTimestamp(preEpochMicros)); - preEpochRecord.put("timestamp_millis", DateTimeUtil.millisFromTimestamp(preEpochMillis)); - - Record preEpochResult = readRecord(reader, avroSchema, preEpochRecord); - - assertThat(preEpochResult.getField("timestamp_nanos")).isEqualTo(preEpochNanos); - assertThat(preEpochResult.getField("timestamp_micros")).isEqualTo(preEpochMicros); - 
assertThat(preEpochResult.getField("timestamp_millis")).isEqualTo(preEpochMillis); - } - - @Test - public void timestampTzDataReader() throws IOException { - org.apache.iceberg.Schema icebergSchema = - new org.apache.iceberg.Schema( - Types.NestedField.required(1, "timestamp_nanos_tz", Types.TimestampType.withZone()), - Types.NestedField.required(2, "timestamp_micros_tz", Types.TimestampType.withZone()), - Types.NestedField.required(3, "timestamp_millis_tz", Types.TimestampType.withZone())); - - Schema avroSchema = - SchemaBuilder.record("test_tz") - .fields() - .name("timestamp_nanos_tz") - .type(LogicalTypes.timestampNanos().addToSchema(utcAdjustedLongSchema())) - .noDefault() - .name("timestamp_micros_tz") - .type(LogicalTypes.timestampMicros().addToSchema(utcAdjustedLongSchema())) - .noDefault() - .name("timestamp_millis_tz") - .type(LogicalTypes.timestampMillis().addToSchema(utcAdjustedLongSchema())) - .noDefault() - .endRecord(); - - avroSchema.getField("timestamp_nanos_tz").addProp("field-id", 1); - avroSchema.getField("timestamp_micros_tz").addProp("field-id", 2); - avroSchema.getField("timestamp_millis_tz").addProp("field-id", 3); - - DataReader reader = DataReader.create(icebergSchema, avroSchema); - reader.setSchema(avroSchema); - - // post-epoch timestamps - GenericRecord avroRecord = new GenericData.Record(avroSchema); - - OffsetDateTime offsetTimestampNanos = - OffsetDateTime.of(2023, 10, 15, 14, 30, 45, 123456789, ZoneOffset.ofHours(-8)); - OffsetDateTime offsetTimestampMicros = - OffsetDateTime.of(2023, 10, 15, 14, 30, 45, 123456000, ZoneOffset.ofHours(5)); - OffsetDateTime offsetTimestampMillis = - OffsetDateTime.of(2023, 10, 15, 14, 30, 45, 123000000, ZoneOffset.ofHours(-3)); - - avroRecord.put("timestamp_nanos_tz", DateTimeUtil.nanosFromTimestamptz(offsetTimestampNanos)); - avroRecord.put( - "timestamp_micros_tz", DateTimeUtil.microsFromTimestamptz(offsetTimestampMicros)); - avroRecord.put( - "timestamp_millis_tz", 
DateTimeUtil.millisFromTimestamptz(offsetTimestampMillis)); - - Record result = readRecord(reader, avroSchema, avroRecord); - - assertThat(result.getField("timestamp_nanos_tz")) - .isEqualTo(offsetTimestampNanos.withOffsetSameInstant(ZoneOffset.UTC)); - assertThat(result.getField("timestamp_micros_tz")) - .isEqualTo(offsetTimestampMicros.withOffsetSameInstant(ZoneOffset.UTC)); - assertThat(result.getField("timestamp_millis_tz")) - .isEqualTo(offsetTimestampMillis.withOffsetSameInstant(ZoneOffset.UTC)); - - // pre-epoch timestamps - GenericRecord preEpochRecord = new GenericData.Record(avroSchema); - - OffsetDateTime preEpochTimestampNanos = - OffsetDateTime.of(1969, 1, 1, 10, 11, 12, 123456789, ZoneOffset.ofHours(-8)); - OffsetDateTime preEpochTimestampMicros = - OffsetDateTime.of(1968, 1, 1, 10, 11, 12, 123456000, ZoneOffset.ofHours(5)); - OffsetDateTime preEpochTimestampMillis = - OffsetDateTime.of(1967, 1, 1, 10, 11, 12, 123000000, ZoneOffset.ofHours(-3)); - - preEpochRecord.put( - "timestamp_nanos_tz", DateTimeUtil.nanosFromTimestamptz(preEpochTimestampNanos)); - preEpochRecord.put( - "timestamp_micros_tz", DateTimeUtil.microsFromTimestamptz(preEpochTimestampMicros)); - preEpochRecord.put( - "timestamp_millis_tz", DateTimeUtil.millisFromTimestamptz(preEpochTimestampMillis)); - - Record preEpochResult = readRecord(reader, avroSchema, preEpochRecord); - - assertThat(preEpochResult.getField("timestamp_nanos_tz")) - .isEqualTo(preEpochTimestampNanos.withOffsetSameInstant(ZoneOffset.UTC)); - assertThat(preEpochResult.getField("timestamp_micros_tz")) - .isEqualTo(preEpochTimestampMicros.withOffsetSameInstant(ZoneOffset.UTC)); - assertThat(preEpochResult.getField("timestamp_millis_tz")) - .isEqualTo(preEpochTimestampMillis.withOffsetSameInstant(ZoneOffset.UTC)); - } - - private Record readRecord(DataReader reader, Schema avroSchema, GenericRecord avroRecord) - throws IOException { - try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { - BinaryEncoder 
encoder = EncoderFactory.get().binaryEncoder(out, null); - GenericDatumWriter writer = new GenericDatumWriter<>(avroSchema); - writer.write(avroRecord, encoder); - encoder.flush(); - - try (ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray())) { - return reader.read(null, DecoderFactory.get().binaryDecoder(in, null)); - } - } - } - - private Schema utcAdjustedLongSchema() { - Schema schema = Schema.create(Schema.Type.LONG); - schema.addProp(ADJUST_TO_UTC_PROP, "true"); - return schema; - } -} diff --git a/core/src/test/java/org/apache/iceberg/rest/TestRESTUtil.java b/core/src/test/java/org/apache/iceberg/rest/TestRESTUtil.java index 1ed732ebc91a..253c719c0ac0 100644 --- a/core/src/test/java/org/apache/iceberg/rest/TestRESTUtil.java +++ b/core/src/test/java/org/apache/iceberg/rest/TestRESTUtil.java @@ -108,8 +108,8 @@ public void testRoundTripUrlEncodeDecodeNamespace(String namespaceSeparator) { @Test public void encodeAsOldClientAndDecodeAsNewServer() { Namespace namespace = Namespace.of("first", "second", "third"); - // old client would call encodeNamespace without specifying a separator - String encodedNamespace = RESTUtil.encodeNamespace(namespace); + // old client would call encodeNamespace with the legacy separator + String encodedNamespace = RESTUtil.encodeNamespace(namespace, "%1F"); assertThat(encodedNamespace).contains(RESTUtil.NAMESPACE_SEPARATOR_URLENCODED_UTF_8); // old client would also call namespaceToQueryParam without specifying a separator @@ -130,11 +130,11 @@ public void encodeAsOldClientAndDecodeAsNewServer() { @Test public void testNamespaceUrlEncodeDecodeDoesNotAllowNull() { assertThatExceptionOfType(IllegalArgumentException.class) - .isThrownBy(() -> RESTUtil.encodeNamespace(null)) + .isThrownBy(() -> RESTUtil.encodeNamespace(null, "%1F")) .withMessage("Invalid namespace: null"); assertThatExceptionOfType(IllegalArgumentException.class) - .isThrownBy(() -> RESTUtil.decodeNamespace(null)) + .isThrownBy(() -> 
RESTUtil.decodeNamespace(null, "%1F")) .withMessage("Invalid namespace: null"); } diff --git a/core/src/test/java/org/apache/iceberg/rest/TestResourcePaths.java b/core/src/test/java/org/apache/iceberg/rest/TestResourcePaths.java index a742b89a7627..823c85912a6b 100644 --- a/core/src/test/java/org/apache/iceberg/rest/TestResourcePaths.java +++ b/core/src/test/java/org/apache/iceberg/rest/TestResourcePaths.java @@ -119,14 +119,14 @@ public void nestedNamespaceWithLegacySeparator() { // legacy separator is always used by default, so no need to configure it ResourcePaths pathsWithLegacySeparator = ResourcePaths.forCatalogProperties(ImmutableMap.of()); - // Encode namespace using legacy separator. No need to provide the separator to encodeNamespace - String legacyEncodedNamespace = RESTUtil.encodeNamespace(namespace); + // Encode namespace using legacy separator. + String legacyEncodedNamespace = RESTUtil.encodeNamespace(namespace, "%1F"); assertThat(pathsWithLegacySeparator.namespace(namespace)) .contains(legacyEncodedNamespace) .contains(legacySeparator); - // Decode the namespace containing legacy separator without providing the separator - assertThat(RESTUtil.decodeNamespace(legacyEncodedNamespace)).isEqualTo(namespace); + // Decode the namespace containing legacy separator + assertThat(RESTUtil.decodeNamespace(legacyEncodedNamespace, "%1F")).isEqualTo(namespace); // Decode the namespace containing legacy separator with providing the new separator assertThat(RESTUtil.decodeNamespace(legacyEncodedNamespace, newSeparator)).isEqualTo(namespace); diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java index 841083f88baf..f0eb1cfdf284 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java +++ 
b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java @@ -144,18 +144,6 @@ public void roundTripSerdeWithInvalidPlanStatusSubmittedWithTasksPresent() { @Test public void roundTripSerdeWithInvalidPlanStatusSubmittedWithDeleteFilesNoFileScanTasksPresent() { - PlanStatus planStatus = PlanStatus.fromName("submitted"); - assertThatThrownBy( - () -> { - FetchPlanningResultResponse.builder() - .withPlanStatus(planStatus) - .withDeleteFiles(List.of(FILE_A_DELETES)) - .build(); - }) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage( - "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); - String invalidJson = "{\"status\":\"submitted\"," + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java index 7b44b3533a5b..b3845bf42398 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java @@ -83,16 +83,6 @@ public void roundTripSerdeWithPlanTasks() { @Test public void roundTripSerdeWithDeleteFilesNoFileScanTasksPresent() { - assertThatThrownBy( - () -> - FetchScanTasksResponse.builder() - .withPlanTasks(List.of("task1", "task2")) - .withDeleteFiles(List.of(FILE_A_DELETES)) - .build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage( - "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); - String invalidJson = "{\"plan-tasks\":[\"task1\",\"task2\"]," + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java 
b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java index 6354e7bf246f..c248392e3934 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java @@ -196,17 +196,6 @@ public void roundTripSerdeWithInvalidPlanIdWithIncorrectStatus() { @Test public void roundTripSerdeWithInvalidPlanStatusSubmittedWithDeleteFilesNoFileScanTasksPresent() { - assertThatThrownBy( - () -> - PlanTableScanResponse.builder() - .withPlanStatus(PlanStatus.SUBMITTED) - .withPlanId("somePlanId") - .withDeleteFiles(List.of(FILE_A_DELETES)) - .build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage( - "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); - String invalidJson = "{\"status\":\"submitted\"," + "\"plan-id\":\"somePlanId\"," @@ -264,7 +253,6 @@ public void roundTripSerdeWithValidStatusAndFileScanTasks() { .withPlanStatus(fromResponse.planStatus()) .withPlanId(fromResponse.planId()) .withPlanTasks(fromResponse.planTasks()) - .withDeleteFiles(fromResponse.deleteFiles()) .withFileScanTasks(fromResponse.fileScanTasks()) .withSpecsById(PARTITION_SPECS_BY_ID) .build(); diff --git a/data/src/main/java/org/apache/iceberg/data/BaseFileWriterFactory.java b/data/src/main/java/org/apache/iceberg/data/BaseFileWriterFactory.java deleted file mode 100644 index 444c0d0226bd..000000000000 --- a/data/src/main/java/org/apache/iceberg/data/BaseFileWriterFactory.java +++ /dev/null @@ -1,375 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.data; - -import java.io.IOException; -import java.io.Serializable; -import java.io.UncheckedIOException; -import java.util.Map; -import org.apache.iceberg.FileFormat; -import org.apache.iceberg.MetricsConfig; -import org.apache.iceberg.PartitionSpec; -import org.apache.iceberg.Schema; -import org.apache.iceberg.SortOrder; -import org.apache.iceberg.StructLike; -import org.apache.iceberg.Table; -import org.apache.iceberg.avro.Avro; -import org.apache.iceberg.deletes.EqualityDeleteWriter; -import org.apache.iceberg.deletes.PositionDeleteWriter; -import org.apache.iceberg.encryption.EncryptedOutputFile; -import org.apache.iceberg.encryption.EncryptionKeyMetadata; -import org.apache.iceberg.io.DataWriter; -import org.apache.iceberg.io.FileWriterFactory; -import org.apache.iceberg.orc.ORC; -import org.apache.iceberg.parquet.Parquet; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; - -/** - * A base writer factory to be extended by query engine integrations. - * - * @deprecated since version 1.11.0 and will be removed in 1.12.0. 
Use {@link - * RegistryBasedFileWriterFactory} - */ -@Deprecated -public abstract class BaseFileWriterFactory implements FileWriterFactory, Serializable { - private final Table table; - private final FileFormat dataFileFormat; - private final Schema dataSchema; - private final SortOrder dataSortOrder; - private final FileFormat deleteFileFormat; - private final int[] equalityFieldIds; - private final Schema equalityDeleteRowSchema; - private final SortOrder equalityDeleteSortOrder; - private final Schema positionDeleteRowSchema; - private final Map writerProperties; - - protected BaseFileWriterFactory( - Table table, - FileFormat dataFileFormat, - Schema dataSchema, - SortOrder dataSortOrder, - FileFormat deleteFileFormat, - int[] equalityFieldIds, - Schema equalityDeleteRowSchema, - SortOrder equalityDeleteSortOrder, - Map writerProperties) { - this.table = table; - this.dataFileFormat = dataFileFormat; - this.dataSchema = dataSchema; - this.dataSortOrder = dataSortOrder; - this.deleteFileFormat = deleteFileFormat; - this.equalityFieldIds = equalityFieldIds; - this.equalityDeleteRowSchema = equalityDeleteRowSchema; - this.equalityDeleteSortOrder = equalityDeleteSortOrder; - this.writerProperties = writerProperties; - this.positionDeleteRowSchema = null; - } - - protected BaseFileWriterFactory( - Table table, - FileFormat dataFileFormat, - Schema dataSchema, - SortOrder dataSortOrder, - FileFormat deleteFileFormat, - int[] equalityFieldIds, - Schema equalityDeleteRowSchema, - SortOrder equalityDeleteSortOrder, - Schema positionDeleteRowSchema, - Map writerProperties) { - this.table = table; - this.dataFileFormat = dataFileFormat; - this.dataSchema = dataSchema; - this.dataSortOrder = dataSortOrder; - this.deleteFileFormat = deleteFileFormat; - this.equalityFieldIds = equalityFieldIds; - this.equalityDeleteRowSchema = equalityDeleteRowSchema; - this.equalityDeleteSortOrder = equalityDeleteSortOrder; - this.positionDeleteRowSchema = positionDeleteRowSchema; - 
this.writerProperties = writerProperties; - } - - @Deprecated - protected BaseFileWriterFactory( - Table table, - FileFormat dataFileFormat, - Schema dataSchema, - SortOrder dataSortOrder, - FileFormat deleteFileFormat, - int[] equalityFieldIds, - Schema equalityDeleteRowSchema, - SortOrder equalityDeleteSortOrder, - Schema positionDeleteRowSchema) { - this.table = table; - this.dataFileFormat = dataFileFormat; - this.dataSchema = dataSchema; - this.dataSortOrder = dataSortOrder; - this.deleteFileFormat = deleteFileFormat; - this.equalityFieldIds = equalityFieldIds; - this.equalityDeleteRowSchema = equalityDeleteRowSchema; - this.equalityDeleteSortOrder = equalityDeleteSortOrder; - this.positionDeleteRowSchema = positionDeleteRowSchema; - this.writerProperties = ImmutableMap.of(); - } - - protected abstract void configureDataWrite(Avro.DataWriteBuilder builder); - - protected abstract void configureEqualityDelete(Avro.DeleteWriteBuilder builder); - - protected abstract void configurePositionDelete(Avro.DeleteWriteBuilder builder); - - protected abstract void configureDataWrite(Parquet.DataWriteBuilder builder); - - protected abstract void configureEqualityDelete(Parquet.DeleteWriteBuilder builder); - - protected abstract void configurePositionDelete(Parquet.DeleteWriteBuilder builder); - - protected abstract void configureDataWrite(ORC.DataWriteBuilder builder); - - protected abstract void configureEqualityDelete(ORC.DeleteWriteBuilder builder); - - protected abstract void configurePositionDelete(ORC.DeleteWriteBuilder builder); - - @Override - public DataWriter newDataWriter( - EncryptedOutputFile file, PartitionSpec spec, StructLike partition) { - EncryptionKeyMetadata keyMetadata = file.keyMetadata(); - Map properties = table == null ? ImmutableMap.of() : table.properties(); - MetricsConfig metricsConfig = - table == null ? 
MetricsConfig.getDefault() : MetricsConfig.forTable(table); - - try { - switch (dataFileFormat) { - case AVRO: - Avro.DataWriteBuilder avroBuilder = - Avro.writeData(file) - .schema(dataSchema) - .setAll(properties) - .setAll(writerProperties) - .metricsConfig(metricsConfig) - .withSpec(spec) - .withPartition(partition) - .withKeyMetadata(keyMetadata) - .withSortOrder(dataSortOrder) - .overwrite(); - - configureDataWrite(avroBuilder); - - return avroBuilder.build(); - - case PARQUET: - Parquet.DataWriteBuilder parquetBuilder = - Parquet.writeData(file) - .schema(dataSchema) - .setAll(properties) - .setAll(writerProperties) - .metricsConfig(metricsConfig) - .withSpec(spec) - .withPartition(partition) - .withKeyMetadata(keyMetadata) - .withSortOrder(dataSortOrder) - .overwrite(); - - configureDataWrite(parquetBuilder); - - return parquetBuilder.build(); - - case ORC: - ORC.DataWriteBuilder orcBuilder = - ORC.writeData(file) - .schema(dataSchema) - .setAll(properties) - .setAll(writerProperties) - .metricsConfig(metricsConfig) - .withSpec(spec) - .withPartition(partition) - .withKeyMetadata(keyMetadata) - .withSortOrder(dataSortOrder) - .overwrite(); - - configureDataWrite(orcBuilder); - - return orcBuilder.build(); - - default: - throw new UnsupportedOperationException( - "Unsupported data file format: " + dataFileFormat); - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - @Override - public EqualityDeleteWriter newEqualityDeleteWriter( - EncryptedOutputFile file, PartitionSpec spec, StructLike partition) { - EncryptionKeyMetadata keyMetadata = file.keyMetadata(); - Map properties = table == null ? ImmutableMap.of() : table.properties(); - MetricsConfig metricsConfig = - table == null ? 
MetricsConfig.getDefault() : MetricsConfig.forTable(table); - - try { - switch (deleteFileFormat) { - case AVRO: - Avro.DeleteWriteBuilder avroBuilder = - Avro.writeDeletes(file) - .setAll(properties) - .setAll(writerProperties) - .metricsConfig(metricsConfig) - .rowSchema(equalityDeleteRowSchema) - .equalityFieldIds(equalityFieldIds) - .withSpec(spec) - .withPartition(partition) - .withKeyMetadata(keyMetadata) - .withSortOrder(equalityDeleteSortOrder) - .overwrite(); - - configureEqualityDelete(avroBuilder); - - return avroBuilder.buildEqualityWriter(); - - case PARQUET: - Parquet.DeleteWriteBuilder parquetBuilder = - Parquet.writeDeletes(file) - .setAll(properties) - .setAll(writerProperties) - .metricsConfig(metricsConfig) - .rowSchema(equalityDeleteRowSchema) - .equalityFieldIds(equalityFieldIds) - .withSpec(spec) - .withPartition(partition) - .withKeyMetadata(keyMetadata) - .withSortOrder(equalityDeleteSortOrder) - .overwrite(); - - configureEqualityDelete(parquetBuilder); - - return parquetBuilder.buildEqualityWriter(); - - case ORC: - ORC.DeleteWriteBuilder orcBuilder = - ORC.writeDeletes(file) - .setAll(properties) - .setAll(writerProperties) - .metricsConfig(metricsConfig) - .rowSchema(equalityDeleteRowSchema) - .equalityFieldIds(equalityFieldIds) - .withSpec(spec) - .withPartition(partition) - .withKeyMetadata(keyMetadata) - .withSortOrder(equalityDeleteSortOrder) - .overwrite(); - - configureEqualityDelete(orcBuilder); - - return orcBuilder.buildEqualityWriter(); - - default: - throw new UnsupportedOperationException( - "Unsupported format for equality deletes: " + deleteFileFormat); - } - } catch (IOException e) { - throw new UncheckedIOException("Failed to create new equality delete writer", e); - } - } - - @Override - public PositionDeleteWriter newPositionDeleteWriter( - EncryptedOutputFile file, PartitionSpec spec, StructLike partition) { - EncryptionKeyMetadata keyMetadata = file.keyMetadata(); - Map properties = table == null ? 
ImmutableMap.of() : table.properties(); - MetricsConfig metricsConfig = - table == null ? MetricsConfig.forPositionDelete() : MetricsConfig.forPositionDelete(table); - - try { - switch (deleteFileFormat) { - case AVRO: - Avro.DeleteWriteBuilder avroBuilder = - Avro.writeDeletes(file) - .setAll(properties) - .setAll(writerProperties) - .metricsConfig(metricsConfig) - .rowSchema(positionDeleteRowSchema) - .withSpec(spec) - .withPartition(partition) - .withKeyMetadata(keyMetadata) - .overwrite(); - - configurePositionDelete(avroBuilder); - - return avroBuilder.buildPositionWriter(); - - case PARQUET: - Parquet.DeleteWriteBuilder parquetBuilder = - Parquet.writeDeletes(file) - .setAll(properties) - .setAll(writerProperties) - .metricsConfig(metricsConfig) - .rowSchema(positionDeleteRowSchema) - .withSpec(spec) - .withPartition(partition) - .withKeyMetadata(keyMetadata) - .overwrite(); - - configurePositionDelete(parquetBuilder); - - return parquetBuilder.buildPositionWriter(); - - case ORC: - ORC.DeleteWriteBuilder orcBuilder = - ORC.writeDeletes(file) - .setAll(properties) - .setAll(writerProperties) - .metricsConfig(metricsConfig) - .rowSchema(positionDeleteRowSchema) - .withSpec(spec) - .withPartition(partition) - .withKeyMetadata(keyMetadata) - .overwrite(); - - configurePositionDelete(orcBuilder); - - return orcBuilder.buildPositionWriter(); - - default: - throw new UnsupportedOperationException( - "Unsupported format for position deletes: " + deleteFileFormat); - } - - } catch (IOException e) { - throw new UncheckedIOException("Failed to create new position delete writer", e); - } - } - - protected Schema dataSchema() { - return dataSchema; - } - - protected Schema equalityDeleteRowSchema() { - return equalityDeleteRowSchema; - } - - /** - * @deprecated This method is deprecated as of version 1.11.0 and will be removed in 1.12.0. - * Position deletes that include row data are no longer supported. 
- */ - @Deprecated - protected Schema positionDeleteRowSchema() { - return positionDeleteRowSchema; - } -} diff --git a/data/src/main/java/org/apache/iceberg/data/GenericAppenderFactory.java b/data/src/main/java/org/apache/iceberg/data/GenericAppenderFactory.java deleted file mode 100644 index dd78b48858a2..000000000000 --- a/data/src/main/java/org/apache/iceberg/data/GenericAppenderFactory.java +++ /dev/null @@ -1,350 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.data; - -import java.io.IOException; -import java.io.UncheckedIOException; -import java.util.Map; -import org.apache.iceberg.FileFormat; -import org.apache.iceberg.MetricsConfig; -import org.apache.iceberg.PartitionSpec; -import org.apache.iceberg.Schema; -import org.apache.iceberg.StructLike; -import org.apache.iceberg.Table; -import org.apache.iceberg.avro.Avro; -import org.apache.iceberg.data.avro.DataWriter; -import org.apache.iceberg.data.orc.GenericOrcWriter; -import org.apache.iceberg.data.parquet.GenericParquetWriter; -import org.apache.iceberg.deletes.EqualityDeleteWriter; -import org.apache.iceberg.deletes.PositionDeleteWriter; -import org.apache.iceberg.encryption.EncryptedOutputFile; -import org.apache.iceberg.encryption.EncryptionUtil; -import org.apache.iceberg.io.FileAppender; -import org.apache.iceberg.io.FileAppenderFactory; -import org.apache.iceberg.io.OutputFile; -import org.apache.iceberg.orc.ORC; -import org.apache.iceberg.parquet.Parquet; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; -import org.apache.iceberg.relocated.com.google.common.collect.Maps; - -/** - * Factory to create a new {@link FileAppender} to write {@link Record}s. - * - * @deprecated will be removed in 1.12.0; use {@link GenericFileWriterFactory} instead. 
- */ -@Deprecated -public class GenericAppenderFactory implements FileAppenderFactory { - private final Table table; - private final Schema schema; - private final PartitionSpec spec; - private final int[] equalityFieldIds; - private final Schema eqDeleteRowSchema; - private final Schema posDeleteRowSchema; - private final Map config; - - public GenericAppenderFactory(Schema schema) { - this(schema, PartitionSpec.unpartitioned()); - } - - public GenericAppenderFactory(Schema schema, PartitionSpec spec) { - this(schema, spec, null, null, null); - } - - public GenericAppenderFactory( - Schema schema, - PartitionSpec spec, - int[] equalityFieldIds, - Schema eqDeleteRowSchema, - Schema posDeleteRowSchema) { - this(null, schema, spec, null, equalityFieldIds, eqDeleteRowSchema, posDeleteRowSchema); - } - - /** - * Constructor for GenericAppenderFactory. - * - * @param schema the schema of the records to write - * @param spec the partition spec of the records - * @param equalityFieldIds the field ids for equality delete - * @param eqDeleteRowSchema the schema for equality delete rows - */ - public GenericAppenderFactory( - Schema schema, PartitionSpec spec, int[] equalityFieldIds, Schema eqDeleteRowSchema) { - this(null, schema, spec, null, equalityFieldIds, eqDeleteRowSchema, null); - } - - /** - * Constructor for GenericAppenderFactory. - * - * @param table iceberg table - * @param schema the schema of the records to write - * @param spec the partition spec of the records - * @param config the configuration for the writer - * @param equalityFieldIds the field ids for equality delete - * @param eqDeleteRowSchema the schema for equality delete rows - */ - public GenericAppenderFactory( - Table table, - Schema schema, - PartitionSpec spec, - Map config, - int[] equalityFieldIds, - Schema eqDeleteRowSchema) { - this(table, schema, spec, config, equalityFieldIds, eqDeleteRowSchema, null); - } - - /** - * Constructor for GenericAppenderFactory. 
- * - * @param table iceberg table - * @param schema the schema of the records to write - * @param spec the partition spec of the records - * @param config the configuration for the writer - * @param equalityFieldIds the field ids for equality delete - * @param eqDeleteRowSchema the schema for equality delete rows - * @param posDeleteRowSchema the schema for position delete rows - * @deprecated This constructor is deprecated as of version 1.11.0 and will be removed in 1.12.0. - * Position deletes that include row data are no longer supported. Use {@link - * #GenericAppenderFactory(Table, Schema, PartitionSpec, Map, int[], Schema)} instead. - */ - @Deprecated - public GenericAppenderFactory( - Table table, - Schema schema, - PartitionSpec spec, - Map config, - int[] equalityFieldIds, - Schema eqDeleteRowSchema, - Schema posDeleteRowSchema) { - this.table = table; - this.config = config == null ? Maps.newHashMap() : config; - - if (table != null) { - // If the table is provided and schema and spec are not provided, derive them from the table - this.schema = schema == null ? table.schema() : schema; - this.spec = spec == null ? 
table.spec() : spec; - validateMetricsConfig(this.config); - } else { - this.schema = schema; - this.spec = spec; - } - - this.equalityFieldIds = equalityFieldIds; - this.eqDeleteRowSchema = eqDeleteRowSchema; - this.posDeleteRowSchema = posDeleteRowSchema; - } - - public GenericAppenderFactory set(String property, String value) { - validateMetricsConfig(ImmutableMap.of(property, value)); - config.put(property, value); - return this; - } - - public GenericAppenderFactory setAll(Map properties) { - validateMetricsConfig(properties); - config.putAll(properties); - return this; - } - - @Override - public FileAppender newAppender(OutputFile outputFile, FileFormat fileFormat) { - return newAppender(EncryptionUtil.plainAsEncryptedOutput(outputFile), fileFormat); - } - - @Override - public FileAppender newAppender( - EncryptedOutputFile encryptedOutputFile, FileFormat fileFormat) { - MetricsConfig metricsConfig = - table != null ? MetricsConfig.forTable(table) : MetricsConfig.fromProperties(config); - - try { - switch (fileFormat) { - case AVRO: - return Avro.write(encryptedOutputFile) - .schema(schema) - .createWriterFunc(DataWriter::create) - .metricsConfig(metricsConfig) - .setAll(config) - .overwrite() - .build(); - - case PARQUET: - return Parquet.write(encryptedOutputFile) - .schema(schema) - .createWriterFunc(GenericParquetWriter::create) - .setAll(config) - .metricsConfig(metricsConfig) - .overwrite() - .build(); - - case ORC: - return ORC.write(encryptedOutputFile) - .schema(schema) - .createWriterFunc(GenericOrcWriter::buildWriter) - .setAll(config) - .metricsConfig(metricsConfig) - .overwrite() - .build(); - - default: - throw new UnsupportedOperationException( - "Cannot write unknown file format: " + fileFormat); - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - @Override - public org.apache.iceberg.io.DataWriter newDataWriter( - EncryptedOutputFile file, FileFormat format, StructLike partition) { - return new 
org.apache.iceberg.io.DataWriter<>( - newAppender(file, format), - format, - file.encryptingOutputFile().location(), - spec, - partition, - file.keyMetadata()); - } - - @Override - public EqualityDeleteWriter newEqDeleteWriter( - EncryptedOutputFile file, FileFormat format, StructLike partition) { - Preconditions.checkState( - equalityFieldIds != null && equalityFieldIds.length > 0, - "Equality field ids shouldn't be null or empty when creating equality-delete writer"); - Preconditions.checkNotNull( - eqDeleteRowSchema, - "Equality delete row schema shouldn't be null when creating equality-delete writer"); - MetricsConfig metricsConfig = - table != null ? MetricsConfig.forTable(table) : MetricsConfig.fromProperties(config); - - try { - switch (format) { - case AVRO: - return Avro.writeDeletes(file) - .createWriterFunc(DataWriter::create) - .withPartition(partition) - .overwrite() - .setAll(config) - .rowSchema(eqDeleteRowSchema) - .withSpec(spec) - .withKeyMetadata(file.keyMetadata()) - .equalityFieldIds(equalityFieldIds) - .buildEqualityWriter(); - - case ORC: - return ORC.writeDeletes(file) - .createWriterFunc(GenericOrcWriter::buildWriter) - .withPartition(partition) - .overwrite() - .setAll(config) - .metricsConfig(metricsConfig) - .rowSchema(eqDeleteRowSchema) - .withSpec(spec) - .withKeyMetadata(file.keyMetadata()) - .equalityFieldIds(equalityFieldIds) - .buildEqualityWriter(); - - case PARQUET: - return Parquet.writeDeletes(file) - .createWriterFunc(GenericParquetWriter::create) - .withPartition(partition) - .overwrite() - .setAll(config) - .metricsConfig(metricsConfig) - .rowSchema(eqDeleteRowSchema) - .withSpec(spec) - .withKeyMetadata(file.keyMetadata()) - .equalityFieldIds(equalityFieldIds) - .buildEqualityWriter(); - - default: - throw new UnsupportedOperationException( - "Cannot write equality-deletes for unsupported file format: " + format); - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - @Override - public 
PositionDeleteWriter newPosDeleteWriter( - EncryptedOutputFile file, FileFormat format, StructLike partition) { - MetricsConfig metricsConfig = - table != null - ? MetricsConfig.forPositionDelete(table) - : MetricsConfig.fromProperties(config); - - try { - switch (format) { - case AVRO: - return Avro.writeDeletes(file) - .createWriterFunc(DataWriter::create) - .withPartition(partition) - .overwrite() - .setAll(config) - .rowSchema(posDeleteRowSchema) - .withSpec(spec) - .withKeyMetadata(file.keyMetadata()) - .buildPositionWriter(); - - case ORC: - return ORC.writeDeletes(file) - .createWriterFunc(GenericOrcWriter::buildWriter) - .withPartition(partition) - .overwrite() - .setAll(config) - .rowSchema(posDeleteRowSchema) - .withSpec(spec) - .withKeyMetadata(file.keyMetadata()) - .buildPositionWriter(); - - case PARQUET: - return Parquet.writeDeletes(file) - .createWriterFunc(GenericParquetWriter::create) - .withPartition(partition) - .overwrite() - .setAll(config) - .metricsConfig(metricsConfig) - .rowSchema(posDeleteRowSchema) - .withSpec(spec) - .withKeyMetadata(file.keyMetadata()) - .buildPositionWriter(); - - default: - throw new UnsupportedOperationException( - "Cannot write pos-deletes for unsupported file format: " + format); - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - private void validateMetricsConfig(Map writeConfig) { - if (table == null) { - return; - } - - if (writeConfig.keySet().stream().anyMatch(k -> k.startsWith("write.metadata.metrics."))) { - throw new IllegalArgumentException( - "Cannot set metrics properties when the table is provided, use table properties instead"); - } - } -} diff --git a/data/src/main/java/org/apache/iceberg/data/GenericFileWriterFactory.java b/data/src/main/java/org/apache/iceberg/data/GenericFileWriterFactory.java index 1e75b9eda961..914dc3672b77 100644 --- a/data/src/main/java/org/apache/iceberg/data/GenericFileWriterFactory.java +++ 
b/data/src/main/java/org/apache/iceberg/data/GenericFileWriterFactory.java @@ -241,10 +241,7 @@ public PositionDeleteWriter newPositionDeleteWriter( LOG.warn( "Deprecated feature used. Position delete row schema is used to create the position delete writer."); Map properties = table == null ? ImmutableMap.of() : table.properties(); - MetricsConfig metricsConfig = - table == null - ? MetricsConfig.forPositionDelete() - : MetricsConfig.forPositionDelete(table); + MetricsConfig metricsConfig = MetricsConfig.forPositionDelete(); try { return switch (format) { diff --git a/data/src/main/java/org/apache/iceberg/data/RegistryBasedFileWriterFactory.java b/data/src/main/java/org/apache/iceberg/data/RegistryBasedFileWriterFactory.java index 868b41f5840b..da2be7702909 100644 --- a/data/src/main/java/org/apache/iceberg/data/RegistryBasedFileWriterFactory.java +++ b/data/src/main/java/org/apache/iceberg/data/RegistryBasedFileWriterFactory.java @@ -160,8 +160,7 @@ public PositionDeleteWriter newPositionDeleteWriter( EncryptedOutputFile file, PartitionSpec spec, StructLike partition) { EncryptionKeyMetadata keyMetadata = file.keyMetadata(); Map properties = table != null ? table.properties() : ImmutableMap.of(); - MetricsConfig metricsConfig = - table != null ? MetricsConfig.forPositionDelete(table) : MetricsConfig.forPositionDelete(); + MetricsConfig metricsConfig = MetricsConfig.forPositionDelete(); try { FileWriterBuilder, ?> builder = diff --git a/data/src/test/java/org/apache/iceberg/TestGenericAppenderFactory.java b/data/src/test/java/org/apache/iceberg/TestGenericAppenderFactory.java deleted file mode 100644 index 5d940adaec58..000000000000 --- a/data/src/test/java/org/apache/iceberg/TestGenericAppenderFactory.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg; - -import static org.assertj.core.api.Assertions.assertThatNoException; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -import java.util.List; -import java.util.Map; -import org.apache.iceberg.data.GenericAppenderFactory; -import org.apache.iceberg.data.GenericRecord; -import org.apache.iceberg.data.Record; -import org.apache.iceberg.io.FileAppenderFactory; -import org.apache.iceberg.io.TestAppenderFactory; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; -import org.apache.iceberg.relocated.com.google.common.collect.Maps; -import org.apache.iceberg.util.ArrayUtil; -import org.apache.iceberg.util.StructLikeSet; -import org.junit.jupiter.api.TestTemplate; - -public class TestGenericAppenderFactory extends TestAppenderFactory { - - private final GenericRecord gRecord = GenericRecord.create(SCHEMA); - - @Override - protected FileAppenderFactory createAppenderFactory( - List equalityFieldIds, Schema eqDeleteSchema, Schema posDeleteRowSchema) { - return new GenericAppenderFactory( - table, - table.schema(), - table.spec(), - Maps.newHashMap(), - ArrayUtil.toIntArray(equalityFieldIds), - eqDeleteSchema, - posDeleteRowSchema); - } - - @Override - protected Record createRow(Integer id, String data) { - return gRecord.copy(ImmutableMap.of("id", id, "data", data)); - } - - @Override - protected StructLikeSet 
expectedRowSet(Iterable records) { - StructLikeSet set = StructLikeSet.create(table.schema().asStruct()); - records.forEach(set::add); - return set; - } - - @TestTemplate - void illegalSetConfig() { - GenericAppenderFactory appenderFactory = - (GenericAppenderFactory) createAppenderFactory(null, null, null); - - assertThatThrownBy( - () -> - appenderFactory.set( - TableProperties.METRICS_MAX_INFERRED_COLUMN_DEFAULTS, - MetricsModes.None.get().toString())) - .as("Should not allow setting metrics property if the table was provided") - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining( - "Cannot set metrics properties when the table is provided, use table properties instead"); - } - - @TestTemplate - void illegalSetAllConfigs() { - GenericAppenderFactory appenderFactory = - (GenericAppenderFactory) createAppenderFactory(null, null, null); - - Map properties = - ImmutableMap.of( - TableProperties.METRICS_MAX_INFERRED_COLUMN_DEFAULTS, - "10", - TableProperties.METRICS_MODE_COLUMN_CONF_PREFIX + "id", - MetricsModes.Full.get().toString()); - - assertThatThrownBy(() -> appenderFactory.setAll(properties)) - .as("Should not allow setting metrics property if the table was provided") - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining( - "Cannot set metrics properties when the table is provided, use table properties instead"); - } - - @TestTemplate - void setConfigExcludeMetrics() { - GenericAppenderFactory appenderFactory = - (GenericAppenderFactory) createAppenderFactory(null, null, null); - assertThatNoException().isThrownBy(() -> appenderFactory.set("key1", "value1")); - assertThatNoException() - .isThrownBy(() -> appenderFactory.setAll(ImmutableMap.of("key2", "value2"))); - } - - @TestTemplate - void setConfigWithoutTable() { - GenericAppenderFactory appenderFactory = new GenericAppenderFactory(SCHEMA); - assertThatNoException() - .isThrownBy( - () -> appenderFactory.set(TableProperties.METRICS_MAX_INFERRED_COLUMN_DEFAULTS, "10")); 
- assertThatNoException() - .isThrownBy( - () -> - appenderFactory.setAll( - ImmutableMap.of(TableProperties.DEFAULT_WRITE_METRICS_MODE, "full"))); - } - - @TestTemplate - void createFactoryWithConflictConfig() { - table - .updateProperties() - .set(TableProperties.DEFAULT_WRITE_METRICS_MODE, MetricsModes.Full.get().toString()) - .commit(); - Map config = - ImmutableMap.of( - TableProperties.DEFAULT_WRITE_METRICS_MODE, MetricsModes.None.get().toString()); - - assertThatThrownBy( - () -> new GenericAppenderFactory(table, SCHEMA, SPEC, config, null, null, null)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining( - "Cannot set metrics properties when the table is provided, use table properties instead"); - } -} diff --git a/flink/v1.20/flink/src/main/java/org/apache/iceberg/flink/maintenance/api/RewriteDataFiles.java b/flink/v1.20/flink/src/main/java/org/apache/iceberg/flink/maintenance/api/RewriteDataFiles.java index be77fda23d29..8d21dc0b6bcd 100644 --- a/flink/v1.20/flink/src/main/java/org/apache/iceberg/flink/maintenance/api/RewriteDataFiles.java +++ b/flink/v1.20/flink/src/main/java/org/apache/iceberg/flink/maintenance/api/RewriteDataFiles.java @@ -209,20 +209,6 @@ public Builder maxFilesToRewrite(int maxFilesToRewrite) { return this; } - /** - * A user provided filter for determining which files will be considered by the rewrite - * strategy. - * - * @param newFilter the filter expression to apply - * @return this for method chaining - * @deprecated will be removed in 1.12.0. Use {@link #filter(SerializableSupplier)} instead - */ - @Deprecated - public Builder filter(Expression newFilter) { - this.filterSupplier = () -> newFilter; - return this; - } - /** * A user-provided supplier of a filter expression that determines which files are considered by * the rewrite strategy. 
diff --git a/flink/v1.20/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkAppenderFactory.java b/flink/v1.20/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkAppenderFactory.java index 07a068391ce6..85dcc3133bd6 100644 --- a/flink/v1.20/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkAppenderFactory.java +++ b/flink/v1.20/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkAppenderFactory.java @@ -238,7 +238,7 @@ public EqualityDeleteWriter newEqDeleteWriter( @Override public PositionDeleteWriter newPosDeleteWriter( EncryptedOutputFile outputFile, FileFormat format, StructLike partition) { - MetricsConfig metricsConfig = MetricsConfig.forPositionDelete(table); + MetricsConfig metricsConfig = MetricsConfig.forPositionDelete(); try { switch (format) { case AVRO: diff --git a/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/maintenance/api/TestRewriteDataFiles.java b/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/maintenance/api/TestRewriteDataFiles.java index c27f6081af5a..a59ca80d99fd 100644 --- a/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/maintenance/api/TestRewriteDataFiles.java +++ b/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/maintenance/api/TestRewriteDataFiles.java @@ -521,7 +521,7 @@ void testRewriteWithFilter() throws Exception { .minFileSizeBytes(500_000L) .minInputFiles(2) // Only rewrite data files where id is 1 or 2 for testing rewrite - .filter(Expressions.in("id", 1, 2)) + .filter(() -> Expressions.in("id", 1, 2)) .partialProgressEnabled(true) .partialProgressMaxCommits(1) .maxRewriteBytes(100_000L) diff --git a/flink/v2.0/flink/src/main/java/org/apache/iceberg/flink/maintenance/api/RewriteDataFiles.java b/flink/v2.0/flink/src/main/java/org/apache/iceberg/flink/maintenance/api/RewriteDataFiles.java index be77fda23d29..8d21dc0b6bcd 100644 --- a/flink/v2.0/flink/src/main/java/org/apache/iceberg/flink/maintenance/api/RewriteDataFiles.java +++ 
b/flink/v2.0/flink/src/main/java/org/apache/iceberg/flink/maintenance/api/RewriteDataFiles.java @@ -209,20 +209,6 @@ public Builder maxFilesToRewrite(int maxFilesToRewrite) { return this; } - /** - * A user provided filter for determining which files will be considered by the rewrite - * strategy. - * - * @param newFilter the filter expression to apply - * @return this for method chaining - * @deprecated will be removed in 1.12.0. Use {@link #filter(SerializableSupplier)} instead - */ - @Deprecated - public Builder filter(Expression newFilter) { - this.filterSupplier = () -> newFilter; - return this; - } - /** * A user-provided supplier of a filter expression that determines which files are considered by * the rewrite strategy. diff --git a/flink/v2.0/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkAppenderFactory.java b/flink/v2.0/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkAppenderFactory.java index 07a068391ce6..85dcc3133bd6 100644 --- a/flink/v2.0/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkAppenderFactory.java +++ b/flink/v2.0/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkAppenderFactory.java @@ -238,7 +238,7 @@ public EqualityDeleteWriter newEqDeleteWriter( @Override public PositionDeleteWriter newPosDeleteWriter( EncryptedOutputFile outputFile, FileFormat format, StructLike partition) { - MetricsConfig metricsConfig = MetricsConfig.forPositionDelete(table); + MetricsConfig metricsConfig = MetricsConfig.forPositionDelete(); try { switch (format) { case AVRO: diff --git a/flink/v2.0/flink/src/test/java/org/apache/iceberg/flink/maintenance/api/TestRewriteDataFiles.java b/flink/v2.0/flink/src/test/java/org/apache/iceberg/flink/maintenance/api/TestRewriteDataFiles.java index c27f6081af5a..a59ca80d99fd 100644 --- a/flink/v2.0/flink/src/test/java/org/apache/iceberg/flink/maintenance/api/TestRewriteDataFiles.java +++ b/flink/v2.0/flink/src/test/java/org/apache/iceberg/flink/maintenance/api/TestRewriteDataFiles.java @@ -521,7 
+521,7 @@ void testRewriteWithFilter() throws Exception { .minFileSizeBytes(500_000L) .minInputFiles(2) // Only rewrite data files where id is 1 or 2 for testing rewrite - .filter(Expressions.in("id", 1, 2)) + .filter(() -> Expressions.in("id", 1, 2)) .partialProgressEnabled(true) .partialProgressMaxCommits(1) .maxRewriteBytes(100_000L) diff --git a/flink/v2.1/flink/src/main/java/org/apache/iceberg/flink/maintenance/api/RewriteDataFiles.java b/flink/v2.1/flink/src/main/java/org/apache/iceberg/flink/maintenance/api/RewriteDataFiles.java index be77fda23d29..8d21dc0b6bcd 100644 --- a/flink/v2.1/flink/src/main/java/org/apache/iceberg/flink/maintenance/api/RewriteDataFiles.java +++ b/flink/v2.1/flink/src/main/java/org/apache/iceberg/flink/maintenance/api/RewriteDataFiles.java @@ -209,20 +209,6 @@ public Builder maxFilesToRewrite(int maxFilesToRewrite) { return this; } - /** - * A user provided filter for determining which files will be considered by the rewrite - * strategy. - * - * @param newFilter the filter expression to apply - * @return this for method chaining - * @deprecated will be removed in 1.12.0. Use {@link #filter(SerializableSupplier)} instead - */ - @Deprecated - public Builder filter(Expression newFilter) { - this.filterSupplier = () -> newFilter; - return this; - } - /** * A user-provided supplier of a filter expression that determines which files are considered by * the rewrite strategy. 
diff --git a/flink/v2.1/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkAppenderFactory.java b/flink/v2.1/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkAppenderFactory.java index 07a068391ce6..85dcc3133bd6 100644 --- a/flink/v2.1/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkAppenderFactory.java +++ b/flink/v2.1/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkAppenderFactory.java @@ -238,7 +238,7 @@ public EqualityDeleteWriter newEqDeleteWriter( @Override public PositionDeleteWriter newPosDeleteWriter( EncryptedOutputFile outputFile, FileFormat format, StructLike partition) { - MetricsConfig metricsConfig = MetricsConfig.forPositionDelete(table); + MetricsConfig metricsConfig = MetricsConfig.forPositionDelete(); try { switch (format) { case AVRO: diff --git a/flink/v2.1/flink/src/test/java/org/apache/iceberg/flink/maintenance/api/TestRewriteDataFiles.java b/flink/v2.1/flink/src/test/java/org/apache/iceberg/flink/maintenance/api/TestRewriteDataFiles.java index c27f6081af5a..a59ca80d99fd 100644 --- a/flink/v2.1/flink/src/test/java/org/apache/iceberg/flink/maintenance/api/TestRewriteDataFiles.java +++ b/flink/v2.1/flink/src/test/java/org/apache/iceberg/flink/maintenance/api/TestRewriteDataFiles.java @@ -521,7 +521,7 @@ void testRewriteWithFilter() throws Exception { .minFileSizeBytes(500_000L) .minInputFiles(2) // Only rewrite data files where id is 1 or 2 for testing rewrite - .filter(Expressions.in("id", 1, 2)) + .filter(() -> Expressions.in("id", 1, 2)) .partialProgressEnabled(true) .partialProgressMaxCommits(1) .maxRewriteBytes(100_000L) diff --git a/kafka-connect/kafka-connect-events/src/main/java/org/apache/iceberg/connect/events/TableReference.java b/kafka-connect/kafka-connect-events/src/main/java/org/apache/iceberg/connect/events/TableReference.java index cd1341822c58..4deaf61361fc 100644 --- a/kafka-connect/kafka-connect-events/src/main/java/org/apache/iceberg/connect/events/TableReference.java +++ 
b/kafka-connect/kafka-connect-events/src/main/java/org/apache/iceberg/connect/events/TableReference.java @@ -58,16 +58,6 @@ public class TableReference implements IndexedRecord { NestedField.optional(TABLE_UUID, "table_uuid", UUIDType.get())); private static final Schema AVRO_SCHEMA = AvroUtil.convert(ICEBERG_SCHEMA, TableReference.class); - /** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link TableReference#of(String, - * TableIdentifier, UUID)} - */ - @Deprecated - public static TableReference of(String catalog, TableIdentifier tableIdentifier) { - return new TableReference( - catalog, Arrays.asList(tableIdentifier.namespace().levels()), tableIdentifier.name(), null); - } - public static TableReference of(String catalog, TableIdentifier tableIdentifier, UUID tableUuid) { return new TableReference( catalog, @@ -81,21 +71,6 @@ public TableReference(Schema avroSchema) { this.avroSchema = avroSchema; } - /** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link TableReference#of(String, - * TableIdentifier, UUID)}. 
- */ - @Deprecated - public TableReference(String catalog, List namespace, String name) { - Preconditions.checkNotNull(catalog, "Catalog cannot be null"); - Preconditions.checkNotNull(namespace, "Namespace cannot be null"); - Preconditions.checkNotNull(name, "Name cannot be null"); - this.catalog = catalog; - this.namespace = namespace; - this.name = name; - this.avroSchema = AVRO_SCHEMA; - } - private TableReference(String catalog, List namespace, String name, UUID uuid) { Preconditions.checkNotNull(catalog, "Catalog cannot be null"); Preconditions.checkNotNull(namespace, "Namespace cannot be null"); diff --git a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/IcebergWriterResult.java b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/IcebergWriterResult.java index 5667399cd74e..2903f5507d47 100644 --- a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/IcebergWriterResult.java +++ b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/IcebergWriterResult.java @@ -21,7 +21,6 @@ import java.util.List; import org.apache.iceberg.DataFile; import org.apache.iceberg.DeleteFile; -import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.connect.events.TableReference; import org.apache.iceberg.types.Types.StructType; @@ -43,35 +42,10 @@ public IcebergWriterResult( this.partitionStruct = partitionStruct; } - /** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link - * IcebergWriterResult#IcebergWriterResult(TableReference, List, List, StructType)} instead - */ - @Deprecated - public IcebergWriterResult( - TableIdentifier tableIdentifier, - List dataFiles, - List deleteFiles, - StructType partitionStruct) { - this.tableReference = TableReference.of("unknown", tableIdentifier); - this.dataFiles = dataFiles; - this.deleteFiles = deleteFiles; - this.partitionStruct = partitionStruct; - } - public TableReference tableReference() { return 
tableReference; } - /** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@code tableReference().identifier()} - * instead - */ - @Deprecated - public TableIdentifier tableIdentifier() { - return tableReference.identifier(); - } - public List dataFiles() { return dataFiles; } diff --git a/kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/channel/TestWorker.java b/kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/channel/TestWorker.java index 6cd5c0c86eab..641b812177bc 100644 --- a/kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/channel/TestWorker.java +++ b/kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/channel/TestWorker.java @@ -37,6 +37,7 @@ import org.apache.iceberg.connect.events.Event; import org.apache.iceberg.connect.events.PayloadType; import org.apache.iceberg.connect.events.StartCommit; +import org.apache.iceberg.connect.events.TableReference; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; @@ -67,7 +68,7 @@ public void testSave() { IcebergWriterResult writeResult = new IcebergWriterResult( - TableIdentifier.parse(TABLE_NAME), + TableReference.of("unknown", TableIdentifier.parse(TABLE_NAME), null), ImmutableList.of(EventTestUtil.createDataFile()), ImmutableList.of(), StructType.of()); diff --git a/kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/data/TestSinkWriter.java b/kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/data/TestSinkWriter.java index 09f7a373d5f2..008d9b5a4bed 100644 --- a/kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/data/TestSinkWriter.java +++ b/kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/data/TestSinkWriter.java @@ -38,6 +38,7 @@ import 
org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.connect.IcebergSinkConfig; import org.apache.iceberg.connect.TableSinkConfig; +import org.apache.iceberg.connect.events.TableReference; import org.apache.iceberg.inmemory.InMemoryCatalog; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -163,7 +164,7 @@ public void testOffsetTrackedByOriginalTopicPartition() { IcebergWriterResult writeResult = new IcebergWriterResult( - TableIdentifier.parse(TABLE_NAME), + TableReference.of("unknown", TableIdentifier.parse(TABLE_NAME), null), ImmutableList.of(mock(DataFile.class)), ImmutableList.of(), Types.StructType.of()); @@ -239,7 +240,7 @@ private List sinkWriterTest( Map value, IcebergSinkConfig config) { IcebergWriterResult writeResult = new IcebergWriterResult( - TableIdentifier.parse(TABLE_NAME), + TableReference.of("unknown", TableIdentifier.parse(TABLE_NAME), null), ImmutableList.of(mock(DataFile.class)), ImmutableList.of(), Types.StructType.of()); diff --git a/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetReaders.java b/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetReaders.java index b1fd8f43a578..eae620e38e0a 100644 --- a/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetReaders.java +++ b/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetReaders.java @@ -78,26 +78,8 @@ protected ParquetValueReader createReader( } } - /** - * @deprecated will be removed in 1.12.0. 
Subclasses should override {@link - * #createStructReader(List, Types.StructType, Integer)} instead - */ - @Deprecated - protected ParquetValueReader createStructReader( - List> fieldReaders, Types.StructType structType) { - throw new UnsupportedOperationException( - "Deprecated method is not used in this implementation, only createStructReader(list, Types.Struct, Integer) should be used"); - } - - /** - * This method can be overridden to provide a custom implementation which also uses the fieldId of - * the Schema when creating the struct reader - */ - protected ParquetValueReader createStructReader( - List> fieldReaders, Types.StructType structType, Integer fieldId) { - // Fallback to the signature without fieldId if not overridden - return createStructReader(fieldReaders, structType); - } + protected abstract ParquetValueReader createStructReader( + List> fieldReaders, Types.StructType structType, Integer fieldId); protected abstract ParquetValueReader fixedReader(ColumnDescriptor desc); diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java b/parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java index f02974d6e79c..e45db652256c 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java @@ -75,7 +75,6 @@ import org.apache.iceberg.SchemaParser; import org.apache.iceberg.SortOrder; import org.apache.iceberg.StructLike; -import org.apache.iceberg.SystemConfigs; import org.apache.iceberg.Table; import org.apache.iceberg.avro.AvroSchemaUtil; import org.apache.iceberg.data.parquet.GenericParquetWriter; @@ -1489,10 +1488,8 @@ public CloseableIterable build() { NameMapping mapping; if (nameMapping != null) { mapping = nameMapping; - } else if (SystemConfigs.NETFLIX_UNSAFE_PARQUET_ID_FALLBACK_ENABLED.value()) { - mapping = null; } else { - mapping = NameMapping.empty(); + mapping = null; } Function> batchedFunc = diff --git 
a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/source/SparkFileWriterFactory.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/source/SparkFileWriterFactory.java index 39110f0b0597..5f13b8aac45b 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/source/SparkFileWriterFactory.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/source/SparkFileWriterFactory.java @@ -166,10 +166,7 @@ public PositionDeleteWriter newPositionDeleteWriter( } else { LOG.warn("Position deletes with deleted rows are deprecated and will be removed in 1.12.0."); Map properties = table == null ? ImmutableMap.of() : table.properties(); - MetricsConfig metricsConfig = - table == null - ? MetricsConfig.forPositionDelete() - : MetricsConfig.forPositionDelete(table); + MetricsConfig metricsConfig = MetricsConfig.forPositionDelete(); try { return switch (deleteFormat) { diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDVWriters.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDVWriters.java index dfc693d3094d..ac6e62524768 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDVWriters.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDVWriters.java @@ -44,7 +44,6 @@ protected FileWriterFactory newWriterFactory( .deleteFileFormat(dataFormat()) .equalityFieldIds(ArrayUtil.toIntArray(equalityFieldIds)) .equalityDeleteRowSchema(equalityDeleteRowSchema) - .positionDeleteRowSchema(positionDeleteRowSchema) .build(); } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkPartitioningWriters.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkPartitioningWriters.java index 979abd21e7f7..e47fc53ac8bf 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkPartitioningWriters.java +++ 
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkPartitioningWriters.java @@ -44,7 +44,6 @@ protected FileWriterFactory newWriterFactory( .deleteFileFormat(format()) .equalityFieldIds(ArrayUtil.toIntArray(equalityFieldIds)) .equalityDeleteRowSchema(equalityDeleteRowSchema) - .positionDeleteRowSchema(positionDeleteRowSchema) .build(); } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkRollingFileWriters.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkRollingFileWriters.java index 5ebeafcb8cef..ed2158727d86 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkRollingFileWriters.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkRollingFileWriters.java @@ -41,7 +41,6 @@ protected FileWriterFactory newWriterFactory( .deleteFileFormat(format()) .equalityFieldIds(ArrayUtil.toIntArray(equalityFieldIds)) .equalityDeleteRowSchema(equalityDeleteRowSchema) - .positionDeleteRowSchema(positionDeleteRowSchema) .build(); } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java index 06ecc20c2fc3..f5397c2ff56a 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java @@ -38,7 +38,6 @@ protected FileWriterFactory newWriterFactory(Table sourceTable) { .dataSchema(sourceTable.schema()) .dataFileFormat(fileFormat) .deleteFileFormat(fileFormat) - .positionDeleteRowSchema(sourceTable.schema()) .build(); } diff --git a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/SparkSchemaUtil.java b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/SparkSchemaUtil.java index fcf5fbeb2acb..1338e712fda3 100644 --- 
a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/SparkSchemaUtil.java +++ b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/SparkSchemaUtil.java @@ -187,25 +187,6 @@ public static Schema convert(Schema baseSchema, StructType sparkType, boolean ca return SparkFixupTypes.fixup(schema, baseSchema); } - /** - * Convert a Spark {@link StructType struct} to a {@link Schema} based on the given schema. - * - *

<p>This conversion will assign new ids for fields that are not found in the base schema. - * - *

<p>Data types, field order, and nullability will match the spark type. This conversion may - * return a schema that is not compatible with base schema. - * - * @param baseSchema a Schema on which conversion is based - * @param sparkType a Spark StructType - * @return the equivalent Schema - * @throws IllegalArgumentException if the type cannot be converted or there are missing ids - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static Schema convertWithFreshIds(Schema baseSchema, StructType sparkType) { - return convertWithFreshIds(baseSchema, sparkType, true); - } - /** * Convert a Spark {@link StructType struct} to a {@link Schema} based on the given schema. * @@ -251,32 +232,6 @@ public static Schema prune(Schema schema, StructType requestedType) { .fields()); } - /** - * Prune columns from a {@link Schema} using a {@link StructType Spark type} projection. - * - *

<p>This requires that the Spark type is a projection of the Schema. Nullability and types must - * match. - * - *

The filters list of {@link Expression} is used to ensure that columns referenced by filters - * are projected. - * - * @param schema a Schema - * @param requestedType a projection of the Spark representation of the Schema - * @param filters a list of filters - * @return a Schema corresponding to the Spark projection - * @throws IllegalArgumentException if the Spark type does not match the Schema - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static Schema prune(Schema schema, StructType requestedType, List filters) { - Set filterRefs = Binder.boundReferences(schema.asStruct(), filters, true); - return new Schema( - TypeUtil.visit(schema, new PruneColumnsWithoutReordering(requestedType, filterRefs)) - .asNestedType() - .asStructType() - .fields()); - } - /** * Prune columns from a {@link Schema} using a {@link StructType Spark type} projection. * diff --git a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java index 0e9edac3fbd5..0b74edd67412 100644 --- a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java +++ b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java @@ -95,19 +95,13 @@ import org.apache.spark.sql.catalyst.TableIdentifier; import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException; import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; -import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute; import org.apache.spark.sql.catalyst.catalog.CatalogTable; import org.apache.spark.sql.catalyst.catalog.CatalogTablePartition; import org.apache.spark.sql.catalyst.catalog.SessionCatalog; -import org.apache.spark.sql.catalyst.expressions.Expression; -import org.apache.spark.sql.catalyst.expressions.NamedExpression; -import org.apache.spark.sql.catalyst.parser.ParseException; -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan; import 
org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation; import org.apache.spark.sql.util.CaseInsensitiveStringMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import scala.Function2; import scala.Option; import scala.Some; import scala.Tuple2; @@ -115,7 +109,6 @@ import scala.collection.immutable.Map$; import scala.collection.immutable.Seq; import scala.collection.mutable.Builder; -import scala.runtime.AbstractPartialFunction; /** * Java version of the original SparkTableUtil.scala @@ -132,62 +125,6 @@ public class SparkTableUtil { private SparkTableUtil() {} - /** - * Returns a DataFrame with a row for each partition in the table. - * - *

The DataFrame has 3 columns, partition key (a=1/b=2), partition location, and format (avro - * or parquet). - * - * @param spark a Spark session - * @param table a table name and (optional) database - * @return a DataFrame of the table's partitions - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static Dataset partitionDF(SparkSession spark, String table) { - List partitions = getPartitions(spark, table); - return spark - .createDataFrame(partitions, SparkPartition.class) - .toDF("partition", "uri", "format"); - } - - /** - * Returns a DataFrame with a row for each partition that matches the specified 'expression'. - * - * @param spark a Spark session. - * @param table name of the table. - * @param expression The expression whose matching partitions are returned. - * @return a DataFrame of the table partitions. - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static Dataset partitionDFByFilter( - SparkSession spark, String table, String expression) { - List partitions = getPartitionsByFilter(spark, table, expression); - return spark - .createDataFrame(partitions, SparkPartition.class) - .toDF("partition", "uri", "format"); - } - - /** - * Returns all partitions in the table. - * - * @param spark a Spark session - * @param table a table name and (optional) database - * @return all table's partitions - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static List getPartitions(SparkSession spark, String table) { - try { - TableIdentifier tableIdent = spark.sessionState().sqlParser().parseTableIdentifier(table); - return getPartitions(spark, tableIdent, null); - } catch (ParseException e) { - throw SparkExceptionUtil.toUncheckedException( - e, "Unable to parse table identifier: %s", table); - } - } - /** * Returns all partitions in the table. 
* @@ -225,80 +162,6 @@ public static List getPartitions( } } - /** - * Returns partitions that match the specified 'predicate'. - * - * @param spark a Spark session - * @param table a table name and (optional) database - * @param predicate a predicate on partition columns - * @return matching table's partitions - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static List getPartitionsByFilter( - SparkSession spark, String table, String predicate) { - TableIdentifier tableIdent; - try { - tableIdent = spark.sessionState().sqlParser().parseTableIdentifier(table); - } catch (ParseException e) { - throw SparkExceptionUtil.toUncheckedException( - e, "Unable to parse the table identifier: %s", table); - } - - Expression unresolvedPredicateExpr; - try { - unresolvedPredicateExpr = spark.sessionState().sqlParser().parseExpression(predicate); - } catch (ParseException e) { - throw SparkExceptionUtil.toUncheckedException( - e, "Unable to parse the predicate expression: %s", predicate); - } - - Expression resolvedPredicateExpr = resolveAttrs(spark, table, unresolvedPredicateExpr); - return getPartitionsByFilter(spark, tableIdent, resolvedPredicateExpr); - } - - /** - * Returns partitions that match the specified 'predicate'. 
- * - * @param spark a Spark session - * @param tableIdent a table identifier - * @param predicateExpr a predicate expression on partition columns - * @return matching table's partitions - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static List getPartitionsByFilter( - SparkSession spark, TableIdentifier tableIdent, Expression predicateExpr) { - try { - SessionCatalog catalog = spark.sessionState().catalog(); - CatalogTable catalogTable = catalog.getTableMetadata(tableIdent); - - Expression resolvedPredicateExpr; - if (!predicateExpr.resolved()) { - resolvedPredicateExpr = resolveAttrs(spark, tableIdent.quotedString(), predicateExpr); - } else { - resolvedPredicateExpr = predicateExpr; - } - Seq predicates = - JavaConverters.collectionAsScalaIterableConverter(ImmutableList.of(resolvedPredicateExpr)) - .asScala() - .toIndexedSeq(); - - Seq partitions = - catalog.listPartitionsByFilter(tableIdent, predicates).toIndexedSeq(); - - return JavaConverters.seqAsJavaListConverter(partitions).asJava().stream() - .map(catalogPartition -> toSparkPartition(catalogPartition, catalogTable)) - .collect(Collectors.toList()); - } catch (NoSuchDatabaseException e) { - throw SparkExceptionUtil.toUncheckedException( - e, "Unknown table: %s. Database not found in catalog.", tableIdent); - } catch (NoSuchTableException e) { - throw SparkExceptionUtil.toUncheckedException( - e, "Unknown table: %s. 
Table not found in catalog.", tableIdent); - } - } - private static List listPartition( SparkPartition partition, PartitionSpec spec, @@ -344,31 +207,6 @@ private static SparkPartition toSparkPartition( return new SparkPartition(partitionSpec, uri, format); } - private static Expression resolveAttrs(SparkSession spark, String table, Expression expr) { - Function2 resolver = spark.sessionState().analyzer().resolver(); - LogicalPlan plan = spark.table(table).queryExecution().analyzed(); - return expr.transform( - new AbstractPartialFunction() { - @Override - public Expression apply(Expression attr) { - UnresolvedAttribute unresolvedAttribute = (UnresolvedAttribute) attr; - Option namedExpressionOption = - plan.resolve(unresolvedAttribute.nameParts(), resolver); - if (namedExpressionOption.isDefined()) { - return (Expression) namedExpressionOption.get(); - } else { - throw new IllegalArgumentException( - String.format("Could not resolve %s using columns: %s", attr, plan.output())); - } - } - - @Override - public boolean isDefinedAt(Expression attr) { - return attr instanceof UnresolvedAttribute; - } - }); - } - private static Iterator buildManifest( int formatVersion, Long snapshotId, @@ -406,61 +244,6 @@ private static Iterator buildManifest( } } - /** - * Import files from an existing Spark table to an Iceberg table. - * - *

The import uses the Spark session to get table metadata. It assumes no operation is going on - * the original and target table and thus is not thread-safe. - * - * @param spark a Spark session - * @param sourceTableIdent an identifier of the source Spark table - * @param targetTable an Iceberg table where to import the data - * @param stagingDir a staging directory to store temporary manifest files - * @param partitionFilter only import partitions whose values match those in the map, can be - * partially defined - * @param checkDuplicateFiles if true, throw exception if import results in a duplicate data file - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static void importSparkTable( - SparkSession spark, - TableIdentifier sourceTableIdent, - Table targetTable, - String stagingDir, - Map partitionFilter, - boolean checkDuplicateFiles) { - importSparkTable( - spark, sourceTableIdent, targetTable, stagingDir, partitionFilter, checkDuplicateFiles, 1); - } - - /** - * Import files from an existing Spark table to an Iceberg table. - * - *

The import uses the Spark session to get table metadata. It assumes no operation is going on - * the original and target table and thus is not thread-safe. - * - * @param spark a Spark session - * @param sourceTableIdent an identifier of the source Spark table - * @param targetTable an Iceberg table where to import the data - * @param stagingDir a staging directory to store temporary manifest files - * @param parallelism number of threads to use for file reading - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static void importSparkTable( - SparkSession spark, - TableIdentifier sourceTableIdent, - Table targetTable, - String stagingDir, - int parallelism) { - importSparkTable( - spark, - sourceTableIdent, - targetTable, - stagingDir, - TableMigrationUtil.migrationService(parallelism)); - } - /** * Import files from an existing Spark table to an Iceberg table. * @@ -628,36 +411,6 @@ public static void importSparkTable( } } - /** - * Import files from an existing Spark table to an Iceberg table. - * - *

The import uses the Spark session to get table metadata. It assumes no operation is going on - * the original and target table and thus is not thread-safe. - * - * @param spark a Spark session - * @param sourceTableIdent an identifier of the source Spark table - * @param targetTable an Iceberg table where to import the data - * @param stagingDir a staging directory to store temporary manifest files - * @param checkDuplicateFiles if true, throw exception if import results in a duplicate data file - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static void importSparkTable( - SparkSession spark, - TableIdentifier sourceTableIdent, - Table targetTable, - String stagingDir, - boolean checkDuplicateFiles) { - importSparkTable( - spark, - sourceTableIdent, - targetTable, - stagingDir, - Collections.emptyMap(), - checkDuplicateFiles, - 1); - } - /** * Import files from an existing Spark table to an Iceberg table. * @@ -733,28 +486,6 @@ private static void importUnpartitionedSparkTable( } } - /** - * Import files from given partitions to an Iceberg table. - * - * @param spark a Spark session - * @param partitions partitions to import - * @param targetTable an Iceberg table where to import the data - * @param spec a partition spec - * @param stagingDir a staging directory to store temporary manifest files - * @param checkDuplicateFiles if true, throw exception if import results in a duplicate data file - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static void importSparkPartitions( - SparkSession spark, - List partitions, - Table targetTable, - PartitionSpec spec, - String stagingDir, - boolean checkDuplicateFiles) { - importSparkPartitions(spark, partitions, targetTable, spec, stagingDir, checkDuplicateFiles, 1); - } - /** * Import files from given partitions to an Iceberg table. 
* @@ -934,41 +665,6 @@ public static void importSparkPartitions( } } - /** - * Import files from given partitions to an Iceberg table. - * - * @param spark a Spark session - * @param partitions partitions to import - * @param targetTable an Iceberg table where to import the data - * @param spec a partition spec - * @param stagingDir a staging directory to store temporary manifest files - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static void importSparkPartitions( - SparkSession spark, - List partitions, - Table targetTable, - PartitionSpec spec, - String stagingDir) { - importSparkPartitions(spark, partitions, targetTable, spec, stagingDir, false, 1); - } - - /** - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static List filterPartitions( - List partitions, Map partitionFilter) { - if (partitionFilter.isEmpty()) { - return partitions; - } else { - return partitions.stream() - .filter(p -> p.getValues().entrySet().containsAll(partitionFilter.entrySet())) - .collect(Collectors.toList()); - } - } - private static void deleteManifests(FileIO io, List manifests) { CatalogUtil.deleteFiles(io, Lists.transform(manifests, ManifestFile::path), "manifests"); } diff --git a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/source/SparkFileWriterFactory.java b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/source/SparkFileWriterFactory.java index 39110f0b0597..5f13b8aac45b 100644 --- a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/source/SparkFileWriterFactory.java +++ b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/source/SparkFileWriterFactory.java @@ -166,10 +166,7 @@ public PositionDeleteWriter newPositionDeleteWriter( } else { LOG.warn("Position deletes with deleted rows are deprecated and will be removed in 1.12.0."); Map properties = table == null ? ImmutableMap.of() : table.properties(); - MetricsConfig metricsConfig = - table == null - ? 
MetricsConfig.forPositionDelete() - : MetricsConfig.forPositionDelete(table); + MetricsConfig metricsConfig = MetricsConfig.forPositionDelete(); try { return switch (deleteFormat) { diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDVWriters.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDVWriters.java index dfc693d3094d..ac6e62524768 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDVWriters.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDVWriters.java @@ -44,7 +44,6 @@ protected FileWriterFactory newWriterFactory( .deleteFileFormat(dataFormat()) .equalityFieldIds(ArrayUtil.toIntArray(equalityFieldIds)) .equalityDeleteRowSchema(equalityDeleteRowSchema) - .positionDeleteRowSchema(positionDeleteRowSchema) .build(); } diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkPartitioningWriters.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkPartitioningWriters.java index 979abd21e7f7..e47fc53ac8bf 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkPartitioningWriters.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkPartitioningWriters.java @@ -44,7 +44,6 @@ protected FileWriterFactory newWriterFactory( .deleteFileFormat(format()) .equalityFieldIds(ArrayUtil.toIntArray(equalityFieldIds)) .equalityDeleteRowSchema(equalityDeleteRowSchema) - .positionDeleteRowSchema(positionDeleteRowSchema) .build(); } diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkRollingFileWriters.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkRollingFileWriters.java index 5ebeafcb8cef..ed2158727d86 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkRollingFileWriters.java +++ 
b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkRollingFileWriters.java @@ -41,7 +41,6 @@ protected FileWriterFactory newWriterFactory( .deleteFileFormat(format()) .equalityFieldIds(ArrayUtil.toIntArray(equalityFieldIds)) .equalityDeleteRowSchema(equalityDeleteRowSchema) - .positionDeleteRowSchema(positionDeleteRowSchema) .build(); } diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java index 06ecc20c2fc3..f5397c2ff56a 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java @@ -38,7 +38,6 @@ protected FileWriterFactory newWriterFactory(Table sourceTable) { .dataSchema(sourceTable.schema()) .dataFileFormat(fileFormat) .deleteFileFormat(fileFormat) - .positionDeleteRowSchema(sourceTable.schema()) .build(); } diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkReadConf.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkReadConf.java index 8128babfa340..096061660843 100644 --- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkReadConf.java +++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkReadConf.java @@ -64,16 +64,6 @@ public SparkReadConf(SparkSession spark, Table table) { } public SparkReadConf(SparkSession spark, Table table, CaseInsensitiveStringMap options) { - this(spark, table, null, options); - } - - /** - * @deprecated since 1.11.0, will be removed in 1.12.0. Use {@link #SparkReadConf(SparkSession, - * Table, CaseInsensitiveStringMap)} instead. 
- */ - @Deprecated - public SparkReadConf( - SparkSession spark, Table table, String branch, CaseInsensitiveStringMap options) { this.spark = spark; this.table = table; this.confParser = new SparkConfParser(spark, table, options); diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkSchemaUtil.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkSchemaUtil.java index 4c3713d3fff3..9f299cb276ae 100644 --- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkSchemaUtil.java +++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkSchemaUtil.java @@ -188,25 +188,6 @@ public static Schema convert(Schema baseSchema, StructType sparkType, boolean ca return SparkFixupTypes.fixup(schema, baseSchema); } - /** - * Convert a Spark {@link StructType struct} to a {@link Schema} based on the given schema. - * - *

This conversion will assign new ids for fields that are not found in the base schema. - * - *

Data types, field order, and nullability will match the spark type. This conversion may - * return a schema that is not compatible with base schema. - * - * @param baseSchema a Schema on which conversion is based - * @param sparkType a Spark StructType - * @return the equivalent Schema - * @throws IllegalArgumentException if the type cannot be converted or there are missing ids - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static Schema convertWithFreshIds(Schema baseSchema, StructType sparkType) { - return convertWithFreshIds(baseSchema, sparkType, true); - } - /** * Convert a Spark {@link StructType struct} to a {@link Schema} based on the given schema. * @@ -252,32 +233,6 @@ public static Schema prune(Schema schema, StructType requestedType) { .fields()); } - /** - * Prune columns from a {@link Schema} using a {@link StructType Spark type} projection. - * - *

This requires that the Spark type is a projection of the Schema. Nullability and types must - * match. - * - *

The filters list of {@link Expression} is used to ensure that columns referenced by filters - * are projected. - * - * @param schema a Schema - * @param requestedType a projection of the Spark representation of the Schema - * @param filters a list of filters - * @return a Schema corresponding to the Spark projection - * @throws IllegalArgumentException if the Spark type does not match the Schema - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static Schema prune(Schema schema, StructType requestedType, List filters) { - Set filterRefs = Binder.boundReferences(schema.asStruct(), filters, true); - return new Schema( - TypeUtil.visit(schema, new PruneColumnsWithoutReordering(requestedType, filterRefs)) - .asNestedType() - .asStructType() - .fields()); - } - /** * Prune columns from a {@link Schema} using a {@link StructType Spark type} projection. * diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java index 96499184cab3..d56ebd0b1df8 100644 --- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java +++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java @@ -94,19 +94,13 @@ import org.apache.spark.sql.catalyst.TableIdentifier; import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException; import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; -import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute; import org.apache.spark.sql.catalyst.catalog.CatalogTable; import org.apache.spark.sql.catalyst.catalog.CatalogTablePartition; import org.apache.spark.sql.catalyst.catalog.SessionCatalog; -import org.apache.spark.sql.catalyst.expressions.Expression; -import org.apache.spark.sql.catalyst.expressions.NamedExpression; -import org.apache.spark.sql.catalyst.parser.ParseException; -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan; import 
org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation; import org.apache.spark.sql.util.CaseInsensitiveStringMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import scala.Function2; import scala.Option; import scala.Some; import scala.Tuple2; @@ -114,7 +108,6 @@ import scala.collection.immutable.Map$; import scala.collection.immutable.Seq; import scala.collection.mutable.Builder; -import scala.runtime.AbstractPartialFunction; /** * Java version of the original SparkTableUtil.scala @@ -131,62 +124,6 @@ public class SparkTableUtil { private SparkTableUtil() {} - /** - * Returns a DataFrame with a row for each partition in the table. - * - *

The DataFrame has 3 columns, partition key (a=1/b=2), partition location, and format (avro - * or parquet). - * - * @param spark a Spark session - * @param table a table name and (optional) database - * @return a DataFrame of the table's partitions - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static Dataset partitionDF(SparkSession spark, String table) { - List partitions = getPartitions(spark, table); - return spark - .createDataFrame(partitions, SparkPartition.class) - .toDF("partition", "uri", "format"); - } - - /** - * Returns a DataFrame with a row for each partition that matches the specified 'expression'. - * - * @param spark a Spark session. - * @param table name of the table. - * @param expression The expression whose matching partitions are returned. - * @return a DataFrame of the table partitions. - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static Dataset partitionDFByFilter( - SparkSession spark, String table, String expression) { - List partitions = getPartitionsByFilter(spark, table, expression); - return spark - .createDataFrame(partitions, SparkPartition.class) - .toDF("partition", "uri", "format"); - } - - /** - * Returns all partitions in the table. - * - * @param spark a Spark session - * @param table a table name and (optional) database - * @return all table's partitions - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static List getPartitions(SparkSession spark, String table) { - try { - TableIdentifier tableIdent = spark.sessionState().sqlParser().parseTableIdentifier(table); - return getPartitions(spark, tableIdent, null); - } catch (ParseException e) { - throw SparkExceptionUtil.toUncheckedException( - e, "Unable to parse table identifier: %s", table); - } - } - /** * Returns all partitions in the table. 
* @@ -224,80 +161,6 @@ public static List getPartitions( } } - /** - * Returns partitions that match the specified 'predicate'. - * - * @param spark a Spark session - * @param table a table name and (optional) database - * @param predicate a predicate on partition columns - * @return matching table's partitions - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static List getPartitionsByFilter( - SparkSession spark, String table, String predicate) { - TableIdentifier tableIdent; - try { - tableIdent = spark.sessionState().sqlParser().parseTableIdentifier(table); - } catch (ParseException e) { - throw SparkExceptionUtil.toUncheckedException( - e, "Unable to parse the table identifier: %s", table); - } - - Expression unresolvedPredicateExpr; - try { - unresolvedPredicateExpr = spark.sessionState().sqlParser().parseExpression(predicate); - } catch (ParseException e) { - throw SparkExceptionUtil.toUncheckedException( - e, "Unable to parse the predicate expression: %s", predicate); - } - - Expression resolvedPredicateExpr = resolveAttrs(spark, table, unresolvedPredicateExpr); - return getPartitionsByFilter(spark, tableIdent, resolvedPredicateExpr); - } - - /** - * Returns partitions that match the specified 'predicate'. 
- * - * @param spark a Spark session - * @param tableIdent a table identifier - * @param predicateExpr a predicate expression on partition columns - * @return matching table's partitions - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static List getPartitionsByFilter( - SparkSession spark, TableIdentifier tableIdent, Expression predicateExpr) { - try { - SessionCatalog catalog = spark.sessionState().catalog(); - CatalogTable catalogTable = catalog.getTableMetadata(tableIdent); - - Expression resolvedPredicateExpr; - if (!predicateExpr.resolved()) { - resolvedPredicateExpr = resolveAttrs(spark, tableIdent.quotedString(), predicateExpr); - } else { - resolvedPredicateExpr = predicateExpr; - } - Seq predicates = - JavaConverters.collectionAsScalaIterableConverter(ImmutableList.of(resolvedPredicateExpr)) - .asScala() - .toIndexedSeq(); - - Seq partitions = - catalog.listPartitionsByFilter(tableIdent, predicates).toIndexedSeq(); - - return JavaConverters.seqAsJavaListConverter(partitions).asJava().stream() - .map(catalogPartition -> toSparkPartition(catalogPartition, catalogTable)) - .collect(Collectors.toList()); - } catch (NoSuchDatabaseException e) { - throw SparkExceptionUtil.toUncheckedException( - e, "Unknown table: %s. Database not found in catalog.", tableIdent); - } catch (NoSuchTableException e) { - throw SparkExceptionUtil.toUncheckedException( - e, "Unknown table: %s. 
Table not found in catalog.", tableIdent); - } - } - private static List listPartition( SparkPartition partition, PartitionSpec spec, @@ -343,31 +206,6 @@ private static SparkPartition toSparkPartition( return new SparkPartition(partitionSpec, uri, format); } - private static Expression resolveAttrs(SparkSession spark, String table, Expression expr) { - Function2 resolver = spark.sessionState().analyzer().resolver(); - LogicalPlan plan = spark.table(table).queryExecution().analyzed(); - return expr.transform( - new AbstractPartialFunction() { - @Override - public Expression apply(Expression attr) { - UnresolvedAttribute unresolvedAttribute = (UnresolvedAttribute) attr; - Option namedExpressionOption = - plan.resolve(unresolvedAttribute.nameParts(), resolver); - if (namedExpressionOption.isDefined()) { - return (Expression) namedExpressionOption.get(); - } else { - throw new IllegalArgumentException( - String.format("Could not resolve %s using columns: %s", attr, plan.output())); - } - } - - @Override - public boolean isDefinedAt(Expression attr) { - return attr instanceof UnresolvedAttribute; - } - }); - } - private static Iterator buildManifest( int formatVersion, Long snapshotId, @@ -405,61 +243,6 @@ private static Iterator buildManifest( } } - /** - * Import files from an existing Spark table to an Iceberg table. - * - *

The import uses the Spark session to get table metadata. It assumes no operation is going on - * the original and target table and thus is not thread-safe. - * - * @param spark a Spark session - * @param sourceTableIdent an identifier of the source Spark table - * @param targetTable an Iceberg table where to import the data - * @param stagingDir a staging directory to store temporary manifest files - * @param partitionFilter only import partitions whose values match those in the map, can be - * partially defined - * @param checkDuplicateFiles if true, throw exception if import results in a duplicate data file - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static void importSparkTable( - SparkSession spark, - TableIdentifier sourceTableIdent, - Table targetTable, - String stagingDir, - Map partitionFilter, - boolean checkDuplicateFiles) { - importSparkTable( - spark, sourceTableIdent, targetTable, stagingDir, partitionFilter, checkDuplicateFiles, 1); - } - - /** - * Import files from an existing Spark table to an Iceberg table. - * - *

The import uses the Spark session to get table metadata. It assumes no operation is going on - * the original and target table and thus is not thread-safe. - * - * @param spark a Spark session - * @param sourceTableIdent an identifier of the source Spark table - * @param targetTable an Iceberg table where to import the data - * @param stagingDir a staging directory to store temporary manifest files - * @param parallelism number of threads to use for file reading - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static void importSparkTable( - SparkSession spark, - TableIdentifier sourceTableIdent, - Table targetTable, - String stagingDir, - int parallelism) { - importSparkTable( - spark, - sourceTableIdent, - targetTable, - stagingDir, - TableMigrationUtil.migrationService(parallelism)); - } - /** * Import files from an existing Spark table to an Iceberg table. * @@ -627,36 +410,6 @@ public static void importSparkTable( } } - /** - * Import files from an existing Spark table to an Iceberg table. - * - *

The import uses the Spark session to get table metadata. It assumes no operation is going on - * the original and target table and thus is not thread-safe. - * - * @param spark a Spark session - * @param sourceTableIdent an identifier of the source Spark table - * @param targetTable an Iceberg table where to import the data - * @param stagingDir a staging directory to store temporary manifest files - * @param checkDuplicateFiles if true, throw exception if import results in a duplicate data file - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static void importSparkTable( - SparkSession spark, - TableIdentifier sourceTableIdent, - Table targetTable, - String stagingDir, - boolean checkDuplicateFiles) { - importSparkTable( - spark, - sourceTableIdent, - targetTable, - stagingDir, - Collections.emptyMap(), - checkDuplicateFiles, - 1); - } - /** * Import files from an existing Spark table to an Iceberg table. * @@ -732,28 +485,6 @@ private static void importUnpartitionedSparkTable( } } - /** - * Import files from given partitions to an Iceberg table. - * - * @param spark a Spark session - * @param partitions partitions to import - * @param targetTable an Iceberg table where to import the data - * @param spec a partition spec - * @param stagingDir a staging directory to store temporary manifest files - * @param checkDuplicateFiles if true, throw exception if import results in a duplicate data file - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static void importSparkPartitions( - SparkSession spark, - List partitions, - Table targetTable, - PartitionSpec spec, - String stagingDir, - boolean checkDuplicateFiles) { - importSparkPartitions(spark, partitions, targetTable, spec, stagingDir, checkDuplicateFiles, 1); - } - /** * Import files from given partitions to an Iceberg table. 
* @@ -933,41 +664,6 @@ public static void importSparkPartitions( } } - /** - * Import files from given partitions to an Iceberg table. - * - * @param spark a Spark session - * @param partitions partitions to import - * @param targetTable an Iceberg table where to import the data - * @param spec a partition spec - * @param stagingDir a staging directory to store temporary manifest files - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static void importSparkPartitions( - SparkSession spark, - List partitions, - Table targetTable, - PartitionSpec spec, - String stagingDir) { - importSparkPartitions(spark, partitions, targetTable, spec, stagingDir, false, 1); - } - - /** - * @deprecated since 1.11.0, will be removed in 1.12.0 - */ - @Deprecated - public static List filterPartitions( - List partitions, Map partitionFilter) { - if (partitionFilter.isEmpty()) { - return partitions; - } else { - return partitions.stream() - .filter(p -> p.getValues().entrySet().containsAll(partitionFilter.entrySet())) - .collect(Collectors.toList()); - } - } - private static void deleteManifests(FileIO io, List manifests) { CatalogUtil.deleteFiles(io, Lists.transform(manifests, ManifestFile::path), "manifests"); } diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkWriteConf.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkWriteConf.java index 80f93427805a..373273e537e4 100644 --- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkWriteConf.java +++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkWriteConf.java @@ -94,20 +94,10 @@ public class SparkWriteConf { private final SparkConfParser confParser; public SparkWriteConf(SparkSession spark, Table table) { - this(spark, table, null, CaseInsensitiveStringMap.empty()); + this(spark, table, CaseInsensitiveStringMap.empty()); } public SparkWriteConf(SparkSession spark, Table table, CaseInsensitiveStringMap options) { - this(spark, table, 
null, options); - } - - /** - * @deprecated since 1.11.0, will be removed in 1.12.0. Use {@link #SparkWriteConf(SparkSession, - * Table, CaseInsensitiveStringMap)} instead. - */ - @Deprecated - public SparkWriteConf( - SparkSession spark, Table table, String branch, CaseInsensitiveStringMap options) { this.spark = spark; this.table = table; this.sessionConf = spark.conf(); diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkFileWriterFactory.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkFileWriterFactory.java index 39110f0b0597..5f13b8aac45b 100644 --- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkFileWriterFactory.java +++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkFileWriterFactory.java @@ -166,10 +166,7 @@ public PositionDeleteWriter newPositionDeleteWriter( } else { LOG.warn("Position deletes with deleted rows are deprecated and will be removed in 1.12.0."); Map properties = table == null ? ImmutableMap.of() : table.properties(); - MetricsConfig metricsConfig = - table == null - ? 
MetricsConfig.forPositionDelete() - : MetricsConfig.forPositionDelete(table); + MetricsConfig metricsConfig = MetricsConfig.forPositionDelete(); try { return switch (deleteFormat) { diff --git a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDVWriters.java b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDVWriters.java index dfc693d3094d..ac6e62524768 100644 --- a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDVWriters.java +++ b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDVWriters.java @@ -44,7 +44,6 @@ protected FileWriterFactory newWriterFactory( .deleteFileFormat(dataFormat()) .equalityFieldIds(ArrayUtil.toIntArray(equalityFieldIds)) .equalityDeleteRowSchema(equalityDeleteRowSchema) - .positionDeleteRowSchema(positionDeleteRowSchema) .build(); } diff --git a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkPartitioningWriters.java b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkPartitioningWriters.java index 979abd21e7f7..e47fc53ac8bf 100644 --- a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkPartitioningWriters.java +++ b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkPartitioningWriters.java @@ -44,7 +44,6 @@ protected FileWriterFactory newWriterFactory( .deleteFileFormat(format()) .equalityFieldIds(ArrayUtil.toIntArray(equalityFieldIds)) .equalityDeleteRowSchema(equalityDeleteRowSchema) - .positionDeleteRowSchema(positionDeleteRowSchema) .build(); } diff --git a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkRollingFileWriters.java b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkRollingFileWriters.java index 5ebeafcb8cef..ed2158727d86 100644 --- a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkRollingFileWriters.java +++ 
b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkRollingFileWriters.java @@ -41,7 +41,6 @@ protected FileWriterFactory newWriterFactory( .deleteFileFormat(format()) .equalityFieldIds(ArrayUtil.toIntArray(equalityFieldIds)) .equalityDeleteRowSchema(equalityDeleteRowSchema) - .positionDeleteRowSchema(positionDeleteRowSchema) .build(); } diff --git a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java index 06ecc20c2fc3..f5397c2ff56a 100644 --- a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java +++ b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java @@ -38,7 +38,6 @@ protected FileWriterFactory newWriterFactory(Table sourceTable) { .dataSchema(sourceTable.schema()) .dataFileFormat(fileFormat) .deleteFileFormat(fileFormat) - .positionDeleteRowSchema(sourceTable.schema()) .build(); } From 52b6f9f673de4b05138f9d9f3b3a57fbc2891075 Mon Sep 17 00:00:00 2001 From: Hongyue Zhang Date: Mon, 25 May 2026 12:04:21 -0700 Subject: [PATCH 02/14] All: Fix tests after removing deprecated signer defaults and positionDeleteRowSchema - TestS3FileIOProperties: Add required signer.endpoint property since the deprecated default was removed - TestSparkWriterMetrics: Override checkRowStatistics and checkNotExistingRowStatistics to match behavior without positionDeleteRowSchema (consistent with Flink tests) Co-Authored-By: Claude Opus 4.6 (1M context) --- .../iceberg/aws/TestS3FileIOProperties.java | 15 +++++++++++++-- .../spark/source/TestSparkWriterMetrics.java | 14 ++++++++++++++ .../spark/source/TestSparkWriterMetrics.java | 14 ++++++++++++++ .../spark/source/TestSparkWriterMetrics.java | 14 ++++++++++++++ 4 files changed, 55 insertions(+), 2 deletions(-) diff --git a/aws/src/test/java/org/apache/iceberg/aws/TestS3FileIOProperties.java 
b/aws/src/test/java/org/apache/iceberg/aws/TestS3FileIOProperties.java index f374a18c0411..e0932e216d94 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/TestS3FileIOProperties.java +++ b/aws/src/test/java/org/apache/iceberg/aws/TestS3FileIOProperties.java @@ -28,6 +28,7 @@ import org.apache.iceberg.aws.s3.signer.S3V4RestSignerClient; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.rest.RESTCatalogProperties; import org.junit.jupiter.api.Test; import org.mockito.ArgumentCaptor; import org.mockito.Mockito; @@ -225,7 +226,12 @@ public void testS3RemoteSigningEnabled() { String uri = "http://localhost:12345"; Map properties = ImmutableMap.of( - S3FileIOProperties.REMOTE_SIGNING_ENABLED, "true", CatalogProperties.URI, uri); + S3FileIOProperties.REMOTE_SIGNING_ENABLED, + "true", + CatalogProperties.URI, + uri, + RESTCatalogProperties.SIGNER_ENDPOINT, + "v1/aws/s3/sign"); S3FileIOProperties s3Properties = new S3FileIOProperties(properties); S3ClientBuilder builder = S3Client.builder(); @@ -244,7 +250,12 @@ public void s3RemoteSigningEnabledWithUserAgentAndRetryPolicy() { String uri = "http://localhost:12345"; Map properties = ImmutableMap.of( - S3FileIOProperties.REMOTE_SIGNING_ENABLED, "true", CatalogProperties.URI, uri); + S3FileIOProperties.REMOTE_SIGNING_ENABLED, + "true", + CatalogProperties.URI, + uri, + RESTCatalogProperties.SIGNER_ENDPOINT, + "v1/aws/s3/sign"); S3FileIOProperties s3Properties = new S3FileIOProperties(properties); S3ClientBuilder builder = S3Client.builder(); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java index f5397c2ff56a..c6d7418a9061 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java +++ 
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java @@ -18,6 +18,10 @@ */ package org.apache.iceberg.spark.source; +import static org.assertj.core.api.Assertions.assertThat; + +import java.nio.ByteBuffer; +import java.util.Map; import org.apache.iceberg.FileFormat; import org.apache.iceberg.Table; import org.apache.iceberg.io.FileWriterFactory; @@ -63,4 +67,14 @@ protected InternalRow toGenericRow(int value, int repeated) { } return row; } + + @Override + protected void checkRowStatistics(Map bounds) { + assertThat(bounds).hasSize(2); + } + + @Override + protected void checkNotExistingRowStatistics(Map bounds) { + assertThat(bounds).isNull(); + } } diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java index f5397c2ff56a..c6d7418a9061 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java @@ -18,6 +18,10 @@ */ package org.apache.iceberg.spark.source; +import static org.assertj.core.api.Assertions.assertThat; + +import java.nio.ByteBuffer; +import java.util.Map; import org.apache.iceberg.FileFormat; import org.apache.iceberg.Table; import org.apache.iceberg.io.FileWriterFactory; @@ -63,4 +67,14 @@ protected InternalRow toGenericRow(int value, int repeated) { } return row; } + + @Override + protected void checkRowStatistics(Map bounds) { + assertThat(bounds).hasSize(2); + } + + @Override + protected void checkNotExistingRowStatistics(Map bounds) { + assertThat(bounds).isNull(); + } } diff --git a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java index f5397c2ff56a..c6d7418a9061 100644 --- 
a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java +++ b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkWriterMetrics.java @@ -18,6 +18,10 @@ */ package org.apache.iceberg.spark.source; +import static org.assertj.core.api.Assertions.assertThat; + +import java.nio.ByteBuffer; +import java.util.Map; import org.apache.iceberg.FileFormat; import org.apache.iceberg.Table; import org.apache.iceberg.io.FileWriterFactory; @@ -63,4 +67,14 @@ protected InternalRow toGenericRow(int value, int repeated) { } return row; } + + @Override + protected void checkRowStatistics(Map bounds) { + assertThat(bounds).hasSize(2); + } + + @Override + protected void checkNotExistingRowStatistics(Map bounds) { + assertThat(bounds).isNull(); + } } From d2a00845a1995980b17572847c2b8e4142adc280 Mon Sep 17 00:00:00 2001 From: Hongyue Zhang Date: Mon, 25 May 2026 12:46:50 -0700 Subject: [PATCH 03/14] AWS: Fix testApplySignerConfiguration missing required signer.endpoint Co-Authored-By: Claude Opus 4.6 (1M context) --- .../org/apache/iceberg/aws/s3/TestS3FileIOProperties.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java b/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java index 953f73d45d4a..5318ac6c2e03 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java +++ b/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java @@ -32,6 +32,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.apache.iceberg.rest.RESTCatalogProperties; import org.junit.jupiter.api.Test; import org.mockito.ArgumentCaptor; import org.mockito.Mockito; @@ -504,7 +505,9 @@ public void testApplySignerConfiguration() { 
S3FileIOProperties.REMOTE_SIGNING_ENABLED, "true", CatalogProperties.URI, - "http://localhost:12345"); + "http://localhost:12345", + RESTCatalogProperties.SIGNER_ENDPOINT, + "v1/aws/s3/sign"); S3FileIOProperties s3FileIOProperties = new S3FileIOProperties(properties); S3ClientBuilder mockS3ClientBuilder = Mockito.mock(S3ClientBuilder.class); s3FileIOProperties.applySignerConfiguration(mockS3ClientBuilder); From d228aef6b5e751bd2cb990302f25c6f7d8e9cbff Mon Sep 17 00:00:00 2001 From: Hongyue Zhang Date: Mon, 25 May 2026 13:39:14 -0700 Subject: [PATCH 04/14] Core: Restore @Deprecated annotations for methods not yet removed BaseScan.io() and PositionDelete.set(path, pos, row)/row() still have callers and cannot be removed yet. Restore their deprecation annotations to preserve the original removal intent. Co-Authored-By: Claude Opus 4.6 (1M context) --- core/src/main/java/org/apache/iceberg/BaseScan.java | 4 ++++ .../org/apache/iceberg/deletes/PositionDelete.java | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/core/src/main/java/org/apache/iceberg/BaseScan.java b/core/src/main/java/org/apache/iceberg/BaseScan.java index fd0a6986121a..242a5aaacc09 100644 --- a/core/src/main/java/org/apache/iceberg/BaseScan.java +++ b/core/src/main/java/org/apache/iceberg/BaseScan.java @@ -103,6 +103,10 @@ public Table table() { return table; } + /** + * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link BaseScan#fileIO()} instead. 
+ */ + @Deprecated protected FileIO io() { return table.io(); } diff --git a/core/src/main/java/org/apache/iceberg/deletes/PositionDelete.java b/core/src/main/java/org/apache/iceberg/deletes/PositionDelete.java index 48a4aa635c79..c3b6cbaa9bff 100644 --- a/core/src/main/java/org/apache/iceberg/deletes/PositionDelete.java +++ b/core/src/main/java/org/apache/iceberg/deletes/PositionDelete.java @@ -43,6 +43,12 @@ public PositionDelete set(CharSequence newPath, long newPos) { return this; } + /** + * @deprecated This method is deprecated as of version 1.11.0 and will be removed in 1.12.0. + * Position deletes that include row data are no longer supported. Use {@link + * #set(CharSequence, long)} instead. + */ + @Deprecated public PositionDelete set(CharSequence newPath, long newPos, R newRow) { this.path = newPath; this.pos = newPos; @@ -63,6 +69,11 @@ public long pos() { return pos; } + /** + * @deprecated This method is deprecated as of version 1.11.0 and will be removed in 1.12.0. + * Position deletes that include row data are no longer supported. + */ + @Deprecated public R row() { return row; } From 20c7066dcb943d28d1fb43f3ef63958a2571e322 Mon Sep 17 00:00:00 2001 From: Hongyue Zhang Date: Mon, 25 May 2026 13:49:15 -0700 Subject: [PATCH 05/14] Core: Remove deprecated BaseScan.io() and migrate callers to table().io() The method had only 4 internal callers (DataScan, BaseDistributedDataScan) which are trivially replaced with table().io(). Also moves revapi entries to the 1.12.0 section with unified justification. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .palantir/revapi.yml | 164 ++++++++++++------ .../iceberg/BaseDistributedDataScan.java | 6 +- .../java/org/apache/iceberg/BaseScan.java | 8 - .../java/org/apache/iceberg/DataScan.java | 2 +- 4 files changed, 112 insertions(+), 68 deletions(-) diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index 11e7948e72f4..048ca8660fdd 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -517,6 +517,62 @@ acceptedBreaks: - code: "java.class.removed" old: "class org.apache.iceberg.data.PartitionStatsHandler" justification: "Removing deprecated code for 1.11.0" + "1.11.0": + org.apache.iceberg:iceberg-core: + - code: "java.class.removed" + old: "class org.apache.iceberg.PartitionStats" + justification: "Removed deprecated functionality for partition stats" + - code: "java.field.removed" + old: "field org.apache.iceberg.PartitionStatsHandler.DATA_FILE_COUNT" + justification: "Removed deprecated functionality for partition stats" + - code: "java.field.removed" + old: "field org.apache.iceberg.PartitionStatsHandler.DATA_RECORD_COUNT" + justification: "Removed deprecated functionality for partition stats" + - code: "java.field.removed" + old: "field org.apache.iceberg.PartitionStatsHandler.DV_COUNT" + justification: "Removed deprecated functionality for partition stats" + - code: "java.field.removed" + old: "field org.apache.iceberg.PartitionStatsHandler.EQUALITY_DELETE_FILE_COUNT" + justification: "Removed deprecated functionality for partition stats" + - code: "java.field.removed" + old: "field org.apache.iceberg.PartitionStatsHandler.EQUALITY_DELETE_RECORD_COUNT" + justification: "Removed deprecated functionality for partition stats" + - code: "java.field.removed" + old: "field org.apache.iceberg.PartitionStatsHandler.LAST_UPDATED_AT" + justification: "Removed deprecated functionality for partition stats" + - code: "java.field.removed" + old: "field 
org.apache.iceberg.PartitionStatsHandler.LAST_UPDATED_SNAPSHOT_ID" + justification: "Removed deprecated functionality for partition stats" + - code: "java.field.removed" + old: "field org.apache.iceberg.PartitionStatsHandler.POSITION_DELETE_FILE_COUNT" + justification: "Removed deprecated functionality for partition stats" + - code: "java.field.removed" + old: "field org.apache.iceberg.PartitionStatsHandler.POSITION_DELETE_RECORD_COUNT" + justification: "Removed deprecated functionality for partition stats" + - code: "java.field.removed" + old: "field org.apache.iceberg.PartitionStatsHandler.SPEC_ID" + justification: "Removed deprecated functionality for partition stats" + - code: "java.field.removed" + old: "field org.apache.iceberg.PartitionStatsHandler.TOTAL_DATA_FILE_SIZE_IN_BYTES" + justification: "Removed deprecated functionality for partition stats" + - code: "java.field.removed" + old: "field org.apache.iceberg.PartitionStatsHandler.TOTAL_RECORD_COUNT" + justification: "Removed deprecated functionality for partition stats" + - code: "java.field.removedWithConstant" + old: "field org.apache.iceberg.PartitionStatsHandler.PARTITION_FIELD_ID" + justification: "Removed deprecated functionality for partition stats" + - code: "java.field.removedWithConstant" + old: "field org.apache.iceberg.PartitionStatsHandler.PARTITION_FIELD_NAME" + justification: "Removed deprecated functionality for partition stats" + - code: "java.method.removed" + old: "method org.apache.iceberg.Schema org.apache.iceberg.PartitionStatsHandler::schema(org.apache.iceberg.types.Types.StructType,\ + \ int)" + justification: "Removed deprecated functionality for partition stats" + - code: "java.method.removed" + old: "method org.apache.iceberg.io.CloseableIterable\ + \ org.apache.iceberg.PartitionStatsHandler::readPartitionStatsFile(org.apache.iceberg.Schema,\ + \ org.apache.iceberg.io.InputFile)" + justification: "Removed deprecated functionality for partition stats" "1.12.0": 
org.apache.iceberg:iceberg-core: - code: "java.class.removed" @@ -628,6 +684,58 @@ acceptedBreaks: \ java.util.function.Function,\ \ org.apache.iceberg.io.FileIO)" justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.io.FileIO org.apache.iceberg.BaseScan>::io() @ org.apache.iceberg.AllDataFilesTable.AllDataFilesTableScan" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.io.FileIO org.apache.iceberg.BaseScan>::io() @ org.apache.iceberg.AllDeleteFilesTable.AllDeleteFilesTableScan" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.io.FileIO org.apache.iceberg.BaseScan>::io() @ org.apache.iceberg.AllFilesTable.AllFilesTableScan" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.io.FileIO org.apache.iceberg.BaseScan>::io() @ org.apache.iceberg.AllManifestsTable.AllManifestsTableScan" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.io.FileIO org.apache.iceberg.BaseScan>::io() @ org.apache.iceberg.DataFilesTable.DataFilesTableScan" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.io.FileIO org.apache.iceberg.BaseScan>::io() @ org.apache.iceberg.DataTableScan" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.io.FileIO org.apache.iceberg.BaseScan>::io() @ org.apache.iceberg.DeleteFilesTable.DeleteFilesTableScan" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method 
org.apache.iceberg.io.FileIO org.apache.iceberg.BaseScan>::io() @ org.apache.iceberg.FilesTable.FilesTableScan" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.io.FileIO org.apache.iceberg.BaseScan>::io() @ org.apache.iceberg.PositionDeletesTable.PositionDeletesBatchScan" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.io.FileIO org.apache.iceberg.BaseScan>::io() @ org.apache.iceberg.SnapshotScan>" + justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: "java.method.removed" old: "method org.apache.iceberg.rest.responses.LoadTableResponse org.apache.iceberg.rest.CatalogHandlers::loadTable(org.apache.iceberg.catalog.Catalog,\ \ org.apache.iceberg.catalog.TableIdentifier)" @@ -691,62 +799,6 @@ acceptedBreaks: \ org.apache.iceberg.types.Types.StructType) @ org.apache.iceberg.data.parquet.InternalReader" justification: "Removing deprecated API scheduled for removal in 1.12.0" - "1.11.0": - org.apache.iceberg:iceberg-core: - - code: "java.class.removed" - old: "class org.apache.iceberg.PartitionStats" - justification: "Removed deprecated functionality for partition stats" - - code: "java.field.removed" - old: "field org.apache.iceberg.PartitionStatsHandler.DATA_FILE_COUNT" - justification: "Removed deprecated functionality for partition stats" - - code: "java.field.removed" - old: "field org.apache.iceberg.PartitionStatsHandler.DATA_RECORD_COUNT" - justification: "Removed deprecated functionality for partition stats" - - code: "java.field.removed" - old: "field org.apache.iceberg.PartitionStatsHandler.DV_COUNT" - justification: "Removed deprecated functionality for partition stats" - - code: "java.field.removed" - old: "field org.apache.iceberg.PartitionStatsHandler.EQUALITY_DELETE_FILE_COUNT" - justification: "Removed deprecated functionality for partition 
stats" - - code: "java.field.removed" - old: "field org.apache.iceberg.PartitionStatsHandler.EQUALITY_DELETE_RECORD_COUNT" - justification: "Removed deprecated functionality for partition stats" - - code: "java.field.removed" - old: "field org.apache.iceberg.PartitionStatsHandler.LAST_UPDATED_AT" - justification: "Removed deprecated functionality for partition stats" - - code: "java.field.removed" - old: "field org.apache.iceberg.PartitionStatsHandler.LAST_UPDATED_SNAPSHOT_ID" - justification: "Removed deprecated functionality for partition stats" - - code: "java.field.removed" - old: "field org.apache.iceberg.PartitionStatsHandler.POSITION_DELETE_FILE_COUNT" - justification: "Removed deprecated functionality for partition stats" - - code: "java.field.removed" - old: "field org.apache.iceberg.PartitionStatsHandler.POSITION_DELETE_RECORD_COUNT" - justification: "Removed deprecated functionality for partition stats" - - code: "java.field.removed" - old: "field org.apache.iceberg.PartitionStatsHandler.SPEC_ID" - justification: "Removed deprecated functionality for partition stats" - - code: "java.field.removed" - old: "field org.apache.iceberg.PartitionStatsHandler.TOTAL_DATA_FILE_SIZE_IN_BYTES" - justification: "Removed deprecated functionality for partition stats" - - code: "java.field.removed" - old: "field org.apache.iceberg.PartitionStatsHandler.TOTAL_RECORD_COUNT" - justification: "Removed deprecated functionality for partition stats" - - code: "java.field.removedWithConstant" - old: "field org.apache.iceberg.PartitionStatsHandler.PARTITION_FIELD_ID" - justification: "Removed deprecated functionality for partition stats" - - code: "java.field.removedWithConstant" - old: "field org.apache.iceberg.PartitionStatsHandler.PARTITION_FIELD_NAME" - justification: "Removed deprecated functionality for partition stats" - - code: "java.method.removed" - old: "method org.apache.iceberg.Schema 
org.apache.iceberg.PartitionStatsHandler::schema(org.apache.iceberg.types.Types.StructType,\ - \ int)" - justification: "Removed deprecated functionality for partition stats" - - code: "java.method.removed" - old: "method org.apache.iceberg.io.CloseableIterable\ - \ org.apache.iceberg.PartitionStatsHandler::readPartitionStatsFile(org.apache.iceberg.Schema,\ - \ org.apache.iceberg.io.InputFile)" - justification: "Removed deprecated functionality for partition stats" "1.2.0": org.apache.iceberg:iceberg-api: - code: "java.field.constantValueChanged" diff --git a/core/src/main/java/org/apache/iceberg/BaseDistributedDataScan.java b/core/src/main/java/org/apache/iceberg/BaseDistributedDataScan.java index e951ae830737..36f3e4d867d3 100644 --- a/core/src/main/java/org/apache/iceberg/BaseDistributedDataScan.java +++ b/core/src/main/java/org/apache/iceberg/BaseDistributedDataScan.java @@ -194,7 +194,7 @@ public CloseableIterable> planTasks() { } private List findMatchingDataManifests(Snapshot snapshot) { - List dataManifests = snapshot.dataManifests(io()); + List dataManifests = snapshot.dataManifests(table().io()); scanMetrics().totalDataManifests().increment(dataManifests.size()); List matchingDataManifests = filterManifests(dataManifests); @@ -205,7 +205,7 @@ private List findMatchingDataManifests(Snapshot snapshot) { } private List findMatchingDeleteManifests(Snapshot snapshot) { - List deleteManifests = snapshot.deleteManifests(io()); + List deleteManifests = snapshot.deleteManifests(table().io()); scanMetrics().totalDeleteManifests().increment(deleteManifests.size()); List matchingDeleteManifests = filterManifests(deleteManifests); @@ -293,7 +293,7 @@ private CompletableFuture newDeletesFuture( } private DeleteFileIndex planDeletesLocally(List deleteManifests) { - DeleteFileIndex.Builder builder = DeleteFileIndex.builderFor(io(), deleteManifests); + DeleteFileIndex.Builder builder = DeleteFileIndex.builderFor(table().io(), deleteManifests); if (shouldPlanWithExecutor() 
&& deleteManifests.size() > 1) { builder.planWith(planExecutor()); diff --git a/core/src/main/java/org/apache/iceberg/BaseScan.java b/core/src/main/java/org/apache/iceberg/BaseScan.java index 242a5aaacc09..3c5692c50d2b 100644 --- a/core/src/main/java/org/apache/iceberg/BaseScan.java +++ b/core/src/main/java/org/apache/iceberg/BaseScan.java @@ -103,14 +103,6 @@ public Table table() { return table; } - /** - * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link BaseScan#fileIO()} instead. - */ - @Deprecated - protected FileIO io() { - return table.io(); - } - @Override public Supplier fileIO() { return table::io; diff --git a/core/src/main/java/org/apache/iceberg/DataScan.java b/core/src/main/java/org/apache/iceberg/DataScan.java index 336824e4cc91..7921591abf95 100644 --- a/core/src/main/java/org/apache/iceberg/DataScan.java +++ b/core/src/main/java/org/apache/iceberg/DataScan.java @@ -49,7 +49,7 @@ protected ManifestGroup newManifestGroup( boolean withColumnStats) { ManifestGroup manifestGroup = - new ManifestGroup(io(), dataManifests, deleteManifests) + new ManifestGroup(table().io(), dataManifests, deleteManifests) .caseSensitive(isCaseSensitive()) .select(withColumnStats ? SCAN_WITH_STATS_COLUMNS : SCAN_COLUMNS) .filterData(filter()) From fb5ddae9b20a1c5d92328fe62763963bca88b69a Mon Sep 17 00:00:00 2001 From: Hongyue Zhang Date: Mon, 25 May 2026 13:55:27 -0700 Subject: [PATCH 06/14] Core: Move partition stats revapi entries to 1.12.0 with unified justification All deprecated API removals for this release should be under the "1.12.0" version key with justification "Removing deprecated API scheduled for removal in 1.12.0", sorted alphabetically by class/method name. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .palantir/revapi.yml | 73 ++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index 048ca8660fdd..fc2fca7ed814 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -517,69 +517,55 @@ acceptedBreaks: - code: "java.class.removed" old: "class org.apache.iceberg.data.PartitionStatsHandler" justification: "Removing deprecated code for 1.11.0" - "1.11.0": + "1.12.0": org.apache.iceberg:iceberg-core: - code: "java.class.removed" old: "class org.apache.iceberg.PartitionStats" - justification: "Removed deprecated functionality for partition stats" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.class.removed" + old: "class org.apache.iceberg.PartitionStats" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.class.removed" + old: "class org.apache.iceberg.SystemProperties" + justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.class.removed" + old: "class org.apache.iceberg.data.avro.DataReader" + justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.DATA_FILE_COUNT" - justification: "Removed deprecated functionality for partition stats" + justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.DATA_RECORD_COUNT" - justification: "Removed deprecated functionality for partition stats" + justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.DV_COUNT" - justification: "Removed deprecated functionality for partition stats" + justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: 
"java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.EQUALITY_DELETE_FILE_COUNT" - justification: "Removed deprecated functionality for partition stats" + justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.EQUALITY_DELETE_RECORD_COUNT" - justification: "Removed deprecated functionality for partition stats" + justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.LAST_UPDATED_AT" - justification: "Removed deprecated functionality for partition stats" + justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.LAST_UPDATED_SNAPSHOT_ID" - justification: "Removed deprecated functionality for partition stats" + justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.POSITION_DELETE_FILE_COUNT" - justification: "Removed deprecated functionality for partition stats" + justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.POSITION_DELETE_RECORD_COUNT" - justification: "Removed deprecated functionality for partition stats" + justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.SPEC_ID" - justification: "Removed deprecated functionality for partition stats" + justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.TOTAL_DATA_FILE_SIZE_IN_BYTES" - justification: "Removed deprecated functionality for partition stats" + justification: "Removing deprecated API scheduled 
for removal in 1.12.0" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.TOTAL_RECORD_COUNT" - justification: "Removed deprecated functionality for partition stats" - - code: "java.field.removedWithConstant" - old: "field org.apache.iceberg.PartitionStatsHandler.PARTITION_FIELD_ID" - justification: "Removed deprecated functionality for partition stats" - - code: "java.field.removedWithConstant" - old: "field org.apache.iceberg.PartitionStatsHandler.PARTITION_FIELD_NAME" - justification: "Removed deprecated functionality for partition stats" - - code: "java.method.removed" - old: "method org.apache.iceberg.Schema org.apache.iceberg.PartitionStatsHandler::schema(org.apache.iceberg.types.Types.StructType,\ - \ int)" - justification: "Removed deprecated functionality for partition stats" - - code: "java.method.removed" - old: "method org.apache.iceberg.io.CloseableIterable\ - \ org.apache.iceberg.PartitionStatsHandler::readPartitionStatsFile(org.apache.iceberg.Schema,\ - \ org.apache.iceberg.io.InputFile)" - justification: "Removed deprecated functionality for partition stats" - "1.12.0": - org.apache.iceberg:iceberg-core: - - code: "java.class.removed" - old: "class org.apache.iceberg.SystemProperties" - justification: "Removing deprecated API scheduled for removal in 1.12.0" - - code: "java.class.removed" - old: "class org.apache.iceberg.data.avro.DataReader" justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: "java.field.removed" old: "field org.apache.iceberg.SystemConfigs.NETFLIX_UNSAFE_PARQUET_ID_FALLBACK_ENABLED" @@ -590,6 +576,12 @@ acceptedBreaks: - code: "java.field.removed" old: "field org.apache.iceberg.rest.RESTUtil.NAMESPACE_SPLITTER" justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.field.removedWithConstant" + old: "field org.apache.iceberg.PartitionStatsHandler.PARTITION_FIELD_ID" + justification: "Removing deprecated API scheduled for removal in 1.12.0" 
+ - code: "java.field.removedWithConstant" + old: "field org.apache.iceberg.PartitionStatsHandler.PARTITION_FIELD_NAME" + justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: "java.field.removedWithConstant" old: "field org.apache.iceberg.TableProperties.MANIFEST_LISTS_ENABLED" justification: "Removing deprecated API scheduled for removal in 1.12.0" @@ -665,6 +657,10 @@ acceptedBreaks: - code: "java.method.removed" old: "method org.apache.iceberg.MetricsConfig org.apache.iceberg.MetricsConfig::forPositionDelete(org.apache.iceberg.Table)" justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.Schema org.apache.iceberg.PartitionStatsHandler::schema(org.apache.iceberg.types.Types.StructType,\ + \ int)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: "java.method.removed" old: "method org.apache.iceberg.catalog.Namespace org.apache.iceberg.rest.RESTUtil::decodeNamespace(java.lang.String)" justification: "Removing deprecated API scheduled for removal in 1.12.0" @@ -684,6 +680,11 @@ acceptedBreaks: \ java.util.function.Function,\ \ org.apache.iceberg.io.FileIO)" justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.removed" + old: "method org.apache.iceberg.io.CloseableIterable\ + \ org.apache.iceberg.PartitionStatsHandler::readPartitionStatsFile(org.apache.iceberg.Schema,\ + \ org.apache.iceberg.io.InputFile)" + justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: "java.method.removed" old: "method org.apache.iceberg.io.FileIO org.apache.iceberg.BaseScan Date: Wed, 24 Jun 2026 17:40:13 -0700 Subject: [PATCH 07/14] REST: Reduce visibility of specsById on scan-planning responses The `specsById` map carried on `BaseScanTaskResponse` and its builder is parser context, not part of the REST spec for scan-planning response models. 
It was deprecated in 1.11.0 (#14485) with a note that visibility would be reduced in 1.12.0. Demote `specsById()` getter and `withSpecsById()` setter (plus the builder's `deleteFiles()` getter) from public to protected so the parsers and subclasses can still use them while keeping the field out of the public API. To preserve cross-package construction paths used by `CatalogHandlers`, `RESTServerCatalogAdapter`, and tests in `org.apache.iceberg.rest`: - Add `builder(Map specsById)` factory on each of `PlanTableScanResponse`, `FetchPlanningResultResponse`, and `FetchScanTasksResponse` for initial server-side construction. - Add `toBuilder()` on each response for copy-with-modification patterns (used when adapters rebuild a response to inject credentials or rewrite status). Co-Authored-By: Claude Opus 4.7 (1M context) --- .palantir/revapi.yml | 32 +++++++++++++ .../apache/iceberg/rest/CatalogHandlers.java | 14 ++---- .../rest/responses/BaseScanTaskResponse.java | 24 ++-------- .../FetchPlanningResultResponse.java | 23 ++++++++++ .../responses/FetchScanTasksResponse.java | 20 ++++++++ .../rest/responses/PlanTableScanResponse.java | 24 ++++++++++ .../iceberg/rest/TestRESTScanPlanning.java | 46 +++++++------------ .../rest/RESTServerCatalogAdapter.java | 12 +---- 8 files changed, 126 insertions(+), 69 deletions(-) diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index fc2fca7ed814..7a8251b3ebe3 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -531,6 +531,22 @@ acceptedBreaks: - code: "java.class.removed" old: "class org.apache.iceberg.data.avro.DataReader" justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.element.noLongerDeprecated" + old: "method B org.apache.iceberg.rest.responses.BaseScanTaskResponse.Builder, R>, R extends org.apache.iceberg.rest.responses.BaseScanTaskResponse>::withSpecsById(java.util.Map)" + new: "method B org.apache.iceberg.rest.responses.BaseScanTaskResponse.Builder, R>, R extends 
org.apache.iceberg.rest.responses.BaseScanTaskResponse>::withSpecsById(java.util.Map)" + justification: "Reducing visibility of internal scan-planning accessor to protected; deprecated in 1.11.0" + - code: "java.element.noLongerDeprecated" + old: "method java.util.List org.apache.iceberg.rest.responses.BaseScanTaskResponse.Builder, R>, R extends org.apache.iceberg.rest.responses.BaseScanTaskResponse>::deleteFiles()" + new: "method java.util.List org.apache.iceberg.rest.responses.BaseScanTaskResponse.Builder, R>, R extends org.apache.iceberg.rest.responses.BaseScanTaskResponse>::deleteFiles()" + justification: "Reducing visibility of internal scan-planning accessor to protected; deprecated in 1.11.0" + - code: "java.element.noLongerDeprecated" + old: "method java.util.Map org.apache.iceberg.rest.responses.BaseScanTaskResponse.Builder, R>, R extends org.apache.iceberg.rest.responses.BaseScanTaskResponse>::specsById()" + new: "method java.util.Map org.apache.iceberg.rest.responses.BaseScanTaskResponse.Builder, R>, R extends org.apache.iceberg.rest.responses.BaseScanTaskResponse>::specsById()" + justification: "Reducing visibility of internal scan-planning accessor to protected; deprecated in 1.11.0" + - code: "java.element.noLongerDeprecated" + old: "method java.util.Map org.apache.iceberg.rest.responses.BaseScanTaskResponse::specsById()" + new: "method java.util.Map org.apache.iceberg.rest.responses.BaseScanTaskResponse::specsById()" + justification: "Reducing visibility of internal scan-planning accessor to protected; deprecated in 1.11.0" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.DATA_FILE_COUNT" justification: "Removing deprecated API scheduled for removal in 1.12.0" @@ -756,6 +772,22 @@ acceptedBreaks: - code: "java.method.removed" old: "method void org.apache.iceberg.io.ContentCache::invalidateAll()" justification: "Removing deprecated API scheduled for removal in 1.12.0" + - code: "java.method.visibilityReduced" + old: 
"method B org.apache.iceberg.rest.responses.BaseScanTaskResponse.Builder, R>, R extends org.apache.iceberg.rest.responses.BaseScanTaskResponse>::withSpecsById(java.util.Map)" + new: "method B org.apache.iceberg.rest.responses.BaseScanTaskResponse.Builder, R>, R extends org.apache.iceberg.rest.responses.BaseScanTaskResponse>::withSpecsById(java.util.Map)" + justification: "Reducing visibility of internal scan-planning accessor to protected; deprecated in 1.11.0" + - code: "java.method.visibilityReduced" + old: "method java.util.List org.apache.iceberg.rest.responses.BaseScanTaskResponse.Builder, R>, R extends org.apache.iceberg.rest.responses.BaseScanTaskResponse>::deleteFiles()" + new: "method java.util.List org.apache.iceberg.rest.responses.BaseScanTaskResponse.Builder, R>, R extends org.apache.iceberg.rest.responses.BaseScanTaskResponse>::deleteFiles()" + justification: "Reducing visibility of internal scan-planning accessor to protected; deprecated in 1.11.0" + - code: "java.method.visibilityReduced" + old: "method java.util.Map org.apache.iceberg.rest.responses.BaseScanTaskResponse.Builder, R>, R extends org.apache.iceberg.rest.responses.BaseScanTaskResponse>::specsById()" + new: "method java.util.Map org.apache.iceberg.rest.responses.BaseScanTaskResponse.Builder, R>, R extends org.apache.iceberg.rest.responses.BaseScanTaskResponse>::specsById()" + justification: "Reducing visibility of internal scan-planning accessor to protected; deprecated in 1.11.0" + - code: "java.method.visibilityReduced" + old: "method java.util.Map org.apache.iceberg.rest.responses.BaseScanTaskResponse::specsById()" + new: "method java.util.Map org.apache.iceberg.rest.responses.BaseScanTaskResponse::specsById()" + justification: "Reducing visibility of internal scan-planning accessor to protected; deprecated in 1.11.0" org.apache.iceberg:iceberg-data: - code: "java.class.removed" old: "class org.apache.iceberg.data.BaseFileWriterFactory" diff --git 
a/core/src/main/java/org/apache/iceberg/rest/CatalogHandlers.java b/core/src/main/java/org/apache/iceberg/rest/CatalogHandlers.java index 226ea67d6a21..93841f613635 100644 --- a/core/src/main/java/org/apache/iceberg/rest/CatalogHandlers.java +++ b/core/src/main/java/org/apache/iceberg/rest/CatalogHandlers.java @@ -848,10 +848,9 @@ public static PlanTableScanResponse planTableScan( table.uuid().toString(), tasksPerPlanTask.applyAsInt(configuredScan), request.minRowsRequested()); - return PlanTableScanResponse.builder() + return PlanTableScanResponse.builder(table.specs()) .withPlanId(asyncPlanId) .withPlanStatus(PlanStatus.SUBMITTED) - .withSpecsById(table.specs()) .build(); } @@ -868,11 +867,10 @@ public static PlanTableScanResponse planTableScan( ? Collections.emptyList() : IN_MEMORY_PLANNING_STATE.nextPlanTask(initial.second()); PlanTableScanResponse.Builder builder = - PlanTableScanResponse.builder() + PlanTableScanResponse.builder(table.specs()) .withPlanStatus(PlanStatus.COMPLETED) .withPlanId(planId) - .withFileScanTasks(initial.first()) - .withSpecsById(table.specs()); + .withFileScanTasks(initial.first()); if (!nextPlanTasks.isEmpty()) { builder.withPlanTasks(nextPlanTasks); @@ -898,11 +896,10 @@ public static FetchPlanningResultResponse fetchPlanningResult( } Pair, String> initial = IN_MEMORY_PLANNING_STATE.initialScanTasksFor(planId); - return FetchPlanningResultResponse.builder() + return FetchPlanningResultResponse.builder(table.specs()) .withPlanStatus(PlanStatus.COMPLETED) .withFileScanTasks(initial.first()) .withPlanTasks(IN_MEMORY_PLANNING_STATE.nextPlanTask(initial.second())) - .withSpecsById(table.specs()) .build(); } @@ -920,10 +917,9 @@ public static FetchScanTasksResponse fetchScanTasks( String planTask = request.planTask(); List fileScanTasks = IN_MEMORY_PLANNING_STATE.fileScanTasksForPlanTask(planTask); - return FetchScanTasksResponse.builder() + return FetchScanTasksResponse.builder(table.specs()) .withFileScanTasks(fileScanTasks) 
.withPlanTasks(IN_MEMORY_PLANNING_STATE.nextPlanTask(planTask)) - .withSpecsById(table.specs()) .build(); } diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanTaskResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanTaskResponse.java index 3d42ffce4726..628ffb86919f 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanTaskResponse.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanTaskResponse.java @@ -57,11 +57,7 @@ public List deleteFiles() { return deleteFiles == null ? null : Lists.newArrayList(deleteFiles.iterator()); } - /** - * @deprecated since 1.11.0, visibility will be reduced in 1.12.0. - */ - @Deprecated - public Map specsById() { + protected Map specsById() { return specsById; } @@ -93,11 +89,7 @@ public B withFileScanTasks(List tasks) { return self(); } - /** - * @deprecated since 1.11.0, visibility will be reduced in 1.12.0. - */ - @Deprecated - public B withSpecsById(Map specs) { + protected B withSpecsById(Map specs) { this.specsById = specs; return self(); } @@ -110,19 +102,11 @@ public List fileScanTasks() { return fileScanTasks; } - /** - * @deprecated since 1.11.0, visibility will be reduced in 1.12.0. - */ - @Deprecated - public List deleteFiles() { + protected List deleteFiles() { return deleteFiles == null ? null : Lists.newArrayList(deleteFiles.iterator()); } - /** - * @deprecated since 1.11.0, visibility will be reduced in 1.12.0. 
- */ - @Deprecated - public Map specsById() { + protected Map specsById() { return specsById; } diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java index 2e176aac653f..2d219c36d07b 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java @@ -65,6 +65,29 @@ public static Builder builder() { return new Builder(); } + /** + * Returns a new builder pre-populated with the given partition specs map. Required for server + * responses that serialize {@code fileScanTasks} or {@code deleteFiles}; the specs are used only + * to serialize partition data and are never written to the response payload. + */ + public static Builder builder(Map specsById) { + return new Builder().withSpecsById(specsById); + } + + /** + * Returns a builder pre-populated with this response's fields, suitable for producing a copy with + * one or more fields modified. 
+ */ + public Builder toBuilder() { + return new Builder() + .withPlanStatus(planStatus) + .withErrorResponse(errorResponse) + .withPlanTasks(planTasks()) + .withFileScanTasks(fileScanTasks()) + .withCredentials(credentials()) + .withSpecsById(specsById()); + } + @Override public void validate() { Preconditions.checkArgument(planStatus() != null, "Invalid status: null"); diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java index 6dcd85e6d307..271386d0d0e7 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java @@ -53,6 +53,26 @@ public static Builder builder() { return new Builder(); } + /** + * Returns a new builder pre-populated with the given partition specs map. Required for server + * responses that serialize {@code fileScanTasks} or {@code deleteFiles}; the specs are used only + * to serialize partition data and are never written to the response payload. + */ + public static Builder builder(Map specsById) { + return new Builder().withSpecsById(specsById); + } + + /** + * Returns a builder pre-populated with this response's fields, suitable for producing a copy with + * one or more fields modified. 
+ */ + public Builder toBuilder() { + return new Builder() + .withPlanTasks(planTasks()) + .withFileScanTasks(fileScanTasks()) + .withSpecsById(specsById()); + } + public static class Builder extends BaseScanTaskResponse.Builder { private Builder() {} diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java index d0ac222c3052..ce27d301855e 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java @@ -116,6 +116,30 @@ public static Builder builder() { return new Builder(); } + /** + * Returns a new builder pre-populated with the given partition specs map. Required for server + * responses that serialize {@code fileScanTasks} or {@code deleteFiles}; the specs are used only + * to serialize partition data and are never written to the response payload. + */ + public static Builder builder(Map specsById) { + return new Builder().withSpecsById(specsById); + } + + /** + * Returns a builder pre-populated with this response's fields, suitable for producing a copy with + * one or more fields modified. 
+ */ + public Builder toBuilder() { + return new Builder() + .withPlanStatus(planStatus) + .withPlanId(planId) + .withErrorResponse(errorResponse) + .withPlanTasks(planTasks()) + .withFileScanTasks(fileScanTasks()) + .withCredentials(credentials()) + .withSpecsById(specsById()); + } + public static class Builder extends BaseScanTaskResponse.Builder { private PlanStatus planStatus; private String planId; diff --git a/core/src/test/java/org/apache/iceberg/rest/TestRESTScanPlanning.java b/core/src/test/java/org/apache/iceberg/rest/TestRESTScanPlanning.java index 9b42d445f585..6d4922e4ea2c 100644 --- a/core/src/test/java/org/apache/iceberg/rest/TestRESTScanPlanning.java +++ b/core/src/test/java/org/apache/iceberg/rest/TestRESTScanPlanning.java @@ -62,7 +62,6 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.rest.credentials.Credential; import org.apache.iceberg.rest.credentials.ImmutableCredential; import org.apache.iceberg.rest.requests.PlanTableScanRequest; import org.apache.iceberg.rest.responses.ConfigResponse; @@ -1370,10 +1369,12 @@ public T execute( && planResp.planStatus() == PlanStatus.COMPLETED) { return castResponse( responseType, - PlanTableScanResponse.builder() + planResp.toBuilder() .withPlanStatus(PlanStatus.FAILED) .withErrorResponse(serverError) - .withSpecsById(planResp.specsById()) + .withPlanId(null) + .withFileScanTasks(null) + .withPlanTasks(null) .build()); } if (response instanceof FetchPlanningResultResponse) { @@ -1473,39 +1474,24 @@ private T maybeAddStorageCredential(T response) { if (response instanceof PlanTableScanResponse resp && PlanStatus.COMPLETED == resp.planStatus()) { return (T) - PlanTableScanResponse.builder() - .withPlanStatus(resp.planStatus()) - .withPlanId(resp.planId()) - .withPlanTasks(resp.planTasks()) - 
.withFileScanTasks(resp.fileScanTasks()) + resp.toBuilder() .withCredentials( - ImmutableList.builder() - .addAll(resp.credentials()) - .add( - ImmutableCredential.builder() - .prefix("dummy") - .putConfig("dummyKey", "dummyVal") - .build()) - .build()) - .withSpecsById(resp.specsById()) + ImmutableList.of( + ImmutableCredential.builder() + .prefix("dummy") + .putConfig("dummyKey", "dummyVal") + .build())) .build(); } else if (response instanceof FetchPlanningResultResponse resp && PlanStatus.COMPLETED == resp.planStatus()) { return (T) - FetchPlanningResultResponse.builder() - .withPlanStatus(resp.planStatus()) - .withFileScanTasks(resp.fileScanTasks()) - .withPlanTasks(resp.planTasks()) - .withSpecsById(resp.specsById()) + resp.toBuilder() .withCredentials( - ImmutableList.builder() - .addAll(resp.credentials()) - .add( - ImmutableCredential.builder() - .prefix("dummy") - .putConfig("dummyKey", "dummyVal") - .build()) - .build()) + ImmutableList.of( + ImmutableCredential.builder() + .prefix("dummy") + .putConfig("dummyKey", "dummyVal") + .build())) .build(); } diff --git a/open-api/src/testFixtures/java/org/apache/iceberg/rest/RESTServerCatalogAdapter.java b/open-api/src/testFixtures/java/org/apache/iceberg/rest/RESTServerCatalogAdapter.java index fde23d4b930a..52dae1434f73 100644 --- a/open-api/src/testFixtures/java/org/apache/iceberg/rest/RESTServerCatalogAdapter.java +++ b/open-api/src/testFixtures/java/org/apache/iceberg/rest/RESTServerCatalogAdapter.java @@ -60,21 +60,13 @@ public T handleRequest( } else if (restResponse instanceof PlanTableScanResponse response && PlanStatus.COMPLETED == response.planStatus()) { return (T) - PlanTableScanResponse.builder() - .withPlanStatus(response.planStatus()) - .withPlanId(response.planId()) - .withFileScanTasks(response.fileScanTasks()) - .withSpecsById(response.specsById()) + response.toBuilder() .withCredentials(createStorageCredentials(catalogContext.configuration())) .build(); } else if (restResponse instanceof 
FetchPlanningResultResponse response && PlanStatus.COMPLETED == response.planStatus()) { return (T) - FetchPlanningResultResponse.builder() - .withPlanStatus(response.planStatus()) - .withFileScanTasks(response.fileScanTasks()) - .withPlanTasks(response.planTasks()) - .withSpecsById(response.specsById()) + response.toBuilder() .withCredentials(createStorageCredentials(catalogContext.configuration())) .build(); } From 8881516b6b2d1f2c8cc8fad9960b3336b0ae6865 Mon Sep 17 00:00:00 2001 From: Hongyue Zhang Date: Thu, 25 Jun 2026 00:27:54 -0700 Subject: [PATCH 08/14] Parquet: Restore NameMapping.empty() default after removing NETFLIX_UNSAFE branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prior 1.11 logic in `Parquet.ReadBuilder` had three branches when picking the name mapping for a vectorized read: 1. caller-provided nameMapping → use it 2. `NETFLIX_UNSAFE_PARQUET_ID_FALLBACK_ENABLED=true` → use null (unsafe fallback to position-based field resolution) 3. default → use `NameMapping.empty()` (safe) PR #16449 removed the deprecated NETFLIX_UNSAFE branch (case 2) but collapsed the default to `null` rather than `NameMapping.empty()`, making unsafe position-based resolution the new default. That contradicts the PR description ("always use NameMapping.empty() (the safe default)") and silently regresses readers that don't supply an explicit mapping. Restore the safe default by using `NameMapping.empty()` when no mapping is provided. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java b/parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java index e45db652256c..a8193d8bac95 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java @@ -1489,7 +1489,7 @@ public CloseableIterable build() { if (nameMapping != null) { mapping = nameMapping; } else { - mapping = null; + mapping = NameMapping.empty(); } Function> batchedFunc = From 14e654f198d20d683549aa46062e0ff635ebe96d Mon Sep 17 00:00:00 2001 From: Hongyue Zhang Date: Thu, 25 Jun 2026 00:50:14 -0700 Subject: [PATCH 09/14] Core: Restore revapi.yml section key to 1.11.0 baseline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `build.gradle` line 145 sets `oldVersion = "1.11.0"` — revapi compares the current code against the 1.11.0 released baseline. Section keys in `.palantir/revapi.yml` correspond to that baseline, so new breaks accepted during the 1.12.0 dev cycle belong under `"1.11.0"`, not under a not-yet-released `"1.12.0"` section. Evidence in-file: the `"1.10.0"` section already contains entries with justifications like "Removing deprecated code for 1.11.0" — entries added during the 1.11.0 dev cycle when `oldVersion` was `"1.10.0"`. Same shape applies now. This commit: - Renames the section header `"1.12.0"` -> `"1.11.0"`, merging the new entries from PR #16449 into the existing 1.11.0 section that was already on upstream/main (the partition-stats entries from PR #14998). - Restores the original justifications on the 17 partition-stats entries that were rewritten by an earlier fixup commit (`fb5ddae9b2 Core: Move partition stats revapi entries to 1.12.0`), back to "Removed deprecated functionality for partition stats". 
- Drops a duplicate `class org.apache.iceberg.PartitionStats` entry that the earlier fixup commit had introduced. Net effect: the cumulative PR diff against upstream/main is now additive only (new entries appended to the existing 1.11.0 section, no churn on existing entries). Co-Authored-By: Claude Opus 4.7 (1M context) --- .palantir/revapi.yml | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index 7a8251b3ebe3..17983ff1e1cd 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -517,14 +517,11 @@ acceptedBreaks: - code: "java.class.removed" old: "class org.apache.iceberg.data.PartitionStatsHandler" justification: "Removing deprecated code for 1.11.0" - "1.12.0": + "1.11.0": org.apache.iceberg:iceberg-core: - code: "java.class.removed" old: "class org.apache.iceberg.PartitionStats" - justification: "Removing deprecated API scheduled for removal in 1.12.0" - - code: "java.class.removed" - old: "class org.apache.iceberg.PartitionStats" - justification: "Removing deprecated API scheduled for removal in 1.12.0" + justification: "Removed deprecated functionality for partition stats" - code: "java.class.removed" old: "class org.apache.iceberg.SystemProperties" justification: "Removing deprecated API scheduled for removal in 1.12.0" @@ -549,40 +546,40 @@ acceptedBreaks: justification: "Reducing visibility of internal scan-planning accessor to protected; deprecated in 1.11.0" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.DATA_FILE_COUNT" - justification: "Removing deprecated API scheduled for removal in 1.12.0" + justification: "Removed deprecated functionality for partition stats" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.DATA_RECORD_COUNT" - justification: "Removing deprecated API scheduled for removal in 1.12.0" + justification: "Removed deprecated functionality for partition stats" - 
code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.DV_COUNT" - justification: "Removing deprecated API scheduled for removal in 1.12.0" + justification: "Removed deprecated functionality for partition stats" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.EQUALITY_DELETE_FILE_COUNT" - justification: "Removing deprecated API scheduled for removal in 1.12.0" + justification: "Removed deprecated functionality for partition stats" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.EQUALITY_DELETE_RECORD_COUNT" - justification: "Removing deprecated API scheduled for removal in 1.12.0" + justification: "Removed deprecated functionality for partition stats" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.LAST_UPDATED_AT" - justification: "Removing deprecated API scheduled for removal in 1.12.0" + justification: "Removed deprecated functionality for partition stats" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.LAST_UPDATED_SNAPSHOT_ID" - justification: "Removing deprecated API scheduled for removal in 1.12.0" + justification: "Removed deprecated functionality for partition stats" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.POSITION_DELETE_FILE_COUNT" - justification: "Removing deprecated API scheduled for removal in 1.12.0" + justification: "Removed deprecated functionality for partition stats" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.POSITION_DELETE_RECORD_COUNT" - justification: "Removing deprecated API scheduled for removal in 1.12.0" + justification: "Removed deprecated functionality for partition stats" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.SPEC_ID" - justification: "Removing deprecated API scheduled for removal in 1.12.0" + justification: "Removed deprecated functionality for 
partition stats" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.TOTAL_DATA_FILE_SIZE_IN_BYTES" - justification: "Removing deprecated API scheduled for removal in 1.12.0" + justification: "Removed deprecated functionality for partition stats" - code: "java.field.removed" old: "field org.apache.iceberg.PartitionStatsHandler.TOTAL_RECORD_COUNT" - justification: "Removing deprecated API scheduled for removal in 1.12.0" + justification: "Removed deprecated functionality for partition stats" - code: "java.field.removed" old: "field org.apache.iceberg.SystemConfigs.NETFLIX_UNSAFE_PARQUET_ID_FALLBACK_ENABLED" justification: "Removing deprecated API scheduled for removal in 1.12.0" @@ -594,10 +591,10 @@ acceptedBreaks: justification: "Removing deprecated API scheduled for removal in 1.12.0" - code: "java.field.removedWithConstant" old: "field org.apache.iceberg.PartitionStatsHandler.PARTITION_FIELD_ID" - justification: "Removing deprecated API scheduled for removal in 1.12.0" + justification: "Removed deprecated functionality for partition stats" - code: "java.field.removedWithConstant" old: "field org.apache.iceberg.PartitionStatsHandler.PARTITION_FIELD_NAME" - justification: "Removing deprecated API scheduled for removal in 1.12.0" + justification: "Removed deprecated functionality for partition stats" - code: "java.field.removedWithConstant" old: "field org.apache.iceberg.TableProperties.MANIFEST_LISTS_ENABLED" justification: "Removing deprecated API scheduled for removal in 1.12.0" From ac9ee16a64f505145243a811910ca4e54403f98e Mon Sep 17 00:00:00 2001 From: Hongyue Zhang Date: Thu, 25 Jun 2026 09:36:00 -0700 Subject: [PATCH 10/14] Data: Migrate parquet tests off unsafe positional-fallback ID assignment After restoring `NameMapping.empty()` as the default in `Parquet.ReadBuilder` (commit 8881516b6b), two data-module tests fail because they wrote parquet files via raw `AvroParquetWriter` (no Iceberg field IDs in the file metadata) and 
read them back without providing a `NameMapping`, implicitly relying on the unsafe positional-fallback ID assignment that the `NETFLIX_UNSAFE_PARQUET_ID_FALLBACK_ENABLED` config previously enabled by default. The deprecation message on that config explicitly said "Fallback ID assignment in Parquet is UNSAFE and will be removed in 1.12.0. Use name mapping instead." Honor that guidance: - `TestGenericData.testTwoLevelList` and `TestParquetEncryptionWithWriteSupport.testTwoLevelList` now pass an explicit `withNameMapping(MappingUtil.create(schema))` to the reader. - Add `TestGenericData.testReadWithoutFieldIdsOrNameMappingReturnsNullFields` to assert the new strict default behavior: when a parquet file has no field IDs and no `NameMapping` is supplied, projected fields read as null (rather than being bound by position as before). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../iceberg/data/parquet/TestGenericData.java | 44 +++++++++++++++++++ ...TestParquetEncryptionWithWriteSupport.java | 2 + 2 files changed, 46 insertions(+) diff --git a/data/src/test/java/org/apache/iceberg/data/parquet/TestGenericData.java b/data/src/test/java/org/apache/iceberg/data/parquet/TestGenericData.java index 8c0e2e903ab7..25d5607d5e33 100644 --- a/data/src/test/java/org/apache/iceberg/data/parquet/TestGenericData.java +++ b/data/src/test/java/org/apache/iceberg/data/parquet/TestGenericData.java @@ -39,6 +39,7 @@ import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.mapping.MappingUtil; import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; @@ -166,6 +167,7 @@ public void testTwoLevelList() throws IOException { try (CloseableIterable reader = Parquet.read(Files.localInput(testFile)) .project(schema) + .withNameMapping(MappingUtil.create(schema)) .reuseContainers() 
.createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(schema, fileSchema)) .build()) { @@ -177,4 +179,46 @@ public void testTwoLevelList() throws IOException { assertThat(Lists.newArrayList(reader)).hasSize(1); } } + + @Test + public void testReadWithoutFieldIdsOrNameMappingReturnsNullFields() throws IOException { + Schema schema = + new Schema( + optional(1, "arraybytes", Types.ListType.ofRequired(3, Types.BinaryType.get())), + optional(2, "topbytes", Types.BinaryType.get())); + org.apache.avro.Schema avroSchema = AvroSchemaUtil.convert(schema.asStruct()); + + File testFile = temp.resolve("test-file" + System.nanoTime()).toFile(); + + ParquetWriter writer = + AvroParquetWriter.builder( + new LocalOutputFile(testFile.toPath())) + .withDataModel(GenericData.get()) + .withSchema(avroSchema) + .config("parquet.avro.add-list-element-records", "true") + .config("parquet.avro.write-old-list-structure", "true") + .build(); + + GenericRecordBuilder recordBuilder = new GenericRecordBuilder(avroSchema); + byte[] writtenByte = {0x00, 0x01}; + ByteBuffer writtenBinary = ByteBuffer.wrap(writtenByte); + List writtenByteList = new ArrayList(); + writtenByteList.add(writtenBinary); + recordBuilder.set("arraybytes", writtenByteList); + recordBuilder.set("topbytes", writtenBinary); + writer.write(recordBuilder.build()); + writer.close(); + + // No field IDs in the file metadata and no NameMapping provided. 
+ try (CloseableIterable reader = + Parquet.read(Files.localInput(testFile)) + .project(schema) + .createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(schema, fileSchema)) + .build()) { + for (Record actualRecord : reader) { + assertThat(actualRecord.get(0)).isNull(); + assertThat(actualRecord.get(1)).isNull(); + } + } + } } diff --git a/data/src/test/java/org/apache/iceberg/data/parquet/TestParquetEncryptionWithWriteSupport.java b/data/src/test/java/org/apache/iceberg/data/parquet/TestParquetEncryptionWithWriteSupport.java index a10035066061..f32e95bdb469 100644 --- a/data/src/test/java/org/apache/iceberg/data/parquet/TestParquetEncryptionWithWriteSupport.java +++ b/data/src/test/java/org/apache/iceberg/data/parquet/TestParquetEncryptionWithWriteSupport.java @@ -42,6 +42,7 @@ import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.CloseableIterator; import org.apache.iceberg.io.FileAppender; +import org.apache.iceberg.mapping.MappingUtil; import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; @@ -187,6 +188,7 @@ public void testTwoLevelList() throws IOException { .project(schema) .withFileEncryptionKey(FILE_DEK) .withAADPrefix(AAD_PREFIX) + .withNameMapping(MappingUtil.create(schema)) .reuseContainers() .createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(schema, fileSchema)) .build()) { From 8da506219d83f11f26dbd1ba65da6d4dd69c4df7 Mon Sep 17 00:00:00 2001 From: Hongyue Zhang Date: Thu, 25 Jun 2026 10:20:32 -0700 Subject: [PATCH 11/14] Flink: Migrate TestFlinkParquetReader.testTwoLevelList off unsafe positional-fallback Same root cause as the data-module migrations in ac9ee16a64: the test writes a parquet file via raw AvroParquetWriter (no Iceberg field IDs) and then reads it back without supplying a NameMapping. 
Pre-1.12 this worked via the NETFLIX_UNSAFE_PARQUET_ID_FALLBACK_ENABLED positional fallback (defaulted to true on upstream/main); after this PR's Parquet ReadBuilder restores NameMapping.empty() as the default, projected fields no longer bind by position and the assertion NPEs. Migrate all three flink versions (v1.20, v2.0, v2.1) to supply `withNameMapping(MappingUtil.create(schema))`, matching the strict default and aligning with the deprecation message's guidance to "Use name mapping instead". Co-Authored-By: Claude Opus 4.7 (1M context) --- .../org/apache/iceberg/flink/data/TestFlinkParquetReader.java | 2 ++ .../org/apache/iceberg/flink/data/TestFlinkParquetReader.java | 2 ++ .../org/apache/iceberg/flink/data/TestFlinkParquetReader.java | 2 ++ 3 files changed, 6 insertions(+) diff --git a/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/data/TestFlinkParquetReader.java b/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/data/TestFlinkParquetReader.java index cb3b3c503495..afca8885caa1 100644 --- a/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/data/TestFlinkParquetReader.java +++ b/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/data/TestFlinkParquetReader.java @@ -46,6 +46,7 @@ import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.mapping.MappingUtil; import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.parquet.ParquetValueReader; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -217,6 +218,7 @@ public void testTwoLevelList() throws IOException { try (CloseableIterable reader = Parquet.read(Files.localInput(testFile)) .project(schema) + .withNameMapping(MappingUtil.create(schema)) .createReaderFunc(type -> FlinkParquetReaders.buildReader(schema, type)) .build()) { Iterator rows = reader.iterator(); diff --git 
a/flink/v2.0/flink/src/test/java/org/apache/iceberg/flink/data/TestFlinkParquetReader.java b/flink/v2.0/flink/src/test/java/org/apache/iceberg/flink/data/TestFlinkParquetReader.java index cb3b3c503495..afca8885caa1 100644 --- a/flink/v2.0/flink/src/test/java/org/apache/iceberg/flink/data/TestFlinkParquetReader.java +++ b/flink/v2.0/flink/src/test/java/org/apache/iceberg/flink/data/TestFlinkParquetReader.java @@ -46,6 +46,7 @@ import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.mapping.MappingUtil; import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.parquet.ParquetValueReader; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -217,6 +218,7 @@ public void testTwoLevelList() throws IOException { try (CloseableIterable reader = Parquet.read(Files.localInput(testFile)) .project(schema) + .withNameMapping(MappingUtil.create(schema)) .createReaderFunc(type -> FlinkParquetReaders.buildReader(schema, type)) .build()) { Iterator rows = reader.iterator(); diff --git a/flink/v2.1/flink/src/test/java/org/apache/iceberg/flink/data/TestFlinkParquetReader.java b/flink/v2.1/flink/src/test/java/org/apache/iceberg/flink/data/TestFlinkParquetReader.java index c75b40f0b22e..10bc4446846b 100644 --- a/flink/v2.1/flink/src/test/java/org/apache/iceberg/flink/data/TestFlinkParquetReader.java +++ b/flink/v2.1/flink/src/test/java/org/apache/iceberg/flink/data/TestFlinkParquetReader.java @@ -45,6 +45,7 @@ import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.mapping.MappingUtil; import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.parquet.ParquetValueReader; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -217,6 +218,7 @@ public void testTwoLevelList() throws IOException { try (CloseableIterable reader = 
Parquet.read(Files.localInput(testFile)) .project(schema) + .withNameMapping(MappingUtil.create(schema)) .createReaderFunc(type -> FlinkParquetReaders.buildReader(schema, type)) .build()) { Iterator rows = reader.iterator(); From 9ebb273258069c4c9f0a5aabfcd97f634e8c14fc Mon Sep 17 00:00:00 2001 From: Hongyue Zhang Date: Thu, 25 Jun 2026 14:07:17 -0700 Subject: [PATCH 12/14] Spark: Auto-set default name mapping in SparkTableUtil.importSparkTable After restoring `NameMapping.empty()` as the safe default in `Parquet.ReadBuilder` (commit 8881516b6b), Spark tests that import external Parquet files via `SparkTableUtil.importSparkTable` and then read them through Iceberg fail field resolution: the imported files have no Iceberg field IDs and no `schema.name-mapping.default` property is auto-set, so the strict empty mapping returns null for every projected field. Mirror the established pattern from `AddFilesProcedure.ensureNameMappingPresent` and `BaseTableCreationSparkAction`: when the target Iceberg table does not already have `schema.name-mapping.default` set, auto-derive a name mapping from the target schema before importing. Subsequent reads via Iceberg pick up the property automatically through `BaseReader`. Applied identically to v3.5, v4.0, and v4.1. This honors the deprecation guidance Russell Spitzer documented when deprecating the NETFLIX_UNSAFE positional fallback ("Use name mapping instead") and brings `importSparkTable` in line with `AddFilesProcedure`, which has been doing this for some time. Verified on Spark 4.1 via the full `org.apache.iceberg.spark.source.*` test package (`-DtestParallelism=8`); previously-failing `TestIcebergSourceHadoopTables.testTableWithInt96Timestamp` and `TestIdentityPartitionData.testProjections` now pass with no collateral failures elsewhere. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../org/apache/iceberg/spark/SparkTableUtil.java | 16 ++++++++++++++++ .../org/apache/iceberg/spark/SparkTableUtil.java | 16 ++++++++++++++++ .../org/apache/iceberg/spark/SparkTableUtil.java | 16 ++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java index ce509d92741b..f63fbb3a5a3a 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java @@ -67,6 +67,7 @@ import org.apache.iceberg.hadoop.Util; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.mapping.MappingUtil; import org.apache.iceberg.mapping.NameMapping; import org.apache.iceberg.mapping.NameMappingParser; import org.apache.iceberg.relocated.com.google.common.base.Joiner; @@ -582,6 +583,8 @@ public static void importSparkTable( "Table %s does not exist", sourceTableIdentWithDB); } + ensureNameMappingPresent(targetTable); + try { PartitionSpec spec = findCompatibleSpec(targetTable, spark, sourceTableIdentWithDB.unquotedString()); @@ -659,6 +662,19 @@ public static void importSparkTable( spark, sourceTableIdent, targetTable, stagingDir, Collections.emptyMap(), false, 1); } + /** + * Auto-set the default name mapping on the target table if it is not already set, so that + * imported files without Iceberg field IDs resolve fields by name rather than by unsafe position. + * Mirrors {@code AddFilesProcedure#ensureNameMappingPresent}. 
+ */ + private static void ensureNameMappingPresent(Table table) { + if (table.properties().get(TableProperties.DEFAULT_NAME_MAPPING) == null) { + NameMapping mapping = MappingUtil.create(table.schema()); + String mappingJson = NameMappingParser.toJson(mapping); + table.updateProperties().set(TableProperties.DEFAULT_NAME_MAPPING, mappingJson).commit(); + } + } + private static void importUnpartitionedSparkTable( SparkSession spark, TableIdentifier sourceTableIdent, diff --git a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java index 0b74edd67412..f196ddc29f92 100644 --- a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java +++ b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java @@ -67,6 +67,7 @@ import org.apache.iceberg.hadoop.Util; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.mapping.MappingUtil; import org.apache.iceberg.mapping.NameMapping; import org.apache.iceberg.mapping.NameMappingParser; import org.apache.iceberg.relocated.com.google.common.base.Joiner; @@ -379,6 +380,8 @@ public static void importSparkTable( "Table %s does not exist", sourceTableIdentWithDB); } + ensureNameMappingPresent(targetTable); + try { PartitionSpec spec = findCompatibleSpec(targetTable, spark, sourceTableIdentWithDB.unquotedString()); @@ -428,6 +431,19 @@ public static void importSparkTable( spark, sourceTableIdent, targetTable, stagingDir, Collections.emptyMap(), false, 1); } + /** + * Auto-set the default name mapping on the target table if it is not already set, so that + * imported files without Iceberg field IDs resolve fields by name rather than by unsafe position. + * Mirrors {@code AddFilesProcedure#ensureNameMappingPresent}. 
+ */ + private static void ensureNameMappingPresent(Table table) { + if (table.properties().get(TableProperties.DEFAULT_NAME_MAPPING) == null) { + NameMapping mapping = MappingUtil.create(table.schema()); + String mappingJson = NameMappingParser.toJson(mapping); + table.updateProperties().set(TableProperties.DEFAULT_NAME_MAPPING, mappingJson).commit(); + } + } + private static void importUnpartitionedSparkTable( SparkSession spark, TableIdentifier sourceTableIdent, diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java index d56ebd0b1df8..611b2eec641b 100644 --- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java +++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java @@ -66,6 +66,7 @@ import org.apache.iceberg.hadoop.Util; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.mapping.MappingUtil; import org.apache.iceberg.mapping.NameMapping; import org.apache.iceberg.mapping.NameMappingParser; import org.apache.iceberg.relocated.com.google.common.base.Joiner; @@ -378,6 +379,8 @@ public static void importSparkTable( "Table %s does not exist", sourceTableIdentWithDB); } + ensureNameMappingPresent(targetTable); + try { PartitionSpec spec = findCompatibleSpec(targetTable, spark, sourceTableIdentWithDB.unquotedString()); @@ -427,6 +430,19 @@ public static void importSparkTable( spark, sourceTableIdent, targetTable, stagingDir, Collections.emptyMap(), false, 1); } + /** + * Auto-set the default name mapping on the target table if it is not already set, so that + * imported files without Iceberg field IDs resolve fields by name rather than by unsafe position. + * Mirrors {@code AddFilesProcedure#ensureNameMappingPresent}. 
+ */ + private static void ensureNameMappingPresent(Table table) { + if (table.properties().get(TableProperties.DEFAULT_NAME_MAPPING) == null) { + NameMapping mapping = MappingUtil.create(table.schema()); + String mappingJson = NameMappingParser.toJson(mapping); + table.updateProperties().set(TableProperties.DEFAULT_NAME_MAPPING, mappingJson).commit(); + } + } + private static void importUnpartitionedSparkTable( SparkSession spark, TableIdentifier sourceTableIdent, From 8846661ea7ce96f08dc3f578fd7a55b14730a51f Mon Sep 17 00:00:00 2001 From: Hongyue Zhang Date: Thu, 25 Jun 2026 22:04:18 -0700 Subject: [PATCH 13/14] Data, Spark: Auto-apply DEFAULT_NAME_MAPPING in IcebergGenerics; migrate TestSparkParquetReader fixtures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After restoring `NameMapping.empty()` as the safe default in `Parquet.ReadBuilder` (commit 8881516b6b), `IcebergGenerics.read(table)` started failing field resolution on tables whose data files lack Iceberg field IDs. Pre-1.12 the unsafe positional-fallback path masked this; now the strict default refuses to bind by position. `GenericReader` (data module) never consulted `schema.name-mapping.default` even though every Iceberg table that reads non-Iceberg-written files is expected to carry it. Spark's `BaseReader` already reads the property (`BaseReader.java:100`); bring `GenericReader` in line with that pattern so `IcebergGenerics.read(table)` auto-applies the mapping when present. Also migrate `TestSparkParquetReader.testInt96TimestampProducedBySparkIsReadCorrectly` (v3.5 / v4.0 / v4.1) — the test creates a `HadoopTables` table directly (bypassing `SparkTableUtil.importSparkTable`, so the auto-set in commit 9ebb273258 doesn't reach it) and reads files written by raw Spark `ParquetWriteSupport` without Iceberg field IDs: - `rowsFromFile`: pass `.withNameMapping(MappingUtil.create(schema))` on the direct `Parquet.read(...)` call. 
- `tableFromInputFile`: set `schema.name-mapping.default` at table creation time so the `GenericReader` change above picks it up automatically. Verified: - `:iceberg-data:test` clean - Targeted `TestSparkParquetReader.testInt96TimestampProducedBySparkIsReadCorrectly` passes on v3.5, v4.0, v4.1 - `:iceberg-flink:iceberg-flink-2.1:test` full sweep clean (30m, `-DtestParallelism=8`) - Spotless / checkstyle / revapi clean across all versions Co-Authored-By: Claude Opus 4.7 (1M context) --- .../main/java/org/apache/iceberg/data/GenericReader.java | 9 +++++++++ .../iceberg/spark/data/TestSparkParquetReader.java | 8 +++++++- .../iceberg/spark/data/TestSparkParquetReader.java | 8 +++++++- .../iceberg/spark/data/TestSparkParquetReader.java | 8 +++++++- 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/data/src/main/java/org/apache/iceberg/data/GenericReader.java b/data/src/main/java/org/apache/iceberg/data/GenericReader.java index f18f5785105f..a74a725f091d 100644 --- a/data/src/main/java/org/apache/iceberg/data/GenericReader.java +++ b/data/src/main/java/org/apache/iceberg/data/GenericReader.java @@ -23,6 +23,7 @@ import org.apache.iceberg.CombinedScanTask; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.Schema; +import org.apache.iceberg.TableProperties; import org.apache.iceberg.TableScan; import org.apache.iceberg.expressions.Evaluator; import org.apache.iceberg.expressions.Expression; @@ -34,6 +35,8 @@ import org.apache.iceberg.io.CloseableIterator; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.mapping.NameMapping; +import org.apache.iceberg.mapping.NameMappingParser; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.util.PartitionUtil; @@ -43,6 +46,7 @@ class GenericReader implements Serializable { private final Schema projection; private final boolean caseSensitive; private final boolean reuseContainers; + private final NameMapping 
nameMapping; GenericReader(TableScan scan, boolean reuseContainers) { this.io = scan.table().io(); @@ -50,6 +54,8 @@ class GenericReader implements Serializable { this.projection = scan.schema(); this.caseSensitive = scan.isCaseSensitive(); this.reuseContainers = reuseContainers; + String mappingJson = scan.table().properties().get(TableProperties.DEFAULT_NAME_MAPPING); + this.nameMapping = mappingJson != null ? NameMappingParser.fromJson(mappingJson) : null; } CloseableIterator open(CloseableIterable tasks) { @@ -94,6 +100,9 @@ private CloseableIterable openFile(FileScanTask task, Schema fileProject if (reuseContainers) { builder = builder.reuseContainers(); } + if (nameMapping != null) { + builder = builder.withNameMapping(nameMapping); + } return builder .project(fileProjection) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java index bc4b77059d43..a65a652e91c3 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java @@ -35,6 +35,7 @@ import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; import org.apache.iceberg.Table; +import org.apache.iceberg.TableProperties; import org.apache.iceberg.data.IcebergGenerics; import org.apache.iceberg.data.RandomGenericData; import org.apache.iceberg.data.Record; @@ -45,6 +46,8 @@ import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.mapping.MappingUtil; +import org.apache.iceberg.mapping.NameMappingParser; import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.parquet.ParquetUtil; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -126,6 +129,7 @@ protected List rowsFromFile(InputFile 
inputFile, Schema schema) thr try (CloseableIterable reader = Parquet.read(inputFile) .project(schema) + .withNameMapping(MappingUtil.create(schema)) .createReaderFunc(type -> SparkParquetReaders.buildReader(schema, type)) .build()) { return Lists.newArrayList(reader); @@ -138,7 +142,9 @@ protected Table tableFromInputFile(InputFile inputFile, Schema schema) throws IO tables.create( schema, PartitionSpec.unpartitioned(), - ImmutableMap.of(), + ImmutableMap.of( + TableProperties.DEFAULT_NAME_MAPPING, + NameMappingParser.toJson(MappingUtil.create(schema))), java.nio.file.Files.createTempDirectory(temp, null).toFile().getCanonicalPath()); table diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java index 1d1ccca1a2fb..ed759b153d69 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java @@ -34,6 +34,7 @@ import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; import org.apache.iceberg.Table; +import org.apache.iceberg.TableProperties; import org.apache.iceberg.data.IcebergGenerics; import org.apache.iceberg.data.RandomGenericData; import org.apache.iceberg.data.Record; @@ -44,6 +45,8 @@ import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.mapping.MappingUtil; +import org.apache.iceberg.mapping.NameMappingParser; import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.parquet.ParquetUtil; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -122,6 +125,7 @@ protected List rowsFromFile(InputFile inputFile, Schema schema) thr try (CloseableIterable reader = Parquet.read(inputFile) .project(schema) + .withNameMapping(MappingUtil.create(schema)) 
.createReaderFunc(type -> SparkParquetReaders.buildReader(schema, type)) .build()) { return Lists.newArrayList(reader); @@ -134,7 +138,9 @@ protected Table tableFromInputFile(InputFile inputFile, Schema schema) throws IO tables.create( schema, PartitionSpec.unpartitioned(), - ImmutableMap.of(), + ImmutableMap.of( + TableProperties.DEFAULT_NAME_MAPPING, + NameMappingParser.toJson(MappingUtil.create(schema))), java.nio.file.Files.createTempDirectory(temp, null).toFile().getCanonicalPath()); table diff --git a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java index f42c37f5e41d..4a0cf4f61aa6 100644 --- a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java +++ b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java @@ -34,6 +34,7 @@ import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; import org.apache.iceberg.Table; +import org.apache.iceberg.TableProperties; import org.apache.iceberg.data.IcebergGenerics; import org.apache.iceberg.data.RandomGenericData; import org.apache.iceberg.data.Record; @@ -44,6 +45,8 @@ import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.mapping.MappingUtil; +import org.apache.iceberg.mapping.NameMappingParser; import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.parquet.ParquetUtil; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -122,6 +125,7 @@ protected List rowsFromFile(InputFile inputFile, Schema schema) thr try (CloseableIterable reader = Parquet.read(inputFile) .project(schema) + .withNameMapping(MappingUtil.create(schema)) .createReaderFunc(type -> SparkParquetReaders.buildReader(schema, type)) .build()) { return Lists.newArrayList(reader); @@ -134,7 +138,9 @@ protected 
Table tableFromInputFile(InputFile inputFile, Schema schema) throws IO tables.create( schema, PartitionSpec.unpartitioned(), - ImmutableMap.of(), + ImmutableMap.of( + TableProperties.DEFAULT_NAME_MAPPING, + NameMappingParser.toJson(MappingUtil.create(schema))), java.nio.file.Files.createTempDirectory(temp, null).toFile().getCanonicalPath()); table From ea434b8c00b6602939b2dc076c75fd48422d0bfc Mon Sep 17 00:00:00 2001 From: Hongyue Zhang Date: Thu, 25 Jun 2026 23:20:01 -0700 Subject: [PATCH 14/14] Spark: Apply name mapping in TestParquetDictionaryEncodedVectorizedReads Migrate the vectorized dictionary-encoded parquet read test off unsafe positional-fallback ID assignment by attaching a NameMapping created from the schema to the Parquet read builder. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../parquet/TestParquetDictionaryEncodedVectorizedReads.java | 2 ++ .../parquet/TestParquetDictionaryEncodedVectorizedReads.java | 2 ++ .../parquet/TestParquetDictionaryEncodedVectorizedReads.java | 2 ++ 3 files changed, 6 insertions(+) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetDictionaryEncodedVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetDictionaryEncodedVectorizedReads.java index b61ecfa2f442..068b98637b99 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetDictionaryEncodedVectorizedReads.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetDictionaryEncodedVectorizedReads.java @@ -37,6 +37,7 @@ import org.apache.iceberg.data.parquet.GenericParquetWriter; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileAppender; +import org.apache.iceberg.mapping.MappingUtil; import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.relocated.com.google.common.base.Function; import 
org.apache.iceberg.relocated.com.google.common.collect.FluentIterable; @@ -178,6 +179,7 @@ public void testDecimalNotAllPagesDictionaryEncoded() throws Exception { Parquet.ReadBuilder readBuilder = Parquet.read(Files.localInput(path.toFile())) .project(schema) + .withNameMapping(MappingUtil.create(schema)) .createBatchedReaderFunc( type -> VectorizedSparkParquetReaders.buildReader( diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetDictionaryEncodedVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetDictionaryEncodedVectorizedReads.java index b61ecfa2f442..068b98637b99 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetDictionaryEncodedVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetDictionaryEncodedVectorizedReads.java @@ -37,6 +37,7 @@ import org.apache.iceberg.data.parquet.GenericParquetWriter; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileAppender; +import org.apache.iceberg.mapping.MappingUtil; import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.relocated.com.google.common.base.Function; import org.apache.iceberg.relocated.com.google.common.collect.FluentIterable; @@ -178,6 +179,7 @@ public void testDecimalNotAllPagesDictionaryEncoded() throws Exception { Parquet.ReadBuilder readBuilder = Parquet.read(Files.localInput(path.toFile())) .project(schema) + .withNameMapping(MappingUtil.create(schema)) .createBatchedReaderFunc( type -> VectorizedSparkParquetReaders.buildReader( diff --git a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetDictionaryEncodedVectorizedReads.java b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetDictionaryEncodedVectorizedReads.java index b61ecfa2f442..068b98637b99 
100644 --- a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetDictionaryEncodedVectorizedReads.java +++ b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetDictionaryEncodedVectorizedReads.java @@ -37,6 +37,7 @@ import org.apache.iceberg.data.parquet.GenericParquetWriter; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileAppender; +import org.apache.iceberg.mapping.MappingUtil; import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.relocated.com.google.common.base.Function; import org.apache.iceberg.relocated.com.google.common.collect.FluentIterable; @@ -178,6 +179,7 @@ public void testDecimalNotAllPagesDictionaryEncoded() throws Exception { Parquet.ReadBuilder readBuilder = Parquet.read(Files.localInput(path.toFile())) .project(schema) + .withNameMapping(MappingUtil.create(schema)) .createBatchedReaderFunc( type -> VectorizedSparkParquetReaders.buildReader(