From aab2b5123640a4a0bd5f3d21c69be7a3f0bb16d0 Mon Sep 17 00:00:00 2001
From: Andy Grove
Date: Sun, 1 Feb 2026 09:47:39 -0700
Subject: [PATCH 1/8] fix: respect scan impl config for v2 scan

---
 spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala
index 68a63b6ae8..45faa4d940 100644
--- a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala
+++ b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala
@@ -228,7 +228,7 @@ case class CometScanRule(session: SparkSession) extends Rule[SparkPlan] with Com
   private def transformV2Scan(scanExec: BatchScanExec): SparkPlan = {

     scanExec.scan match {
-      case scan: ParquetScan =>
+      case scan: ParquetScan if COMET_NATIVE_SCAN_IMPL.get() == SCAN_NATIVE_COMET =>
         val fallbackReasons = new ListBuffer[String]()
         val schemaSupported =
           CometBatchScanExec.isSchemaSupported(scan.readDataSchema, fallbackReasons)

From 33e770b70962bb75a42e84b1aa5e53236f3bc37d Mon Sep 17 00:00:00 2001
From: Andy Grove
Date: Sun, 1 Feb 2026 11:03:48 -0700
Subject: [PATCH 2/8] fix: use native_comet scan impl in V2 scanner test

---
 .../test/scala/org/apache/comet/parquet/ParquetReadSuite.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala b/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala
index e4486e940b..3da00a2404 100644
--- a/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala
@@ -2045,7 +2045,7 @@ class ParquetReadV2Suite extends ParquetReadSuite with AdaptiveSparkPlanHelper {
       case (cometEnabled, expectedScanner) =>
         testScanner(
           cometEnabled,
-          CometConf.SCAN_NATIVE_DATAFUSION,
+          CometConf.SCAN_NATIVE_COMET,
           scanner = expectedScanner,
           v1 = None)
     }

From 6b39d7ebcf5729905e86dcd84ff16d62d8dd457a Mon Sep 17 00:00:00 2001
From: Andy Grove
Date: Sun, 1 Feb 2026 11:05:30 -0700
Subject: [PATCH 3/8] fix: set native_comet scan impl in V2 scan tests

Co-Authored-By: Claude Opus 4.5
---
 .../test/scala/org/apache/comet/exec/CometExecSuite.scala | 3 ++-
 .../scala/org/apache/comet/rules/CometScanRuleSuite.scala | 6 ++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
index 696a12d4a2..2d07c03c1b 100644
--- a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
@@ -382,9 +382,10 @@ class CometExecSuite extends CometTestBase {
     }
   }

-  test("ReusedExchangeExec should work on CometBroadcastExchangeExec") {
+  test("ReusedExchangeExec should work on CometBroadcastExchangeExec with V2 scan") {
     withSQLConf(
       CometConf.COMET_EXEC_BROADCAST_FORCE_ENABLED.key -> "true",
+      CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET,
       SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false",
       SQLConf.USE_V1_SOURCE_LIST.key -> "") {
       withTempPath { path =>

diff --git a/spark/src/test/scala/org/apache/comet/rules/CometScanRuleSuite.scala b/spark/src/test/scala/org/apache/comet/rules/CometScanRuleSuite.scala
index d0dfbbb09d..c7a07b3f1f 100644
--- a/spark/src/test/scala/org/apache/comet/rules/CometScanRuleSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/rules/CometScanRuleSuite.scala
@@ -101,11 +101,13 @@ class CometScanRuleSuite extends CometTestBase {
     }
   }

-  test("CometExecRule should replace BatchScanExec, but only when Comet is enabled") {
+  test("CometScanRule should replace V2 BatchScanExec, but only when Comet is enabled") {
     withTempPath { path =>
       createTestDataFrame.write.parquet(path.toString)
       withTempView("test_data") {
-        withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "") {
+        withSQLConf(
+          SQLConf.USE_V1_SOURCE_LIST.key -> "",
+          CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET) {
           spark.read.parquet(path.toString).createOrReplaceTempView("test_data")

           val sparkPlan =

From 80a76725f6ec12a97eb1ff3e362e68b6c28d0d99 Mon Sep 17 00:00:00 2001
From: Andy Grove
Date: Sun, 1 Feb 2026 12:10:52 -0700
Subject: [PATCH 4/8] stop accepting native_comet as a valid scan impl

---
 common/src/main/scala/org/apache/comet/CometConf.scala | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala
index 6504c0294b..659d99b69a 100644
--- a/common/src/main/scala/org/apache/comet/CometConf.scala
+++ b/common/src/main/scala/org/apache/comet/CometConf.scala
@@ -124,11 +124,8 @@ object CometConf extends ShimCometConf {
   val COMET_NATIVE_SCAN_IMPL: ConfigEntry[String] = conf("spark.comet.scan.impl")
     .category(CATEGORY_SCAN)
     .doc(
-      s"The implementation of Comet Native Scan to use. Available modes are `$SCAN_NATIVE_COMET`," +
+      s"The implementation of Comet Native Scan to use. Available modes are " +
       s"`$SCAN_NATIVE_DATAFUSION`, and `$SCAN_NATIVE_ICEBERG_COMPAT`. " +
-      s"`$SCAN_NATIVE_COMET` (DEPRECATED - will be removed in a future release) is for the " +
-      "original Comet native scan which uses a jvm based parquet file reader and native " +
-      "column decoding. Supports simple types only. " +
       s"`$SCAN_NATIVE_DATAFUSION` is a fully native implementation of scan based on " +
       "DataFusion. " +
" + s"`$SCAN_NATIVE_ICEBERG_COMPAT` is the recommended native implementation that " + @@ -138,7 +135,7 @@ object CometConf extends ShimCometConf { .stringConf .transform(_.toLowerCase(Locale.ROOT)) .checkValues( - Set(SCAN_NATIVE_COMET, SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT, SCAN_AUTO)) + Set(SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT, SCAN_AUTO)) .createWithEnvVarOrDefault("COMET_PARQUET_SCAN_IMPL", SCAN_AUTO) val COMET_ICEBERG_NATIVE_ENABLED: ConfigEntry[Boolean] = From 9fbf0584b89ecdf2203b48b23638dd8be25879fe Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sun, 1 Feb 2026 12:15:51 -0700 Subject: [PATCH 5/8] stop setting native_comet in ci --- .github/workflows/pr_build_linux.yml | 4 ++-- .github/workflows/spark_sql_test.yml | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml index 8376afbc6e..04a5b305be 100644 --- a/.github/workflows/pr_build_linux.yml +++ b/.github/workflows/pr_build_linux.yml @@ -164,7 +164,7 @@ jobs: - name: "Spark 3.4, JDK 11, Scala 2.12" java_version: "11" maven_opts: "-Pspark-3.4 -Pscala-2.12" - scan_impl: "native_comet" + scan_impl: "auto" - name: "Spark 3.5.5, JDK 17, Scala 2.13" java_version: "17" @@ -174,7 +174,7 @@ jobs: - name: "Spark 3.5.6, JDK 17, Scala 2.13" java_version: "17" maven_opts: "-Pspark-3.5 -Dspark.version=3.5.6 -Pscala-2.13" - scan_impl: "native_comet" + scan_impl: "auto" - name: "Spark 3.5, JDK 17, Scala 2.12" java_version: "17" diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml index 2e4b6926c2..7d1b58ff31 100644 --- a/.github/workflows/spark_sql_test.yml +++ b/.github/workflows/spark_sql_test.yml @@ -116,15 +116,12 @@ jobs: - {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"} # Test combinations: # - auto scan: all Spark versions (3.4, 3.5, 4.0) - # - native_comet: Spark 3.4, 3.5 # - native_iceberg_compat: Spark 3.5 only config: - {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'auto', scan-env: ''} - {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'auto', scan-env: ''} - - {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto', scan-env: ''} - - {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'native_comet', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_comet'} - - {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'native_comet', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_comet'} - {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'native_iceberg_compat', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_iceberg_compat'} + - {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto', scan-env: ''} # Skip sql_hive-1 for Spark 4.0 due to https://github.com/apache/datafusion-comet/issues/2946 exclude: - config: {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto', scan-env: ''} From 32c6526185421b5219fd91e72a97fa6b26cfc2cc Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sun, 1 Feb 2026 12:25:33 -0700 Subject: [PATCH 6/8] remove native_comet from list of valid options for scan impl --- .../apache/comet/CometExpressionSuite.scala | 15 ++-- .../apache/comet/exec/CometExecSuite.scala | 3 +- .../parquet/CometParquetWriterSuite.scala | 3 +- .../comet/parquet/ParquetReadSuite.scala | 21 +++-- .../comet/rules/CometScanRuleSuite.scala | 3 +- .../sql/benchmark/CometReadBenchmark.scala | 84 +------------------ .../comet/ParquetDatetimeRebaseSuite.scala | 
 .../sql/comet/ParquetEncryptionITCase.scala   | 18 +---
 8 files changed, 42 insertions(+), 119 deletions(-)

diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
index 5a22583ae0..2999d8bfe5 100644
--- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -185,7 +185,8 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     }
   }

-  test("basic data type support") {
+  // ignored: native_comet scan is no longer supported
+  ignore("basic data type support") {
     // this test requires native_comet scan due to unsigned u8/u16 issue
     withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET) {
       Seq(true, false).foreach { dictionaryEnabled =>
@@ -216,7 +217,8 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     }
   }

-  test("uint data type support") {
+  // ignored: native_comet scan is no longer supported
+  ignore("uint data type support") {
     // this test requires native_comet scan due to unsigned u8/u16 issue
     withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET) {
       Seq(true, false).foreach { dictionaryEnabled =>
@@ -1503,7 +1505,8 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     }
   }

-  test("round") {
+  // ignored: native_comet scan is no longer supported
+  ignore("round") {
     // https://github.com/apache/datafusion-comet/issues/1441
     assume(usingLegacyNativeCometScan)
     Seq(true, false).foreach { dictionaryEnabled =>
@@ -1567,7 +1570,8 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     }
   }

-  test("hex") {
+  // ignored: native_comet scan is no longer supported
+  ignore("hex") {
     // https://github.com/apache/datafusion-comet/issues/1441
     assume(usingLegacyNativeCometScan)
     Seq(true, false).foreach { dictionaryEnabled =>
@@ -2781,7 +2785,8 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     }
   }

-  test("test integral divide") {
+  // ignored: native_comet scan is no longer supported
+  ignore("test integral divide") {
     // this test requires native_comet scan due to unsigned u8/u16 issue
     withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET) {
       Seq(true, false).foreach { dictionaryEnabled =>

diff --git a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
index 2d07c03c1b..6c9bdf6eba 100644
--- a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
@@ -382,7 +382,8 @@ class CometExecSuite extends CometTestBase {
     }
   }

-  test("ReusedExchangeExec should work on CometBroadcastExchangeExec with V2 scan") {
+  // ignored: native_comet scan is no longer supported
+  ignore("ReusedExchangeExec should work on CometBroadcastExchangeExec with V2 scan") {
     withSQLConf(
       CometConf.COMET_EXEC_BROADCAST_FORCE_ENABLED.key -> "true",
       CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET,

diff --git a/spark/src/test/scala/org/apache/comet/parquet/CometParquetWriterSuite.scala b/spark/src/test/scala/org/apache/comet/parquet/CometParquetWriterSuite.scala
index 25008302f9..b691039f19 100644
--- a/spark/src/test/scala/org/apache/comet/parquet/CometParquetWriterSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/parquet/CometParquetWriterSuite.scala
@@ -310,7 +310,8 @@ class CometParquetWriterSuite extends CometTestBase {
     }
   }

-  test("native write falls back when scan produces non-Arrow data") {
+  // ignored: native_comet scan is no longer supported
+  ignore("native write falls back when scan produces non-Arrow data") {
     // This test verifies that when a native scan (like native_comet) doesn't support
     // certain data types (complex types), the native write correctly falls back to Spark
     // instead of failing at runtime with "Comet execution only takes Arrow Arrays" error.

diff --git a/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala b/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala
index 3da00a2404..928e66b29b 100644
--- a/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala
@@ -85,7 +85,8 @@ abstract class ParquetReadSuite extends CometTestBase {
     }
   }

-  test("unsupported Spark types") {
+  // ignored: native_comet scan is no longer supported
+  ignore("unsupported Spark types") {
    // TODO this test is not correctly implemented for scan implementations other than SCAN_NATIVE_COMET
    // https://github.com/apache/datafusion-comet/issues/2188
    withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET) {
@@ -130,7 +131,8 @@ abstract class ParquetReadSuite extends CometTestBase {
     }
   }

-  test("unsupported Spark schema") {
+  // ignored: native_comet scan is no longer supported
+  ignore("unsupported Spark schema") {
    // TODO this test is not correctly implemented for scan implementations other than SCAN_NATIVE_COMET
    // https://github.com/apache/datafusion-comet/issues/2188
    withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET) {
@@ -368,7 +370,8 @@ abstract class ParquetReadSuite extends CometTestBase {
     checkParquetFile(data)
   }

-  test("test multiple pages with different sizes and nulls") {
+  // ignored: native_comet scan is no longer supported
+  ignore("test multiple pages with different sizes and nulls") {
     def makeRawParquetFile(
         path: Path,
         dictionaryEnabled: Boolean,
@@ -1344,7 +1347,8 @@ abstract class ParquetReadSuite extends CometTestBase {
     }
   }

-  test("scan metrics") {
+  // ignored: native_comet scan is no longer supported
+  ignore("scan metrics") {
     val cometScanMetricNames =
       Seq(
         "ParquetRowGroups",
@@ -1866,8 +1870,7 @@ class ParquetReadV1Suite extends ParquetReadSuite with AdaptiveSparkPlanHelper {

   test("Test V1 parquet scan uses respective scanner") {
     Seq(
-      ("false", CometConf.SCAN_NATIVE_COMET, "FileScan parquet"),
-      ("true", CometConf.SCAN_NATIVE_COMET, "CometScan [native_comet] parquet"),
+      ("false", CometConf.SCAN_NATIVE_DATAFUSION, "FileScan parquet"),
       ("true", CometConf.SCAN_NATIVE_DATAFUSION, "CometNativeScan"),
       ("true", CometConf.SCAN_NATIVE_ICEBERG_COMPAT, "CometScan [native_iceberg_compat] parquet"))
       .foreach { case (cometEnabled, cometNativeScanImpl, expectedScanner) =>
@@ -2014,10 +2017,11 @@ class ParquetReadV1Suite extends ParquetReadSuite with AdaptiveSparkPlanHelper {
   }
 }

+// ignored: native_comet scan is no longer supported
 class ParquetReadV2Suite extends ParquetReadSuite with AdaptiveSparkPlanHelper {
   override protected def test(testName: String, testTags: Tag*)(testFun: => Any)(implicit
       pos: Position): Unit = {
-    super.test(testName, testTags: _*)(
+    super.ignore(testName, testTags: _*)(
       withSQLConf(
         SQLConf.USE_V1_SOURCE_LIST.key -> "",
         CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET) {
@@ -2040,7 +2044,8 @@ class ParquetReadV2Suite extends ParquetReadSuite with AdaptiveSparkPlanHelper {
     }
   }

-  test("Test V2 parquet scan uses respective scanner") {
+  // ignored: native_comet scan is no longer supported
+  ignore("Test V2 parquet scan uses respective scanner") {
     Seq(("false", "BatchScan"), ("true", "CometBatchScan")).foreach {
       case (cometEnabled, expectedScanner) =>
         testScanner(

diff --git a/spark/src/test/scala/org/apache/comet/rules/CometScanRuleSuite.scala b/spark/src/test/scala/org/apache/comet/rules/CometScanRuleSuite.scala
index c7a07b3f1f..a349ab2b93 100644
--- a/spark/src/test/scala/org/apache/comet/rules/CometScanRuleSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/rules/CometScanRuleSuite.scala
@@ -101,7 +101,8 @@ class CometScanRuleSuite extends CometTestBase {
     }
   }

-  test("CometScanRule should replace V2 BatchScanExec, but only when Comet is enabled") {
+  // ignored: native_comet scan is no longer supported
+  ignore("CometScanRule should replace V2 BatchScanExec, but only when Comet is enabled") {
     withTempPath { path =>
       createTestDataFrame.write.parquet(path.toString)
       withTempView("test_data") {

diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometReadBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometReadBenchmark.scala
index 9b2dd186dd..3bfbdee91a 100644
--- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometReadBenchmark.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometReadBenchmark.scala
@@ -38,7 +38,7 @@ import org.apache.spark.sql.types._
 import org.apache.spark.sql.vectorized.ColumnVector

 import org.apache.comet.{CometConf, WithHdfsCluster}
-import org.apache.comet.CometConf.{SCAN_NATIVE_COMET, SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT}
+import org.apache.comet.CometConf.{SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT}
 import org.apache.comet.parquet.BatchReader

 /**
@@ -67,14 +67,6 @@ class CometReadBaseBenchmark extends CometBenchmarkBase {
       spark.sql(s"select $query from parquetV1Table").noop()
     }

-    sqlBenchmark.addCase("SQL Parquet - Comet") { _ =>
-      withSQLConf(
-        CometConf.COMET_ENABLED.key -> "true",
-        CometConf.COMET_NATIVE_SCAN_IMPL.key -> SCAN_NATIVE_COMET) {
-        spark.sql(s"select $query from parquetV1Table").noop()
-      }
-    }
-
     sqlBenchmark.addCase("SQL Parquet - Comet Native DataFusion") { _ =>
       withSQLConf(
         CometConf.COMET_ENABLED.key -> "true",
@@ -175,21 +167,6 @@ class CometReadBaseBenchmark extends CometBenchmarkBase {
       }
     }

-    sqlBenchmark.addCase("SQL Parquet - Comet") { _ =>
-      withSQLConf(
-        "spark.memory.offHeap.enabled" -> "true",
-        "spark.memory.offHeap.size" -> "10g",
-        CometConf.COMET_ENABLED.key -> "true",
-        CometConf.COMET_NATIVE_SCAN_IMPL.key -> SCAN_NATIVE_COMET,
-        DecryptionPropertiesFactory.CRYPTO_FACTORY_CLASS_PROPERTY_NAME -> cryptoFactoryClass,
-        KeyToolkit.KMS_CLIENT_CLASS_PROPERTY_NAME ->
-          "org.apache.parquet.crypto.keytools.mocks.InMemoryKMS",
-        InMemoryKMS.KEY_LIST_PROPERTY_NAME ->
-          s"footerKey: ${footerKey}, key1: ${key1}") {
-        spark.sql(s"select $query from parquetV1Table").noop()
-      }
-    }
-
     sqlBenchmark.addCase("SQL Parquet - Comet Native DataFusion") { _ =>
       withSQLConf(
         "spark.memory.offHeap.enabled" -> "true",
@@ -245,14 +222,6 @@ class CometReadBaseBenchmark extends CometBenchmarkBase {
       spark.sql("select sum(id) from parquetV1Table").noop()
     }

-    sqlBenchmark.addCase("SQL Parquet - Comet") { _ =>
-      withSQLConf(
-        CometConf.COMET_ENABLED.key -> "true",
-        CometConf.COMET_NATIVE_SCAN_IMPL.key -> SCAN_NATIVE_COMET) {
-        spark.sql("select sum(id) from parquetV1Table").noop()
-      }
-    }
-
     sqlBenchmark.addCase("SQL Parquet - Comet Native DataFusion") { _ =>
       withSQLConf(
         CometConf.COMET_ENABLED.key -> "true",
@@ -373,14 +342,6 @@ class CometReadBaseBenchmark extends CometBenchmarkBase {
       spark.sql("select sum(c2) from parquetV1Table where c1 + 1 > 0").noop()
     }

-    benchmark.addCase("SQL Parquet - Comet") { _ =>
-      withSQLConf(
-        CometConf.COMET_ENABLED.key -> "true",
-        CometConf.COMET_NATIVE_SCAN_IMPL.key -> SCAN_NATIVE_COMET) {
-        spark.sql("select sum(c2) from parquetV1Table where c1 + 1 > 0").noop()
-      }
-    }
-
     benchmark.addCase("SQL Parquet - Comet Native DataFusion") { _ =>
       withSQLConf(
         CometConf.COMET_ENABLED.key -> "true",
@@ -431,14 +392,6 @@ class CometReadBaseBenchmark extends CometBenchmarkBase {
       spark.sql("select sum(length(id)) from parquetV1Table").noop()
     }

-    sqlBenchmark.addCase("SQL Parquet - Comet") { _ =>
-      withSQLConf(
-        CometConf.COMET_ENABLED.key -> "true",
-        CometConf.COMET_NATIVE_SCAN_IMPL.key -> SCAN_NATIVE_COMET) {
-        spark.sql("select sum(length(id)) from parquetV1Table").noop()
-      }
-    }
-
     sqlBenchmark.addCase("SQL Parquet - Comet Native DataFusion") { _ =>
       withSQLConf(
         CometConf.COMET_ENABLED.key -> "true",
@@ -482,17 +435,6 @@ class CometReadBaseBenchmark extends CometBenchmarkBase {
         .noop()
     }

-    benchmark.addCase("SQL Parquet - Comet") { _ =>
-      withSQLConf(
-        CometConf.COMET_ENABLED.key -> "true",
-        CometConf.COMET_NATIVE_SCAN_IMPL.key -> SCAN_NATIVE_COMET) {
-        spark
-          .sql("select sum(length(c2)) from parquetV1Table where c1 is " +
-            "not NULL and c2 is not NULL")
-          .noop()
-      }
-    }
-
     benchmark.addCase("SQL Parquet - Comet Native DataFusion") { _ =>
       withSQLConf(
         CometConf.COMET_ENABLED.key -> "true",
@@ -538,14 +480,6 @@ class CometReadBaseBenchmark extends CometBenchmarkBase {
       spark.sql(s"SELECT sum(c$middle) FROM parquetV1Table").noop()
     }

-    benchmark.addCase("SQL Parquet - Comet") { _ =>
-      withSQLConf(
-        CometConf.COMET_ENABLED.key -> "true",
-        CometConf.COMET_NATIVE_SCAN_IMPL.key -> SCAN_NATIVE_COMET) {
-        spark.sql(s"SELECT sum(c$middle) FROM parquetV1Table").noop()
-      }
-    }
-
     benchmark.addCase("SQL Parquet - Comet Native DataFusion") { _ =>
       withSQLConf(
         CometConf.COMET_ENABLED.key -> "true",
@@ -589,14 +523,6 @@ class CometReadBaseBenchmark extends CometBenchmarkBase {
       spark.sql("SELECT * FROM parquetV1Table WHERE c1 + 1 > 0").noop()
     }

-    benchmark.addCase("SQL Parquet - Comet") { _ =>
-      withSQLConf(
-        CometConf.COMET_ENABLED.key -> "true",
-        CometConf.COMET_NATIVE_SCAN_IMPL.key -> SCAN_NATIVE_COMET) {
-        spark.sql("SELECT * FROM parquetV1Table WHERE c1 + 1 > 0").noop()
-      }
-    }
-
     benchmark.addCase("SQL Parquet - Comet Native DataFusion") { _ =>
       withSQLConf(
         CometConf.COMET_ENABLED.key -> "true",
@@ -640,14 +566,6 @@ class CometReadBaseBenchmark extends CometBenchmarkBase {
       spark.sql("SELECT * FROM parquetV1Table WHERE c1 + 1 > 0").noop()
     }

-    benchmark.addCase("SQL Parquet - Comet") { _ =>
-      withSQLConf(
-        CometConf.COMET_ENABLED.key -> "true",
-        CometConf.COMET_NATIVE_SCAN_IMPL.key -> SCAN_NATIVE_COMET) {
-        spark.sql("SELECT * FROM parquetV1Table WHERE c1 + 1 > 0").noop()
-      }
-    }
-
     benchmark.addCase("SQL Parquet - Comet Native DataFusion") { _ =>
       withSQLConf(
         CometConf.COMET_ENABLED.key -> "true",

diff --git a/spark/src/test/scala/org/apache/spark/sql/comet/ParquetDatetimeRebaseSuite.scala b/spark/src/test/scala/org/apache/spark/sql/comet/ParquetDatetimeRebaseSuite.scala
index 131423ddeb..c330bbe4c3 100644
--- a/spark/src/test/scala/org/apache/spark/sql/comet/ParquetDatetimeRebaseSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/comet/ParquetDatetimeRebaseSuite.scala
@@ -37,7 +37,8 @@ abstract class ParquetDatetimeRebaseSuite extends CometTestBase {
   // visible under package `spark`.
   val SPARK_TESTING: String = "spark.testing"

-  test("reading ancient dates before 1582") {
+  // ignored: native_comet scan is no longer supported
+  ignore("reading ancient dates before 1582") {
     Seq(true, false).foreach { exceptionOnRebase =>
       withSQLConf(
         CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET,
@@ -62,7 +63,8 @@ abstract class ParquetDatetimeRebaseSuite extends CometTestBase {
     }
   }

-  test("reading ancient timestamps before 1582") {
+  // ignored: native_comet scan is no longer supported
+  ignore("reading ancient timestamps before 1582") {
     assume(usingLegacyNativeCometScan(conf))
     Seq(true, false).foreach { exceptionOnRebase =>
       withSQLConf(
@@ -89,7 +91,8 @@ abstract class ParquetDatetimeRebaseSuite extends CometTestBase {
     }
   }

-  test("reading ancient int96 timestamps before 1582") {
+  // ignored: native_comet scan is no longer supported
+  ignore("reading ancient int96 timestamps before 1582") {
     assume(usingLegacyNativeCometScan(conf))
     Seq(true, false).foreach { exceptionOnRebase =>
       withSQLConf(
@@ -147,12 +150,11 @@ class ParquetDatetimeRebaseV1Suite extends ParquetDatetimeRebaseSuite {
   }
 }

+// ignored: native_comet scan is no longer supported
 class ParquetDatetimeRebaseV2Suite extends ParquetDatetimeRebaseSuite {
   override protected def test(testName: String, testTags: Tag*)(testFun: => Any)(implicit
       pos: Position): Unit = {
-    // Datasource V2 is not supported by the native (datafusion based) readers so force
-    // the scan impl back to 'native_comet'
-    super.test(testName, testTags: _*)(
+    super.ignore(testName, testTags: _*)(
       withSQLConf(
         SQLConf.USE_V1_SOURCE_LIST.key -> "",
         CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET) {

diff --git a/spark/src/test/scala/org/apache/spark/sql/comet/ParquetEncryptionITCase.scala b/spark/src/test/scala/org/apache/spark/sql/comet/ParquetEncryptionITCase.scala
index b3e6a5a42a..8a746c29da 100644
--- a/spark/src/test/scala/org/apache/spark/sql/comet/ParquetEncryptionITCase.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/comet/ParquetEncryptionITCase.scala
@@ -37,7 +37,7 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils

 import org.apache.comet.{CometConf, IntegrationTestSuite}
-import org.apache.comet.CometConf.{SCAN_NATIVE_COMET, SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT}
+import org.apache.comet.CometConf.{SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT}

 /**
  * A integration test suite that tests parquet modular encryption usage.
@@ -238,12 +238,7 @@ class ParquetEncryptionITCase extends CometTestBase with SQLTestUtils {

           // native_datafusion and native_iceberg_compat fall back due to Arrow-rs
           // https://github.com/apache/arrow-rs/blob/da9829728e2a9dffb8d4f47ffe7b103793851724/parquet/src/file/metadata/parser.rs#L494
-          if (CometConf.COMET_ENABLED.get(conf) && CometConf.COMET_NATIVE_SCAN_IMPL.get(
-              conf) == SCAN_NATIVE_COMET) {
-            checkSparkAnswerAndOperator(readDataset)
-          } else {
-            checkAnswer(readDataset, inputDF)
-          }
+          checkAnswer(readDataset, inputDF)
         }
       }
     }
@@ -442,12 +437,7 @@ class ParquetEncryptionITCase extends CometTestBase with SQLTestUtils {

           // native_datafusion and native_iceberg_compat fall back due to Arrow-rs not
           // supporting other key lengths
-          if (CometConf.COMET_ENABLED.get(conf) && CometConf.COMET_NATIVE_SCAN_IMPL.get(
-              conf) == SCAN_NATIVE_COMET) {
-            checkSparkAnswerAndOperator(readDataset)
-          } else {
-            checkAnswer(readDataset, inputDF)
-          }
+          checkAnswer(readDataset, inputDF)
         }
       }
     }
@@ -467,7 +457,7 @@ class ParquetEncryptionITCase extends CometTestBase with SQLTestUtils {

     Seq("true", "false").foreach { cometEnabled =>
       if (cometEnabled == "true") {
-        Seq(SCAN_NATIVE_COMET, SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT).foreach {
+        Seq(SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT).foreach {
           scanImpl =>
             super.test(testName + s" Comet($cometEnabled)" + s" Scan($scanImpl)", testTags: _*) {
               withSQLConf(

From 249db59f34b323f7bfa3328501c0a72c7674e82b Mon Sep 17 00:00:00 2001
From: Andy Grove
Date: Sun, 1 Feb 2026 12:27:02 -0700
Subject: [PATCH 7/8] format

---
 .../scala/org/apache/comet/CometConf.scala    |  5 ++---
 .../sql/comet/ParquetEncryptionITCase.scala   | 19 +++++++++----------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala
index 659d99b69a..522ccbc94c 100644
--- a/common/src/main/scala/org/apache/comet/CometConf.scala
+++ b/common/src/main/scala/org/apache/comet/CometConf.scala
@@ -124,7 +124,7 @@ object CometConf extends ShimCometConf {
   val COMET_NATIVE_SCAN_IMPL: ConfigEntry[String] = conf("spark.comet.scan.impl")
     .category(CATEGORY_SCAN)
     .doc(
-      s"The implementation of Comet Native Scan to use. Available modes are " +
+      "The implementation of Comet Native Scan to use. Available modes are " +
       s"`$SCAN_NATIVE_DATAFUSION`, and `$SCAN_NATIVE_ICEBERG_COMPAT`. " +
       s"`$SCAN_NATIVE_DATAFUSION` is a fully native implementation of scan based on " +
       "DataFusion. " +
" + @@ -134,8 +134,7 @@ object CometConf extends ShimCometConf { .internal() .stringConf .transform(_.toLowerCase(Locale.ROOT)) - .checkValues( - Set(SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT, SCAN_AUTO)) + .checkValues(Set(SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT, SCAN_AUTO)) .createWithEnvVarOrDefault("COMET_PARQUET_SCAN_IMPL", SCAN_AUTO) val COMET_ICEBERG_NATIVE_ENABLED: ConfigEntry[Boolean] = diff --git a/spark/src/test/scala/org/apache/spark/sql/comet/ParquetEncryptionITCase.scala b/spark/src/test/scala/org/apache/spark/sql/comet/ParquetEncryptionITCase.scala index 8a746c29da..db07b91e93 100644 --- a/spark/src/test/scala/org/apache/spark/sql/comet/ParquetEncryptionITCase.scala +++ b/spark/src/test/scala/org/apache/spark/sql/comet/ParquetEncryptionITCase.scala @@ -457,17 +457,16 @@ class ParquetEncryptionITCase extends CometTestBase with SQLTestUtils { Seq("true", "false").foreach { cometEnabled => if (cometEnabled == "true") { - Seq(SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT).foreach { - scanImpl => - super.test(testName + s" Comet($cometEnabled)" + s" Scan($scanImpl)", testTags: _*) { - withSQLConf( - CometConf.COMET_ENABLED.key -> cometEnabled, - CometConf.COMET_EXEC_ENABLED.key -> "true", - SQLConf.ANSI_ENABLED.key -> "false", - CometConf.COMET_NATIVE_SCAN_IMPL.key -> scanImpl) { - testFun - } + Seq(SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT).foreach { scanImpl => + super.test(testName + s" Comet($cometEnabled)" + s" Scan($scanImpl)", testTags: _*) { + withSQLConf( + CometConf.COMET_ENABLED.key -> cometEnabled, + CometConf.COMET_EXEC_ENABLED.key -> "true", + SQLConf.ANSI_ENABLED.key -> "false", + CometConf.COMET_NATIVE_SCAN_IMPL.key -> scanImpl) { + testFun } + } } } else { super.test(testName + s" Comet($cometEnabled)", testTags: _*) { From c5f928786023db6aba1a8aab980480472348cd8f Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sun, 1 Feb 2026 12:41:30 -0700 Subject: [PATCH 8/8] fix: change default scan_impl from native_comet to auto in java-test action The macOS CI jobs were failing because the java-test action defaults scan_impl to native_comet, which is no longer a valid option. Co-Authored-By: Claude Opus 4.5 --- .github/actions/java-test/action.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/java-test/action.yaml b/.github/actions/java-test/action.yaml index 6105962161..ee3c46eb48 100644 --- a/.github/actions/java-test/action.yaml +++ b/.github/actions/java-test/action.yaml @@ -32,7 +32,7 @@ inputs: scan_impl: description: 'The default Parquet scan implementation' required: false - default: 'native_comet' + default: 'auto' upload-test-reports: description: 'Whether to upload test results including coverage to GitHub' required: false