From 1d289233189f33739021e0a370dca89bead3d1e4 Mon Sep 17 00:00:00 2001 From: Stevo Mitric Date: Thu, 18 Jun 2026 10:43:41 +0000 Subject: [PATCH 1/3] Add MIN/MAX aggregate tests for nanosecond-precision timestamp types --- .../ExpressionTypeCheckingSuite.scala | 14 ++ .../timestamp-ltz-nanos.sql.out | 24 +++ .../timestamp-ntz-nanos.sql.out | 24 +++ .../sql-tests/inputs/timestamp-ltz-nanos.sql | 15 ++ .../sql-tests/inputs/timestamp-ntz-nanos.sql | 15 ++ .../results/timestamp-ltz-nanos.sql.out | 24 +++ .../results/timestamp-ntz-nanos.sql.out | 24 +++ .../TimestampNanosFunctionsSuiteBase.scala | 160 ++++++++++++++++++ 8 files changed, 300 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala index 95e118a30771c..61a6f0c40081b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala @@ -394,6 +394,20 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite with SQLHelper with Quer ) } + test("SPARK-57502: Min/Max accept nanosecond-precision timestamp types and preserve them") { + // Min/Max gate only on orderability (TypeUtils.checkForOrderingExpr), and the nanosecond + // timestamp types are orderable AtomicTypes (SPARK-57103), so the analysis gate passes and the + // result type preserves the input precision (dataType = child.dataType). No inputTypes / type + // matcher is involved, so no production change to Min/Max is needed. 
+ Seq(TimestampNTZNanosType(9), TimestampLTZNanosType(7)).foreach { dt => + val a = AttributeReference("c", dt)() + assert(Max(a).checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess) + assert(Min(a).checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess) + assert(Max(a).dataType == dt) + assert(Min(a).dataType == dt) + } + } + test("check types for aggregates") { // We use AggregateFunction directly at here because the error will be thrown from it // instead of from AggregateExpression, which is the wrapper of an AggregateFunction. diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out index 8582e5175301b..c2e81a619db99 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out @@ -647,3 +647,27 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "fragment" : "TIMESTAMP_LTZ '2020-01-02 03:04:05.123456789 UTC' + INTERVAL '1' MONTH" } ] } + + +-- !query +SELECT max(c), min(c) FROM VALUES + (TIMESTAMP_LTZ '2020-01-01 00:00:00.000000001 UTC'), + (TIMESTAMP_LTZ '2020-01-01 00:00:00.000000999 UTC'), + (CAST(NULL AS timestamp_ltz(9))) AS t(c) +-- !query analysis +Aggregate [max(c#x) AS max(c)#x, min(c#x) AS min(c)#x] ++- SubqueryAlias t + +- LocalRelation [c#x] + + +-- !query +SELECT c, count(*) FROM VALUES + (TIMESTAMP_LTZ '2020-01-01 00:00:00.000000001 UTC'), + (TIMESTAMP_LTZ '2020-01-01 00:00:00.000000999 UTC'), + (TIMESTAMP_LTZ '2020-01-01 00:00:00.000000001 UTC') AS t(c) + GROUP BY c ORDER BY c +-- !query analysis +Sort [c#x ASC NULLS FIRST], true ++- Aggregate [c#x], [c#x, count(1) AS count(1)#xL] + +- SubqueryAlias t + +- LocalRelation [c#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out index f7bf8f3ffd941..505ef4acc59a9 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out @@ -574,3 +574,27 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "fragment" : "TIMESTAMP_NTZ '2020-01-02 03:04:05.123456789' + INTERVAL '1' MONTH" } ] } + + +-- !query +SELECT max(c), min(c) FROM VALUES + (TIMESTAMP_NTZ '2020-01-01 00:00:00.000000001'), + (TIMESTAMP_NTZ '2020-01-01 00:00:00.000000999'), + (CAST(NULL AS timestamp_ntz(9))) AS t(c) +-- !query analysis +Aggregate [max(c#x) AS max(c)#x, min(c#x) AS min(c)#x] ++- SubqueryAlias t + +- LocalRelation [c#x] + + +-- !query +SELECT c, count(*) FROM VALUES + (TIMESTAMP_NTZ '2020-01-01 00:00:00.000000001'), + (TIMESTAMP_NTZ '2020-01-01 00:00:00.000000999'), + (TIMESTAMP_NTZ '2020-01-01 00:00:00.000000001') AS t(c) + GROUP BY c ORDER BY c +-- !query analysis +Sort [c#x ASC NULLS FIRST], true ++- Aggregate [c#x], [c#x, count(1) AS count(1)#xL] + +- SubqueryAlias t + +- LocalRelation [c#x] diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql index 7e8ab11f633f3..ed608a4854d01 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql @@ -173,3 +173,18 @@ SELECT TIMESTAMP_LTZ '1960-01-02 03:04:05.123456789 UTC' + -- operator overload. SELECT TIMESTAMP_LTZ '2020-01-02 03:04:05.123456789 UTC' + make_interval(0, 1, 0, 2, 0, 0, 0); SELECT TIMESTAMP_LTZ '2020-01-02 03:04:05.123456789 UTC' + INTERVAL '1' MONTH; + +-- SPARK-57502: MAX / MIN over nanosecond-precision TIMESTAMP_LTZ. The aggregate preserves the +-- nanosecond type and orders by the sub-microsecond remainder; NULLs are ignored. 
Values are +-- rendered in the session time zone (America/Los_Angeles). +SELECT max(c), min(c) FROM VALUES + (TIMESTAMP_LTZ '2020-01-01 00:00:00.000000001 UTC'), + (TIMESTAMP_LTZ '2020-01-01 00:00:00.000000999 UTC'), + (CAST(NULL AS timestamp_ltz(9))) AS t(c); +-- GROUP BY a nanosecond key: two keys that share epochMicros but differ within the microsecond +-- must not collapse into one group. +SELECT c, count(*) FROM VALUES + (TIMESTAMP_LTZ '2020-01-01 00:00:00.000000001 UTC'), + (TIMESTAMP_LTZ '2020-01-01 00:00:00.000000999 UTC'), + (TIMESTAMP_LTZ '2020-01-01 00:00:00.000000001 UTC') AS t(c) + GROUP BY c ORDER BY c; diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql index 0568f671e4bfc..31d95824dbedb 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql @@ -149,3 +149,18 @@ SELECT TIMESTAMP_NTZ '1960-01-02 03:04:05.123456789' + INTERVAL '0 00:00:00.0000 -- operator overload. SELECT TIMESTAMP_NTZ '2020-01-02 03:04:05.123456789' + make_interval(0, 1, 0, 2, 0, 0, 0); SELECT TIMESTAMP_NTZ '2020-01-02 03:04:05.123456789' + INTERVAL '1' MONTH; + +-- SPARK-57502: MAX / MIN over nanosecond-precision TIMESTAMP_NTZ. The aggregate preserves the +-- nanosecond type and orders by the sub-microsecond remainder (two values share the same +-- microsecond and differ only within it); NULLs are ignored. +SELECT max(c), min(c) FROM VALUES + (TIMESTAMP_NTZ '2020-01-01 00:00:00.000000001'), + (TIMESTAMP_NTZ '2020-01-01 00:00:00.000000999'), + (CAST(NULL AS timestamp_ntz(9))) AS t(c); +-- GROUP BY a nanosecond key: two keys that share epochMicros but differ within the microsecond +-- must not collapse into one group. 
+SELECT c, count(*) FROM VALUES + (TIMESTAMP_NTZ '2020-01-01 00:00:00.000000001'), + (TIMESTAMP_NTZ '2020-01-01 00:00:00.000000999'), + (TIMESTAMP_NTZ '2020-01-01 00:00:00.000000001') AS t(c) + GROUP BY c ORDER BY c; diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out index 7850cfc9bfef9..00fc9f36f0e25 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out @@ -726,3 +726,27 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "fragment" : "TIMESTAMP_LTZ '2020-01-02 03:04:05.123456789 UTC' + INTERVAL '1' MONTH" } ] } + + +-- !query +SELECT max(c), min(c) FROM VALUES + (TIMESTAMP_LTZ '2020-01-01 00:00:00.000000001 UTC'), + (TIMESTAMP_LTZ '2020-01-01 00:00:00.000000999 UTC'), + (CAST(NULL AS timestamp_ltz(9))) AS t(c) +-- !query schema +struct +-- !query output +2019-12-31 16:00:00.000000999 2019-12-31 16:00:00.000000001 + + +-- !query +SELECT c, count(*) FROM VALUES + (TIMESTAMP_LTZ '2020-01-01 00:00:00.000000001 UTC'), + (TIMESTAMP_LTZ '2020-01-01 00:00:00.000000999 UTC'), + (TIMESTAMP_LTZ '2020-01-01 00:00:00.000000001 UTC') AS t(c) + GROUP BY c ORDER BY c +-- !query schema +struct +-- !query output +2019-12-31 16:00:00.000000001 2 +2019-12-31 16:00:00.000000999 1 diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out index 9f64ed3b229f4..7931449d99b42 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out @@ -644,3 +644,27 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "fragment" : "TIMESTAMP_NTZ '2020-01-02 03:04:05.123456789' + INTERVAL '1' MONTH" } ] } + + +-- !query +SELECT max(c), min(c) FROM VALUES + 
(TIMESTAMP_NTZ '2020-01-01 00:00:00.000000001'), + (TIMESTAMP_NTZ '2020-01-01 00:00:00.000000999'), + (CAST(NULL AS timestamp_ntz(9))) AS t(c) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.000000999 2020-01-01 00:00:00.000000001 + + +-- !query +SELECT c, count(*) FROM VALUES + (TIMESTAMP_NTZ '2020-01-01 00:00:00.000000001'), + (TIMESTAMP_NTZ '2020-01-01 00:00:00.000000999'), + (TIMESTAMP_NTZ '2020-01-01 00:00:00.000000001') AS t(c) + GROUP BY c ORDER BY c +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.000000001 2 +2020-01-01 00:00:00.000000999 1 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala index 8b47efb85f0d2..53ce873e62e75 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala @@ -239,6 +239,166 @@ abstract class TimestampNanosFunctionsSuiteBase extends SharedSparkSession { Row(null, null, null, null)) } } + + // ===== MIN / MAX aggregates over nanosecond-precision timestamps (SPARK-56822) ===== + // `Min`/`Max` are type-agnostic `DeclarativeAggregate`s gated only on orderability + // (`TypeUtils.checkForOrderingExpr`); the nanosecond timestamp types became orderable in + // SPARK-57103, so MIN/MAX (and `min_by`/`max_by`/`greatest`/`least`, which ride the same gate) + // work without any change to the aggregates themselves. These end-to-end tests lock that in, + // mirroring the TimeType precedent (SPARK-52626 group-by, SPARK-52660 codegen split). The result + // type preserves the input precision (`dataType = child.dataType`). Mixed-precision inputs route + // through `findWiderDateTimeType`, which has no nanos arm yet, so they are out of scope here + // (SPARK-57454); every column below is strictly same-precision. 
+ + test("SPARK-57502: max/min over nanosecond-precision timestamps preserve the input type") { + Seq(7, 8, 9).foreach { p => + val schema = new StructType() + .add("ntz", TimestampNTZNanosType(p)) + .add("ltz", TimestampLTZNanosType(p)) + val data = Seq( + Row(LocalDateTime.parse("2020-01-01T00:00:01.100000000"), + Instant.parse("2020-01-01T00:00:01.100000000Z")), + Row(LocalDateTime.parse("2020-01-01T00:00:02.200000000"), + Instant.parse("2020-01-01T00:00:02.200000000Z")), + Row(LocalDateTime.parse("2020-01-01T00:00:00.300000000"), + Instant.parse("2020-01-01T00:00:00.300000000Z")), + Row(null, null)) + val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema) + val sqlRes = df.selectExpr("max(ntz)", "min(ntz)", "max(ltz)", "min(ltz)") + val colRes = df.select( + max(col("ntz")), min(col("ntz")), max(col("ltz")), min(col("ltz"))) + // The SQL and the Scala Column API agree. + checkAnswer(sqlRes, colRes) + // Absolute values (NTZ collects to LocalDateTime, LTZ to Instant; SPARK-57033). + checkAnswer(sqlRes, Row( + LocalDateTime.parse("2020-01-01T00:00:02.200000000"), + LocalDateTime.parse("2020-01-01T00:00:00.300000000"), + Instant.parse("2020-01-01T00:00:02.200000000Z"), + Instant.parse("2020-01-01T00:00:00.300000000Z"))) + // The result keeps both the family (NTZ/LTZ) and the precision of the input. + assert(sqlRes.schema.map(_.dataType) === Seq( + TimestampNTZNanosType(p), TimestampNTZNanosType(p), + TimestampLTZNanosType(p), TimestampLTZNanosType(p))) + } + } + + test("SPARK-57502: max/min over nanos order by the sub-microsecond remainder") { + // Two values share the same epochMicros and differ only within the microsecond, so a correct + // result must use the full `TimestampNanosVal` comparison and never truncate to micros. + // Run on both the codegen (`CodeGenerator.genComp` AnyTimestampNanoType arm) and the + // interpreted (`Ordering[TimestampNanosVal]`) paths. 
+ Seq( + Seq(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true", + SQLConf.CODEGEN_FACTORY_MODE.key -> "CODEGEN_ONLY"), + Seq(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", + SQLConf.CODEGEN_FACTORY_MODE.key -> "NO_CODEGEN") + ).foreach { conf => + withSQLConf(conf: _*) { + val ntz = spark.createDataFrame( + spark.sparkContext.parallelize(Seq( + Row(LocalDateTime.parse("2020-01-01T00:00:00.000000001")), + Row(LocalDateTime.parse("2020-01-01T00:00:00.000000999")), + Row(null))), + new StructType().add("c", TimestampNTZNanosType(9))) + checkAnswer(ntz.selectExpr("max(c)", "min(c)"), + Row(LocalDateTime.parse("2020-01-01T00:00:00.000000999"), + LocalDateTime.parse("2020-01-01T00:00:00.000000001"))) + + val ltz = spark.createDataFrame( + spark.sparkContext.parallelize(Seq( + Row(Instant.parse("2020-01-01T00:00:00.000000001Z")), + Row(Instant.parse("2020-01-01T00:00:00.000000999Z")), + Row(null))), + new StructType().add("c", TimestampLTZNanosType(9))) + checkAnswer(ltz.selectExpr("max(c)", "min(c)"), + Row(Instant.parse("2020-01-01T00:00:00.000000999Z"), + Instant.parse("2020-01-01T00:00:00.000000001Z"))) + } + } + } + + test("SPARK-57502: max/min over all-NULL or empty nanos input return NULL") { + Seq(7, 8, 9).foreach { p => + val ntz = spark.createDataFrame( + spark.sparkContext.parallelize(Seq(Row(null), Row(null))), + new StructType().add("c", TimestampNTZNanosType(p))) + checkAnswer(ntz.selectExpr("max(c)", "min(c)"), Row(null, null)) + // Global aggregate over zero rows still produces one all-NULL row. 
+ checkAnswer(ntz.filter(lit(false)).selectExpr("max(c)", "min(c)"), Row(null, null)) + + val ltz = spark.createDataFrame( + spark.sparkContext.parallelize(Seq(Row(null), Row(null))), + new StructType().add("c", TimestampLTZNanosType(p))) + checkAnswer(ltz.selectExpr("max(c)", "min(c)"), Row(null, null)) + } + } + + test("SPARK-57502: group by a nanosecond key with per-group max/min") { + // The grouping keys k1/k2 share their epochMicros but differ within the microsecond, so + // hashing/grouping (SPARK-57103) must distinguish sub-microsecond keys; the per-group max/min + // then order by the remainder. + val schema = new StructType() + .add("k", TimestampNTZNanosType(9)) + .add("v", TimestampLTZNanosType(9)) + val k1 = "2020-01-01T00:00:00.000000001" + val k2 = "2020-01-01T00:00:00.000000002" + val data = Seq( + Row(LocalDateTime.parse(k1), Instant.parse("2020-01-01T10:00:00.000000111Z")), + Row(LocalDateTime.parse(k1), Instant.parse("2020-01-01T10:00:00.000000999Z")), + Row(LocalDateTime.parse(k2), Instant.parse("2020-01-01T10:00:00.000000500Z"))) + val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema) + val res = df.groupBy("k").agg(max("v").as("mx"), min("v").as("mn")).orderBy("k") + checkAnswer(res, Seq( + Row(LocalDateTime.parse(k1), + Instant.parse("2020-01-01T10:00:00.000000999Z"), + Instant.parse("2020-01-01T10:00:00.000000111Z")), + Row(LocalDateTime.parse(k2), + Instant.parse("2020-01-01T10:00:00.000000500Z"), + Instant.parse("2020-01-01T10:00:00.000000500Z")))) + // The two sub-microsecond-distinct keys do not collapse into one group. 
+ assert(res.count() === 2) + assert(res.schema("k").dataType === TimestampNTZNanosType(9)) + } + + test("SPARK-57502: min_by/max_by and greatest/least over same-precision nanos") { + val df = spark.createDataFrame( + spark.sparkContext.parallelize(Seq( + Row("early", LocalDateTime.parse("2020-01-01T00:00:00.000000001")), + Row("late", LocalDateTime.parse("2020-01-01T00:00:00.000000999")))), + new StructType().add("label", StringType).add("ts", TimestampNTZNanosType(9))) + checkAnswer(df.selectExpr("max_by(label, ts)", "min_by(label, ts)"), Row("late", "early")) + + val df2 = spark.createDataFrame( + spark.sparkContext.parallelize(Seq(Row( + LocalDateTime.parse("2020-01-01T00:00:00.000000001"), + LocalDateTime.parse("2020-01-01T00:00:00.000000999")))), + new StructType() + .add("a", TimestampNTZNanosType(9)) + .add("b", TimestampNTZNanosType(9))) + checkAnswer(df2.selectExpr("greatest(a, b)", "least(a, b)"), + Row(LocalDateTime.parse("2020-01-01T00:00:00.000000999"), + LocalDateTime.parse("2020-01-01T00:00:00.000000001"))) + } + + test("SPARK-57502: max/min over nanos agree with the micros path when sub-micro digits are 0") { + Seq(7, 8, 9).foreach { p => + val ldts = Seq( + "2020-01-01T00:00:01.100000000", + "2020-01-01T00:00:02.200000000", + "2020-01-01T00:00:00.300000000") + val nanos = spark.createDataFrame( + spark.sparkContext.parallelize(ldts.map(s => Row(LocalDateTime.parse(s)))), + new StructType().add("c", TimestampNTZNanosType(p))) + val micro = spark.createDataFrame( + spark.sparkContext.parallelize(ldts.map(s => Row(LocalDateTime.parse(s)))), + new StructType().add("c", TimestampNTZType)) + // Compare via the string rendering so the differing result types (nanos vs micros) do not + // matter; the sub-microsecond digits are all zero, so the values agree. 
+ checkAnswer( + nanos.selectExpr("cast(max(c) as string)", "cast(min(c) as string)"), + micro.selectExpr("cast(max(c) as string)", "cast(min(c) as string)")) + } + } } // Runs the nanosecond timestamp function tests with ANSI mode enabled explicitly. From 019bcf974ec0d86895c4c226641f4b61aab992e5 Mon Sep 17 00:00:00 2001 From: Stevo Mitric Date: Thu, 18 Jun 2026 12:09:31 +0000 Subject: [PATCH 2/3] [SPARK-57502][SQL][TESTS][FOLLOWUP] Update suite Scaladoc to cover MIN/MAX aggregate tests Addresses review feedback on apache/spark#56592: the class Scaladoc for TimestampNanosFunctionsSuiteBase still described only the hour/minute/second functions and claimed every test exercises both the SQL and Column API paths, which is no longer accurate now that the MIN/MAX aggregate tests are present. Rewrote it to cover the datetime functions and the MIN/MAX aggregates and to note the ANSI on/off subclasses. Tests-only; no production change. Co-authored-by: Isaac --- .../spark/sql/TimestampNanosFunctionsSuiteBase.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala index 53ce873e62e75..6ee7ffe1c3740 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala @@ -26,10 +26,12 @@ import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ /** - * End-to-end tests for the `hour`, `minute` and `second` functions over the nanosecond-precision - * timestamp types `TIMESTAMP_NTZ(p)` / `TIMESTAMP_LTZ(p)` (`p` in `[7, 9]`), part of the - * nanosecond timestamp preview (SPARK-56822). Each test exercises both the SQL path - * (`selectExpr`) and the Scala `Column` API (`functions.hour` / `minute` / `second`). 
+ * End-to-end tests over the nanosecond-precision timestamp types `TIMESTAMP_NTZ(p)` / + * `TIMESTAMP_LTZ(p)` (`p` in `[7, 9]`), part of the nanosecond timestamp preview (SPARK-56822). + * Covers the datetime functions (`hour`/`minute`/`second`, `EXTRACT`/`date_part`, the date-field + * functions) and the `MIN`/`MAX` aggregates (plus `min_by`/`max_by`/`greatest`/`least`). Most + * tests use the SQL path (`selectExpr`); several also cross-check the Scala `Column` API. The two + * subclasses run every test with ANSI mode on and off. */ abstract class TimestampNanosFunctionsSuiteBase extends SharedSparkSession { From 941a20a043531f0ca7ad845376e91bfe6efb5c64 Mon Sep 17 00:00:00 2001 From: Stevo Mitric Date: Thu, 18 Jun 2026 17:35:42 +0000 Subject: [PATCH 3/3] [SPARK-57103][SQL][TESTS][FOLLOWUP] Use correct JIRA ticket for MIN/MAX nanos tests The MIN/MAX-over-nanos test names and golden-file comments were tagged with a placeholder ticket, SPARK-57502, which is in fact an unrelated INFRA issue ("Fix npm vulnerabilities in ui-test and dev"). Rename the references to SPARK-57103 ("Add ordering, compare, and hash for nanosecond timestamp types"), the sub-task of SPARK-56822 that enabled MIN/MAX over the nanosecond types. Tests-only; no production or behavior change. 
Co-authored-by: Isaac --- .../analysis/ExpressionTypeCheckingSuite.scala | 2 +- .../sql-tests/inputs/timestamp-ltz-nanos.sql | 2 +- .../sql-tests/inputs/timestamp-ntz-nanos.sql | 2 +- .../spark/sql/TimestampNanosFunctionsSuiteBase.scala | 12 ++++++------ 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala index 61a6f0c40081b..9b5db986153ee 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala @@ -394,7 +394,7 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite with SQLHelper with Quer ) } - test("SPARK-57502: Min/Max accept nanosecond-precision timestamp types and preserve them") { + test("SPARK-57103: Min/Max accept nanosecond-precision timestamp types and preserve them") { // Min/Max gate only on orderability (TypeUtils.checkForOrderingExpr), and the nanosecond // timestamp types are orderable AtomicTypes (SPARK-57103), so the analysis gate passes and the // result type preserves the input precision (dataType = child.dataType). 
No inputTypes / type diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql index ed608a4854d01..5341ba178b532 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql @@ -174,7 +174,7 @@ SELECT TIMESTAMP_LTZ '1960-01-02 03:04:05.123456789 UTC' + SELECT TIMESTAMP_LTZ '2020-01-02 03:04:05.123456789 UTC' + make_interval(0, 1, 0, 2, 0, 0, 0); SELECT TIMESTAMP_LTZ '2020-01-02 03:04:05.123456789 UTC' + INTERVAL '1' MONTH; --- SPARK-57502: MAX / MIN over nanosecond-precision TIMESTAMP_LTZ. The aggregate preserves the +-- SPARK-57103: MAX / MIN over nanosecond-precision TIMESTAMP_LTZ. The aggregate preserves the -- nanosecond type and orders by the sub-microsecond remainder; NULLs are ignored. Values are -- rendered in the session time zone (America/Los_Angeles). SELECT max(c), min(c) FROM VALUES diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql index 31d95824dbedb..18e362309fe1e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql @@ -150,7 +150,7 @@ SELECT TIMESTAMP_NTZ '1960-01-02 03:04:05.123456789' + INTERVAL '0 00:00:00.0000 SELECT TIMESTAMP_NTZ '2020-01-02 03:04:05.123456789' + make_interval(0, 1, 0, 2, 0, 0, 0); SELECT TIMESTAMP_NTZ '2020-01-02 03:04:05.123456789' + INTERVAL '1' MONTH; --- SPARK-57502: MAX / MIN over nanosecond-precision TIMESTAMP_NTZ. The aggregate preserves the +-- SPARK-57103: MAX / MIN over nanosecond-precision TIMESTAMP_NTZ. The aggregate preserves the -- nanosecond type and orders by the sub-microsecond remainder (two values share the same -- microsecond and differ only within it); NULLs are ignored. 
SELECT max(c), min(c) FROM VALUES diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala index 6ee7ffe1c3740..b264693b50030 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala @@ -252,7 +252,7 @@ abstract class TimestampNanosFunctionsSuiteBase extends SharedSparkSession { // through `findWiderDateTimeType`, which has no nanos arm yet, so they are out of scope here // (SPARK-57454); every column below is strictly same-precision. - test("SPARK-57502: max/min over nanosecond-precision timestamps preserve the input type") { + test("SPARK-57103: max/min over nanosecond-precision timestamps preserve the input type") { Seq(7, 8, 9).foreach { p => val schema = new StructType() .add("ntz", TimestampNTZNanosType(p)) @@ -284,7 +284,7 @@ abstract class TimestampNanosFunctionsSuiteBase extends SharedSparkSession { } } - test("SPARK-57502: max/min over nanos order by the sub-microsecond remainder") { + test("SPARK-57103: max/min over nanos order by the sub-microsecond remainder") { // Two values share the same epochMicros and differ only within the microsecond, so a correct // result must use the full `TimestampNanosVal` comparison and never truncate to micros. 
// Run on both the codegen (`CodeGenerator.genComp` AnyTimestampNanoType arm) and the @@ -319,7 +319,7 @@ abstract class TimestampNanosFunctionsSuiteBase extends SharedSparkSession { } } - test("SPARK-57502: max/min over all-NULL or empty nanos input return NULL") { + test("SPARK-57103: max/min over all-NULL or empty nanos input return NULL") { Seq(7, 8, 9).foreach { p => val ntz = spark.createDataFrame( spark.sparkContext.parallelize(Seq(Row(null), Row(null))), @@ -335,7 +335,7 @@ abstract class TimestampNanosFunctionsSuiteBase extends SharedSparkSession { } } - test("SPARK-57502: group by a nanosecond key with per-group max/min") { + test("SPARK-57103: group by a nanosecond key with per-group max/min") { // The grouping keys k1/k2 share their epochMicros but differ within the microsecond, so // hashing/grouping (SPARK-57103) must distinguish sub-microsecond keys; the per-group max/min // then order by the remainder. @@ -362,7 +362,7 @@ abstract class TimestampNanosFunctionsSuiteBase extends SharedSparkSession { assert(res.schema("k").dataType === TimestampNTZNanosType(9)) } - test("SPARK-57502: min_by/max_by and greatest/least over same-precision nanos") { + test("SPARK-57103: min_by/max_by and greatest/least over same-precision nanos") { val df = spark.createDataFrame( spark.sparkContext.parallelize(Seq( Row("early", LocalDateTime.parse("2020-01-01T00:00:00.000000001")), @@ -382,7 +382,7 @@ abstract class TimestampNanosFunctionsSuiteBase extends SharedSparkSession { LocalDateTime.parse("2020-01-01T00:00:00.000000001"))) } - test("SPARK-57502: max/min over nanos agree with the micros path when sub-micro digits are 0") { + test("SPARK-57103: max/min over nanos agree with the micros path when sub-micro digits are 0") { Seq(7, 8, 9).foreach { p => val ldts = Seq( "2020-01-01T00:00:01.100000000",