From b8519ad7036563e5566a9e20d6e6e0b75ebe8cc8 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 17 Jun 2026 23:28:16 +0200 Subject: [PATCH 1/5] [SPARK-57511][SQL] Support explicit CAST between TIMESTAMP_LTZ(p) and TIMESTAMP_NTZ(q) for p, q in [6, 9] Add explicit CAST support for the cross-family pairs CAST(TIMESTAMP_LTZ(p) AS TIMESTAMP_NTZ(q)) and CAST(TIMESTAMP_NTZ(p) AS TIMESTAMP_LTZ(q)) for p, q in [6, 9], where precision 6 maps to the microsecond family members TIMESTAMP / TIMESTAMP_NTZ. The conversion reinterprets the value against the session time zone (reusing the existing micro LTZ<->NTZ semantics) and floors the sub-microsecond part to the target precision. Casts stay explicit-only (not silent store assignments) and depend on the session time zone. --- .../catalyst/util/SparkDateTimeUtils.scala | 30 +++ .../spark/sql/catalyst/expressions/Cast.scala | 102 +++++++++ .../catalyst/expressions/CastSuiteBase.scala | 198 +++++++++++++++++- .../sql-tests/analyzer-results/cast.sql.out | 152 ++++++++++++++ .../analyzer-results/nonansi/cast.sql.out | 152 ++++++++++++++ .../test/resources/sql-tests/inputs/cast.sql | 35 ++++ .../resources/sql-tests/results/cast.sql.out | 176 ++++++++++++++++ .../sql-tests/results/nonansi/cast.sql.out | 176 ++++++++++++++++ 8 files changed, 1020 insertions(+), 1 deletion(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala index ea20057278cec..b6d3012939b56 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala @@ -310,6 +310,36 @@ trait SparkDateTimeUtils { microsToInstant(v.epochMicros).plusNanos(v.nanosWithinMicro.toLong) } + /** + * Converts a `TIMESTAMP_LTZ(p)` nanosecond value into the `TIMESTAMP_NTZ(precision)` wall-clock + * value observed in the time zone `zoneId`. 
The LTZ value denotes an absolute instant; rendering + * it as a local date-time at `zoneId` yields the NTZ representation. Time-zone offsets shift only + * whole seconds, so the sub-microsecond `nanosWithinMicro` component is preserved before being + * floored to the target `precision` (same flooring as same-family narrowing casts). + */ + def timestampLTZNanosToNTZNanos( + v: TimestampNanosVal, + zoneId: ZoneId, + precision: Int): TimestampNanosVal = { + val localDateTime = timestampNanosToInstant(v).atZone(zoneId).toLocalDateTime + localDateTimeToTimestampNanos(localDateTime, precision) + } + + /** + * Converts a `TIMESTAMP_NTZ(q)` nanosecond value into the `TIMESTAMP_LTZ(precision)` instant + * obtained by interpreting its wall-clock local date-time in the time zone `zoneId`. This is the + * reverse of [[timestampLTZNanosToNTZNanos]]; the sub-microsecond `nanosWithinMicro` component is + * preserved across the (whole-second) offset shift before being floored to the target + * `precision`. + */ + def timestampNTZNanosToLTZNanos( + v: TimestampNanosVal, + zoneId: ZoneId, + precision: Int): TimestampNanosVal = { + val instant = timestampNanosToLocalDateTime(v).atZone(zoneId).toInstant + instantToTimestampNanos(instant, precision) + } + /** * Converts the local date to the number of days since 1970-01-01. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index b29e6f5d953ba..216603df0f145 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -123,6 +123,16 @@ object Cast extends QueryErrorsBase { case (_: TimestampNTZNanosType, _: TimestampNTZNanosType) => true case (_: TimestampLTZNanosType, _: TimestampLTZNanosType) => true + case (_: TimestampLTZNanosType, _: TimestampNTZNanosType) => true + case (_: TimestampNTZNanosType, _: TimestampLTZNanosType) => true + + // Cross-family casts between the micro family member (TIMESTAMP_LTZ(6) = TIMESTAMP and + // TIMESTAMP_NTZ(6) = TIMESTAMP_NTZ) and the other family's nanosecond member. + case (TimestampType, _: TimestampNTZNanosType) => true + case (_: TimestampNTZNanosType, TimestampType) => true + case (TimestampNTZType, _: TimestampLTZNanosType) => true + case (_: TimestampLTZNanosType, TimestampNTZType) => true + case (DateType, _: TimestampLTZNanosType) => true case (_: TimestampLTZNanosType, DateType) => true case (DateType, _: TimestampNTZNanosType) => true @@ -273,6 +283,16 @@ object Cast extends QueryErrorsBase { case (_: TimestampNTZNanosType, _: TimestampNTZNanosType) => true case (_: TimestampLTZNanosType, _: TimestampLTZNanosType) => true + case (_: TimestampLTZNanosType, _: TimestampNTZNanosType) => true + case (_: TimestampNTZNanosType, _: TimestampLTZNanosType) => true + + // Cross-family casts between the micro family member (TIMESTAMP_LTZ(6) = TIMESTAMP and + // TIMESTAMP_NTZ(6) = TIMESTAMP_NTZ) and the other family's nanosecond member. 
+ case (TimestampType, _: TimestampNTZNanosType) => true + case (_: TimestampNTZNanosType, TimestampType) => true + case (TimestampNTZType, _: TimestampLTZNanosType) => true + case (_: TimestampLTZNanosType, TimestampNTZType) => true + case (DateType, _: TimestampLTZNanosType) => true case (_: TimestampLTZNanosType, DateType) => true case (DateType, _: TimestampNTZNanosType) => true @@ -365,6 +385,16 @@ object Cast extends QueryErrorsBase { case (TimestampType, DateType) => true case (TimestampType, TimestampNTZType) => true case (TimestampNTZType, TimestampType) => true + // Cross-family nanosecond casts convert between an absolute instant (LTZ) and a wall-clock + // local date-time (NTZ), so they depend on the session time zone, mirroring the micro + // TIMESTAMP <-> TIMESTAMP_NTZ pair above. This includes the mixed micro/nanos cases where one + // side is the precision-6 micro family member (TIMESTAMP / TIMESTAMP_NTZ). + case (_: TimestampLTZNanosType, _: TimestampNTZNanosType) => true + case (_: TimestampNTZNanosType, _: TimestampLTZNanosType) => true + case (TimestampType, _: TimestampNTZNanosType) => true + case (_: TimestampNTZNanosType, TimestampType) => true + case (TimestampNTZType, _: TimestampLTZNanosType) => true + case (_: TimestampLTZNanosType, TimestampNTZType) => true // NTZ string is zone-independent (mirroring micro TIMESTAMP_NTZ, which is not listed); only // the LTZ string parse/render depends on the session time zone. case (_: StringType, _: TimestampLTZNanosType) => true @@ -449,6 +479,18 @@ object Cast extends QueryErrorsBase { // already handled by the `from == to` short-circuit above. 
case (f: TimestampNTZNanosType, t: TimestampNTZNanosType) => f.precision <= t.precision case (f: TimestampLTZNanosType, t: TimestampLTZNanosType) => f.precision <= t.precision + // Cross-family nanosecond casts (TIMESTAMP_LTZ(p) <-> TIMESTAMP_NTZ(q)) reinterpret the value + // against the session time zone, so they stay explicit-only rather than silent store + // assignments while the nanosecond types are unreleased (same rationale as the cases above). + // This includes the mixed micro/nanos cases where one side is the precision-6 micro family + // member (TIMESTAMP / TIMESTAMP_NTZ); only the all-micro TIMESTAMP <-> TIMESTAMP_NTZ pair stays + // store-assignable via the catch-all below. + case (_: TimestampLTZNanosType, _: TimestampNTZNanosType) => false + case (_: TimestampNTZNanosType, _: TimestampLTZNanosType) => false + case (TimestampType, _: TimestampNTZNanosType) => false + case (_: TimestampNTZNanosType, TimestampType) => false + case (TimestampNTZType, _: TimestampLTZNanosType) => false + case (_: TimestampLTZNanosType, TimestampNTZType) => false case (_: DatetimeType, _: DatetimeType) => true case (ArrayType(fromType, fn), ArrayType(toType, tn)) => @@ -817,6 +859,8 @@ case class Cast( buildCast[Long](_, ts => convertTz(ts, zoneId, ZoneOffset.UTC)) case _: TimestampLTZNanosType => buildCast[TimestampNanosVal](_, v => v.epochMicros) + case _: TimestampNTZNanosType => + buildCast[TimestampNanosVal](_, v => convertTz(v.epochMicros, zoneId, ZoneOffset.UTC)) // TimestampWritable.decimalToTimestamp case DecimalType() => buildCast[Decimal](_, d => decimalToTimestamp(d)) @@ -851,6 +895,8 @@ case class Cast( buildCast[Long](_, ts => convertTz(ts, ZoneOffset.UTC, zoneId)) case _: TimestampNTZNanosType => buildCast[TimestampNanosVal](_, v => v.epochMicros) + case _: TimestampLTZNanosType => + buildCast[TimestampNanosVal](_, v => convertTz(v.epochMicros, ZoneOffset.UTC, zoneId)) } private[this] def castToTimestampLTZNanos( @@ -865,9 +911,15 @@ case class Cast( }) 
case TimestampType => buildCast[Long](_, m => TimestampNanosVal.fromParts(m, 0.toShort)) + case TimestampNTZType => + buildCast[Long](_, m => + TimestampNanosVal.fromParts(convertTz(m, zoneId, ZoneOffset.UTC), 0.toShort)) case _: TimestampLTZNanosType => buildCast[TimestampNanosVal](_, v => DateTimeUtils.truncateTimestampNanosToPrecision(v, precision)) + case _: TimestampNTZNanosType => + buildCast[TimestampNanosVal](_, v => + DateTimeUtils.timestampNTZNanosToLTZNanos(v, zoneId, precision)) case DateType => buildCast[Int](_, d => TimestampNanosVal.fromParts(daysToMicros(d, zoneId), 0.toShort)) } @@ -884,9 +936,15 @@ case class Cast( }) case TimestampNTZType => buildCast[Long](_, m => TimestampNanosVal.fromParts(m, 0.toShort)) + case TimestampType => + buildCast[Long](_, m => + TimestampNanosVal.fromParts(convertTz(m, ZoneOffset.UTC, zoneId), 0.toShort)) case _: TimestampNTZNanosType => buildCast[TimestampNanosVal](_, v => DateTimeUtils.truncateTimestampNanosToPrecision(v, precision)) + case _: TimestampLTZNanosType => + buildCast[TimestampNanosVal](_, v => + DateTimeUtils.timestampLTZNanosToNTZNanos(v, zoneId, precision)) case DateType => buildCast[Int](_, d => TimestampNanosVal.fromParts(daysToMicros(d, ZoneOffset.UTC), 0.toShort)) @@ -1831,6 +1889,13 @@ case class Cast( code"$evPrim = $dateTimeUtilsCls.convertTz($c, $zid, java.time.ZoneOffset.UTC);" case _: TimestampLTZNanosType => (c, evPrim, evNull) => code"$evPrim = $c.epochMicros;" + case _: TimestampNTZNanosType => + val zoneIdClass = classOf[ZoneId] + val zid = JavaCode.global( + ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), + zoneIdClass) + (c, evPrim, evNull) => + code"$evPrim = $dateTimeUtilsCls.convertTz($c.epochMicros, $zid, java.time.ZoneOffset.UTC);" case DecimalType() => (c, evPrim, evNull) => code"$evPrim = ${decimalToTimestampCode(c)};" case DoubleType => @@ -1896,6 +1961,13 @@ case class Cast( code"$evPrim = $dateTimeUtilsCls.convertTz($c, java.time.ZoneOffset.UTC, $zid);" case _: 
TimestampNTZNanosType => (c, evPrim, evNull) => code"$evPrim = $c.epochMicros;" + case _: TimestampLTZNanosType => + val zoneIdClass = classOf[ZoneId] + val zid = JavaCode.global( + ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), + zoneIdClass) + (c, evPrim, evNull) => + code"$evPrim = $dateTimeUtilsCls.convertTz($c.epochMicros, java.time.ZoneOffset.UTC, $zid);" } private[this] def castToTimestampLTZNanosCode( @@ -1929,9 +2001,24 @@ case class Cast( case TimestampType => (c, evPrim, evNull) => code"$evPrim = TimestampNanosVal.fromParts($c, (short) 0);" + case TimestampNTZType => + val zoneIdClass = classOf[ZoneId] + val zid = JavaCode.global( + ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), + zoneIdClass) + (c, evPrim, evNull) => + code"$evPrim = TimestampNanosVal.fromParts(" + + code"$dateTimeUtilsCls.convertTz($c, $zid, java.time.ZoneOffset.UTC), (short) 0);" case _: TimestampLTZNanosType => (c, evPrim, evNull) => code"$evPrim = $dateTimeUtilsCls.truncateTimestampNanosToPrecision($c, $precision);" + case _: TimestampNTZNanosType => + val zoneIdClass = classOf[ZoneId] + val zid = JavaCode.global( + ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), + zoneIdClass) + (c, evPrim, evNull) => + code"$evPrim = $dateTimeUtilsCls.timestampNTZNanosToLTZNanos($c, $zid, $precision);" case DateType => val zoneIdClass = classOf[ZoneId] val zid = JavaCode.global( @@ -1969,9 +2056,24 @@ case class Cast( case TimestampNTZType => (c, evPrim, evNull) => code"$evPrim = TimestampNanosVal.fromParts($c, (short) 0);" + case TimestampType => + val zoneIdClass = classOf[ZoneId] + val zid = JavaCode.global( + ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), + zoneIdClass) + (c, evPrim, evNull) => + code"$evPrim = TimestampNanosVal.fromParts(" + + code"$dateTimeUtilsCls.convertTz($c, java.time.ZoneOffset.UTC, $zid), (short) 0);" case _: TimestampNTZNanosType => (c, evPrim, evNull) => code"$evPrim = 
$dateTimeUtilsCls.truncateTimestampNanosToPrecision($c, $precision);" + case _: TimestampLTZNanosType => + val zoneIdClass = classOf[ZoneId] + val zid = JavaCode.global( + ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), + zoneIdClass) + (c, evPrim, evNull) => + code"$evPrim = $dateTimeUtilsCls.timestampLTZNanosToNTZNanos($c, $zid, $precision);" case DateType => (c, evPrim, evNull) => code"$evPrim = TimestampNanosVal.fromParts(" + diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index 896b2eddf3f68..e4438085c7b39 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -101,13 +101,23 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkNullCast(DateType, TimestampLTZNanosType(p)) checkNullCast(TimestampLTZNanosType(p), DateType) } - // Same-family cross-precision nanos casts. + // Same-family and cross-family cross-precision nanos casts. for { p1 <- TimestampNTZNanosType.MIN_PRECISION to TimestampNTZNanosType.MAX_PRECISION p2 <- TimestampNTZNanosType.MIN_PRECISION to TimestampNTZNanosType.MAX_PRECISION } { checkNullCast(TimestampNTZNanosType(p1), TimestampNTZNanosType(p2)) checkNullCast(TimestampLTZNanosType(p1), TimestampLTZNanosType(p2)) + checkNullCast(TimestampLTZNanosType(p1), TimestampNTZNanosType(p2)) + checkNullCast(TimestampNTZNanosType(p1), TimestampLTZNanosType(p2)) + } + // Cross-family casts that involve the precision-6 micro family member (TIMESTAMP_LTZ(6) = + // TIMESTAMP and TIMESTAMP_NTZ(6) = TIMESTAMP_NTZ) and the other family's nanosecond member. 
+ foreachNanosPrecision { p => + checkNullCast(TimestampType, TimestampNTZNanosType(p)) + checkNullCast(TimestampNTZNanosType(p), TimestampType) + checkNullCast(TimestampNTZType, TimestampLTZNanosType(p)) + checkNullCast(TimestampLTZNanosType(p), TimestampNTZType) } checkNullCast(StringType, BinaryType) checkNullCast(StringType, BooleanType) @@ -791,6 +801,55 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } } + test("cross-family nanos cast: admissibility, store-assignment and up-cast contract") { + for { + p <- TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION + q <- TimestampNTZNanosType.MIN_PRECISION to TimestampNTZNanosType.MAX_PRECISION + } { + val ltz = TimestampLTZNanosType(p) + val ntz = TimestampNTZNanosType(q) + // Explicit casts are allowed in both directions, both in ANSI and non-ANSI modes. + assert(Cast.canCast(ltz, ntz)) + assert(Cast.canCast(ntz, ltz)) + assert(Cast.canAnsiCast(ltz, ntz)) + assert(Cast.canAnsiCast(ntz, ltz)) + // The cross-family reinterpretation against the session zone is never a safe up-cast. + assert(!Cast.canUpCast(ltz, ntz)) + assert(!Cast.canUpCast(ntz, ltz)) + // They stay explicit-only: never silent store assignments (mirroring the other nanos casts). + assert(!Cast.canANSIStoreAssign(ltz, ntz)) + assert(!Cast.canANSIStoreAssign(ntz, ltz)) + // The conversion depends on the session time zone in both directions. + assert(Cast.needsTimeZone(ltz, ntz)) + assert(Cast.needsTimeZone(ntz, ltz)) + } + } + + test("cross-family nanos cast: micro boundary (precision 6) admissibility and store contract") { + // TIMESTAMP_LTZ(6) = TIMESTAMP and TIMESTAMP_NTZ(6) = TIMESTAMP_NTZ, so the precision-6 + // cross-family casts are the mixed micro/nanos pairs covered here. 
+ foreachNanosPrecision { p => + val pairs = Seq( + (TimestampType: DataType, TimestampNTZNanosType(p): DataType), // LTZ(6) -> NTZ(p) + (TimestampNTZNanosType(p): DataType, TimestampType: DataType), // NTZ(p) -> LTZ(6) + (TimestampNTZType: DataType, TimestampLTZNanosType(p): DataType),// NTZ(6) -> LTZ(p) + (TimestampLTZNanosType(p): DataType, TimestampNTZType: DataType))// LTZ(p) -> NTZ(6) + pairs.foreach { case (from, to) => + // Explicit casts are allowed (ANSI and non-ANSI), but are never safe up-casts and never + // silent store assignments, and they depend on the session time zone. + assert(Cast.canCast(from, to)) + assert(Cast.canAnsiCast(from, to)) + assert(!Cast.canUpCast(from, to)) + assert(!Cast.canANSIStoreAssign(from, to)) + assert(Cast.needsTimeZone(from, to)) + } + } + // Sanity: the all-micro TIMESTAMP <-> TIMESTAMP_NTZ pair (precision 6 <-> 6) stays a silent + // store assignment, unlike the mixed micro/nanos pairs above. + assert(Cast.canANSIStoreAssign(TimestampType, TimestampNTZType)) + assert(Cast.canANSIStoreAssign(TimestampNTZType, TimestampType)) + } + test("SPARK-40389: canUpCast: return false if casting decimal to integral types can cause" + " overflow") { Seq(ByteType, ShortType, IntegerType, LongType).foreach { integralType => @@ -1398,6 +1457,143 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } } + test("cross-family nanos cast: timestamp_ltz to timestamp_ntz") { + // LTZ(p) denotes an absolute instant; LTZ(p) -> NTZ(q) renders it as the wall-clock local + // date-time observed in the session zone (mirroring the micro TIMESTAMP -> TIMESTAMP_NTZ + // conversion on the epoch-micros part) and re-floors the sub-microsecond digits to q. + Seq(UTC, LA).foreach { zone => + val zid = Option(zone.getId) + val instants = Seq( + timestampLTZ(2020, 1, 1, 12, 30, 15, 123456789), + // Pre-epoch: epochMicros is negative, the sub-microsecond part stays in [0, 999]. 
+ timestampLTZ(1969, 12, 31, 23, 59, 59, 999999789)) + instants.foreach { instant => + val srcMicros = instantToNanosVal(instant).epochMicros + val ntzMicros = convertTz(srcMicros, UTC, zone) + for { + p <- TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION + q <- TimestampNTZNanosType.MIN_PRECISION to TimestampNTZNanosType.MAX_PRECISION + } { + // The source already floors to p; the cross-family cast then floors to q, i.e. to + // min(p, q). Only the wall-clock epoch-micros part shifts with the zone. + val srcNanos = floorNanosToPrecision(789, p) + val expectedNanos = floorNanosToPrecision(srcNanos, q) + checkEvaluation( + cast(Literal.create(nanosVal(srcMicros, srcNanos), TimestampLTZNanosType(p)), + TimestampNTZNanosType(q), zid), + nanosVal(ntzMicros, expectedNanos)) + checkEvaluation( + cast(Literal.create(null, TimestampLTZNanosType(p)), TimestampNTZNanosType(q), zid), + null) + } + } + // Zone sensitivity (exercises needsTimeZone): in LA the wall-clock micros differ from UTC. + if (zone == LA) { + assert(convertTz(instantToNanosVal(instants.head).epochMicros, UTC, zone) != + instantToNanosVal(instants.head).epochMicros) + } + } + } + + test("cross-family nanos cast: timestamp_ntz to timestamp_ltz") { + // NTZ(q) denotes a wall-clock local date-time; NTZ(q) -> LTZ(p) interprets it in the session + // zone to obtain the absolute instant (mirroring micro TIMESTAMP_NTZ -> TIMESTAMP on the + // epoch-micros part) and re-floors the sub-microsecond digits to p. 
+ Seq(UTC, LA).foreach { zone => + val zid = Option(zone.getId) + val localDateTimes = Seq( + LocalDateTime.of(2020, 1, 1, 12, 30, 15, 123456789), + LocalDateTime.of(1969, 12, 31, 23, 59, 59, 999999789)) + localDateTimes.foreach { localDateTime => + val srcMicros = localDateTimeToNanosVal(localDateTime).epochMicros + val ltzMicros = convertTz(srcMicros, zone, UTC) + for { + p <- TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION + q <- TimestampNTZNanosType.MIN_PRECISION to TimestampNTZNanosType.MAX_PRECISION + } { + val srcNanos = floorNanosToPrecision(789, q) + val expectedNanos = floorNanosToPrecision(srcNanos, p) + checkEvaluation( + cast(Literal.create(nanosVal(srcMicros, srcNanos), TimestampNTZNanosType(q)), + TimestampLTZNanosType(p), zid), + nanosVal(ltzMicros, expectedNanos)) + checkEvaluation( + cast(Literal.create(null, TimestampNTZNanosType(q)), TimestampLTZNanosType(p), zid), + null) + } + } + if (zone == LA) { + assert(convertTz(localDateTimeToNanosVal(localDateTimes.head).epochMicros, zone, UTC) != + localDateTimeToNanosVal(localDateTimes.head).epochMicros) + } + } + } + + test("cross-family nanos cast: round-trip ltz -> ntz -> ltz preserves the instant") { + // Round-tripping through the wall-clock NTZ form and back to LTZ in the same zone is the + // identity at equal precision (no sub-microsecond digits are dropped). + Seq(UTC, LA).foreach { zone => + val zid = Option(zone.getId) + foreachNanosPrecision { p => + val src = nanosVal( + instantToNanosVal(timestampLTZ(2020, 7, 1, 6, 15, 30, 123456789)).epochMicros, + floorNanosToPrecision(789, p)) + checkEvaluation( + cast( + cast(Literal.create(src, TimestampLTZNanosType(p)), TimestampNTZNanosType(p), zid), + TimestampLTZNanosType(p), zid), + src) + } + } + } + + test("cross-family nanos cast: micro family member (precision 6) to/from nanos") { + // The precision-6 micro family members are TIMESTAMP_LTZ(6) = TIMESTAMP and + // TIMESTAMP_NTZ(6) = TIMESTAMP_NTZ. 
Casting them across families reinterprets the value against + // the session zone (mirroring the all-micro TIMESTAMP <-> TIMESTAMP_NTZ conversion); micro + // sources yield a zero sub-microsecond part and micro targets drop the sub-microsecond digits. + Seq(UTC, LA).foreach { zone => + val zid = Option(zone.getId) + // Micro LTZ instant (sub-micro digits are not representable in the micro type). + val ltzMicros = instantToMicros(timestampLTZ(2020, 1, 1, 12, 30, 15, 123456000)) + val ntzMicrosForLtz = convertTz(ltzMicros, UTC, zone) + // Micro NTZ wall-clock value. + val ntzMicros = + localDateTimeToNanosVal(LocalDateTime.of(2020, 1, 1, 12, 30, 15, 123456000)).epochMicros + val ltzMicrosForNtz = convertTz(ntzMicros, zone, UTC) + foreachNanosPrecision { p => + // TIMESTAMP (LTZ(6)) -> TIMESTAMP_NTZ(p): wall clock in the session zone, sub-micro = 0. + checkEvaluation( + cast(Literal.create(ltzMicros, TimestampType), TimestampNTZNanosType(p), zid), + nanosVal(ntzMicrosForLtz, 0)) + // TIMESTAMP_NTZ (NTZ(6)) -> TIMESTAMP_LTZ(p): instant of the wall clock, sub-micro = 0. + checkEvaluation( + cast(Literal.create(ntzMicros, TimestampNTZType), TimestampLTZNanosType(p), zid), + nanosVal(ltzMicrosForNtz, 0)) + // TIMESTAMP_NTZ(p) -> TIMESTAMP (LTZ(6)): drops the sub-microsecond digits before the zone + // reinterpretation. A non-zero nanosWithinMicro on the source proves the truncation. + checkEvaluation( + cast(Literal.create(nanosVal(ntzMicros, floorNanosToPrecision(789, p)), + TimestampNTZNanosType(p)), TimestampType, zid), + convertTz(ntzMicros, zone, UTC)) + // TIMESTAMP_LTZ(p) -> TIMESTAMP_NTZ (NTZ(6)): drops the sub-microsecond digits likewise. + checkEvaluation( + cast(Literal.create(nanosVal(ltzMicros, floorNanosToPrecision(789, p)), + TimestampLTZNanosType(p)), TimestampNTZType, zid), + convertTz(ltzMicros, UTC, zone)) + // Null input in all four directions. 
+ checkEvaluation( + cast(Literal.create(null, TimestampType), TimestampNTZNanosType(p), zid), null) + checkEvaluation( + cast(Literal.create(null, TimestampNTZType), TimestampLTZNanosType(p), zid), null) + checkEvaluation( + cast(Literal.create(null, TimestampNTZNanosType(p)), TimestampType, zid), null) + checkEvaluation( + cast(Literal.create(null, TimestampLTZNanosType(p)), TimestampNTZType, zid), null) + } + } + } + test("SPARK-57323: cast between date and timestamp_ntz with nanosecond precision") { // NTZ casts use a fixed UTC wall-clock grid, independent of the session time zone. val date = LocalDate.of(2020, 1, 1) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out index bf344eb08e05b..a3c8e894f8a9c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out @@ -872,6 +872,158 @@ Project [cast(cast(null as timestamp_ltz(8)) as timestamp_ltz(9)) AS CAST(CAST(N +- OneRowRelation +-- !query +select typeof(timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(7)) +-- !query analysis +Project [typeof(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(7))) AS typeof(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ(7)))#x] ++- OneRowRelation + + +-- !query +select typeof(timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(9)) +-- !query analysis +Project [typeof(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9))) AS typeof(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation + + +-- !query +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(8)) +-- !query analysis +Project [typeof(cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(8))) AS typeof(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_LTZ(8)))#x] ++- OneRowRelation + + +-- !query +select 
typeof(timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(9)) +-- !query analysis +Project [typeof(cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9))) AS typeof(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation + + +-- !query +select timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(9)::timestamp_ltz(9) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9)) as timestamp_ltz(9)) AS CAST(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ(9)) AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +select timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(9)::timestamp_ntz(9) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9)) as timestamp_ntz(9)) AS CAST(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_LTZ(9)) AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +select timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(7)::timestamp_ltz(7) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(7)) as timestamp_ltz(7)) AS CAST(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ(7)) AS TIMESTAMP_LTZ(7))#x] ++- OneRowRelation + + +-- !query +select timestamp_ntz'1960-01-01 00:00:00.123456789'::timestamp_ltz(7)::timestamp_ntz(7) +-- !query analysis +Project [cast(cast(1960-01-01 00:00:00.123456789 as timestamp_ltz(7)) as timestamp_ntz(7)) AS CAST(CAST(TIMESTAMP_NTZ '1960-01-01 00:00:00.123456789' AS TIMESTAMP_LTZ(7)) AS TIMESTAMP_NTZ(7))#x] ++- OneRowRelation + + +-- !query +select cast(null as timestamp_ltz(9))::timestamp_ntz(7) +-- !query analysis +Project [cast(cast(null as timestamp_ltz(9)) as timestamp_ntz(7)) AS CAST(CAST(NULL AS TIMESTAMP_LTZ(9)) AS TIMESTAMP_NTZ(7))#x] ++- OneRowRelation + + +-- !query +select cast(null as timestamp_ntz(8))::timestamp_ltz(9) +-- !query analysis +Project [cast(cast(null as 
timestamp_ntz(8)) as timestamp_ltz(9)) AS CAST(CAST(NULL AS TIMESTAMP_NTZ(8)) AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +select typeof(timestamp'2020-01-01 00:00:00.123456'::timestamp_ntz(9)) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456'::timestamp_ltz(9)) +-- !query analysis +Project [typeof(cast(2020-01-01 00:00:00.123456 as timestamp_ltz(9))) AS typeof(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456' AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation + + +-- !query +select typeof(timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(6)) +-- !query analysis +Project [typeof(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz)) AS typeof(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ))#x] ++- OneRowRelation + + +-- !query +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(6)) +-- !query analysis +Project [typeof(cast(2020-01-01 00:00:00.123456789 as timestamp)) AS typeof(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP))#x] ++- OneRowRelation + + +-- !query +select timestamp'2020-01-01 00:00:00.123456'::timestamp_ntz(9)::timestamp +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +select timestamp_ntz'2020-01-01 00:00:00.123456'::timestamp_ltz(9)::timestamp_ntz +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456 as timestamp_ltz(9)) as timestamp_ntz) AS CAST(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456' AS TIMESTAMP_LTZ(9)) AS TIMESTAMP_NTZ)#x] ++- OneRowRelation + + +-- !query +select timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(6)::timestamp_ltz +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz) as timestamp) AS CAST(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ) AS TIMESTAMP)#x] ++- OneRowRelation + + +-- !query +select timestamp_ntz'2020-01-01 
00:00:00.123456789'::timestamp_ltz(6)::timestamp_ntz +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp) as timestamp_ntz) AS CAST(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP) AS TIMESTAMP_NTZ)#x] ++- OneRowRelation + + +-- !query +select cast(null as timestamp)::timestamp_ntz(9) +-- !query analysis +Project [cast(cast(null as timestamp) as timestamp_ntz(9)) AS CAST(CAST(NULL AS TIMESTAMP) AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +select cast(null as timestamp_ntz)::timestamp_ltz(9) +-- !query analysis +Project [cast(cast(null as timestamp_ntz) as timestamp_ltz(9)) AS CAST(CAST(NULL AS TIMESTAMP_NTZ) AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +select cast(null as timestamp_ntz(9))::timestamp +-- !query analysis +Project [cast(cast(null as timestamp_ntz(9)) as timestamp) AS CAST(CAST(NULL AS TIMESTAMP_NTZ(9)) AS TIMESTAMP)#x] ++- OneRowRelation + + +-- !query +select cast(null as timestamp_ltz(9))::timestamp_ntz +-- !query analysis +Project [cast(cast(null as timestamp_ltz(9)) as timestamp_ntz) AS CAST(CAST(NULL AS TIMESTAMP_LTZ(9)) AS TIMESTAMP_NTZ)#x] ++- OneRowRelation + + -- !query select cast(cast('inf' as double) as timestamp) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/cast.sql.out index c6ee17e8e387a..b18f5739fd685 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/cast.sql.out @@ -736,6 +736,158 @@ Project [cast(cast(null as timestamp_ltz(8)) as timestamp_ltz(9)) AS CAST(CAST(N +- OneRowRelation +-- !query +select typeof(timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(7)) +-- !query analysis +Project [typeof(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(7))) AS typeof(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS 
TIMESTAMP_NTZ(7)))#x] ++- OneRowRelation + + +-- !query +select typeof(timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(9)) +-- !query analysis +Project [typeof(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9))) AS typeof(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation + + +-- !query +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(8)) +-- !query analysis +Project [typeof(cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(8))) AS typeof(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_LTZ(8)))#x] ++- OneRowRelation + + +-- !query +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(9)) +-- !query analysis +Project [typeof(cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9))) AS typeof(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation + + +-- !query +select timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(9)::timestamp_ltz(9) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9)) as timestamp_ltz(9)) AS CAST(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ(9)) AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +select timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(9)::timestamp_ntz(9) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9)) as timestamp_ntz(9)) AS CAST(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_LTZ(9)) AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +select timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(7)::timestamp_ltz(7) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(7)) as timestamp_ltz(7)) AS CAST(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ(7)) AS TIMESTAMP_LTZ(7))#x] ++- OneRowRelation + + +-- !query +select timestamp_ntz'1960-01-01 
00:00:00.123456789'::timestamp_ltz(7)::timestamp_ntz(7) +-- !query analysis +Project [cast(cast(1960-01-01 00:00:00.123456789 as timestamp_ltz(7)) as timestamp_ntz(7)) AS CAST(CAST(TIMESTAMP_NTZ '1960-01-01 00:00:00.123456789' AS TIMESTAMP_LTZ(7)) AS TIMESTAMP_NTZ(7))#x] ++- OneRowRelation + + +-- !query +select cast(null as timestamp_ltz(9))::timestamp_ntz(7) +-- !query analysis +Project [cast(cast(null as timestamp_ltz(9)) as timestamp_ntz(7)) AS CAST(CAST(NULL AS TIMESTAMP_LTZ(9)) AS TIMESTAMP_NTZ(7))#x] ++- OneRowRelation + + +-- !query +select cast(null as timestamp_ntz(8))::timestamp_ltz(9) +-- !query analysis +Project [cast(cast(null as timestamp_ntz(8)) as timestamp_ltz(9)) AS CAST(CAST(NULL AS TIMESTAMP_NTZ(8)) AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +select typeof(timestamp'2020-01-01 00:00:00.123456'::timestamp_ntz(9)) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456'::timestamp_ltz(9)) +-- !query analysis +Project [typeof(cast(2020-01-01 00:00:00.123456 as timestamp_ltz(9))) AS typeof(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456' AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation + + +-- !query +select typeof(timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(6)) +-- !query analysis +Project [typeof(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz)) AS typeof(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ))#x] ++- OneRowRelation + + +-- !query +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(6)) +-- !query analysis +Project [typeof(cast(2020-01-01 00:00:00.123456789 as timestamp)) AS typeof(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP))#x] ++- OneRowRelation + + +-- !query +select timestamp'2020-01-01 00:00:00.123456'::timestamp_ntz(9)::timestamp +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +select timestamp_ntz'2020-01-01 
00:00:00.123456'::timestamp_ltz(9)::timestamp_ntz +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456 as timestamp_ltz(9)) as timestamp_ntz) AS CAST(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456' AS TIMESTAMP_LTZ(9)) AS TIMESTAMP_NTZ)#x] ++- OneRowRelation + + +-- !query +select timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(6)::timestamp_ltz +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz) as timestamp) AS CAST(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ) AS TIMESTAMP)#x] ++- OneRowRelation + + +-- !query +select timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(6)::timestamp_ntz +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp) as timestamp_ntz) AS CAST(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP) AS TIMESTAMP_NTZ)#x] ++- OneRowRelation + + +-- !query +select cast(null as timestamp)::timestamp_ntz(9) +-- !query analysis +Project [cast(cast(null as timestamp) as timestamp_ntz(9)) AS CAST(CAST(NULL AS TIMESTAMP) AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +select cast(null as timestamp_ntz)::timestamp_ltz(9) +-- !query analysis +Project [cast(cast(null as timestamp_ntz) as timestamp_ltz(9)) AS CAST(CAST(NULL AS TIMESTAMP_NTZ) AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +select cast(null as timestamp_ntz(9))::timestamp +-- !query analysis +Project [cast(cast(null as timestamp_ntz(9)) as timestamp) AS CAST(CAST(NULL AS TIMESTAMP_NTZ(9)) AS TIMESTAMP)#x] ++- OneRowRelation + + +-- !query +select cast(null as timestamp_ltz(9))::timestamp_ntz +-- !query analysis +Project [cast(cast(null as timestamp_ltz(9)) as timestamp_ntz) AS CAST(CAST(NULL AS TIMESTAMP_LTZ(9)) AS TIMESTAMP_NTZ)#x] ++- OneRowRelation + + -- !query select cast(cast('inf' as double) as timestamp) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/cast.sql 
b/sql/core/src/test/resources/sql-tests/inputs/cast.sql index a9e8551066240..87621d4413d3e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cast.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cast.sql @@ -164,6 +164,41 @@ select timestamp_ltz'1960-01-01 00:00:00.123456789'::timestamp_ltz(9)::timestamp select cast(null as timestamp_ntz(9))::timestamp_ntz(7); select cast(null as timestamp_ltz(8))::timestamp_ltz(9); +-- Cross-family cast between nanosecond TIMESTAMP_LTZ(p) and TIMESTAMP_NTZ(q). +-- The conversion reinterprets the value against the session time zone, so SQL-layer coverage stays +-- zone-independent (type resolution, lossless same-zone round-trips, null propagation); the +-- zone-aware value semantics (flooring, pre-epoch, zone shifts) are covered in CastSuite* unit tests. +select typeof(timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(7)); +select typeof(timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(9)); +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(8)); +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(9)); +-- Same-zone round-trips are lossless at equal precision and re-floored when narrowing in between. +select timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(9)::timestamp_ltz(9); +select timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(9)::timestamp_ntz(9); +select timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(7)::timestamp_ltz(7); +select timestamp_ntz'1960-01-01 00:00:00.123456789'::timestamp_ltz(7)::timestamp_ntz(7); +-- Null propagation in both directions. +select cast(null as timestamp_ltz(9))::timestamp_ntz(7); +select cast(null as timestamp_ntz(8))::timestamp_ltz(9); + +-- Boundary at precision 6: TIMESTAMP_LTZ(6) is TIMESTAMP and TIMESTAMP_NTZ(6) is TIMESTAMP_NTZ, so +-- these cross-family casts mix the micro family member with the other family's nanos member. 
+select typeof(timestamp'2020-01-01 00:00:00.123456'::timestamp_ntz(9)); +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456'::timestamp_ltz(9)); +select typeof(timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(6)); +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(6)); +-- Same-zone round-trips: micro(6) -> nanos -> micro(6) is lossless; nanos -> micro(6) -> nanos +-- drops the sub-microsecond digits. +select timestamp'2020-01-01 00:00:00.123456'::timestamp_ntz(9)::timestamp; +select timestamp_ntz'2020-01-01 00:00:00.123456'::timestamp_ltz(9)::timestamp_ntz; +select timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(6)::timestamp_ltz; +select timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(6)::timestamp_ntz; +-- Null propagation across the micro boundary in all four directions. +select cast(null as timestamp)::timestamp_ntz(9); +select cast(null as timestamp_ntz)::timestamp_ltz(9); +select cast(null as timestamp_ntz(9))::timestamp; +select cast(null as timestamp_ltz(9))::timestamp_ntz; + select cast(cast('inf' as double) as timestamp); select cast(cast('inf' as float) as timestamp); diff --git a/sql/core/src/test/resources/sql-tests/results/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/cast.sql.out index b6da2f1c32e23..24f9668b3b177 100644 --- a/sql/core/src/test/resources/sql-tests/results/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cast.sql.out @@ -1586,6 +1586,182 @@ struct<CAST(CAST(NULL AS TIMESTAMP_LTZ(8)) AS TIMESTAMP_LTZ(9)):timestamp_ltz(9) NULL +-- !query +select typeof(timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(7)) +-- !query schema +struct<typeof(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ(7))):string> +-- !query output +timestamp_ntz(7) + + +-- !query +select typeof(timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(9)) +-- !query schema +struct<typeof(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ(9))):string> +-- !query output +timestamp_ntz(9) + + +-- !query +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(8)) +-- !query schema +struct<typeof(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_LTZ(8))):string> +-- !query output +timestamp_ltz(8) + + +-- !query +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(9)) +-- !query schema +struct<typeof(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_LTZ(9))):string> +-- !query output
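+timestamp_ltz(9) + + +-- !query +select timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(9)::timestamp_ltz(9) +-- !query schema +struct<CAST(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ(9)) AS TIMESTAMP_LTZ(9)):timestamp_ltz(9)> +-- !query output +2020-01-01 00:00:00.123456789 + + +-- !query +select timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(9)::timestamp_ntz(9) +-- !query schema +struct<CAST(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_LTZ(9)) AS TIMESTAMP_NTZ(9)):timestamp_ntz(9)> +-- !query output +2020-01-01 00:00:00.123456789 + + +-- !query +select timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(7)::timestamp_ltz(7) +-- !query schema +struct<CAST(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ(7)) AS TIMESTAMP_LTZ(7)):timestamp_ltz(7)> +-- !query output +2020-01-01 00:00:00.1234567 + + +-- !query +select timestamp_ntz'1960-01-01 00:00:00.123456789'::timestamp_ltz(7)::timestamp_ntz(7) +-- !query schema +struct<CAST(CAST(TIMESTAMP_NTZ '1960-01-01 00:00:00.123456789' AS TIMESTAMP_LTZ(7)) AS TIMESTAMP_NTZ(7)):timestamp_ntz(7)> +-- !query output +1960-01-01 00:00:00.1234567 + + +-- !query +select cast(null as timestamp_ltz(9))::timestamp_ntz(7) +-- !query schema +struct<CAST(CAST(NULL AS TIMESTAMP_LTZ(9)) AS TIMESTAMP_NTZ(7)):timestamp_ntz(7)> +-- !query output +NULL + + +-- !query +select cast(null as timestamp_ntz(8))::timestamp_ltz(9) +-- !query schema +struct<CAST(CAST(NULL AS TIMESTAMP_NTZ(8)) AS TIMESTAMP_LTZ(9)):timestamp_ltz(9)> +-- !query output +NULL + + +-- !query +select typeof(timestamp'2020-01-01 00:00:00.123456'::timestamp_ntz(9)) +-- !query schema +struct<typeof(CAST(TIMESTAMP '2020-01-01 00:00:00.123456' AS TIMESTAMP_NTZ(9))):string> +-- !query output +timestamp_ntz(9) + + +-- !query +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456'::timestamp_ltz(9)) +-- !query schema +struct<typeof(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456' AS TIMESTAMP_LTZ(9))):string> +-- !query output +timestamp_ltz(9) + + +-- !query +select typeof(timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(6)) +-- !query schema +struct<typeof(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ)):string> +-- !query output +timestamp_ntz + + +-- !query +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(6)) +-- !query schema +struct<typeof(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP)):string> +-- !query output +timestamp + + +-- !query +select timestamp'2020-01-01 00:00:00.123456'::timestamp_ntz(9)::timestamp +-- !query schema +struct<CAST(CAST(TIMESTAMP '2020-01-01 00:00:00.123456' AS TIMESTAMP_NTZ(9)) AS TIMESTAMP):timestamp> +-- !query output +2020-01-01 00:00:00.123456 + + +-- !query +select timestamp_ntz'2020-01-01 00:00:00.123456'::timestamp_ltz(9)::timestamp_ntz +-- !query schema +struct<CAST(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456' AS TIMESTAMP_LTZ(9)) AS TIMESTAMP_NTZ):timestamp_ntz> +-- !query output +2020-01-01 00:00:00.123456 + + +-- !query +select timestamp_ltz'2020-01-01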
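00:00:00.123456789'::timestamp_ntz(6)::timestamp_ltz +-- !query schema +struct<CAST(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ) AS TIMESTAMP):timestamp> +-- !query output +2020-01-01 00:00:00.123456 + + +-- !query +select timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(6)::timestamp_ntz +-- !query schema +struct<CAST(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP) AS TIMESTAMP_NTZ):timestamp_ntz> +-- !query output +2020-01-01 00:00:00.123456 + + +-- !query +select cast(null as timestamp)::timestamp_ntz(9) +-- !query schema +struct<CAST(CAST(NULL AS TIMESTAMP) AS TIMESTAMP_NTZ(9)):timestamp_ntz(9)> +-- !query output +NULL + + +-- !query +select cast(null as timestamp_ntz)::timestamp_ltz(9) +-- !query schema +struct<CAST(CAST(NULL AS TIMESTAMP_NTZ) AS TIMESTAMP_LTZ(9)):timestamp_ltz(9)> +-- !query output +NULL + + +-- !query +select cast(null as timestamp_ntz(9))::timestamp +-- !query schema +struct<CAST(CAST(NULL AS TIMESTAMP_NTZ(9)) AS TIMESTAMP):timestamp> +-- !query output +NULL + + +-- !query +select cast(null as timestamp_ltz(9))::timestamp_ntz +-- !query schema +struct<CAST(CAST(NULL AS TIMESTAMP_LTZ(9)) AS TIMESTAMP_NTZ):timestamp_ntz> +-- !query output +NULL + + -- !query select cast(cast('inf' as double) as timestamp) -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/nonansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/nonansi/cast.sql.out index 408cb06ef36b6..4d66796a577be 100644 --- a/sql/core/src/test/resources/sql-tests/results/nonansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/nonansi/cast.sql.out @@ -848,6 +848,182 @@ struct<CAST(CAST(NULL AS TIMESTAMP_LTZ(8)) AS TIMESTAMP_LTZ(9)):timestamp_ltz(9) NULL +-- !query +select typeof(timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(7)) +-- !query schema +struct<typeof(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ(7))):string> +-- !query output +timestamp_ntz(7) + + +-- !query +select typeof(timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(9)) +-- !query schema +struct<typeof(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ(9))):string> +-- !query output +timestamp_ntz(9) + + +-- !query +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(8)) +-- !query schema +struct<typeof(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_LTZ(8))):string> +-- !query output +timestamp_ltz(8) + + +-- !query +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(9)) +-- !query schema +struct<typeof(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_LTZ(9))):string> +-- !query output +timestamp_ltz(9) + + +-- !query +select timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(9)::timestamp_ltz(9) +-- !query schema +struct<CAST(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ(9)) AS TIMESTAMP_LTZ(9)):timestamp_ltz(9)> +-- !query output +2020-01-01 00:00:00.123456789 + + +-- !query +select timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(9)::timestamp_ntz(9)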
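+-- !query schema +struct<CAST(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_LTZ(9)) AS TIMESTAMP_NTZ(9)):timestamp_ntz(9)> +-- !query output +2020-01-01 00:00:00.123456789 + + +-- !query +select timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(7)::timestamp_ltz(7) +-- !query schema +struct<CAST(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ(7)) AS TIMESTAMP_LTZ(7)):timestamp_ltz(7)> +-- !query output +2020-01-01 00:00:00.1234567 + + +-- !query +select timestamp_ntz'1960-01-01 00:00:00.123456789'::timestamp_ltz(7)::timestamp_ntz(7) +-- !query schema +struct<CAST(CAST(TIMESTAMP_NTZ '1960-01-01 00:00:00.123456789' AS TIMESTAMP_LTZ(7)) AS TIMESTAMP_NTZ(7)):timestamp_ntz(7)> +-- !query output +1960-01-01 00:00:00.1234567 + + +-- !query +select cast(null as timestamp_ltz(9))::timestamp_ntz(7) +-- !query schema +struct<CAST(CAST(NULL AS TIMESTAMP_LTZ(9)) AS TIMESTAMP_NTZ(7)):timestamp_ntz(7)> +-- !query output +NULL + + +-- !query +select cast(null as timestamp_ntz(8))::timestamp_ltz(9) +-- !query schema +struct<CAST(CAST(NULL AS TIMESTAMP_NTZ(8)) AS TIMESTAMP_LTZ(9)):timestamp_ltz(9)> +-- !query output +NULL + + +-- !query +select typeof(timestamp'2020-01-01 00:00:00.123456'::timestamp_ntz(9)) +-- !query schema +struct<typeof(CAST(TIMESTAMP '2020-01-01 00:00:00.123456' AS TIMESTAMP_NTZ(9))):string> +-- !query output +timestamp_ntz(9) + + +-- !query +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456'::timestamp_ltz(9)) +-- !query schema +struct<typeof(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456' AS TIMESTAMP_LTZ(9))):string> +-- !query output +timestamp_ltz(9) + + +-- !query +select typeof(timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(6)) +-- !query schema +struct<typeof(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ)):string> +-- !query output +timestamp_ntz + + +-- !query +select typeof(timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(6)) +-- !query schema +struct<typeof(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP)):string> +-- !query output +timestamp + + +-- !query +select timestamp'2020-01-01 00:00:00.123456'::timestamp_ntz(9)::timestamp +-- !query schema +struct<CAST(CAST(TIMESTAMP '2020-01-01 00:00:00.123456' AS TIMESTAMP_NTZ(9)) AS TIMESTAMP):timestamp> +-- !query output +2020-01-01 00:00:00.123456 + + +-- !query +select timestamp_ntz'2020-01-01 00:00:00.123456'::timestamp_ltz(9)::timestamp_ntz +-- !query schema +struct<CAST(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456' AS TIMESTAMP_LTZ(9)) AS TIMESTAMP_NTZ):timestamp_ntz> +-- !query output +2020-01-01 00:00:00.123456 + + +-- !query +select timestamp_ltz'2020-01-01 00:00:00.123456789'::timestamp_ntz(6)::timestamp_ltz +-- !query schema +struct<CAST(CAST(TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP_NTZ) AS TIMESTAMP):timestamp> +-- !query output +2020-01-01 00:00:00.123456 + + +-- !query +select timestamp_ntz'2020-01-01 00:00:00.123456789'::timestamp_ltz(6)::timestamp_ntz +-- !query schema +struct<CAST(CAST(TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789' AS TIMESTAMP) AS TIMESTAMP_NTZ):timestamp_ntz> +-- !query output +2020-01-01 00:00:00.123456 + + +-- !query +select cast(null as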
timestamp)::timestamp_ntz(9) +-- !query schema +struct<CAST(CAST(NULL AS TIMESTAMP) AS TIMESTAMP_NTZ(9)):timestamp_ntz(9)> +-- !query output +NULL + + +-- !query +select cast(null as timestamp_ntz)::timestamp_ltz(9) +-- !query schema +struct<CAST(CAST(NULL AS TIMESTAMP_NTZ) AS TIMESTAMP_LTZ(9)):timestamp_ltz(9)> +-- !query output +NULL + + +-- !query +select cast(null as timestamp_ntz(9))::timestamp +-- !query schema +struct<CAST(CAST(NULL AS TIMESTAMP_NTZ(9)) AS TIMESTAMP):timestamp> +-- !query output +NULL + + +-- !query +select cast(null as timestamp_ltz(9))::timestamp_ntz +-- !query schema +struct<CAST(CAST(NULL AS TIMESTAMP_LTZ(9)) AS TIMESTAMP_NTZ):timestamp_ntz> +-- !query output +NULL + + -- !query select cast(cast('inf' as double) as timestamp) -- !query schema From e081d3cd342561a6c6ab3b4b9e8bdd7bba1c70e1 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 17 Jun 2026 23:46:51 +0200 Subject: [PATCH 2/5] [SPARK-57511][SQL] Fold timestamp/nanos cast cases using AnyTimestampType and AnyTimestampNanoType Consolidate the duplicated micro/nanos and nanos/nanos case arms in Cast.canCast, Cast.canAnsiCast, and Cast.canANSIStoreAssign using the AnyTimestampNanoType type pattern and the AnyTimestampType.acceptsType guard, without changing behavior. --- .../spark/sql/catalyst/expressions/Cast.scala | 90 +++++++------------ 1 file changed, 30 insertions(+), 60 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 216603df0f145..200a1679e444c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -115,23 +115,13 @@ object Cast extends QueryErrorsBase { case (_: StringType, _: AnyTimestampNanoType) => true - case (TimestampNTZType, _: TimestampNTZNanosType) => true - case (_: TimestampNTZNanosType, TimestampNTZType) => true - case (TimestampType, _: TimestampLTZNanosType) => true - case (_: TimestampLTZNanosType, TimestampType) => true - - case (_: TimestampNTZNanosType, _: TimestampNTZNanosType) => true - case (_: TimestampLTZNanosType, _: TimestampLTZNanosType) =>
true - - case (_: TimestampLTZNanosType, _: TimestampNTZNanosType) => true - case (_: TimestampNTZNanosType, _: TimestampLTZNanosType) => true - - // Cross-family casts between the micro family member (TIMESTAMP_LTZ(6) = TIMESTAMP and - // TIMESTAMP_NTZ(6) = TIMESTAMP_NTZ) and the other family's nanosecond member. - case (TimestampType, _: TimestampNTZNanosType) => true - case (_: TimestampNTZNanosType, TimestampType) => true - case (TimestampNTZType, _: TimestampLTZNanosType) => true - case (_: TimestampLTZNanosType, TimestampNTZType) => true + // Casts between timestamp types where at least one side is a nanosecond type are allowed for + // every micro/nanos and nanos/nanos combination, across both time-zone families. Precision 6 is + // the micro family member (TIMESTAMP_LTZ(6) = TIMESTAMP, TIMESTAMP_NTZ(6) = TIMESTAMP_NTZ), + // matched here via AnyTimestampType; the all-micro TIMESTAMP <-> TIMESTAMP_NTZ pair is above. + case (_: AnyTimestampNanoType, _: AnyTimestampNanoType) => true + case (t, _: AnyTimestampNanoType) if AnyTimestampType.acceptsType(t) => true + case (_: AnyTimestampNanoType, t) if AnyTimestampType.acceptsType(t) => true case (DateType, _: TimestampLTZNanosType) => true case (_: TimestampLTZNanosType, DateType) => true @@ -275,23 +265,13 @@ object Cast extends QueryErrorsBase { case (_: StringType, _: AnyTimestampNanoType) => true - case (TimestampNTZType, _: TimestampNTZNanosType) => true - case (_: TimestampNTZNanosType, TimestampNTZType) => true - case (TimestampType, _: TimestampLTZNanosType) => true - case (_: TimestampLTZNanosType, TimestampType) => true - - case (_: TimestampNTZNanosType, _: TimestampNTZNanosType) => true - case (_: TimestampLTZNanosType, _: TimestampLTZNanosType) => true - - case (_: TimestampLTZNanosType, _: TimestampNTZNanosType) => true - case (_: TimestampNTZNanosType, _: TimestampLTZNanosType) => true - - // Cross-family casts between the micro family member (TIMESTAMP_LTZ(6) = TIMESTAMP and - // TIMESTAMP_NTZ(6) 
= TIMESTAMP_NTZ) and the other family's nanosecond member. - case (TimestampType, _: TimestampNTZNanosType) => true - case (_: TimestampNTZNanosType, TimestampType) => true - case (TimestampNTZType, _: TimestampLTZNanosType) => true - case (_: TimestampLTZNanosType, TimestampNTZType) => true + // Casts between timestamp types where at least one side is a nanosecond type are allowed for + // every micro/nanos and nanos/nanos combination, across both time-zone families. Precision 6 is + // the micro family member (TIMESTAMP_LTZ(6) = TIMESTAMP, TIMESTAMP_NTZ(6) = TIMESTAMP_NTZ), + // matched here via AnyTimestampType; the all-micro TIMESTAMP <-> TIMESTAMP_NTZ pair is above. + case (_: AnyTimestampNanoType, _: AnyTimestampNanoType) => true + case (t, _: AnyTimestampNanoType) if AnyTimestampType.acceptsType(t) => true + case (_: AnyTimestampNanoType, t) if AnyTimestampType.acceptsType(t) => true case (DateType, _: TimestampLTZNanosType) => true case (_: TimestampLTZNanosType, DateType) => true @@ -458,39 +438,29 @@ object Cast extends QueryErrorsBase { case (_: NumericType, _: NumericType) => true case (_: AtomicType, _: StringType) => true case (_: CalendarIntervalType, _: StringType) => true - // SPARK-57293: narrowing a nanosecond-precision timestamp to its microsecond counterpart - // drops the sub-microsecond digits, so it is not allowed as a (silent) store assignment. - // This conversion stays explicit-only. - case (_: TimestampNTZNanosType, TimestampNTZType) => false - case (_: TimestampLTZNanosType, TimestampType) => false - // SPARK-57323: DATE <-> nanosecond-precision timestamp requires an explicit CAST in both - // directions. nanos -> DATE silently drops time-of-day and sub-microsecond digits (the same - // rule as the narrowing above). 
DATE -> nanos is lossless (midnight, zero sub-micro part), - // but conversions involving the nanos types stay explicit-only while the types are - // unreleased: allowing the store assignment later is a compatible change, revoking it is not. - // Note this is stricter than micro DATE <-> TIMESTAMP[_NTZ], which the catch-all below allows. - case (DateType, _: TimestampLTZNanosType) => false - case (DateType, _: TimestampNTZNanosType) => false - case (_: TimestampLTZNanosType, DateType) => false - case (_: TimestampNTZNanosType, DateType) => false // SPARK-57490: same-family cross-precision nanosecond casts: widening (e.g. TIMESTAMP_NTZ(7) -> // TIMESTAMP_NTZ(9)) is lossless and allowed as a silent store assignment, while narrowing // (e.g. (9) -> (7)) drops sub-microsecond digits and stays explicit-only. Equal precision is - // already handled by the `from == to` short-circuit above. + // handled by the `from == to` short-circuit above; micros -> nanos widening (e.g. TIMESTAMP_NTZ + // -> TIMESTAMP_NTZ(9)) is lossless and falls to the catch-all below. case (f: TimestampNTZNanosType, t: TimestampNTZNanosType) => f.precision <= t.precision case (f: TimestampLTZNanosType, t: TimestampLTZNanosType) => f.precision <= t.precision - // Cross-family nanosecond casts (TIMESTAMP_LTZ(p) <-> TIMESTAMP_NTZ(q)) reinterpret the value - // against the session time zone, so they stay explicit-only rather than silent store - // assignments while the nanosecond types are unreleased (same rationale as the cases above). - // This includes the mixed micro/nanos cases where one side is the precision-6 micro family - // member (TIMESTAMP / TIMESTAMP_NTZ); only the all-micro TIMESTAMP <-> TIMESTAMP_NTZ pair stays - // store-assignable via the catch-all below. 
- case (_: TimestampLTZNanosType, _: TimestampNTZNanosType) => false - case (_: TimestampNTZNanosType, _: TimestampLTZNanosType) => false + // SPARK-57323: DATE <-> nanosecond-precision timestamp requires an explicit CAST in both + // directions (nanos -> DATE drops fields; DATE -> nanos is lossless but kept explicit-only + // while the nanos types are unreleased). Stricter than micro DATE <-> TIMESTAMP[_NTZ], which + // the catch-all below allows. + case (DateType, _: AnyTimestampNanoType) => false + case (_: AnyTimestampNanoType, DateType) => false + // SPARK-57293/57511: narrowing any nanosecond timestamp to a microsecond timestamp drops the + // sub-microsecond digits, and cross-family casts additionally reinterpret the value against the + // session time zone; both stay explicit-only rather than silent store assignments while the + // nanos types are unreleased. This covers same-family narrowing (nanos -> micro), cross-family + // nanos <-> nanos, and the mixed micro/nanos pairs at the precision-6 boundary. Only the + // all-micro TIMESTAMP <-> TIMESTAMP_NTZ pair stays store-assignable via the catch-all below. 
+ case (_: AnyTimestampNanoType, t) if AnyTimestampType.acceptsType(t) => false case (TimestampType, _: TimestampNTZNanosType) => false - case (_: TimestampNTZNanosType, TimestampType) => false case (TimestampNTZType, _: TimestampLTZNanosType) => false - case (_: TimestampLTZNanosType, TimestampNTZType) => false + case (_: AnyTimestampNanoType, _: AnyTimestampNanoType) => false case (_: DatetimeType, _: DatetimeType) => true case (ArrayType(fromType, fn), ArrayType(toType, tn)) => From bd01cb22cb1a418b1237539e0a4e7668f9186fe7 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 18 Jun 2026 07:56:27 +0200 Subject: [PATCH 3/5] Fix coding style --- .../sql/catalyst/util/SparkDateTimeUtils.scala | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala index b6d3012939b56..2b992ba53fe81 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala @@ -312,10 +312,11 @@ trait SparkDateTimeUtils { /** * Converts a `TIMESTAMP_LTZ(p)` nanosecond value into the `TIMESTAMP_NTZ(precision)` wall-clock - * value observed in the time zone `zoneId`. The LTZ value denotes an absolute instant; rendering - * it as a local date-time at `zoneId` yields the NTZ representation. Time-zone offsets shift only - * whole seconds, so the sub-microsecond `nanosWithinMicro` component is preserved before being - * floored to the target `precision` (same flooring as same-family narrowing casts). + * value observed in the time zone `zoneId`. The LTZ value denotes an absolute instant; + * rendering it as a local date-time at `zoneId` yields the NTZ representation. 
Time-zone + * offsets shift only whole seconds, so the sub-microsecond `nanosWithinMicro` component is + * preserved before being floored to the target `precision` (same flooring as same-family + * narrowing casts). */ def timestampLTZNanosToNTZNanos( v: TimestampNanosVal, @@ -327,10 +328,10 @@ trait SparkDateTimeUtils { /** * Converts a `TIMESTAMP_NTZ(q)` nanosecond value into the `TIMESTAMP_LTZ(precision)` instant - * obtained by interpreting its wall-clock local date-time in the time zone `zoneId`. This is the - * reverse of [[timestampLTZNanosToNTZNanos]]; the sub-microsecond `nanosWithinMicro` component is - * preserved across the (whole-second) offset shift before being floored to the target - * `precision`. + * obtained by interpreting its wall-clock local date-time in the time zone `zoneId`. This is + * the reverse of [[timestampLTZNanosToNTZNanos]]; the sub-microsecond `nanosWithinMicro` + * component is preserved across the (whole-second) offset shift before being floored to the + * target `precision`. */ def timestampNTZNanosToLTZNanos( v: TimestampNanosVal, From 1d2b27141f284c9113d69a66cf56616496cd6553 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 18 Jun 2026 13:14:44 +0200 Subject: [PATCH 4/5] [SPARK-57511][SQL] Address review: fix store-assign comment, add DST gap/overlap tests Reword the canANSIStoreAssign SPARK-57293/57511 comment so it no longer overstates exclusivity: the all-micro TIMESTAMP <-> TIMESTAMP_NTZ pair and micros -> nanos same-family widening both stay store-assignable via the catch-all, consistent with the SPARK-57490 note above. Add a cross-family nanos cast test that exercises the LA spring-forward gap (02:30 -> shifted forward) and fall-back overlap (01:30 -> earlier offset), asserting the nanos NTZ <-> LTZ result matches the micro TIMESTAMP_NTZ <-> TIMESTAMP cast on the epoch-micros part. Also assert !Cast.forceNullable in both cross-family contract tests, mirroring the null-safe micro pair. 
--- .../spark/sql/catalyst/expressions/Cast.scala | 5 +- .../catalyst/expressions/CastSuiteBase.scala | 49 +++++++++++++++++++ 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 200a1679e444c..3c4ccc1db281c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -455,8 +455,9 @@ object Cast extends QueryErrorsBase { // sub-microsecond digits, and cross-family casts additionally reinterpret the value against the // session time zone; both stay explicit-only rather than silent store assignments while the // nanos types are unreleased. This covers same-family narrowing (nanos -> micro), cross-family - // nanos <-> nanos, and the mixed micro/nanos pairs at the precision-6 boundary. Only the - // all-micro TIMESTAMP <-> TIMESTAMP_NTZ pair stays store-assignable via the catch-all below. + // nanos <-> nanos, and the mixed micro/nanos pairs at the precision-6 boundary; everything + // matched here is explicit-only. The all-micro TIMESTAMP <-> TIMESTAMP_NTZ pair and micros -> + // nanos same-family widening stay store-assignable via the catch-all below. 
case (_: AnyTimestampNanoType, t) if AnyTimestampType.acceptsType(t) => false case (TimestampType, _: TimestampNTZNanosType) => false case (TimestampNTZType, _: TimestampLTZNanosType) => false diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index e4438085c7b39..980c68f65f20e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -822,6 +822,10 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { // The conversion depends on the session time zone in both directions. assert(Cast.needsTimeZone(ltz, ntz)) assert(Cast.needsTimeZone(ntz, ltz)) + // Reinterpreting a non-null instant/local date-time never yields null, so the cast is + // null-safe in both directions (mirroring the micro TIMESTAMP <-> TIMESTAMP_NTZ pair). + assert(!Cast.forceNullable(ltz, ntz)) + assert(!Cast.forceNullable(ntz, ltz)) } } @@ -842,6 +846,8 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { assert(!Cast.canUpCast(from, to)) assert(!Cast.canANSIStoreAssign(from, to)) assert(Cast.needsTimeZone(from, to)) + // Null-safe like the micro TIMESTAMP <-> TIMESTAMP_NTZ pair. + assert(!Cast.forceNullable(from, to)) } } // Sanity: the all-micro TIMESTAMP <-> TIMESTAMP_NTZ pair (precision 6 <-> 6) stays a silent @@ -1547,6 +1553,49 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } } + test("cross-family nanos cast: DST gap and overlap resolve like the micro cast") { + // The cross-family nanos casts reuse the same java.time resolver as the micro + // TIMESTAMP <-> TIMESTAMP_NTZ casts (NTZ -> LTZ interprets the wall clock via + // LocalDateTime.atZone; LTZ -> NTZ renders the instant via Instant.atZone). 
Exercise the LA + // spring-forward gap (02:30 does not exist -> java.time shifts forward) and the fall-back + // overlap (01:30 occurs twice -> earlier offset), asserting the nanos result matches the + // corresponding micro cast on the epoch-micros part. The existing zone tests above use + // non-transition instants, so this is the only place DST resolution is observable. + val la = Option(LA.getId) + Seq( + LocalDateTime.of(2020, 3, 8, 2, 30, 0, 123456789), // inside the PST -> PDT gap + LocalDateTime.of(2020, 11, 1, 1, 30, 0, 123456789)) // inside the PDT -> PST overlap + .foreach { ldt => + // NTZ stores the wall-clock fields as epoch-micros-at-UTC, exactly like micro TIMESTAMP_NTZ. + val ntzMicros = localDateTimeToNanosVal(ldt).epochMicros + // The expected LTZ instant is whatever the micro TIMESTAMP_NTZ -> TIMESTAMP cast produces; + // for the gap this is the forward-shifted instant, for the overlap the earlier offset. + val microLtz = evaluateWithoutCodegen( + cast(Literal.create(ntzMicros, TimestampNTZType), TimestampType, la)).asInstanceOf[Long] + // The reverse micro TIMESTAMP -> TIMESTAMP_NTZ of that instant (for the gap, the wall clock + // is the shifted 03:30, not the original 02:30). + val microNtz = evaluateWithoutCodegen( + cast(Literal.create(microLtz, TimestampType), TimestampNTZType, la)).asInstanceOf[Long] + for { + p <- TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION + q <- TimestampNTZNanosType.MIN_PRECISION to TimestampNTZNanosType.MAX_PRECISION + } { + // NTZ(q) -> LTZ(p): the epoch-micros part resolves DST exactly like the micro cast. + val ntzToLtzSrc = floorNanosToPrecision(789, q) + checkEvaluation( + cast(Literal.create(nanosVal(ntzMicros, ntzToLtzSrc), TimestampNTZNanosType(q)), + TimestampLTZNanosType(p), la), + nanosVal(microLtz, floorNanosToPrecision(ntzToLtzSrc, p))) + // LTZ(p) -> NTZ(q): rendering the resolved instant matches the micro cast too. 
+ val ltzToNtzSrc = floorNanosToPrecision(789, p) + checkEvaluation( + cast(Literal.create(nanosVal(microLtz, ltzToNtzSrc), TimestampLTZNanosType(p)), + TimestampNTZNanosType(q), la), + nanosVal(microNtz, floorNanosToPrecision(ltzToNtzSrc, q))) + } + } + } + test("cross-family nanos cast: micro family member (precision 6) to/from nanos") { // The precision-6 micro family members are TIMESTAMP_LTZ(6) = TIMESTAMP and // TIMESTAMP_NTZ(6) = TIMESTAMP_NTZ. Casting them across families reinterprets the value against From 0f1682c5fceaa43a150ef4d1b0bd3a42eb17ede5 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 18 Jun 2026 18:04:21 +0200 Subject: [PATCH 5/5] [SPARK-57511][SQL] Hoist duplicated zoneId codegen boilerplate into a helper Extract the repeated `JavaCode.global(ctx.addReferenceObj("zoneId", ...))` boilerplate (14 call sites across the timestamp/date codegen arms) into a single private zoneIdValue(ctx) helper. The helper is invoked per-arm rather than hoisted to a pre-match val, so the zoneId reference object is only added for casts that actually need the session time zone, preserving the existing behavior for zone-independent arms. --- .../spark/sql/catalyst/expressions/Cast.scala | 74 ++++++------------- 1 file changed, 22 insertions(+), 52 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 3c4ccc1db281c..506babb08f341 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -1609,6 +1609,14 @@ case class Cast( code"$evPrim = ${NumberConverter.getClass.getName.stripSuffix("$")}.toBinary($c);" } + // Registers the session `zoneId` as a codegen reference object and returns a handle to it. 
+ // Kept as a method (rather than a pre-`match` val) so the reference is only added for the arms + // that actually need the session time zone, not for zone-independent casts. + private[this] def zoneIdValue(ctx: CodegenContext): GlobalValue = { + val zoneIdClass = classOf[ZoneId] + JavaCode.global(ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), zoneIdClass) + } + private[this] def castToDateCode( from: DataType, ctx: CodegenContext): CastFunction = { @@ -1633,16 +1641,14 @@ case class Cast( } case TimestampType => - val zidClass = classOf[ZoneId] - val zid = JavaCode.global(ctx.addReferenceObj("zoneId", zoneId, zidClass.getName), zidClass) + val zid = zoneIdValue(ctx) (c, evPrim, evNull) => code"""$evPrim = $dateTimeUtilsCls.microsToDays($c, $zid);""" case TimestampNTZType => (c, evPrim, evNull) => code"$evPrim = $dateTimeUtilsCls.microsToDays($c, java.time.ZoneOffset.UTC);" case _: TimestampLTZNanosType => - val zidClass = classOf[ZoneId] - val zid = JavaCode.global(ctx.addReferenceObj("zoneId", zoneId, zidClass.getName), zidClass) + val zid = zoneIdValue(ctx) (c, evPrim, evNull) => code"$evPrim = $dateTimeUtilsCls.microsToDays($c.epochMicros, $zid);" case _: TimestampNTZNanosType => @@ -1819,10 +1825,7 @@ case class Cast( from: DataType, ctx: CodegenContext): CastFunction = from match { case _: StringType => - val zoneIdClass = classOf[ZoneId] - val zid = JavaCode.global( - ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), - zoneIdClass) + val zid = zoneIdValue(ctx) val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]]) (c, evPrim, evNull) => if (ansiEnabled) { @@ -1845,26 +1848,17 @@ case class Cast( case _: IntegralType => (c, evPrim, evNull) => code"$evPrim = ${longToTimeStampCode(c)};" case DateType => - val zoneIdClass = classOf[ZoneId] - val zid = JavaCode.global( - ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), - zoneIdClass) + val zid = zoneIdValue(ctx) (c, evPrim, evNull) => code"""$evPrim = 
$dateTimeUtilsCls.daysToMicros($c, $zid);""" case TimestampNTZType => - val zoneIdClass = classOf[ZoneId] - val zid = JavaCode.global( - ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), - zoneIdClass) + val zid = zoneIdValue(ctx) (c, evPrim, evNull) => code"$evPrim = $dateTimeUtilsCls.convertTz($c, $zid, java.time.ZoneOffset.UTC);" case _: TimestampLTZNanosType => (c, evPrim, evNull) => code"$evPrim = $c.epochMicros;" case _: TimestampNTZNanosType => - val zoneIdClass = classOf[ZoneId] - val zid = JavaCode.global( - ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), - zoneIdClass) + val zid = zoneIdValue(ctx) (c, evPrim, evNull) => code"$evPrim = $dateTimeUtilsCls.convertTz($c.epochMicros, $zid, java.time.ZoneOffset.UTC);" case DecimalType() => @@ -1924,19 +1918,13 @@ case class Cast( (c, evPrim, evNull) => code"$evPrim = $dateTimeUtilsCls.daysToMicros($c, java.time.ZoneOffset.UTC);" case TimestampType => - val zoneIdClass = classOf[ZoneId] - val zid = JavaCode.global( - ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), - zoneIdClass) + val zid = zoneIdValue(ctx) (c, evPrim, evNull) => code"$evPrim = $dateTimeUtilsCls.convertTz($c, java.time.ZoneOffset.UTC, $zid);" case _: TimestampNTZNanosType => (c, evPrim, evNull) => code"$evPrim = $c.epochMicros;" case _: TimestampLTZNanosType => - val zoneIdClass = classOf[ZoneId] - val zid = JavaCode.global( - ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), - zoneIdClass) + val zid = zoneIdValue(ctx) (c, evPrim, evNull) => code"$evPrim = $dateTimeUtilsCls.convertTz($c.epochMicros, java.time.ZoneOffset.UTC, $zid);" } @@ -1946,10 +1934,7 @@ case class Cast( precision: Int, ctx: CodegenContext): CastFunction = from match { case _: StringType => - val zoneIdClass = classOf[ZoneId] - val zid = JavaCode.global( - ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), - zoneIdClass) + val zid = zoneIdValue(ctx) val tsOpt = ctx.freshVariable("tsOpt", classOf[Option[TimestampNanosVal]]) 
(c, evPrim, evNull) => if (ansiEnabled) { @@ -1973,10 +1958,7 @@ case class Cast( (c, evPrim, evNull) => code"$evPrim = TimestampNanosVal.fromParts($c, (short) 0);" case TimestampNTZType => - val zoneIdClass = classOf[ZoneId] - val zid = JavaCode.global( - ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), - zoneIdClass) + val zid = zoneIdValue(ctx) (c, evPrim, evNull) => code"$evPrim = TimestampNanosVal.fromParts(" + code"$dateTimeUtilsCls.convertTz($c, $zid, java.time.ZoneOffset.UTC), (short) 0);" @@ -1984,17 +1966,11 @@ case class Cast( (c, evPrim, evNull) => code"$evPrim = $dateTimeUtilsCls.truncateTimestampNanosToPrecision($c, $precision);" case _: TimestampNTZNanosType => - val zoneIdClass = classOf[ZoneId] - val zid = JavaCode.global( - ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), - zoneIdClass) + val zid = zoneIdValue(ctx) (c, evPrim, evNull) => code"$evPrim = $dateTimeUtilsCls.timestampNTZNanosToLTZNanos($c, $zid, $precision);" case DateType => - val zoneIdClass = classOf[ZoneId] - val zid = JavaCode.global( - ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), - zoneIdClass) + val zid = zoneIdValue(ctx) (c, evPrim, evNull) => code"$evPrim = TimestampNanosVal.fromParts(" + code"$dateTimeUtilsCls.daysToMicros($c, $zid), (short) 0);" @@ -2028,10 +2004,7 @@ case class Cast( (c, evPrim, evNull) => code"$evPrim = TimestampNanosVal.fromParts($c, (short) 0);" case TimestampType => - val zoneIdClass = classOf[ZoneId] - val zid = JavaCode.global( - ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), - zoneIdClass) + val zid = zoneIdValue(ctx) (c, evPrim, evNull) => code"$evPrim = TimestampNanosVal.fromParts(" + code"$dateTimeUtilsCls.convertTz($c, java.time.ZoneOffset.UTC, $zid), (short) 0);" @@ -2039,10 +2012,7 @@ case class Cast( (c, evPrim, evNull) => code"$evPrim = $dateTimeUtilsCls.truncateTimestampNanosToPrecision($c, $precision);" case _: TimestampLTZNanosType => - val zoneIdClass = classOf[ZoneId] - val zid = 
JavaCode.global( - ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), - zoneIdClass) + val zid = zoneIdValue(ctx) (c, evPrim, evNull) => code"$evPrim = $dateTimeUtilsCls.timestampLTZNanosToNTZNanos($c, $zid, $precision);" case DateType =>