# NOTE(review): line breaks and leading indentation in this patch section were reconstructed from
# a whitespace-collapsed copy; text inside each line is unchanged. Confirm against the original
# patch before applying.
#
# python/pyspark/sql/tests/test_functions.py
#   test_function_parity computes missing_in_py as the JVM function set minus the PySpark one and
#   asserts it equals expected_missing_in_py. This hunk lists "unix_nanos" there, so the assertion
#   still holds while the function is added on the JVM side only (the inline comment tags the
#   Python follow-up with SPARK-57527).
diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py
index 8599d0dd46e1c..10aa01e5a6005 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -82,7 +82,9 @@ def test_function_parity(self):
         missing_in_py = jvm_fn_set.difference(py_fn_set)
 
         # Functions that we expect to be missing in python until they are added to pyspark
-        expected_missing_in_py = set()
+        expected_missing_in_py = {
+            "unix_nanos", # SPARK-57527: PySpark support tracked as a follow-up
+        }
 
         self.assertEqual(
             expected_missing_in_py, missing_in_py, "Missing functions in pyspark not as expected"
# sql/api/src/main/scala/org/apache/spark/sql/functions.scala
#   Adds the Scala Column API entry point unix_nanos directly after unix_micros. Like its
#   neighbour it only forwards to the SQL function of the same name through Column.fn; the
#   Scaladoc states the accepted input types and the DECIMAL(21, 0) result.
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
index a2850f37a8697..76748f0ae9420 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
@@ -8199,6 +8199,16 @@ object functions {
    */
   def unix_micros(e: Column): Column = Column.fn("unix_micros", e)
 
+  /**
+   * Returns the number of nanoseconds since 1970-01-01 00:00:00 UTC for a nanosecond-precision
+   * timestamp (`TIMESTAMP_LTZ(p)` / `TIMESTAMP_NTZ(p)`, `p` in `[7, 9]`). The result is a
+   * lossless `DECIMAL(21, 0)`.
+   *
+   * @group datetime_funcs
+   * @since 4.3.0
+   */
+  def unix_nanos(e: Column): Column = Column.fn("unix_nanos", e)
+
   /**
    * Returns the number of milliseconds since 1970-01-01 00:00:00 UTC. Truncates higher levels of
    * precision.
# NOTE(review): line breaks and leading indentation in this patch section were reconstructed from
# a whitespace-collapsed copy; text inside each line is unchanged. Confirm against the original
# patch before applying.
#
# sql/catalyst/.../analysis/FunctionRegistry.scala
#   Registers the UnixNanos expression under the SQL name "unix_nanos", next to the existing
#   unix_seconds / unix_millis / unix_micros entries.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index dea90247cc1b1..2c47fca543a98 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -777,6 +777,7 @@ object FunctionRegistry {
     expression[UnixSeconds]("unix_seconds"),
     expression[UnixMillis]("unix_millis"),
     expression[UnixMicros]("unix_micros"),
+    expression[UnixNanos]("unix_nanos"),
     expression[ConvertTimezone]("convert_timezone"),
     expressionBuilder("time_bucket", TimeBucketExpressionBuilder),
 
# sql/catalyst/.../expressions/datetimeExpressions.scala
#   Adds the UnixNanos unary expression (plus the java.math.BigInteger import it needs).
#   - inputTypes is Seq(AnyTimestampNanoType) and the class mixes in ExpectsInputTypes, so other
#     input types are reported as a type mismatch (see the "rejects non-nanosecond input types"
#     test in DateExpressionsSuite below).
#   - dataType is DecimalType(21, 0).
#   - nullSafeEval and doGenCode perform the same computation, once interpreted and once as
#     generated Java: epochMicros * NANOS_PER_MICROS + nanosWithinMicro, evaluated in BigInteger
#     and wrapped as a Decimal with precision 21 and scale 0.
#   NOTE(review): the SET example's output line shows a single space between key and value; the
#   surrounding "scalastyle:off line.contains.tab" suggests the original has a tab there - confirm.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index bdbc071e576cb..3fbef82ef246e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql.catalyst.expressions
 
+import java.math.BigInteger
 import java.text.ParseException
 import java.time.{DateTimeException, LocalDate, LocalDateTime, ZoneId, ZoneOffset}
 import java.time.format.DateTimeParseException
@@ -854,6 +855,57 @@ case class UnixMicros(child: Expression) extends TimestampToLongBase {
     copy(child = newChild)
 }
 
+// scalastyle:off line.contains.tab
+@ExpressionDescription(
+  usage = "_FUNC_(timestamp) - Returns the number of nanoseconds since 1970-01-01 00:00:00 UTC.",
+  examples = """
+    Examples:
+      > SET spark.sql.timestampNanosTypes.enabled=true;
+       spark.sql.timestampNanosTypes.enabled true
+      > SELECT _FUNC_(TIMESTAMP_NTZ '2008-12-25 15:30:00.123456789');
+       1230219000123456789
+  """,
+  group = "datetime_funcs",
+  since = "4.3.0")
+// scalastyle:on line.contains.tab
+case class UnixNanos(child: Expression)
+  extends UnaryExpression with ExpectsInputTypes {
+  override def nullIntolerant: Boolean = true
+
+  // Accepts only the nanosecond-precision timestamp types TIMESTAMP_LTZ(p) / TIMESTAMP_NTZ(p)
+  // (p in [7, 9]); support for the microsecond timestamp types is deferred to a follow-up.
+  override def inputTypes: Seq[AbstractDataType] = Seq(AnyTimestampNanoType)
+
+  // epochMicros * 1000 overflows a 64-bit BIGINT across the full [0001..9999] calendar range, so
+  // the result is a lossless DECIMAL with enough precision to hold every value (~2.5e20 max).
+  override def dataType: DataType = DecimalType(21, 0)
+
+  override def nullSafeEval(input: Any): Any = {
+    val v = input.asInstanceOf[TimestampNanosVal]
+    val nanos = BigInteger.valueOf(v.epochMicros)
+      .multiply(BigInteger.valueOf(NANOS_PER_MICROS))
+      .add(BigInteger.valueOf(v.nanosWithinMicro.toLong))
+    Decimal.apply(new java.math.BigDecimal(nanos), 21, 0)
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    nullSafeCodeGen(ctx, ev, c => {
+      val bi = ctx.freshName("nanos")
+      s"""
+         |java.math.BigInteger $bi = java.math.BigInteger.valueOf($c.epochMicros)
+         | .multiply(java.math.BigInteger.valueOf(${NANOS_PER_MICROS}L))
+         | .add(java.math.BigInteger.valueOf($c.nanosWithinMicro));
+         |${ev.value} = Decimal.apply(new java.math.BigDecimal($bi), 21, 0);
+         |""".stripMargin
+    })
+  }
+
+  override def prettyName: String = "unix_nanos"
+
+  override protected def withNewChildInternal(newChild: Expression): UnixNanos =
+    copy(child = newChild)
+}
+
 // scalastyle:off line.contains.tab
 @ExpressionDescription(
   usage = "_FUNC_(date) - Returns the year component of the date/timestamp.",
# sql/catalyst/.../expressions/DateExpressionsSuite.scala
#   Imports TimestampNanosVal and adds two expression-level tests.
#   - "unix_nanos over nanosecond-precision timestamps": checks one post-epoch value for NTZ and
#     LTZ under every precision visited by foreachNanosPrecision, a pre-epoch value (1960), a
#     far-future value (9999-12-31) whose result is additionally asserted to exceed
#     Long.MaxValue, and NULL input for both types. expectedNanos recomputes the expected value
#     independently with Scala BigInt.
#   - "unix_nanos rejects non-nanosecond input types": for TimestampType, TimestampNTZType,
#     DateType and LongType, checkInputDataTypes() must return a DataTypeMismatch whose
#     errorSubClass is "UNEXPECTED_INPUT_TYPE".
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index 20f3dea2ec224..8771123ad1202 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -41,7 +41,7 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.SQLConf.TimestampTypes
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.types.DataTypeTestUtils.{dayTimeIntervalTypes, yearMonthIntervalTypes}
-import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
+import org.apache.spark.unsafe.types.{CalendarInterval, TimestampNanosVal, UTF8String}
 
 class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
 
@@ -1696,6 +1696,53 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkEvaluation(UnixMicros(Literal(timestampWithNanos)), 1000001L)
   }
 
+  test("SPARK-57527: unix_nanos over nanosecond-precision timestamps") {
+    import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils._
+
+    def expectedNanos(v: TimestampNanosVal): Decimal = {
+      val nanos = BigInt(v.epochMicros) * NANOS_PER_MICROS + v.nanosWithinMicro.toInt
+      Decimal(BigDecimal(nanos), 21, 0)
+    }
+
+    // 2008-12-25 15:30:00.123456789 -> 1230219000123456789 nanos since the epoch. unix_nanos
+    // applies no zone shift, so the NTZ wall-clock value and the LTZ instant at the same UTC
+    // reading produce the same result; the declared precision does not re-truncate the value.
+    val ntz = localDateTimeToNanosVal(timestampNTZ(2008, 12, 25, 15, 30, 0, 123456789))
+    val ltz = instantToNanosVal(Instant.parse("2008-12-25T15:30:00.123456789Z"))
+    val post = Decimal(BigDecimal("1230219000123456789"), 21, 0)
+    foreachNanosPrecision { p =>
+      checkEvaluation(UnixNanos(Literal.create(ntz, TimestampNTZNanosType(p))), post)
+      checkEvaluation(UnixNanos(Literal.create(ltz, TimestampLTZNanosType(p))), post)
+    }
+
+    // Pre-epoch value exercises the negative-epoch path.
+    val preEpoch = localDateTimeToNanosVal(timestampNTZ(1960, 1, 1, 0, 0, 0, 1))
+    checkEvaluation(
+      UnixNanos(Literal.create(preEpoch, TimestampNTZNanosType(9))), expectedNanos(preEpoch))
+
+    // Far-future value: epochMicros * 1000 overflows a 64-bit BIGINT, so the DECIMAL result must
+    // exceed Long.MaxValue and the computation must not be done in long arithmetic.
+    val far = localDateTimeToNanosVal(timestampNTZ(9999, 12, 31, 23, 59, 59, 999999999))
+    checkEvaluation(UnixNanos(Literal.create(far, TimestampNTZNanosType(9))), expectedNanos(far))
+    val farResult =
+      UnixNanos(Literal.create(far, TimestampNTZNanosType(9))).eval().asInstanceOf[Decimal]
+    assert(farResult.toJavaBigDecimal.compareTo(java.math.BigDecimal.valueOf(Long.MaxValue)) > 0)
+
+    // NULL input.
+    checkEvaluation(UnixNanos(Literal.create(null, TimestampNTZNanosType(9))), null)
+    checkEvaluation(UnixNanos(Literal.create(null, TimestampLTZNanosType(9))), null)
+  }
+
+  test("SPARK-57527: unix_nanos rejects non-nanosecond input types") {
+    // unix_nanos accepts only the nanosecond-precision timestamp types; the microsecond
+    // TimestampType / TimestampNTZType (and other types) fail analysis with a type mismatch.
+    Seq(TimestampType, TimestampNTZType, DateType, LongType).foreach { dt =>
+      val mismatch = UnixNanos(Literal.create(null, dt))
+        .checkInputDataTypes().asInstanceOf[DataTypeMismatch]
+      assert(mismatch.errorSubClass == "UNEXPECTED_INPUT_TYPE")
+    }
+  }
+
   test("TIMESTAMP_SECONDS") {
     def testIntegralFunc(value: Number): Unit = {
       checkEvaluation(
# sql/core/src/test/resources/sql-functions/sql-expression-schema.md
#   Adds the UnixNanos row to the expression table, between UnixMillis and UnixSeconds; the
#   example query is the same SELECT used in the ExpressionDescription above.
#   NOTE(review): the last column reads bare "struct" on every row, including the untouched
#   context rows; presumably a "<...>" payload was lost when this copy was extracted - confirm
#   against the real file.
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 6003c7af52d34..3ff81b7f57f02 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -435,6 +435,7 @@
 | org.apache.spark.sql.catalyst.expressions.UnixDate | unix_date | SELECT unix_date(DATE("1970-01-02")) | struct |
 | org.apache.spark.sql.catalyst.expressions.UnixMicros | unix_micros | SELECT unix_micros(TIMESTAMP('1970-01-01 00:00:01Z')) | struct |
 | org.apache.spark.sql.catalyst.expressions.UnixMillis | unix_millis | SELECT unix_millis(TIMESTAMP('1970-01-01 00:00:01Z')) | struct |
+| org.apache.spark.sql.catalyst.expressions.UnixNanos | unix_nanos | SELECT unix_nanos(TIMESTAMP_NTZ '2008-12-25 15:30:00.123456789') | struct |
 | org.apache.spark.sql.catalyst.expressions.UnixSeconds | unix_seconds | SELECT unix_seconds(TIMESTAMP('1970-01-01 00:00:01Z')) | struct |
 | org.apache.spark.sql.catalyst.expressions.UnixTimestamp | unix_timestamp | SELECT unix_timestamp() | struct |
 | org.apache.spark.sql.catalyst.expressions.Upper | ucase | SELECT ucase('SparkSql') | struct |
# sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out
#   Golden analyzer plans for six new TIMESTAMP_LTZ unix_nanos queries: a full-precision literal,
#   string casts to timestamp_ltz(7) and timestamp_ltz(8), a far-future literal, a pre-epoch
#   literal and a NULL cast. Typed literals written with an explicit UTC zone appear in the plan
#   eight hours earlier (13:24:35 UTC is shown as 05:24:35); the context line above the new
#   entries shows the zone Some(America/Los_Angeles).
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out
index bd98d67a3c5f8..1a91dd549f0f4 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out
@@ -696,3 +696,45 @@ SELECT unix_timestamp(NULL :: timestamp_ltz(9)), to_unix_timestamp(NULL :: times
 -- !query analysis
 Project [unix_timestamp(cast(null as timestamp_ltz(9)), yyyy-MM-dd HH:mm:ss, Some(America/Los_Angeles), true) AS unix_timestamp(CAST(NULL AS TIMESTAMP_LTZ(9)), yyyy-MM-dd HH:mm:ss)#xL, to_unix_timestamp(cast(null as timestamp_ltz(9)), yyyy-MM-dd HH:mm:ss, Some(America/Los_Angeles), true) AS to_unix_timestamp(CAST(NULL AS TIMESTAMP_LTZ(9)), yyyy-MM-dd HH:mm:ss)#xL]
 +- OneRowRelation
+
+
+-- !query
+SELECT unix_nanos(TIMESTAMP_LTZ '2020-01-01 13:24:35.123456789 UTC')
+-- !query analysis
+Project [unix_nanos(2020-01-01 05:24:35.123456789) AS unix_nanos(TIMESTAMP_LTZ '2020-01-01 05:24:35.123456789')#x]
++- OneRowRelation
+
+
+-- !query
+SELECT unix_nanos('2020-01-01 13:24:35.123456789 UTC' :: timestamp_ltz(7))
+-- !query analysis
+Project [unix_nanos(cast(2020-01-01 13:24:35.123456789 UTC as timestamp_ltz(7))) AS unix_nanos(CAST(2020-01-01 13:24:35.123456789 UTC AS TIMESTAMP_LTZ(7)))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT unix_nanos('2020-01-01 13:24:35.123456789 UTC' :: timestamp_ltz(8))
+-- !query analysis
+Project [unix_nanos(cast(2020-01-01 13:24:35.123456789 UTC as timestamp_ltz(8))) AS unix_nanos(CAST(2020-01-01 13:24:35.123456789 UTC AS TIMESTAMP_LTZ(8)))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT unix_nanos(TIMESTAMP_LTZ '9999-12-31 23:59:59.999999999 UTC')
+-- !query analysis
+Project [unix_nanos(9999-12-31 15:59:59.999999999) AS unix_nanos(TIMESTAMP_LTZ '9999-12-31 15:59:59.999999999')#x]
++- OneRowRelation
+
+
+-- !query
+SELECT unix_nanos(TIMESTAMP_LTZ '1960-01-01 00:00:00.000000001 UTC')
+-- !query analysis
+Project [unix_nanos(1959-12-31 16:00:00.000000001) AS unix_nanos(TIMESTAMP_LTZ '1959-12-31 16:00:00.000000001')#x]
++- OneRowRelation
+
+
+-- !query
+SELECT unix_nanos(NULL :: timestamp_ltz(9))
+-- !query analysis
+Project [unix_nanos(cast(null as timestamp_ltz(9))) AS unix_nanos(CAST(NULL AS TIMESTAMP_LTZ(9)))#x]
++- OneRowRelation
# NOTE(review): line breaks and leading indentation in this patch section were reconstructed from
# a whitespace-collapsed copy; text inside each line is unchanged. Confirm against the original
# patch before applying.
#
# sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out
#   Golden analyzer plans for six new TIMESTAMP_NTZ unix_nanos queries: a full-precision literal,
#   string casts to timestamp_ntz(7) and timestamp_ntz(8), a far-future literal, a pre-epoch
#   literal and a NULL cast. The literals appear in the plan with the same wall-clock reading as
#   in the query text.
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out
index 5bd6f7e6a76f3..aee34389c059f 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out
@@ -616,3 +616,45 @@ SELECT unix_timestamp(NULL :: timestamp_ntz(9)), to_unix_timestamp(NULL :: times
 -- !query analysis
 Project [unix_timestamp(cast(null as timestamp_ntz(9)), yyyy-MM-dd HH:mm:ss, Some(America/Los_Angeles), true) AS unix_timestamp(CAST(NULL AS TIMESTAMP_NTZ(9)), yyyy-MM-dd HH:mm:ss)#xL, to_unix_timestamp(cast(null as timestamp_ntz(9)), yyyy-MM-dd HH:mm:ss, Some(America/Los_Angeles), true) AS to_unix_timestamp(CAST(NULL AS TIMESTAMP_NTZ(9)), yyyy-MM-dd HH:mm:ss)#xL]
 +- OneRowRelation
+
+
+-- !query
+SELECT unix_nanos(TIMESTAMP_NTZ '2020-01-01 13:24:35.123456789')
+-- !query analysis
+Project [unix_nanos(2020-01-01 13:24:35.123456789) AS unix_nanos(TIMESTAMP_NTZ '2020-01-01 13:24:35.123456789')#x]
++- OneRowRelation
+
+
+-- !query
+SELECT unix_nanos('2020-01-01 13:24:35.123456789' :: timestamp_ntz(7))
+-- !query analysis
+Project [unix_nanos(cast(2020-01-01 13:24:35.123456789 as timestamp_ntz(7))) AS unix_nanos(CAST(2020-01-01 13:24:35.123456789 AS TIMESTAMP_NTZ(7)))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT unix_nanos('2020-01-01 13:24:35.123456789' :: timestamp_ntz(8))
+-- !query analysis
+Project [unix_nanos(cast(2020-01-01 13:24:35.123456789 as timestamp_ntz(8))) AS unix_nanos(CAST(2020-01-01 13:24:35.123456789 AS TIMESTAMP_NTZ(8)))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT unix_nanos(TIMESTAMP_NTZ '9999-12-31 23:59:59.999999999')
+-- !query analysis
+Project [unix_nanos(9999-12-31 23:59:59.999999999) AS unix_nanos(TIMESTAMP_NTZ '9999-12-31 23:59:59.999999999')#x]
++- OneRowRelation
+
+
+-- !query
+SELECT unix_nanos(TIMESTAMP_NTZ '1960-01-01 00:00:00.000000001')
+-- !query analysis
+Project [unix_nanos(1960-01-01 00:00:00.000000001) AS unix_nanos(TIMESTAMP_NTZ '1960-01-01 00:00:00.000000001')#x]
++- OneRowRelation
+
+
+-- !query
+SELECT unix_nanos(NULL :: timestamp_ntz(9))
+-- !query analysis
+Project [unix_nanos(cast(null as timestamp_ntz(9))) AS unix_nanos(CAST(NULL AS TIMESTAMP_NTZ(9)))#x]
++- OneRowRelation
# sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql
#   Appends the six TIMESTAMP_LTZ unix_nanos queries whose golden outputs appear in the
#   timestamp-ltz-nanos.sql.out files. Every non-NULL input carries an explicit UTC zone.
diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql
index 73c6022e354e9..8aaa5b34fb465 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql
@@ -187,3 +187,16 @@ SELECT unix_timestamp('2020-01-01 13:24:35.999999999' :: timestamp_ltz(7));
 SELECT unix_timestamp(TIMESTAMP_LTZ '1969-12-31 23:59:59.500000000 UTC');
 -- NULL nanosecond timestamp.
 SELECT unix_timestamp(NULL :: timestamp_ltz(9)), to_unix_timestamp(NULL :: timestamp_ltz(9));
+
+-- SPARK-57527: unix_nanos over nanosecond-precision values returns DECIMAL(21, 0) nanoseconds since
+-- the epoch. The explicit-zone literals below fix the instant directly, independent of the session
+-- time zone. The sub-microsecond digits are kept, truncated to the type's precision.
+SELECT unix_nanos(TIMESTAMP_LTZ '2020-01-01 13:24:35.123456789 UTC');
+SELECT unix_nanos('2020-01-01 13:24:35.123456789 UTC' :: timestamp_ltz(7));
+SELECT unix_nanos('2020-01-01 13:24:35.123456789 UTC' :: timestamp_ltz(8));
+-- Far-future value: epochMicros * 1000 overflows a 64-bit BIGINT, exercising the DECIMAL path.
+SELECT unix_nanos(TIMESTAMP_LTZ '9999-12-31 23:59:59.999999999 UTC');
+-- Pre-epoch value exercises the negative-epoch path.
+SELECT unix_nanos(TIMESTAMP_LTZ '1960-01-01 00:00:00.000000001 UTC');
+-- NULL nanosecond timestamp.
+SELECT unix_nanos(NULL :: timestamp_ltz(9));
# sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql
#   Appends the matching six TIMESTAMP_NTZ unix_nanos queries (same readings, no zone suffix).
diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql
index 016af6b247632..bd2e1038656d1 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql
@@ -161,3 +161,16 @@ SELECT to_unix_timestamp('2020-01-01 13:24:35.000000001' :: timestamp_ntz(9));
 SELECT unix_timestamp(TIMESTAMP_NTZ '1969-12-31 23:59:59.500000000');
 -- NULL nanosecond timestamp.
 SELECT unix_timestamp(NULL :: timestamp_ntz(9)), to_unix_timestamp(NULL :: timestamp_ntz(9));
+
+-- SPARK-57527: unix_nanos over nanosecond-precision values returns DECIMAL(21, 0) nanoseconds since
+-- the epoch; NTZ applies no zone shift, so the wall-clock value is read as the epoch instant. The
+-- sub-microsecond digits are kept, truncated to the type's precision.
+SELECT unix_nanos(TIMESTAMP_NTZ '2020-01-01 13:24:35.123456789');
+SELECT unix_nanos('2020-01-01 13:24:35.123456789' :: timestamp_ntz(7));
+SELECT unix_nanos('2020-01-01 13:24:35.123456789' :: timestamp_ntz(8));
+-- Far-future value: epochMicros * 1000 overflows a 64-bit BIGINT, exercising the DECIMAL path.
+SELECT unix_nanos(TIMESTAMP_NTZ '9999-12-31 23:59:59.999999999');
+-- Pre-epoch value exercises the negative-epoch path.
+SELECT unix_nanos(TIMESTAMP_NTZ '1960-01-01 00:00:00.000000001');
+-- NULL nanosecond timestamp.
+SELECT unix_nanos(NULL :: timestamp_ntz(9));
# sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out
#   Golden results for the six LTZ queries. The casts to timestamp_ltz(7) and timestamp_ltz(8)
#   yield ...123456700 and ...123456780, i.e. the digits beyond the declared precision are zero;
#   the far-future result 253402300799999999999 has 21 digits; the pre-epoch result is negative.
#   NOTE(review): every "-- !query schema" entry reads bare "struct", and the context output line
#   reads "NULL NULL" with a single space; presumably a "<...>" payload and a tab separator were
#   lost when this copy was extracted - confirm against the real golden file.
diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out
index b5eef6a1ac93b..0be8c9d73a104 100644
--- a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out
@@ -782,3 +782,51 @@ SELECT unix_timestamp(NULL :: timestamp_ltz(9)), to_unix_timestamp(NULL :: times
 struct
 -- !query output
 NULL NULL
+
+
+-- !query
+SELECT unix_nanos(TIMESTAMP_LTZ '2020-01-01 13:24:35.123456789 UTC')
+-- !query schema
+struct
+-- !query output
+1577885075123456789
+
+
+-- !query
+SELECT unix_nanos('2020-01-01 13:24:35.123456789 UTC' :: timestamp_ltz(7))
+-- !query schema
+struct
+-- !query output
+1577885075123456700
+
+
+-- !query
+SELECT unix_nanos('2020-01-01 13:24:35.123456789 UTC' :: timestamp_ltz(8))
+-- !query schema
+struct
+-- !query output
+1577885075123456780
+
+
+-- !query
+SELECT unix_nanos(TIMESTAMP_LTZ '9999-12-31 23:59:59.999999999 UTC')
+-- !query schema
+struct
+-- !query output
+253402300799999999999
+
+
+-- !query
+SELECT unix_nanos(TIMESTAMP_LTZ '1960-01-01 00:00:00.000000001 UTC')
+-- !query schema
+struct
+-- !query output
+-315619199999999999
+
+
+-- !query
+SELECT unix_nanos(NULL :: timestamp_ltz(9))
+-- !query schema
+struct
+-- !query output
+NULL
# sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out
#   Golden results for the six NTZ queries; the output values are identical to the LTZ results
#   for the corresponding readings.
#   NOTE(review): the same bare "struct" / single-space "NULL NULL" caveat applies here - confirm
#   against the real golden file.
diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out
index a3d0d237e8180..9297786f3bda0 100644
--- a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out
@@ -692,3 +692,51 @@ SELECT unix_timestamp(NULL :: timestamp_ntz(9)), to_unix_timestamp(NULL :: times
 struct
 -- !query output
 NULL NULL
+
+
+-- !query
+SELECT unix_nanos(TIMESTAMP_NTZ '2020-01-01 13:24:35.123456789')
+-- !query schema
+struct
+-- !query output
+1577885075123456789
+
+
+-- !query
+SELECT unix_nanos('2020-01-01 13:24:35.123456789' :: timestamp_ntz(7))
+-- !query schema
+struct
+-- !query output
+1577885075123456700
+
+
+-- !query
+SELECT unix_nanos('2020-01-01 13:24:35.123456789' :: timestamp_ntz(8))
+-- !query schema
+struct
+-- !query output
+1577885075123456780
+
+
+-- !query
+SELECT unix_nanos(TIMESTAMP_NTZ '9999-12-31 23:59:59.999999999')
+-- !query schema
+struct
+-- !query output
+253402300799999999999
+
+
+-- !query
+SELECT unix_nanos(TIMESTAMP_NTZ '1960-01-01 00:00:00.000000001')
+-- !query schema
+struct
+-- !query output
+-315619199999999999
+
+
+-- !query
+SELECT unix_nanos(NULL :: timestamp_ntz(9))
+-- !query schema
+struct
+-- !query output
+NULL
# sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala
#   Adds two DataFrame-level tests to the shared suite base.
#   - "unix_nanos over nanosecond-precision timestamps": for p in 7, 8, 9 it checks both the
#     Scala Column API (select(unix_nanos(col("c")))) and the SQL path (selectExpr) for one NTZ
#     and one LTZ value. The inputs end in "00" after the 7th fractional digit, so one expected
#     value serves all three precisions.
#   - "unix_nanos over NULL nanosecond timestamps": builds single-row DataFrames holding NULL in
#     a TimestampNTZNanosType(p) / TimestampLTZNanosType(p) column and expects Row(null).
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala
index cfda0007fe93f..4fa9ccd8ffe9f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala
@@ -287,6 +287,38 @@ abstract class TimestampNanosFunctionsSuiteBase extends SharedSparkSession {
         ltz.select(unix_timestamp(col("c")), to_unix_timestamp(col("c"))), Row(null, null))
     }
   }
+
+  test("SPARK-57527: unix_nanos over nanosecond-precision timestamps") {
+    // unix_nanos returns DECIMAL(21, 0) nanoseconds since the epoch and applies no zone shift to a
+    // timestamp argument. The chosen fractions have zeros beyond the 7th digit, so truncating to
+    // precision p in {7, 8, 9} leaves the sub-microsecond part unchanged and the result is the same
+    // for every p. Both the Scala Column API and the SQL path are exercised.
+    val ntzStr = "2020-01-01T13:24:35.123456700"
+    val ltzStr = "2020-01-01T21:24:35.987654300Z"
+    // 2020-01-01 13:24:35.123456 -> 1577885075123456 micros, + 700 ns = 1577885075123456700.
+    val ntzExpected = Row(new java.math.BigDecimal("1577885075123456700"))
+    // 2020-01-01 21:24:35.987654 UTC -> 1577913875987654 micros, + 300 ns = 1577913875987654300.
+    val ltzExpected = Row(new java.math.BigDecimal("1577913875987654300"))
+    Seq(7, 8, 9).foreach { p =>
+      checkAnswer(ntzNanos(ntzStr, p).select(unix_nanos(col("c"))), ntzExpected)
+      checkAnswer(ntzNanos(ntzStr, p).selectExpr("unix_nanos(c)"), ntzExpected)
+      checkAnswer(ltzNanos(ltzStr, p).select(unix_nanos(col("c"))), ltzExpected)
+      checkAnswer(ltzNanos(ltzStr, p).selectExpr("unix_nanos(c)"), ltzExpected)
+    }
+  }
+
+  test("SPARK-57527: unix_nanos over NULL nanosecond timestamps") {
+    Seq(7, 8, 9).foreach { p =>
+      val ntz = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(null))),
+        new StructType().add("c", TimestampNTZNanosType(p)))
+      val ltz = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(null))),
+        new StructType().add("c", TimestampLTZNanosType(p)))
+      checkAnswer(ntz.select(unix_nanos(col("c"))), Row(null))
+      checkAnswer(ltz.select(unix_nanos(col("c"))), Row(null))
+    }
+  }
 }
 
 // Runs the nanosecond timestamp function tests with ANSI mode enabled explicitly.