From 5a5c37046888ce692eb9f2c989628c980d417e84 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Tue, 3 Feb 2026 12:44:43 -0500 Subject: [PATCH 1/7] Add time constants for micros and nanos --- cpp/src/gandiva/precompiled/time_constants.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/cpp/src/gandiva/precompiled/time_constants.h b/cpp/src/gandiva/precompiled/time_constants.h index 015ef4bf9f7d..1cd906cb8be6 100644 --- a/cpp/src/gandiva/precompiled/time_constants.h +++ b/cpp/src/gandiva/precompiled/time_constants.h @@ -17,7 +17,7 @@ #pragma once -#define MILLIS_IN_SEC (1000) +#define MILLIS_IN_SEC (1000LL) #define MILLIS_IN_MIN (60 * MILLIS_IN_SEC) #define MILLIS_IN_HOUR (60 * MILLIS_IN_MIN) #define MILLIS_IN_DAY (24 * MILLIS_IN_HOUR) @@ -28,3 +28,18 @@ #define MILLIS_TO_HOUR(millis) ((millis) / MILLIS_IN_HOUR) #define MILLIS_TO_DAY(millis) ((millis) / MILLIS_IN_DAY) #define MILLIS_TO_WEEK(millis) ((millis) / MILLIS_IN_WEEK) + +#define MICROS_IN_MILLIS (1000LL) +#define MICROS_IN_SEC (MICROS_IN_MILLIS * MILLIS_IN_SEC) +#define MICROS_IN_MIN (MICROS_IN_MILLIS * MILLIS_IN_MIN) +#define MICROS_IN_HOUR (MICROS_IN_MILLIS * MILLIS_IN_HOUR) +#define MICROS_IN_DAY (MICROS_IN_MILLIS * MILLIS_IN_DAY) +#define MICROS_IN_WEEK (MICROS_IN_MILLIS * MILLIS_IN_WEEK) + +#define NANOS_IN_MICROS (1000LL) +#define NANOS_IN_MILLIS (NANOS_IN_MICROS * MICROS_IN_MILLIS) +#define NANOS_IN_SEC (NANOS_IN_MICROS * MICROS_IN_SEC) +#define NANOS_IN_MIN (NANOS_IN_MICROS * MICROS_IN_MIN) +#define NANOS_IN_HOUR (NANOS_IN_MICROS * MICROS_IN_HOUR) +#define NANOS_IN_DAY (NANOS_IN_MICROS * MICROS_IN_DAY) +#define NANOS_IN_WEEK (NANOS_IN_MICROS * MICROS_IN_WEEK) From 81accb65f1b9394438af5f6f632a9cc22e713c76 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Tue, 3 Feb 2026 12:46:50 -0500 Subject: [PATCH 2/7] Add data types for timestamp_micro and timestamp_nano --- cpp/src/gandiva/function_registry_common.h | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h index abe861e3385e..8471de2d5a28 100644 --- a/cpp/src/gandiva/function_registry_common.h +++ b/cpp/src/gandiva/function_registry_common.h @@ -55,6 +55,8 @@ inline DataTypePtr time32() { return arrow::time32(arrow::TimeUnit::MILLI); } inline DataTypePtr time64() { return arrow::time64(arrow::TimeUnit::MICRO); } inline DataTypePtr timestamp() { return arrow::timestamp(arrow::TimeUnit::MILLI); } +inline DataTypePtr timestamp_micro() { return arrow::timestamp(arrow::TimeUnit::MICRO); } +inline DataTypePtr timestamp_nano() { return arrow::timestamp(arrow::TimeUnit::NANO); } inline DataTypePtr decimal128() { return arrow::decimal128(38, 0); } struct KeyHash { From 714bdd6f1967250f9e4e91eb09fd532de3567ff9 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Tue, 3 Feb 2026 12:56:57 -0500 Subject: [PATCH 3/7] Add new timestampaddDay functions --- cpp/src/gandiva/precompiled/timestamp_arithmetic.cc | 8 ++++++++ cpp/src/gandiva/precompiled/types.h | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc b/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc index 695605b3cc77..b5cc334dcff7 100644 --- a/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc +++ b/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc @@ -240,6 +240,14 @@ TIMESTAMP_DIFF(timestamp) TIMESTAMP_ADD_INT(date64) TIMESTAMP_ADD_INT(timestamp) +// timestampaddDay for microsecond timestamps +ADD_TIMESTAMP_INT32_FIXEDUNITS(timestamp, timestampaddDay_micro, MICROS_IN_DAY) +ADD_TIMESTAMP_INT64_FIXEDUNITS(timestamp, timestampaddDay_micro, MICROS_IN_DAY) + +// timestampaddDay for nanosecond timestamps +ADD_TIMESTAMP_INT32_FIXEDUNITS(timestamp, timestampaddDay_nano, NANOS_IN_DAY) +ADD_TIMESTAMP_INT64_FIXEDUNITS(timestamp, timestampaddDay_nano, NANOS_IN_DAY) + // add gdv_int32 to timestamp ADD_INT32_TO_TIMESTAMP_FIXED_UNITS(date64, date_add, MILLIS_IN_DAY) 
ADD_INT32_TO_TIMESTAMP_FIXED_UNITS(date64, add, MILLIS_IN_DAY) diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h index c93b694fc777..4a90e1c2eb87 100644 --- a/cpp/src/gandiva/precompiled/types.h +++ b/cpp/src/gandiva/precompiled/types.h @@ -114,6 +114,18 @@ gdv_int64 timestampaddMonth_timestamp_int64(gdv_timestamp, gdv_int64); gdv_int64 timestampaddQuarter_timestamp_int64(gdv_timestamp, gdv_int64); gdv_int64 timestampaddYear_timestamp_int64(gdv_timestamp, gdv_int64); +// timestampaddDay for microsecond timestamps +gdv_int64 timestampaddDay_micro_int32_timestamp(gdv_int32, gdv_timestamp); +gdv_int64 timestampaddDay_micro_timestamp_int32(gdv_timestamp, gdv_int32); +gdv_int64 timestampaddDay_micro_int64_timestamp(gdv_int64, gdv_timestamp); +gdv_int64 timestampaddDay_micro_timestamp_int64(gdv_timestamp, gdv_int64); + +// timestampaddDay for nanosecond timestamps +gdv_int64 timestampaddDay_nano_int32_timestamp(gdv_int32, gdv_timestamp); +gdv_int64 timestampaddDay_nano_timestamp_int32(gdv_timestamp, gdv_int32); +gdv_int64 timestampaddDay_nano_int64_timestamp(gdv_int64, gdv_timestamp); +gdv_int64 timestampaddDay_nano_timestamp_int64(gdv_timestamp, gdv_int64); + gdv_boolean isnull_day_time_interval(gdv_day_time_interval in, gdv_boolean is_valid); gdv_boolean istrue_boolean(gdv_boolean in, gdv_boolean isvalid); From 458f179bc569a9ab15bff96d74f577f9fab0d82a Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Tue, 3 Feb 2026 13:23:29 -0500 Subject: [PATCH 4/7] Register the new timestamp arithmetic functions Add unit tests --- .../function_registry_timestamp_arithmetic.cc | 29 +++++++++++++ cpp/src/gandiva/precompiled/time_test.cc | 43 +++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc b/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc index 188bd60d9eac..ba7ee2fc8206 100644 --- a/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc +++ 
b/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc @@ -68,6 +68,35 @@ std::vector GetDateTimeArithmeticFunctionRegistry() { TIMESTAMP_ADD_FNS(timestampaddMinute, {}), TIMESTAMP_ADD_FNS(timestampaddHour, {}), TIMESTAMP_ADD_FNS(timestampaddDay, {}), + + // timestampaddDay for microsecond timestamps + NativeFunction("timestampaddDay", {}, DataTypeVector{int32(), timestamp_micro()}, + timestamp_micro(), kResultNullIfNull, + "timestampaddDay_micro_int32_timestamp"), + NativeFunction("timestampaddDay", {}, DataTypeVector{timestamp_micro(), int32()}, + timestamp_micro(), kResultNullIfNull, + "timestampaddDay_micro_timestamp_int32"), + NativeFunction("timestampaddDay", {}, DataTypeVector{int64(), timestamp_micro()}, + timestamp_micro(), kResultNullIfNull, + "timestampaddDay_micro_int64_timestamp"), + NativeFunction("timestampaddDay", {}, DataTypeVector{timestamp_micro(), int64()}, + timestamp_micro(), kResultNullIfNull, + "timestampaddDay_micro_timestamp_int64"), + + // timestampaddDay for nanosecond timestamps + NativeFunction("timestampaddDay", {}, DataTypeVector{int32(), timestamp_nano()}, + timestamp_nano(), kResultNullIfNull, + "timestampaddDay_nano_int32_timestamp"), + NativeFunction("timestampaddDay", {}, DataTypeVector{timestamp_nano(), int32()}, + timestamp_nano(), kResultNullIfNull, + "timestampaddDay_nano_timestamp_int32"), + NativeFunction("timestampaddDay", {}, DataTypeVector{int64(), timestamp_nano()}, + timestamp_nano(), kResultNullIfNull, + "timestampaddDay_nano_int64_timestamp"), + NativeFunction("timestampaddDay", {}, DataTypeVector{timestamp_nano(), int64()}, + timestamp_nano(), kResultNullIfNull, + "timestampaddDay_nano_timestamp_int64"), + TIMESTAMP_ADD_FNS(timestampaddWeek, {}), TIMESTAMP_ADD_FNS(timestampaddMonth, {"add_months"}), TIMESTAMP_ADD_FNS(timestampaddQuarter, {}), diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc index bdaf3dc2a5fa..6f65d0584169 100644 --- 
a/cpp/src/gandiva/precompiled/time_test.cc +++ b/cpp/src/gandiva/precompiled/time_test.cc @@ -499,6 +499,49 @@ TEST(TestTime, TimeStampAdd) { StringToTimestamp("1999-03-01 00:00:00")); } +TEST(TestTime, TimeStampAddMicroNano) { + // Test timestampaddDay for microsecond timestamps + // StringToTimestamp returns milliseconds, multiply by 1000 for microseconds + gdv_timestamp ts_micro = StringToTimestamp("2000-05-01 10:20:34") * 1000LL; + gdv_timestamp expected_micro = StringToTimestamp("2000-05-06 10:20:34") * 1000LL; + + EXPECT_EQ(timestampaddDay_micro_int32_timestamp(5, ts_micro), expected_micro); + EXPECT_EQ(timestampaddDay_micro_timestamp_int32(ts_micro, 5), expected_micro); + EXPECT_EQ(timestampaddDay_micro_int64_timestamp(5, ts_micro), expected_micro); + EXPECT_EQ(timestampaddDay_micro_timestamp_int64(ts_micro, 5), expected_micro); + + // Test negative days + expected_micro = StringToTimestamp("2000-04-26 10:20:34") * 1000LL; + EXPECT_EQ(timestampaddDay_micro_int32_timestamp(-5, ts_micro), expected_micro); + EXPECT_EQ(timestampaddDay_micro_timestamp_int32(ts_micro, -5), expected_micro); + + // Test timestampaddDay for nanosecond timestamps + // StringToTimestamp returns milliseconds, multiply by 1000000 for nanoseconds + gdv_timestamp ts_nano = StringToTimestamp("2000-05-01 10:20:34") * 1000000LL; + gdv_timestamp expected_nano = StringToTimestamp("2000-05-06 10:20:34") * 1000000LL; + + EXPECT_EQ(timestampaddDay_nano_int32_timestamp(5, ts_nano), expected_nano); + EXPECT_EQ(timestampaddDay_nano_timestamp_int32(ts_nano, 5), expected_nano); + EXPECT_EQ(timestampaddDay_nano_int64_timestamp(5, ts_nano), expected_nano); + EXPECT_EQ(timestampaddDay_nano_timestamp_int64(ts_nano, 5), expected_nano); + + // Test negative days + expected_nano = StringToTimestamp("2000-04-26 10:20:34") * 1000000LL; + EXPECT_EQ(timestampaddDay_nano_int32_timestamp(-5, ts_nano), expected_nano); + EXPECT_EQ(timestampaddDay_nano_timestamp_int32(ts_nano, -5), expected_nano); + + // Test that 
sub-second precision is preserved + // Add 500 microseconds to the timestamp + ts_micro = StringToTimestamp("2000-05-01 10:20:34") * 1000LL + 500; + expected_micro = StringToTimestamp("2000-05-02 10:20:34") * 1000LL + 500; + EXPECT_EQ(timestampaddDay_micro_int32_timestamp(1, ts_micro), expected_micro); + + // Add 500 nanoseconds to the timestamp + ts_nano = StringToTimestamp("2000-05-01 10:20:34") * 1000000LL + 500; + expected_nano = StringToTimestamp("2000-05-02 10:20:34") * 1000000LL + 500; + EXPECT_EQ(timestampaddDay_nano_int32_timestamp(1, ts_nano), expected_nano); +} + // test cases from http://www.staff.science.uu.nl/~gent0113/calendar/isocalendar.htm TEST(TestTime, TestExtractWeek) { std::vector data; From 4627682bfd000add714ca2b12e1c7bac5f3830dc Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Tue, 3 Feb 2026 13:29:22 -0500 Subject: [PATCH 5/7] Add a test helper to parse strings with fractional seconds --- cpp/src/gandiva/precompiled/testing.h | 13 +++++++ cpp/src/gandiva/precompiled/time_test.cc | 48 +++++++++++++++--------- 2 files changed, 44 insertions(+), 17 deletions(-) diff --git a/cpp/src/gandiva/precompiled/testing.h b/cpp/src/gandiva/precompiled/testing.h index c41bc5471483..867beda0a6e7 100644 --- a/cpp/src/gandiva/precompiled/testing.h +++ b/cpp/src/gandiva/precompiled/testing.h @@ -40,4 +40,17 @@ static inline gdv_timestamp StringToTimestamp(const std::string& s) { return out * 1000; } +// Parse timestamp with optional fractional seconds, returning value in the specified +// unit. Supports formats like "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD HH:MM:SS.sssssssss" +// where fractional seconds can have 1-9 digits depending on the time unit. 
+static inline gdv_timestamp StringToTimestampWithUnit(const std::string& s, + ::arrow::TimeUnit::type unit) { + int64_t out = 0; + bool success = ::arrow::internal::ParseTimestampISO8601( + s.c_str(), s.length(), unit, &out, /*out_zone_offset_present=*/nullptr); + DCHECK(success); + ARROW_UNUSED(success); + return out; +} + } // namespace gandiva diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc index 6f65d0584169..3d88ef944a70 100644 --- a/cpp/src/gandiva/precompiled/time_test.cc +++ b/cpp/src/gandiva/precompiled/time_test.cc @@ -500,10 +500,11 @@ TEST(TestTime, TimeStampAdd) { } TEST(TestTime, TimeStampAddMicroNano) { - // Test timestampaddDay for microsecond timestamps - // StringToTimestamp returns milliseconds, multiply by 1000 for microseconds - gdv_timestamp ts_micro = StringToTimestamp("2000-05-01 10:20:34") * 1000LL; - gdv_timestamp expected_micro = StringToTimestamp("2000-05-06 10:20:34") * 1000LL; + // Test timestampaddDay for microsecond timestamps using StringToTimestampWithUnit + gdv_timestamp ts_micro = + StringToTimestampWithUnit("2000-05-01 10:20:34", arrow::TimeUnit::MICRO); + gdv_timestamp expected_micro = + StringToTimestampWithUnit("2000-05-06 10:20:34", arrow::TimeUnit::MICRO); EXPECT_EQ(timestampaddDay_micro_int32_timestamp(5, ts_micro), expected_micro); EXPECT_EQ(timestampaddDay_micro_timestamp_int32(ts_micro, 5), expected_micro); @@ -511,14 +512,16 @@ TEST(TestTime, TimeStampAddMicroNano) { EXPECT_EQ(timestampaddDay_micro_timestamp_int64(ts_micro, 5), expected_micro); // Test negative days - expected_micro = StringToTimestamp("2000-04-26 10:20:34") * 1000LL; + expected_micro = + StringToTimestampWithUnit("2000-04-26 10:20:34", arrow::TimeUnit::MICRO); EXPECT_EQ(timestampaddDay_micro_int32_timestamp(-5, ts_micro), expected_micro); EXPECT_EQ(timestampaddDay_micro_timestamp_int32(ts_micro, -5), expected_micro); - // Test timestampaddDay for nanosecond timestamps - // StringToTimestamp returns 
milliseconds, multiply by 1000000 for nanoseconds - gdv_timestamp ts_nano = StringToTimestamp("2000-05-01 10:20:34") * 1000000LL; - gdv_timestamp expected_nano = StringToTimestamp("2000-05-06 10:20:34") * 1000000LL; + // Test timestampaddDay for nanosecond timestamps using StringToTimestampWithUnit + gdv_timestamp ts_nano = + StringToTimestampWithUnit("2000-05-01 10:20:34", arrow::TimeUnit::NANO); + gdv_timestamp expected_nano = + StringToTimestampWithUnit("2000-05-06 10:20:34", arrow::TimeUnit::NANO); EXPECT_EQ(timestampaddDay_nano_int32_timestamp(5, ts_nano), expected_nano); EXPECT_EQ(timestampaddDay_nano_timestamp_int32(ts_nano, 5), expected_nano); @@ -526,20 +529,31 @@ TEST(TestTime, TimeStampAddMicroNano) { EXPECT_EQ(timestampaddDay_nano_timestamp_int64(ts_nano, 5), expected_nano); // Test negative days - expected_nano = StringToTimestamp("2000-04-26 10:20:34") * 1000000LL; + expected_nano = StringToTimestampWithUnit("2000-04-26 10:20:34", arrow::TimeUnit::NANO); EXPECT_EQ(timestampaddDay_nano_int32_timestamp(-5, ts_nano), expected_nano); EXPECT_EQ(timestampaddDay_nano_timestamp_int32(ts_nano, -5), expected_nano); - // Test that sub-second precision is preserved - // Add 500 microseconds to the timestamp - ts_micro = StringToTimestamp("2000-05-01 10:20:34") * 1000LL + 500; - expected_micro = StringToTimestamp("2000-05-02 10:20:34") * 1000LL + 500; + // Test that sub-second precision is preserved using fractional seconds + // Parse timestamp with 500 microseconds + ts_micro = + StringToTimestampWithUnit("2000-05-01 10:20:34.000500", arrow::TimeUnit::MICRO); + expected_micro = + StringToTimestampWithUnit("2000-05-02 10:20:34.000500", arrow::TimeUnit::MICRO); EXPECT_EQ(timestampaddDay_micro_int32_timestamp(1, ts_micro), expected_micro); - // Add 500 nanoseconds to the timestamp - ts_nano = StringToTimestamp("2000-05-01 10:20:34") * 1000000LL + 500; - expected_nano = StringToTimestamp("2000-05-02 10:20:34") * 1000000LL + 500; + // Parse timestamp with 500 
nanoseconds + ts_nano = + StringToTimestampWithUnit("2000-05-01 10:20:34.000000500", arrow::TimeUnit::NANO); + expected_nano = + StringToTimestampWithUnit("2000-05-02 10:20:34.000000500", arrow::TimeUnit::NANO); EXPECT_EQ(timestampaddDay_nano_int32_timestamp(1, ts_nano), expected_nano); + + // Test with milliseconds too + gdv_timestamp ts_milli = + StringToTimestampWithUnit("2000-05-01 10:20:34.123", arrow::TimeUnit::MILLI); + gdv_timestamp expected_milli = + StringToTimestampWithUnit("2000-05-02 10:20:34.123", arrow::TimeUnit::MILLI); + EXPECT_EQ(timestampaddDay_int32_timestamp(1, ts_milli), expected_milli); } // test cases from http://www.staff.science.uu.nl/~gent0113/calendar/isocalendar.htm From 0dd42874ee5e0ee7a1fd8f1d4e2173a5116eb750 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Mon, 9 Feb 2026 13:13:44 -0500 Subject: [PATCH 6/7] Templatize the micro/nano-aware functions --- .../function_registry_timestamp_arithmetic.cc | 48 ++++++++----------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc b/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc index ba7ee2fc8206..735c4b3d651f 100644 --- a/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc +++ b/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc @@ -50,6 +50,24 @@ namespace gandiva { BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, date64, int64, date64), \ BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, timestamp, int64, timestamp) +// Macro to register timestamp arithmetic functions for specific precision types +// NAME: base function name (e.g., timestampaddDay) +// PRECISION_SUFFIX: suffix for precision (e.g., micro, nano) +// TS_TYPE_FN: timestamp type function (e.g., timestamp_micro) +#define TIMESTAMP_ADD_PRECISION_FNS(NAME, PRECISION_SUFFIX, TS_TYPE_FN) \ + NativeFunction(#NAME, {}, DataTypeVector{int32(), TS_TYPE_FN()}, TS_TYPE_FN(), \ + kResultNullIfNull, \ + 
ARROW_STRINGIFY(NAME##_##PRECISION_SUFFIX##_int32_timestamp)), \ + NativeFunction(#NAME, {}, DataTypeVector{TS_TYPE_FN(), int32()}, TS_TYPE_FN(),\ + kResultNullIfNull, \ + ARROW_STRINGIFY(NAME##_##PRECISION_SUFFIX##_timestamp_int32)), \ + NativeFunction(#NAME, {}, DataTypeVector{int64(), TS_TYPE_FN()}, TS_TYPE_FN(),\ + kResultNullIfNull, \ + ARROW_STRINGIFY(NAME##_##PRECISION_SUFFIX##_int64_timestamp)), \ + NativeFunction(#NAME, {}, DataTypeVector{TS_TYPE_FN(), int64()}, TS_TYPE_FN(),\ + kResultNullIfNull, \ + ARROW_STRINGIFY(NAME##_##PRECISION_SUFFIX##_timestamp_int64)) + std::vector GetDateTimeArithmeticFunctionRegistry() { static std::vector datetime_fn_registry_ = { BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, {}, date64, date64, float64), @@ -68,34 +86,8 @@ std::vector GetDateTimeArithmeticFunctionRegistry() { TIMESTAMP_ADD_FNS(timestampaddMinute, {}), TIMESTAMP_ADD_FNS(timestampaddHour, {}), TIMESTAMP_ADD_FNS(timestampaddDay, {}), - - // timestampaddDay for microsecond timestamps - NativeFunction("timestampaddDay", {}, DataTypeVector{int32(), timestamp_micro()}, - timestamp_micro(), kResultNullIfNull, - "timestampaddDay_micro_int32_timestamp"), - NativeFunction("timestampaddDay", {}, DataTypeVector{timestamp_micro(), int32()}, - timestamp_micro(), kResultNullIfNull, - "timestampaddDay_micro_timestamp_int32"), - NativeFunction("timestampaddDay", {}, DataTypeVector{int64(), timestamp_micro()}, - timestamp_micro(), kResultNullIfNull, - "timestampaddDay_micro_int64_timestamp"), - NativeFunction("timestampaddDay", {}, DataTypeVector{timestamp_micro(), int64()}, - timestamp_micro(), kResultNullIfNull, - "timestampaddDay_micro_timestamp_int64"), - - // timestampaddDay for nanosecond timestamps - NativeFunction("timestampaddDay", {}, DataTypeVector{int32(), timestamp_nano()}, - timestamp_nano(), kResultNullIfNull, - "timestampaddDay_nano_int32_timestamp"), - NativeFunction("timestampaddDay", {}, DataTypeVector{timestamp_nano(), int32()}, - timestamp_nano(), 
kResultNullIfNull, - "timestampaddDay_nano_timestamp_int32"), - NativeFunction("timestampaddDay", {}, DataTypeVector{int64(), timestamp_nano()}, - timestamp_nano(), kResultNullIfNull, - "timestampaddDay_nano_int64_timestamp"), - NativeFunction("timestampaddDay", {}, DataTypeVector{timestamp_nano(), int64()}, - timestamp_nano(), kResultNullIfNull, - "timestampaddDay_nano_timestamp_int64"), + TIMESTAMP_ADD_PRECISION_FNS(timestampaddDay, micro, timestamp_micro), + TIMESTAMP_ADD_PRECISION_FNS(timestampaddDay, nano, timestamp_nano), TIMESTAMP_ADD_FNS(timestampaddWeek, {}), TIMESTAMP_ADD_FNS(timestampaddMonth, {"add_months"}), From e40c79d8a3e923c558885e233eb699c51971e7f0 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Mon, 9 Feb 2026 13:26:39 -0500 Subject: [PATCH 7/7] Add support for micros and nanos to timestamp_diff --- .../function_registry_timestamp_arithmetic.cc | 65 ++++++++++++------- cpp/src/gandiva/precompiled/time_constants.h | 12 ++++ cpp/src/gandiva/precompiled/time_test.cc | 49 ++++++++++++++ .../precompiled/timestamp_arithmetic.cc | 14 ++++ cpp/src/gandiva/precompiled/types.h | 14 ++++ 5 files changed, 130 insertions(+), 24 deletions(-) diff --git a/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc b/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc index 735c4b3d651f..403bb2f3cfad 100644 --- a/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc +++ b/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc @@ -54,20 +54,29 @@ namespace gandiva { // NAME: base function name (e.g., timestampaddDay) // PRECISION_SUFFIX: suffix for precision (e.g., micro, nano) // TS_TYPE_FN: timestamp type function (e.g., timestamp_micro) -#define TIMESTAMP_ADD_PRECISION_FNS(NAME, PRECISION_SUFFIX, TS_TYPE_FN) \ - NativeFunction(#NAME, {}, DataTypeVector{int32(), TS_TYPE_FN()}, TS_TYPE_FN(), \ - kResultNullIfNull, \ - ARROW_STRINGIFY(NAME##_##PRECISION_SUFFIX##_int32_timestamp)), \ - NativeFunction(#NAME, {}, DataTypeVector{TS_TYPE_FN(), int32()}, 
TS_TYPE_FN(),\ - kResultNullIfNull, \ - ARROW_STRINGIFY(NAME##_##PRECISION_SUFFIX##_timestamp_int32)), \ - NativeFunction(#NAME, {}, DataTypeVector{int64(), TS_TYPE_FN()}, TS_TYPE_FN(),\ - kResultNullIfNull, \ - ARROW_STRINGIFY(NAME##_##PRECISION_SUFFIX##_int64_timestamp)), \ - NativeFunction(#NAME, {}, DataTypeVector{TS_TYPE_FN(), int64()}, TS_TYPE_FN(),\ - kResultNullIfNull, \ +#define TIMESTAMP_ADD_PRECISION_FNS(NAME, PRECISION_SUFFIX, TS_TYPE_FN) \ + NativeFunction(#NAME, {}, DataTypeVector{int32(), TS_TYPE_FN()}, TS_TYPE_FN(), \ + kResultNullIfNull, \ + ARROW_STRINGIFY(NAME##_##PRECISION_SUFFIX##_int32_timestamp)), \ + NativeFunction(#NAME, {}, DataTypeVector{TS_TYPE_FN(), int32()}, TS_TYPE_FN(), \ + kResultNullIfNull, \ + ARROW_STRINGIFY(NAME##_##PRECISION_SUFFIX##_timestamp_int32)), \ + NativeFunction(#NAME, {}, DataTypeVector{int64(), TS_TYPE_FN()}, TS_TYPE_FN(), \ + kResultNullIfNull, \ + ARROW_STRINGIFY(NAME##_##PRECISION_SUFFIX##_int64_timestamp)), \ + NativeFunction(#NAME, {}, DataTypeVector{TS_TYPE_FN(), int64()}, TS_TYPE_FN(), \ + kResultNullIfNull, \ ARROW_STRINGIFY(NAME##_##PRECISION_SUFFIX##_timestamp_int64)) +// Macro to register timestampdiff functions for specific precision types +// NAME: base function name (e.g., timestampdiffDay) +// PRECISION_SUFFIX: suffix for precision (e.g., micro, nano) +// TS_TYPE_FN: timestamp type function (e.g., timestamp_micro) +#define TIMESTAMP_DIFF_PRECISION_FN(NAME, PRECISION_SUFFIX, TS_TYPE_FN) \ + NativeFunction(#NAME, {}, DataTypeVector{TS_TYPE_FN(), TS_TYPE_FN()}, int32(), \ + kResultNullIfNull, \ + ARROW_STRINGIFY(NAME##_##PRECISION_SUFFIX##_timestamp_timestamp)) + std::vector GetDateTimeArithmeticFunctionRegistry() { static std::vector datetime_fn_registry_ = { BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, {}, date64, date64, float64), @@ -75,33 +84,41 @@ std::vector GetDateTimeArithmeticFunctionRegistry() { TIMESTAMP_DIFF_FN(timestampdiffSecond, {}), TIMESTAMP_DIFF_FN(timestampdiffMinute, {}), - 
TIMESTAMP_DIFF_FN(timestampdiffHour, {}), - TIMESTAMP_DIFF_FN(timestampdiffDay, {}), - TIMESTAMP_DIFF_FN(timestampdiffWeek, {}), - TIMESTAMP_DIFF_FN(timestampdiffMonth, {}), + TIMESTAMP_DIFF_FN(timestampdiffHour, {}), TIMESTAMP_DIFF_FN(timestampdiffDay, {}), + TIMESTAMP_DIFF_FN(timestampdiffWeek, {}), TIMESTAMP_DIFF_FN(timestampdiffMonth, {}), TIMESTAMP_DIFF_FN(timestampdiffQuarter, {}), TIMESTAMP_DIFF_FN(timestampdiffYear, {}), + // timestampdiff for microsecond timestamps + TIMESTAMP_DIFF_PRECISION_FN(timestampdiffSecond, micro, timestamp_micro), + TIMESTAMP_DIFF_PRECISION_FN(timestampdiffMinute, micro, timestamp_micro), + TIMESTAMP_DIFF_PRECISION_FN(timestampdiffHour, micro, timestamp_micro), + TIMESTAMP_DIFF_PRECISION_FN(timestampdiffDay, micro, timestamp_micro), + TIMESTAMP_DIFF_PRECISION_FN(timestampdiffWeek, micro, timestamp_micro), + + // timestampdiff for nanosecond timestamps + TIMESTAMP_DIFF_PRECISION_FN(timestampdiffSecond, nano, timestamp_nano), + TIMESTAMP_DIFF_PRECISION_FN(timestampdiffMinute, nano, timestamp_nano), + TIMESTAMP_DIFF_PRECISION_FN(timestampdiffHour, nano, timestamp_nano), + TIMESTAMP_DIFF_PRECISION_FN(timestampdiffDay, nano, timestamp_nano), + TIMESTAMP_DIFF_PRECISION_FN(timestampdiffWeek, nano, timestamp_nano), + TIMESTAMP_ADD_FNS(timestampaddSecond, {}), - TIMESTAMP_ADD_FNS(timestampaddMinute, {}), - TIMESTAMP_ADD_FNS(timestampaddHour, {}), + TIMESTAMP_ADD_FNS(timestampaddMinute, {}), TIMESTAMP_ADD_FNS(timestampaddHour, {}), TIMESTAMP_ADD_FNS(timestampaddDay, {}), TIMESTAMP_ADD_PRECISION_FNS(timestampaddDay, micro, timestamp_micro), TIMESTAMP_ADD_PRECISION_FNS(timestampaddDay, nano, timestamp_nano), TIMESTAMP_ADD_FNS(timestampaddWeek, {}), TIMESTAMP_ADD_FNS(timestampaddMonth, {"add_months"}), - TIMESTAMP_ADD_FNS(timestampaddQuarter, {}), - TIMESTAMP_ADD_FNS(timestampaddYear, {}), + TIMESTAMP_ADD_FNS(timestampaddQuarter, {}), TIMESTAMP_ADD_FNS(timestampaddYear, {}), - DATE_ADD_FNS(date_add, {}), - DATE_ADD_FNS(add, {}), + 
DATE_ADD_FNS(date_add, {}), DATE_ADD_FNS(add, {}), NativeFunction("add", {}, DataTypeVector{date64(), int64()}, timestamp(), kResultNullIfNull, "add_date64_int64"), - DATE_DIFF_FNS(date_sub, {}), - DATE_DIFF_FNS(subtract, {}), + DATE_DIFF_FNS(date_sub, {}), DATE_DIFF_FNS(subtract, {}), DATE_DIFF_FNS(date_diff, {})}; return datetime_fn_registry_; diff --git a/cpp/src/gandiva/precompiled/time_constants.h b/cpp/src/gandiva/precompiled/time_constants.h index 1cd906cb8be6..7ee14a5c0cb5 100644 --- a/cpp/src/gandiva/precompiled/time_constants.h +++ b/cpp/src/gandiva/precompiled/time_constants.h @@ -36,6 +36,12 @@ #define MICROS_IN_DAY (MICROS_IN_MILLIS * MILLIS_IN_DAY) #define MICROS_IN_WEEK (MICROS_IN_MILLIS * MILLIS_IN_WEEK) +#define MICROS_TO_SEC(micros) ((micros) / MICROS_IN_SEC) +#define MICROS_TO_MINS(micros) ((micros) / MICROS_IN_MIN) +#define MICROS_TO_HOUR(micros) ((micros) / MICROS_IN_HOUR) +#define MICROS_TO_DAY(micros) ((micros) / MICROS_IN_DAY) +#define MICROS_TO_WEEK(micros) ((micros) / MICROS_IN_WEEK) + #define NANOS_IN_MICROS (1000LL) #define NANOS_IN_MILLIS (NANOS_IN_MICROS * MICROS_IN_MILLIS) #define NANOS_IN_SEC (NANOS_IN_MICROS * MICROS_IN_SEC) @@ -43,3 +49,9 @@ #define NANOS_IN_HOUR (NANOS_IN_MICROS * MICROS_IN_HOUR) #define NANOS_IN_DAY (NANOS_IN_MICROS * MICROS_IN_DAY) #define NANOS_IN_WEEK (NANOS_IN_MICROS * MICROS_IN_WEEK) + +#define NANOS_TO_SEC(nanos) ((nanos) / NANOS_IN_SEC) +#define NANOS_TO_MINS(nanos) ((nanos) / NANOS_IN_MIN) +#define NANOS_TO_HOUR(nanos) ((nanos) / NANOS_IN_HOUR) +#define NANOS_TO_DAY(nanos) ((nanos) / NANOS_IN_DAY) +#define NANOS_TO_WEEK(nanos) ((nanos) / NANOS_IN_WEEK) diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc index 3d88ef944a70..d8e2b8e9a866 100644 --- a/cpp/src/gandiva/precompiled/time_test.cc +++ b/cpp/src/gandiva/precompiled/time_test.cc @@ -556,6 +556,55 @@ TEST(TestTime, TimeStampAddMicroNano) { EXPECT_EQ(timestampaddDay_int32_timestamp(1, ts_milli), 
expected_milli); } +TEST(TestTime, TimeStampDiffMicroNano) { + // Test timestampdiff for microsecond timestamps + gdv_timestamp start_micro = + StringToTimestampWithUnit("2000-05-01 10:20:34", arrow::TimeUnit::MICRO); + gdv_timestamp end_micro = + StringToTimestampWithUnit("2000-05-06 12:25:40", arrow::TimeUnit::MICRO); + + // Difference: 5 days, 2 hours, 5 minutes, 6 seconds + EXPECT_EQ(timestampdiffDay_micro_timestamp_timestamp(start_micro, end_micro), 5); + EXPECT_EQ(timestampdiffHour_micro_timestamp_timestamp(start_micro, end_micro), 122); + EXPECT_EQ(timestampdiffMinute_micro_timestamp_timestamp(start_micro, end_micro), 7325); + EXPECT_EQ(timestampdiffSecond_micro_timestamp_timestamp(start_micro, end_micro), + 439506); + EXPECT_EQ(timestampdiffWeek_micro_timestamp_timestamp(start_micro, end_micro), 0); + + // Test negative difference + EXPECT_EQ(timestampdiffDay_micro_timestamp_timestamp(end_micro, start_micro), -5); + + // Test timestampdiff for nanosecond timestamps + gdv_timestamp start_nano = + StringToTimestampWithUnit("2000-05-01 10:20:34", arrow::TimeUnit::NANO); + gdv_timestamp end_nano = + StringToTimestampWithUnit("2000-05-06 12:25:40", arrow::TimeUnit::NANO); + + EXPECT_EQ(timestampdiffDay_nano_timestamp_timestamp(start_nano, end_nano), 5); + EXPECT_EQ(timestampdiffHour_nano_timestamp_timestamp(start_nano, end_nano), 122); + EXPECT_EQ(timestampdiffMinute_nano_timestamp_timestamp(start_nano, end_nano), 7325); + EXPECT_EQ(timestampdiffSecond_nano_timestamp_timestamp(start_nano, end_nano), 439506); + EXPECT_EQ(timestampdiffWeek_nano_timestamp_timestamp(start_nano, end_nano), 0); + + // Test negative difference + EXPECT_EQ(timestampdiffDay_nano_timestamp_timestamp(end_nano, start_nano), -5); + + // Test week difference with larger gap + gdv_timestamp start_week_micro = + StringToTimestampWithUnit("2000-05-01 00:00:00", arrow::TimeUnit::MICRO); + gdv_timestamp end_week_micro = + StringToTimestampWithUnit("2000-05-22 00:00:00", arrow::TimeUnit::MICRO); + 
EXPECT_EQ(timestampdiffWeek_micro_timestamp_timestamp(start_week_micro, end_week_micro), + 3); + + gdv_timestamp start_week_nano = + StringToTimestampWithUnit("2000-05-01 00:00:00", arrow::TimeUnit::NANO); + gdv_timestamp end_week_nano = + StringToTimestampWithUnit("2000-05-22 00:00:00", arrow::TimeUnit::NANO); + EXPECT_EQ(timestampdiffWeek_nano_timestamp_timestamp(start_week_nano, end_week_nano), + 3); +} + // test cases from http://www.staff.science.uu.nl/~gent0113/calendar/isocalendar.htm TEST(TestTime, TestExtractWeek) { std::vector data; diff --git a/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc b/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc index b5cc334dcff7..fee1679ba637 100644 --- a/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc +++ b/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc @@ -138,6 +138,20 @@ extern "C" { TIMESTAMP_DIFF(timestamp) +// timestampdiff for microsecond timestamps +TIMESTAMP_DIFF_FIXED_UNITS(timestamp, timestampdiffSecond_micro, MICROS_TO_SEC) +TIMESTAMP_DIFF_FIXED_UNITS(timestamp, timestampdiffMinute_micro, MICROS_TO_MINS) +TIMESTAMP_DIFF_FIXED_UNITS(timestamp, timestampdiffHour_micro, MICROS_TO_HOUR) +TIMESTAMP_DIFF_FIXED_UNITS(timestamp, timestampdiffDay_micro, MICROS_TO_DAY) +TIMESTAMP_DIFF_FIXED_UNITS(timestamp, timestampdiffWeek_micro, MICROS_TO_WEEK) + +// timestampdiff for nanosecond timestamps +TIMESTAMP_DIFF_FIXED_UNITS(timestamp, timestampdiffSecond_nano, NANOS_TO_SEC) +TIMESTAMP_DIFF_FIXED_UNITS(timestamp, timestampdiffMinute_nano, NANOS_TO_MINS) +TIMESTAMP_DIFF_FIXED_UNITS(timestamp, timestampdiffHour_nano, NANOS_TO_HOUR) +TIMESTAMP_DIFF_FIXED_UNITS(timestamp, timestampdiffDay_nano, NANOS_TO_DAY) +TIMESTAMP_DIFF_FIXED_UNITS(timestamp, timestampdiffWeek_nano, NANOS_TO_WEEK) + #define ADD_INT32_TO_TIMESTAMP_FIXED_UNITS(TYPE, NAME, TO_MILLIS) \ FORCE_INLINE \ gdv_##TYPE NAME##_int32_##TYPE(gdv_int32 count, gdv_##TYPE millis) { \ diff --git a/cpp/src/gandiva/precompiled/types.h 
b/cpp/src/gandiva/precompiled/types.h index 4a90e1c2eb87..3542d73343d0 100644 --- a/cpp/src/gandiva/precompiled/types.h +++ b/cpp/src/gandiva/precompiled/types.h @@ -126,6 +126,20 @@ gdv_int64 timestampaddDay_nano_timestamp_int32(gdv_timestamp, gdv_int32); gdv_int64 timestampaddDay_nano_int64_timestamp(gdv_int64, gdv_timestamp); gdv_int64 timestampaddDay_nano_timestamp_int64(gdv_timestamp, gdv_int64); +// timestampdiff for microsecond timestamps +gdv_int32 timestampdiffSecond_micro_timestamp_timestamp(gdv_timestamp, gdv_timestamp); +gdv_int32 timestampdiffMinute_micro_timestamp_timestamp(gdv_timestamp, gdv_timestamp); +gdv_int32 timestampdiffHour_micro_timestamp_timestamp(gdv_timestamp, gdv_timestamp); +gdv_int32 timestampdiffDay_micro_timestamp_timestamp(gdv_timestamp, gdv_timestamp); +gdv_int32 timestampdiffWeek_micro_timestamp_timestamp(gdv_timestamp, gdv_timestamp); + +// timestampdiff for nanosecond timestamps +gdv_int32 timestampdiffSecond_nano_timestamp_timestamp(gdv_timestamp, gdv_timestamp); +gdv_int32 timestampdiffMinute_nano_timestamp_timestamp(gdv_timestamp, gdv_timestamp); +gdv_int32 timestampdiffHour_nano_timestamp_timestamp(gdv_timestamp, gdv_timestamp); +gdv_int32 timestampdiffDay_nano_timestamp_timestamp(gdv_timestamp, gdv_timestamp); +gdv_int32 timestampdiffWeek_nano_timestamp_timestamp(gdv_timestamp, gdv_timestamp); + gdv_boolean isnull_day_time_interval(gdv_day_time_interval in, gdv_boolean is_valid); gdv_boolean istrue_boolean(gdv_boolean in, gdv_boolean isvalid);