diff --git a/Makefile b/Makefile index bc17d050..a9485498 100644 --- a/Makefile +++ b/Makefile @@ -11,9 +11,39 @@ include extension-ci-tools/makefiles/duckdb_extension.Makefile # both MEOS (meos_initialize_timezone) and DuckDB (DBConfig::SetOptionByName # "TimeZone") to Europe/Brussels. Tests pass on any OS timezone — the # extension is the single source of truth, no TZ env var needed. +# +# LoadInternal also calls ExtensionHelper::AutoLoadExtension(db, "icu") so +# the timezone option is honoured. Autoload looks for the extension on disk +# at $HOME/.duckdb/extensions///icu.duckdb_extension +# and falls back to a hub download. That fails both inside the linux_amd64 +# test docker container (empty path, no network egress) and on the macOS +# osx_arm64 test runner (hub icu not reliably resolvable). We copy the +# icu.duckdb_extension that was built locally as part of this extension's +# build (declared in extension_config.cmake) into the expected path, +# matched to the DuckDB platform string, before running the unittester. +DUCKDB_VERSION_TAG := v1.4.4 + +define stage_icu + @if [ -f ./build/$(1)/extension/icu/icu.duckdb_extension ]; then \ + case "$$(uname -s)-$$(uname -m)" in \ + Linux-x86_64) platform=linux_amd64 ;; \ + Linux-aarch64) platform=linux_arm64 ;; \ + Darwin-arm64) platform=osx_arm64 ;; \ + Darwin-x86_64) platform=osx_amd64 ;; \ + *) platform=$$(uname -m) ;; \ + esac; \ + target=$$HOME/.duckdb/extensions/$(DUCKDB_VERSION_TAG)/$$platform; \ + mkdir -p "$$target" && cp -f ./build/$(1)/extension/icu/icu.duckdb_extension "$$target/" && \ + echo "Staged icu.duckdb_extension at $$target/"; \ + fi +endef + test_release_internal: + $(call stage_icu,release) ./build/release/$(TEST_PATH) "$(PROJ_DIR)test/*" test_debug_internal: + $(call stage_icu,debug) ./build/debug/$(TEST_PATH) "$(PROJ_DIR)test/*" test_reldebug_internal: - ./build/reldebug/$(TEST_PATH) "$(PROJ_DIR)test/*" \ No newline at end of file + $(call stage_icu,reldebug) + ./build/reldebug/$(TEST_PATH) "$(PROJ_DIR)test/*" diff --git a/src/geo/stbox_functions.cpp b/src/geo/stbox_functions.cpp index b5398f02..5985ba5d 100644 --- a/src/geo/stbox_functions.cpp +++ b/src/geo/stbox_functions.cpp @@ -310,8 +310,18 @@ void StboxFunctions::Stbox_as_hexwkb(DataChunk &args, ExpressionState &state, Ve throw InternalException("Failure in Stbox_as_hexwkb: unable to cast stbox to hexwkb"); return string_t(); } - string_t ret_str(wkb); - string_t stored_data = StringVector::AddStringOrBlob(result, ret_str); + // Diagnostic: hex strings must be even-length. See + // src/geo/tgeompoint.cpp TgeoAsHexWkbExec for context. + size_t actual = strlen(wkb); + if (actual % 2 != 0) { + std::string diag = "Stbox_as_hexwkb produced odd-length string: " + "strlen=" + std::to_string(actual) + + " sz=" + std::to_string(wkb_size); + free(wkb); + free(stbox); + throw InternalException(diag); + } + string_t stored_data = StringVector::AddStringOrBlob(result, wkb, actual); free(wkb); free(stbox); return stored_data; diff --git a/src/geo/tgeompoint.cpp b/src/geo/tgeompoint.cpp index 2bc5e6a0..e69411c2 100644 --- a/src/geo/tgeompoint.cpp +++ b/src/geo/tgeompoint.cpp @@ -1933,10 +1933,25 @@ void TgeoAsHexWkbExec(DataChunk &args, ExpressionState &state, Vector &result, u Temporal *t = BlobToTemp(input); size_t sz = 0; char *hex = temporal_as_hexwkb(t, variant, &sz); - (void) sz; free(t); if (!hex) throw InternalException("temporal_as_hexwkb returned null"); - string_t stored = StringVector::AddString(result, hex); + // Diagnostic: hex strings must be even-length (2 chars / byte). + // The macOS-arm64 CI on PRs #126/#130 has been failing with + // "Invalid hex string, length (69)" from DuckDB's decoder, which + // implies the producer is occasionally emitting odd-length output + // on that platform. Report both `strlen` and `sz` so the next + // failure pinpoints whether MEOS itself produced the odd length + // or whether something downstream truncates at an embedded null. + size_t actual = strlen(hex); + if (actual % 2 != 0) { + std::string diag = "temporal_as_hexwkb produced odd-length string: " + "strlen=" + std::to_string(actual) + + " sz=" + std::to_string(sz) + + " variant=" + std::to_string((int) variant); + free(hex); + throw InternalException(diag); + } + string_t stored = StringVector::AddString(result, hex, actual); free(hex); return stored; }); diff --git a/src/include/tydef.hpp b/src/include/tydef.hpp index b7b28109..b9860ca0 100644 --- a/src/include/tydef.hpp +++ b/src/include/tydef.hpp @@ -11,11 +11,27 @@ extern "C" { #include } -// Forward-compat alias for the meosType → MeosType rename (MobilityDB -// pr785-sync-script). Vcpkg's MEOS exposes `MeosType`; existing -// MobilityDuck code still uses `meosType`. This alias bridges the two -// without touching every reference site. -using meosType = MeosType; +// MEOS naming history: `meosType` is the **pre-consolidation** spelling +// and `MeosType` is the **post-consolidation** target (the rename is +// part of the upstream consolidation sweep, not yet reached by the +// vcpkg pin). The current pin +// (`vcpkg_ports/meos/portfile.cmake` REF f11b7443ee98…) is still +// pre-consolidation and exposes `meosType` — see +// meos/include/temporal/meos_catalog.h, where line 121 declares +// `} meosType;`. MobilityDuck's source consistently uses +// `meosType` (verified via `grep -rn '\bmeosType\b' src/`), which +// matches the pin, so no alias is needed today. +// +// An earlier version of this file added `using meosType = MeosType;` +// as a forward-looking bridge for the eventual consolidation bump. +// That alias references `MeosType`, which the current pin does NOT +// yet expose, so it broke the build: +// "'MeosType' does not name a type; did you mean 'meosType'?". +// +// When the MEOS pin is bumped past the consolidation point, restore +// a bridge here (`using meosType = MeosType;` becomes valid then) or +// sweep the source `meosType → MeosType` in one PR — whichever the +// project prefers at that time. namespace duckdb { diff --git a/src/temporal/set.cpp b/src/temporal/set.cpp index b803498a..1858e489 100644 --- a/src/temporal/set.cpp +++ b/src/temporal/set.cpp @@ -945,6 +945,14 @@ static inline Set *date_to_set_duckdb(DateADT d) { return date_to_set(ToMeosDate(duckdb::date_t(d))); } +// macOS LP64: int64 (long) and int64_t (long long) are the same width but +// distinct types, so clang rejects passing bigint_to_set where a +// Set *(*)(int64_t) is expected as a non-type template arg. The cast is a +// no-op on Linux. See SetUnionScalarFunction below. +static inline Set *bigint_to_set_duckdb(int64_t i) { + return bigint_to_set(static_cast(i)); +} + struct SetPtrState { Set *accumulated; }; @@ -1069,7 +1077,7 @@ void SetTypes::RegisterSetUnionAgg(ExtensionLoader &loader) { LogicalType::INTEGER, SetTypes::intset())); set_union_set.AddFunction( AggregateFunction::UnaryAggregateDestructor>( + SetUnionScalarFunction>( LogicalType::BIGINT, SetTypes::bigintset())); set_union_set.AddFunction( AggregateFunction::UnaryAggregateDestructor(data_copy); size_t hex_size = 0; char *hex = set_as_hexwkb(s, WKB_EXTENDED, &hex_size); - (void)hex_size; free(data_copy); if (!hex) { throw InternalException("asHexWKB: set_as_hexwkb failed"); } - string_t stored = StringVector::AddString(result, hex); + // Diagnostic: hex strings must be even-length. See + // src/geo/tgeompoint.cpp TgeoAsHexWkbExec for context. + size_t actual = strlen(hex); + if (actual % 2 != 0) { + std::string diag = "asHexWKB: set_as_hexwkb produced odd-length string: " + "strlen=" + std::to_string(actual) + + " sz=" + std::to_string(hex_size); + free(hex); + throw InternalException(diag); + } + string_t stored = StringVector::AddString(result, hex, actual); free(hex); return stored; } diff --git a/src/temporal/span_functions.cpp b/src/temporal/span_functions.cpp index 8c6f5bdb..3e4eea52 100644 --- a/src/temporal/span_functions.cpp +++ b/src/temporal/span_functions.cpp @@ -152,10 +152,19 @@ void SpanFunctions::Span_as_hexwkb(DataChunk &args, ExpressionState &state, Vect Span *s = reinterpret_cast(copy); size_t hex_size = 0; char *hex = span_as_hexwkb(s, WKB_EXTENDED, &hex_size); - (void)hex_size; free(copy); if (!hex) throw InternalException("asHexWKB: span_as_hexwkb failed"); - string_t stored = StringVector::AddString(result, hex); + // Diagnostic: hex strings must be even-length. See + // src/geo/tgeompoint.cpp TgeoAsHexWkbExec for context. + size_t actual = strlen(hex); + if (actual % 2 != 0) { + std::string diag = "asHexWKB: span_as_hexwkb produced odd-length string: " + "strlen=" + std::to_string(actual) + + " sz=" + std::to_string(hex_size); + free(hex); + throw InternalException(diag); + } + string_t stored = StringVector::AddString(result, hex, actual); free(hex); return stored; }); diff --git a/src/temporal/spanset_functions.cpp b/src/temporal/spanset_functions.cpp index 4010f4d9..64f74434 100644 --- a/src/temporal/spanset_functions.cpp +++ b/src/temporal/spanset_functions.cpp @@ -110,10 +110,19 @@ void SpansetFunctions::Spanset_as_hexwkb(DataChunk &args, ExpressionState &state SpanSet *ss = reinterpret_cast(copy); size_t hex_size = 0; char *hex = spanset_as_hexwkb(ss, WKB_EXTENDED, &hex_size); - (void)hex_size; free(copy); if (!hex) throw InternalException("asHexWKB: spanset_as_hexwkb failed"); - string_t stored = StringVector::AddString(result, hex); + // Diagnostic: hex strings must be even-length. See + // src/geo/tgeompoint.cpp TgeoAsHexWkbExec for context. + size_t actual = strlen(hex); + if (actual % 2 != 0) { + std::string diag = "asHexWKB: spanset_as_hexwkb produced odd-length string: " + "strlen=" + std::to_string(actual) + + " sz=" + std::to_string(hex_size); + free(hex); + throw InternalException(diag); + } + string_t stored = StringVector::AddString(result, hex, actual); free(hex); return stored; }); diff --git a/src/temporal/tbox_functions.cpp b/src/temporal/tbox_functions.cpp index fd17bacf..4ea413ef 100644 --- a/src/temporal/tbox_functions.cpp +++ b/src/temporal/tbox_functions.cpp @@ -1905,12 +1905,21 @@ void TboxFunctions::Tbox_as_hexwkb(DataChunk &args, ExpressionState &state, Vect TBox *tbox = reinterpret_cast(copy); size_t hex_size = 0; char *hex = tbox_as_hexwkb(tbox, WKB_EXTENDED, &hex_size); - (void)hex_size; free(copy); if (!hex) { throw InternalException("asHexWKB: tbox_as_hexwkb failed"); } - string_t stored = StringVector::AddString(result, hex); + // Diagnostic: hex strings must be even-length. See + // src/geo/tgeompoint.cpp TgeoAsHexWkbExec for context. + size_t actual = strlen(hex); + if (actual % 2 != 0) { + std::string diag = "asHexWKB: tbox_as_hexwkb produced odd-length string: " + "strlen=" + std::to_string(actual) + + " sz=" + std::to_string(hex_size); + free(hex); + throw InternalException(diag); + } + string_t stored = StringVector::AddString(result, hex, actual); free(hex); return stored; } diff --git a/src/temporal/temporal_functions.cpp b/src/temporal/temporal_functions.cpp index 7e5cc932..acafbb3b 100644 --- a/src/temporal/temporal_functions.cpp +++ b/src/temporal/temporal_functions.cpp @@ -6105,8 +6105,20 @@ void TemporalFunctions::Temporal_as_hexwkb(DataChunk &args, ExpressionState &sta free(temp); throw InternalException("[Temporal_as_hexwkb] temporal_as_hexwkb returned NULL"); } - std::string ret(hex); - string_t stored = StringVector::AddStringOrBlob(result, ret); + // Diagnostic: hex strings must be even-length (2 chars / byte). + // See tgeompoint.cpp TgeoAsHexWkbExec for context on the + // macOS-arm64 "Invalid hex string, length (69)" failure. + size_t actual = strlen(hex); + if (actual % 2 != 0) { + std::string diag = "[Temporal_as_hexwkb] odd-length string: " + "strlen=" + std::to_string(actual) + + " sz=" + std::to_string(hex_size) + + " variant=" + std::to_string((int) variant); + free(hex); + free(temp); + throw InternalException(diag); + } + string_t stored = StringVector::AddStringOrBlob(result, hex, actual); free(hex); free(temp);