From 24f947a7b302a41d3b0b804d706bfcd302acd353 Mon Sep 17 00:00:00 2001 From: Esteban Zimanyi Date: Fri, 5 Jun 2026 22:35:42 +0200 Subject: [PATCH] Add static-geometry to h3indexset prefilter UDFs Expose the static-geometry h3indexset prefilter that narrows a th3index candidate set before the exact spatial test. The trip x static cell-set predicate derives the canonical ?= / ever_eq surface, registered as the everEq comparison overload in both argument directions and backed by the ever_eq_h3indexset_th3index kernel, with geoToH3IndexSet building the static side. --- src/h3/th3index.cpp | 92 +++++++++++++++++++++++++++++++++++++ src/include/h3/th3index.hpp | 15 ++++++ test/sql/h3_prefilter.test | 30 ++++++++++++ 3 files changed, 137 insertions(+) create mode 100644 test/sql/h3_prefilter.test diff --git a/src/h3/th3index.cpp b/src/h3/th3index.cpp index ee6a03e1..38dade5f 100644 --- a/src/h3/th3index.cpp +++ b/src/h3/th3index.cpp @@ -11,6 +11,8 @@ #include "temporal/temporal.hpp" #include "geo/tgeompoint.hpp" #include "geo/tgeogpoint.hpp" +#include "geo_util.hpp" +#include "spatial/spatial_types.hpp" #include "tydef.hpp" #include "duckdb/common/types/data_chunk.hpp" #include "duckdb/main/extension/extension_loader.hpp" @@ -74,9 +76,16 @@ LogicalType H3IndexTypes::TH3INDEX() { return type; } +LogicalType H3IndexTypes::H3INDEXSET() { /* BLOB logical type tagged with the alias H3INDEXSET */ + auto type = LogicalType(LogicalTypeId::BLOB); + type.SetAlias("H3INDEXSET"); + return type; +} + void H3IndexTypes::RegisterTypes(ExtensionLoader &loader) { loader.RegisterType("H3INDEX", H3INDEX()); loader.RegisterType("TH3INDEX", TH3INDEX()); + loader.RegisterType("H3INDEXSET", H3INDEXSET()); } void H3IndexTypes::RegisterCastFunctions(ExtensionLoader &loader) { @@ -111,6 +120,22 @@ inline string_t TempToBlob(Vector &result, Temporal *t) { /* TINT → BIGINT result for the int-returning H3 predicates. 
*/ inline bool IntToBool(int r) { return r != 0; } +inline Set *BlobToSet(string_t blob) { /* Copies the blob bytes into a malloc-allocated buffer and returns it as a Set pointer; both call sites in this patch release it with free(). The malloc result is not checked. */ + size_t sz = blob.GetSize(); + uint8_t *copy = (uint8_t *) malloc(sz); + memcpy(copy, blob.GetData(), sz); + return reinterpret_cast(copy); +} + +inline string_t SetToBlob(Vector &result, Set *s) { /* Adds set_mem_size(s) bytes of s to result through StringVector::AddStringOrBlob, then releases s with free(); a null s returns a default string_t. */ + if (!s) return string_t(); + size_t sz = set_mem_size(s); + string_t out = StringVector::AddStringOrBlob( + result, string_t(reinterpret_cast(s), sz)); + free(s); + return out; +} + } // namespace /* ===================================================================== @@ -544,6 +569,60 @@ TH3_T_T_T_TEMP(Tne_th3index_th3index, tne_th3index_th3index) #undef TH3_T_T_T_TEMP +/* ===================================================================== + * Static geometry → h3indexset, and h3indexset × th3index prefilter + * ===================================================================== */ + +void H3IndexFunctions::Geo_to_h3index_set(DataChunk &args, ExpressionState &state, + Vector &result) { /* SQL geoToH3IndexSet(geometry, resolution): converts the geometry with SRID 4326, builds the cell set with geo_to_h3index_set and returns it as a blob. */ + BinaryExecutor::ExecuteWithNulls( + args.data[0], args.data[1], result, args.size(), + [&](string_t geom_blob, int32_t resolution, ValidityMask &mask, + idx_t idx) -> string_t { + /* H3 cells are inherently geographic (WGS84). The DuckDB + * spatial GEOMETRY blob has no embedded SRID, so callers + * must pass a geometry in EPSG:4326 coordinates (e.g. + * `ST_Transform(geom, 'EPSG:4326')`). We mark the + * GSERIALIZED with SRID 4326 explicitly. 
*/ + GSERIALIZED *gs = GeometryToGSerialized(geom_blob, 4326); + if (!gs) { mask.SetInvalid(idx); return string_t(); } /* no GSERIALIZED produced: mark the result row invalid */ + Set *s = geo_to_h3index_set(gs, resolution); + free(gs); + if (!s) { mask.SetInvalid(idx); return string_t(); } /* no set produced: mark the result row invalid */ + return SetToBlob(result, s); + }); +} + +void H3IndexFunctions::Ever_eq_h3indexset_th3index(DataChunk &args, + ExpressionState &state, Vector &result) { /* everEq(h3indexset, th3index): copies both blobs, calls the ever_eq_h3indexset_th3index kernel, then frees both copies. */ + BinaryExecutor::ExecuteWithNulls( + args.data[0], args.data[1], result, args.size(), + [&](string_t set_blob, string_t temp_blob, ValidityMask &mask, + idx_t idx) -> bool { + Set *s = BlobToSet(set_blob); + Temporal *t = BlobToTemp(temp_blob); + int r = ever_eq_h3indexset_th3index(s, t); + free(s); free(t); + if (r < 0) { mask.SetInvalid(idx); return false; } /* negative kernel result: mark the row invalid instead of returning a boolean */ + return IntToBool(r); + }); +} + +void H3IndexFunctions::Ever_eq_th3index_h3indexset(DataChunk &args, + ExpressionState &state, Vector &result) { /* everEq(th3index, h3indexset): calls the same kernel as the set-first overload; only the SQL argument order differs. */ + BinaryExecutor::ExecuteWithNulls( + args.data[0], args.data[1], result, args.size(), + [&](string_t temp_blob, string_t set_blob, ValidityMask &mask, + idx_t idx) -> bool { + Temporal *t = BlobToTemp(temp_blob); + Set *s = BlobToSet(set_blob); + int r = ever_eq_h3indexset_th3index(s, t); + free(s); free(t); + if (r < 0) { mask.SetInvalid(idx); return false; } /* negative kernel result: mark the row invalid instead of returning a boolean */ + return IntToBool(r); + }); +} + /* ===================================================================== * Registration * ===================================================================== */ @@ -706,6 +785,19 @@ void H3IndexTypes::RegisterScalarFunctions(ExtensionLoader &loader) { duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( "tgeogpointGreatCircleDistance", {TgeogpointType::TGEOGPOINT(), TgeogpointType::TGEOGPOINT(), V}, TemporalTypes::TFLOAT(), H3IndexFunctions::Tgeogpoint_great_circle_distance)); + + /* --- Static geometry h3 prefilter for trip × static cross-joins --- */ + const auto H3SET = H3INDEXSET(); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "geoToH3IndexSet", 
{GeoTypes::GEOMETRY(), LogicalType::INTEGER}, + H3SET, H3IndexFunctions::Geo_to_h3index_set)); /* (GEOMETRY, INTEGER) -> H3INDEXSET */ + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "everEq", {H3SET, TH3}, LogicalType::BOOLEAN, + H3IndexFunctions::Ever_eq_h3indexset_th3index)); /* set-first overload */ + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "everEq", {TH3, H3SET}, LogicalType::BOOLEAN, + H3IndexFunctions::Ever_eq_th3index_h3indexset)); /* trajectory-first overload */ } } // namespace duckdb diff --git a/src/include/h3/th3index.hpp b/src/include/h3/th3index.hpp index 8c2ab960..bff314ae 100644 --- a/src/include/h3/th3index.hpp +++ b/src/include/h3/th3index.hpp @@ -16,6 +16,11 @@ namespace duckdb { struct H3IndexTypes { static LogicalType H3INDEX(); static LogicalType TH3INDEX(); + /* H3INDEXSET is a Set, stored as a serialized Set* blob, + * built from a static geometry by `geoToH3IndexSet`. Used as the + * static side of the trip×static h3 prefilter on Q4 / Q7 / Q11 / + * Q12 / Q15 / Q17. */ + static LogicalType H3INDEXSET(); static void RegisterTypes(ExtensionLoader &loader); static void RegisterCastFunctions(ExtensionLoader &loader); @@ -113,6 +118,16 @@ struct H3IndexFunctions { static void Th3index_cell_area(DataChunk &args, ExpressionState &state, Vector &result); static void Th3index_edge_length(DataChunk &args, ExpressionState &state, Vector &result); static void Tgeogpoint_great_circle_distance(DataChunk &args, ExpressionState &state, Vector &result); + + /* Static geometry → h3indexset (Set) at a given H3 resolution */ + static void Geo_to_h3index_set(DataChunk &args, ExpressionState &state, Vector &result); + + /* Trip × static h3indexset prefilter (everEq): true if the th3index + * trajectory ever takes a cell that equals a cell of the static set. + * Registered as the everEq comparison overload in both argument + * directions (h3indexset,th3index) and (th3index,h3indexset). 
*/ + static void Ever_eq_h3indexset_th3index(DataChunk &args, ExpressionState &state, Vector &result); + static void Ever_eq_th3index_h3indexset(DataChunk &args, ExpressionState &state, Vector &result); }; } // namespace duckdb diff --git a/test/sql/h3_prefilter.test b/test/sql/h3_prefilter.test new file mode 100644 index 00000000..5e3e557b --- /dev/null +++ b/test/sql/h3_prefilter.test @@ -0,0 +1,30 @@ +# name: test/sql/h3_prefilter.test +# description: static-geometry h3indexset prefilter (geoToH3IndexSet + everEq) smoke coverage +# group: [mobilityduck] + +require mobilityduck + +# everEq(h3indexset, th3index): a trip running through the polygon ever takes a +# cell that equals a cell of the polygon's set +query I +SELECT everEq( + geoToH3IndexSet(ST_GeomFromText('POLYGON((-1 -1, -1 1, 1 1, 1 -1, -1 -1))'), 4), + th3index(tgeompoint 'SRID=4326;[POINT(0 0)@2000-01-01, POINT(0.5 0.5)@2000-01-02]', 4)); +---- +true + +# everEq is registered in both argument directions +query I +SELECT everEq( + th3index(tgeompoint 'SRID=4326;[POINT(0 0)@2000-01-01, POINT(0.5 0.5)@2000-01-02]', 4), + geoToH3IndexSet(ST_GeomFromText('POLYGON((-1 -1, -1 1, 1 1, 1 -1, -1 -1))'), 4)); +---- +true + +# a polygon disjoint from the trip shares no cell with it +query I +SELECT everEq( + geoToH3IndexSet(ST_GeomFromText('POLYGON((39 39, 39 41, 41 41, 41 39, 39 39))'), 4), + th3index(tgeompoint 'SRID=4326;[POINT(0 0)@2000-01-01, POINT(0.5 0.5)@2000-01-02]', 4)); +---- +false