Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions src/index/rtree_index_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,30 @@ static void RTreeIndexScanExecute(ClientContext &context, TableFunctionInput &da
//-------------------------------------------------------------------------
// Get Function
//-------------------------------------------------------------------------

// Pushdown-type hook for the R-tree index scan: it always answers "no".
//
// Why: probing the R-tree filters rows by bounding-box overlap only. That is
// already the final answer for the exact predicates (&&, @>), but for the
// lossy spatial-rel functions the bbox is merely a superset, so the original
// predicate still has to be evaluated or the scan would emit false
// positives.
//
// Declaring that this scan can apply no pushed expression filter makes
// DuckDB retain every pushed predicate as an exact recheck PhysicalFilter
// placed directly above the scan (execution/physical_plan/plan_get.cpp
// rebuilds it through ExpressionFilter::ToExpression). This is the
// "a lossy index always rechecks" contract of PostGIS GiST and MobilityDB's
// tspatial_supportfn: the index is a prefilter, the recheck is correctness.
//
// Both arguments are deliberately unnamed: the answer does not depend on them.
static bool RTreeIndexScanSupportsPushdownType(const FunctionData &, idx_t) {
    constexpr bool can_apply_pushed_filter = false;
    return can_apply_pushed_filter;
}

// Builds the TableFunction descriptor for the R-tree index scan.
//
// The function is registered under the name "mobility rtree index", takes no
// arguments, and executes through RTreeIndexScanExecute with
// RTreeIndexScanInitGlobal as its global-state initializer and
// TRTreeIndexScanBindInfo as its bind-info provider.
//
// Pushdown configuration:
//   - projection pushdown is enabled;
//   - filter pushdown is disabled, and the pushdown-type hook
//     (RTreeIndexScanSupportsPushdownType) always returns false.
//
// Returns the fully configured TableFunction by value.
TableFunction TRTreeIndexScanFunction::GetFunction() {
    TableFunction func("mobility rtree index", {}, RTreeIndexScanExecute);
    func.init_global = RTreeIndexScanInitGlobal;
    func.get_bind_info = TRTreeIndexScanBindInfo;

    func.projection_pushdown = true;
    // Assigned exactly once; a second identical assignment was redundant.
    func.filter_pushdown = false;
    func.supports_pushdown_type = RTreeIndexScanSupportsPushdownType;
    return func;
}

Expand Down
9 changes: 8 additions & 1 deletion src/index/rtree_module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,14 @@ unique_ptr<ExpressionMatcher> TRTreeIndex::MakeFunctionMatcher() const {
unordered_set<string> supported_functions;

if (bbox_meostype == T_STBOX) {
supported_functions = {"&&"};
// && is the exact bbox predicate; the spatial-rel functions are
// lossy supersets whose bbox prefilter the index serves while the
// original predicate is rechecked exactly above the scan by the
// recheck PhysicalFilter (the scan reports supports_pushdown_type
// = false; see RTreeIndexScanSupportsPushdownType). Mirrors
// MobilityDB's tspatial_supportfn, in function form.
supported_functions = {"&&", "eIntersects", "eContains",
"eDisjoint", "eTouches"};
} else if (bbox_meostype == T_TSTZSPAN) {
supported_functions = {"&&", "@>"};
} else {
Expand Down
66 changes: 55 additions & 11 deletions src/index/rtree_optimize_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
#include "index/rtree_module.hpp"
#include "index/rtree_index_scan.hpp"
#include "time_util.hpp"
#include "geo_util.hpp"
#include <unordered_set>
#include <string>



Expand Down Expand Up @@ -84,41 +87,82 @@ class TRTreeIndexScanOptimizer : public OptimizerExtension {
}

const auto &constant = const_expr->Cast<BoundConstantExpression>();


static const std::unordered_set<std::string> spatial_rel_fns =
{"eIntersects", "eContains", "eDisjoint", "eTouches"};
const bool is_spatial_rel =
spatial_rel_fns.count(function_name) > 0;

void *query_box = nullptr;
size_t box_size = 0;

if (constant.value.type().id() == LogicalTypeId::BLOB) {


if (is_spatial_rel) {
// supportfn-equivalent (mirrors MobilityDB
// tspatial_supportfn): the predicate is a lossy spatial
// relationship; synthesize its bbox && prefilter from the
// constant geometry argument. The original spatial-rel
// predicate is rechecked exactly above the index scan
// (the scan reports supports_pushdown_type = false, so
// plan_get.cpp keeps it as a recheck PhysicalFilter), so
// the bbox superset never drops nor wrongly keeps a row.
if (constant.value.type().id() != LogicalTypeId::BLOB) {
return false;
}
auto blob_data = constant.value.GetValueUnsafe<duckdb::string_t>();
GSERIALIZED *gs = GeometryToGSerialized(blob_data, 0);
if (!gs) {
return false;
}
STBox *box = geo_to_stbox(gs);
free(gs);
if (!box) {
return false;
}
box_size = sizeof(STBox);
query_box = malloc(box_size);
if (query_box) {
memcpy(query_box, box, box_size);
}
free(box);
}
else if (constant.value.type().id() == LogicalTypeId::BLOB) {

auto blob_data = constant.value.GetValueUnsafe<duckdb::string_t>();

const uint8_t *data = reinterpret_cast<const uint8_t *>(blob_data.GetDataUnsafe());
box_size = blob_data.GetSize();

query_box = malloc(box_size);
memcpy(query_box, data, box_size);

}
else if (constant.value.type().id() == LogicalTypeId::TIMESTAMP_TZ) {
auto timestamp_duckdb = constant.value.GetValueUnsafe<timestamp_tz_t>();

timestamp_tz_t ts_meos = DuckDBToMeosTimestamp(timestamp_duckdb);

box_size = sizeof(timestamp_tz_t);
query_box = malloc(box_size);

if (query_box) {
memcpy(query_box, &ts_meos, box_size);
}
}


if (!query_box) {
return false;
}

// The index probe is always a bbox overlap; a spatial-rel
// name is only the recognition key, not an index operation.
// Exactness is restored by the recheck PhysicalFilter that
// plan_get.cpp builds above this scan (see
// RTreeIndexScanSupportsPushdownType).
const string index_op =
is_spatial_rel ? string("&&") : function_name;
bind_data = make_uniq<TRTreeIndexScanBindData>(
duck_table, rtree_index, 1000, query_box, box_size, function_name);
duck_table, rtree_index, 1000, query_box, box_size, index_op);
return true;
});

Expand Down
69 changes: 69 additions & 0 deletions test/sql/parity/050_index_types.test
Original file line number Diff line number Diff line change
Expand Up @@ -360,3 +360,72 @@ query I
SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((999,999),(1002,1002))'::stbox;
----
1

# =============================================================================
# Spatial-relationship predicate pushdown (supportfn-equivalent).
#
# A predicate like eIntersects(trip, <const geometry>) is lossy: its bbox
# is only a superset of the true answer. The optimizer rewrites the scan to
# probe the TRTREE with the synthesized bbox &&, and the original predicate
# is rechecked exactly above the scan. The decisive regression is that the
# index must NOT leak a row whose bbox overlaps the query geometry but which
# does not actually intersect it (a missing recheck would return 2, not 1).
# =============================================================================

statement ok
CREATE TABLE idx_srel(t tgeompoint);

# A: an L-shaped path. Its bbox X[0,10] Y[0,10] overlaps the query polygon
# P = Polygon((4 4,6 4,6 6,4 6,4 4)), but the path runs along y=0 then
# x=10 and never enters P -> bbox-overlap false positive.
# B: a vertical path through x=5 that crosses P -> true match.
# C: a far-away path, bbox-disjoint from P -> true miss.
statement ok
INSERT INTO idx_srel VALUES
('[Point(0 0)@2000-01-01, Point(10 0)@2000-01-02, Point(10 10)@2000-01-03]'::tgeompoint),
('[Point(5 0)@2000-01-01, Point(5 10)@2000-01-02]'::tgeompoint),
('[Point(100 100)@2000-01-01, Point(101 101)@2000-01-02]'::tgeompoint);

# Ground truth without an index: only B intersects P.
query I
SELECT count(*) FROM idx_srel
WHERE eIntersects(t, geometry 'Polygon((4 4, 6 4, 6 6, 4 6, 4 4))');
----
1

# Complement of the ground truth, still without an index: the two rows that
# do not intersect P are A (bbox overlap only) and C (far away).
query I
SELECT count(*) FROM idx_srel
WHERE NOT eIntersects(t, geometry 'Polygon((4 4, 6 4, 6 6, 4 6, 4 4))');
----
2

statement ok
CREATE INDEX i_srel ON idx_srel USING TRTREE (t);

# Same answer with the index: the bbox-overlap bait A is dropped by the
# exact recheck. A regression to a missing recheck would return 2.
query I
SELECT count(*) FROM idx_srel
WHERE eIntersects(t, geometry 'Polygon((4 4, 6 4, 6 6, 4 6, 4 4))');
----
1

# A polygon whose bbox is disjoint from every row: clean true negative.
query I
SELECT count(*) FROM idx_srel
WHERE eIntersects(t, geometry 'Polygon((50 50, 51 50, 51 51, 50 51, 50 50))');
----
0

# Drop the plain index so it can be recreated with the max_boxes option below.
statement ok
DROP INDEX i_srel;

# Same soundness under MEST multi-entry indexing.
statement ok
CREATE INDEX i_srel ON idx_srel USING TRTREE (t) WITH (max_boxes = 8);

# Expect the same single match (B) as in the unindexed ground truth.
query I
SELECT count(*) FROM idx_srel
WHERE eIntersects(t, geometry 'Polygon((4 4, 6 4, 6 6, 4 6, 4 4))');
----
1
Loading