diff --git a/src/include/index/rtree_module.hpp b/src/include/index/rtree_module.hpp index b16c05de..106931b3 100644 --- a/src/include/index/rtree_module.hpp +++ b/src/include/index/rtree_module.hpp @@ -87,6 +87,11 @@ class TRTreeIndex : public BoundIndex { MeosType bbox_meostype; size_t bbox_size_; LogicalType column_type_; + // Multi-entry (MEST) split bound for temporal columns: each temporal + // value is indexed as up to this many tight per-segment bounding + // boxes. <= 1 degenerates to the single minimum bounding box (the + // pre-MEST behaviour). Tunable via WITH (max_boxes = N) on the index. + int max_boxes_ = 8; size_t current_size_ = 0; size_t current_capacity_ = 0; diff --git a/src/index/rtree_index_scan.cpp b/src/index/rtree_index_scan.cpp index 623fc5d1..db2003b8 100644 --- a/src/index/rtree_index_scan.cpp +++ b/src/index/rtree_index_scan.cpp @@ -28,12 +28,17 @@ BindInfo TRTreeIndexScanBindInfo(const optional_ptr bind_data_p) { struct RTreeIndexScanGlobalState : public GlobalTableFunctionState { DataChunk all_columns; vector projection_ids; + vector scanned_types; ColumnFetchState fetch_state; TableScanState local_storage_state; vector column_ids; unique_ptr index_state; - Vector row_ids = Vector(LogicalType::ROW_TYPE); + // rowid is BIGINT. Use the LogicalTypeId enumerator, never the + // LogicalType::ROW_TYPE static const member: ODR-using it from this + // extension TU emits a second definition that clashes with libduckdb + // ("multiple definition of duckdb::LogicalType::ROW_TYPE" at link). + Vector row_ids = Vector(LogicalType(LogicalTypeId::BIGINT)); }; static unique_ptr RTreeIndexScanInitGlobal(ClientContext &context, @@ -50,10 +55,24 @@ static unique_ptr RTreeIndexScanInitGlobal(ClientConte storage_t col_id = id; if (id != DConstants::INVALID_INDEX) { col_id = bind_data.table.GetColumn(LogicalIndex(id)).StorageOid(); + result->scanned_types.push_back(bind_data.table.GetColumn(LogicalIndex(id)).Type()); + } else { + // rowid column: BIGINT (see row_ids note re: ROW_TYPE ODR clash) + result->scanned_types.emplace_back(LogicalType(LogicalTypeId::BIGINT)); } result->column_ids.emplace_back(col_id); } + // Honour projection pushdown exactly like DuckDB's table_scan: when the + // optimizer removes filter-only columns, fetch the full scanned set into + // all_columns (which MUST be initialized with the scanned column types, + // else Fetch hits "Expected vector of type X, found Y") and reference + // the projected subset out of it. + if (input.CanRemoveFilterColumns()) { + result->projection_ids = input.projection_ids; + result->all_columns.Initialize(context, result->scanned_types); + } + // Initialize the storage scan state result->local_storage_state.Initialize(result->column_ids, context, input.filters); local_storage.InitializeScan(bind_data.table.GetStorage(), result->local_storage_state.local_state, input.filters); diff --git a/src/index/rtree_module.cpp b/src/index/rtree_module.cpp index 9b9fdaac..c931f087 100644 --- a/src/index/rtree_module.cpp +++ b/src/index/rtree_module.cpp @@ -121,7 +121,14 @@ TRTreeIndex::TRTreeIndex(const string &name, IndexConstraintType constraint_type if (!rtree_) { throw InternalException("Failed to create MEOS RTree"); } - + + // MEST split bound: WITH (max_boxes = N). Default 8; N <= 1 reproduces + // the pre-MEST single-box index. Only affects temporal columns. + auto mb_it = options_.find("max_boxes"); + if (mb_it != options_.end() && !mb_it->second.IsNull()) { + max_boxes_ = mb_it->second.GetValue(); + } + function_matcher = MakeFunctionMatcher(); } @@ -222,16 +229,11 @@ ErrorData TRTreeIndex::Insert(IndexLock &lock, DataChunk &data, Vector &row_ids) box = (STBox*)malloc(stbox_size); memcpy(box, stbox_data, stbox_size); - - int32_t box_srid = stbox_srid(box); - if (box_srid != 0) { - STBox *normalized_box = stbox_set_srid(box, 0); - if (normalized_box) { - free(box); - box = normalized_box; - } - } - } + // No SRID normalisation: index keys keep their natural SRID + // and && requires matching SRID, mirroring MobilityDB/PostGIS + // GiST. Normalising here but not at search time (or vice + // versa) makes ensure_same_srid reject every overlap. + } else { continue; } @@ -312,26 +314,18 @@ void TRTreeIndex::Construct(DataChunk &expression_result, Vector &row_identifier if (indexes_temporal) { const Temporal *temp = reinterpret_cast(data); - // For temporal-spatial types extract the bbox and strip the - // SRID so index keys agree with the SRID-stripped query box - // used at search time (InitializeScan strips it too). - if (bbox_meostype == T_STBOX) { - STBox *box = tspatial_to_stbox(temp); - if (!box) { - continue; - } - if (stbox_srid(box) != 0) { - STBox *normalized = stbox_set_srid(box, 0); - if (normalized) { - free(box); - box = normalized; - } - } - rtree_insert(rtree_, box, static_cast(row_data[i])); - free(box); - } else { - rtree_insert_temporal(rtree_, temp, static_cast(row_data[i])); - } + const int id = static_cast(row_data[i]); + // Multi-entry (MEST): index the temporal as up to max_boxes_ + // tight per-segment bounding boxes sharing this id; the + // splitter degenerates to a single minimum bounding box for + // instants or max_boxes_ <= 1, byte-identical to the pre-MEST + // single-box path. The produced boxes carry the temporal's + // own SRID, exactly like rtree_insert_temporal / + // tspatial_to_stbox. Do NOT pre-normalise the SRID: + // tspatial_set_srid(temp, 0) makes the SRID "unknown" and the + // stbox conversion inside the splitter then raises + // "The SRID cannot be unknown" at index-build time. + rtree_insert_temporal_split(rtree_, temp, id, max_boxes_); continue; } @@ -342,18 +336,9 @@ void TRTreeIndex::Construct(DataChunk &expression_result, Vector &row_identifier void *box = malloc(data_size); memcpy(box, data, data_size); - - if (bbox_meostype == T_STBOX) { - STBox *stbox = (STBox *) box; - int32_t box_srid = stbox_srid(stbox); - if (box_srid != 0) { - STBox *normalized_box = stbox_set_srid(stbox, 0); - if (normalized_box) { - free(box); - box = normalized_box; - } - } - } + // No SRID normalisation (see Construct/InitializeScan): keep the + // natural SRID so index keys and the query box agree; && requires + // matching SRID, mirroring MobilityDB/PostGIS GiST. void *target = (char *) boxes + (i * bbox_size_); memcpy(target, box, bbox_size_); @@ -417,21 +402,10 @@ unique_ptr TRTreeIndex::InitializeScan(const void* query_blob, s state->query_box = malloc(blob_size); memcpy(state->query_box, data, blob_size); + // No SRID normalisation: the query box keeps its natural SRID so + // it matches the index keys (which also keep theirs); && requires + // matching SRID, mirroring MobilityDB/PostGIS GiST. - if (bbox_meostype == T_STBOX) { - STBox *stbox = (STBox*)state->query_box; - int32_t query_srid = stbox_srid(stbox); - if (query_srid != 0) { - STBox *normalized_query = stbox_set_srid(stbox, 0); - if (normalized_query) { - free(state->query_box); - state->query_box = malloc(blob_size); - memcpy(state->query_box, normalized_query, blob_size); - free(normalized_query); - } - } - } - } else { throw InvalidInputException("Unsupported R-Tree operation: " + operation + " for bbox_type: " + std::to_string(bbox_meostype)); @@ -486,9 +460,18 @@ vector TRTreeIndex::Search(const void *query_box, RTreeSearchOp op) const if (count > 0) { results.reserve(count); + // Multi-entry (MEST) leaves return the same id once per + // overlapping per-segment box. Emit each row exactly once; + // duplicates would otherwise surface as duplicate rows in the + // index scan. Harmless no-op for single-box indexes. + unordered_set seen; + seen.reserve(count); for (int i = 0; i < count; i++) { int *id = static_cast(meos_array_get(ids, i)); - results.push_back(static_cast(*id)); + row_t rid = static_cast(*id); + if (seen.insert(rid).second) { + results.push_back(rid); + } } } } catch (...) { diff --git a/test/sql/parity/050_index_types.test b/test/sql/parity/050_index_types.test index 40150568..84495b5a 100644 --- a/test/sql/parity/050_index_types.test +++ b/test/sql/parity/050_index_types.test @@ -283,3 +283,80 @@ statement error CREATE INDEX i_unsupported ON idx_unsupported USING TRTREE (x); ---- TRTREE index supports + +# ============================================================================= +# Multi-entry (MEST) indexing on temporal columns. +# +# A temporal column is indexed as up to max_boxes tight per-segment +# bounding boxes per row. This must (a) never produce false negatives, +# (b) return each row exactly once even when many of its per-segment +# boxes overlap the query (dedup), and (c) behave identically for the +# default split, an explicit max_boxes, and the degenerate max_boxes = 1 +# single-box index. +# ============================================================================= + +statement ok +CREATE TABLE idx_mest(t tgeompoint); + +# trip 1: a wiggly zig-zag with a large X-extent (many tight segments); +# trip 2: a small trip far away. +statement ok +INSERT INTO idx_mest VALUES + ('[Point(0 0)@2000-01-01, Point(10 0)@2000-01-02, Point(0 0)@2000-01-03, Point(10 0)@2000-01-04, Point(0 0)@2000-01-05]'::tgeompoint), + ('[Point(1000 1000)@2000-01-01, Point(1001 1001)@2000-01-02]'::tgeompoint); + +statement ok +CREATE INDEX i_mest ON idx_mest USING TRTREE (t); + +# Query box covers the whole zig-zag and therefore overlaps many of +# trip 1's per-segment boxes: it must still come back exactly once. +query I +SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((-1,-1),(11,11))'::stbox; +---- +1 + +# Disjoint from both trips: a true negative for single-box and MEST alike. +query I +SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((100,100),(200,200))'::stbox; +---- +0 + +# Near trip 2 only. +query I +SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((999,999),(1002,1002))'::stbox; +---- +1 + +statement ok +DROP INDEX i_mest; + +# Explicit, tighter split: same correct results, no false negatives. +statement ok +CREATE INDEX i_mest ON idx_mest USING TRTREE (t) WITH (max_boxes = 16); + +query I +SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((-1,-1),(11,11))'::stbox; +---- +1 + +query I +SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((100,100),(200,200))'::stbox; +---- +0 + +statement ok +DROP INDEX i_mest; + +# Degenerate single-box index (pre-MEST behaviour) via the option. +statement ok +CREATE INDEX i_mest ON idx_mest USING TRTREE (t) WITH (max_boxes = 1); + +query I +SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((-1,-1),(11,11))'::stbox; +---- +1 + +query I +SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((999,999),(1002,1002))'::stbox; +---- +1