Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/include/index/rtree_module.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ class TRTreeIndex : public BoundIndex {
MeosType bbox_meostype;
size_t bbox_size_;
LogicalType column_type_;
// Multi-entry (MEST) split bound for temporal columns: each temporal
// value is indexed as up to this many tight per-segment bounding
// boxes. <= 1 degenerates to the single minimum bounding box (the
// pre-MEST behaviour). Tunable via WITH (max_boxes = N) on the index.
int max_boxes_ = 8;

size_t current_size_ = 0;
size_t current_capacity_ = 0;
Expand Down
21 changes: 20 additions & 1 deletion src/index/rtree_index_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,17 @@ BindInfo TRTreeIndexScanBindInfo(const optional_ptr<FunctionData> bind_data_p) {
//! Global state shared by one TRTREE index scan.
struct RTreeIndexScanGlobalState : public GlobalTableFunctionState {
	//! Chunk holding the full scanned column set. It is only initialized
	//! (with scanned_types) when the optimizer may remove filter-only
	//! columns; the projected subset is then referenced out of it.
	DataChunk all_columns;
	//! Projection ids copied from the init input when filter columns can be
	//! removed; left empty otherwise.
	vector<idx_t> projection_ids;
	//! Logical type of every scanned column, in column_ids order. The rowid
	//! pseudo-column is recorded as BIGINT.
	vector<LogicalType> scanned_types;
	//! Fetch state for row fetches (presumably reused across chunks -- the
	//! fetch loop is outside this view).
	ColumnFetchState fetch_state;
	//! Scan state for transaction-local storage, initialized with column_ids
	//! and the pushed-down filters.
	TableScanState local_storage_state;
	//! Storage ids of the scanned columns.
	vector<StorageIndex> column_ids;

	//! Index-side scan state (populated outside this view).
	unique_ptr<IndexScanState> index_state;
	// rowid is BIGINT. Use the LogicalTypeId enumerator, never the
	// LogicalType::ROW_TYPE static const member: ODR-using it from this
	// extension TU emits a second definition that clashes with libduckdb
	// ("multiple definition of duckdb::LogicalType::ROW_TYPE" at link).
	// There must be exactly one row_ids member; a second declaration built
	// from ROW_TYPE would both redefine the member and reintroduce the clash.
	Vector row_ids = Vector(LogicalType(LogicalTypeId::BIGINT));
};

static unique_ptr<GlobalTableFunctionState> RTreeIndexScanInitGlobal(ClientContext &context,
Expand All @@ -50,10 +55,24 @@ static unique_ptr<GlobalTableFunctionState> RTreeIndexScanInitGlobal(ClientConte
storage_t col_id = id;
if (id != DConstants::INVALID_INDEX) {
col_id = bind_data.table.GetColumn(LogicalIndex(id)).StorageOid();
result->scanned_types.push_back(bind_data.table.GetColumn(LogicalIndex(id)).Type());
} else {
// rowid column: BIGINT (see row_ids note re: ROW_TYPE ODR clash)
result->scanned_types.emplace_back(LogicalType(LogicalTypeId::BIGINT));
}
result->column_ids.emplace_back(col_id);
}

// Honour projection pushdown exactly like DuckDB's table_scan: when the
// optimizer removes filter-only columns, fetch the full scanned set into
// all_columns (which MUST be initialized with the scanned column types,
// else Fetch hits "Expected vector of type X, found Y") and reference
// the projected subset out of it.
if (input.CanRemoveFilterColumns()) {
result->projection_ids = input.projection_ids;
result->all_columns.Initialize(context, result->scanned_types);
}

// Initialize the storage scan state
result->local_storage_state.Initialize(result->column_ids, context, input.filters);
local_storage.InitializeScan(bind_data.table.GetStorage(), result->local_storage_state.local_state, input.filters);
Expand Down
99 changes: 41 additions & 58 deletions src/index/rtree_module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,14 @@ TRTreeIndex::TRTreeIndex(const string &name, IndexConstraintType constraint_type
if (!rtree_) {
throw InternalException("Failed to create MEOS RTree");
}


// MEST split bound: WITH (max_boxes = N). Default 8; N <= 1 reproduces
// the pre-MEST single-box index. Only affects temporal columns.
auto mb_it = options_.find("max_boxes");
if (mb_it != options_.end() && !mb_it->second.IsNull()) {
max_boxes_ = mb_it->second.GetValue<int32_t>();
}

function_matcher = MakeFunctionMatcher();
}

Expand Down Expand Up @@ -222,16 +229,11 @@ ErrorData TRTreeIndex::Insert(IndexLock &lock, DataChunk &data, Vector &row_ids)
box = (STBox*)malloc(stbox_size);

memcpy(box, stbox_data, stbox_size);

int32_t box_srid = stbox_srid(box);
if (box_srid != 0) {
STBox *normalized_box = stbox_set_srid(box, 0);
if (normalized_box) {
free(box);
box = normalized_box;
}
}
}
// No SRID normalisation: index keys keep their natural SRID
// and && requires matching SRID, mirroring MobilityDB/PostGIS
// GiST. Normalising here but not at search time (or vice
// versa) makes ensure_same_srid reject every overlap.
}
else {
continue;
}
Expand Down Expand Up @@ -312,26 +314,18 @@ void TRTreeIndex::Construct(DataChunk &expression_result, Vector &row_identifier

if (indexes_temporal) {
const Temporal *temp = reinterpret_cast<const Temporal *>(data);
// For temporal-spatial types extract the bbox and strip the
// SRID so index keys agree with the SRID-stripped query box
// used at search time (InitializeScan strips it too).
if (bbox_meostype == T_STBOX) {
STBox *box = tspatial_to_stbox(temp);
if (!box) {
continue;
}
if (stbox_srid(box) != 0) {
STBox *normalized = stbox_set_srid(box, 0);
if (normalized) {
free(box);
box = normalized;
}
}
rtree_insert(rtree_, box, static_cast<int>(row_data[i]));
free(box);
} else {
rtree_insert_temporal(rtree_, temp, static_cast<int>(row_data[i]));
}
const int id = static_cast<int>(row_data[i]);
// Multi-entry (MEST): index the temporal as up to max_boxes_
// tight per-segment bounding boxes sharing this id; the
// splitter degenerates to a single minimum bounding box for
// instants or max_boxes_ <= 1, byte-identical to the pre-MEST
// single-box path. The produced boxes carry the temporal's
// own SRID, exactly like rtree_insert_temporal /
// tspatial_to_stbox. Do NOT pre-normalise the SRID:
// tspatial_set_srid(temp, 0) makes the SRID "unknown" and the
// stbox conversion inside the splitter then raises
// "The SRID cannot be unknown" at index-build time.
rtree_insert_temporal_split(rtree_, temp, id, max_boxes_);
continue;
}

Expand All @@ -342,18 +336,9 @@ void TRTreeIndex::Construct(DataChunk &expression_result, Vector &row_identifier

void *box = malloc(data_size);
memcpy(box, data, data_size);

if (bbox_meostype == T_STBOX) {
STBox *stbox = (STBox *) box;
int32_t box_srid = stbox_srid(stbox);
if (box_srid != 0) {
STBox *normalized_box = stbox_set_srid(stbox, 0);
if (normalized_box) {
free(box);
box = normalized_box;
}
}
}
// No SRID normalisation (see Insert/InitializeScan): keep the
// natural SRID so index keys and the query box agree; && requires
// matching SRID, mirroring MobilityDB/PostGIS GiST.

void *target = (char *) boxes + (i * bbox_size_);
memcpy(target, box, bbox_size_);
Expand Down Expand Up @@ -417,21 +402,10 @@ unique_ptr<IndexScanState> TRTreeIndex::InitializeScan(const void* query_blob, s

state->query_box = malloc(blob_size);
memcpy(state->query_box, data, blob_size);
// No SRID normalisation: the query box keeps its natural SRID so
// it matches the index keys (which also keep theirs); && requires
// matching SRID, mirroring MobilityDB/PostGIS GiST.

if (bbox_meostype == T_STBOX) {
STBox *stbox = (STBox*)state->query_box;
int32_t query_srid = stbox_srid(stbox);
if (query_srid != 0) {
STBox *normalized_query = stbox_set_srid(stbox, 0);
if (normalized_query) {
free(state->query_box);
state->query_box = malloc(blob_size);
memcpy(state->query_box, normalized_query, blob_size);
free(normalized_query);
}
}
}

} else {
throw InvalidInputException("Unsupported R-Tree operation: " + operation +
" for bbox_type: " + std::to_string(bbox_meostype));
Expand Down Expand Up @@ -486,9 +460,18 @@ vector<row_t> TRTreeIndex::Search(const void *query_box, RTreeSearchOp op) const

if (count > 0) {
results.reserve(count);
// Multi-entry (MEST) leaves return the same id once per
// overlapping per-segment box. Emit each row exactly once;
// duplicates would otherwise surface as duplicate rows in the
// index scan. Harmless no-op for single-box indexes.
unordered_set<row_t> seen;
seen.reserve(count);
for (int i = 0; i < count; i++) {
int *id = static_cast<int *>(meos_array_get(ids, i));
results.push_back(static_cast<row_t>(*id));
row_t rid = static_cast<row_t>(*id);
if (seen.insert(rid).second) {
results.push_back(rid);
}
}
}
} catch (...) {
Expand Down
77 changes: 77 additions & 0 deletions test/sql/parity/050_index_types.test
Original file line number Diff line number Diff line change
Expand Up @@ -283,3 +283,80 @@ statement error
CREATE INDEX i_unsupported ON idx_unsupported USING TRTREE (x);
----
TRTREE index supports

# =============================================================================
# Multi-entry (MEST) indexing on temporal columns.
#
# A temporal column is indexed as up to max_boxes tight per-segment
# bounding boxes per row. This must (a) never produce false negatives,
# (b) return each row exactly once even when many of its per-segment
# boxes overlap the query (dedup), and (c) behave identically for the
# default split, an explicit max_boxes, and the degenerate max_boxes = 1
# single-box index.
# =============================================================================

statement ok
CREATE TABLE idx_mest(t tgeompoint);

# trip 1: a wiggly zig-zag with a large X-extent (many tight segments);
# trip 2: a small trip far away.
statement ok
INSERT INTO idx_mest VALUES
('[Point(0 0)@2000-01-01, Point(10 0)@2000-01-02, Point(0 0)@2000-01-03, Point(10 0)@2000-01-04, Point(0 0)@2000-01-05]'::tgeompoint),
('[Point(1000 1000)@2000-01-01, Point(1001 1001)@2000-01-02]'::tgeompoint);

statement ok
CREATE INDEX i_mest ON idx_mest USING TRTREE (t);

# Query box covers the whole zig-zag and therefore overlaps many of
# trip 1's per-segment boxes: it must still come back exactly once.
query I
SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((-1,-1),(11,11))'::stbox;
----
1

# Disjoint from both trips: a true negative for single-box and MEST alike.
query I
SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((100,100),(200,200))'::stbox;
----
0

# Near trip 2 only.
query I
SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((999,999),(1002,1002))'::stbox;
----
1

statement ok
DROP INDEX i_mest;

# Explicit, tighter split: same correct results, no false negatives.
statement ok
CREATE INDEX i_mest ON idx_mest USING TRTREE (t) WITH (max_boxes = 16);

query I
SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((-1,-1),(11,11))'::stbox;
----
1

query I
SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((100,100),(200,200))'::stbox;
----
0

statement ok
DROP INDEX i_mest;

# Degenerate single-box index (pre-MEST behaviour) via the option.
statement ok
CREATE INDEX i_mest ON idx_mest USING TRTREE (t) WITH (max_boxes = 1);

query I
SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((-1,-1),(11,11))'::stbox;
----
1

query I
SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((999,999),(1002,1002))'::stbox;
----
1
Loading