Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/include/index/rtree_module.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ class TRTreeIndex : public BoundIndex {

MeosType bbox_meostype;
size_t bbox_size_;
LogicalType column_type_;

size_t current_size_ = 0;
size_t current_capacity_ = 0;
Expand Down
99 changes: 16 additions & 83 deletions src/index/rtree_index_create_physical.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,99 +151,32 @@ class TRTreeIndexConstructTask final : public ExecutorTask {

const auto count = scan_chunk.size();

auto &vec_vec = scan_chunk.data[0];
auto &rowid_vec = scan_chunk.data[1];
auto &vec_vec = scan_chunk.data[0];
auto &rowid_vec = scan_chunk.data[1];

auto vector_type = vec_vec.GetType();

if (vec_vec.GetVectorType() != VectorType::FLAT_VECTOR) {
vec_vec.Flatten(count);
}
if (rowid_vec.GetVectorType() != VectorType::FLAT_VECTOR) {
rowid_vec.Flatten(count);
}

UnifiedVectorFormat vec_format;
UnifiedVectorFormat rowid_format;

vec_vec.ToUnifiedFormat(count, vec_format);
rowid_vec.ToUnifiedFormat(count, rowid_format);

const auto row_ptr = UnifiedVectorFormat::GetData<row_t>(rowid_format);
STBox* boxes = (STBox*)malloc(sizeof(STBox) * count);
if (!boxes) {
executor.PushError(ErrorData("Failed to allocate memory for STBox array"));
return TaskExecutionResult::TASK_ERROR;
}

idx_t valid_count = 0;
vector<row_t> valid_row_ids;

for (idx_t i = 0; i < count; i++) {
const auto vec_idx = vec_format.sel->get_index(i);
const auto row_idx = rowid_format.sel->get_index(i);

const auto vec_valid = vec_format.validity.RowIsValid(vec_idx);
const auto rowid_valid = rowid_format.validity.RowIsValid(row_idx);

if (!vec_valid || !rowid_valid) {
continue;
}

fprintf(stderr, "Processing row %zu (vec_idx=%zu, row_idx=%zu)\n", i, vec_idx, row_idx);

STBox *box = nullptr;

if (vector_type.id() == LogicalTypeId::BLOB) {

const auto stbox_data_ptr = UnifiedVectorFormat::GetData<string_t>(vec_format);
auto blob_data = stbox_data_ptr[vec_idx];
const uint8_t *stbox_bytes = reinterpret_cast<const uint8_t*>(blob_data.GetData());
size_t stbox_size = blob_data.GetSize();
box = (STBox*)malloc(stbox_size);
memcpy(box, stbox_bytes, stbox_size);

int32_t box_srid = stbox_srid(box);

if (box_srid != 0) {
STBox *normalized_box = stbox_set_srid(box, 0);
if (normalized_box) {
free(box);
box = normalized_box;
}
}

// Copy to our batch array
memcpy(&boxes[valid_count], box, sizeof(STBox));
valid_row_ids.push_back(row_ptr[row_idx]);
valid_count++;

free(box);


}
else {
free(boxes);
executor.PushError(ErrorData("Unsupported data type for RTree index: " + vector_type.ToString()));
return TaskExecutionResult::TASK_ERROR;
}
// Build a one-column DataChunk with the indexed expression so we
// can hand it to TRTreeIndex::Construct, which dispatches on the
// indexed column type to choose between rtree_insert (for box /
// span blobs whose bytes ARE the bbox) and rtree_insert_temporal
// (for Temporal blobs whose bbox is extracted at insert time).
DataChunk col_chunk;
vector<LogicalType> col_types = {vec_vec.GetType()};
col_chunk.Initialize(Allocator::DefaultAllocator(), col_types);
col_chunk.SetCardinality(count);
col_chunk.data[0].Reference(vec_vec);

{
lock_guard<mutex> l(gstate.glock);
gstate.global_index->Construct(col_chunk, rowid_vec);
}

// Now batch insert the valid STBoxes into the index
if (valid_count > 0) {
auto &rtree_index = gstate.global_index;

auto result = rtree_index->BulkConstruct(boxes, valid_row_ids.data(), valid_count);
if (result.HasError()) {
free(boxes);
executor.PushError(result);
return TaskExecutionResult::TASK_ERROR;
}

}

free(boxes);

gstate.built_count += count;

if (mode == TaskExecutionMode::PROCESS_PARTIAL) {
Expand Down
194 changes: 137 additions & 57 deletions src/index/rtree_module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@
#include "duckdb/optimizer/matcher/expression_matcher.hpp"
#include "index/rtree_module.hpp"
#include "geo/stbox.hpp"
#include "geo/tgeompoint.hpp"
#include "geo/tgeometry.hpp"
#include "geo/tgeography.hpp"
#include "geo/tgeogpoint.hpp"
#include "temporal/span.hpp"
#include "temporal/tbox.hpp"
#include "temporal/temporal.hpp"
#include "index/rtree_index_create_physical.hpp"
#include "time_util.hpp"

Expand All @@ -48,17 +55,67 @@ TRTreeIndex::TRTreeIndex(const string &name, IndexConstraintType constraint_type


auto &type = unbound_expressions[0]->return_type;

column_type_ = type;

// The R-tree's bbox flavour is determined by the indexed column's type.
// Span / box types are stored directly (`rtree_insert` with the raw
// bytes); temporal types are bbox-extracted at insert time (spatial
// temporals via tspatial_to_stbox, the rest via rtree_insert_temporal).
if (type == StboxType::STBOX()) {
bbox_meostype = T_STBOX;
bbox_size_ = sizeof(STBox);
rtree_ = rtree_create_stbox();
} else if (type == TboxType::TBOX()) {
bbox_meostype = T_TBOX;
bbox_size_ = sizeof(TBox);
rtree_ = rtree_create_tbox();
} else if (type == SpanTypes::TSTZSPAN()) {
bbox_meostype = T_TSTZSPAN;
bbox_size_ = sizeof(Span);
bbox_size_ = sizeof(Span);
rtree_ = rtree_create_tstzspan();
} else if (type == SpanTypes::INTSPAN()) {
bbox_meostype = T_INTSPAN;
bbox_size_ = sizeof(Span);
rtree_ = rtree_create_intspan();
} else if (type == SpanTypes::BIGINTSPAN()) {
bbox_meostype = T_BIGINTSPAN;
bbox_size_ = sizeof(Span);
rtree_ = rtree_create_bigintspan();
} else if (type == SpanTypes::FLOATSPAN()) {
bbox_meostype = T_FLOATSPAN;
bbox_size_ = sizeof(Span);
rtree_ = rtree_create_floatspan();
} else if (type == SpanTypes::DATESPAN()) {
bbox_meostype = T_DATESPAN;
bbox_size_ = sizeof(Span);
rtree_ = rtree_create_datespan();
} else if (type == TemporalTypes::TINT() || type == TemporalTypes::TFLOAT()) {
// Temporal numbers: bbox is a tbox.
bbox_meostype = T_TBOX;
bbox_size_ = sizeof(TBox);
rtree_ = rtree_create_tbox();
} else if (type == TemporalTypes::TBOOL() || type == TemporalTypes::TTEXT()) {
// Non-numeric, non-spatial temporals: bbox is the time span only.
bbox_meostype = T_TSTZSPAN;
bbox_size_ = sizeof(Span);
rtree_ = rtree_create_tstzspan();
} else if (type == TgeompointType::TGEOMPOINT() ||
type == TGeometryTypes::TGEOMETRY() ||
type == TGeographyTypes::TGEOGRAPHY() ||
type == TGeogpointType::TGEOGPOINT()) {
// Temporal-spatial types: bbox is an stbox.
bbox_meostype = T_STBOX;
bbox_size_ = sizeof(STBox);
rtree_ = rtree_create_stbox();
} else {
throw InternalException("RTree index only supports STBOX and TSTZSPAN types, got: " + type.ToString());
// Unsupported indexed column type. This is user input, not an
// internal invariant, so raise a clean BinderException rather than
// an InternalException (which DuckDB renders as the generic
// "assertion failure within DuckDB" crash message).
throw BinderException(
"TRTREE index supports stbox, tbox, the 5 span types, and the "
"temporal types (tint, tfloat, tbool, ttext, tgeompoint, "
"tgeogpoint, tgeometry, tgeography). Got: " + type.ToString());
}

if (!rtree_) {
Expand Down Expand Up @@ -103,23 +160,22 @@ PhysicalOperator &TRTreeIndex::CreatePlan(PlanIndexInput &input) {
select_list.push_back(std::move(expression));
}

// new_column_types.emplace_back(LogicalType::ROW_TYPE);
// select_list.push_back(
// make_uniq<BoundReferenceExpression>(LogicalType::ROW_TYPE, create_index.info->scan_types.size() - 1));
LogicalType row_type = LogicalType::ROW_TYPE;
new_column_types.push_back(row_type);
select_list.push_back(
make_uniq<BoundReferenceExpression>(row_type, create_index.info->scan_types.size() - 1));

auto &projection = planner.Make<PhysicalProjection>(new_column_types, std::move(select_list),
auto &projection = planner.Make<PhysicalProjection>(new_column_types, std::move(select_list),
create_index.estimated_cardinality);
projection.children.push_back(input.table_scan);


auto &physical_create_index = planner.Make<PhysicalCreateTRTreeIndex>(
create_index.types, create_index.table, create_index.info->column_ids,
std::move(create_index.info), std::move(create_index.unbound_expressions),
create_index.types, create_index.table, create_index.info->column_ids,
std::move(create_index.info), std::move(create_index.unbound_expressions),
create_index.estimated_cardinality);

physical_create_index.children.push_back(projection);
return physical_create_index;
return input.table_scan;
}

//------------------------------------------------------------------------------
Expand Down Expand Up @@ -224,58 +280,88 @@ void TRTreeIndex::Construct(DataChunk &expression_result, Vector &row_identifier
if (vector.GetVectorType() != VectorType::FLAT_VECTOR) {
vector.Flatten(expression_result.size());
}

auto vector_type = vector.GetType();


void* boxes = malloc(bbox_size_ * expression_result.size());

// True if the indexed column holds a Temporal value (the bbox is
// derived per-row at insert time). False if the column already holds a
// span / tbox / stbox blob whose bytes are the bbox itself.
const bool indexes_temporal =
column_type_ == TemporalTypes::TINT() ||
column_type_ == TemporalTypes::TFLOAT() ||
column_type_ == TemporalTypes::TBOOL() ||
column_type_ == TemporalTypes::TTEXT() ||
column_type_ == TgeompointType::TGEOMPOINT() ||
column_type_ == TGeometryTypes::TGEOMETRY() ||
column_type_ == TGeographyTypes::TGEOGRAPHY() ||
column_type_ == TGeogpointType::TGEOGPOINT();

void *boxes = indexes_temporal ? nullptr
: malloc(bbox_size_ * expression_result.size());

for (idx_t i = 0; i < expression_result.size(); i++) {
if (FlatVector::IsNull(vector, i)) {
continue;
continue;
}

void *box = nullptr;

if (vector_type.id() == LogicalTypeId::BLOB) {
auto blob_data = FlatVector::GetData<string_t>(vector)[i];
const uint8_t *data = reinterpret_cast<const uint8_t*>(blob_data.GetData());
size_t data_size = blob_data.GetSize();


if (data_size != bbox_size_) {
continue;
}

box = malloc(data_size);
memcpy(box, data, data_size);
if (vector.GetType().id() != LogicalTypeId::BLOB) {
continue;
}

auto blob_data = FlatVector::GetData<string_t>(vector)[i];
const uint8_t *data = reinterpret_cast<const uint8_t *>(blob_data.GetData());
size_t data_size = blob_data.GetSize();

if (indexes_temporal) {
const Temporal *temp = reinterpret_cast<const Temporal *>(data);
// For temporal-spatial types extract the bbox and strip the
// SRID so index keys agree with the SRID-stripped query box
// used at search time (InitializeScan strips it too).
if (bbox_meostype == T_STBOX) {
STBox *stbox = (STBox*)box;
int32_t box_srid = stbox_srid(stbox);
if (box_srid != 0) {
STBox *normalized_box = stbox_set_srid(stbox, 0);
if (normalized_box) {
STBox *box = tspatial_to_stbox(temp);
if (!box) {
continue;
}
if (stbox_srid(box) != 0) {
STBox *normalized = stbox_set_srid(box, 0);
if (normalized) {
free(box);
box = normalized_box;
box = normalized;
}
}
rtree_insert(rtree_, box, static_cast<int>(row_data[i]));
free(box);
} else {
rtree_insert_temporal(rtree_, temp, static_cast<int>(row_data[i]));
}
} else {
continue;
}

if (box == nullptr) {
// Box / span blob: the bytes ARE the bbox.
if (data_size != bbox_size_) {
continue;
}

void* target = (char*)boxes + (i * bbox_size_);

void *box = malloc(data_size);
memcpy(box, data, data_size);

if (bbox_meostype == T_STBOX) {
STBox *stbox = (STBox *) box;
int32_t box_srid = stbox_srid(stbox);
if (box_srid != 0) {
STBox *normalized_box = stbox_set_srid(stbox, 0);
if (normalized_box) {
free(box);
box = normalized_box;
}
}
}

void *target = (char *) boxes + (i * bbox_size_);
memcpy(target, box, bbox_size_);
rtree_insert(rtree_, target, static_cast<int64_t>(row_data[i]));
rtree_insert(rtree_, target, static_cast<int>(row_data[i]));
free(box);
}
free(boxes);

if (boxes) free(boxes);
}


Expand Down Expand Up @@ -474,21 +560,15 @@ unique_ptr<ExpressionMatcher> TRTreeIndex::MakeFunctionMatcher() const {
matcher->expr_type = make_uniq<SpecificExpressionTypeMatcher>(ExpressionType::BOUND_FUNCTION);
matcher->policy = SetMatcher::Policy::UNORDERED;

LogicalType index_type;
if (bbox_meostype == T_STBOX) {
index_type = StboxType::STBOX();
} else if (bbox_meostype == T_TSTZSPAN) {
index_type = SpanTypes::TSTZSPAN();
} else {
index_type = LogicalType::BLOB;
}

// Left operand
// The left operand is the indexed column, so the matcher must accept
// the column's actual type, not the bbox type. A tgeompoint column
// with an R-tree over its bbox still appears as tgeompoint in the
// predicate AST.
auto lhs_matcher = make_uniq<ExpressionMatcher>();
lhs_matcher->type = make_uniq<SpecificTypeMatcher>(index_type);
lhs_matcher->type = make_uniq<SpecificTypeMatcher>(column_type_);
matcher->matchers.push_back(std::move(lhs_matcher));

// Right operand
// Right operand: any type (constant bbox, span, or temporal expr).
auto rhs_matcher = make_uniq<ExpressionMatcher>();
matcher->matchers.push_back(std::move(rhs_matcher));

Expand Down
Loading
Loading