From 0030e05f648130112e960ff1c688a1cf7c6efc92 Mon Sep 17 00:00:00 2001 From: "xufeihong.xfh" Date: Tue, 9 Jun 2026 21:44:08 +0800 Subject: [PATCH] fix: clean up RocksDB files on drop_index so create_index can be repeated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues prevented drop_index + create_index from working: 1. `reload_scalar_index(nullptr)` was a no-op — it never removed the old inverted indexer directory. Now it deletes the working dir and sets the pointer to null. 2. `create_scalar_index` returned a null output when no new fields needed indexing (early-return path), causing the caller to pass nullptr to `reload_scalar_index` which then wiped the existing indexer. Now it returns the current indexer pointer so the caller preserves it. Added an identity-pointer guard (`invert_indexers_ == scalar_indexer`) to avoid accidentally deleting an indexer's own directory during reload. Closes alibaba/zvec#427 Co-Authored-By: Claude Opus 4.6 --- src/db/index/segment/segment.cc | 12 +++- tests/db/collection_test.cc | 110 ++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+), 2 deletions(-) diff --git a/src/db/index/segment/segment.cc b/src/db/index/segment/segment.cc index 2e334c3fc..ca5d3adb3 100644 --- a/src/db/index/segment/segment.cc +++ b/src/db/index/segment/segment.cc @@ -1992,6 +1992,7 @@ Status SegmentImpl::create_scalar_index(const std::vector &columns, auto new_segment_meta = std::make_shared(*segment_meta_); if (fields.empty()) { *segment_meta = new_segment_meta; + *scalar_indexer = invert_indexers_; return Status::OK(); } @@ -2166,8 +2167,16 @@ Status SegmentImpl::reload_scalar_index( collection_schema_ = std::make_shared(schema); segment_meta_ = segment_meta; + if (invert_indexers_ == scalar_indexer) { + return Status::OK(); + } + if (!scalar_indexer) { - // no need to reload inverted indexer + if (invert_indexers_) { + auto old_dir = invert_indexers_->working_dir(); + invert_indexers_ = nullptr; + FileHelper::RemoveDirectory(old_dir); + } return Status::OK(); } @@ -2176,7 +2185,6 @@ Status SegmentImpl::reload_scalar_index( if (invert_indexers_) { auto old_dir = invert_indexers_->working_dir(); invert_indexers_ = scalar_indexer; - FileHelper::RemoveDirectory(old_dir); } else { invert_indexers_ = scalar_indexer; diff --git a/tests/db/collection_test.cc b/tests/db/collection_test.cc index 4805df32f..c933de631 100644 --- a/tests/db/collection_test.cc +++ b/tests/db/collection_test.cc @@ -6050,3 +6050,113 @@ TEST_F(CollectionTest, Feature_CreateOrDropFtsIndex) { FileHelper::RemoveDirectory(col_path); } } + +TEST_F(CollectionTest, Feature_DropAndRecreateScalarIndex) { +#ifdef __ANDROID__ + GTEST_SKIP() << "Skipped on Android: emulator filesystem lacks hardlink " + "support (needed by RocksDB checkpoint)"; +#endif + int doc_count = 100; + + auto schema = TestHelper::CreateNormalSchema(false, "demo"); + auto options = CollectionOptions{false, true, 64 * 1024 * 1024}; + auto collection = TestHelper::CreateCollectionWithDoc( + col_path, *schema, options, 0, doc_count, false); + + ASSERT_TRUE(collection->Flush().ok()); + ASSERT_EQ(collection->Stats().value().doc_count, doc_count); + + auto index_params = std::make_shared(false); + + // Round 1: create index + auto s = collection->CreateIndex("int32", index_params); + ASSERT_TRUE(s.ok()) << "Round 1 create failed: " << s.message(); + + // Round 1: drop index + s = collection->DropIndex("int32"); + ASSERT_TRUE(s.ok()) << "Round 1 drop failed: " << s.message(); + + // Round 2: recreate index on same field — this was the bug + s = collection->CreateIndex("int32", index_params); + ASSERT_TRUE(s.ok()) << "Round 2 create failed: " << s.message(); + + // Round 2: drop again + s = collection->DropIndex("int32"); + ASSERT_TRUE(s.ok()) << "Round 2 drop failed: " << s.message(); + + // Round 3: one more cycle + s = collection->CreateIndex("int32", index_params); + ASSERT_TRUE(s.ok()) << "Round 3 create failed: " << s.message(); + + // Verify data integrity after multiple create/drop cycles + for (int i = 0; i < doc_count; i++) { + auto expect_doc = TestHelper::CreateDoc(i, *schema); + auto result = collection->Fetch({expect_doc.pk()}); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value().size(), 1); + auto doc = result.value()[expect_doc.pk()]; + ASSERT_NE(doc, nullptr); + ASSERT_EQ(*doc, expect_doc); + } + + // Final drop + s = collection->DropIndex("int32"); + ASSERT_TRUE(s.ok()) << "Final drop failed: " << s.message(); +} + +TEST_F(CollectionTest, Feature_DropAndRecreateScalarIndex_MultipleFields) { +#ifdef __ANDROID__ + GTEST_SKIP() << "Skipped on Android: emulator filesystem lacks hardlink " + "support (needed by RocksDB checkpoint)"; +#endif + int doc_count = 100; + + auto schema = TestHelper::CreateNormalSchema(false, "demo"); + auto options = CollectionOptions{false, true, 64 * 1024 * 1024}; + auto collection = TestHelper::CreateCollectionWithDoc( + col_path, *schema, options, 0, doc_count, false); + + ASSERT_TRUE(collection->Flush().ok()); + + auto index_params = std::make_shared(false); + + // Create index on two fields + auto s = collection->CreateIndex("int32", index_params); + ASSERT_TRUE(s.ok()); + s = collection->CreateIndex("string", index_params); + ASSERT_TRUE(s.ok()); + + // Drop only one field — the other should remain functional + s = collection->DropIndex("int32"); + ASSERT_TRUE(s.ok()); + + // Recreate the dropped one + s = collection->CreateIndex("int32", index_params); + ASSERT_TRUE(s.ok()) << "Recreate int32 after partial drop failed: " + << s.message(); + + // Drop both + s = collection->DropIndex("int32"); + ASSERT_TRUE(s.ok()); + s = collection->DropIndex("string"); + ASSERT_TRUE(s.ok()); + + // Recreate both + s = collection->CreateIndex("int32", index_params); + ASSERT_TRUE(s.ok()) << "Recreate int32 after full drop failed: " + << s.message(); + s = collection->CreateIndex("string", index_params); + ASSERT_TRUE(s.ok()) << "Recreate string after full drop failed: " + << s.message(); + + // Verify data integrity + for (int i = 0; i < doc_count; i++) { + auto expect_doc = TestHelper::CreateDoc(i, *schema); + auto result = collection->Fetch({expect_doc.pk()}); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value().size(), 1); + auto doc = result.value()[expect_doc.pk()]; + ASSERT_NE(doc, nullptr); + ASSERT_EQ(*doc, expect_doc); + } +}