From d69932b800a2a2859c5d19e1d1979d5d401e82bf Mon Sep 17 00:00:00 2001 From: Adrian Zawadzki Date: Fri, 5 Dec 2025 00:10:39 +0100 Subject: [PATCH 1/5] canInsert fix --- python_tests/test_index.py | 77 ++++++++++++++++++- .../core/collections/range_tree/BlockItem.hpp | 9 +++ .../core/collections/range_tree/RangeTree.hpp | 34 +++++++- tests/unit_tests/VBIndexTests.cpp | 24 ++++++ 4 files changed, 140 insertions(+), 4 deletions(-) diff --git a/python_tests/test_index.py b/python_tests/test_index.py index 14c65966..8457dbec 100644 --- a/python_tests/test_index.py +++ b/python_tests/test_index.py @@ -6,6 +6,7 @@ from datetime import timedelta, datetime import random import time +from .conftest import TEST_FILES_DIR_ROOT def test_index_instance_can_be_created_without_arguments(db0_fixture): @@ -724,8 +725,80 @@ def test_insert_1M_keys_to_index(db0_no_autocommit): start = time.perf_counter() for i in range(1_000_000): # add random int - cut.add(random.randint(0, 100_000_000), random.choice(objects)) + cut.add(random.randint(0, 100_000_000), objects[i % 25000]) + if i % 10_000 == 0: + assert len(cut) == i + 1 result = list(cut.select(0, 1)) end = time.perf_counter() assert len(cut) == 1_000_000 - print(f"Inserted 1M keys to index in {end - start:.2f} seconds") \ No newline at end of file + print(f"Inserted 1M keys to index in {end - start:.2f} seconds") + +@pytest.mark.stress_test +def test_insert_1M_fixed_keys_to_index(db0_no_autocommit): + cut = db0.index() + objects = [] + for i in range(25000): + objects.append(MemoTestClass(i)) + start = time.perf_counter() + numbers = [] + with open(f"{TEST_FILES_DIR_ROOT}/index_keys.txt", "r") as f: + numbers = [int(line.strip()) for line in f.readlines()] + for i in range(1_000_000): + number = numbers[i] + cut.add(number, objects[i % 25000]) + if i % 10_000 == 0: + print(f"Inserted {i} keys so far...") + assert len(cut) == i + 1 + result = list(cut.select(0, 1)) + end = time.perf_counter() + assert len(cut) == 1_000_000 + print(f"Inserted 1M keys to index in {end - start:.2f} seconds") + +@pytest.mark.stress_test +def test_insert_key_into_splitted_range(db0_no_autocommit): + cut = db0.index() + objects = [] + for i in range(35000): + objects.append(MemoTestClass(i)) + start = time.perf_counter() + elements = 257 * 1024 + # add more items than initial max_block_size to force block splits + for i in range(1, elements): + cut.add(i, objects[i % 35000]) + if i % 1000 == 0: + print(f"Inserted {i} keys so far...") + assert len(cut) == i + + # add an item to bounded range that has been splitted + cut.add(123, objects[-1]) + end = time.perf_counter() + elements += 1 + assert len(cut) == elements + print(f"Inserted {elements} keys to index in {end - start:.2f} seconds") + +@pytest.mark.stress_test +def test_remove_keys_from_splitted_range(db0_no_autocommit): + cut = db0.index() + # add more items than initial max_block_size to force block splits + elems = [] + elements = 277 * 1024 + pre_created_elements = [MemoTestClass(i) for i in range(elements)] + for i in range(1, elements): + if i <= 227 * 1024: + memo_object = MemoTestClass(1000000 + i) + else: + memo_object = pre_created_elements[i % elements] + cut.add(1, memo_object) + if i % 1000 == 0: + assert len(cut) == i + if i % 100_000 == 0: + print(f"Inserted {i} keys so far...") + elems.append(memo_object) + len_after_inserts = len(cut) + for i, obj in enumerate(elems): + cut.remove(1, obj) + if i % 10 == 0: + assert len(cut) == len_after_inserts - (i + 1) + assert len(cut) == len_after_inserts - len(elems) + for obj in elems: + assert len(db0.find(cut.select(), obj)) == 0 diff --git a/src/dbzero/core/collections/range_tree/BlockItem.hpp b/src/dbzero/core/collections/range_tree/BlockItem.hpp index 138a904f..7e51bab0 100644 --- a/src/dbzero/core/collections/range_tree/BlockItem.hpp +++ b/src/dbzero/core/collections/range_tree/BlockItem.hpp @@ -48,6 +48,15 @@ DB0_PACKED_BEGIN return m_key < other.m_key; } + // by-key + value comparison + inline bool gtByKey(const BlockItemT& other) const + { + if (m_key == other.m_key) { + return m_value > other.m_value; + } + return m_key > other.m_key; + } + inline bool operator!=(const BlockItemT& other) const { return (m_key != other.m_key) || (m_value != other.m_value); } diff --git a/src/dbzero/core/collections/range_tree/RangeTree.hpp b/src/dbzero/core/collections/range_tree/RangeTree.hpp index 66fcd91f..8da4d710 100644 --- a/src/dbzero/core/collections/range_tree/RangeTree.hpp +++ b/src/dbzero/core/collections/range_tree/RangeTree.hpp @@ -128,9 +128,15 @@ DB0_PACKED_END using CompT = typename ItemT::HeapCompT; // heapify the elements (min heap) std::make_heap(begin, end, CompT()); + size_t loop_counter = 0; while (begin != end) { auto range = getRange(*begin); + for (;;) { + loop_counter += 1; + if (loop_counter % 100 == 0) { + std::cerr << "bulkInsert loop iteration: " << loop_counter << "\n"; + } auto _end = end; // calculate the remaining capacity in the block auto block_capacity = 0; @@ -145,6 +151,9 @@ DB0_PACKED_END } while (block_capacity > 0 && begin != end && range.canInsert(*begin)) { + if (loop_counter % 100 == 0) { + std::cerr << "In Insert " << loop_counter << "\n"; + } std::pop_heap(begin, end, CompT()); --end; --block_capacity; @@ -298,8 +307,29 @@ DB0_PACKED_END bool canInsert(ItemT item) const { assert(m_asc); - // the second condition is to allow multiple range with identical element - return (m_is_first || !m_bounds.first || !(item < *m_bounds.first)) && (!m_bounds.second || (item < *m_bounds.second)); + + if (m_bounds.second) { + // std::cerr << "Can insert check for item: " + // << item.m_key << ":" << item.m_value << "\n"; + // std::cerr << "Checking for range : [" + // << (m_bounds.first ? std::to_string((*m_bounds.first).m_key) : "null") + // << ", " + // << (m_bounds.second ? std::to_string((*m_bounds.second).m_key) : "null") + // << ")\n"; + // std::cerr << "m_is_first: " << m_is_first << "\n"; + // std::cerr << "!m_bounds.first: " << !m_bounds.first << "\n"; + // std::cerr << "!(item < *m_bounds.first) " << !(item < *m_bounds.first) << "\n"; + // std::cerr << "(!m_bounds.second: " << !m_bounds.second << "\n"; + // std::cerr << "(*m_bounds.second).gtByKey(item): " << (*m_bounds.second).gtByKey(item) << "\n"; + // std::cerr << "FINAL: " << ( (m_is_first || !m_bounds.first || !(item < *m_bounds.first)) && (!m_bounds.second || (*m_bounds.second).gtByKey(item)) ) << "\n"; + // std::cerr << std::endl; + // try{ + // throw std::runtime_error("Debug Exception"); + // } catch (const std::exception &e) { + // std::cerr << e.what() << "\n"; + // } + } + return (m_is_first || !m_bounds.first || !(*m_bounds.first).gtByKey(item)) && (!m_bounds.second || (*m_bounds.second).gtByKey(item)); } std::pair, std::optional > getKeyRange() const diff --git a/tests/unit_tests/VBIndexTests.cpp b/tests/unit_tests/VBIndexTests.cpp index 4376bb56..69bdae1a 100644 --- a/tests/unit_tests/VBIndexTests.cpp +++ b/tests/unit_tests/VBIndexTests.cpp @@ -176,5 +176,29 @@ namespace tests timer.printLog(std::cout) << std::endl; } + TEST_F( VBIndexTests , testVBIndexBulkErase ) + { + using ItemT = db0::key_value; + auto memspace = getMemspace(); + std::vector values(1500); + for (std::uint32_t i = 0; i < 1500; ++i) { + values[i] = { i, 0 }; + } + db0::v_bindex cut(memspace, memspace.getPageSize()); + cut.bulkInsert(values.begin(), values.end()); + ASSERT_EQ(cut.size(), 1500u); + std::function selector = [](ItemT item) { + return item.key < 1000; + }; + cut.bulkErase(selector); + ASSERT_EQ(cut.size(), 500u); + // verify remaining items + auto it = cut.begin(), end = cut.end(); + while (it != end) { + ASSERT_EQ((*it).key >= 1000, true); + ++it; + } + } + } From 44a7d3b993596b6b64b5c8f74e064f6b20a0dfd2 Mon Sep 17 00:00:00 2001 From: Adrian Zawadzki Date: Fri, 5 Dec 2025 08:32:05 +0100 Subject: [PATCH 2/5] cleanup --- .../core/collections/range_tree/RangeTree.hpp | 22 ------------------- 1 file changed, 22 deletions(-) diff --git a/src/dbzero/core/collections/range_tree/RangeTree.hpp b/src/dbzero/core/collections/range_tree/RangeTree.hpp index 807041f7..89156fd1 100644 --- a/src/dbzero/core/collections/range_tree/RangeTree.hpp +++ b/src/dbzero/core/collections/range_tree/RangeTree.hpp @@ -310,28 +310,6 @@ DB0_PACKED_END bool canInsert(ItemT item) const { assert(m_asc); - - if (m_bounds.second) { - // std::cerr << "Can insert check for item: " - // << item.m_key << ":" << item.m_value << "\n"; - // std::cerr << "Checking for range : [" - // << (m_bounds.first ? std::to_string((*m_bounds.first).m_key) : "null") - // << ", " - // << (m_bounds.second ? std::to_string((*m_bounds.second).m_key) : "null") - // << ")\n"; - // std::cerr << "m_is_first: " << m_is_first << "\n"; - // std::cerr << "!m_bounds.first: " << !m_bounds.first << "\n"; - // std::cerr << "!(item < *m_bounds.first) " << !(item < *m_bounds.first) << "\n"; - // std::cerr << "(!m_bounds.second: " << !m_bounds.second << "\n"; - // std::cerr << "(*m_bounds.second).gtByKey(item): " << (*m_bounds.second).gtByKey(item) << "\n"; - // std::cerr << "FINAL: " << ( (m_is_first || !m_bounds.first || !(item < *m_bounds.first)) && (!m_bounds.second || (*m_bounds.second).gtByKey(item)) ) << "\n"; - // std::cerr << std::endl; - // try{ - // throw std::runtime_error("Debug Exception"); - // } catch (const std::exception &e) { - // std::cerr << e.what() << "\n"; - // } - } return (m_is_first || !m_bounds.first || !(*m_bounds.first).gtByKey(item)) && (!m_bounds.second || (*m_bounds.second).gtByKey(item)); } From aac6c8c4ea9bda6f97a56be8c466c9b0b6e28d14 Mon Sep 17 00:00:00 2001 From: Adrian Zawadzki Date: Fri, 5 Dec 2025 08:50:57 +0100 Subject: [PATCH 3/5] compile fix --- tests/utils/utils.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/utils/utils.hpp b/tests/utils/utils.hpp index 05382a4f..7617fcac 100644 --- a/tests/utils/utils.hpp +++ b/tests/utils/utils.hpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace db0::tests From bc1782fe918792a0d4b0b8b2a291e20e5faaf15d Mon Sep 17 00:00:00 2001 From: Adrian Zawadzki Date: Fri, 5 Dec 2025 11:50:23 +0100 Subject: [PATCH 4/5] fix(RangeTree): remove redundant logging code --- src/dbzero/core/collections/range_tree/RangeTree.hpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/dbzero/core/collections/range_tree/RangeTree.hpp b/src/dbzero/core/collections/range_tree/RangeTree.hpp index 89156fd1..e0fcfe63 100644 --- a/src/dbzero/core/collections/range_tree/RangeTree.hpp +++ b/src/dbzero/core/collections/range_tree/RangeTree.hpp @@ -131,15 +131,10 @@ DB0_PACKED_END using CompT = typename ItemT::HeapCompT; // heapify the elements (min heap) std::make_heap(begin, end, CompT()); - size_t loop_counter = 0; while (begin != end) { auto range = getRange(*begin); for (;;) { - loop_counter += 1; - if (loop_counter % 100 == 0) { - std::cerr << "bulkInsert loop iteration: " << loop_counter << "\n"; - } auto _end = end; // calculate the remaining capacity in the block auto block_capacity = 0; @@ -154,9 +149,6 @@ DB0_PACKED_END } while (block_capacity > 0 && begin != end && range.canInsert(*begin)) { - if (loop_counter % 100 == 0) { - std::cerr << "In Insert " << loop_counter << "\n"; - } std::pop_heap(begin, end, CompT()); --end; --block_capacity; From f6fd37d767bf166388fbcef9a91db46d491b3cbd Mon Sep 17 00:00:00 2001 From: Adrian Zawadzki Date: Fri, 5 Dec 2025 12:16:50 +0100 Subject: [PATCH 5/5] fix(test): changed test name --- python_tests/test_index.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python_tests/test_index.py b/python_tests/test_index.py index 6c77293c..6899d1c7 100644 --- a/python_tests/test_index.py +++ b/python_tests/test_index.py @@ -736,8 +736,9 @@ def test_insert_1M_keys_to_index(db0_no_autocommit): assert len(cut) == 1_000_000 print(f"Inserted 1M keys to index in {end - start:.2f} seconds") + @pytest.mark.stress_test -def test_insert_key_into_splitted_range(db0_no_autocommit): +def test_insert_key_into_split_range (db0_no_autocommit): cut = db0.index() objects = [] for i in range(35000):