diff --git a/python_tests/test_index.py b/python_tests/test_index.py index c2ecec05..6899d1c7 100644 --- a/python_tests/test_index.py +++ b/python_tests/test_index.py @@ -9,6 +9,7 @@ from datetime import timedelta, datetime import random import time +from .conftest import TEST_FILES_DIR_ROOT def test_index_instance_can_be_created_without_arguments(db0_fixture): @@ -727,8 +728,33 @@ def test_insert_1M_keys_to_index(db0_no_autocommit): start = time.perf_counter() for i in range(1_000_000): # add random int - cut.add(random.randint(0, 100_000_000), random.choice(objects)) + cut.add(random.randint(0, 100_000_000), objects[i % 25000]) + if i % 10_000 == 0: + assert len(cut) == i + 1 result = list(cut.select(0, 1)) end = time.perf_counter() assert len(cut) == 1_000_000 - print(f"Inserted 1M keys to index in {end - start:.2f} seconds") \ No newline at end of file + print(f"Inserted 1M keys to index in {end - start:.2f} seconds") + + +@pytest.mark.stress_test +def test_insert_key_into_split_range (db0_no_autocommit): + cut = db0.index() + objects = [] + for i in range(35000): + objects.append(MemoTestClass(i)) + start = time.perf_counter() + elements = 257 * 1024 + # add more items than initial max_block_size to force block splits + for i in range(0, elements): + cut.add(i, objects[i % 35000]) + if i % 10000 == 0: + print(f"Inserted {i} keys so far...") + assert len(cut) == i + 1 + + # add an item to a bounded range that has already been split + cut.add(127, objects[-1]) + end = time.perf_counter() + elements += 1 + assert len(cut) == elements + print(f"Inserted {elements} keys to index in {end - start:.2f} seconds") diff --git a/src/dbzero/core/collections/range_tree/BlockItem.hpp b/src/dbzero/core/collections/range_tree/BlockItem.hpp index b0df2994..c8a380a8 100644 --- a/src/dbzero/core/collections/range_tree/BlockItem.hpp +++ b/src/dbzero/core/collections/range_tree/BlockItem.hpp @@ -51,6 +51,15 @@ DB0_PACKED_BEGIN return m_key < other.m_key; } + // strict greater-than by key, then by value when the keys are equal +
inline bool gtByKey(const BlockItemT& other) const /* true when *this is strictly greater than other: m_key decides, m_value breaks ties on equal keys */ + { + if (m_key == other.m_key) { + return m_value > other.m_value; + } + return m_key > other.m_key; + } + inline bool operator!=(const BlockItemT& other) const { return (m_key != other.m_key) || (m_value != other.m_value); } diff --git a/src/dbzero/core/collections/range_tree/RangeTree.hpp b/src/dbzero/core/collections/range_tree/RangeTree.hpp index 31c1cb19..e0fcfe63 100644 --- a/src/dbzero/core/collections/range_tree/RangeTree.hpp +++ b/src/dbzero/core/collections/range_tree/RangeTree.hpp @@ -133,6 +133,7 @@ DB0_PACKED_END std::make_heap(begin, end, CompT()); while (begin != end) { auto range = getRange(*begin); + for (;;) { auto _end = end; // calculate the remaining capacity in the block @@ -301,8 +302,7 @@ DB0_PACKED_END bool canInsert(ItemT item) const { assert(m_asc); - // the second condition is to allow multiple range with identical element - return (m_is_first || !m_bounds.first || !(item < *m_bounds.first)) && (!m_bounds.second || (item < *m_bounds.second)); + return (m_is_first || !m_bounds.first || !(*m_bounds.first).gtByKey(item)) && (!m_bounds.second || (*m_bounds.second).gtByKey(item)); /* accepts item when the first bound is not greater than item and the second bound is greater than item, both tested via gtByKey; a missing bound skips its check, and m_is_first skips the first-bound check */ } std::pair, std::optional > getKeyRange() const diff --git a/tests/utils/utils.hpp b/tests/utils/utils.hpp index 05382a4f..7617fcac 100644 --- a/tests/utils/utils.hpp +++ b/tests/utils/utils.hpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace db0::tests