From 0de939b194976c54e0bdc2ed2656b9e29db2d650 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Sat, 15 Nov 2025 18:09:05 +0100 Subject: [PATCH] bugfix in SGB_CompressedTree lookups --- python_tests/test_storage_stats.py | 70 +------------------ run_asan_stress_tests.sh | 5 ++ .../SGB_Tree/SGB_CompressedLookupTree.hpp | 33 ++++++--- src/dbzero/core/storage/SparseIndex.hpp | 10 +-- src/dbzero/core/storage/SparseIndexBase.hpp | 8 ++- 5 files changed, 42 insertions(+), 84 deletions(-) create mode 100755 run_asan_stress_tests.sh diff --git a/python_tests/test_storage_stats.py b/python_tests/test_storage_stats.py index 3c0d4cc2..16b4d511 100644 --- a/python_tests/test_storage_stats.py +++ b/python_tests/test_storage_stats.py @@ -13,8 +13,7 @@ class IndexContainer: @db0.memo(singleton = True) @dataclass -class IndexesSingleton: - # wystawcy po numerze NIP +class IndexesSingleton: indexes: List[IndexContainer] @@ -35,73 +34,6 @@ def format_results(diffs): lines.append(line) return "\n".join(lines) -@pytest.mark.stress_test -def test_io_operation_stability(db0_large_lang_cache_no_autocommit): - numbers = set() - print("Initializing test data...") - numbers = list(numbers) - indexes = IndexesSingleton(indexes=[]) - BYTES = "DB0"*2200 - diffs = [] - indexes_count = 250000 - for number in range(indexes_count): - indexes.indexes.append(IndexContainer(index=db0.index())) - - # commit init - print("Performing initial commit...") - start = datetime.now() - db0.commit() - stop = datetime.now() - - initial_commit_time = (stop - start).seconds - storage_stats = db0.get_storage_stats() - min_commit_time = initial_commit_time - max_commit_time = initial_commit_time - print(f" Initial commit time seconds: {initial_commit_time}") - print("Starting IO operation stability test...") - - for i in range(10): - print(f" Iteration {i+1}/10") - start = datetime.now() - iterations = 100000 - # perform iteration - for j in range(iterations): - number = (i*iterations + j)%indexes_count - index_container = indexes.indexes[number] - now = datetime.now() - new_value = Value(index_number=number, date=now, value=list_value) - index_container.index.add(now, new_value) - - # calculate objects per second - stop = datetime.now() - seconds = (stop - start).miliseconds / 1000.0 - print(f" Objects per second: {iterations / seconds}") - - # commit changes - start = datetime.now() - db0.commit() - stop = datetime.now() - - # measure commit time - commit_time = (stop - start).seconds - min_commit_time = min(min_commit_time, commit_time) - max_commit_time = max(max_commit_time, commit_time) - print(f" Commit time seconds: {commit_time}") - - storage_stats_after = db0.get_storage_stats() - # get storage stats difference - diff = {} - for key in storage_stats_after: - diff[key] = storage_stats_after[key] - storage_stats.get(key, 0) - print(f" Storage stats diff: {diff}") - diffs.append(diff) - storage_stats = storage_stats_after - - results = format_results(diffs) - print(f"IO Operation Stability Test Results:\n{results}") - print(f"Min commit time: {min_commit_time} seconds") - print(f"Max commit time: {max_commit_time} seconds") - @pytest.mark.stress_test def test_big_cache_should_prevent_random_reads(db0_large_lang_cache_no_autocommit): diff --git a/run_asan_stress_tests.sh b/run_asan_stress_tests.sh new file mode 100755 index 00000000..73e0907f --- /dev/null +++ b/run_asan_stress_tests.sh @@ -0,0 +1,5 @@ +#!/bin/bash +export PYTHONIOENCODING=utf8 +export LD_PRELOAD=$(gcc -print-file-name=libasan.so) + +python3 -m pytest -m 'stress_test' -c pytest.ini --capture=no "$@" diff --git a/src/dbzero/core/collections/SGB_Tree/SGB_CompressedLookupTree.hpp b/src/dbzero/core/collections/SGB_Tree/SGB_CompressedLookupTree.hpp index 1ac822d8..d87b7a39 100644 --- a/src/dbzero/core/collections/SGB_Tree/SGB_CompressedLookupTree.hpp +++ b/src/dbzero/core/collections/SGB_Tree/SGB_CompressedLookupTree.hpp @@ -285,8 +285,11 @@ DB0_PACKED_END // node will be sorted if needed (only if in READ/WRITE mode) if (this->m_access_type == AccessType::READ_WRITE) { this->onNodeLookup(node); + } + if (!node->header().canFit(key)) { + return { nullptr, sg_tree_const_iterator() }; } - // within the node look up by compressed key + // within the node look up by compressed key (only if able to fit) return { node->lower_equal_bound(node->header().compress(key), this->m_heap_comp), node }; } @@ -305,15 +308,18 @@ DB0_PACKED_END this->onNodeLookup(node); } // within the node look up by compressed key - auto item_ptr = node->lower_equal_bound(node->header().compress(key), this->m_heap_comp); - if (!item_ptr) { - return std::nullopt; + // NOTE: if unable to fit key then the item cannot be present in the node + if (node->header().canFit(key)) { + auto item_ptr = node->lower_equal_bound(node->header().compress(key), this->m_heap_comp); + if (item_ptr) { + // return uncompressed + return node->header().uncompress(*item_ptr); + } } - - // return uncompressed - return node->header().uncompress(*item_ptr); - } + return std::nullopt; + } + // Locate first element which is greater or equal to the key template std::optional upper_equal_bound(const KeyT &key) const { @@ -332,11 +338,14 @@ DB0_PACKED_END this->onNodeLookup(node); } // within the node look up by compressed key - auto item_ptr = node->upper_equal_bound(node->header().compress(key), this->m_heap_comp); + const CompressedItemT *item_ptr = nullptr; + if (node->header().canFit(key)) { + item_ptr = node->upper_equal_bound(node->header().compress(key), this->m_heap_comp); + } if (!item_ptr) { // check within the next node ++node; - if (node == base_t::end()) { + if (node == base_t::end() || !node->header().canFit(key)) { return std::nullopt; } item_ptr = node->upper_equal_bound(node->header().compress(key), this->m_heap_comp); @@ -361,7 +370,11 @@ DB0_PACKED_END if (this->m_access_type == AccessType::READ_WRITE) { this->onNodeLookup(node); } + if (!node->header().canFit(key)) { + return nullptr; + } // within the node look up by compressed key + // NOTE: if unable to fit key then the item cannot be present in the node return node->lower_equal_bound(node->header().compress(key), this->m_heap_comp); } diff --git a/src/dbzero/core/storage/SparseIndex.hpp b/src/dbzero/core/storage/SparseIndex.hpp index 9e6d1957..61ee108d 100644 --- a/src/dbzero/core/storage/SparseIndex.hpp +++ b/src/dbzero/core/storage/SparseIndex.hpp @@ -9,7 +9,6 @@ namespace db0 { -DB0_PACKED_BEGIN struct SI_Item; struct SI_CompressedItem; @@ -32,6 +31,7 @@ DB0_PACKED_BEGIN bool operator()(std::pair, const SI_Item &) const; }; +DB0_PACKED_BEGIN struct DB0_PACKED_ATTR SI_Item { using CompT = SI_ItemCompT; @@ -65,7 +65,8 @@ DB0_PACKED_BEGIN return m_state_num; } }; - +DB0_PACKED_END + struct SI_CompressedItemCompT { bool operator()(const SI_CompressedItem &, const SI_CompressedItem &) const; @@ -75,8 +76,9 @@ DB0_PACKED_BEGIN { bool operator()(const SI_CompressedItem &, const SI_CompressedItem &) const; }; - + // Compressed items are actual in-memory representation +DB0_PACKED_BEGIN struct DB0_PACKED_ATTR SI_CompressedItem { using CompT = SI_CompressedItemCompT; @@ -120,8 +122,8 @@ DB0_PACKED_BEGIN std::string toString() const; }; +DB0_PACKED_END using SparseIndex = SparseIndexBase; -DB0_PACKED_END } \ No newline at end of file diff --git a/src/dbzero/core/storage/SparseIndexBase.hpp b/src/dbzero/core/storage/SparseIndexBase.hpp index b549588c..eaaa699c 100644 --- a/src/dbzero/core/storage/SparseIndexBase.hpp +++ b/src/dbzero/core/storage/SparseIndexBase.hpp @@ -111,7 +111,6 @@ namespace db0 // Compress the key part only for lookup purposes CompressedItemT compress(std::pair) const; - CompressedItemT compress(const ItemT &) const; ItemT uncompress(const CompressedItemT &) const; @@ -119,6 +118,7 @@ namespace db0 // From a compressed item, retrieve the (logical) page number only PageNumT getPageNum(const CompressedItemT &) const; + bool canFit(std::pair) const; bool canFit(const ItemT &) const; std::string toString(const CompressedItemT &) const; @@ -309,6 +309,12 @@ DB0_PACKED_END bool SparseIndexBase::BlockHeader::canFit(const ItemT &item) const { return this->m_first_page_num == (item.m_page_num >> 24); } + + template + bool SparseIndexBase::BlockHeader::canFit(std::pair item) const + { + return this->m_first_page_num == (item.first >> 24); + } template ItemT SparseIndexBase::lookup(PageNumT page_num, StateNumT state_num) const {