From 0f05c1a6377dc5aa7c0413edf98bd731a2adbb11 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Mon, 24 Nov 2025 18:11:27 +0100 Subject: [PATCH 01/12] DP changelog header extensions --- src/dbzero/core/storage/BDevStorage.cpp | 24 +++++------- src/dbzero/core/storage/BDevStorage.hpp | 7 ++-- src/dbzero/core/storage/BaseStorage.cpp | 4 +- src/dbzero/core/storage/BaseStorage.hpp | 5 ++- src/dbzero/core/storage/ChangeLog.cpp | 4 +- src/dbzero/core/storage/ChangeLogIOStream.cpp | 3 +- src/dbzero/core/storage/ChangeLogIOStream.hpp | 2 + src/dbzero/core/storage/ChangeLogTypes.hpp | 28 ++++++++++++++ src/dbzero/core/storage/SparseIndexBase.hpp | 5 +-- src/dbzero/core/storage/SparsePair.cpp | 32 +++++----------- src/dbzero/core/storage/SparsePair.hpp | 6 +-- .../object_model/tags/SelectModified.cpp | 12 ++---- tests/unit_tests/BDevStorageTest.cpp | 13 +++---- tests/unit_tests/ChangeLogTest.cpp | 37 +++++++++++++++++++ tests/unit_tests/SparsePairTest.cpp | 28 +++++++------- 15 files changed, 126 insertions(+), 84 deletions(-) create mode 100644 src/dbzero/core/storage/ChangeLogTypes.hpp diff --git a/src/dbzero/core/storage/BDevStorage.cpp b/src/dbzero/core/storage/BDevStorage.cpp index 04f8e083..2825b912 100644 --- a/src/dbzero/core/storage/BDevStorage.cpp +++ b/src/dbzero/core/storage/BDevStorage.cpp @@ -485,20 +485,15 @@ namespace db0 break; } - // First element from the chunk is the updated state number - auto it = dp_change_log_ptr->begin(), end = dp_change_log_ptr->end(); - assert(it != end); - // First element in the log is the updated state number - assert(*it != updated_state_num); - updated_state_num = *it; - // All other elements are page numbers - ++it; - for (; it != end; ++it) { - on_page_updated(*it, updated_state_num); + assert(dp_change_log_ptr->m_state_num != updated_state_num); + updated_state_num = dp_change_log_ptr->m_state_num; + // Elements are storage page numbers (mutated in that transaction) + for (auto storage_page_num: *dp_change_log_ptr) { + on_page_updated(storage_page_num, updated_state_num); } } } - + } catch (db0::IOException &) { // NOTE: this exception may appear on distributed filesystems // where changes are not guaranteed to be written sequentially @@ -581,7 +576,7 @@ namespace db0 #endif void BDevStorage::fetchDP_ChangeLogs(StateNumType begin_state, std::optional end_state, - std::function f) const + std::function f) const { std::unique_lock lock(m_mutex); if (m_dp_changelog_io.modified()) { @@ -612,14 +607,13 @@ namespace db0 // end of the stream reached break; } - // first item of the change-log is the state number - auto state_num = *change_log->begin(); + auto state_num = change_log->m_state_num; if (end_state && state_num >= *end_state) { // end of the range reached break; } if (state_num >= begin_state) { - f(state_num, *change_log); + f(*change_log); } } } catch (...) { diff --git a/src/dbzero/core/storage/BDevStorage.hpp b/src/dbzero/core/storage/BDevStorage.hpp index 38a16247..2ffaa119 100644 --- a/src/dbzero/core/storage/BDevStorage.hpp +++ b/src/dbzero/core/storage/BDevStorage.hpp @@ -118,7 +118,7 @@ DB0_PACKED_END std::pair getDiff_IOStats() const; void fetchDP_ChangeLogs(StateNumType begin_state, std::optional end_state, - std::function f) const override; + std::function f) const override; #ifndef NDEBUG void getDRAM_IOMap(std::unordered_map &) const override; @@ -137,9 +137,8 @@ DB0_PACKED_END // DRAM-changelog stream stores the sequence of updates to DRAM pages // DRAM-changelog must be initialized before DRAM_IOStream DRAM_ChangeLogStreamT m_dram_changelog_io; - // data-page change log, each chunk corresponds to a separate data transaction - // first element from each chunk represents the state number - // and the rest are the logical data page numbers mutated in that transaction + // data-page change log, each chunk corresponds to a separate data transaction + // holds logical data page numbers mutated in that transaction DP_ChangeLogStreamT m_dp_changelog_io; // meta-stream keeps meta-data about the other streams MetaIOStream m_meta_io; diff --git a/src/dbzero/core/storage/BaseStorage.cpp b/src/dbzero/core/storage/BaseStorage.cpp index 81ea7982..7eb1145a 100644 --- a/src/dbzero/core/storage/BaseStorage.cpp +++ b/src/dbzero/core/storage/BaseStorage.cpp @@ -58,9 +58,9 @@ namespace db0 void BaseStorage::endCommit() { } - + void BaseStorage::fetchDP_ChangeLogs(StateNumType begin_state, std::optional end_state, - std::function f) const + std::function f) const { THROWF(db0::InternalException) << "Operation not supported: fetchChangeLog"; } diff --git a/src/dbzero/core/storage/BaseStorage.hpp b/src/dbzero/core/storage/BaseStorage.hpp index 58c26e18..fb9e1925 100644 --- a/src/dbzero/core/storage/BaseStorage.hpp +++ b/src/dbzero/core/storage/BaseStorage.hpp @@ -7,6 +7,7 @@ #include #include #include +#include "ChangeLogTypes.hpp" namespace db0 @@ -22,7 +23,7 @@ namespace db0 { public: using DRAM_ChangeLogT = db0::o_change_log; - using DP_ChangeLogT = db0::o_change_log; + using DP_ChangeLogT = db0::o_change_log; BaseStorage(AccessType); virtual ~BaseStorage() = default; @@ -126,7 +127,7 @@ namespace db0 // in the change log (or up to the last state number if not specified) // @param f function to be called for each transaction's change log virtual void fetchDP_ChangeLogs(StateNumType begin_state, std::optional end_state, - std::function f) const; + std::function f) const; #ifndef NDEBUG // state number, file offset diff --git a/src/dbzero/core/storage/ChangeLog.cpp b/src/dbzero/core/storage/ChangeLog.cpp index b655dda4..b024414b 100644 --- a/src/dbzero/core/storage/ChangeLog.cpp +++ b/src/dbzero/core/storage/ChangeLog.cpp @@ -1,4 +1,5 @@ #include "ChangeLog.hpp" +#include "ChangeLogTypes.hpp" namespace db0 @@ -105,5 +106,6 @@ namespace db0 } template class o_change_log; - + template class o_change_log; + } \ No newline at end of file diff --git a/src/dbzero/core/storage/ChangeLogIOStream.cpp b/src/dbzero/core/storage/ChangeLogIOStream.cpp index c9c1f0d6..b2ab4cce 100644 --- a/src/dbzero/core/storage/ChangeLogIOStream.cpp +++ b/src/dbzero/core/storage/ChangeLogIOStream.cpp @@ -91,5 +91,6 @@ namespace db0 } template class ChangeLogIOStream<>; - + template class ChangeLogIOStream >; + } \ No newline at end of file diff --git a/src/dbzero/core/storage/ChangeLogIOStream.hpp b/src/dbzero/core/storage/ChangeLogIOStream.hpp index 3c145c55..53c7c9ce 100644 --- a/src/dbzero/core/storage/ChangeLogIOStream.hpp +++ b/src/dbzero/core/storage/ChangeLogIOStream.hpp @@ -2,6 +2,7 @@ #include "BlockIOStream.hpp" #include "ChangeLog.hpp" +#include "ChangeLogTypes.hpp" #include #include @@ -102,5 +103,6 @@ namespace db0 } extern template class ChangeLogIOStream<>; + extern template class ChangeLogIOStream >; } \ No newline at end of file diff --git a/src/dbzero/core/storage/ChangeLogTypes.hpp b/src/dbzero/core/storage/ChangeLogTypes.hpp new file mode 100644 index 00000000..3becf179 --- /dev/null +++ b/src/dbzero/core/storage/ChangeLogTypes.hpp @@ -0,0 +1,28 @@ +#pragma once + +#include "ChangeLog.hpp" + +namespace db0 + +{ + +DB0_PACKED_BEGIN + struct DB0_PACKED_ATTR o_dp_changelog_header: o_fixed + { + // state number this change log corresponds to + StateNumType m_state_num; + // sentinel storage page number for this transaction (see Page_IO::getEndPageNum()) + std::uint64_t m_end_storage_page_num; + + o_dp_changelog_header(StateNumType state_num, std::uint64_t end_storage_page_num = 0) + : m_state_num(state_num) + , m_end_storage_page_num(end_storage_page_num) + { + } + }; +DB0_PACKED_END + + extern template class o_change_log; + extern template class o_change_log; + +} diff --git a/src/dbzero/core/storage/SparseIndexBase.hpp b/src/dbzero/core/storage/SparseIndexBase.hpp index 434c9fd2..0c4ca484 100644 --- a/src/dbzero/core/storage/SparseIndexBase.hpp +++ b/src/dbzero/core/storage/SparseIndexBase.hpp @@ -237,7 +237,7 @@ DB0_PACKED_END template void SparseIndexBase::update(PageNumT page_num, StateNumT state_num, std::uint64_t max_storage_page_num) - { + { // update tree header if necessary this->update(max_storage_page_num); if (state_num > m_max_state_num) { @@ -246,9 +246,6 @@ DB0_PACKED_END } // put the currently generated state number as the first element in the change-log if (m_change_log_ptr) { - if (m_change_log_ptr->empty()) { - m_change_log_ptr->push_back(m_max_state_num); - } m_change_log_ptr->push_back(page_num); } } diff --git a/src/dbzero/core/storage/SparsePair.cpp b/src/dbzero/core/storage/SparsePair.cpp index e21cdb95..a4289c72 100644 --- a/src/dbzero/core/storage/SparsePair.cpp +++ b/src/dbzero/core/storage/SparsePair.cpp @@ -46,35 +46,23 @@ namespace db0 return m_sparse_index.size() + m_diff_index.size(); } - const SparsePair::DRAM_ChangeLogT &SparsePair::extractChangeLog(DRAM_ChangeLogStreamT &changelog_io) - { - assert(!m_change_log.empty()); - // sort change log but keep the 1st item (the state number) at its place - if (!m_change_log.empty()) { - std::sort(m_change_log.begin() + 1, m_change_log.end()); - } - + const SparsePair::DP_ChangeLogT &SparsePair::extractChangeLog(DP_ChangeLogStreamT &changelog_io) + { + std::sort(m_change_log.begin(), m_change_log.end()); ChangeLogData cl_data; - auto it = m_change_log.begin(), end = m_change_log.end(); - // first item is the state number - cl_data.m_rle_builder.append(*(it++), true); - // the first page number (second item) must be added even if it is identical as the state number - if (it != end) { - cl_data.m_rle_builder.append(*(it++), true); - } - // all remaining items add with deduplication - for (; it != end; ++it) { - cl_data.m_rle_builder.append(*it, false); + // add page numbers with deduplication + for (auto page_num : m_change_log) { + cl_data.m_rle_builder.append(page_num, false); } - // RLE encode, no duplicates - auto &result = changelog_io.appendChangeLog(std::move(cl_data)); + // RLE encode, no duplicates + auto &result = changelog_io.appendChangeLog(std::move(cl_data), this->getMaxStateNum()); m_change_log.clear(); return result; } - + std::size_t SparsePair::getChangeLogSize() const { - return m_change_log.empty() ? 0 : m_change_log.size() - 1; + return m_change_log.size(); } void SparsePair::commit() diff --git a/src/dbzero/core/storage/SparsePair.hpp b/src/dbzero/core/storage/SparsePair.hpp index 69c9cebe..5a5af2e7 100644 --- a/src/dbzero/core/storage/SparsePair.hpp +++ b/src/dbzero/core/storage/SparsePair.hpp @@ -17,8 +17,8 @@ namespace db0 using PageNumT = SparseIndex::PageNumT; using StateNumT = SparseIndex::StateNumT; using tag_create = SparseIndex::tag_create; - using DRAM_ChangeLogT = BaseStorage::DRAM_ChangeLogT; - using DRAM_ChangeLogStreamT = db0::ChangeLogIOStream; + using DP_ChangeLogT = BaseStorage::DP_ChangeLogT; + using DP_ChangeLogStreamT = db0::ChangeLogIOStream; SparsePair(std::size_t node_size); SparsePair(DRAM_Pair, AccessType); @@ -56,7 +56,7 @@ namespace db0 * Write internally managed change log into a specific stream * and then clean the internal change log */ - const DRAM_ChangeLogT &extractChangeLog(DRAM_ChangeLogStreamT &); + const DP_ChangeLogT &extractChangeLog(DP_ChangeLogStreamT &); std::size_t getChangeLogSize() const; diff --git a/src/dbzero/object_model/tags/SelectModified.cpp b/src/dbzero/object_model/tags/SelectModified.cpp index 2f44d24f..23815516 100644 --- a/src/dbzero/object_model/tags/SelectModified.cpp +++ b/src/dbzero/object_model/tags/SelectModified.cpp @@ -32,16 +32,10 @@ namespace db0::object_model // 4. refine results (lazy filter) by binary comparison of pre-scope and post-scope objects to identify actual mutations std::unordered_set mutated_dps; - storage.fetchDP_ChangeLogs(from_state, to_state + 1, [&](StateNumType, const DP_ChangeLogT &change_log) { - auto it = change_log.begin(), end = change_log.end(); - if (it != end) { - // first element holds the state number and should be ignored - ++it; + storage.fetchDP_ChangeLogs(from_state, to_state + 1, [&](const DP_ChangeLogT &change_log) { + for (auto page_num: change_log) { + mutated_dps.insert(page_num); } - - for (;it != end; ++it) { - mutated_dps.insert(*it); - } }); std::vector unique_dps; diff --git a/tests/unit_tests/BDevStorageTest.cpp b/tests/unit_tests/BDevStorageTest.cpp index ded721ed..c120b513 100644 --- a/tests/unit_tests/BDevStorageTest.cpp +++ b/tests/unit_tests/BDevStorageTest.cpp @@ -543,16 +543,13 @@ namespace tests for (auto range: state_ranges) { // collect and validate change-logs std::vector state_nums; - cut.fetchDP_ChangeLogs(range.first, range.second, [&](StateNumType fetched_state_num, const DP_ChangeLogT &cl) { - state_nums.push_back(fetched_state_num); + cut.fetchDP_ChangeLogs(range.first, range.second, [&](const DP_ChangeLogT &cl) { + state_nums.push_back(cl.m_state_num); std::vector page_nums; - auto it = cl.begin(); - ASSERT_EQ(fetched_state_num, *it); - ++it; - for (;it != cl.end(); ++it) { - page_nums.push_back(*it); + for (auto page_num: cl) { + page_nums.push_back(page_num); } - auto sorted_updates = updates[fetched_state_num - 1]; + auto sorted_updates = updates[cl.m_state_num - 1]; std::sort(sorted_updates.begin(), sorted_updates.end()); ASSERT_EQ(page_nums, sorted_updates); }); diff --git a/tests/unit_tests/ChangeLogTest.cpp b/tests/unit_tests/ChangeLogTest.cpp index 79a202be..149d605e 100644 --- a/tests/unit_tests/ChangeLogTest.cpp +++ b/tests/unit_tests/ChangeLogTest.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include using namespace std; @@ -87,4 +88,40 @@ namespace tests ASSERT_EQ(count, 5u); } + TEST_F( ChangeLogTest , testChangeLogWithHeader ) + { + std::vector buf1, buf2; + // create default change log (i.e. null header) + using ChangeLogT1 = o_change_log; + using ChangeLogT2 = o_change_log; + + // create without header first + std::vector change_log = { 1, 2, 3, 4, 5 }; + ChangeLogData data(std::move(change_log), true, false, false); + auto measured_size = ChangeLogT1::measure(data); + buf1.resize(measured_size); + auto &cut1 = ChangeLogT1::__new(buf1.data(), data); + + // create with header next (same data) + measured_size = ChangeLogT2::measure(data); + buf2.resize(measured_size); + auto &cut2 = ChangeLogT2::__new(buf2.data(), data, 123, 456); + ASSERT_EQ(cut1.sizeOf() + o_dp_changelog_header::sizeOf(), cut2.sizeOf()); + + // compare contents of both change logs + auto it1 = cut1.begin(); + auto it2 = cut2.begin(); + while (it1 != cut1.end() && it2 != cut2.end()) { + ASSERT_EQ(*it1, *it2); + ++it1; + ++it2; + } + ASSERT_FALSE(it1 != cut1.end()); + ASSERT_FALSE(it2 != cut2.end()); + + // access header fields + ASSERT_EQ(123u, cut2.m_state_num); + ASSERT_EQ(456u, cut2.m_end_storage_page_num); + } + } diff --git a/tests/unit_tests/SparsePairTest.cpp b/tests/unit_tests/SparsePairTest.cpp index b0c7655e..413ec3c4 100644 --- a/tests/unit_tests/SparsePairTest.cpp +++ b/tests/unit_tests/SparsePairTest.cpp @@ -20,6 +20,8 @@ namespace tests { public: static constexpr const char *file_name = "my-test-prefix_1.db0"; + using DP_ChangeLogStreamT = SparsePair::DP_ChangeLogStreamT; + SparsePairTest() = default; void SetUp() override { @@ -43,7 +45,7 @@ namespace tests SparsePair cut(SparsePair::tag_create(), dram_pair); auto &sparse_index = cut.getSparseIndex(); std::vector items_1 { - // page number, state number, physical page number, page type + // page number, state number, physical page number { 1, 1, 1 }, { 0, 1, 0 } }; @@ -56,21 +58,21 @@ namespace tests auto tail_function = [&]() { return file.size(); }; - + { - ChangeLogIOStream io(file, 0, 4096, tail_function); + DP_ChangeLogStreamT io(file, 0, 4096, tail_function); auto &change_log = cut.extractChangeLog(io); std::vector data; for (auto value: change_log) { data.push_back(value); } - io.close(); - // first element of the change log is the state number - ASSERT_EQ(data, (std::vector { 1, 0, 1 })); + io.close(); + ASSERT_EQ(data, (std::vector { 0, 1 })); + ASSERT_EQ(change_log.m_state_num, 1u); } - + std::vector items_2 { - // page number, state number, physical page number, page type + // page number, state number, physical page number { 2, 1, 2 }, { 3, 2, 3 }, { 0, 3, 4 }, { 2, 4, 5 }, { 4, 5, 6 } }; @@ -79,17 +81,17 @@ namespace tests } { - ChangeLogIOStream io(file, 0, 4096, tail_function); + DP_ChangeLogStreamT io(file, 0, 4096, tail_function); while (io.readChangeLogChunk()); auto &change_log = cut.extractChangeLog(io); - // first element of the change log is the state number - std::vector expected_data { 1, 0, 2, 3, 4 }; + std::vector expected_data { 0, 2, 3, 4 }; std::vector data; for (auto value: change_log) { data.push_back(value); } io.close(); ASSERT_EQ(data, expected_data); + ASSERT_EQ(change_log.m_state_num, 5u); } } @@ -120,7 +122,7 @@ namespace tests } // simulate change log extraction - db0::ChangeLogIOStream io(file, 0, 16 << 10, tail_function, AccessType::READ_WRITE); + DP_ChangeLogStreamT io(file, 0, 16 << 10, tail_function, AccessType::READ_WRITE); while (io.readChangeLogChunk()); cut.extractChangeLog(io); io.close(); @@ -130,5 +132,5 @@ namespace tests ASSERT_EQ(cut.getMaxStateNum(), i); } } - + } From 62bbd2c51aed6678b39cd26059db253249b11e3b Mon Sep 17 00:00:00 2001 From: Wojtek Date: Tue, 25 Nov 2025 09:34:17 +0100 Subject: [PATCH 02/12] fixes --- src/dbzero/core/storage/BDevStorage.cpp | 5 +++-- src/dbzero/core/storage/ChangeLogTypes.hpp | 2 +- src/dbzero/core/storage/SparsePair.cpp | 7 +++++-- src/dbzero/core/storage/SparsePair.hpp | 2 +- tests/unit_tests/SparsePairTest.cpp | 6 +++--- 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/dbzero/core/storage/BDevStorage.cpp b/src/dbzero/core/storage/BDevStorage.cpp index 2825b912..50c59c44 100644 --- a/src/dbzero/core/storage/BDevStorage.cpp +++ b/src/dbzero/core/storage/BDevStorage.cpp @@ -330,11 +330,12 @@ namespace db0 m_meta_io.checkAndAppend(state_num); m_meta_io.flush(); + m_page_io.flush(); // Extract & flush sparse index change log first (on condition of any updates) - m_sparse_pair.extractChangeLog(m_dp_changelog_io); + // we also need to collect the end storage page number (sentinel) + m_sparse_pair.extractChangeLog(m_dp_changelog_io, m_page_io.getEndPageNum()); m_dram_io.flushUpdates(state_num, m_dram_changelog_io); m_dp_changelog_io.flush(); - m_page_io.flush(); // NOTE: fsync has stronger guarantees than flush in a multi-process environments m_file.fsync(); // flush changelog AFTER all updates from all other streams have been flushed diff --git a/src/dbzero/core/storage/ChangeLogTypes.hpp b/src/dbzero/core/storage/ChangeLogTypes.hpp index 3becf179..4ca01fcd 100644 --- a/src/dbzero/core/storage/ChangeLogTypes.hpp +++ b/src/dbzero/core/storage/ChangeLogTypes.hpp @@ -14,7 +14,7 @@ DB0_PACKED_BEGIN // sentinel storage page number for this transaction (see Page_IO::getEndPageNum()) std::uint64_t m_end_storage_page_num; - o_dp_changelog_header(StateNumType state_num, std::uint64_t end_storage_page_num = 0) + o_dp_changelog_header(StateNumType state_num, std::uint64_t end_storage_page_num) : m_state_num(state_num) , m_end_storage_page_num(end_storage_page_num) { diff --git a/src/dbzero/core/storage/SparsePair.cpp b/src/dbzero/core/storage/SparsePair.cpp index a4289c72..c488d787 100644 --- a/src/dbzero/core/storage/SparsePair.cpp +++ b/src/dbzero/core/storage/SparsePair.cpp @@ -46,7 +46,8 @@ namespace db0 return m_sparse_index.size() + m_diff_index.size(); } - const SparsePair::DP_ChangeLogT &SparsePair::extractChangeLog(DP_ChangeLogStreamT &changelog_io) + const SparsePair::DP_ChangeLogT &SparsePair::extractChangeLog(DP_ChangeLogStreamT &changelog_io, + std::uint64_t end_storage_page_num) { std::sort(m_change_log.begin(), m_change_log.end()); ChangeLogData cl_data; @@ -56,7 +57,9 @@ namespace db0 } // RLE encode, no duplicates - auto &result = changelog_io.appendChangeLog(std::move(cl_data), this->getMaxStateNum()); + auto &result = changelog_io.appendChangeLog( + std::move(cl_data), this->getMaxStateNum(), end_storage_page_num + ); m_change_log.clear(); return result; } diff --git a/src/dbzero/core/storage/SparsePair.hpp b/src/dbzero/core/storage/SparsePair.hpp index 5a5af2e7..6c516a07 100644 --- a/src/dbzero/core/storage/SparsePair.hpp +++ b/src/dbzero/core/storage/SparsePair.hpp @@ -56,7 +56,7 @@ namespace db0 * Write internally managed change log into a specific stream * and then clean the internal change log */ - const DP_ChangeLogT &extractChangeLog(DP_ChangeLogStreamT &); + const DP_ChangeLogT &extractChangeLog(DP_ChangeLogStreamT &, std::uint64_t end_storage_page_num); std::size_t getChangeLogSize() const; diff --git a/tests/unit_tests/SparsePairTest.cpp b/tests/unit_tests/SparsePairTest.cpp index 413ec3c4..c9311698 100644 --- a/tests/unit_tests/SparsePairTest.cpp +++ b/tests/unit_tests/SparsePairTest.cpp @@ -61,7 +61,7 @@ namespace tests { DP_ChangeLogStreamT io(file, 0, 4096, tail_function); - auto &change_log = cut.extractChangeLog(io); + auto &change_log = cut.extractChangeLog(io, 0); std::vector data; for (auto value: change_log) { data.push_back(value); @@ -83,7 +83,7 @@ namespace tests { DP_ChangeLogStreamT io(file, 0, 4096, tail_function); while (io.readChangeLogChunk()); - auto &change_log = cut.extractChangeLog(io); + auto &change_log = cut.extractChangeLog(io, 0); std::vector expected_data { 0, 2, 3, 4 }; std::vector data; for (auto value: change_log) { @@ -124,7 +124,7 @@ namespace tests // simulate change log extraction DP_ChangeLogStreamT io(file, 0, 16 << 10, tail_function, AccessType::READ_WRITE); while (io.readChangeLogChunk()); - cut.extractChangeLog(io); + cut.extractChangeLog(io, 0); io.close(); // refresh updates local cached variables with DRAM prefix From effa6368acf01a59e607f8cf657f0e1902b0dd0f Mon Sep 17 00:00:00 2001 From: Wojtek Date: Tue, 25 Nov 2025 15:37:35 +0100 Subject: [PATCH 03/12] Page IO step size configuration --- src/dbzero/bindings/python/PyAPI.cpp | 21 +++++++--- src/dbzero/bindings/python/PyWorkspace.cpp | 9 +++-- src/dbzero/bindings/python/PyWorkspace.hpp | 3 +- src/dbzero/core/storage/BDevStorage.cpp | 38 +++++++++++++----- src/dbzero/core/storage/BDevStorage.hpp | 18 ++++++--- src/dbzero/core/storage/Diff_IO.cpp | 13 ++++--- src/dbzero/core/storage/Diff_IO.hpp | 3 +- src/dbzero/core/storage/Page_IO.cpp | 45 +++++++++++++++------- src/dbzero/core/storage/Page_IO.hpp | 20 +++++++--- src/dbzero/workspace/Workspace.cpp | 20 +++++----- src/dbzero/workspace/Workspace.hpp | 14 +++++-- tests/unit_tests/Diff_IOTest.cpp | 10 ++--- 12 files changed, 145 insertions(+), 69 deletions(-) diff --git a/src/dbzero/bindings/python/PyAPI.cpp b/src/dbzero/bindings/python/PyAPI.cpp index cdf6ea9c..df0766d4 100644 --- a/src/dbzero/bindings/python/PyAPI.cpp +++ b/src/dbzero/bindings/python/PyAPI.cpp @@ -182,7 +182,8 @@ namespace db0::python { // prefix_name, open_mode, autocommit (bool) static const char *kwlist[] = { - "prefix_name", "open_mode", "autocommit", "slab_size", "lock_flags", "meta_io_step_size", NULL + "prefix_name", "open_mode", "autocommit", "slab_size", "lock_flags", "meta_io_step_size", + "page_io_step_size", NULL }; const char *prefix_name = nullptr; const char *open_mode = nullptr; @@ -190,8 +191,9 @@ namespace db0::python PyObject *py_slab_size = nullptr; PyObject *py_lock_flags = nullptr; PyObject *py_meta_io_step_size = nullptr; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|sOOOO:open", const_cast(kwlist), - &prefix_name, &open_mode, &py_autocommit, &py_slab_size, &py_lock_flags, &py_meta_io_step_size)) + PyObject *py_page_io_step_size = nullptr; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|sOOOOO:open", const_cast(kwlist), + &prefix_name, &open_mode, &py_autocommit, &py_slab_size, &py_lock_flags, &py_meta_io_step_size, &py_page_io_step_size)) { return NULL; } @@ -225,6 +227,7 @@ namespace db0::python } std::optional meta_io_step_size; + std::optional page_io_step_size; if (py_meta_io_step_size) { if (!PyLong_Check(py_meta_io_step_size)) { PyErr_SetString(PyExc_TypeError, "Invalid argument type: meta_io_step_size"); @@ -233,13 +236,21 @@ namespace db0::python meta_io_step_size = PyLong_AsUnsignedLong(py_meta_io_step_size); } + if (py_page_io_step_size) { + if (!PyLong_Check(py_page_io_step_size)) { + PyErr_SetString(PyExc_TypeError, "Invalid argument type: page_io_step_size"); + return NULL; + } + page_io_step_size = PyLong_AsUnsignedLong(py_page_io_step_size); + } + auto access_type = open_mode ? parseAccessType(open_mode) : db0::AccessType::READ_WRITE; PyToolkit::getPyWorkspace().open( - prefix_name, access_type, autocommit, slab_size, py_lock_flags, meta_io_step_size + prefix_name, access_type, autocommit, slab_size, py_lock_flags, meta_io_step_size, page_io_step_size ); Py_RETURN_NONE; } - + PyObject *PyAPI_open(PyObject *self, PyObject *args, PyObject *kwargs) { PY_API_FUNC diff --git a/src/dbzero/bindings/python/PyWorkspace.cpp b/src/dbzero/bindings/python/PyWorkspace.cpp index 603e62f2..e04fb44c 100644 --- a/src/dbzero/bindings/python/PyWorkspace.cpp +++ b/src/dbzero/bindings/python/PyWorkspace.cpp @@ -29,21 +29,22 @@ namespace db0::python } void PyWorkspace::open(const std::string &prefix_name, AccessType access_type, std::optional autocommit, - std::optional slab_size, ObjectPtr py_lock_flags, std::optional meta_io_step_size) + std::optional slab_size, ObjectPtr py_lock_flags, std::optional meta_io_step_size, + std::optional page_io_step_size) { if (!m_workspace) { // initialize dbzero with current working directory initWorkspace(""); } - + if (py_lock_flags) { db0::Config lock_flags_config(py_lock_flags); m_workspace->open(prefix_name, access_type, autocommit, slab_size, - lock_flags_config, meta_io_step_size + lock_flags_config, meta_io_step_size, page_io_step_size ); } else { m_workspace->open(prefix_name, access_type, autocommit, slab_size, - {}, meta_io_step_size + {}, meta_io_step_size, page_io_step_size ); } } diff --git a/src/dbzero/bindings/python/PyWorkspace.hpp b/src/dbzero/bindings/python/PyWorkspace.hpp index b0760d27..d433c5f1 100644 --- a/src/dbzero/bindings/python/PyWorkspace.hpp +++ b/src/dbzero/bindings/python/PyWorkspace.hpp @@ -61,10 +61,11 @@ namespace db0::python * Opens a specific prefix for read or read/write * a newly opened read/write prefix becomes the default one * @param slab_size will only have effect for a newly created prefixes + * @param page_io_step_size parameter only respected for newly created prefixes */ void open(const std::string &prefix_name, AccessType, std::optional autocommit = {}, std::optional slab_size = {}, ObjectPtr lock_flags = nullptr, - std::optional meta_io_step_size = {} + std::optional meta_io_step_size = {}, std::optional page_io_step_size = {} ); db0::Workspace &getWorkspace() const; diff --git a/src/dbzero/core/storage/BDevStorage.cpp b/src/dbzero/core/storage/BDevStorage.cpp index 50c59c44..378dbf84 100644 --- a/src/dbzero/core/storage/BDevStorage.cpp +++ b/src/dbzero/core/storage/BDevStorage.cpp @@ -24,11 +24,14 @@ namespace db0 return std::move(io); } - o_prefix_config::o_prefix_config(std::uint32_t block_size, std::uint32_t page_size, std::uint32_t dram_page_size) + o_prefix_config::o_prefix_config(std::uint32_t block_size, std::uint32_t page_size, + std::uint32_t dram_page_size, std::uint32_t page_io_step_size) : m_block_size(block_size) , m_page_size(page_size) , m_dram_page_size(dram_page_size) + , m_page_io_step_size(page_io_step_size) { + std::memset(m_reserved.data(), 0, sizeof(m_reserved)); } BDevStorage::BDevStorage(const std::string &file_name, AccessType access_type, LockFlags lock_flags, @@ -51,7 +54,7 @@ namespace db0 , m_sparse_pair(m_dram_io.getDRAMPair(), access_type) , m_sparse_index(m_sparse_pair.getSparseIndex()) , m_diff_index(m_sparse_pair.getDiffIndex()) - , m_page_io(getPage_IO(m_sparse_pair.getNextStoragePageNum(), access_type)) + , m_page_io(getPage_IO(m_sparse_pair.getNextStoragePageNum(), m_config.m_page_io_step_size, access_type)) { // in read-only mode need to refresh in order to retrieve a consitent DRAM state // since other process might be actively modifying the underlying file @@ -88,8 +91,19 @@ namespace db0 return config; } + std::uint32_t getPageIOStepSize(std::optional step_size_hint) + { + if (step_size_hint) { + // FIXME: log + throw std::runtime_error("not implemented"); + } else { + // default to single-block steps + return 1u; + } + } + void BDevStorage::create(const std::string &file_name, std::optional page_size, - std::uint32_t dram_page_size_hint) + std::uint32_t dram_page_size_hint, std::optional step_size_hint) { if (!page_size) { page_size = DEFAULT_PAGE_SIZE; @@ -105,7 +119,9 @@ namespace db0 auto dram_page_size = block_size - BlockIOStream::sizeOfHeaders(DRAM_IOStream::ENABLE_CHECKSUMS) - DRAM_IOStream::sizeOfHeader(); // create a new config using placement new - auto config = new (buffer.data()) o_prefix_config(block_size, *page_size, dram_page_size); + auto config = new (buffer.data()) o_prefix_config( + block_size, *page_size, dram_page_size, getPageIOStepSize(step_size_hint) + ); std::uint64_t offset = CONFIG_BLOCK_SIZE; auto next_block_offset = [&]() @@ -389,8 +405,8 @@ namespace db0 return result; } - Diff_IO BDevStorage::getPage_IO(std::uint64_t next_page_hint, AccessType access_type) - { + Diff_IO BDevStorage::getPage_IO(std::uint64_t next_page_hint, std::uint32_t step_size, AccessType access_type) + { if (access_type == AccessType::READ_ONLY) { // return empty page IO return { CONFIG_BLOCK_SIZE, m_file, m_config.m_page_size }; @@ -399,14 +415,16 @@ namespace db0 assert(access_type == AccessType::READ_WRITE); auto block_id = (next_page_hint * m_config.m_page_size) / m_config.m_block_size; auto block_capacity = m_config.m_block_size / m_config.m_page_size; - + if (next_page_hint == 0) { // assign first page auto address = std::max(m_dram_io.tail(), m_meta_io.tail()); address = std::max(address, m_dram_changelog_io.tail()); address = std::max(address, m_dp_changelog_io.tail()); + // NOTE: initialize with a known block num = 0 (first block of the first step) return { CONFIG_BLOCK_SIZE, m_file, m_config.m_page_size, m_config.m_block_size, address, 0, - getBlockIOTailFunction() }; + step_size, getBlockIOTailFunction(), 0 + }; } auto address = CONFIG_BLOCK_SIZE + block_id * m_config.m_block_size; @@ -418,8 +436,10 @@ namespace db0 page_count = block_capacity; } + // NOTE: block num is unknown in this case return { CONFIG_BLOCK_SIZE, m_file, m_config.m_page_size, m_config.m_block_size, address, page_count, - getBlockIOTailFunction() }; + step_size, getBlockIOTailFunction() + }; } std::uint32_t BDevStorage::getMaxStateNum() const { diff --git a/src/dbzero/core/storage/BDevStorage.hpp b/src/dbzero/core/storage/BDevStorage.hpp index 2ffaa119..e20d8cfd 100644 --- a/src/dbzero/core/storage/BDevStorage.hpp +++ b/src/dbzero/core/storage/BDevStorage.hpp @@ -40,10 +40,15 @@ DB0_PACKED_BEGIN // data pages change log std::uint64_t m_dp_changelog_io_offset = 0; std::uint64_t m_meta_io_offset = 0; - // reserved for future use - std::array m_reserved = { 0, 0, 0, 0 }; - - o_prefix_config(std::uint32_t block_size, std::uint32_t page_size, std::uint32_t dram_page_size); + // the number of concsecutive blocks created by the PageIO + // a a single indivisible "step". + // This value (entire step) corresponts to a single entry in the REL_Index (if it's used) + std::uint32_t m_page_io_step_size; + // reserved for future use (0-filled) + std::array m_reserved; + + o_prefix_config(std::uint32_t block_size, std::uint32_t page_size, std::uint32_t dram_page_size, + std::uint32_t page_io_step_size); }; DB0_PACKED_END @@ -69,9 +74,10 @@ DB0_PACKED_END /** * Create a new .db0 file + * @param step_size_hint defines requested Page IO step size in bytes */ static void create(const std::string &file_name, std::optional page_size = {}, - std::uint32_t dram_page_size_hint = 16 * 1024 - 256); + std::uint32_t dram_page_size_hint = 16 * 1024 - 256, std::optional step_size_hint = {}); void read(std::uint64_t address, StateNumType state_num, std::size_t size, void *buffer, FlagSet = { AccessOptions::read, AccessOptions::write }) const override; @@ -183,7 +189,7 @@ DB0_PACKED_END MetaIOStream getMetaIOStream(std::uint64_t first_block_pos, std::size_t step_size, AccessType); - Diff_IO getPage_IO(std::uint64_t next_page_hint, AccessType); + Diff_IO getPage_IO(std::uint64_t next_page_hint, std::uint32_t step_size, AccessType); o_prefix_config readConfig() const; diff --git a/src/dbzero/core/storage/Diff_IO.cpp b/src/dbzero/core/storage/Diff_IO.cpp index b1e9f5ae..cdce8a93 100644 --- a/src/dbzero/core/storage/Diff_IO.cpp +++ b/src/dbzero/core/storage/Diff_IO.cpp @@ -6,8 +6,8 @@ namespace db0 { -DB0_PACKED_BEGIN +DB0_PACKED_BEGIN struct DB0_PACKED_ATTR o_diff_header: public o_fixed { // the number of objects contained @@ -16,6 +16,7 @@ DB0_PACKED_BEGIN // (bytes before offset can be taken by remnants of the object from the previous page) std::uint16_t m_offset = 0; }; +DB0_PACKED_END class DiffWriter { @@ -221,9 +222,10 @@ DB0_PACKED_BEGIN m_current += o_diff_header::sizeOf(); } - Diff_IO::Diff_IO(std::size_t header_size, CFile &file, std::uint32_t page_size, std::uint32_t block_size, std::uint64_t address, - std::uint32_t page_count, std::function tail_function) - : Page_IO(header_size, file, page_size, block_size, address, page_count, tail_function) + Diff_IO::Diff_IO(std::size_t header_size, CFile &file, std::uint32_t page_size, + std::uint32_t block_size, std::uint64_t address, std::uint32_t page_count, std::uint32_t step_size, + std::function tail_function, std::optional block_num) + : Page_IO(header_size, file, page_size, block_size, address, page_count, step_size, tail_function, block_num) , m_write_buf(page_size * 2) , m_read_buf(page_size * 2) , m_writer(std::make_unique( @@ -233,7 +235,7 @@ DB0_PACKED_BEGIN } Diff_IO::Diff_IO(std::size_t header_size, CFile &file, std::uint32_t page_size) - : Page_IO(header_size, file, page_size) + : Page_IO(header_size, file, page_size) , m_read_buf(page_size * 2) { } @@ -337,5 +339,4 @@ DB0_PACKED_BEGIN return { m_full_dp_bytes_written + m_diff_bytes_written, m_diff_bytes_written }; } -DB0_PACKED_END } \ No newline at end of file diff --git a/src/dbzero/core/storage/Diff_IO.hpp b/src/dbzero/core/storage/Diff_IO.hpp index 1658d178..f007d68e 100644 --- a/src/dbzero/core/storage/Diff_IO.hpp +++ b/src/dbzero/core/storage/Diff_IO.hpp @@ -14,7 +14,8 @@ namespace db0 { public: Diff_IO(std::size_t header_size, CFile &file, std::uint32_t page_size, std::uint32_t block_size, std::uint64_t address, - std::uint32_t page_count, std::function tail_function); + std::uint32_t page_count, std::uint32_t step_size, std::function tail_function, + std::optional block_num = {}); // Read-only Diff_IO Diff_IO(std::size_t header_size, CFile &file, std::uint32_t page_size); ~Diff_IO(); diff --git a/src/dbzero/core/storage/Page_IO.cpp b/src/dbzero/core/storage/Page_IO.cpp index 0776f12e..c6d31325 100644 --- a/src/dbzero/core/storage/Page_IO.cpp +++ b/src/dbzero/core/storage/Page_IO.cpp @@ -7,25 +7,28 @@ namespace db0 { Page_IO::Page_IO(std::size_t header_size, CFile &file, std::uint32_t page_size, std::uint32_t block_size, - std::uint64_t address, std::uint32_t page_count, std::function tail_function) - : m_header_size(header_size) + std::uint64_t address, std::uint32_t page_count, std::uint32_t step_size, std::function tail_function, + std::optional block_num) + : m_header_size(header_size) , m_page_size(page_size) , m_block_size(block_size) , m_block_capacity(block_size / page_size) + , m_step_size(step_size) , m_file(file) , m_address(address) , m_page_count(page_count) - , m_first_page_num(getPageNum(address)) + , m_first_page_num(getPageNum(address)) , m_tail_function(tail_function) , m_access_type(AccessType::READ_WRITE) - { + , m_block_num(block_num) + { assert(block_size % page_size == 0); } Page_IO::Page_IO(std::size_t header_size, CFile &file, std::uint32_t page_size) : m_header_size(header_size) - , m_page_size(page_size) - , m_file(file) + , m_page_size(page_size) + , m_file(file) , m_access_type(AccessType::READ_ONLY) { } @@ -47,10 +50,20 @@ namespace db0 void Page_IO::allocateNextBlock() { - // allocate the next block by appending it to the file - m_address = std::max(this->tail(), m_tail_function()); - m_first_page_num = getPageNum(m_address); - m_page_count = 0; + if (m_block_num && *m_block_num < (m_step_size - 1)) { + // allocate next block within the step + m_address += m_block_size; + m_first_page_num += m_block_capacity; + m_page_count = 0; + ++(*m_block_num); + } else { + // allocate the next step / block by appending it to the file + m_address = std::max(this->tail(), m_tail_function()); + m_first_page_num = getPageNum(m_address); + m_page_count = 0; + // initiate the next full step + m_block_num = 0; + } } void Page_IO::read(std::uint64_t page_num, void *buffer) const { @@ -64,11 +77,17 @@ namespace db0 std::uint64_t Page_IO::getPageNum(std::uint64_t address) const { return ((address - m_header_size) / m_block_size) * m_block_capacity; } - - std::uint64_t Page_IO::tail() const + + std::uint64_t Page_IO::tail() const { assert(m_access_type == AccessType::READ_WRITE); - return m_address + m_block_size; + if (m_block_num) { + // reserve space up to end of the step + return m_address + (m_step_size - *m_block_num) * m_block_size; + } else { + // step not known, return end of current block + return m_address + m_block_size; + } } std::uint32_t Page_IO::getPageSize() const { diff --git a/src/dbzero/core/storage/Page_IO.hpp b/src/dbzero/core/storage/Page_IO.hpp index e1465fad..822e9434 100644 --- a/src/dbzero/core/storage/Page_IO.hpp +++ b/src/dbzero/core/storage/Page_IO.hpp @@ -21,11 +21,15 @@ namespace db0 // @param block_size size of a unit block of pages to be pre-allocated by the stream // @param address of the currently active block // @param page_count the number of pages already stored in the current block - // @param tail_function a function returning current (unflushed) size of the file + // @param step_size number of blocks per single indivisible step (for REL_Index mapping) + // @param tail_function a function returning current (unflushed) size of the file (Page IO excluded) + // @param block_num the block number within the step if it is known Page_IO(std::size_t header_size, CFile &file, std::uint32_t page_size, std::uint32_t block_size, std::uint64_t address, - std::uint32_t page_count, std::function tail_function); + std::uint32_t page_count, std::uint32_t step_size, std::function tail_function, + std::optional block_num = {}); // Read-only Page_IO + // NOTE: step size is irrelevant in read-only mode, will be initialized to 0 Page_IO(std::size_t header_size, CFile &file, std::uint32_t page_size); ~Page_IO(); @@ -56,11 +60,13 @@ namespace db0 const std::uint32_t m_block_size = 0; // maximum number of pages in block const std::uint32_t m_block_capacity = 0; - + // must be >= 1 in read/write mode + const std::uint32_t m_step_size = 0; + // Get the next page number to be assigned by the "append" method (first) // and the number of consecutive pages available in the current block std::pair getNextPageNum(); - + private: CFile &m_file; // begin address of the current block @@ -71,9 +77,11 @@ namespace db0 std::uint64_t m_first_page_num = 0; std::function m_tail_function; const AccessType m_access_type; - + // block number within the step + std::optional m_block_num; + std::uint64_t getPageNum(std::uint64_t address) const; - void allocateNextBlock(); + void allocateNextBlock(); }; } \ No newline at end of file diff --git a/src/dbzero/workspace/Workspace.cpp b/src/dbzero/workspace/Workspace.cpp index 3094324c..7f2138df 100644 --- a/src/dbzero/workspace/Workspace.cpp +++ b/src/dbzero/workspace/Workspace.cpp @@ -32,7 +32,7 @@ namespace db0 const PrefixName &prefix_name, bool &new_file_created, AccessType access_type, std::optional page_size, std::optional slab_size, std::optional sparse_index_node_size, std::optional lock_flags, - std::optional meta_io_step_size) + std::optional meta_io_step_size, std::optional page_io_step_size) { if (!page_size) { page_size = DEFAULT_PAGE_SIZE; @@ -51,8 +51,8 @@ namespace db0 if (access_type == AccessType::READ_ONLY) { THROWF(db0::InputException) << "Prefix does not exist: " << prefix_name; } - - BDevStorage::create(file_name, *page_size, *sparse_index_node_size); + + BDevStorage::create(file_name, *page_size, *sparse_index_node_size, page_io_step_size); new_file_created = true; } auto storage = std::make_shared( @@ -334,7 +334,8 @@ namespace db0 db0::swine_ptr Workspace::tryGetFixtureEx(const PrefixName &prefix_name, std::optional access_type, std::optional page_size, std::optional slab_size, std::optional sparse_index_node_size, - std::optional autocommit, std::optional lock_flags, std::optional meta_io_step_size) + std::optional autocommit, std::optional lock_flags, std::optional meta_io_step_size, + std::optional page_io_step_size) { bool file_created = false; auto uuid = getUUID(prefix_name); @@ -349,7 +350,7 @@ namespace db0 } bool read_only = (*access_type == AccessType::READ_ONLY); auto [prefix, allocator] = openMemspace(prefix_name, file_created, *access_type, page_size, slab_size, - sparse_index_node_size, lock_flags, meta_io_step_size + sparse_index_node_size, lock_flags, meta_io_step_size, page_io_step_size ); if (file_created) { // initialize new fixture @@ -410,10 +411,10 @@ namespace db0 std::optional page_size, std::optional slab_size, std::optional sparse_index_node_size, std::optional autocommit, std::optional lock_flags, - std::optional meta_io_step_size) + std::optional meta_io_step_size, std::optional page_io_step_size) { auto fixture = tryGetFixtureEx(px_name, access_type, page_size, slab_size, sparse_index_node_size, - autocommit, lock_flags, meta_io_step_size + autocommit, lock_flags, meta_io_step_size, page_io_step_size ); if (!fixture) { THROWF(db0::InputException) << "Prefix: " << px_name << " not found"; @@ -552,10 +553,11 @@ namespace db0 } void Workspace::open(const PrefixName &prefix_name, AccessType access_type, std::optional autocommit, - std::optional slab_size, std::optional lock_flags, std::optional meta_io_step_size) + std::optional slab_size, std::optional lock_flags, + std::optional meta_io_step_size, std::optional page_io_step_size) { auto fixture = getFixtureEx(prefix_name, access_type, {}, slab_size, {}, autocommit, - lock_flags, meta_io_step_size + lock_flags, meta_io_step_size, page_io_step_size ); // update default fixture if (!m_default_fixture || (*m_default_fixture != *fixture)) { diff --git a/src/dbzero/workspace/Workspace.hpp b/src/dbzero/workspace/Workspace.hpp index 91a21b45..6a8ec8f5 100644 --- a/src/dbzero/workspace/Workspace.hpp +++ b/src/dbzero/workspace/Workspace.hpp @@ -115,7 +115,9 @@ namespace db0 std::pair, std::shared_ptr > openMemspace(const PrefixName &, bool &new_file_created, AccessType = AccessType::READ_WRITE, std::optional page_size = {}, std::optional slab_size = {}, std::optional sparse_index_node_size = {}, - std::optional lock_flags = {}, std::optional meta_io_step_size = {}); + std::optional lock_flags = {}, std::optional meta_io_step_size = {}, + std::optional page_io_step_size = {} + ); // Clear all internal in-memory caches void clearCache() const; @@ -171,13 +173,15 @@ namespace db0 std::optional page_size = {}, std::optional slab_size = {}, std::optional sparse_index_node_size = {}, std::optional autocommit = {}, std::optional lock_flags = {}, - std::optional meta_io_step_size = {}); + std::optional meta_io_step_size = {}, + std::optional page_io_step_size = {}); swine_ptr getFixtureEx(const PrefixName &, std::optional = AccessType::READ_WRITE, std::optional page_size = {}, std::optional slab_size = {}, std::optional sparse_index_node_size = {}, std::optional autocommit = {}, std::optional lock_flags = {}, - std::optional meta_io_step_size = {}); + std::optional meta_io_step_size = {}, + std::optional page_io_step_size = {}); /** * Get existing fixture by UUID @@ -216,10 +220,12 @@ namespace db0 * @param access_type * @param autocommit flag indicating if the prefix should be auto-committed * @param meta_io_step_size the size of the step in the underlying MetaIOStream (16MB by default) + * @param page_io_step_size parameter only respected for newly created prefixes */ void open(const PrefixName &, AccessType access_type, std::optional autocommit = {}, std::optional slab_size = {}, std::optional default_lock_flags = {}, - std::optional meta_io_step_size = {}); + std::optional meta_io_step_size = {}, std::optional page_io_step_size = {} + ); bool drop(const PrefixName &, bool if_exists = true); diff --git a/tests/unit_tests/Diff_IOTest.cpp b/tests/unit_tests/Diff_IOTest.cpp index 09f0d658..2476508d 100644 --- a/tests/unit_tests/Diff_IOTest.cpp +++ b/tests/unit_tests/Diff_IOTest.cpp @@ -18,7 +18,7 @@ namespace tests public: Diff_IOProxy(std::size_t header_size, CFile &file, std::uint32_t page_size, std::uint32_t block_size, std::uint64_t address, std::uint32_t page_count, std::function tail_function) - : Diff_IO(header_size, file, page_size, block_size, address, page_count, tail_function) + : Diff_IO(header_size, file, page_size, block_size, address, page_count, 1u, tail_function) { } @@ -93,7 +93,7 @@ namespace tests return file.size(); }; - Diff_IO cut(0, file, page_size, page_size * 16, 0, 0, tail_function); + Diff_IOProxy cut(0, file, page_size, page_size * 16, 0, 0, tail_function); std::vector diff_buf; db0::getDiffs(m_dp_0.data(), m_dp_1.data(), page_size, diff_buf); @@ -109,7 +109,7 @@ namespace tests return file.size(); }; - Diff_IO cut(0, file, page_size, page_size * 16, 0, 0, tail_function); + Diff_IOProxy cut(0, file, page_size, page_size * 16, 0, 0, tail_function); std::vector diff_buf; db0::getDiffs(m_dp_0.data(), m_dp_1.data(), page_size, diff_buf); @@ -129,7 +129,7 @@ namespace tests }; // block size set to "2" so that it overflows quickly - Diff_IO cut(0, file, page_size, page_size * 2, 0, 0, tail_function); + Diff_IOProxy cut(0, file, page_size, page_size * 2, 0, 0, tail_function); std::vector diff_buf; db0::getDiffs(m_dp_0.data(), m_dp_1.data(), page_size, diff_buf); @@ -149,7 +149,7 @@ namespace tests }; // block size set to "2" so that it overflows quickly - Diff_IO cut(0, file, page_size, page_size * 2, 0, 0, tail_function); + Diff_IOProxy cut(0, file, page_size, page_size * 2, 0, 0, tail_function); std::vector diff_buf; db0::getDiffs(m_dp_0.data(), m_dp_1.data(), page_size, diff_buf); From 430ea6d222b88b9d7ae88cf6f8fff952ffc45c65 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Tue, 25 Nov 2025 20:34:45 +0100 Subject: [PATCH 04/12] WIP: REL_Index + partial integration --- python_tests/test_page_io.py | 46 ++++++++++ src/dbzero/core/storage/BDevStorage.cpp | 112 +++++++++++++++++++----- src/dbzero/core/storage/BDevStorage.hpp | 34 ++++++- src/dbzero/core/storage/ExtSpace.cpp | 15 ++++ src/dbzero/core/storage/ExtSpace.hpp | 45 ++++++++++ src/dbzero/core/storage/Page_IO.cpp | 2 +- src/dbzero/core/storage/REL_Index.cpp | 38 ++++++++ src/dbzero/core/storage/REL_Index.hpp | 43 +++++++++ src/dbzero/core/storage/SparsePair.cpp | 2 +- src/dbzero/core/storage/SparsePair.hpp | 5 +- 10 files changed, 312 insertions(+), 30 deletions(-) create mode 100644 python_tests/test_page_io.py create mode 100644 src/dbzero/core/storage/ExtSpace.cpp create mode 100644 src/dbzero/core/storage/ExtSpace.hpp create mode 100644 src/dbzero/core/storage/REL_Index.cpp create mode 100644 src/dbzero/core/storage/REL_Index.hpp diff --git a/python_tests/test_page_io.py b/python_tests/test_page_io.py new file mode 100644 index 00000000..bf05b2e4 --- /dev/null +++ b/python_tests/test_page_io.py @@ -0,0 +1,46 @@ +import dbzero as db0 +from .memo_test_types import MemoTestClass, MemoTestSingleton +from .conftest import DB0_DIR + + +def test_create_prefix_with_page_io_step_size(db0_fixture): + # use 16 MB page I/O step size + db0.open("some-new-prefix", "rw", page_io_step_size = 16 << 20) + buf = [] + for _ in range(50): + buf.append(MemoTestClass("a" * 1024)) # 1 KB string + # commit after each append + db0.commit() + + px_size_1 = db0.get_storage_stats()["prefix_size"] + assert px_size_1 > (16 << 20) + + # after adding more pages, prefix size should not increase until next step is reached + for _ in range(50): + buf.append(MemoTestClass("a" * 1024)) # 1 KB string + # commit after each append + db0.commit() + + px_size_2 = db0.get_storage_stats()["prefix_size"] + assert (px_size_2 - px_size_1) < (128 << 10) + + +def test_continue_append_with_step_size(db0_fixture): + db0.open("some-new-prefix", "rw", page_io_step_size = 16 << 20) + root = MemoTestSingleton([]) + for _ in range(50): + root.value.append(MemoTestClass("a" * 1024)) # 1 KB string + db0.commit() + + db0.close() + db0.init(DB0_DIR) + # NOTE: we're opening an existing prefix with already initialized page I/O step size + db0.open("some-new-prefix", "rw") + root = db0.fetch(MemoTestSingleton) + for _ in range(250): + root.value.append(MemoTestClass("a" * 1024)) + db0.commit() + + # NOTE: this behavior will change after we implement REL_Index + assert db0.get_storage_stats()["prefix_size"] > (32 << 20) + \ No newline at end of file diff --git a/src/dbzero/core/storage/BDevStorage.cpp b/src/dbzero/core/storage/BDevStorage.cpp index 378dbf84..0a331ba0 100644 --- a/src/dbzero/core/storage/BDevStorage.cpp +++ b/src/dbzero/core/storage/BDevStorage.cpp @@ -12,18 +12,6 @@ namespace db0 { - BlockIOStream readAll(BlockIOStream &&io) - { - // FIXME: implement WAL processing - std::vector buf; - for (;;) { - if (!io.readChunk(buf)) { - break; - } - } - return std::move(io); - } - o_prefix_config::o_prefix_config(std::uint32_t block_size, std::uint32_t page_size, std::uint32_t dram_page_size, std::uint32_t page_io_step_size) : m_block_size(block_size) @@ -34,6 +22,14 @@ namespace db0 std::memset(m_reserved.data(), 0, sizeof(m_reserved)); } + DRAM_Pair tryGetDRAMPair(DRAM_IOStream *dram_io_ptr) + { + if (!dram_io_ptr) { + return {}; + } + return dram_io_ptr->getDRAMPair(); + } + BDevStorage::BDevStorage(const std::string &file_name, AccessType access_type, LockFlags lock_flags, std::optional meta_io_step_size) : BaseStorage(access_type) @@ -55,6 +51,13 @@ namespace db0 , m_sparse_index(m_sparse_pair.getSparseIndex()) , m_diff_index(m_sparse_pair.getDiffIndex()) , m_page_io(getPage_IO(m_sparse_pair.getNextStoragePageNum(), m_config.m_page_io_step_size, access_type)) + , m_ext_dram_changelog_io(tryGetChangeLogIOStream( + m_config.m_ext_dram_changelog_io_offset, access_type) + ) + , m_ext_dram_io(init(tryGetDRAMIOStream( + m_config.m_ext_dram_io_offset, m_config.m_ext_dram_page_size, access_type), m_ext_dram_changelog_io.get()) + ) + , m_ext_space(tryGetDRAMPair(m_ext_dram_io.get()), access_type) { // in read-only mode need to refresh in order to retrieve a consitent DRAM state // since other process might be actively modifying the underlying file @@ -73,6 +76,15 @@ namespace db0 return std::move(dram_io); } + std::unique_ptr BDevStorage::init(std::unique_ptr &&dram_io, + DRAM_ChangeLogStreamT *dram_change_log) + { + if (dram_io && dram_change_log) { + dram_io->load(*dram_change_log); + } + return std::move(dram_io); + } + MetaIOStream BDevStorage::init(MetaIOStream &&io) { // exhaust the meta-log stream (position at the last item) and all managed streams @@ -91,11 +103,11 @@ namespace db0 return config; } - std::uint32_t getPageIOStepSize(std::optional step_size_hint) + std::uint32_t getPageIOStepSize(std::uint32_t block_size, std::optional step_size_hint) { - if (step_size_hint) { - // FIXME: log - throw std::runtime_error("not implemented"); + if (step_size_hint && *step_size_hint > 0) { + // align to full block size + return (*step_size_hint + block_size - 1) / block_size; } else { // default to single-block steps return 1u; @@ -120,7 +132,7 @@ namespace db0 DRAM_IOStream::sizeOfHeader(); // create a new config using placement new auto config = new (buffer.data()) o_prefix_config( - block_size, *page_size, dram_page_size, getPageIOStepSize(step_size_hint) + block_size, *page_size, dram_page_size, getPageIOStepSize(block_size, step_size_hint) ); std::uint64_t offset = CONFIG_BLOCK_SIZE; @@ -136,6 +148,16 @@ namespace db0 config->m_dram_changelog_io_offset = next_block_offset(); config->m_dp_changelog_io_offset = next_block_offset(); config->m_meta_io_offset = next_block_offset(); + + // initialize ext streams only when needed + bool has_ext_dram_io = config->m_page_io_step_size > 1; + if (has_ext_dram_io) { + config->m_ext_dram_io_offset = next_block_offset(); + // NOTE: use same as the prefix page size + config->m_ext_dram_page_size = *page_size; + config->m_ext_dram_changelog_io_offset = next_block_offset(); + } + CFile::create(file_name, buffer); // Create higher-order data structures @@ -143,10 +165,18 @@ namespace db0 CFile file(file_name, AccessType::READ_WRITE); DRAM_ChangeLogStreamT *dram_changelog_io_ptr = nullptr; DRAM_IOStream *dram_io_ptr = nullptr; - auto tail_function = [&]() { + std::unique_ptr ext_dram_changelog_io_ptr = nullptr; + std::unique_ptr ext_dram_io_ptr = nullptr; + + auto tail_function = [&]() + { assert(dram_io_ptr && dram_changelog_io_ptr); // take max from the underlying I/O streams - return std::max(offset, std::max(dram_io_ptr->tail(), dram_changelog_io_ptr->tail())); + auto result = std::max(offset, std::max(dram_io_ptr->tail(), dram_changelog_io_ptr->tail())); + if (ext_dram_io_ptr && ext_dram_changelog_io_ptr) { + result = std::max(result, std::max(ext_dram_io_ptr->tail(), ext_dram_changelog_io_ptr->tail())); + } + return result; }; auto dram_changelog_io = DRAM_ChangeLogStreamT(file, config->m_dram_changelog_io_offset, config->m_block_size, @@ -156,14 +186,36 @@ namespace db0 AccessType::READ_WRITE, config->m_dram_page_size); dram_io_ptr = &dram_io; + // Initialize extension streams when needed + if (has_ext_dram_io) { + ext_dram_changelog_io_ptr = std::make_unique(file, + static_cast(config->m_ext_dram_changelog_io_offset), + static_cast(config->m_block_size), tail_function, + AccessType::READ_WRITE); + ext_dram_io_ptr = std::make_unique(file, + static_cast(config->m_ext_dram_io_offset), + static_cast(config->m_block_size), tail_function, AccessType::READ_WRITE, + static_cast(config->m_ext_dram_page_size)); + } + // create then flush an empty sparse pair (i.e. SparseIndex + DiffIndex) SparsePair sparse_pair(SparsePair::tag_create(), dram_io.getDRAMPair()); - dram_io.flushUpdates(sparse_pair.getMaxStateNum(), dram_changelog_io); + auto max_state_num = sparse_pair.getMaxStateNum(); + dram_io.flushUpdates(max_state_num, dram_changelog_io); dram_changelog_io.flush(); dram_io.close(); dram_changelog_io.close(); + + if (has_ext_dram_io) { + assert(ext_dram_io_ptr && ext_dram_changelog_io_ptr); + ext_dram_io_ptr->flushUpdates(max_state_num, *ext_dram_changelog_io_ptr); + ext_dram_changelog_io_ptr->flush(); + ext_dram_io_ptr->close(); + ext_dram_changelog_io_ptr->close(); + } + file.close(); - } + } } bool BDevStorage::tryFindMutation(std::uint64_t page_num, StateNumType state_num, @@ -394,14 +446,30 @@ namespace db0 return { m_file, first_block_pos, m_config.m_block_size, getTailFunction(), access_type, dram_page_size }; } + std::unique_ptr BDevStorage::tryGetDRAMIOStream(std::uint64_t first_block_pos, + std::uint32_t dram_page_size, AccessType access_type) + { + if (!first_block_pos) { + return nullptr; + } + return std::make_unique(m_file, first_block_pos, m_config.m_block_size, + getTailFunction(), access_type, dram_page_size); + } + std::uint64_t BDevStorage::tail() const { // take max from the 4 underlying I/O streams auto result = std::max(m_dram_io.tail(), m_meta_io.tail()); result = std::max(result, m_dram_changelog_io.tail()); result = std::max(result, m_dp_changelog_io.tail()); - result = std::max(result, m_page_io.tail()); + result = std::max(result, m_page_io.tail()); + // include ext streams when initialized + if (m_ext_dram_io) { + assert(m_ext_dram_changelog_io); + result = std::max(result, std::max(m_ext_dram_io->tail(), m_ext_dram_changelog_io->tail())); + } + return result; } diff --git a/src/dbzero/core/storage/BDevStorage.hpp b/src/dbzero/core/storage/BDevStorage.hpp index e20d8cfd..7d9445c1 100644 --- a/src/dbzero/core/storage/BDevStorage.hpp +++ b/src/dbzero/core/storage/BDevStorage.hpp @@ -18,11 +18,14 @@ #include #include #include +#include "ExtSpace.hpp" namespace db0 { + class REL_Index; + DB0_PACKED_BEGIN struct DB0_PACKED_ATTR o_prefix_config: public o_fixed_versioned { @@ -30,7 +33,7 @@ DB0_PACKED_BEGIN static constexpr std::uint64_t DB0_MAGIC = 0x0DB0DB0DB0DB0DB0; std::uint64_t m_magic = DB0_MAGIC; - std::uint32_t m_version = 1; + std::uint32_t m_version = 1; std::uint32_t m_block_size; // the prefix page size std::uint32_t m_page_size; @@ -44,6 +47,9 @@ DB0_PACKED_BEGIN // a a single indivisible "step". // This value (entire step) corresponts to a single entry in the REL_Index (if it's used) std::uint32_t m_page_io_step_size; + std::uint64_t m_ext_dram_io_offset = 0; + std::uint32_t m_ext_dram_page_size = 0; + std::uint64_t m_ext_dram_changelog_io_offset = 0; // reserved for future use (0-filled) std::array m_reserved; @@ -51,7 +57,7 @@ DB0_PACKED_BEGIN std::uint32_t page_io_step_size); }; DB0_PACKED_END - + /** * Block-device based storage implementation * the SparseIndex is held in-memory, modifications are written to WAL and serialized to disk on close @@ -150,13 +156,19 @@ DB0_PACKED_END MetaIOStream m_meta_io; // memory-mapped file I/O DRAM_IOStream m_dram_io; - // SparseIndex + DiffIndex + // SparseIndex + DiffIndex (based over the dram_io) SparsePair m_sparse_pair; // DRAM-backed sparse index tree SparseIndex &m_sparse_index; DiffIndex &m_diff_index; // the stream for storing & reading full-DPs and diff-encoded DPs Diff_IO m_page_io; + // extension DRAM IO (only initialized when holding extension indexes e.g. REL_Index) + std::unique_ptr m_ext_dram_changelog_io; + std::unique_ptr m_ext_dram_io; + ExtSpace m_ext_space; + // the primary REL_Index instance (if used) + REL_Index *m_rel_index_ptr = nullptr; bool m_refresh_pending = false; mutable std::shared_mutex m_mutex; #ifndef NDEBUG @@ -168,6 +180,7 @@ DB0_PACKED_END #endif static DRAM_IOStream init(DRAM_IOStream &&, DRAM_ChangeLogStreamT &); + static std::unique_ptr init(std::unique_ptr &&, DRAM_ChangeLogStreamT *); static MetaIOStream init(MetaIOStream &&); @@ -180,6 +193,8 @@ DB0_PACKED_END BlockIOStream getBlockIOStream(std::uint64_t first_block_pos, AccessType); DRAM_IOStream getDRAMIOStream(std::uint64_t first_block_pos, std::uint32_t dram_page_size, AccessType); + std::unique_ptr tryGetDRAMIOStream(std::uint64_t first_block_pos, + std::uint32_t dram_page_size, AccessType); template ChangeLogIOStreamT getChangeLogIOStream(std::uint64_t first_block_pos, AccessType access_type) @@ -187,6 +202,19 @@ DB0_PACKED_END return { m_file, first_block_pos, m_config.m_block_size, getTailFunction(), access_type }; } + template + std::unique_ptr tryGetChangeLogIOStream(std::uint64_t first_block_pos, AccessType access_type) + { + if (first_block_pos) { + return std::make_unique( + m_file, first_block_pos, m_config.m_block_size, getTailFunction(), access_type + ); + } else { + // stream does not exist + return {}; + } + } + MetaIOStream getMetaIOStream(std::uint64_t first_block_pos, std::size_t step_size, AccessType); Diff_IO getPage_IO(std::uint64_t next_page_hint, std::uint32_t step_size, AccessType); diff --git a/src/dbzero/core/storage/ExtSpace.cpp b/src/dbzero/core/storage/ExtSpace.cpp new file mode 100644 index 00000000..1cf6ef53 --- /dev/null +++ b/src/dbzero/core/storage/ExtSpace.cpp @@ -0,0 +1,15 @@ +#include "ExtSpace.hpp" + +namespace db0 + +{ + + ExtSpace::ExtSpace(DRAM_Pair, AccessType access_type) + { + } + + ExtSpace::~ExtSpace() + { + } + +} \ No newline at end of file diff --git a/src/dbzero/core/storage/ExtSpace.hpp b/src/dbzero/core/storage/ExtSpace.hpp new file mode 100644 index 00000000..d9dbf486 --- /dev/null +++ b/src/dbzero/core/storage/ExtSpace.hpp @@ -0,0 +1,45 @@ +#pragma once + +#include +#include "BaseStorage.hpp" +#include "ChangeLogIOStream.hpp" +#include "REL_Index.hpp" + +namespace db0 + +{ + +DB0_PACKED_BEGIN + struct DB0_PACKED_ATTR o_ext_space: public o_fixed + { + // the primary (mandatory) and secondary (optional) REL_Index addresses + std::array m_rel_index_addr = {0, 0}; + // reserved for future use + std::array m_reserved; + }; +DB0_PACKED_END + + // The ExtSpace manages extension indexes (e.g. REL_Index) + class ExtSpace + { + public: + using DP_ChangeLogT = BaseStorage::DP_ChangeLogT; + using DP_ChangeLogStreamT = db0::ChangeLogIOStream; + + // NOTE: dram pair may be nullptr (for a null ExtSpace) + ExtSpace(DRAM_Pair, AccessType); + ~ExtSpace(); + + // get the primary REL_Index + inline REL_Index &getREL_Index() { + return m_rel_index; + } + + void refresh(); + void commit(); + + private: + REL_Index m_rel_index; + }; + +} \ No newline at end of file diff --git a/src/dbzero/core/storage/Page_IO.cpp b/src/dbzero/core/storage/Page_IO.cpp index c6d31325..45f5f3a6 100644 --- a/src/dbzero/core/storage/Page_IO.cpp +++ b/src/dbzero/core/storage/Page_IO.cpp @@ -69,7 +69,7 @@ namespace db0 void Page_IO::read(std::uint64_t page_num, void *buffer) const { m_file.read(m_header_size + page_num * m_page_size, m_page_size, buffer); } - + void Page_IO::write(std::uint64_t page_num, void *buffer) { m_file.write(m_header_size + page_num * m_page_size, m_page_size, buffer); } diff --git a/src/dbzero/core/storage/REL_Index.cpp b/src/dbzero/core/storage/REL_Index.cpp new file mode 100644 index 00000000..10322c62 --- /dev/null +++ b/src/dbzero/core/storage/REL_Index.cpp @@ -0,0 +1,38 @@ +#include "REL_Index.hpp" +#include + +namespace db0 + +{ + + REL_Index::REL_Index(Memspace &memspace, std::uint32_t step_size) + : super_t(memspace) + , m_step_size(step_size) + , m_shift(db0::getPageShift(step_size, true)) + { + } + + REL_Index::REL_Index(mptr ptr, std::uint32_t step_size) + : super_t(ptr) + , m_step_size(step_size) + , m_shift(db0::getPageShift(step_size, true)) + { + } + + void REL_Index::add(std::uint64_t page_num, std::uint64_t page_io_address) { + super_t::setItem(page_num >> m_shift, page_io_address); + } + + std::uint64_t REL_Index::get(std::uint64_t page_num) const { + return super_t::operator[](page_num >> m_shift); + } + + void REL_Index::detach() const { + super_t::detach(); + } + + void REL_Index::commit() const { + super_t::commit(); + } + +} \ No newline at end of file diff --git a/src/dbzero/core/storage/REL_Index.hpp b/src/dbzero/core/storage/REL_Index.hpp new file mode 100644 index 00000000..2ad98026 --- /dev/null +++ b/src/dbzero/core/storage/REL_Index.hpp @@ -0,0 +1,43 @@ +#pragma once + +#include +#include +#include + +namespace db0 + +{ + + // REL_Index holds a complete mapping from relative to absolute Page IO addresses + // (aka storage page numbers) + // it only holds the location of the entire step of blocks + // since relative addresses are from a continous space, they're represeted by vector indices + // NOTE: REL_Index must be initialized with a known "step size" (must be power of 2) + class REL_Index: protected db0::v_bvector + { + public: + using super_t = db0::v_bvector; + + // as null + REL_Index() = default; + REL_Index(const REL_Index &) = delete; + REL_Index(Memspace &, std::uint32_t step_size); + REL_Index(mptr, std::uint32_t step_size); + + // Add a new mapping from relative page num to storage page num + // @param page_num relative page num + // @param page_io_address absolute storage page num + void add(std::uint64_t page_num, std::uint64_t page_io_address); + + // Retrieve storage page num for a given relative page num + std::uint64_t get(std::uint64_t page_num) const; + + void detach() const; + void commit() const; + + private: + const std::uint32_t m_step_size; + const std::uint32_t m_shift; + }; + +} \ No newline at end of file diff --git a/src/dbzero/core/storage/SparsePair.cpp b/src/dbzero/core/storage/SparsePair.cpp index c488d787..af97f664 100644 --- a/src/dbzero/core/storage/SparsePair.cpp +++ b/src/dbzero/core/storage/SparsePair.cpp @@ -35,7 +35,7 @@ namespace db0 typename SparsePair::StateNumT SparsePair::getMaxStateNum() const { return std::max(m_sparse_index.getMaxStateNum(), m_diff_index.getMaxStateNum()); } - + void SparsePair::refresh() { m_sparse_index.refresh(); diff --git a/src/dbzero/core/storage/SparsePair.hpp b/src/dbzero/core/storage/SparsePair.hpp index 6c516a07..a1ec2f18 100644 --- a/src/dbzero/core/storage/SparsePair.hpp +++ b/src/dbzero/core/storage/SparsePair.hpp @@ -61,10 +61,9 @@ namespace db0 std::size_t getChangeLogSize() const; void commit(); - + private: - // change log contains the list of updates (modified items / page numbers) - // first element is the state number + // Change log contains the list of updates (modified items / page numbers) std::vector m_change_log; SparseIndex m_sparse_index; DiffIndex m_diff_index; From 2c4f134998baa755280ab24124d30a2231e7b759 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Tue, 25 Nov 2025 20:45:03 +0100 Subject: [PATCH 05/12] sync / save work --- src/dbzero/core/storage/ExtSpace.hpp | 3 ++- src/dbzero/core/storage/REL_Index.hpp | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/dbzero/core/storage/ExtSpace.hpp b/src/dbzero/core/storage/ExtSpace.hpp index d9dbf486..f5156430 100644 --- a/src/dbzero/core/storage/ExtSpace.hpp +++ b/src/dbzero/core/storage/ExtSpace.hpp @@ -1,9 +1,10 @@ #pragma once -#include #include "BaseStorage.hpp" #include "ChangeLogIOStream.hpp" #include "REL_Index.hpp" +#include +#include namespace db0 diff --git a/src/dbzero/core/storage/REL_Index.hpp b/src/dbzero/core/storage/REL_Index.hpp index 2ad98026..f1ef62f5 100644 --- a/src/dbzero/core/storage/REL_Index.hpp +++ b/src/dbzero/core/storage/REL_Index.hpp @@ -34,10 +34,10 @@ namespace db0 void detach() const; void commit() const; - + private: - const std::uint32_t m_step_size; - const std::uint32_t m_shift; + std::uint32_t m_step_size = 0; + std::uint32_t m_shift = 0; }; } \ No newline at end of file From 84ac3fabffee77f33d44304e8c730ad56ad8e05a Mon Sep 17 00:00:00 2001 From: Wojtek Date: Tue, 25 Nov 2025 21:24:45 +0100 Subject: [PATCH 06/12] WIP: save work --- src/dbzero/core/storage/BDevStorage.cpp | 10 ++++- src/dbzero/core/storage/ExtSpace.cpp | 44 +++++++++++++++++++-- src/dbzero/core/storage/ExtSpace.hpp | 17 +++++++- src/dbzero/core/storage/REL_Index.hpp | 5 +++ src/dbzero/core/storage/SparseIndexBase.hpp | 2 +- 5 files changed, 71 insertions(+), 7 deletions(-) diff --git a/src/dbzero/core/storage/BDevStorage.cpp b/src/dbzero/core/storage/BDevStorage.cpp index 0a331ba0..090bebd3 100644 --- a/src/dbzero/core/storage/BDevStorage.cpp +++ b/src/dbzero/core/storage/BDevStorage.cpp @@ -205,9 +205,11 @@ namespace db0 dram_changelog_io.flush(); dram_io.close(); dram_changelog_io.close(); - + + // create then flush the extension space if (has_ext_dram_io) { assert(ext_dram_io_ptr && ext_dram_changelog_io_ptr); + ExtSpace ext_space(ExtSpace::tag_create(), ext_dram_io_ptr->getDRAMPair()); ext_dram_io_ptr->flushUpdates(max_state_num, *ext_dram_changelog_io_ptr); ext_dram_changelog_io_ptr->flush(); ext_dram_io_ptr->close(); @@ -404,6 +406,12 @@ namespace db0 m_sparse_pair.extractChangeLog(m_dp_changelog_io, m_page_io.getEndPageNum()); m_dram_io.flushUpdates(state_num, m_dram_changelog_io); m_dp_changelog_io.flush(); + // Flush ext streams + if (m_ext_dram_io) { + assert(m_ext_dram_changelog_io); + m_ext_dram_io->flushUpdates(state_num, *m_ext_dram_changelog_io); + m_ext_dram_changelog_io->flush(); + } // NOTE: fsync has stronger guarantees than flush in a multi-process environments m_file.fsync(); // flush changelog AFTER all updates from all other streams have been flushed diff --git a/src/dbzero/core/storage/ExtSpace.cpp b/src/dbzero/core/storage/ExtSpace.cpp index 1cf6ef53..51f502bf 100644 --- a/src/dbzero/core/storage/ExtSpace.cpp +++ b/src/dbzero/core/storage/ExtSpace.cpp @@ -4,12 +4,50 @@ namespace db0 { - ExtSpace::ExtSpace(DRAM_Pair, AccessType access_type) + o_ext_space::o_ext_space() { + std::memset(m_reserved.data(), 0, sizeof(m_reserved)); + } + + ExtSpace::ExtSpace(tag_create, DRAM_Pair dram_pair, std::uint32_t step_size) + : m_dram_prefix(dram_pair.first) + , m_dram_allocator(dram_pair.second) + , m_dram_space(DRAMSpace::create(dram_pair)) + , m_access_type(AccessType::READ_WRITE) + , m_ext_space_root(m_dram_space) + , m_rel_index(m_dram_space, step_size) { + // NOTE: the secondary REL_Index is not used currently + m_ext_space_root.modify().m_rel_index_addr[0] = m_rel_index.getAddress(); } - + + ExtSpace::ExtSpace(DRAM_Pair dram_pair, AccessType access_type, std::uint32_t step_size) + : m_dram_prefix(dram_pair.first) + , m_dram_allocator(dram_pair.second) + , m_dram_space(DRAMSpace::create(dram_pair)) + , m_access_type(access_type) + , m_ext_space_root(tryOpenRoot()) + , m_rel_index(tryOpenPrimaryREL_Index(step_size)) + { + } + ExtSpace::~ExtSpace() { } - + + db0::v_object ExtSpace::tryOpenRoot() const + { + if (!m_dram_prefix || !m_dram_allocator) { + return {}; + } + return db0::v_object(m_dram_space.myPtr(Address::fromOffset(0))); + } + + REL_Index ExtSpace::tryOpenPrimaryREL_Index(std::uint32_t step_size) const + { + if (!m_ext_space_root) { + return {}; + } + return { m_dram_space.myPtr(m_ext_space_root->m_rel_index_addr[0]), step_size }; + } + } \ No newline at end of file diff --git a/src/dbzero/core/storage/ExtSpace.hpp b/src/dbzero/core/storage/ExtSpace.hpp index f5156430..216725c4 100644 --- a/src/dbzero/core/storage/ExtSpace.hpp +++ b/src/dbzero/core/storage/ExtSpace.hpp @@ -14,9 +14,11 @@ DB0_PACKED_BEGIN struct DB0_PACKED_ATTR o_ext_space: public o_fixed { // the primary (mandatory) and secondary (optional) REL_Index addresses - std::array m_rel_index_addr = {0, 0}; + std::array m_rel_index_addr; // reserved for future use std::array m_reserved; + + o_ext_space(); }; DB0_PACKED_END @@ -26,9 +28,11 @@ DB0_PACKED_END public: using DP_ChangeLogT = BaseStorage::DP_ChangeLogT; using DP_ChangeLogStreamT = db0::ChangeLogIOStream; + struct tag_create {}; // NOTE: dram pair may be nullptr (for a null ExtSpace) - ExtSpace(DRAM_Pair, AccessType); + ExtSpace(tag_create, DRAM_Pair, std::uint32_t step_size); + ExtSpace(DRAM_Pair, AccessType, std::uint32_t step_size); ~ExtSpace(); // get the primary REL_Index @@ -40,7 +44,16 @@ DB0_PACKED_END void commit(); private: + std::shared_ptr m_dram_prefix; + std::shared_ptr m_dram_allocator; + Memspace m_dram_space; + const AccessType m_access_type; + // the root object (created at address 0) + db0::v_object m_ext_space_root; REL_Index m_rel_index; + + db0::v_object tryOpenRoot() const; + REL_Index tryOpenPrimaryREL_Index(std::uint32_t step_size) const; }; } \ No newline at end of file diff --git a/src/dbzero/core/storage/REL_Index.hpp b/src/dbzero/core/storage/REL_Index.hpp index f1ef62f5..8afed688 100644 --- a/src/dbzero/core/storage/REL_Index.hpp +++ b/src/dbzero/core/storage/REL_Index.hpp @@ -36,6 +36,11 @@ namespace db0 void commit() const; private: + std::shared_ptr m_dram_prefix; + std::shared_ptr m_dram_allocator; + Memspace m_dram_space; + const AccessType m_access_type; + std::uint32_t m_step_size = 0; std::uint32_t m_shift = 0; }; diff --git a/src/dbzero/core/storage/SparseIndexBase.hpp b/src/dbzero/core/storage/SparseIndexBase.hpp index 0c4ca484..88819949 100644 --- a/src/dbzero/core/storage/SparseIndexBase.hpp +++ b/src/dbzero/core/storage/SparseIndexBase.hpp @@ -218,7 +218,7 @@ DB0_PACKED_END , m_dram_allocator(dram_pair.second) , m_dram_space(DRAMSpace::create(dram_pair)) , m_access_type(AccessType::READ_WRITE) - , m_index(createIndex()) + , m_index(createIndex()) , m_next_page_num(m_index.treeHeader().m_next_page_num) , m_max_state_num(m_index.treeHeader().m_max_state_num) , m_change_log_ptr(change_log_ptr) From 101f0ae1c342bf4b2c87a576c04927dc45004233 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 26 Nov 2025 13:17:02 +0100 Subject: [PATCH 07/12] WIP: save work --- .../SGB_Tree/SGB_CompressedLookupTree.hpp | 4 +- .../core/collections/vector/v_bvector.hpp | 80 ++++++++-- src/dbzero/core/dram/DRAMSpace.cpp | 15 +- src/dbzero/core/dram/DRAMSpace.hpp | 1 + src/dbzero/core/storage/BDevStorage.cpp | 17 ++- src/dbzero/core/storage/ExtSpace.cpp | 45 ++++-- src/dbzero/core/storage/ExtSpace.hpp | 42 ++++-- src/dbzero/core/storage/REL_Index.cpp | 74 +++++++-- src/dbzero/core/storage/REL_Index.hpp | 142 +++++++++++++++--- tests/unit_tests/DRAMSpaceTest.cpp | 19 +++ 10 files changed, 358 insertions(+), 81 deletions(-) diff --git a/src/dbzero/core/collections/SGB_Tree/SGB_CompressedLookupTree.hpp b/src/dbzero/core/collections/SGB_Tree/SGB_CompressedLookupTree.hpp index 32e3066b..27ac7017 100644 --- a/src/dbzero/core/collections/SGB_Tree/SGB_CompressedLookupTree.hpp +++ b/src/dbzero/core/collections/SGB_Tree/SGB_CompressedLookupTree.hpp @@ -271,8 +271,8 @@ DB0_PACKED_END std::size_t size() const { return super_t::size(); } - - void commit() { + + void commit() const { super_t::commit(); } diff --git a/src/dbzero/core/collections/vector/v_bvector.hpp b/src/dbzero/core/collections/vector/v_bvector.hpp index 98881151..e5b748d7 100644 --- a/src/dbzero/core/collections/vector/v_bvector.hpp +++ b/src/dbzero/core/collections/vector/v_bvector.hpp @@ -11,16 +11,32 @@ #include #include #include +#include #include namespace db0 { + enum class BVectorOptions: std::uint16_t + { + // Forces all allocations to be constant size (= page size) + FIXED_BLOCK = 0x0001, + }; + + using BVectorFlags = FlagSet; + DB0_PACKED_BEGIN template - struct DB0_PACKED_ATTR o_bvector: public o_fixed_versioned > + class DB0_PACKED_ATTR o_bvector: public o_base, 0, true> { + using super_t = o_base, 0, true>; + friend super_t; + + o_bvector() = default; + o_bvector(std::uint32_t page_size_hint, BVectorFlags flags = {}); + + public: // common dbzero object header db0::o_unique_header m_header; // root node pointer (may be data or pointers' block) @@ -29,12 +45,11 @@ DB0_PACKED_BEGIN std::uint64_t m_size = 0; // page size hint std::uint32_t m_page_size; + BVectorFlags m_flags; + + static std::size_t measure(std::uint32_t page_size_hint, BVectorFlags flags = {}); - o_bvector() = default; - o_bvector(std::uint32_t page_size_hint) - : m_page_size(page_size_hint) - { - } + template static std::size_t safeSizeOf(buf_t buf); void incRef(bool is_tag) { m_header.incRef(is_tag); @@ -69,8 +84,8 @@ DB0_PACKED_END /** * New, empty instance of the data structure */ - v_bvector(Memspace &mem, AccessFlags access_mode = {}) - : super_t(mem, mem.getPageSize(), access_mode) + v_bvector(Memspace &mem, BVectorFlags flags = {}, AccessFlags access_mode = {}) + : super_t(mem, mem.getPageSize(), flags, access_mode) , m_db_shift(data_container::shift(mem.getPageSize())) , m_db_mask(data_container::mask(mem.getPageSize())) , m_pb_shift(ptr_container::shift(mem.getPageSize())) @@ -120,14 +135,15 @@ DB0_PACKED_END } // Construct populated with values from a specific sequence - template v_bvector(Memspace &mem, const SequenceT &in, AccessFlags access_mode = {}) - : v_bvector(mem, access_mode) + template v_bvector(Memspace &mem, const SequenceT &in, BVectorFlags flags = {}, + AccessFlags access_mode = {}) + : v_bvector(mem, flags, access_mode) { for (const auto &item: in) { push_back(item); } } - + template void init(Memspace &mem, const SequenceT &in, AccessFlags access_mode = {}) { @@ -1052,7 +1068,11 @@ DB0_PACKED_END std::size_t evaluateBClass(std::uint64_t size) const { - std::size_t result = 0; + // NOTE: fixed block always evaluates to 0 (full DP) irrespective of size + if ((*this)->m_flags[BVectorOptions::FIXED_BLOCK]) { + return 0; + } + std::size_t result = 0; std::uint32_t ref_size = (1 << (m_db_shift - 1)); while ((ref_size >= size) && (ref_size > 0)) { ref_size >>= 1; @@ -1594,4 +1614,40 @@ DB0_PACKED_END std::map, int> v_bvector::m_instance_log; #endif + template + o_bvector::o_bvector(std::uint32_t page_size_hint, BVectorFlags flags) + : m_page_size(page_size_hint) + , m_flags(flags) + { + } + + template + std::size_t o_bvector::measure(std::uint32_t page_size_hint, BVectorFlags flags) + { + // size aligned to 1 DP + if (flags[BVectorOptions::FIXED_BLOCK]) { + return page_size_hint; + } else { + // actual size of members + return super_t::measureMembers(); + } + } + + template + template + std::size_t o_bvector::safeSizeOf(buf_t buf) + { + auto _buf = buf; + _buf += super_t::baseSize(); + auto &self = o_bvector::__const_ref(buf); + // size aligned to 1 DP + if (self.m_flags[BVectorOptions::FIXED_BLOCK]) { + buf += self.m_page_size; + return self.m_page_size; + } else { + // actual size of members + return _buf - buf; + } + } + } diff --git a/src/dbzero/core/dram/DRAMSpace.cpp b/src/dbzero/core/dram/DRAMSpace.cpp index e43c6ffb..c87351af 100644 --- a/src/dbzero/core/dram/DRAMSpace.cpp +++ b/src/dbzero/core/dram/DRAMSpace.cpp @@ -17,8 +17,21 @@ namespace db0 return DRAMSpace::create(dram_pair); } - Memspace DRAMSpace::create(DRAM_Pair dram_pair) { + Memspace DRAMSpace::create(DRAM_Pair dram_pair) + { + if (!dram_pair.first || !dram_pair.second) { + THROWF(db0::InternalException) << "Invalid DRAM_Pair provided to DRAMSpace::create"; + } return { dram_pair.first, dram_pair.second }; } + Memspace DRAMSpace::tryCreate(DRAM_Pair dram_pair) + { + if (dram_pair.first && dram_pair.second) { + return { dram_pair.first, dram_pair.second }; + } else { + return {}; + } + } + } \ No newline at end of file diff --git a/src/dbzero/core/dram/DRAMSpace.hpp b/src/dbzero/core/dram/DRAMSpace.hpp index 569eee46..e1698fd0 100644 --- a/src/dbzero/core/dram/DRAMSpace.hpp +++ b/src/dbzero/core/dram/DRAMSpace.hpp @@ -15,6 +15,7 @@ namespace db0 { static Memspace create(std::size_t page_size, std::function callback = {}); static Memspace create(DRAM_Pair); + static Memspace tryCreate(DRAM_Pair); }; } \ No newline at end of file diff --git a/src/dbzero/core/storage/BDevStorage.cpp b/src/dbzero/core/storage/BDevStorage.cpp index 090bebd3..a2261358 100644 --- a/src/dbzero/core/storage/BDevStorage.cpp +++ b/src/dbzero/core/storage/BDevStorage.cpp @@ -120,7 +120,7 @@ namespace db0 if (!page_size) { page_size = DEFAULT_PAGE_SIZE; } - + std::vector buffer(CONFIG_BLOCK_SIZE); // calculate block size to be page aligned and sufficient to fit a single sparse index node auto min_block_size = dram_page_size_hint + @@ -153,8 +153,8 @@ namespace db0 bool has_ext_dram_io = config->m_page_io_step_size > 1; if (has_ext_dram_io) { config->m_ext_dram_io_offset = next_block_offset(); - // NOTE: use same as the prefix page size - config->m_ext_dram_page_size = *page_size; + // NOTE: use entire block for ext DRAM page + config->m_ext_dram_page_size = dram_page_size; config->m_ext_dram_changelog_io_offset = next_block_offset(); } @@ -167,7 +167,7 @@ namespace db0 DRAM_IOStream *dram_io_ptr = nullptr; std::unique_ptr ext_dram_changelog_io_ptr = nullptr; std::unique_ptr ext_dram_io_ptr = nullptr; - + auto tail_function = [&]() { assert(dram_io_ptr && dram_changelog_io_ptr); @@ -430,9 +430,16 @@ namespace db0 flush(); } + // close extension streams + if (m_ext_dram_io) { + assert(m_ext_dram_changelog_io); + m_ext_dram_io->close(); + m_ext_dram_changelog_io->close(); + } + m_dram_io.close(); m_dram_changelog_io.close(); - m_dp_changelog_io.close(); + m_dp_changelog_io.close(); m_meta_io.close(); m_file.close(); } diff --git a/src/dbzero/core/storage/ExtSpace.cpp b/src/dbzero/core/storage/ExtSpace.cpp index 51f502bf..e7a68d47 100644 --- a/src/dbzero/core/storage/ExtSpace.cpp +++ b/src/dbzero/core/storage/ExtSpace.cpp @@ -4,29 +4,39 @@ namespace db0 { - o_ext_space::o_ext_space() { - std::memset(m_reserved.data(), 0, sizeof(m_reserved)); + o_ext_space::o_ext_space(std::uint32_t page_size) + : m_page_size(page_size) + { + assert(page_size >= sizeof(*this)); + // initialize reserved area to zero + std::memset((std::byte*)this + sizeof(*this), 0, page_size - sizeof(*this)); } - ExtSpace::ExtSpace(tag_create, DRAM_Pair dram_pair, std::uint32_t step_size) + std::size_t o_ext_space::measure(std::size_t page_size) { + return page_size; + } + + ExtSpace::ExtSpace(tag_create, DRAM_Pair dram_pair) : m_dram_prefix(dram_pair.first) , m_dram_allocator(dram_pair.second) , m_dram_space(DRAMSpace::create(dram_pair)) , m_access_type(AccessType::READ_WRITE) - , m_ext_space_root(m_dram_space) - , m_rel_index(m_dram_space, step_size) + , m_ext_space_root(m_dram_space, m_dram_space.getPageSize()) + , m_rel_index(std::make_unique(m_dram_space, m_dram_space.getPageSize(), AccessType::READ_WRITE)) { + assert(!!m_ext_space_root); + assert(m_rel_index); // NOTE: the secondary REL_Index is not used currently - m_ext_space_root.modify().m_rel_index_addr[0] = m_rel_index.getAddress(); + m_ext_space_root.modify().m_rel_index_addr[0] = m_rel_index->getAddress(); } - - ExtSpace::ExtSpace(DRAM_Pair dram_pair, AccessType access_type, std::uint32_t step_size) + + ExtSpace::ExtSpace(DRAM_Pair dram_pair, AccessType access_type) : m_dram_prefix(dram_pair.first) , m_dram_allocator(dram_pair.second) - , m_dram_space(DRAMSpace::create(dram_pair)) + , m_dram_space(DRAMSpace::tryCreate(dram_pair)) , m_access_type(access_type) , m_ext_space_root(tryOpenRoot()) - , m_rel_index(tryOpenPrimaryREL_Index(step_size)) + , m_rel_index(tryOpenPrimaryREL_Index(access_type)) { } @@ -34,20 +44,27 @@ namespace db0 { } + bool ExtSpace::operator!() const { + return !m_dram_prefix || !m_dram_allocator; + } + db0::v_object ExtSpace::tryOpenRoot() const { - if (!m_dram_prefix || !m_dram_allocator) { + if (!(*this)) { return {}; } return db0::v_object(m_dram_space.myPtr(Address::fromOffset(0))); } - REL_Index ExtSpace::tryOpenPrimaryREL_Index(std::uint32_t step_size) const + std::unique_ptr ExtSpace::tryOpenPrimaryREL_Index(AccessType access_type) const { if (!m_ext_space_root) { return {}; } - return { m_dram_space.myPtr(m_ext_space_root->m_rel_index_addr[0]), step_size }; + auto rel_index_addr = Address::fromOffset(m_ext_space_root->m_rel_index_addr[0]); + return std::make_unique( + m_dram_space.myPtr(rel_index_addr), m_dram_space.getPageSize(), access_type + ); } - + } \ No newline at end of file diff --git a/src/dbzero/core/storage/ExtSpace.hpp b/src/dbzero/core/storage/ExtSpace.hpp index 216725c4..96d9b96f 100644 --- a/src/dbzero/core/storage/ExtSpace.hpp +++ b/src/dbzero/core/storage/ExtSpace.hpp @@ -10,15 +10,30 @@ namespace db0 { + // NOTE: o_ext_space must occupy the entire DP (due to DRAM Allocator requirements) DB0_PACKED_BEGIN - struct DB0_PACKED_ATTR o_ext_space: public o_fixed + class DB0_PACKED_ATTR o_ext_space: public o_base { + using super_t = o_base; + friend super_t; + + o_ext_space(std::uint32_t page_size); + + public: + const std::uint32_t m_page_size; // the primary (mandatory) and secondary (optional) REL_Index addresses - std::array m_rel_index_addr; - // reserved for future use - std::array m_reserved; + std::array m_rel_index_addr = { 0, 0 }; - o_ext_space(); + static std::size_t measure(std::size_t page_size); + + template static std::size_t safeSizeOf(T buf) + { + auto _buf = buf; + _buf += super_t::baseSize(); + auto page_size = o_ext_space::__const_ref(buf).m_page_size; + buf += page_size; + return page_size; + } }; DB0_PACKED_END @@ -31,13 +46,16 @@ DB0_PACKED_END struct tag_create {}; // NOTE: dram pair may be nullptr (for a null ExtSpace) - ExtSpace(tag_create, DRAM_Pair, std::uint32_t step_size); - ExtSpace(DRAM_Pair, AccessType, std::uint32_t step_size); + ExtSpace(tag_create, DRAM_Pair); + ExtSpace(DRAM_Pair, AccessType); ~ExtSpace(); + bool operator!() const; + // get the primary REL_Index inline REL_Index &getREL_Index() { - return m_rel_index; + assert(m_rel_index); + return *m_rel_index; } void refresh(); @@ -46,14 +64,14 @@ DB0_PACKED_END private: std::shared_ptr m_dram_prefix; std::shared_ptr m_dram_allocator; - Memspace m_dram_space; + mutable Memspace m_dram_space; const AccessType m_access_type; // the root object (created at address 0) db0::v_object m_ext_space_root; - REL_Index m_rel_index; - + std::unique_ptr m_rel_index; + db0::v_object tryOpenRoot() const; - REL_Index tryOpenPrimaryREL_Index(std::uint32_t step_size) const; + std::unique_ptr tryOpenPrimaryREL_Index(AccessType) const; }; } \ No newline at end of file diff --git a/src/dbzero/core/storage/REL_Index.cpp b/src/dbzero/core/storage/REL_Index.cpp index 10322c62..f652acdb 100644 --- a/src/dbzero/core/storage/REL_Index.cpp +++ b/src/dbzero/core/storage/REL_Index.cpp @@ -4,29 +4,71 @@ namespace db0 { + + bool REL_ItemCompT::operator()(const REL_Item &lhs, const REL_Item &rhs) const { + return lhs.m_rel_page_num < rhs.m_rel_page_num; + } + + bool REL_ItemEqualT::operator()(const REL_Item &lhs, const REL_Item &rhs) const { + return lhs.m_rel_page_num == rhs.m_rel_page_num; + } - REL_Index::REL_Index(Memspace &memspace, std::uint32_t step_size) - : super_t(memspace) - , m_step_size(step_size) - , m_shift(db0::getPageShift(step_size, true)) + REL_Index::REL_Index(Memspace &memspace, std::size_t node_capacity, AccessType access_type) + : super_t(memspace, node_capacity, access_type) { } - REL_Index::REL_Index(mptr ptr, std::uint32_t step_size) - : super_t(ptr) - , m_step_size(step_size) - , m_shift(db0::getPageShift(step_size, true)) - { + bool REL_Item::operator==(const REL_Item &other) const { + return m_rel_page_num == other.m_rel_page_num; + } + + bool REL_CompressedItemCompT::operator()(const REL_CompressedItem &lhs, const REL_CompressedItem &rhs) const { + // compressed page numbers are comparable + return lhs.m_compressed_rel_page_num < rhs.m_compressed_rel_page_num; + } + + bool REL_CompressedItemEqualT::operator()(const REL_CompressedItem &lhs, const REL_CompressedItem &rhs) const { + return lhs.m_compressed_rel_page_num == rhs.m_compressed_rel_page_num; + } + + REL_CompressedItem::REL_CompressedItem(std::uint32_t first_rel_page_num, const REL_Item &item) + : m_storage_page_num(item.m_storage_page_num) + { + // check if can fit + assert(first_rel_page_num == (item.m_rel_page_num >> 32)); + // compress by taking low 32 bits only + m_compressed_rel_page_num = static_cast(item.m_rel_page_num & 0xFFFFFFFF); + } + + REL_CompressedItem::REL_CompressedItem(std::uint32_t first_rel_page_num, std::uint64_t rel_page_num, std::uint64_t storage_page_num) + : m_storage_page_num(storage_page_num) + { + // check if can fit + assert(first_rel_page_num == (rel_page_num >> 32)); + // compress by taking low 32 bits only + m_compressed_rel_page_num = static_cast(rel_page_num & 0xFFFFFFFF); + } + + REL_Item REL_CompressedItem::uncompress(std::uint32_t first_rel_page_num) const + { + std::uint64_t full_rel_page_num = (static_cast(first_rel_page_num) << 32) | static_cast(m_compressed_rel_page_num); + return { full_rel_page_num, m_storage_page_num }; } - void REL_Index::add(std::uint64_t page_num, std::uint64_t page_io_address) { - super_t::setItem(page_num >> m_shift, page_io_address); + std::string REL_CompressedItem::toString() const { + return "REL_CompressedItem{ rel_page_num=" + std::to_string(m_compressed_rel_page_num) + + ", storage_page_num=" + std::to_string(m_storage_page_num) + " }"; } - std::uint64_t REL_Index::get(std::uint64_t page_num) const { - return super_t::operator[](page_num >> m_shift); + REL_Index::REL_Index(mptr ptr, std::size_t node_capacity, AccessType access_type) + : super_t(ptr, node_capacity, access_type) + { } - + + db0::Address REL_Index::getAddress() const { + return super_t::getAddress(); + } + void REL_Index::detach() const { super_t::detach(); } @@ -35,4 +77,8 @@ namespace db0 super_t::commit(); } + void REL_Index::add(std::uint64_t start_rel_page_num, std::uint64_t start_storage_page_num) { + super_t::insert({ start_rel_page_num, start_storage_page_num }); + } + } \ No newline at end of file diff --git a/src/dbzero/core/storage/REL_Index.hpp b/src/dbzero/core/storage/REL_Index.hpp index 8afed688..9b36eaec 100644 --- a/src/dbzero/core/storage/REL_Index.hpp +++ b/src/dbzero/core/storage/REL_Index.hpp @@ -3,46 +3,146 @@ #include #include #include +#include +#include +#include namespace db0 { + + struct REL_Item; + struct REL_CompressedItem; + + struct REL_ItemCompT + { + bool operator()(const REL_Item &, const REL_Item &) const; + }; + + struct REL_ItemEqualT + { + bool operator()(const REL_Item &, const REL_Item &) const; + }; + +DB0_PACKED_BEGIN + struct DB0_PACKED_ATTR REL_Item + { + using CompT = REL_ItemCompT; + using EqualT = REL_ItemEqualT; + + // the starting relative page number + std::uint64_t m_rel_page_num = 0; + // the starting storage page number (absolute) + std::uint64_t m_storage_page_num = 0; + + REL_Item() = default; + + REL_Item(std::uint64_t rel_page_num, std::uint64_t storage_page_num) + : m_rel_page_num(rel_page_num) + , m_storage_page_num(storage_page_num) + { + } + + bool operator==(const REL_Item &) const; + }; +DB0_PACKED_END + + struct REL_CompressedItemCompT + { + bool operator()(const REL_CompressedItem &, const REL_CompressedItem &) const; + }; + + struct REL_CompressedItemEqualT + { + bool operator()(const REL_CompressedItem &, const REL_CompressedItem &) const; + }; + // Compressed items are actual in-memory representation +DB0_PACKED_BEGIN + struct DB0_PACKED_ATTR REL_CompressedItem + { + using CompT = REL_CompressedItemCompT; + using EqualT = REL_CompressedItemEqualT; + // construct REL-compressed item relative to the specific page number - i.e. first_page_num + REL_CompressedItem(std::uint32_t first_rel_page_num, const REL_Item &); + REL_CompressedItem(std::uint32_t first_rel_page_num, std::uint64_t rel_page_num, std::uint64_t storage_page_num); + + std::uint32_t m_compressed_rel_page_num; + std::uint64_t m_storage_page_num; + + // uncompress relative to a specific page number + REL_Item uncompress(std::uint32_t first_rel_page_num) const; + std::string toString() const; + }; +DB0_PACKED_END + + struct REL_IndexTypes + { + DB0_PACKED_BEGIN + // tree-level header type (currently unused) + struct DB0_PACKED_ATTR o_rel_index_header: o_fixed_versioned + { + // reserved space for future use + std::array m_reserved = {0, 0, 0, 0}; + }; +DB0_PACKED_END + + using ItemT = REL_Item; + using CompressedItemT = REL_CompressedItem; + + struct BlockHeader + { + // number of the 1st page in a data block / (high order bits) + std::uint32_t m_first_page_num = 0; + + CompressedItemT compressFirst(const ItemT &); + CompressedItemT compress(const ItemT &) const; + + ItemT uncompress(const CompressedItemT &) const; + + // From a compressed item, retrieve the (relative) page number only + std::uint64_t getRelPageNum(const CompressedItemT &) const; + + bool canFit(const ItemT &) const; + + std::string toString(const CompressedItemT &) const; + std::string toString() const; + }; + + // DRAM space deployed REL-index (in-memory) + using IndexT = SGB_CompressedLookupTree< + REL_Item, REL_CompressedItem, BlockHeader, + REL_ItemCompT, REL_CompressedItemCompT, REL_ItemEqualT, REL_CompressedItemEqualT, + o_rel_index_header>; + + using ConstNodeIterator = typename IndexT::sg_tree_const_iterator; + using ConstItemIterator = typename IndexT::ConstItemIterator; + }; + // REL_Index holds a complete mapping from relative to absolute Page IO addresses // (aka storage page numbers) - // it only holds the location of the entire step of blocks - // since relative addresses are from a continous space, they're represeted by vector indices - // NOTE: REL_Index must be initialized with a known "step size" (must be power of 2) - class REL_Index: protected db0::v_bvector + // it only holds the location of the entire ranges of blocks, assuming consecutive following numbers + class REL_Index: protected REL_IndexTypes::IndexT { public: - using super_t = db0::v_bvector; + using super_t = REL_IndexTypes::IndexT; // as null REL_Index() = default; REL_Index(const REL_Index &) = delete; - REL_Index(Memspace &, std::uint32_t step_size); - REL_Index(mptr, std::uint32_t step_size); + REL_Index(Memspace &, std::size_t node_capacity, AccessType); + REL_Index(mptr, std::size_t node_capacity, AccessType); // Add a new mapping from relative page num to storage page num - // @param page_num relative page num - // @param page_io_address absolute storage page num - void add(std::uint64_t page_num, std::uint64_t page_io_address); + void add(std::uint64_t start_rel_page_num, std::uint64_t start_storage_page_num); - // Retrieve storage page num for a given relative page num - std::uint64_t get(std::uint64_t page_num) const; + // Retrieve storage (absolute) page num for a given relative page num + std::uint64_t get(std::uint64_t rel_page_num) const; + db0::Address getAddress() const; + void detach() const; void commit() const; - - private: - std::shared_ptr m_dram_prefix; - std::shared_ptr m_dram_allocator; - Memspace m_dram_space; - const AccessType m_access_type; - - std::uint32_t m_step_size = 0; - std::uint32_t m_shift = 0; }; } \ No newline at end of file diff --git a/tests/unit_tests/DRAMSpaceTest.cpp b/tests/unit_tests/DRAMSpaceTest.cpp index 14993120..4afbaae3 100644 --- a/tests/unit_tests/DRAMSpaceTest.cpp +++ b/tests/unit_tests/DRAMSpaceTest.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -131,4 +132,22 @@ namespace tests } } + TEST_F( DRAMSpaceTest, testVBVectorCanBePutOnDRAMSpace ) + { + // use 4KiB, 16KiB page sizes + std::vector page_sizes { 4u << 10, 16u << 10 }; + for (auto page_size: page_sizes) { + auto cut = DRAMSpace::create(page_size); + + // Using std::uint32_t as capacity type to handle large page size + using BVectorT = db0::v_bvector; + // NOTE: must be created as fixed-block to DRAM space requirements + BVectorT b_vector(cut, { BVectorOptions::FIXED_BLOCK }); + for (std::uint64_t i = 0; i < 10000; ++i) { + b_vector.push_back(i * 10); + } + ASSERT_EQ(b_vector.size(), 10000u); + } + } + } \ No newline at end of file From dd532bd6150cf1f5fd0fd516755e3bf221c2c49b Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 26 Nov 2025 17:13:22 +0100 Subject: [PATCH 08/12] WIP: save work --- dbzero/dbzero/dbzero.py | 2 +- src/dbzero/core/dram/DRAMSpace.cpp | 4 +- src/dbzero/core/storage/BDevStorage.cpp | 96 ++++++++++++++++++------ src/dbzero/core/storage/BDevStorage.hpp | 7 +- src/dbzero/core/storage/Diff_IO.cpp | 11 +-- src/dbzero/core/storage/Diff_IO.hpp | 6 +- src/dbzero/core/storage/ExtSpace.cpp | 13 ++-- src/dbzero/core/storage/ExtSpace.hpp | 16 +++- src/dbzero/core/storage/Page_IO.cpp | 13 +++- src/dbzero/core/storage/Page_IO.hpp | 9 ++- src/dbzero/core/storage/REL_Index.cpp | 97 +++++++++++++++++++++++-- src/dbzero/core/storage/REL_Index.hpp | 28 +++++-- src/dbzero/core/storage/SparsePair.cpp | 4 +- 13 files changed, 238 insertions(+), 68 deletions(-) diff --git a/dbzero/dbzero/dbzero.py b/dbzero/dbzero/dbzero.py index c9e4f4dc..21899e3d 100644 --- a/dbzero/dbzero/dbzero.py +++ b/dbzero/dbzero/dbzero.py @@ -10,7 +10,7 @@ def load_dynamic(name, path): def __bootstrap__(): global __bootstrap__, __loader__, __file__ - paths = [os.path.join(os.path.split(__file__)[0]), "/src/dev/build/release", "/usr/local/lib/python3/dist-packages/dbzero/"] + paths = [os.path.join(os.path.split(__file__)[0]), "/src/dev/build/debug", "/usr/local/lib/python3/dist-packages/dbzero/"] __file__ = None for path in paths: if os.path.isdir(path): diff --git a/src/dbzero/core/dram/DRAMSpace.cpp b/src/dbzero/core/dram/DRAMSpace.cpp index c87351af..97fd4153 100644 --- a/src/dbzero/core/dram/DRAMSpace.cpp +++ b/src/dbzero/core/dram/DRAMSpace.cpp @@ -19,6 +19,7 @@ namespace db0 Memspace DRAMSpace::create(DRAM_Pair dram_pair) { + assert((dram_pair.first && dram_pair.second) || (!dram_pair.first && !dram_pair.second)); if (!dram_pair.first || !dram_pair.second) { THROWF(db0::InternalException) << "Invalid DRAM_Pair provided to DRAMSpace::create"; } @@ -27,7 +28,8 @@ namespace db0 Memspace DRAMSpace::tryCreate(DRAM_Pair dram_pair) { - if (dram_pair.first && dram_pair.second) { + assert((dram_pair.first && dram_pair.second) || (!dram_pair.first && !dram_pair.second)); + if (dram_pair.first && dram_pair.second) { return { dram_pair.first, dram_pair.second }; } else { return {}; diff --git a/src/dbzero/core/storage/BDevStorage.cpp b/src/dbzero/core/storage/BDevStorage.cpp index a2261358..02afa170 100644 --- a/src/dbzero/core/storage/BDevStorage.cpp +++ b/src/dbzero/core/storage/BDevStorage.cpp @@ -29,7 +29,7 @@ namespace db0 } return dram_io_ptr->getDRAMPair(); } - + BDevStorage::BDevStorage(const std::string &file_name, AccessType access_type, LockFlags lock_flags, std::optional meta_io_step_size) : BaseStorage(access_type) @@ -50,7 +50,6 @@ namespace db0 , m_sparse_pair(m_dram_io.getDRAMPair(), access_type) , m_sparse_index(m_sparse_pair.getSparseIndex()) , m_diff_index(m_sparse_pair.getDiffIndex()) - , m_page_io(getPage_IO(m_sparse_pair.getNextStoragePageNum(), m_config.m_page_io_step_size, access_type)) , m_ext_dram_changelog_io(tryGetChangeLogIOStream( m_config.m_ext_dram_changelog_io_offset, access_type) ) @@ -58,6 +57,7 @@ namespace db0 m_config.m_ext_dram_io_offset, m_config.m_ext_dram_page_size, access_type), m_ext_dram_changelog_io.get()) ) , m_ext_space(tryGetDRAMPair(m_ext_dram_io.get()), access_type) + , m_page_io(getPage_IO(m_sparse_pair.getNextStoragePageNum(), m_config.m_page_io_step_size, access_type)) { // in read-only mode need to refresh in order to retrieve a consitent DRAM state // since other process might be actively modifying the underlying file @@ -79,7 +79,8 @@ namespace db0 std::unique_ptr BDevStorage::init(std::unique_ptr &&dram_io, DRAM_ChangeLogStreamT *dram_change_log) { - if (dram_io && dram_change_log) { + if (dram_io) { + assert(dram_change_log); dram_io->load(*dram_change_log); } return std::move(dram_io); @@ -279,10 +280,14 @@ namespace db0 } // query.first yields the full-DP (if it exists) - std::uint64_t storage_page_num = query.first(); - if (storage_page_num) { - // read full page - m_page_io.read(storage_page_num, read_buf); + std::uint64_t page_io_id = query.first(); + if (page_io_id) { + if (!!m_ext_space) { + // convert relative page number back to absolute + page_io_id = m_ext_space.get(page_io_id); + } + // read full DP + m_page_io.read(page_io_id, read_buf); } else { // requesting a diff-DP only encoded page, use zero buffer as a base memset(read_buf, 0, m_config.m_page_size); @@ -290,9 +295,13 @@ namespace db0 // apply changes from diff-DPs std::uint32_t diff_state_num; - while (query.next(diff_state_num, storage_page_num)) { + while (query.next(diff_state_num, page_io_id)) { + if (!!m_ext_space) { + // convert relative page number back to absolute + page_io_id = m_ext_space.get(page_io_id); + } // apply all diff-updates on top of the full-DP - m_page_io.applyFrom(storage_page_num, read_buf, { page_num, diff_state_num }); + m_page_io.applyFrom(page_io_id, read_buf, { page_num, diff_state_num }); // collect chain-len statistics if (chain_len) { ++(*chain_len); @@ -311,7 +320,7 @@ namespace db0 auto end_page = begin_page + size / m_config.m_page_size; std::byte *write_buf = reinterpret_cast(buffer); - + std::unique_lock lock(m_mutex); // write as physical pages and register with the sparse index for (auto page_num = begin_page; page_num != end_page; ++page_num, write_buf += m_config.m_page_size) { @@ -320,11 +329,22 @@ namespace db0 if (item && item.m_state_num == state_num) { // page already added in current transaction / update in the stream // this may happen due to cache overflow and later modification of the same page - m_page_io.write(item.m_storage_page_num, write_buf); + auto page_io_id = item.m_storage_page_num; + if (!!m_ext_space) { + // convert relative page number back to absolute + page_io_id = m_ext_space.get(page_io_id); + } + m_page_io.write(page_io_id, write_buf); } else { // append as new page - auto storage_page_id = m_page_io.append(write_buf); - m_sparse_index.emplace(page_num, state_num, storage_page_id); + bool is_first_page; + auto page_io_id = m_page_io.append(write_buf, &is_first_page); + if (!!m_ext_space) { + // NOTE: first page (of each step) must be registered with REL_Index if it's maintained + // assign a relative page number + page_io_id = m_ext_space.toRelative(page_io_id, is_first_page); + } + m_sparse_index.emplace(page_num, state_num, page_io_id); #ifndef NDEBUG m_page_io_raw_bytes += m_config.m_page_size; checkCrashFromCommit(); @@ -352,18 +372,32 @@ namespace db0 if (first_state_num == state_num) { // page already added in current transaction / update in the stream // this may happen due to cache overflow and later modification of the same page - m_page_io.write(storage_page_num, buffer); + auto page_io_id = storage_page_num; + if (!!m_ext_space) { + // convert relative page number back to absolute + page_io_id = m_ext_space.get(page_io_id); + } + m_page_io.write(page_io_id, buffer); return; } + bool is_first_page; if (query.leftLessThan(max_len)) { - // append as diff-page (NOTE: diff-writes are only appended) - auto [storage_page_num, overflow] = m_page_io.appendDiff(buffer, { page_num, state_num }, diff_data); - m_diff_index.insert(page_num, state_num, storage_page_num, overflow); + // append as diff-page (NOTE: diff-writes are only appended) + auto [page_io_id, overflow] = m_page_io.appendDiff(buffer, { page_num, state_num }, diff_data, &is_first_page); + if (!!m_ext_space) { + // NOTE: first page (of each step) must be registered with REL_Index if it's maintained + // assign a relative page number + page_io_id = m_ext_space.toRelative(page_io_id, is_first_page); + } + m_diff_index.insert(page_num, state_num, page_io_id, overflow); } else { - // full-DP write - auto storage_page_num = m_page_io.append(buffer); - m_sparse_index.emplace(page_num, state_num, storage_page_num); + // full-DP write + auto page_io_id = m_page_io.append(buffer, &is_first_page); + if (!!m_ext_space) { + page_io_id = m_ext_space.toRelative(page_io_id, is_first_page); + } + m_sparse_index.emplace(page_num, state_num, page_io_id); } #ifndef NDEBUG @@ -425,12 +459,12 @@ namespace db0 } void BDevStorage::close() - { + { if (m_access_type == AccessType::READ_WRITE) { flush(); } - // close extension streams + // Close extension streams if (m_ext_dram_io) { assert(m_ext_dram_changelog_io); m_ext_dram_io->close(); @@ -504,6 +538,12 @@ namespace db0 auto address = std::max(m_dram_io.tail(), m_meta_io.tail()); address = std::max(address, m_dram_changelog_io.tail()); address = std::max(address, m_dp_changelog_io.tail()); + if (m_ext_dram_io) { + assert(m_ext_dram_changelog_io); + address = std::max(address, m_ext_dram_io->tail()); + address = std::max(address, m_ext_dram_changelog_io->tail()); + } + // NOTE: initialize with a known block num = 0 (first block of the first step) return { CONFIG_BLOCK_SIZE, m_file, m_config.m_page_size, m_config.m_block_size, address, 0, step_size, getBlockIOTailFunction(), 0 @@ -542,7 +582,12 @@ namespace db0 return [this]() -> std::uint64_t { auto result = std::max(m_dram_io.tail(), m_meta_io.tail()); result = std::max(result, m_dram_changelog_io.tail()); - result = std::max(result, m_dp_changelog_io.tail()); + result = std::max(result, m_dp_changelog_io.tail()); + if (m_ext_dram_io) { + assert(m_ext_dram_changelog_io); + result = std::max(result, m_ext_dram_io->tail()); + result = std::max(result, m_ext_dram_changelog_io->tail()); + } return result; }; } @@ -631,7 +676,7 @@ namespace db0 { std::unique_lock lock(m_mutex); callback("dram_io_rand_ops", m_dram_io.getRandOpsCount()); - callback("dram_prefix_size", m_dram_io.getDRAMPrefix().size()); + callback("dram_io_size", m_dram_io.getDRAMPrefix().size()); auto file_rand_ops = m_file.getRandOps(); callback("file_rand_read_ops", file_rand_ops.first); callback("file_rand_write_ops", file_rand_ops.second); @@ -644,6 +689,9 @@ namespace db0 auto page_io_stats = m_page_io.getStats(); callback("page_io_total_bytes", page_io_stats.first); callback("page_io_diff_bytes", page_io_stats.second); + if (m_ext_dram_io) { + callback("ext_dram_io_size", m_ext_dram_io->getDRAMPrefix().size()); + } #ifndef NDEBUG callback("page_io_raw_bytes", m_page_io_raw_bytes); #endif diff --git a/src/dbzero/core/storage/BDevStorage.hpp b/src/dbzero/core/storage/BDevStorage.hpp index 7d9445c1..5046b65b 100644 --- a/src/dbzero/core/storage/BDevStorage.hpp +++ b/src/dbzero/core/storage/BDevStorage.hpp @@ -161,14 +161,13 @@ DB0_PACKED_END // DRAM-backed sparse index tree SparseIndex &m_sparse_index; DiffIndex &m_diff_index; - // the stream for storing & reading full-DPs and diff-encoded DPs - Diff_IO m_page_io; // extension DRAM IO (only initialized when holding extension indexes e.g. REL_Index) std::unique_ptr m_ext_dram_changelog_io; std::unique_ptr m_ext_dram_io; ExtSpace m_ext_space; - // the primary REL_Index instance (if used) - REL_Index *m_rel_index_ptr = nullptr; + // the stream for storing & reading full-DPs and diff-encoded DPs + Diff_IO m_page_io; + bool m_refresh_pending = false; mutable std::shared_mutex m_mutex; #ifndef NDEBUG diff --git a/src/dbzero/core/storage/Diff_IO.cpp b/src/dbzero/core/storage/Diff_IO.cpp index cdce8a93..9fc8ba7c 100644 --- a/src/dbzero/core/storage/Diff_IO.cpp +++ b/src/dbzero/core/storage/Diff_IO.cpp @@ -244,8 +244,9 @@ DB0_PACKED_END { } - std::pair Diff_IO::appendDiff(const void *dp_data, - std::pair page_and_state, const std::vector &diff_data) + std::pair Diff_IO::appendDiff( + const void *dp_data, std::pair page_and_state, + const std::vector &diff_data, bool *is_first_page) { // must lock because the write-buffer is shared std::unique_lock lock(m_mx_write); @@ -255,7 +256,7 @@ DB0_PACKED_END m_diff_bytes_written += m_writer->flushDP(); } bool overflow = false; - auto next_page_num = Page_IO::getNextPageNum(); + auto next_page_num = Page_IO::getNextPageNum(is_first_page); assert(next_page_num.second > 0); if (m_writer->append((const std::byte*)dp_data, page_and_state, diff_data, overflow)) { if (overflow) { @@ -324,7 +325,7 @@ DB0_PACKED_END Page_IO::read(page_num, buffer); } - std::uint64_t Diff_IO::append(const void *buffer) + std::uint64_t Diff_IO::append(const void *buffer, bool *is_first_page_ptr) { // full-DP write can only be performed after flushing from diff-writer std::unique_lock lock(m_mx_write); @@ -332,7 +333,7 @@ DB0_PACKED_END m_diff_bytes_written += m_writer->flush(); } m_full_dp_bytes_written += m_page_size; - return Page_IO::append(buffer); + return Page_IO::append(buffer, is_first_page_ptr); } std::pair Diff_IO::getStats() const { diff --git a/src/dbzero/core/storage/Diff_IO.hpp b/src/dbzero/core/storage/Diff_IO.hpp index f007d68e..d0aa5d39 100644 --- a/src/dbzero/core/storage/Diff_IO.hpp +++ b/src/dbzero/core/storage/Diff_IO.hpp @@ -28,7 +28,7 @@ namespace db0 // @param diff_data the diff buffer (see getDiffs) // @return page number + overflow flag (where "true" means that 2 pages were written to) std::pair appendDiff(const void *dp_data, std::pair page_and_state, - const std::vector &diff_data); + const std::vector &diff_data, bool *is_first_page = nullptr); // Read diff stream and apply changes to the DP-buffer (must be already populated with the base data) // @param page_num the storage page number to read from @@ -36,7 +36,7 @@ namespace db0 // @param page_and_state logical page and state numbers (possibly relative) to identify the diff block // Exception raised if the diff block is not found void applyFrom(std::uint64_t page_num, void *buffer, std::pair page_and_state) const; - + // Flush needs to be called before closing the stream // and after each transaction void flush(); @@ -44,7 +44,7 @@ namespace db0 // Write as full-DP void write(std::uint64_t page_num, void *buffer); - std::uint64_t append(const void *buffer); + std::uint64_t append(const void *buffer, bool *is_first_page = nullptr); void read(std::uint64_t page_num, void *buffer) const; diff --git a/src/dbzero/core/storage/ExtSpace.cpp b/src/dbzero/core/storage/ExtSpace.cpp index e7a68d47..3572cea4 100644 --- a/src/dbzero/core/storage/ExtSpace.cpp +++ b/src/dbzero/core/storage/ExtSpace.cpp @@ -26,6 +26,8 @@ namespace db0 { assert(!!m_ext_space_root); assert(m_rel_index); + // make sure root is the first allocation + assert(m_ext_space_root.getAddress() == m_dram_allocator->firstAlloc()); // NOTE: the secondary REL_Index is not used currently m_ext_space_root.modify().m_rel_index_addr[0] = m_rel_index->getAddress(); } @@ -39,26 +41,23 @@ namespace db0 , m_rel_index(tryOpenPrimaryREL_Index(access_type)) { } - + ExtSpace::~ExtSpace() { } - bool ExtSpace::operator!() const { - return !m_dram_prefix || !m_dram_allocator; - } - db0::v_object ExtSpace::tryOpenRoot() const { if (!(*this)) { return {}; } - return db0::v_object(m_dram_space.myPtr(Address::fromOffset(0))); + // retrieve root from the first allocation + return db0::v_object(m_dram_space.myPtr(m_dram_allocator->firstAlloc())); } std::unique_ptr ExtSpace::tryOpenPrimaryREL_Index(AccessType access_type) const { - if (!m_ext_space_root) { + if (!(*this)) { return {}; } auto rel_index_addr = Address::fromOffset(m_ext_space_root->m_rel_index_addr[0]); diff --git a/src/dbzero/core/storage/ExtSpace.hpp b/src/dbzero/core/storage/ExtSpace.hpp index 96d9b96f..49db896a 100644 --- a/src/dbzero/core/storage/ExtSpace.hpp +++ b/src/dbzero/core/storage/ExtSpace.hpp @@ -50,14 +50,22 @@ DB0_PACKED_END ExtSpace(DRAM_Pair, AccessType); ~ExtSpace(); - bool operator!() const; + inline bool operator!() const { + return !m_dram_prefix || !m_dram_allocator; + } - // get the primary REL_Index - inline REL_Index &getREL_Index() { + // Assign a mapping from an absolute to relative page number + std::uint64_t toRelative(std::uint64_t storage_page_num, bool is_first_in_step) { assert(m_rel_index); - return *m_rel_index; + return m_rel_index->toRelative(storage_page_num, is_first_in_step); } + // Retrieve storage (absolute) page num for a given relative page num + std::uint64_t get(std::uint64_t rel_page_num) const { + assert(m_rel_index); + return m_rel_index->get(rel_page_num); + } + void refresh(); void commit(); diff --git a/src/dbzero/core/storage/Page_IO.cpp b/src/dbzero/core/storage/Page_IO.cpp index 45f5f3a6..0c40b988 100644 --- a/src/dbzero/core/storage/Page_IO.cpp +++ b/src/dbzero/core/storage/Page_IO.cpp @@ -37,13 +37,18 @@ namespace db0 { } - std::uint64_t Page_IO::append(const void *buffer) + std::uint64_t Page_IO::append(const void *buffer, bool *is_first_page_ptr) { assert(m_access_type == AccessType::READ_WRITE); if (m_page_count == m_block_capacity) { allocateNextBlock(); } + if (is_first_page_ptr) { + // first page of the first block in the step + *is_first_page_ptr = (m_page_count == 0) && (m_block_num && *m_block_num == 0); + } + m_file.write(m_address + m_page_count * m_page_size, m_page_size, buffer); return m_first_page_num + (m_page_count++); } @@ -94,12 +99,16 @@ namespace db0 return m_page_size; } - std::pair Page_IO::getNextPageNum() + std::pair Page_IO::getNextPageNum(bool *is_first_page_ptr) { assert(m_access_type == AccessType::READ_WRITE); if (m_page_count == m_block_capacity) { allocateNextBlock(); } + if (is_first_page_ptr) { + // first page of the first block in the step + *is_first_page_ptr = (m_page_count == 0) && (m_block_num && *m_block_num == 0); + } return { m_first_page_num + m_page_count, m_block_capacity - m_page_count }; } diff --git a/src/dbzero/core/storage/Page_IO.hpp b/src/dbzero/core/storage/Page_IO.hpp index 822e9434..8b40ae8f 100644 --- a/src/dbzero/core/storage/Page_IO.hpp +++ b/src/dbzero/core/storage/Page_IO.hpp @@ -35,8 +35,9 @@ namespace db0 ~Page_IO(); // Appends a new page to the stream - // @return ever increasing page number (aka storage page number) - std::uint64_t append(const void *buffer); + // @return ever increasing page number (aka storage page number) + is_first_page (of the current step) optional flag + // NOTE: first block (on first page) must be registered with REL_Index if it's maintained + std::uint64_t append(const void *buffer, bool *is_first_page = nullptr); void read(std::uint64_t page_num, void *buffer) const; @@ -46,7 +47,7 @@ namespace db0 void write(std::uint64_t page_num, void *buffer); std::uint64_t tail() const; - + std::uint32_t getPageSize() const; // Get the page number which is > all pages currently stored @@ -65,7 +66,7 @@ namespace db0 // Get the next page number to be assigned by the "append" method (first) // and the number of consecutive pages available in the current block - std::pair getNextPageNum(); + std::pair getNextPageNum(bool *is_first_page = nullptr); private: CFile &m_file; diff --git a/src/dbzero/core/storage/REL_Index.cpp b/src/dbzero/core/storage/REL_Index.cpp index f652acdb..224bb80b 100644 --- a/src/dbzero/core/storage/REL_Index.cpp +++ b/src/dbzero/core/storage/REL_Index.cpp @@ -9,9 +9,25 @@ namespace db0 return lhs.m_rel_page_num < rhs.m_rel_page_num; } + bool REL_ItemCompT::operator()(const REL_Item &lhs, std::uint64_t rhs) const { + return lhs.m_rel_page_num < rhs; + } + + bool REL_ItemCompT::operator()(std::uint64_t lhs, const REL_Item &rhs) const { + return lhs < rhs.m_rel_page_num; + } + bool REL_ItemEqualT::operator()(const REL_Item &lhs, const REL_Item &rhs) const { return lhs.m_rel_page_num == rhs.m_rel_page_num; - } + } + + bool REL_ItemEqualT::operator()(const REL_Item &lhs, std::uint64_t rhs) const { + return lhs.m_rel_page_num == rhs; + } + + bool REL_ItemEqualT::operator()(std::uint64_t lhs, const REL_Item &rhs) const { + return lhs == rhs.m_rel_page_num; + } REL_Index::REL_Index(Memspace &memspace, std::size_t node_capacity, AccessType access_type) : super_t(memspace, node_capacity, access_type) @@ -48,7 +64,7 @@ namespace db0 // compress by taking low 32 bits only m_compressed_rel_page_num = static_cast(rel_page_num & 0xFFFFFFFF); } - + REL_Item REL_CompressedItem::uncompress(std::uint32_t first_rel_page_num) const { std::uint64_t full_rel_page_num = (static_cast(first_rel_page_num) << 32) | static_cast(m_compressed_rel_page_num); @@ -60,8 +76,44 @@ namespace db0 + ", storage_page_num=" + std::to_string(m_storage_page_num) + " }"; } + REL_IndexTypes::CompressedItemT + REL_IndexTypes::BlockHeader::compressFirst(const ItemT &item) + { + m_first_page_num = item.m_rel_page_num >> 32; + return CompressedItemT(m_first_page_num, item); + } + + REL_IndexTypes::CompressedItemT + REL_IndexTypes::BlockHeader::compress(const ItemT &item) const + { + // ensure can fit + assert(m_first_page_num == (item.m_rel_page_num >> 32)); + return CompressedItemT(m_first_page_num, item); + } + + REL_IndexTypes::CompressedItemT + REL_IndexTypes::BlockHeader::compress(std::uint64_t rel_page_num) const { + // ensure can fit + assert(m_first_page_num == (rel_page_num >> 32)); + return CompressedItemT(m_first_page_num, rel_page_num, 0); + } + + REL_IndexTypes::ItemT + REL_IndexTypes::BlockHeader::uncompress(const CompressedItemT &item) const { + return item.uncompress(m_first_page_num); + } + + bool REL_IndexTypes::BlockHeader::canFit(const ItemT &item) const { + return m_first_page_num == (item.m_rel_page_num >> 32); + } + + bool REL_IndexTypes::BlockHeader::canFit(std::uint64_t rel_page_num) const { + return m_first_page_num == (rel_page_num >> 32); + } + REL_Index::REL_Index(mptr ptr, std::size_t node_capacity, AccessType access_type) : super_t(ptr, node_capacity, access_type) + , m_next_rel_page_num(this->treeHeader().m_next_rel_page_num) { } @@ -73,12 +125,45 @@ namespace db0 super_t::detach(); } - void REL_Index::commit() const { + void REL_Index::commit() const + { + // flush locally cached value + const_cast(*this).modifyTreeHeader().m_next_rel_page_num = m_next_rel_page_num; super_t::commit(); } - - void REL_Index::add(std::uint64_t start_rel_page_num, std::uint64_t start_storage_page_num) { - super_t::insert({ start_rel_page_num, start_storage_page_num }); + + std::uint64_t REL_Index::toRelative(std::uint64_t storage_page_num, bool is_first_in_step) + { + if (is_first_in_step) { + super_t::insert({ m_next_rel_page_num, storage_page_num }); + } + // FIXME: log + auto result = m_next_rel_page_num++; + std::cout << storage_page_num << " -> " << result << std::endl; + return result; + } + + void REL_Index::refresh() + { + m_next_rel_page_num = this->treeHeader().m_next_rel_page_num; + detach(); + } + + std::uint64_t REL_Index::get(std::uint64_t rel_page_num) const + { + auto result = super_t::lower_equal_bound(rel_page_num); + if (!result) { + THROWF(db0::InternalException) << "REL_Index: page lookup failed on: " << rel_page_num; + } + // translate to absolute storage page number + // FIXME: log + auto storage_page_num = result->m_storage_page_num + (rel_page_num - result->m_rel_page_num); + std::cout << rel_page_num << "(rel) -> " << storage_page_num << std::endl; + return storage_page_num; + } + + std::uint64_t REL_Index::size() const { + return super_t::size(); } } \ No newline at end of file diff --git a/src/dbzero/core/storage/REL_Index.hpp b/src/dbzero/core/storage/REL_Index.hpp index 9b36eaec..69c5a232 100644 --- a/src/dbzero/core/storage/REL_Index.hpp +++ b/src/dbzero/core/storage/REL_Index.hpp @@ -16,12 +16,16 @@ namespace db0 struct REL_ItemCompT { - bool operator()(const REL_Item &, const REL_Item &) const; + bool operator()(const REL_Item &lhs, const REL_Item &rhs) const; + bool operator()(const REL_Item &lhs, std::uint64_t rhs) const; + bool operator()(std::uint64_t lhs, const REL_Item &rhs) const; }; struct REL_ItemEqualT { - bool operator()(const REL_Item &, const REL_Item &) const; + bool operator()(const REL_Item &lhs, const REL_Item &rhs) const; + bool operator()(const REL_Item &lhs, std::uint64_t rhs) const; + bool operator()(std::uint64_t lhs, const REL_Item &rhs) const; }; DB0_PACKED_BEGIN @@ -82,6 +86,8 @@ DB0_PACKED_END // tree-level header type (currently unused) struct DB0_PACKED_ATTR o_rel_index_header: o_fixed_versioned { + // maximum relative page number assigned by this instance + std::uint64_t m_next_rel_page_num = 0; // reserved space for future use std::array m_reserved = {0, 0, 0, 0}; }; @@ -97,6 +103,8 @@ DB0_PACKED_END CompressedItemT compressFirst(const ItemT &); CompressedItemT compress(const ItemT &) const; + // compress for comparison only + CompressedItemT compress(std::uint64_t rel_page_num) const; ItemT uncompress(const CompressedItemT &) const; @@ -104,6 +112,7 @@ DB0_PACKED_END std::uint64_t getRelPageNum(const CompressedItemT &) const; bool canFit(const ItemT &) const; + bool canFit(std::uint64_t rel_page_num) const; std::string toString(const CompressedItemT &) const; std::string toString() const; @@ -133,16 +142,25 @@ DB0_PACKED_END REL_Index(Memspace &, std::size_t node_capacity, AccessType); REL_Index(mptr, std::size_t node_capacity, AccessType); - // Add a new mapping from relative page num to storage page num - void add(std::uint64_t start_rel_page_num, std::uint64_t start_storage_page_num); + // Assign a mapping from an absolute to relative page number + // NOTE: the mapping needs to be persisted for each "first_in_step" page + std::uint64_t toRelative(std::uint64_t storage_page_num, bool is_first_in_step); // Retrieve storage (absolute) page num for a given relative page num std::uint64_t get(std::uint64_t rel_page_num) const; db0::Address getAddress() const; - + void detach() const; void commit() const; + + void refresh(); + + std::uint64_t size() const; + + private: + // value maintained in-sync with the tree + std::uint64_t m_next_rel_page_num = 0; }; } \ No newline at end of file diff --git a/src/dbzero/core/storage/SparsePair.cpp b/src/dbzero/core/storage/SparsePair.cpp index af97f664..37dc967d 100644 --- a/src/dbzero/core/storage/SparsePair.cpp +++ b/src/dbzero/core/storage/SparsePair.cpp @@ -9,9 +9,9 @@ namespace db0 , m_diff_index(node_size, &m_change_log) { } - + SparsePair::SparsePair(DRAM_Pair dram_pair, AccessType access_type) - : m_sparse_index(dram_pair, access_type, Address::fromOffset(0), &m_change_log) + : m_sparse_index(dram_pair, access_type, {}, &m_change_log) , m_diff_index(dram_pair, access_type, Address::fromOffset(m_sparse_index.getExtraData()), &m_change_log) { } From d5f52c39a72fc0b632ab037c08a69829a7876795 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 26 Nov 2025 20:53:53 +0100 Subject: [PATCH 09/12] WIP: save work --- python_tests/test_page_io.py | 1 + src/dbzero/core/storage/BDevStorage.cpp | 18 +++++++----- src/dbzero/core/storage/Diff_IO.cpp | 5 ++++ src/dbzero/core/storage/ExtSpace.cpp | 10 +++++++ src/dbzero/core/storage/ExtSpace.hpp | 10 +++---- src/dbzero/core/storage/REL_Index.cpp | 39 ++++++++++++++++--------- src/dbzero/core/storage/REL_Index.hpp | 20 ++++++++----- 7 files changed, 70 insertions(+), 33 deletions(-) diff --git a/python_tests/test_page_io.py b/python_tests/test_page_io.py index bf05b2e4..6c59662d 100644 --- a/python_tests/test_page_io.py +++ b/python_tests/test_page_io.py @@ -32,6 +32,7 @@ def test_continue_append_with_step_size(db0_fixture): root.value.append(MemoTestClass("a" * 1024)) # 1 KB string db0.commit() + print("--- before close ---") db0.close() db0.init(DB0_DIR) # NOTE: we're opening an existing prefix with already initialized page I/O step size diff --git a/src/dbzero/core/storage/BDevStorage.cpp b/src/dbzero/core/storage/BDevStorage.cpp index 02afa170..83365bec 100644 --- a/src/dbzero/core/storage/BDevStorage.cpp +++ b/src/dbzero/core/storage/BDevStorage.cpp @@ -284,7 +284,7 @@ namespace db0 if (page_io_id) { if (!!m_ext_space) { // convert relative page number back to absolute - page_io_id = m_ext_space.get(page_io_id); + page_io_id = m_ext_space.getAbsolute(page_io_id); } // read full DP m_page_io.read(page_io_id, read_buf); @@ -298,7 +298,7 @@ namespace db0 while (query.next(diff_state_num, page_io_id)) { if (!!m_ext_space) { // convert relative page number back to absolute - page_io_id = m_ext_space.get(page_io_id); + page_io_id = m_ext_space.getAbsolute(page_io_id); } // apply all diff-updates on top of the full-DP m_page_io.applyFrom(page_io_id, read_buf, { page_num, diff_state_num }); @@ -332,7 +332,7 @@ namespace db0 auto page_io_id = item.m_storage_page_num; if (!!m_ext_space) { // convert relative page number back to absolute - page_io_id = m_ext_space.get(page_io_id); + page_io_id = m_ext_space.getAbsolute(page_io_id); } m_page_io.write(page_io_id, write_buf); } else { @@ -342,7 +342,7 @@ namespace db0 if (!!m_ext_space) { // NOTE: first page (of each step) must be registered with REL_Index if it's maintained // assign a relative page number - page_io_id = m_ext_space.toRelative(page_io_id, is_first_page); + page_io_id = m_ext_space.assignRelative(page_io_id, is_first_page); } m_sparse_index.emplace(page_num, state_num, page_io_id); #ifndef NDEBUG @@ -375,7 +375,7 @@ namespace db0 auto page_io_id = storage_page_num; if (!!m_ext_space) { // convert relative page number back to absolute - page_io_id = m_ext_space.get(page_io_id); + page_io_id = m_ext_space.getAbsolute(page_io_id); } m_page_io.write(page_io_id, buffer); return; @@ -388,14 +388,14 @@ namespace db0 if (!!m_ext_space) { // NOTE: first page (of each step) must be registered with REL_Index if it's maintained // assign a relative page number - page_io_id = m_ext_space.toRelative(page_io_id, is_first_page); + page_io_id = m_ext_space.assignRelative(page_io_id, is_first_page); } m_diff_index.insert(page_num, state_num, page_io_id, overflow); } else { // full-DP write auto page_io_id = m_page_io.append(buffer, &is_first_page); if (!!m_ext_space) { - page_io_id = m_ext_space.toRelative(page_io_id, is_first_page); + page_io_id = m_ext_space.assignRelative(page_io_id, is_first_page); } m_sparse_index.emplace(page_num, state_num, page_io_id); } @@ -427,6 +427,10 @@ namespace db0 return false; } + if (!!m_ext_space) { + m_ext_space.commit(); + } + // save metadata checkpoints before making any updates to the managed streams // NOTE: the checkpoint is only saved after exceeding specific threshold of updates in the managed streams auto state_num = m_sparse_pair.getMaxStateNum(); diff --git a/src/dbzero/core/storage/Diff_IO.cpp b/src/dbzero/core/storage/Diff_IO.cpp index 9fc8ba7c..3f00bc96 100644 --- a/src/dbzero/core/storage/Diff_IO.cpp +++ b/src/dbzero/core/storage/Diff_IO.cpp @@ -258,6 +258,11 @@ DB0_PACKED_END bool overflow = false; auto next_page_num = Page_IO::getNextPageNum(is_first_page); assert(next_page_num.second > 0); + if (is_first_page) { + // Must be first write into the first page (of the step) + // to report result as the is_first_page = true + *is_first_page &= m_writer->empty(); + } if (m_writer->append((const std::byte*)dp_data, page_and_state, diff_data, overflow)) { if (overflow) { // on overflow we can either append remnants to the next storage page (+1) diff --git a/src/dbzero/core/storage/ExtSpace.cpp b/src/dbzero/core/storage/ExtSpace.cpp index 3572cea4..af56e744 100644 --- a/src/dbzero/core/storage/ExtSpace.cpp +++ b/src/dbzero/core/storage/ExtSpace.cpp @@ -46,6 +46,16 @@ namespace db0 { } + void ExtSpace::commit() + { + if (!!m_ext_space_root) { + m_ext_space_root.commit(); + } + if (m_rel_index) { + m_rel_index->commit(); + } + } + db0::v_object ExtSpace::tryOpenRoot() const { if (!(*this)) { diff --git a/src/dbzero/core/storage/ExtSpace.hpp b/src/dbzero/core/storage/ExtSpace.hpp index 49db896a..7a962fdf 100644 --- a/src/dbzero/core/storage/ExtSpace.hpp +++ b/src/dbzero/core/storage/ExtSpace.hpp @@ -55,17 +55,17 @@ DB0_PACKED_END } // Assign a mapping from an absolute to relative page number - std::uint64_t toRelative(std::uint64_t storage_page_num, bool is_first_in_step) { + std::uint64_t assignRelative(std::uint64_t storage_page_num, bool is_first_in_step) { assert(m_rel_index); - return m_rel_index->toRelative(storage_page_num, is_first_in_step); + return m_rel_index->assignRelative(storage_page_num, is_first_in_step); } // Retrieve storage (absolute) page num for a given relative page num - std::uint64_t get(std::uint64_t rel_page_num) const { + std::uint64_t getAbsolute(std::uint64_t rel_page_num) const { assert(m_rel_index); - return m_rel_index->get(rel_page_num); + return m_rel_index->getAbsolute(rel_page_num); } - + void refresh(); void commit(); diff --git a/src/dbzero/core/storage/REL_Index.cpp b/src/dbzero/core/storage/REL_Index.cpp index 224bb80b..c872a1fc 100644 --- a/src/dbzero/core/storage/REL_Index.cpp +++ b/src/dbzero/core/storage/REL_Index.cpp @@ -113,7 +113,9 @@ namespace db0 REL_Index::REL_Index(mptr ptr, std::size_t node_capacity, AccessType access_type) : super_t(ptr, node_capacity, access_type) - , m_next_rel_page_num(this->treeHeader().m_next_rel_page_num) + , m_last_storage_page_num(this->treeHeader().m_last_storage_page_num) + , m_rel_page_num(this->treeHeader().m_rel_page_num) + , m_max_rel_page_num(this->treeHeader().m_max_rel_page_num) { } @@ -128,38 +130,47 @@ namespace db0 void REL_Index::commit() const { // flush locally cached value - const_cast(*this).modifyTreeHeader().m_next_rel_page_num = m_next_rel_page_num; + auto &self = const_cast(*this); + self.modifyTreeHeader().m_last_storage_page_num = m_last_storage_page_num; + self.modifyTreeHeader().m_rel_page_num = m_rel_page_num; + self.modifyTreeHeader().m_max_rel_page_num = m_max_rel_page_num; super_t::commit(); } - std::uint64_t REL_Index::toRelative(std::uint64_t storage_page_num, bool is_first_in_step) - { + std::uint64_t REL_Index::assignRelative(std::uint64_t storage_page_num, bool is_first_in_step) + { if (is_first_in_step) { - super_t::insert({ m_next_rel_page_num, storage_page_num }); + super_t::insert({ ++m_max_rel_page_num, storage_page_num }); + assert(storage_page_num > m_last_storage_page_num); + m_last_storage_page_num = storage_page_num; + m_rel_page_num = m_max_rel_page_num; } - // FIXME: log - auto result = m_next_rel_page_num++; - std::cout << storage_page_num << " -> " << result << std::endl; + + assert(storage_page_num >= m_last_storage_page_num); + auto result = m_rel_page_num + (storage_page_num - m_last_storage_page_num); + if (result > m_max_rel_page_num) { + m_max_rel_page_num = result; + } + return result; } void REL_Index::refresh() { - m_next_rel_page_num = this->treeHeader().m_next_rel_page_num; + m_last_storage_page_num = this->treeHeader().m_last_storage_page_num; + m_rel_page_num = this->treeHeader().m_rel_page_num; + m_max_rel_page_num = this->treeHeader().m_max_rel_page_num; detach(); } - std::uint64_t REL_Index::get(std::uint64_t rel_page_num) const + std::uint64_t REL_Index::getAbsolute(std::uint64_t rel_page_num) const { auto result = super_t::lower_equal_bound(rel_page_num); if (!result) { THROWF(db0::InternalException) << "REL_Index: page lookup failed on: " << rel_page_num; } // translate to absolute storage page number - // FIXME: log - auto storage_page_num = result->m_storage_page_num + (rel_page_num - result->m_rel_page_num); - std::cout << rel_page_num << "(rel) -> " << storage_page_num << std::endl; - return storage_page_num; + return result->m_storage_page_num + (rel_page_num - result->m_rel_page_num); } std::uint64_t REL_Index::size() const { diff --git a/src/dbzero/core/storage/REL_Index.hpp b/src/dbzero/core/storage/REL_Index.hpp index 69c5a232..959c587b 100644 --- a/src/dbzero/core/storage/REL_Index.hpp +++ b/src/dbzero/core/storage/REL_Index.hpp @@ -86,8 +86,12 @@ DB0_PACKED_END // tree-level header type (currently unused) struct DB0_PACKED_ATTR o_rel_index_header: o_fixed_versioned { - // maximum relative page number assigned by this instance - std::uint64_t m_next_rel_page_num = 0; + // the largest registered mapping from absolute page number + std::uint64_t m_last_storage_page_num = 0; + // relative page number associated with the + std::uint64_t m_rel_page_num = 0; + // the maximum assigned relative page number + std::uint64_t m_max_rel_page_num = 0; // reserved space for future use std::array m_reserved = {0, 0, 0, 0}; }; @@ -142,12 +146,12 @@ DB0_PACKED_END REL_Index(Memspace &, std::size_t node_capacity, AccessType); REL_Index(mptr, std::size_t node_capacity, AccessType); - // Assign a mapping from an absolute to relative page number + // Assign (append) a mapping from an absolute to relative page number // NOTE: the mapping needs to be persisted for each "first_in_step" page - std::uint64_t toRelative(std::uint64_t storage_page_num, bool is_first_in_step); + std::uint64_t assignRelative(std::uint64_t storage_page_num, bool is_first_in_step); // Retrieve storage (absolute) page num for a given relative page num - std::uint64_t get(std::uint64_t rel_page_num) const; + std::uint64_t getAbsolute(std::uint64_t rel_page_num) const; db0::Address getAddress() const; @@ -159,8 +163,10 @@ DB0_PACKED_END std::uint64_t size() const; private: - // value maintained in-sync with the tree - std::uint64_t m_next_rel_page_num = 0; + // values maintained in-sync with the tree + std::uint64_t m_last_storage_page_num = 0; + std::uint64_t m_rel_page_num = 0; + std::uint64_t m_max_rel_page_num = 0; }; } \ No newline at end of file From 1d8fe4d0a72104c82e31d1c8cb97409f0e3dbc13 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 27 Nov 2025 09:17:42 +0100 Subject: [PATCH 10/12] REL-index refresh --- dbzero/dbzero/dbzero.py | 2 +- src/dbzero/core/storage/BDevStorage.cpp | 21 ++++++++++++++++++++- src/dbzero/core/storage/ExtSpace.cpp | 8 ++++++++ src/dbzero/core/storage/REL_Index.cpp | 4 ++-- src/dbzero/core/storage/SparseIndexBase.hpp | 6 +++--- 5 files changed, 34 insertions(+), 7 deletions(-) diff --git a/dbzero/dbzero/dbzero.py b/dbzero/dbzero/dbzero.py index 21899e3d..c9e4f4dc 100644 --- a/dbzero/dbzero/dbzero.py +++ b/dbzero/dbzero/dbzero.py @@ -10,7 +10,7 @@ def load_dynamic(name, path): def __bootstrap__(): global __bootstrap__, __loader__, __file__ - paths = [os.path.join(os.path.split(__file__)[0]), "/src/dev/build/debug", "/usr/local/lib/python3/dist-packages/dbzero/"] + paths = [os.path.join(os.path.split(__file__)[0]), "/src/dev/build/release", "/usr/local/lib/python3/dist-packages/dbzero/"] __file__ = None for path in paths: if os.path.isdir(path): diff --git a/src/dbzero/core/storage/BDevStorage.cpp b/src/dbzero/core/storage/BDevStorage.cpp index 83365bec..b5b809f4 100644 --- a/src/dbzero/core/storage/BDevStorage.cpp +++ b/src/dbzero/core/storage/BDevStorage.cpp @@ -603,6 +603,8 @@ namespace db0 } if (!m_refresh_pending) { m_refresh_pending = m_dram_changelog_io.refresh(); + // NOTE: inclusion of ext-space is not necessary here since DRAM changelog + // is sufficient to determine if there're any updates } return m_refresh_pending; } @@ -616,10 +618,20 @@ namespace db0 do { // safe stream positions for rollback on file read failure auto dram_changelog_io_pos = m_dram_changelog_io.getStreamPos(); + std::pair ext_dram_changelog_io_pos; + if (!!m_ext_space) { + assert(m_ext_dram_changelog_io); + ext_dram_changelog_io_pos = m_ext_dram_changelog_io->getStreamPos(); + } auto dp_changelog_io_pos = m_dp_changelog_io.getStreamPos(); try { m_dram_io.beginApplyChanges(m_dram_changelog_io); dram_changelog_io_pos = m_dram_changelog_io.getStreamPos(); + if (!!m_ext_space) { + assert(m_ext_dram_changelog_io); + m_ext_dram_io->beginApplyChanges(*m_ext_dram_changelog_io); + ext_dram_changelog_io_pos = m_ext_dram_changelog_io->getStreamPos(); + } // send all page-update notifications to the provided handler if (on_page_updated) { StateNumType updated_state_num = 0; @@ -652,7 +664,11 @@ namespace db0 // where changes are not guaranteed to be written sequentially // need to revert the refresh operation to the point where it originally started m_dram_changelog_io.setStreamPos(dram_changelog_io_pos); - m_dp_changelog_io.setStreamPos(dp_changelog_io_pos); + m_dp_changelog_io.setStreamPos(dp_changelog_io_pos); + if (!!m_ext_space) { + assert(m_ext_dram_changelog_io); + m_ext_dram_changelog_io->setStreamPos(ext_dram_changelog_io_pos); + } break; } @@ -664,6 +680,9 @@ namespace db0 // refresh underlying sparse index / diff index after DRAM update m_sparse_pair.refresh(); } + if (!!m_ext_space && m_ext_dram_io->completeApplyChanges()) { + m_ext_space.refresh(); + } m_meta_io.refresh(); // refresh cycle complete m_refresh_pending = false; diff --git a/src/dbzero/core/storage/ExtSpace.cpp b/src/dbzero/core/storage/ExtSpace.cpp index af56e744..8538c8a7 100644 --- a/src/dbzero/core/storage/ExtSpace.cpp +++ b/src/dbzero/core/storage/ExtSpace.cpp @@ -46,6 +46,14 @@ namespace db0 { } + void ExtSpace::refresh() + { + m_ext_space_root.detach(); + if (m_rel_index) { + m_rel_index->refresh(); + } + } + void ExtSpace::commit() { if (!!m_ext_space_root) { diff --git a/src/dbzero/core/storage/REL_Index.cpp b/src/dbzero/core/storage/REL_Index.cpp index c872a1fc..145046b6 100644 --- a/src/dbzero/core/storage/REL_Index.cpp +++ b/src/dbzero/core/storage/REL_Index.cpp @@ -157,10 +157,10 @@ namespace db0 void REL_Index::refresh() { + detach(); m_last_storage_page_num = this->treeHeader().m_last_storage_page_num; m_rel_page_num = this->treeHeader().m_rel_page_num; - m_max_rel_page_num = this->treeHeader().m_max_rel_page_num; - detach(); + m_max_rel_page_num = this->treeHeader().m_max_rel_page_num; } std::uint64_t REL_Index::getAbsolute(std::uint64_t rel_page_num) const diff --git a/src/dbzero/core/storage/SparseIndexBase.hpp b/src/dbzero/core/storage/SparseIndexBase.hpp index 88819949..c8cfae6b 100644 --- a/src/dbzero/core/storage/SparseIndexBase.hpp +++ b/src/dbzero/core/storage/SparseIndexBase.hpp @@ -361,10 +361,10 @@ DB0_PACKED_END template void SparseIndexBase::refresh() - { - m_next_page_num = m_index.treeHeader().m_next_page_num; - m_max_state_num = m_index.treeHeader().m_max_state_num; + { m_index.detach(); + m_next_page_num = m_index.treeHeader().m_next_page_num; + m_max_state_num = m_index.treeHeader().m_max_state_num; } template From f23fbf2e2c35e21b03be2094318dda617c4bc849 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 27 Nov 2025 10:05:23 +0100 Subject: [PATCH 11/12] REL-index flags + refresh --- python_tests/conftest.py | 8 ++- python_tests/test_issues_1.py | 70 ++++++++++++++++++++++++- python_tests/test_refresh.py | 75 ++++----------------------- src/dbzero/bindings/python/PyAPI.cpp | 20 +++---- src/dbzero/core/storage/REL_Index.cpp | 7 ++- src/dbzero/core/storage/REL_Index.hpp | 18 +++++-- 6 files changed, 114 insertions(+), 84 deletions(-) diff --git a/python_tests/conftest.py b/python_tests/conftest.py index b9442d3f..c59a2da4 100644 --- a/python_tests/conftest.py +++ b/python_tests/conftest.py @@ -27,8 +27,12 @@ def db0_fixture(request): DB0_DIR, suppress_dist_overflow_error=__extract_param(request, "suppress_dist_overflow_error", False), ) - db0.open("my-test-prefix") - yield db0 + db0.open( + "my-test-prefix", + # use custom page_io_step_size if specified in request.param + page_io_step_size=__extract_param(request, "page_io_step_size", None) + ) + yield db0 gc.collect() db0.close() if os.path.exists(DB0_DIR): diff --git a/python_tests/test_issues_1.py b/python_tests/test_issues_1.py index 87073144..5b1b4648 100644 --- a/python_tests/test_issues_1.py +++ b/python_tests/test_issues_1.py @@ -2,7 +2,9 @@ import dbzero as db0 import random import string -from .memo_test_types import MemoTestClass, DynamicDataSingleton, MemoScopedSingleton +import time +import multiprocessing +from .memo_test_types import MemoTestClass, MemoTestSingleton from .conftest import DB0_DIR @@ -138,4 +140,70 @@ def test_db0_commit_close_issue_1(db0_fixture): db0.init(DB0_DIR) db0.open(prefix.name, "rw") + + +def make_small_update(px_name, expected_values): + time.sleep(0.25) + db0.init(DB0_DIR) + db0.open(px_name, "rw") + note = MemoTestClass(expected_values[0]) + db0.tags(note).add("tag") + db0.commit() + time.sleep(0.25) + if 'D' in db0.build_flags(): + db0.dbg_start_logs() + note.value = expected_values[1] + db0.close() + + +@pytest.mark.parametrize("db0_slab_size", [{"slab_size": 1 << 20}], indirect=True) +def test_refresh_issue1(db0_slab_size): + """ + Issue: process blocked on refresh attempt + Reason: missing SparsePair.commit() call when finishing a transaction + """ + px_name = db0.get_current_prefix().name + expected_values = ["first string", "second string"] + + rand_ints = [350, 480, 343, 475, 871, 493, 550, 723, 342, 236, 110, 585, 633, 54, 797, 478, 850, 716, 1021, + 136, 248, 879, 151, 249, 15, 717, 773, 625, 738, 731, 955, 280, 208, 730, 754, 982, 281, 221, + 549, 501, 282, 307, 551, 472, 509, 761, 78, 735, 744, 450, 388, 645, 577, 706, 417, 78, 849, + 873, 904, 534, 945, 985, 431, 725, 826, 49, 64, 766, 32, 460, 971, 766, 390, 990, 899, 835, + 16, 570, 190, 573, 54, 642, 840, 817, 924, 793, 634, 889, 835, 250, 676, 1006, 819, 322, + 373, 278, 895, 767, 380, 442] + + index = 0 + root = MemoTestSingleton([]) + for _ in range(10000): + str_len = rand_ints[index] + root.value.append(''.join("A" for i in range(str_len))) + index += 1 + if index == len(rand_ints): + index = 0 + db0.close() + time.sleep(1) + p = multiprocessing.Process(target=make_small_update, + args=(px_name, expected_values)) + p.start() + + db0.init(DB0_DIR) + db0.open(px_name, "r") + + for i in range(2): + state_num = db0.get_state_num(px_name) + # refresh until 2 transactions are detected + max_repeat = 30 + if i == 1 and 'D' in db0.build_flags(): + db0.dbg_start_logs() + + while db0.get_state_num(px_name) == state_num: + assert max_repeat > 0 + db0.refresh() + time.sleep(0.1) + max_repeat -= 1 + assert next(iter(db0.find(MemoTestClass))).value == expected_values[i] + max_repeat -= 1 + + p.join() + \ No newline at end of file diff --git a/python_tests/test_refresh.py b/python_tests/test_refresh.py index de6a5f00..fc3fd63e 100644 --- a/python_tests/test_refresh.py +++ b/python_tests/test_refresh.py @@ -6,6 +6,14 @@ from .conftest import DB0_DIR from .memo_test_types import DynamicDataClass, DynamicDataSingleton, MemoTestClass, MemoTestSingleton +# NOTE: all tests in this module are run twice +# to verify that refresh works correctly with custom page_io_step_size + +pytestmark = pytest.mark.parametrize("db0_fixture", [ + {}, # default parameters + {"page_io_step_size": 16 << 10} # with custom page_io_step_size +], indirect=True) + @db0.memo(singleton=True) class MemoClassX: @@ -471,68 +479,6 @@ def make_trasaction(n): p.terminate() p.join() -def make_small_update(px_name, expected_values): - time.sleep(0.25) - db0.init(DB0_DIR) - db0.open(px_name, "rw") - note = MemoTestClass(expected_values[0]) - db0.tags(note).add("tag") - db0.commit() - time.sleep(0.25) - if 'D' in db0.build_flags(): - db0.dbg_start_logs() - note.value = expected_values[1] - db0.close() - -@pytest.mark.parametrize("db0_slab_size", [{"slab_size": 1 * 1024 * 1024}], indirect=True) -def test_refresh_issue1(db0_slab_size): - """ - Issue: process blocked on refresh attempt - Reason: missing SparsePair.commit() call when finishing a transaction - """ - px_name = db0.get_current_prefix().name - expected_values = ["first string", "second string"] - - rand_ints = [350, 480, 343, 475, 871, 493, 550, 723, 342, 236, 110, 585, 633, 54, 797, 478, 850, 716, 1021, - 136, 248, 879, 151, 249, 15, 717, 773, 625, 738, 731, 955, 280, 208, 730, 754, 982, 281, 221, - 549, 501, 282, 307, 551, 472, 509, 761, 78, 735, 744, 450, 388, 645, 577, 706, 417, 78, 849, - 873, 904, 534, 945, 985, 431, 725, 826, 49, 64, 766, 32, 460, 971, 766, 390, 990, 899, 835, - 16, 570, 190, 573, 54, 642, 840, 817, 924, 793, 634, 889, 835, 250, 676, 1006, 819, 322, - 373, 278, 895, 767, 380, 442] - - index = 0 - root = MemoTestSingleton([]) - for _ in range(10000): - str_len = rand_ints[index] - root.value.append(''.join("A" for i in range(str_len))) - index += 1 - if index == len(rand_ints): - index = 0 - db0.close() - time.sleep(1) - p = multiprocessing.Process(target=make_small_update, - args=(px_name, expected_values)) - p.start() - - db0.init(DB0_DIR) - db0.open(px_name, "r") - - for i in range(2): - state_num = db0.get_state_num(px_name) - # refresh until 2 transactions are detected - max_repeat = 30 - if i == 1 and 'D' in db0.build_flags(): - db0.dbg_start_logs() - - while db0.get_state_num(px_name) == state_num: - assert max_repeat > 0 - db0.refresh() - time.sleep(0.1) - max_repeat -= 1 - assert next(iter(db0.find(MemoTestClass))).value == expected_values[i] - max_repeat -= 1 - - p.join() def writer_process(prefix, writer_sem, reader_sem): db0.init(DB0_DIR) @@ -559,8 +505,6 @@ async def test_async_wait_for_updates(db0_fixture): db0.commit() db0.close() - - writer_sem = multiprocessing.Semaphore(0) reader_sem = multiprocessing.Semaphore(0) @@ -570,8 +514,7 @@ async def test_async_wait_for_updates(db0_fixture): db0.init(DB0_DIR) db0.open(prefix, "r") - - + # Start waiting before transactions complete current_num = db0.get_state_num(prefix) make_trasaction(writer_sem, 5) diff --git a/src/dbzero/bindings/python/PyAPI.cpp b/src/dbzero/bindings/python/PyAPI.cpp index df0766d4..2538df4d 100644 --- a/src/dbzero/bindings/python/PyAPI.cpp +++ b/src/dbzero/bindings/python/PyAPI.cpp @@ -202,14 +202,13 @@ namespace db0::python if (py_autocommit && PyLong_Check(py_autocommit)) { autocommit_interval = PyLong_AsLong(py_autocommit); } - + std::optional slab_size; - if (py_slab_size && !PyLong_Check(py_slab_size)) { - PyErr_SetString(PyExc_TypeError, "Invalid argument type: slab_size"); - return NULL; - } - - if (py_slab_size) { + if (py_slab_size && py_slab_size != Py_None) { + if (!PyLong_Check(py_slab_size)) { + PyErr_SetString(PyExc_TypeError, "Invalid argument type: slab_size"); + return NULL; + } slab_size = PyLong_AsUnsignedLong(py_slab_size); } @@ -228,7 +227,7 @@ namespace db0::python std::optional meta_io_step_size; std::optional page_io_step_size; - if (py_meta_io_step_size) { + if (py_meta_io_step_size && py_meta_io_step_size != Py_None) { if (!PyLong_Check(py_meta_io_step_size)) { PyErr_SetString(PyExc_TypeError, "Invalid argument type: meta_io_step_size"); return NULL; @@ -236,8 +235,9 @@ namespace db0::python meta_io_step_size = PyLong_AsUnsignedLong(py_meta_io_step_size); } - if (py_page_io_step_size) { - if (!PyLong_Check(py_page_io_step_size)) { + // check for None (default) + if (py_page_io_step_size && py_page_io_step_size != Py_None) { + if (!PyLong_Check(py_page_io_step_size)) { PyErr_SetString(PyExc_TypeError, "Invalid argument type: page_io_step_size"); return NULL; } diff --git a/src/dbzero/core/storage/REL_Index.cpp b/src/dbzero/core/storage/REL_Index.cpp index 145046b6..42c55c4f 100644 --- a/src/dbzero/core/storage/REL_Index.cpp +++ b/src/dbzero/core/storage/REL_Index.cpp @@ -49,6 +49,7 @@ namespace db0 REL_CompressedItem::REL_CompressedItem(std::uint32_t first_rel_page_num, const REL_Item &item) : m_storage_page_num(item.m_storage_page_num) + , m_flags(item.m_flags) { // check if can fit assert(first_rel_page_num == (item.m_rel_page_num >> 32)); @@ -56,8 +57,10 @@ namespace db0 m_compressed_rel_page_num = static_cast(item.m_rel_page_num & 0xFFFFFFFF); } - REL_CompressedItem::REL_CompressedItem(std::uint32_t first_rel_page_num, std::uint64_t rel_page_num, std::uint64_t storage_page_num) + REL_CompressedItem::REL_CompressedItem(std::uint32_t first_rel_page_num, std::uint64_t rel_page_num, + std::uint64_t storage_page_num, REL_Flags flags) : m_storage_page_num(storage_page_num) + , m_flags(flags) { // check if can fit assert(first_rel_page_num == (rel_page_num >> 32)); @@ -68,7 +71,7 @@ namespace db0 REL_Item REL_CompressedItem::uncompress(std::uint32_t first_rel_page_num) const { std::uint64_t full_rel_page_num = (static_cast(first_rel_page_num) << 32) | static_cast(m_compressed_rel_page_num); - return { full_rel_page_num, m_storage_page_num }; + return { full_rel_page_num, m_storage_page_num, m_flags }; } std::string REL_CompressedItem::toString() const { diff --git a/src/dbzero/core/storage/REL_Index.hpp b/src/dbzero/core/storage/REL_Index.hpp index 959c587b..4d60f4fa 100644 --- a/src/dbzero/core/storage/REL_Index.hpp +++ b/src/dbzero/core/storage/REL_Index.hpp @@ -14,6 +14,14 @@ namespace db0 struct REL_Item; struct REL_CompressedItem; + // Options to additionally annotate REL_Index elements (i.e. continuous Page-IO steps) + // this might be usefull for maintaining different classess of data (e.g. metadata vs no-cache data) + enum class REL_Options: std::uint8_t + { + }; + + using REL_Flags = FlagSet; + struct REL_ItemCompT { bool operator()(const REL_Item &lhs, const REL_Item &rhs) const; @@ -38,12 +46,14 @@ DB0_PACKED_BEGIN std::uint64_t m_rel_page_num = 0; // the starting storage page number (absolute) std::uint64_t m_storage_page_num = 0; + REL_Flags m_flags; REL_Item() = default; - REL_Item(std::uint64_t rel_page_num, std::uint64_t storage_page_num) + REL_Item(std::uint64_t rel_page_num, std::uint64_t storage_page_num, REL_Flags flags = {}) : m_rel_page_num(rel_page_num) , m_storage_page_num(storage_page_num) + , m_flags(flags) { } @@ -69,10 +79,12 @@ DB0_PACKED_BEGIN using EqualT = REL_CompressedItemEqualT; // construct REL-compressed item relative to the specific page number - i.e. first_page_num REL_CompressedItem(std::uint32_t first_rel_page_num, const REL_Item &); - REL_CompressedItem(std::uint32_t first_rel_page_num, std::uint64_t rel_page_num, std::uint64_t storage_page_num); - + REL_CompressedItem(std::uint32_t first_rel_page_num, std::uint64_t rel_page_num, + std::uint64_t storage_page_num, REL_Flags flags = {}); + std::uint32_t m_compressed_rel_page_num; std::uint64_t m_storage_page_num; + REL_Flags m_flags; // uncompress relative to a specific page number REL_Item uncompress(std::uint32_t first_rel_page_num) const; From c27b83c0e829e8d008af966f61acb894b959a601 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 27 Nov 2025 10:23:47 +0100 Subject: [PATCH 12/12] cleanup --- dbzero/dbzero/initialization.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbzero/dbzero/initialization.py b/dbzero/dbzero/initialization.py index ebfb7f26..c6bfc287 100644 --- a/dbzero/dbzero/initialization.py +++ b/dbzero/dbzero/initialization.py @@ -33,8 +33,8 @@ def init(dbzero_root: str, **kwargs) -> None: """ init_kwargs = {} - - config_keys = ("autocommit", "autocommit_interval", "cache_size", "lang_cache_size", "suppress_dist_overflow_error") + + config_keys = ("autocommit", "autocommit_interval", "cache_size", "lang_cache_size") config = {} for key in config_keys: if key in kwargs: