From 969f2d921980baa34d52b9c2e519afca1552fe98 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 6 Nov 2025 20:31:34 +0100 Subject: [PATCH 01/11] save work --- src/dbzero/core/memory/CacheRecycler.cpp | 134 +++++++++++++++-------- src/dbzero/core/memory/CacheRecycler.hpp | 29 +++-- src/dbzero/core/memory/PageMap.hpp | 14 +-- src/dbzero/core/memory/ResourceLock.cpp | 6 +- src/dbzero/core/memory/ResourceLock.hpp | 8 +- src/dbzero/workspace/GC0.cpp | 2 + 6 files changed, 126 insertions(+), 67 deletions(-) diff --git a/src/dbzero/core/memory/CacheRecycler.cpp b/src/dbzero/core/memory/CacheRecycler.cpp index c4dfea26..8d06cc97 100644 --- a/src/dbzero/core/memory/CacheRecycler.cpp +++ b/src/dbzero/core/memory/CacheRecycler.cpp @@ -7,12 +7,28 @@ namespace db0 { + std::size_t getCapacity(std::size_t total_capacity, int priority) + { + auto result = total_capacity; + auto low_result = total_capacity >> 3; // 12.5% for low priority + if (priority == 0) { + result -= low_result; + } else { + result = low_result; + } + return result; + } + + std::size_t getMaxSize(std::size_t capacity) { + return (capacity > 0) ? 
((capacity - 1) / MIN_PAGE_SIZE + 1) : 0; + } + CacheRecycler::CacheRecycler(std::size_t capacity, const std::atomic &dirty_meter, std::optional flush_size, std::function flush_dirty, std::function flush_callback) - : m_res_buf((capacity > 0)?((capacity - 1) / MIN_PAGE_SIZE + 1):0) - , m_capacity(capacity) + : m_capacity { db0::getCapacity(capacity, 0), db0::getCapacity(capacity, 1) } + , m_res_bufs { getMaxSize(m_capacity[0]), getMaxSize(m_capacity[1]) } , m_dirty_meter(dirty_meter) // assign default flush size , m_flush_size(flush_size.value_or(DEFAULT_FLUSH_SIZE)) @@ -21,46 +37,61 @@ namespace db0 { } - void CacheRecycler::adjustSize(std::unique_lock &, std::size_t requested_release_size) + std::size_t CacheRecycler::adjustSize(std::unique_lock &, list_t &res_buf, + std::size_t requested_release_size) { // calculate size to be released from the dirty locks // so that they occupy <50% of the cache // NOTE: this has to be done before actual size adjustment - if (m_flush_dirty && m_dirty_meter > ((m_current_size - requested_release_size) >> 1)) { - std::int64_t limit = m_dirty_meter - ((m_current_size - requested_release_size) >> 1); + if (m_flush_dirty && m_dirty_meter > ((getCurrentSize() - requested_release_size) >> 1)) { + std::int64_t limit = m_dirty_meter - ((getCurrentSize() - requested_release_size) >> 1); // request flushing (and releasing) specific volume of dirty locks m_flush_dirty(limit); } std::size_t released_size = 0; // try flushing 'requested_release_size' number of excess elements - auto it = m_res_buf.begin(), end = m_res_buf.end(); + auto it = res_buf.begin(), end = res_buf.end(); while (it != end && released_size < requested_release_size) { // only release locks with no active external references (other than the CacheRecycler itself) // NOTE: dirty locks are relased by m_flush_dirty callback if ((*it).use_count() == 1 && !(*it)->isDirty()) { released_size += (*it)->usedMem(); - it = m_res_buf.erase(it); + it = res_buf.erase(it); } else { ++it; 
} } - // update current size - m_current_size -= released_size; + return released_size; } - void CacheRecycler::updateSize(std::unique_lock &lock, std::size_t expected_size) - { + void CacheRecycler::adjustSize(std::unique_lock &lock, std::size_t release_size) + { + // release from low-priority cache first + auto released_size = adjustSize(lock, m_res_bufs[1], release_size); + // update current size + m_current_size[1] -= released_size; + release_size -= released_size; + if (release_size > 0) { + released_size = adjustSize(lock, m_res_bufs[0], release_size); + m_current_size[0] -= released_size; + } + } + + void CacheRecycler::updateSize(std::unique_lock &lock, int priority, std::size_t expected_size) + { + assert(priority == 0 || priority == 1); // we make 2 iterations because dependent locks (i.e. owned by the boundary lock) // will be released only during the second pass for (int i = 0; i < 2; ++i) { - if (m_current_size <= expected_size) { + if (m_current_size[priority] <= expected_size) { break; } // release excess locks plus flush size - adjustSize(lock, m_current_size - expected_size); + auto released_size = adjustSize(lock, m_res_bufs[priority], m_current_size[priority] - expected_size); + m_current_size[priority] -= released_size; } } @@ -78,38 +109,41 @@ namespace db0 if (res_lock) { // access existing resource std::unique_lock lock(m_mutex); + int priority = res_lock->isCached() ? 
0 : 1; if (res_lock->isRecycled()) { // resource already in cache, just bring to back (lowest priority for removal) - m_res_buf.splice(m_res_buf.end(), res_lock->m_recycle_it); - } else if (res_lock->isCached()) { + m_res_bufs[priority].splice(m_res_bufs[priority].end(), res_lock->m_recycle_it); + } else { // add new resource (if to be cached) auto lock_size = res_lock->usedMem(); - if (lock_size > m_capacity) { + auto &res_buf = m_res_bufs[priority]; + auto capacity = m_capacity[priority]; + if (lock_size > capacity) { // Cache size is too small to keep this resource // (or is uninitialized) return; } - m_current_size += lock_size; - if (m_current_size > m_capacity) { + m_current_size[priority] += lock_size; + if (m_current_size[priority] > capacity) { // try reducing cache utilization to capacity minus flush size - auto flush_size = std::min(m_capacity >> 1, m_flush_size); - updateSize(lock, m_capacity - flush_size); + auto flush_size = std::min(capacity >> 1, m_flush_size); + updateSize(lock, priority, capacity - flush_size); flushed = true; - flush_result = m_current_size <= (m_capacity - flush_size); + flush_result = m_current_size[priority] <= (capacity - flush_size); } // resize is a costly operation but cannot be avoided if the number of locked // resources exceeds the assumed limit // note that this operation does not change the configured cache capacity - if (m_res_buf.size() == m_res_buf.max_size()) { + if (res_buf.size() == res_buf.max_size()) { // After resize, all iterators to cached elements will be invalidated!! 
- m_res_buf.resize(m_res_buf.size() * 2); + res_buf.resize(res_buf.size() * 2); // Update self-iterators in all cached locks - for (auto it = m_res_buf.begin(), end = m_res_buf.end(); it != end; ++it) { + for (auto it = res_buf.begin(), end = res_buf.end(); it != end; ++it) { (*it)->m_recycle_it = it; } } - m_res_buf.push_back(res_lock); - res_lock->m_recycle_it = std::prev(m_res_buf.end()); + res_buf.push_back(res_lock); + res_lock->m_recycle_it = std::prev(res_buf.end()); res_lock->setRecycled(true); } } @@ -124,27 +158,35 @@ namespace db0 { std::unique_lock lock(m_mutex); // try releasing all locks - updateSize(lock, 0); + updateSize(lock, 0, 0); + updateSize(lock, 1, 0); } void CacheRecycler::resize(std::size_t new_size) + { + resize(db0::getCapacity(new_size, 0), 0); + resize(db0::getCapacity(new_size, 1), 1); + } + + void CacheRecycler::resize(std::size_t new_size, int priority) { std::unique_lock lock(m_mutex); - if (new_size == m_capacity) { + if (new_size == m_capacity[priority]) { return; } - m_capacity = new_size; + m_capacity[priority] = new_size; // try releasing excess locks - updateSize(lock, m_capacity); + updateSize(lock, priority, m_capacity[priority]); + auto &res_buf = m_res_bufs[priority]; // new capacity of the fixed list should allow storing existing locks - auto new_max_size = std::max((m_capacity - 1) / MIN_PAGE_SIZE + 1, m_res_buf.size()); - if (new_max_size != m_res_buf.max_size()) { + auto new_max_size = std::max((m_capacity[priority] - 1) / MIN_PAGE_SIZE + 1, res_buf.size()); + if (new_max_size != res_buf.max_size()) { // After resize, all iterators to cached elements will be invalidated!! 
- m_res_buf.resize(new_max_size); - + res_buf.resize(new_max_size); + // Update self-iterators in all cached locks - for (auto it = m_res_buf.begin(), end = m_res_buf.end(); it != end; ++it) { + for (auto it = res_buf.begin(), end = res_buf.end(); it != end; ++it) { (*it)->m_recycle_it = it; } } @@ -154,25 +196,31 @@ namespace db0 { if (res.isRecycled()) { res.setRecycled(false); - m_current_size -= res.size(); - m_res_buf.erase(res.m_recycle_it); + int priority = res.isCached() ? 0 : 1; + m_current_size[priority] -= res.size(); + m_res_bufs[priority].erase(res.m_recycle_it); } } std::size_t CacheRecycler::size() const { - return m_current_size; + return getCurrentSize(); } - std::size_t CacheRecycler::getCapacity() const { - return m_capacity; - } - void CacheRecycler::forEach(std::function)> f) const { std::unique_lock lock(m_mutex); - for (const auto &p: m_res_buf) { + for (const auto &p: m_res_bufs[0]) { f(p); } + for (const auto &p: m_res_bufs[1]) { + f(p); + } + } + + std::size_t CacheRecycler::getCapacity() const + { + std::unique_lock lock(m_mutex); + return m_capacity[0] + m_capacity[1]; } } \ No newline at end of file diff --git a/src/dbzero/core/memory/CacheRecycler.hpp b/src/dbzero/core/memory/CacheRecycler.hpp index 688e4ac1..340982b0 100644 --- a/src/dbzero/core/memory/CacheRecycler.hpp +++ b/src/dbzero/core/memory/CacheRecycler.hpp @@ -16,7 +16,7 @@ namespace db0 class CacheRecycler { - public : + public: static constexpr std::size_t DEFAULT_FLUSH_SIZE = 128 << 20u; /** @@ -71,14 +71,15 @@ namespace db0 */ void forEach(std::function)>) const; - private : + private: using list_t = db0::FixedList >; using iterator = list_t::iterator; - - list_t m_res_buf; - std::size_t m_current_size = 0; - // cache capacity as number of bytes - std::size_t m_capacity; + + // cache capacities as number of bytes (priority 0 and 1) + std::array m_capacity; + // buffers for priority cache (#0) and secondary cache (#1) + std::array m_res_bufs; + std::array m_current_size = 
{0, 0}; const std::atomic &m_dirty_meter; // number of locks to be flushed at once std::size_t m_flush_size; @@ -86,14 +87,22 @@ namespace db0 std::function m_flush_dirty; std::function m_flush_callback; std::pair m_last_flush_callback_result = {true, false}; - + + void resize(std::size_t new_size, int priority); + /** * Adjusts cache size after updates, collect locks to unlock (can be unlocked off main thread) * @param released_locks locks to be released * @param release_size total number of bytes to be released + * @return number of bytes actually released */ - void adjustSize(std::unique_lock &, std::size_t release_size); - void updateSize(std::unique_lock &, std::size_t expected_size); + std::size_t adjustSize(std::unique_lock &, list_t &res_buf, std::size_t release_size); + void adjustSize(std::unique_lock &, std::size_t release_size); + void updateSize(std::unique_lock &, int priority, std::size_t expected_size); + + inline std::size_t getCurrentSize() const { + return m_current_size[0] + m_current_size[1]; + } }; } \ No newline at end of file diff --git a/src/dbzero/core/memory/PageMap.hpp b/src/dbzero/core/memory/PageMap.hpp index b10b1a9f..87c1cf10 100644 --- a/src/dbzero/core/memory/PageMap.hpp +++ b/src/dbzero/core/memory/PageMap.hpp @@ -90,7 +90,7 @@ namespace db0 mutable std::map, CompT> m_cache; using CacheIterator = typename decltype(m_cache)::iterator; - CacheIterator find(std::uint64_t page_num, StateNumType state_num) const; + CacheIterator findImpl(std::uint64_t page_num, StateNumType state_num) const; // Erase ALL locks with a given page number where state < state_num // irrespective of their use count, this is required for handling inconsistent locks problem @@ -146,7 +146,7 @@ namespace db0 bool PageMap::exists(StateNumType state_num, std::uint64_t page_num) const { std::shared_lock _lock(m_rw_mutex); - return find(page_num, state_num) != m_cache.end(); + return findImpl(page_num, state_num) != m_cache.end(); } template @@ -155,7 +155,7 @@ 
namespace db0 { // needs to be unique locked due to potential m_cache::erase operation std::unique_lock lock(m_rw_mutex); - auto it = find(page_num, state_num); + auto it = findImpl(page_num, state_num); if (it == m_cache.end()) { return nullptr; } @@ -164,7 +164,7 @@ namespace db0 } template - typename PageMap::CacheIterator PageMap::find( + typename PageMap::CacheIterator PageMap::findImpl( std::uint64_t page_num, StateNumType state_num) const { if (m_cache.empty()) { @@ -209,7 +209,7 @@ namespace db0 { std::unique_lock lock(m_rw_mutex); auto page_num = res_lock->getAddress() >> m_shift; - auto it = find(page_num, state_num); + auto it = findImpl(page_num, state_num); assert(it != m_cache.end()); assert(it->second.lock() == res_lock); m_cache.erase(it); @@ -220,8 +220,8 @@ namespace db0 std::unique_lock lock(m_rw_mutex); m_cache.clear(); } - - template bool PageMap::empty() const + + template bool PageMap::empty() const { std::shared_lock lock(m_rw_mutex); return m_cache.empty(); diff --git a/src/dbzero/core/memory/ResourceLock.cpp b/src/dbzero/core/memory/ResourceLock.cpp index abcec796..0a0b6efc 100644 --- a/src/dbzero/core/memory/ResourceLock.cpp +++ b/src/dbzero/core/memory/ResourceLock.cpp @@ -77,11 +77,7 @@ namespace db0 atomicResetFlags(m_resource_flags, RESOURCE_RECYCLED); } } - - bool ResourceLock::isCached() const { - return !m_access_mode[AccessOptions::no_cache]; - } - + bool ResourceLock::resetDirtyFlag() { using MutexT = ResourceDirtyMutexT; diff --git a/src/dbzero/core/memory/ResourceLock.hpp b/src/dbzero/core/memory/ResourceLock.hpp index 67384012..70d5bbf8 100644 --- a/src/dbzero/core/memory/ResourceLock.hpp +++ b/src/dbzero/core/memory/ResourceLock.hpp @@ -109,7 +109,11 @@ namespace db0 inline bool isRecycled() const { return m_resource_flags & db0::RESOURCE_RECYCLED; } - + + inline bool isCached() const { + return !m_access_mode[AccessOptions::no_cache]; + } + // Mark lock as dirty without range specification void setDirty(); @@ -121,7 +125,7 
@@ namespace db0 // Sets the RESOURCE_FREEZE flag void freeze(); - bool isCached() const; + #ifndef NDEBUG bool isVolatile() const; diff --git a/src/dbzero/workspace/GC0.cpp b/src/dbzero/workspace/GC0.cpp index 9f3adfc9..7877d27c 100644 --- a/src/dbzero/workspace/GC0.cpp +++ b/src/dbzero/workspace/GC0.cpp @@ -135,6 +135,7 @@ namespace db0 void GC0::commit() { // Important ! Collect instance addresses first because push_back can trigger "remove" calls + /* FIXME: log std::vector addresses; std::unique_lock lock(m_mutex); for (auto &vptr_item : m_vptr_map) { @@ -154,6 +155,7 @@ namespace db0 super_t::push_back(toTypedAddress(addr_pair)); } m_scheduled_for_deletion.clear(); + */ super_t::commit(); } From daeff85d22b485a9611d520ead8bf51b70e783d5 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Fri, 7 Nov 2025 17:28:44 +0100 Subject: [PATCH 02/11] WIP: refactor --- .../core/collections/SGB_Tree/SGB_Tree.hpp | 6 +- .../collections/vector/v_sorted_vector.hpp | 4 +- src/dbzero/core/memory/Address.hpp | 9 +- src/dbzero/core/vspace/v_object.hpp | 369 ++++++++++------ src/dbzero/core/vspace/v_ptr.hpp | 418 ------------------ .../core/vspace/{v_ptr.cpp => vtypeless.cpp} | 2 +- src/dbzero/core/vspace/vtypeless.hpp | 166 +++++++ src/dbzero/object_model/ObjectBase.hpp | 2 +- src/dbzero/object_model/has_fixture.hpp | 14 +- 9 files changed, 411 insertions(+), 579 deletions(-) delete mode 100644 src/dbzero/core/vspace/v_ptr.hpp rename src/dbzero/core/vspace/{v_ptr.cpp => vtypeless.cpp} (98%) create mode 100644 src/dbzero/core/vspace/vtypeless.hpp diff --git a/src/dbzero/core/collections/SGB_Tree/SGB_Tree.hpp b/src/dbzero/core/collections/SGB_Tree/SGB_Tree.hpp index eef6eadb..d35c2c30 100644 --- a/src/dbzero/core/collections/SGB_Tree/SGB_Tree.hpp +++ b/src/dbzero/core/collections/SGB_Tree/SGB_Tree.hpp @@ -547,13 +547,13 @@ namespace db0 const std::size_t m_node_capacity; const NodeItemCompT m_item_comp; const HeapCompT m_heap_comp; - + template ItemIterator emplace_to_empty(Args&&... 
args) { - super_t::modify().m_sgb_size++; + ++super_t::modify().m_sgb_size; // create the root node which shares the same allocation as the 'head' node // obtain mutable mem lock first - auto mem_lock = this->get_v_ptr().modifyMappedRange(); + auto mem_lock = this->modifyMappedRange(); // calculate residual capacity auto residual_capacity = (*this)->sizeOf() - (*this)->trueSizeOf(); // use the remaining capacity to initialize the root node diff --git a/src/dbzero/core/collections/vector/v_sorted_vector.hpp b/src/dbzero/core/collections/vector/v_sorted_vector.hpp index 2b74f2f3..1616b756 100644 --- a/src/dbzero/core/collections/vector/v_sorted_vector.hpp +++ b/src/dbzero/core/collections/vector/v_sorted_vector.hpp @@ -1031,7 +1031,7 @@ DB0_PACKED_BEGIN // delete VSPACE "this" this->destroy(); // claim new identity - this->v_this = new_vector.get_v_ptr(); + (*this) = new_vector; return true; } else { return false; @@ -1057,7 +1057,7 @@ DB0_PACKED_BEGIN // delete VSPACE "this" this->destroy(); // claim new identity - this->v_this = new_vector.get_v_ptr(); + (*this) = new_vector; return true; } else { return false; diff --git a/src/dbzero/core/memory/Address.hpp b/src/dbzero/core/memory/Address.hpp index 6dddaa48..3473db50 100644 --- a/src/dbzero/core/memory/Address.hpp +++ b/src/dbzero/core/memory/Address.hpp @@ -9,8 +9,8 @@ namespace db0 { -DB0_PACKED_BEGIN +DB0_PACKED_BEGIN template class DB0_PACKED_ATTR AddressType { public: @@ -79,9 +79,10 @@ DB0_PACKED_BEGIN }; using Address = AddressType; - + // The UniqueAddress combines memory offset and instance ID // by definition the UniqueAddress will not be assigned more than once throughut the lifetime of the prefix +DB0_PACKED_BEGIN class DB0_PACKED_ATTR UniqueAddress { public: @@ -173,10 +174,10 @@ DB0_PACKED_BEGIN { } }; - +DB0_PACKED_END + UniqueAddress makeUniqueAddr(std::uint64_t offset, std::uint16_t id); -DB0_PACKED_END } namespace std diff --git a/src/dbzero/core/vspace/v_object.hpp 
b/src/dbzero/core/vspace/v_object.hpp index 21917248..8c0bf145 100644 --- a/src/dbzero/core/vspace/v_object.hpp +++ b/src/dbzero/core/vspace/v_object.hpp @@ -1,6 +1,6 @@ #pragma once -#include +#include "vtypeless.hpp" #include #include @@ -15,33 +15,27 @@ namespace db0 * @tparam T container object type */ template - class v_object + class v_object: public v_typeless { public: - using c_type = T; - using ptr_t = v_ptr; + using ContainerT = T; v_object() = default; - - v_object(const ptr_t &ptr) - : v_this(ptr) - { - } - + v_object(mptr ptr, FlagSet access_mode = {}) - : v_this(ptr, access_mode) + : vtypeless(ptr, access_mode) { } // Construct a verified instance - i.e. backed by a valid db0 address with a known size v_object(db0::tag_verified, mptr ptr, std::size_t size_of = 0, FlagSet access_mode = {}) - : v_this(ptr, access_mode) + : vtypeless(ptr, access_mode) { - v_this.safeConstRef(size_of); + this->safeConstRef(size_of); } v_object(const v_object &other) - : v_this(other.v_this) + : vtypeless(other) { } @@ -52,49 +46,49 @@ namespace db0 private: template::value-1> v_object(Memspace &memspace, Tuple&& t, int_seq) - : v_this(ptr_t::makeNew( - memspace, - c_type::measure(std::get(std::forward(t))...), - std::get(std::forward(t)) ) - ) { - c_type::__new(reinterpret_cast(&v_this.modify()), std::get(std::forward(t))...); + initNew( + memspace, + ContainerT::measure(std::get(std::forward(t))...), + std::get(std::forward(t)) ); + + ContainerT::__new(reinterpret_cast(&this->modify()), std::get(std::forward(t))...); } /// Pre-locked constructor struct tag_prelocked {}; template::value-1> - v_object(Memspace &memspace, tag_prelocked, Tuple&& t, int_seq) - : v_this(ptr_t::makeNew(memspace, std::move(std::get(std::forward(t))))) + v_object(Memspace &memspace, tag_prelocked, Tuple&& t, int_seq) { + intiNew(memspace, std::move(std::get(std::forward(t)))); // placement new syntax - c_type::__new(reinterpret_cast(&v_this.modify()), std::get(std::forward(t))...); + 
ContainerT::__new(reinterpret_cast(&this->modify()), std::get(std::forward(t))...); } template::value-1> void init(Memspace &memspace, Tuple&& t, int_seq) { - v_this = ptr_t::makeNew( + initNew( memspace, - c_type::measure(std::get(std::forward(t))...), + ContainerT::measure(std::get(std::forward(t))...), // access options (the last argument) std::get(std::forward(t)) ); - c_type::__new(reinterpret_cast(&v_this.modify()), std::get(std::forward(t))...); + ContainerT::__new(reinterpret_cast(&this->modify()), std::get(std::forward(t))...); } template::value-1> std::uint16_t initUnique(Memspace &memspace, Tuple&& t, int_seq) { std::uint16_t instance_id; - v_this = ptr_t::makeNewUnique( + initNewUnique( memspace, instance_id, - c_type::measure(std::get(std::forward(t))...), + ContainerT::measure(std::get(std::forward(t))...), // access options (the last argument) std::get(std::forward(t)) ); - c_type::__new(reinterpret_cast(&v_this.modify()), std::get(std::forward(t))...); + ContainerT::__new(reinterpret_cast(&this->modify()), std::get(std::forward(t))...); return instance_id; } @@ -148,167 +142,256 @@ namespace db0 return initUnique(memspace, std::forward(args)..., FlagSet {}); } - // Construct from v-pointer - v_object(ptr_t &&ptr) - : v_this(std::move(ptr)) - { - } - v_object(v_object &&other) - : v_this(std::move(other.v_this)) + : vtypeless(std::move(other)) { } - /** - * static V-Space allocator - */ template static std::uint64_t makeNew(Memspace &memspace, Args&&... 
args) { v_object new_object(memspace, std::forward(args)...); return new_object.getAddress(); } - - void operator=(const v_object &other) { - v_this = other.v_this; - } - - void operator=(v_object &&other) - { - v_this = std::move(other.v_this); - other.v_this = {}; - } - - /** - * Readonly data access operator - */ - inline const c_type *operator->() const { - return v_this.get(); - } - - inline const c_type *getData() const { - return v_this.get(); - } - - /** - * Reference data container for read - */ - inline const c_type &const_ref() const { - return *(v_this.get()); + + // Readonly data access operator + inline const ContainerT *operator->() const { + return this->getData(); } - const c_type& safeRef() const { - return v_this.safeRef(); + const ContainerT *getData() const + { + assureInitialized(); + return reinterpret_cast(m_mem_lock.m_buffer); } - const c_type& safeRef(std::uint32_t access_mode) const { - return v_this.safeRef(access_mode); + // Reference data container for read + inline const ContainerT &const_ref() const { + return *this->getData(); } - /** - * Reference data container for update - */ - inline c_type &modify() { - return v_this.modify(); + // Reference data container for update + ContainerT &modify() + { + assert(m_memspace_ptr); + // access resource for read-write + while (!ResourceReadWriteMutexT::__ref(m_resource_flags).get()) { + ResourceReadWriteMutexT::WriteOnlyLock lock(m_resource_flags); + if (lock.isLocked()) { + // release the MemLock first to avoid or reduce CoWs + // otherwise mapRange might need to manage multiple lock versions + m_mem_lock.release(); + // lock for +write + // note that lock is getting updated, possibly copy-on-write is being performed + // NOTE: must extract physical address for mapRange + m_mem_lock = m_memspace_ptr->getPrefix().mapRange( + m_address.getOffset(), this->getSize(), m_access_mode | AccessOptions::write | AccessOptions::read); + // by calling MemLock::modify we mark the object's associated 
range as modified + m_mem_lock.modify(); + lock.commit_set(); + break; + } + } + // this is to notify dirty-callbacks if needed + return *reinterpret_cast(m_mem_lock.m_buffer); } - // Mark specific range as modified - // NOTE: even if the range is not updated it will be forced-diff - void modify(std::size_t offset, std::size_t size) { - v_this.modify(offset, size); + void modify(std::size_t offset, std::size_t size) + { + auto &ref = modify(); + m_mem_lock.modify((std::byte*)&ref + offset, size); } - inline Address getAddress() const { - return v_this.getAddress(); - } - - inline const ptr_t &get_v_ptr() const { - return this->v_this; + void destroy() + { + if (m_address.isValid()) { + assert(m_memspace_ptr); + // container's destroy + (*this)->destroy(*m_memspace_ptr); + m_mem_lock.release(); + m_memspace_ptr->free(m_address); + this->m_address = {}; + this->m_resource_flags = 0; + this->m_cached_size.reset(); + } } - - inline ptr_t &get_v_ptr() { - return this->v_this; + + mptr myPtr(Address address, FlagSet access_mode = {}) const { + return this->getMemspace().myPtr(address, access_mode); } - void destroy() const + /* FIXME: + void commit() const { - if (v_this) { - v_this.destroy(); - v_this = {}; - } + // NOTE: this operation assumes that only one v_object instance pointing to the same address exists + // otherwise modifications done to one instance will not be visible to the other instances + // this assumption holds true for dbzero objects but if unable to fulfill in the future, + // it must be changed to "this->detach()" + + v_this.commit(); } + */ - inline Memspace &getMemspace() const { - return v_this.getMemspace(); + // Calculate the number of DPs spanned by this object + // NOTE: even small objects may span more than 1 DP if are positioned on a boundary + // however allocators typically will avoid such situations + unsigned int span() const + { + auto first_dp = this->getMemspace().getPageNum(this->m_address); + auto last_dp = 
this->getMemspace().getPageNum(this->m_address + (*this)->sizeOf()); + return last_dp - first_dp + 1; } - inline bool isNull() const { - return v_this.isNull(); - } - - /** - * instance compare - */ - bool operator==(const v_object &other) const { - return (v_this==other.v_this); + // Check if the underlying resource is available as mutable + // i.e. was already access for read/write + bool isModified() const { + return ResourceReadWriteMutexT::__ref(m_resource_flags).get(); } - explicit operator bool() const { - return !v_this.isNull(); + // Get the underlying mapped range (for mutation) + MemLock modifyMappedRange() + { + modify(); + return this->m_mem_lock; } - bool operator!() const { - return v_this.isNull(); - } - - mptr myPtr(Address address, FlagSet access_mode = {}) const { - return v_this.getMemspace().myPtr(address, access_mode); - } + private: - /** - * Get use count of the underlying lock - */ - unsigned int use_count() const { - return v_this.use_count(); + // Create a new instance + void initNew(Memspace &memspace, std::size_t size, FlagSet access_mode = {}) + { + // read not allowed for instance creation + assert(!access_mode[AccessOptions::read]); + this->m_memspace_ptr = &memspace; + this->m_address = memspace.alloc(size, SLOT_NUM, REALM_ID, getLocality(access_mode)); + // lock for create & write + // NOTE: must extract physical address for mapRange + this->m_mem_lock = memspace.getPrefix().mapRange( + m_address, size, access_mode | AccessOptions::write + ); + // mark the entire writable area as modified + this->m_mem_lock.modify(); + this->m_resource_flags = db0::RESOURCE_AVAILABLE_FOR_READ | db0::RESOURCE_AVAILABLE_FOR_WRITE; + this->m_access_mode = access_mode; } - void detach() const { - v_this.detach(); + // Create a new instance using allocUnique functionality + void initNewUnique(Memspace &memspace, std::uint16_t &instance_id, std::size_t size, + FlagSet access_mode = {}) + { + // read not allowed for instance creation + 
assert(!access_mode[AccessOptions::read]); + this->m_memspace_ptr = &memspace; + auto unique_address = memspace.allocUnique(size, SLOT_NUM, REALM_ID, getLocality(access_mode)); + instance_id = unique_address.getInstanceId(); + // lock for create & write + // NOTE: must extract physical address for mapRange + this->m_address = unique_address; + this->m_mem_lock = memspace.getPrefix().mapRange( + unique_address.getOffset(), size, access_mode | AccessOptions::write + ); + // mark the entire writable area as modified + this->m_mem_lock.modify(); + // mark as available for both write & read + this->m_resource_flags = db0::RESOURCE_AVAILABLE_FOR_READ | db0::RESOURCE_AVAILABLE_FOR_WRITE; + this->m_access_mode = access_mode; } - void commit() const + /** + * Create a new instance from the mapped address + * @param memspace the memspace to use + * @param mapped_addr the mapped address + * @param access_mode additional access mode flags + */ + void initNew(Memspace &memspace, MappedAddress &&mapped_addr, FlagSet access_mode = {}) { - // FIXME: optimization - // potentially we could call v_this.commit() here BUT - // if there exist 2 instances of v_object and one of them gets modified - // then the "read-only" instance will not see the updates + this->m_memspace_ptr = &memspace; + // mark the entire writable area as modified + mapped_addr.m_mem_lock.modify(); + this->m_address = mapped_addr.m_address; + this->m_mem_lock = std::move(mapped_addr.m_mem_lock); + // mark as available for read & write + this->m_resource_flags = db0::RESOURCE_AVAILABLE_FOR_READ | db0::RESOURCE_AVAILABLE_FOR_WRITE; + this->m_access_mode = access_mode; + } + + static inline unsigned char getLocality(FlagSet access_mode) { + // NOTE: use locality = 1 for no_cache allocations, 0 otherwise (undefined) + return access_mode[AccessOptions::no_cache] ? 
1 : 0; + } - v_this.detach(); + void assureInitialized() const + { + assert(m_memspace_ptr); + // access the resource for read (or check if the read or read/write access has already been gained) + while (!ResourceReadMutexT::__ref(m_resource_flags).get()) { + ResourceReadMutexT::WriteOnlyLock lock(m_resource_flags); + if (lock.isLocked()) { + // NOTE: must extract physical address for mapRange + m_mem_lock = m_memspace_ptr->getPrefix().mapRange( + m_address.getOffset(), this->getSize(), m_access_mode | AccessOptions::read); + lock.commit_set(); + break; + } + } + assert(m_mem_lock.m_buffer); } - // Calculate the number of DPs spanned by this object - // NOTE: even small objects may span more than 1 DP if are positioned on a boundary - // however allocators typically will avoid such situations - unsigned int span() const + // version with known size-of (pre-retrieved from the allocator) + // we made it as a separate implementation for potential performance gains + void assureInitialized(std::size_t size_of) const { - auto first_dp = v_this.getMemspace().getPageNum(v_this.getAddress()); - auto last_dp = v_this.getMemspace().getPageNum(v_this.getAddress() + v_this->sizeOf()); - return last_dp - first_dp + 1; + assert(m_memspace_ptr); + // access the resource for read (or check if the read or read/write access has already been gained) + while (!ResourceReadMutexT::__ref(m_resource_flags).get()) { + ResourceReadMutexT::WriteOnlyLock lock(m_resource_flags); + if (lock.isLocked()) { + // NOTE: must extract physical address for mapRange + m_mem_lock = m_memspace_ptr->getPrefix().mapRange( + m_address.getOffset(), size_of, m_access_mode | AccessOptions::read); + lock.commit_set(); + break; + } + } + assert(m_mem_lock.m_buffer); } + + const ContainerT &safeConstRef(std::size_t size_of = 0) const + { + if (!size_of) { + size_of = this->getSize(); + } + assureInitialized(size_of); + return ContainerT::__safe_const_ref( + safe_buf_t((std::byte*)m_mem_lock.m_buffer, 
(std::byte*)m_mem_lock.m_buffer + size_of) + ); + } - // Check if the underlying resource is available as mutable - // i.e. was already access for read/write - bool isModified() const { - return v_this.isModified(); + // Resolve the instance size + std::uint32_t fetchSize() const + { + assert(m_memspace_ptr); + if constexpr(metaprog::has_constant_size::value) { + // fixed size type + return ContainerT::measure(); + } + else if constexpr(metaprog::has_fixed_header::value) { + v_object header(mptr{*m_memspace_ptr, m_address}); + return header.getData()->getOBaseSize(); + } + + // retrieve from allocator (slowest) + return m_memspace_ptr->getAllocator().getAllocSize(m_address, REALM_ID); } - bool isNoCache() const { - return v_this.isNoCache(); + // Get from cache or fetch size + std::uint32_t getSize() const + { + if (!m_cached_size) { + m_cached_size = fetchSize(); + } + return *m_cached_size; } - - protected: - // container reference - mutable ptr_t v_this; }; // Utility function to safely mutate a v_object's fixed-size member diff --git a/src/dbzero/core/vspace/v_ptr.hpp b/src/dbzero/core/vspace/v_ptr.hpp deleted file mode 100644 index fb33bb32..00000000 --- a/src/dbzero/core/vspace/v_ptr.hpp +++ /dev/null @@ -1,418 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "MappedAddress.hpp" -#include "safe_buf_t.hpp" - -namespace db0 - -{ - - template - class v_object; - - class vtypeless - { - protected : - using ResourceReadMutexT = ROWO_Mutex< - std::uint16_t, - db0::RESOURCE_AVAILABLE_FOR_READ, - db0::RESOURCE_AVAILABLE_FOR_READ, - db0::RESOURCE_LOCK >; - - using ResourceReadWriteMutexT = ROWO_Mutex< - std::uint16_t, - db0::RESOURCE_AVAILABLE_FOR_WRITE, - db0::RESOURCE_AVAILABLE_FOR_RW, - db0::RESOURCE_LOCK >; - - // detach checks either R/W flags and clears both of them - using ResourceDetachMutexT = ROWO_Mutex< - std::uint16_t, - db0::RESOURCE_AVAILABLE_FOR_RW, - 
db0::RESOURCE_AVAILABLE_FOR_RW, - db0::RESOURCE_LOCK >; - - /** - * Within-prefix address of this object - */ - Address m_address = {}; - Memspace *m_memspace_ptr = nullptr; - mutable std::atomic m_resource_flags = 0; - // initial access flags (e.g. read / write / create) - FlagSet m_access_mode; - // NOTE: cached size may speed-up updates but also is relevant for existing vptr's reinterpret casts - mutable std::optional m_cached_size; - - /** - * Memory mapped range corresponding to this object - */ - mutable MemLock m_mem_lock; - - public: - vtypeless() = default; - - vtypeless(Memspace &, Address address, FlagSet); - - /** - * Create mem-locked with specific flags (e.g. read/ write) - */ - vtypeless(Memspace &, Address address, MemLock &&, std::uint16_t resource_flags, - FlagSet); - - vtypeless(const vtypeless& other); - vtypeless(vtypeless&&); - - /** - * @param access_mode additional flags / modes to use - */ - inline vtypeless(mptr ptr, FlagSet access_mode = {}) - : m_address(ptr.m_address) - , m_memspace_ptr(&ptr.m_memspace.get()) - , m_access_mode(ptr.m_access_mode | access_mode) - { - assertFlags(); - } - - inline FlagSet getAccessMode() const { - return m_access_mode; - } - - vtypeless &operator=(const vtypeless &other); - void operator=(vtypeless &&); - - /** - * Instance compare - */ - inline bool operator==(const vtypeless &ptr) const { - return (m_memspace_ptr == ptr.m_memspace_ptr && m_address == ptr.m_address); - } - - inline bool operator!=(const vtypeless &ptr) const { - return (m_memspace_ptr != ptr.m_memspace_ptr || m_address != ptr.m_address); - } - - inline bool isNull() const { - return !m_address.isValid(); - } - - inline operator bool() const { - return m_address.isValid(); - } - - inline Address getAddress() const { - return m_address; - } - - inline Memspace &getMemspace() const { - assert(m_memspace_ptr); - return *m_memspace_ptr; - } - - inline Memspace *getMemspacePtr() const { - return m_memspace_ptr; - } - - inline bool isNoCache() 
const { - return m_access_mode[AccessOptions::no_cache]; - } - - /** - * Get use count of the underlying lock - */ - unsigned int use_count() const; - - /** - * Check if the underlying resource is available in local memory - */ - bool isAttached() const; - - /** - * Detach underlying resource lock (i.e. mark resource as not available in local memory) - */ - void detach(); - - /** - * Commit by marking the write as final. - * The subsequent modify() will need to refresh the underlying lock - */ - void commit(); - - /** - * Cast to a specific concrete type - * @return pointer which may be null if the underlying lock does not exist - */ - template const T *castTo() const { - return reinterpret_cast(m_mem_lock.m_buffer); - } - - private: - inline void assertFlags() - { - // read / write / create flags are disallowed since they're assigned dynamically - assert(!m_access_mode[AccessOptions::read]); - assert(!m_access_mode[AccessOptions::write]); - } - }; - - /** - * virtual pointer to object of ContainerT - */ - template - class v_ptr : public vtypeless - { - public : - using container_t = ContainerT; - using self_t = v_ptr; - - inline v_ptr() = default; - - inline v_ptr(Memspace &memspace, Address address, FlagSet access_mode = {}) - : vtypeless(memspace, address, access_mode) - { - } - - inline v_ptr(Memspace &memspace, Address address, MemLock &&lock, std::uint16_t resource_flags, - FlagSet access_mode = {}) - : vtypeless(memspace, address, std::move(lock), resource_flags, access_mode) - { - } - - v_ptr(mptr ptr) - : vtypeless(ptr) - { - } - - v_ptr(mptr ptr, FlagSet access_mode) - : vtypeless(ptr, access_mode) - { - } - - // Explicit upcast from typeless - explicit v_ptr(const vtypeless &ptr) - : vtypeless(ptr) - { - } - - void destroy() - { - assert(m_memspace_ptr); - // container's destroy - (*this)->destroy(*m_memspace_ptr); - m_mem_lock.release(); - m_memspace_ptr->free(m_address); - this->m_address = {}; - this->m_resource_flags = 0; - 
this->m_cached_size.reset(); - } - - ContainerT &modify() - { - assert(m_memspace_ptr); - // access resource for read-write - while (!ResourceReadWriteMutexT::__ref(m_resource_flags).get()) { - ResourceReadWriteMutexT::WriteOnlyLock lock(m_resource_flags); - if (lock.isLocked()) { - // release the MemLock first to avoid or reduce CoWs - // otherwise mapRange might need to manage multiple lock versions - m_mem_lock.release(); - // lock for +write - // note that lock is getting updated, possibly copy-on-write is being performed - // NOTE: must extract physical address for mapRange - m_mem_lock = m_memspace_ptr->getPrefix().mapRange( - m_address.getOffset(), this->getSize(), m_access_mode | AccessOptions::write | AccessOptions::read); - // by calling MemLock::modify we mark the object's associated range as modified - m_mem_lock.modify(); - lock.commit_set(); - break; - } - } - // this is to notify dirty-callbacks if needed - return *reinterpret_cast(m_mem_lock.m_buffer); - } - - void modify(std::size_t offset, std::size_t size) - { - auto &ref = modify(); - m_mem_lock.modify((std::byte*)&ref + offset, size); - } - - // Check if the underlying resource is available as mutable - // i.e. 
was already access for read/write - bool isModified() const { - return ResourceReadWriteMutexT::__ref(m_resource_flags).get(); - } - - const ContainerT &safeConstRef(std::size_t size_of = 0) const - { - if (!size_of) { - size_of = this->getSize(); - } - assureInitialized(size_of); - return ContainerT::__safe_const_ref( - safe_buf_t((std::byte*)m_mem_lock.m_buffer, (std::byte*)m_mem_lock.m_buffer + size_of) - ); - } - - const ContainerT *get() const - { - assureInitialized(); - return reinterpret_cast(m_mem_lock.m_buffer); - } - - const ContainerT *getData() const - { - assureInitialized(); - return reinterpret_cast(m_mem_lock.m_buffer); - } - - inline const ContainerT *operator->() const { - return get(); - } - - static self_t makeNew(Memspace &memspace, std::size_t size, FlagSet access_mode = {}) - { - // read not allowed for instance creation - assert(!access_mode[AccessOptions::read]); - auto address = memspace.alloc(size, SLOT_NUM, REALM_ID, getLocality(access_mode)); - // lock for create & write - // NOTE: must extract physical address for mapRange - auto mem_lock = memspace.getPrefix().mapRange(address, size, access_mode | AccessOptions::write); - // mark the entire writable area as modified - mem_lock.modify(); - // mark as available for both write & read - return self_t( - memspace, address, std::move(mem_lock), - db0::RESOURCE_AVAILABLE_FOR_READ | db0::RESOURCE_AVAILABLE_FOR_WRITE, access_mode - ); - } - - // Create a new instance using allocUnique functionality - static self_t makeNewUnique(Memspace &memspace, std::uint16_t &instance_id, std::size_t size, - FlagSet access_mode = {}) - { - // read not allowed for instance creation - assert(!access_mode[AccessOptions::read]); - auto unique_address = memspace.allocUnique(size, SLOT_NUM, REALM_ID, getLocality(access_mode)); - instance_id = unique_address.getInstanceId(); - // lock for create & write - // NOTE: must extract physical address for mapRange - auto mem_lock = memspace.getPrefix().mapRange( - 
unique_address.getOffset(), size, access_mode | AccessOptions::write - ); - // mark the entire writable area as modified - mem_lock.modify(); - // mark as available for both write & read - return self_t( - memspace, unique_address, std::move(mem_lock), - db0::RESOURCE_AVAILABLE_FOR_READ | db0::RESOURCE_AVAILABLE_FOR_WRITE, access_mode - ); - } - - /** - * Create a new instance from the mapped address - * @param memspace the memspace to use - * @param mapped_addr the mapped address - * @param access_mode additional access mode flags - */ - static self_t makeNew(Memspace &memspace, MappedAddress &&mapped_addr, FlagSet access_mode = {}) - { - // mark the entire writable area as modified - mapped_addr.m_mem_lock.modify(); - return self_t(memspace, mapped_addr.m_address, - std::move(mapped_addr.m_mem_lock), - // mark as available for read & write - db0::RESOURCE_AVAILABLE_FOR_READ | db0::RESOURCE_AVAILABLE_FOR_WRITE, access_mode - ); - } - - /** - * Get the underlying mapped range (for mutation) - */ - MemLock modifyMappedRange() - { - modify(); - return this->m_mem_lock; - } - - private: - - static inline unsigned char getLocality(FlagSet access_mode) { - // NOTE: use locality = 1 for no_cache allocations, 0 otherwise (undefined) - return access_mode[AccessOptions::no_cache] ? 
1 : 0; - } - - void assureInitialized() const - { - assert(m_memspace_ptr); - // access the resource for read (or check if the read or read/write access has already been gained) - while (!ResourceReadMutexT::__ref(m_resource_flags).get()) { - ResourceReadMutexT::WriteOnlyLock lock(m_resource_flags); - if (lock.isLocked()) { - // NOTE: must extract physical address for mapRange - m_mem_lock = m_memspace_ptr->getPrefix().mapRange( - m_address.getOffset(), this->getSize(), m_access_mode | AccessOptions::read); - lock.commit_set(); - break; - } - } - assert(m_mem_lock.m_buffer); - } - - // version with known size-of (pre-retrieved from the allocator) - // we made it as a separate implementation for potential performance gains - void assureInitialized(std::size_t size_of) const - { - assert(m_memspace_ptr); - // access the resource for read (or check if the read or read/write access has already been gained) - while (!ResourceReadMutexT::__ref(m_resource_flags).get()) { - ResourceReadMutexT::WriteOnlyLock lock(m_resource_flags); - if (lock.isLocked()) { - // NOTE: must extract physical address for mapRange - m_mem_lock = m_memspace_ptr->getPrefix().mapRange( - m_address.getOffset(), size_of, m_access_mode | AccessOptions::read); - lock.commit_set(); - break; - } - } - assert(m_mem_lock.m_buffer); - } - - // Resolve the instance size - std::uint32_t fetchSize() const - { - assert(m_memspace_ptr); - if constexpr(metaprog::has_constant_size::value) { - // fixed size type - return ContainerT::measure(); - } - else if constexpr(metaprog::has_fixed_header::value) { - v_object header(mptr{*m_memspace_ptr, m_address}); - return header.getData()->getOBaseSize(); - } - - // retrieve from allocator (slowest) - return m_memspace_ptr->getAllocator().getAllocSize(m_address, REALM_ID); - } - - // Get from cache or fetch size - std::uint32_t getSize() const - { - if (!m_cached_size) { - m_cached_size = fetchSize(); - } - return *m_cached_size; - } - }; - -} diff --git 
a/src/dbzero/core/vspace/v_ptr.cpp b/src/dbzero/core/vspace/vtypeless.cpp similarity index 98% rename from src/dbzero/core/vspace/v_ptr.cpp rename to src/dbzero/core/vspace/vtypeless.cpp index 703f9ffb..462796d3 100644 --- a/src/dbzero/core/vspace/v_ptr.cpp +++ b/src/dbzero/core/vspace/vtypeless.cpp @@ -1,4 +1,4 @@ -#include +#include "vtypeless.hpp" namespace db0 diff --git a/src/dbzero/core/vspace/vtypeless.hpp b/src/dbzero/core/vspace/vtypeless.hpp new file mode 100644 index 00000000..880fbfff --- /dev/null +++ b/src/dbzero/core/vspace/vtypeless.hpp @@ -0,0 +1,166 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "MappedAddress.hpp" +#include "safe_buf_t.hpp" + +namespace db0 + +{ + + class vtypeless + { + protected: + using ResourceReadMutexT = ROWO_Mutex< + std::uint16_t, + db0::RESOURCE_AVAILABLE_FOR_READ, + db0::RESOURCE_AVAILABLE_FOR_READ, + db0::RESOURCE_LOCK >; + + using ResourceReadWriteMutexT = ROWO_Mutex< + std::uint16_t, + db0::RESOURCE_AVAILABLE_FOR_WRITE, + db0::RESOURCE_AVAILABLE_FOR_RW, + db0::RESOURCE_LOCK >; + + // detach checks either R/W flags and clears both of them + using ResourceDetachMutexT = ROWO_Mutex< + std::uint16_t, + db0::RESOURCE_AVAILABLE_FOR_RW, + db0::RESOURCE_AVAILABLE_FOR_RW, + db0::RESOURCE_LOCK >; + + /** + * Within-prefix address of this object + */ + Address m_address = {}; + Memspace *m_memspace_ptr = nullptr; + mutable std::atomic m_resource_flags = 0; + // initial access flags (e.g. read / write / create) + FlagSet m_access_mode; + // NOTE: cached size may speed-up updates but also is relevant for existing vptr's reinterpret casts + mutable std::optional m_cached_size; + + // Memory mapped range corresponding to this object + mutable MemLock m_mem_lock; + + public: + vtypeless() = default; + + vtypeless(Memspace &, Address address, FlagSet); + + /** + * Create mem-locked with specific flags (e.g. 
read/ write) + */ + vtypeless(Memspace &, Address address, MemLock &&, std::uint16_t resource_flags, + FlagSet); + + vtypeless(const vtypeless& other); + vtypeless(vtypeless&&); + + /** + * @param access_mode additional flags / modes to use + */ + inline vtypeless(mptr ptr, FlagSet access_mode = {}) + : m_address(ptr.m_address) + , m_memspace_ptr(&ptr.m_memspace.get()) + , m_access_mode(ptr.m_access_mode | access_mode) + { + assertFlags(); + } + + inline FlagSet getAccessMode() const { + return m_access_mode; + } + + vtypeless &operator=(const vtypeless &other); + void operator=(vtypeless &&); + + /** + * Instance compare + */ + inline bool operator==(const vtypeless &ptr) const { + return (m_memspace_ptr == ptr.m_memspace_ptr && m_address == ptr.m_address); + } + + inline bool operator!=(const vtypeless &ptr) const { + return (m_memspace_ptr != ptr.m_memspace_ptr || m_address != ptr.m_address); + } + + inline bool isNull() const { + return !m_address.isValid(); + } + + inline operator bool() const { + return m_address.isValid(); + } + + bool operator!() const { + return !m_address.isValid(); + } + + inline Address getAddress() const { + return m_address; + } + + inline Memspace &getMemspace() const { + assert(m_memspace_ptr); + return *m_memspace_ptr; + } + + inline Memspace *getMemspacePtr() const { + return m_memspace_ptr; + } + + inline bool isNoCache() const { + return m_access_mode[AccessOptions::no_cache]; + } + + // Get use count of the underlying lock + unsigned int use_count() const; + + /** + * Check if the underlying resource is available in local memory + */ + bool isAttached() const; + + /** + * Detach underlying resource lock (i.e. mark resource as not available in local memory) + */ + void detach(); + + /** + * Commit by marking the write as final. 
+ * The subsequent modify() will need to refresh the underlying lock + */ + void commit(); + + /** + * Cast to a specific concrete type + * @return pointer which may be null if the underlying lock does not exist + */ + template const T *castTo() const { + return reinterpret_cast(m_mem_lock.m_buffer); + } + + private: + + inline void assertFlags() + { + // read / write / create flags are disallowed since they're assigned dynamically + assert(!m_access_mode[AccessOptions::read]); + assert(!m_access_mode[AccessOptions::write]); + } + }; + +} diff --git a/src/dbzero/object_model/ObjectBase.hpp b/src/dbzero/object_model/ObjectBase.hpp index c7cef626..61239ffe 100644 --- a/src/dbzero/object_model/ObjectBase.hpp +++ b/src/dbzero/object_model/ObjectBase.hpp @@ -205,7 +205,7 @@ namespace db0 // Get access flags to propagate to members (e.g. no_cache) AccessFlags getMemberFlags() const { - return this->v_this.getAccessMode() & AccessOptions::no_cache; + return this->getAccessMode() & AccessOptions::no_cache; } protected: diff --git a/src/dbzero/object_model/has_fixture.hpp b/src/dbzero/object_model/has_fixture.hpp index 22f000d6..f2f0dad0 100644 --- a/src/dbzero/object_model/has_fixture.hpp +++ b/src/dbzero/object_model/has_fixture.hpp @@ -51,7 +51,7 @@ namespace db0 template void init(db0::swine_ptr &fixture, Args &&... args) { // must release existing weak ref - Fixture *raw_ptr = reinterpret_cast(this->v_this.getMemspacePtr()); + Fixture *raw_ptr = reinterpret_cast(this->getMemspacePtr()); if (raw_ptr) { // release weak ref of the Fixture db0::swine_ptr::release_weak(raw_ptr); @@ -65,7 +65,7 @@ namespace db0 std::uint16_t initUnique(db0::swine_ptr &fixture, Args &&... 
args) { // must release existing weak ref - Fixture *raw_ptr = reinterpret_cast(this->v_this.getMemspacePtr()); + Fixture *raw_ptr = reinterpret_cast(this->getMemspacePtr()); if (raw_ptr) { // release weak ref of the Fixture db0::swine_ptr::release_weak(raw_ptr); @@ -78,7 +78,7 @@ namespace db0 ~has_fixture() { - Fixture *raw_ptr = reinterpret_cast(this->v_this.getMemspacePtr()); + Fixture *raw_ptr = reinterpret_cast(this->getMemspacePtr()); if (raw_ptr) { // release weak ref of the Fixture db0::swine_ptr::release_weak(raw_ptr); @@ -87,7 +87,7 @@ namespace db0 db0::swine_ptr tryGetFixture() const { - Fixture *raw_ptr = reinterpret_cast(this->v_this.getMemspacePtr()); + Fixture *raw_ptr = reinterpret_cast(this->getMemspacePtr()); if (raw_ptr) { // construct swine_ptr from raw ptr return db0::swine_ptr::lock_weak(raw_ptr); @@ -109,7 +109,7 @@ namespace db0 void operator=(const has_fixture &other) { // must release existing weak ref and take from the copied object - Fixture *raw_ptr = reinterpret_cast(this->v_this.getMemspacePtr()); + Fixture *raw_ptr = reinterpret_cast(this->getMemspacePtr()); if (raw_ptr) { // release weak ref of the Fixture db0::swine_ptr::release_weak(raw_ptr); @@ -120,10 +120,10 @@ namespace db0 other_fixture.take_weak(); } - void operator=(has_fixture &&other) + void operator=(has_fixture &&other) { // must release existing weak ref and take from the copied object - Fixture *raw_ptr = reinterpret_cast(this->v_this.getMemspacePtr()); + Fixture *raw_ptr = reinterpret_cast(this->getMemspacePtr()); if (raw_ptr) { // release weak ref of the Fixture db0::swine_ptr::release_weak(raw_ptr); From 92144ae4e991d05de9a79f6f5ba346044e6f30ce Mon Sep 17 00:00:00 2001 From: Wojtek Date: Fri, 7 Nov 2025 20:34:26 +0100 Subject: [PATCH 03/11] WIP: refactor --- .../core/collections/SGB_Tree/SGB_Tree.hpp | 2 +- .../core/collections/SGB_Tree/sgb_types.hpp | 4 +- .../core/collections/b_index/v_bindex.hpp | 2 +- .../core/collections/pools/RC_LimitedPool.hpp | 4 +- 
.../collections/sgtree/intrusive_node.hpp | 14 +- .../core/collections/sgtree/sgtree_node.hpp | 11 +- .../core/collections/sgtree/v_sgtree.hpp | 49 ++--- .../collections/vector/v_sorted_sequence.hpp | 2 +- .../collections/vector/v_sorted_vector.hpp | 2 +- src/dbzero/core/memory/PrefixImpl.hpp | 2 +- src/dbzero/core/vspace/db0_ptr.hpp | 1 - src/dbzero/core/vspace/v_object.hpp | 174 +++------------ src/dbzero/core/vspace/v_ptr.hpp | 200 ++++++++++++++++++ src/dbzero/core/vspace/vtypeless.cpp | 4 +- src/dbzero/core/vspace/vtypeless.hpp | 6 +- src/dbzero/object_model/has_fixture.hpp | 3 +- src/dbzero/workspace/Fixture.hpp | 2 +- src/dbzero/workspace/GC0.hpp | 6 +- tests/unit_tests/VSpaceTests.cpp | 24 --- 19 files changed, 278 insertions(+), 234 deletions(-) create mode 100644 src/dbzero/core/vspace/v_ptr.hpp diff --git a/src/dbzero/core/collections/SGB_Tree/SGB_Tree.hpp b/src/dbzero/core/collections/SGB_Tree/SGB_Tree.hpp index d35c2c30..b7b9767b 100644 --- a/src/dbzero/core/collections/SGB_Tree/SGB_Tree.hpp +++ b/src/dbzero/core/collections/SGB_Tree/SGB_Tree.hpp @@ -20,7 +20,7 @@ namespace db0 using CompT = typename TypesT::CompT; using AddressT = typename TypesT::AddressT; using NodeT = typename TypesT::NodeT; - using NodePtrT = typename NodeT::ptr_t; + using NodePtrT = NodeT; using node_iterator = typename TypesT::o_sgb_node_t::iterator; using node_const_iterator = typename TypesT::o_sgb_node_t::const_iterator; using sg_tree_const_iterator = typename super_t::const_iterator; diff --git a/src/dbzero/core/collections/SGB_Tree/sgb_types.hpp b/src/dbzero/core/collections/SGB_Tree/sgb_types.hpp index c1caec48..152e8b98 100644 --- a/src/dbzero/core/collections/SGB_Tree/sgb_types.hpp +++ b/src/dbzero/core/collections/SGB_Tree/sgb_types.hpp @@ -75,11 +75,11 @@ namespace db0 } inline operator ptr_t&() { - return this->v_this; + return *this; } inline operator const ptr_t&() const { - return this->v_this; + return *this; } }; diff --git 
a/src/dbzero/core/collections/b_index/v_bindex.hpp b/src/dbzero/core/collections/b_index/v_bindex.hpp index 0304b692..5bca7993 100644 --- a/src/dbzero/core/collections/b_index/v_bindex.hpp +++ b/src/dbzero/core/collections/b_index/v_bindex.hpp @@ -121,7 +121,7 @@ namespace db0 return b_index.getAddress(); } - void destroy() const + void destroy() { // must clear all nodes (item destroy) assert(!m_item_destroy_func && "Operation not implemented"); diff --git a/src/dbzero/core/collections/pools/RC_LimitedPool.hpp b/src/dbzero/core/collections/pools/RC_LimitedPool.hpp index 515fa071..ab99cd6a 100644 --- a/src/dbzero/core/collections/pools/RC_LimitedPool.hpp +++ b/src/dbzero/core/collections/pools/RC_LimitedPool.hpp @@ -193,14 +193,14 @@ DB0_PACKED_END void RC_LimitedPool::commit() const { m_pool_map.commit(); - db0::v_object::commit(); + db0::vtypeless::commit(); } template void RC_LimitedPool::detach() const { m_pool_map.detach(); - db0::v_object::detach(); + db0::vtypeless::detach(); } template diff --git a/src/dbzero/core/collections/sgtree/intrusive_node.hpp b/src/dbzero/core/collections/sgtree/intrusive_node.hpp index 5e2cb50c..5e81b4ba 100644 --- a/src/dbzero/core/collections/sgtree/intrusive_node.hpp +++ b/src/dbzero/core/collections/sgtree/intrusive_node.hpp @@ -2,24 +2,24 @@ #include "v_sgtree.hpp" #include -#include +#include namespace db0 { - + /** * VSPACE node type compliant with intrusive containers * c_type - node container type * comp_t - node pointer comparer type */ - template > class intrusive_node - : public v_object + template > + class intrusive_node: public v_object { public : using super = v_object; using c_type = T; - using ptr_t = typename super::ptr_t; + using ptr_t = typename v_object::ptr_t; using comp_t = comp_t_; // type compliant with intrusive NodeTraits requirements using traits_t = base_traits_t, ptr_t>; @@ -46,14 +46,14 @@ namespace db0 * Cast to pointer */ inline operator ptr_t&() { - return this->v_this; + return *this; } /** * 
Cast to const-pointer */ inline operator const ptr_t&() const { - return this->v_this; + return *this; } }; diff --git a/src/dbzero/core/collections/sgtree/sgtree_node.hpp b/src/dbzero/core/collections/sgtree/sgtree_node.hpp index c17353bf..0361f4bf 100644 --- a/src/dbzero/core/collections/sgtree/sgtree_node.hpp +++ b/src/dbzero/core/collections/sgtree/sgtree_node.hpp @@ -79,12 +79,13 @@ DB0_PACKED_BEGIN data_t m_data; }; DB0_PACKED_END - - template class o_sgtree_node_traits + + template + class o_sgtree_node_traits { - public : - typedef typename data_t::Initializer Initializer; - typedef typename v_object >::ptr_t node_ptr_t; + public: + using Initializer = typename data_t::Initializer; + using node_ptr_t = typename v_object >::ptr_t; struct comp_t { diff --git a/src/dbzero/core/collections/sgtree/v_sgtree.hpp b/src/dbzero/core/collections/sgtree/v_sgtree.hpp index f63e242c..17d0a1ef 100644 --- a/src/dbzero/core/collections/sgtree/v_sgtree.hpp +++ b/src/dbzero/core/collections/sgtree/v_sgtree.hpp @@ -121,7 +121,7 @@ DB0_PACKED_END { public: using super = typename node_t::tree_base_t; - using c_type = typename super::c_type; + using c_type = typename super::ContainerT; using comp_t = typename node_t::comp_t; using node_ptr_t = typename node_t::ptr_t; using ptr_t = typename super::ptr_t; @@ -226,28 +226,23 @@ DB0_PACKED_END using const_iterator = iterator; iterator begin() { - // cast to node_prt_t - return _Tree::begin_node(node_ptr_t(this->get_v_ptr())); + return _Tree::begin_node(node_ptr_t(*this)); } iterator end() { - // cast to node_prt_t - return _Tree::end_node(node_ptr_t(this->get_v_ptr())); + return _Tree::end_node(node_ptr_t(*this)); } - iterator begin() const { - // cast to node_prt_t - return _Tree::begin_node(node_ptr_t(this->get_v_ptr())); + iterator begin() const { + return _Tree::begin_node(node_ptr_t(*this)); } iterator end() const { - // cast to node_prt_t - return _Tree::end_node(node_ptr_t(this->get_v_ptr())); + return 
_Tree::end_node(node_ptr_t(*this)); } bool empty() const { - // cast to node_prt_t - return (_Tree::begin_node(node_ptr_t(this->get_v_ptr())) == _Tree::end_node(node_ptr_t(this->get_v_ptr()))); + return (_Tree::begin_node(node_ptr_t(*this))) == _Tree::end_node(node_ptr_t(*this)); } // This method allows constructing an iterator from a previously saved address @@ -271,7 +266,8 @@ DB0_PACKED_END * KeyInitializer - node key initializer type * args - data initializers */ - template iterator insert_equal(const KeyInitializer &key, Args&&... args) + template + iterator insert_equal(const KeyInitializer &key, Args&&... args) { std::size_t depth; link_data ld; @@ -282,7 +278,7 @@ DB0_PACKED_END SG_Tree::link(this->head(), new_node, ld); SG_Tree::rebalance_after_insertion(new_node, depth, this->modify().size++, _alpha); this->updateMaxTreeSize(); - return new_node.get_v_ptr(); + return new_node; } /** @@ -300,7 +296,7 @@ DB0_PACKED_END SG_Tree::link(this->head(), new_node, ld); SG_Tree::rebalance_after_insertion(new_node, depth, ++this->modify().size, _alpha); this->updateMaxTreeSize(); - return new_node.get_v_ptr(); + return new_node; } /** @@ -335,7 +331,7 @@ DB0_PACKED_END this->head(), new_node, commit_data, this->modify().size++, _alpha ); this->updateMaxTreeSize(); - return std::make_pair(new_node.get_v_ptr(), true); + return std::make_pair(new_node, true); } /** @@ -436,7 +432,7 @@ DB0_PACKED_END /** * Destroy SG-Tree and all its nodes (v-objects) */ - void destroy() const + void destroy() { // destroy SG-Tree starting from the "head" element destroyHeadNode(this->head()); @@ -466,16 +462,14 @@ DB0_PACKED_END #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstrict-aliasing" #endif - node_ptr_t &head() - { + node_ptr_t &head() { // cast to head - return (node_ptr_t&)(this->v_this); + return reinterpret_cast(*this); } - - const node_ptr_t &head() const - { + + const node_ptr_t &head() const { // cast to head - return (const node_ptr_t&)(this->v_this); + 
return reinterpret_cast(*this); } #ifdef __linux__ @@ -562,7 +556,7 @@ DB0_PACKED_END * Join / use specialized comparer */ template bool join(join_stack &it, const KeyT &key, - NodePtrKeyComp key_comp, int direction) const + NodePtrKeyComp key_comp, int direction) const { if (direction > 0) { // initialize join stack @@ -601,11 +595,10 @@ DB0_PACKED_END /// joinBound implementation with dedicated key comparator template void joinBound(join_stack &it, const KeyT &key, - NodePtrKeyComp key_comp) const + NodePtrKeyComp key_comp) const { // initialize join stack - if (it.empty()) - { + if (it.empty()) { SG_Tree::beginJoinBackward(this->head(),it); } SG_Tree::joinBound(it,key, key_comp); diff --git a/src/dbzero/core/collections/vector/v_sorted_sequence.hpp b/src/dbzero/core/collections/vector/v_sorted_sequence.hpp index 8809b51e..16d344a1 100644 --- a/src/dbzero/core/collections/vector/v_sorted_sequence.hpp +++ b/src/dbzero/core/collections/vector/v_sorted_sequence.hpp @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/src/dbzero/core/collections/vector/v_sorted_vector.hpp b/src/dbzero/core/collections/vector/v_sorted_vector.hpp index 1616b756..c5baf02f 100644 --- a/src/dbzero/core/collections/vector/v_sorted_vector.hpp +++ b/src/dbzero/core/collections/vector/v_sorted_vector.hpp @@ -834,7 +834,7 @@ DB0_PACKED_BEGIN return ((*this)->m_size == 0); } - void destroy() const + void destroy() { // container destroy (*this)->destroy(this->getMemspace(), m_item_destroy_func); diff --git a/src/dbzero/core/memory/PrefixImpl.hpp b/src/dbzero/core/memory/PrefixImpl.hpp index f8f598db..b750402b 100644 --- a/src/dbzero/core/memory/PrefixImpl.hpp +++ b/src/dbzero/core/memory/PrefixImpl.hpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/dbzero/core/vspace/db0_ptr.hpp b/src/dbzero/core/vspace/db0_ptr.hpp index 3e6c75ba..55b12577 100644 --- a/src/dbzero/core/vspace/db0_ptr.hpp +++ 
b/src/dbzero/core/vspace/db0_ptr.hpp @@ -1,6 +1,5 @@ #pragma once -#include "v_ptr.hpp" #include #include #include diff --git a/src/dbzero/core/vspace/v_object.hpp b/src/dbzero/core/vspace/v_object.hpp index 8c0bf145..d0e0483c 100644 --- a/src/dbzero/core/vspace/v_object.hpp +++ b/src/dbzero/core/vspace/v_object.hpp @@ -1,6 +1,6 @@ #pragma once -#include "vtypeless.hpp" +#include "v_ptr.hpp" #include #include @@ -9,33 +9,40 @@ namespace db0 { struct tag_verified {}; - + /** * Base class for vspace-mapped objects * @tparam T container object type */ template - class v_object: public v_typeless + class v_object: public v_ptr { public: using ContainerT = T; + // for compatiblility with intrusive containers (e.g. v_sgtree) + using ptr_t = v_ptr; v_object() = default; + v_object(const ptr_t &ptr) + : ptr_t(ptr) + { + } + v_object(mptr ptr, FlagSet access_mode = {}) - : vtypeless(ptr, access_mode) + : ptr_t(ptr, access_mode) { } // Construct a verified instance - i.e. backed by a valid db0 address with a known size v_object(db0::tag_verified, mptr ptr, std::size_t size_of = 0, FlagSet access_mode = {}) - : vtypeless(ptr, access_mode) + : ptr_t(ptr, access_mode) { - this->safeConstRef(size_of); + ptr_t::safeConstRef(size_of); } v_object(const v_object &other) - : vtypeless(other) + : ptr_t(other) { } @@ -143,7 +150,7 @@ namespace db0 } v_object(v_object &&other) - : vtypeless(std::move(other)) + : ptr_t(std::move(other)) { } @@ -153,68 +160,11 @@ namespace db0 v_object new_object(memspace, std::forward(args)...); return new_object.getAddress(); } - - // Readonly data access operator - inline const ContainerT *operator->() const { - return this->getData(); - } - - const ContainerT *getData() const - { - assureInitialized(); - return reinterpret_cast(m_mem_lock.m_buffer); - } - + // Reference data container for read inline const ContainerT &const_ref() const { return *this->getData(); } - - // Reference data container for update - ContainerT &modify() - { - 
assert(m_memspace_ptr); - // access resource for read-write - while (!ResourceReadWriteMutexT::__ref(m_resource_flags).get()) { - ResourceReadWriteMutexT::WriteOnlyLock lock(m_resource_flags); - if (lock.isLocked()) { - // release the MemLock first to avoid or reduce CoWs - // otherwise mapRange might need to manage multiple lock versions - m_mem_lock.release(); - // lock for +write - // note that lock is getting updated, possibly copy-on-write is being performed - // NOTE: must extract physical address for mapRange - m_mem_lock = m_memspace_ptr->getPrefix().mapRange( - m_address.getOffset(), this->getSize(), m_access_mode | AccessOptions::write | AccessOptions::read); - // by calling MemLock::modify we mark the object's associated range as modified - m_mem_lock.modify(); - lock.commit_set(); - break; - } - } - // this is to notify dirty-callbacks if needed - return *reinterpret_cast(m_mem_lock.m_buffer); - } - - void modify(std::size_t offset, std::size_t size) - { - auto &ref = modify(); - m_mem_lock.modify((std::byte*)&ref + offset, size); - } - - void destroy() - { - if (m_address.isValid()) { - assert(m_memspace_ptr); - // container's destroy - (*this)->destroy(*m_memspace_ptr); - m_mem_lock.release(); - m_memspace_ptr->free(m_address); - this->m_address = {}; - this->m_resource_flags = 0; - this->m_cached_size.reset(); - } - } mptr myPtr(Address address, FlagSet access_mode = {}) const { return this->getMemspace().myPtr(address, access_mode); @@ -242,17 +192,16 @@ namespace db0 return last_dp - first_dp + 1; } - // Check if the underlying resource is available as mutable - // i.e. 
was already access for read/write - bool isModified() const { - return ResourceReadWriteMutexT::__ref(m_resource_flags).get(); + v_object &operator=(v_object &&other) + { + vtypeless::operator=(std::move(other)); + return *this; } - // Get the underlying mapped range (for mutation) - MemLock modifyMappedRange() + v_object &operator=(v_object const &other) { - modify(); - return this->m_mem_lock; + vtypeless::operator=(other); + return *this; } private: @@ -267,7 +216,7 @@ namespace db0 // lock for create & write // NOTE: must extract physical address for mapRange this->m_mem_lock = memspace.getPrefix().mapRange( - m_address, size, access_mode | AccessOptions::write + this->m_address, size, access_mode | AccessOptions::write ); // mark the entire writable area as modified this->m_mem_lock.modify(); @@ -318,80 +267,7 @@ namespace db0 static inline unsigned char getLocality(FlagSet access_mode) { // NOTE: use locality = 1 for no_cache allocations, 0 otherwise (undefined) return access_mode[AccessOptions::no_cache] ? 
1 : 0; - } - - void assureInitialized() const - { - assert(m_memspace_ptr); - // access the resource for read (or check if the read or read/write access has already been gained) - while (!ResourceReadMutexT::__ref(m_resource_flags).get()) { - ResourceReadMutexT::WriteOnlyLock lock(m_resource_flags); - if (lock.isLocked()) { - // NOTE: must extract physical address for mapRange - m_mem_lock = m_memspace_ptr->getPrefix().mapRange( - m_address.getOffset(), this->getSize(), m_access_mode | AccessOptions::read); - lock.commit_set(); - break; - } - } - assert(m_mem_lock.m_buffer); - } - - // version with known size-of (pre-retrieved from the allocator) - // we made it as a separate implementation for potential performance gains - void assureInitialized(std::size_t size_of) const - { - assert(m_memspace_ptr); - // access the resource for read (or check if the read or read/write access has already been gained) - while (!ResourceReadMutexT::__ref(m_resource_flags).get()) { - ResourceReadMutexT::WriteOnlyLock lock(m_resource_flags); - if (lock.isLocked()) { - // NOTE: must extract physical address for mapRange - m_mem_lock = m_memspace_ptr->getPrefix().mapRange( - m_address.getOffset(), size_of, m_access_mode | AccessOptions::read); - lock.commit_set(); - break; - } - } - assert(m_mem_lock.m_buffer); - } - - const ContainerT &safeConstRef(std::size_t size_of = 0) const - { - if (!size_of) { - size_of = this->getSize(); - } - assureInitialized(size_of); - return ContainerT::__safe_const_ref( - safe_buf_t((std::byte*)m_mem_lock.m_buffer, (std::byte*)m_mem_lock.m_buffer + size_of) - ); - } - - // Resolve the instance size - std::uint32_t fetchSize() const - { - assert(m_memspace_ptr); - if constexpr(metaprog::has_constant_size::value) { - // fixed size type - return ContainerT::measure(); - } - else if constexpr(metaprog::has_fixed_header::value) { - v_object header(mptr{*m_memspace_ptr, m_address}); - return header.getData()->getOBaseSize(); - } - - // retrieve from allocator 
(slowest) - return m_memspace_ptr->getAllocator().getAllocSize(m_address, REALM_ID); - } - - // Get from cache or fetch size - std::uint32_t getSize() const - { - if (!m_cached_size) { - m_cached_size = fetchSize(); - } - return *m_cached_size; - } + } }; // Utility function to safely mutate a v_object's fixed-size member diff --git a/src/dbzero/core/vspace/v_ptr.hpp b/src/dbzero/core/vspace/v_ptr.hpp new file mode 100644 index 00000000..f12060a1 --- /dev/null +++ b/src/dbzero/core/vspace/v_ptr.hpp @@ -0,0 +1,200 @@ +#pragma once + +#include "vtypeless.hpp" + +namespace db0 + +{ + + template + class v_object; + + /** + * virtual pointer to object of ContainerT + */ + template + class v_ptr : public vtypeless + { + public : + using container_t = ContainerT; + using self_t = v_ptr; + + inline v_ptr() = default; + + inline v_ptr(Memspace &memspace, Address address, FlagSet access_mode = {}) + : vtypeless(memspace, address, access_mode) + { + } + + inline v_ptr(Memspace &memspace, Address address, MemLock &&lock, std::uint16_t resource_flags, + FlagSet access_mode = {}) + : vtypeless(memspace, address, std::move(lock), resource_flags, access_mode) + { + } + + v_ptr(mptr ptr) + : vtypeless(ptr) + { + } + + v_ptr(mptr ptr, FlagSet access_mode) + : vtypeless(ptr, access_mode) + { + } + + // Explicit upcast from typeless + explicit v_ptr(const vtypeless &ptr) + : vtypeless(ptr) + { + } + + void destroy() + { + assert(m_memspace_ptr); + // container's destroy + (*this)->destroy(*m_memspace_ptr); + m_mem_lock.release(); + m_memspace_ptr->free(m_address); + this->m_address = {}; + this->m_resource_flags = 0; + this->m_cached_size.reset(); + } + + ContainerT &modify() + { + assert(m_memspace_ptr); + // access resource for read-write + while (!ResourceReadWriteMutexT::__ref(m_resource_flags).get()) { + ResourceReadWriteMutexT::WriteOnlyLock lock(m_resource_flags); + if (lock.isLocked()) { + // release the MemLock first to avoid or reduce CoWs + // otherwise mapRange might need 
to manage multiple lock versions + m_mem_lock.release(); + // lock for +write + // note that lock is getting updated, possibly copy-on-write is being performed + // NOTE: must extract physical address for mapRange + m_mem_lock = m_memspace_ptr->getPrefix().mapRange( + m_address.getOffset(), this->getSize(), m_access_mode | AccessOptions::write | AccessOptions::read); + // by calling MemLock::modify we mark the object's associated range as modified + m_mem_lock.modify(); + lock.commit_set(); + break; + } + } + // this is to notify dirty-callbacks if needed + return *reinterpret_cast(m_mem_lock.m_buffer); + } + + void modify(std::size_t offset, std::size_t size) + { + auto &ref = modify(); + m_mem_lock.modify((std::byte*)&ref + offset, size); + } + + // Check if the underlying resource is available as mutable + // i.e. was already access for read/write + bool isModified() const { + return ResourceReadWriteMutexT::__ref(m_resource_flags).get(); + } + + const ContainerT *getData() const + { + assureInitialized(); + return reinterpret_cast(m_mem_lock.m_buffer); + } + + inline const ContainerT *operator->() const { + return this->getData(); + } + + // Get the underlying mapped range (for mutation) + MemLock modifyMappedRange() + { + modify(); + return this->m_mem_lock; + } + + protected: + + const ContainerT &safeConstRef(std::size_t size_of = 0) const + { + if (!size_of) { + size_of = this->getSize(); + } + assureInitialized(size_of); + return ContainerT::__safe_const_ref( + safe_buf_t((std::byte*)m_mem_lock.m_buffer, (std::byte*)m_mem_lock.m_buffer + size_of) + ); + } + + private: + + static inline unsigned char getLocality(FlagSet access_mode) { + // NOTE: use locality = 1 for no_cache allocations, 0 otherwise (undefined) + return access_mode[AccessOptions::no_cache] ? 
1 : 0; + } + + void assureInitialized() const + { + assert(m_memspace_ptr); + // access the resource for read (or check if the read or read/write access has already been gained) + while (!ResourceReadMutexT::__ref(m_resource_flags).get()) { + ResourceReadMutexT::WriteOnlyLock lock(m_resource_flags); + if (lock.isLocked()) { + // NOTE: must extract physical address for mapRange + m_mem_lock = m_memspace_ptr->getPrefix().mapRange( + m_address.getOffset(), this->getSize(), m_access_mode | AccessOptions::read); + lock.commit_set(); + break; + } + } + assert(m_mem_lock.m_buffer); + } + + // version with known size-of (pre-retrieved from the allocator) + // we made it as a separate implementation for potential performance gains + void assureInitialized(std::size_t size_of) const + { + assert(m_memspace_ptr); + // access the resource for read (or check if the read or read/write access has already been gained) + while (!ResourceReadMutexT::__ref(m_resource_flags).get()) { + ResourceReadMutexT::WriteOnlyLock lock(m_resource_flags); + if (lock.isLocked()) { + // NOTE: must extract physical address for mapRange + m_mem_lock = m_memspace_ptr->getPrefix().mapRange( + m_address.getOffset(), size_of, m_access_mode | AccessOptions::read); + lock.commit_set(); + break; + } + } + assert(m_mem_lock.m_buffer); + } + + // Resolve the instance size + std::uint32_t fetchSize() const + { + assert(m_memspace_ptr); + if constexpr(metaprog::has_constant_size::value) { + // fixed size type + return ContainerT::measure(); + } + else if constexpr(metaprog::has_fixed_header::value) { + v_object header(mptr{*m_memspace_ptr, m_address}); + return header.getData()->getOBaseSize(); + } + + // retrieve from allocator (slowest) + return m_memspace_ptr->getAllocator().getAllocSize(m_address, REALM_ID); + } + + // Get from cache or fetch size + std::uint32_t getSize() const + { + if (!m_cached_size) { + m_cached_size = fetchSize(); + } + return *m_cached_size; + } + }; + +} diff --git 
a/src/dbzero/core/vspace/vtypeless.cpp b/src/dbzero/core/vspace/vtypeless.cpp index 462796d3..2774fefc 100644 --- a/src/dbzero/core/vspace/vtypeless.cpp +++ b/src/dbzero/core/vspace/vtypeless.cpp @@ -93,7 +93,7 @@ namespace db0 return m_mem_lock.m_buffer != nullptr; } - void vtypeless::detach() + void vtypeless::detach() const { // detaching clears the reasource available for read flag while (ResourceDetachMutexT::__ref(m_resource_flags).get()) { @@ -107,7 +107,7 @@ namespace db0 } } - void vtypeless::commit() + void vtypeless::commit() const { // commit clears the reasource available for write flag // it might still be available for read diff --git a/src/dbzero/core/vspace/vtypeless.hpp b/src/dbzero/core/vspace/vtypeless.hpp index 880fbfff..1d1e5409 100644 --- a/src/dbzero/core/vspace/vtypeless.hpp +++ b/src/dbzero/core/vspace/vtypeless.hpp @@ -128,7 +128,7 @@ namespace db0 // Get use count of the underlying lock unsigned int use_count() const; - + /** * Check if the underlying resource is available in local memory */ @@ -137,13 +137,13 @@ namespace db0 /** * Detach underlying resource lock (i.e. mark resource as not available in local memory) */ - void detach(); + void detach() const; /** * Commit by marking the write as final. 
* The subsequent modify() will need to refresh the underlying lock */ - void commit(); + void commit() const; /** * Cast to a specific concrete type diff --git a/src/dbzero/object_model/has_fixture.hpp b/src/dbzero/object_model/has_fixture.hpp index f2f0dad0..32cd76a5 100644 --- a/src/dbzero/object_model/has_fixture.hpp +++ b/src/dbzero/object_model/has_fixture.hpp @@ -17,8 +17,7 @@ namespace db0 */ template class has_fixture: public BaseT { - public: - using ptr_t = typename BaseT::ptr_t; + public: has_fixture() = default; // create new instance diff --git a/src/dbzero/workspace/Fixture.hpp b/src/dbzero/workspace/Fixture.hpp index 099caef1..f8620e08 100644 --- a/src/dbzero/workspace/Fixture.hpp +++ b/src/dbzero/workspace/Fixture.hpp @@ -287,7 +287,7 @@ DB0_PACKED_BEGIN SlotAllocator &m_slot_allocator; MetaAllocator &m_meta_allocator; const std::uint64_t m_UUID; - // the registry holds active v_ptr instances (important for refresh) + // the registry holds active v_object instances (important for refresh) // and cleanup of the "hanging" references db0::GC0 *m_gc0_ptr = nullptr; StringPoolT m_string_pool; diff --git a/src/dbzero/workspace/GC0.hpp b/src/dbzero/workspace/GC0.hpp index 2bdd822d..2d66c78e 100644 --- a/src/dbzero/workspace/GC0.hpp +++ b/src/dbzero/workspace/GC0.hpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -61,9 +61,9 @@ namespace db0 static GCOps_ID m_gc_ops_id; #define GC0_Define(T) GCOps_ID T::m_gc_ops_id; - + /** - * GC0 keeps track of all "live" v_ptr instances. + * GC0 keeps track of all "live" v_object instances. * and drops associated dbzero instances once they are no longer referenced from Python * GC0 has also a persistence layer to keep track of unreferenced instances as long as * the corresponding Python objects are still alive. 
diff --git a/tests/unit_tests/VSpaceTests.cpp b/tests/unit_tests/VSpaceTests.cpp index 2a9853a6..1c451576 100644 --- a/tests/unit_tests/VSpaceTests.cpp +++ b/tests/unit_tests/VSpaceTests.cpp @@ -53,31 +53,7 @@ namespace tests db0::v_object i2(std::move(i1)); ASSERT_EQ(i2->size(), 4096); } - - TEST_F( VSpaceTests , testMoveConstructorForVPtr ) - { - auto memspace = getMemspace(); - - auto vptr_1 = db0::v_ptr::makeNew(memspace, o_binary::measure(4096), {}); - o_binary::__new(reinterpret_cast(&vptr_1.modify()), 4096); - ASSERT_TRUE(vptr_1.isAttached()); - db0::v_ptr vptr_2(std::move(vptr_1)); - - ASSERT_EQ(vptr_2->size(), 4096); - } - - TEST_F( VSpaceTests , testVPtrImplementsDetach ) - { - auto memspace = getMemspace(); - auto vptr_1 = db0::v_ptr::makeNew(memspace, o_binary::measure(4096), {}); - o_binary::__new(reinterpret_cast(&vptr_1.modify()), 4096); - ASSERT_TRUE(vptr_1.isAttached()); - - vptr_1.detach(); - ASSERT_FALSE(vptr_1.isAttached()); - } - TEST_F( VSpaceTests , testVObjectCanBeAccessedAfterDetach ) { auto memspace = getMemspace(); From 41c08a091430804289e362fdfa2ed1dd17c687f3 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Fri, 7 Nov 2025 20:42:29 +0100 Subject: [PATCH 04/11] fixes --- src/dbzero/bindings/python/Memo.cpp | 4 +++- src/dbzero/core/collections/b_index/mb_index.hpp | 2 +- src/dbzero/core/collections/sgtree/intrusive_node.hpp | 2 +- src/dbzero/core/collections/vector/v_bvector.hpp | 2 +- src/dbzero/core/vspace/v_object.hpp | 2 +- src/dbzero/object_model/dict/Dict.cpp | 2 +- src/dbzero/object_model/dict/Dict.hpp | 2 +- src/dbzero/object_model/index/Index.cpp | 2 +- src/dbzero/object_model/index/Index.hpp | 4 ++-- src/dbzero/object_model/list/List.cpp | 2 +- src/dbzero/object_model/list/List.hpp | 2 +- src/dbzero/object_model/object/ObjectImplBase.cpp | 2 +- src/dbzero/object_model/object/ObjectImplBase.hpp | 2 +- src/dbzero/object_model/set/Set.cpp | 2 +- src/dbzero/object_model/set/Set.hpp | 2 +- src/dbzero/object_model/tuple/Tuple.cpp | 2 +- 
src/dbzero/object_model/tuple/Tuple.hpp | 2 +- 17 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/dbzero/bindings/python/Memo.cpp b/src/dbzero/bindings/python/Memo.cpp index 6f576fea..8c8ab214 100644 --- a/src/dbzero/bindings/python/Memo.cpp +++ b/src/dbzero/bindings/python/Memo.cpp @@ -211,6 +211,7 @@ namespace db0::python { using Class = db0::object_model::Class; using TagIndex = db0::object_model::TagIndex; + using ExtT = typename MemoImplT::ExtT; PY_API_FUNC // the instance may already exist (e.g. if this is a singleton) @@ -230,7 +231,8 @@ namespace db0::python auto type = self->ext().getClassPtr(); if (type->isExistingSingleton(fixture_uuid)) { // drop existing instance - self->ext().destroy(); + // NOTE: may use ext() because destroy does not mutate the instance itself + const_cast(self->ext()).destroy(); // unload singleton from a different fixture if (!type->unloadSingleton(&self->modifyExt(), fixture_uuid)) { PyErr_SetString(PyExc_RuntimeError, "Unloading singleton failed"); diff --git a/src/dbzero/core/collections/b_index/mb_index.hpp b/src/dbzero/core/collections/b_index/mb_index.hpp index e8f51f41..1570f773 100644 --- a/src/dbzero/core/collections/b_index/mb_index.hpp +++ b/src/dbzero/core/collections/b_index/mb_index.hpp @@ -613,7 +613,7 @@ namespace db0 /** * Destroy existing instance */ - void destroy() const { + void destroy() { m_interface.destroy(*m_memspace_ptr); } diff --git a/src/dbzero/core/collections/sgtree/intrusive_node.hpp b/src/dbzero/core/collections/sgtree/intrusive_node.hpp index 5e81b4ba..bd6187b2 100644 --- a/src/dbzero/core/collections/sgtree/intrusive_node.hpp +++ b/src/dbzero/core/collections/sgtree/intrusive_node.hpp @@ -33,7 +33,7 @@ namespace db0 // Copy constructor struct tag_copy {}; intrusive_node(tag_copy, Memspace &memspace, Memspace &other_memspace, const ptr_t &other) - : super(memspace, memspace, other_memspace, *other.get()) + : super(memspace, memspace, other_memspace, *other.getData()) { } diff 
--git a/src/dbzero/core/collections/vector/v_bvector.hpp b/src/dbzero/core/collections/vector/v_bvector.hpp index ebdebd0a..98881151 100644 --- a/src/dbzero/core/collections/vector/v_bvector.hpp +++ b/src/dbzero/core/collections/vector/v_bvector.hpp @@ -260,7 +260,7 @@ DB0_PACKED_END } } - void destroy() const + void destroy() { destroyAllBlocks(); m_pb_cache.clear(); diff --git a/src/dbzero/core/vspace/v_object.hpp b/src/dbzero/core/vspace/v_object.hpp index d0e0483c..bb4d96a5 100644 --- a/src/dbzero/core/vspace/v_object.hpp +++ b/src/dbzero/core/vspace/v_object.hpp @@ -67,7 +67,7 @@ namespace db0 template::value-1> v_object(Memspace &memspace, tag_prelocked, Tuple&& t, int_seq) { - intiNew(memspace, std::move(std::get(std::forward(t)))); + initNew(memspace, std::move(std::get(std::forward(t)))); // placement new syntax ContainerT::__new(reinterpret_cast(&this->modify()), std::get(std::forward(t))...); } diff --git a/src/dbzero/object_model/dict/Dict.cpp b/src/dbzero/object_model/dict/Dict.cpp index 311e90be..ae617738 100644 --- a/src/dbzero/object_model/dict/Dict.cpp +++ b/src/dbzero/object_model/dict/Dict.cpp @@ -269,7 +269,7 @@ namespace db0::object_model return m_index.end(); } - void Dict::destroy() const + void Dict::destroy() { unrefMembers(); m_index.destroy(); diff --git a/src/dbzero/object_model/dict/Dict.hpp b/src/dbzero/object_model/dict/Dict.hpp index 8c007ecf..87fb1928 100644 --- a/src/dbzero/object_model/dict/Dict.hpp +++ b/src/dbzero/object_model/dict/Dict.hpp @@ -89,7 +89,7 @@ DB0_PACKED_END void unrefMembers() const; - void destroy() const; + void destroy(); std::shared_ptr getIterator(ObjectPtr lang_dict) const; diff --git a/src/dbzero/object_model/index/Index.cpp b/src/dbzero/object_model/index/Index.cpp index f2a6f5c9..68acb2f9 100644 --- a/src/dbzero/object_model/index/Index.cpp +++ b/src/dbzero/object_model/index/Index.cpp @@ -577,7 +577,7 @@ namespace db0::object_model super_t::detach(); } - void Index::destroy() const + void 
Index::destroy() { m_mutation_log = nullptr; // discard any pending changes diff --git a/src/dbzero/object_model/index/Index.hpp b/src/dbzero/object_model/index/Index.hpp index 250a9e3f..bc3dc7c7 100644 --- a/src/dbzero/object_model/index/Index.hpp +++ b/src/dbzero/object_model/index/Index.hpp @@ -73,7 +73,7 @@ namespace db0::object_model void detach() const; - void destroy() const; + void destroy(); // remove any cached updates / revert void rollback(); @@ -225,7 +225,7 @@ namespace db0::object_model this->modify().m_index_addr = new_range_tree.getAddress(); } - template const typename db0::RangeTree &getExistingRangeTree() const + template typename db0::RangeTree &getExistingRangeTree() const { assert(hasRangeTree()); return const_cast(this)->getRangeTree(); diff --git a/src/dbzero/object_model/list/List.cpp b/src/dbzero/object_model/list/List.cpp index 06764cad..26a3f215 100644 --- a/src/dbzero/object_model/list/List.cpp +++ b/src/dbzero/object_model/list/List.cpp @@ -183,7 +183,7 @@ namespace db0::object_model super_t::moveTo(fixture); } - void List::destroy() const + void List::destroy() { clearMembers(); super_t::destroy(); diff --git a/src/dbzero/object_model/list/List.hpp b/src/dbzero/object_model/list/List.hpp index 71cc34e4..1f857117 100644 --- a/src/dbzero/object_model/list/List.hpp +++ b/src/dbzero/object_model/list/List.hpp @@ -64,7 +64,7 @@ namespace db0::object_model void moveTo(db0::swine_ptr &); - void destroy() const; + void destroy(); void clearMembers() const; diff --git a/src/dbzero/object_model/object/ObjectImplBase.cpp b/src/dbzero/object_model/object/ObjectImplBase.cpp index d0963a29..3b67394d 100644 --- a/src/dbzero/object_model/object/ObjectImplBase.cpp +++ b/src/dbzero/object_model/object/ObjectImplBase.cpp @@ -744,7 +744,7 @@ namespace db0::object_model } template - void ObjectImplBase::destroy() const + void ObjectImplBase::destroy() { if (this->hasInstance()) { // associated class type (may require unloading) diff --git 
a/src/dbzero/object_model/object/ObjectImplBase.hpp b/src/dbzero/object_model/object/ObjectImplBase.hpp index 6dcf5ce6..e6aa28b7 100644 --- a/src/dbzero/object_model/object/ObjectImplBase.hpp +++ b/src/dbzero/object_model/object/ObjectImplBase.hpp @@ -101,7 +101,7 @@ namespace db0::object_model // Get description of the field layout FieldLayout getFieldLayout() const; - void destroy() const; + void destroy(); // execute the function for all members (until false is returned from the input lambda) void forAll(std::function) const; diff --git a/src/dbzero/object_model/set/Set.cpp b/src/dbzero/object_model/set/Set.cpp index b9543d2e..1d0378e0 100644 --- a/src/dbzero/object_model/set/Set.cpp +++ b/src/dbzero/object_model/set/Set.cpp @@ -195,7 +195,7 @@ namespace db0::object_model return nullptr; } - void Set::destroy() const + void Set::destroy() { unrefMembers(); m_index.destroy(); diff --git a/src/dbzero/object_model/set/Set.hpp b/src/dbzero/object_model/set/Set.hpp index b13b975c..4121489d 100644 --- a/src/dbzero/object_model/set/Set.hpp +++ b/src/dbzero/object_model/set/Set.hpp @@ -82,7 +82,7 @@ DB0_PACKED_END void detach() const; // drop underlying dbzero representation - void destroy() const; + void destroy(); const_iterator begin() const; const_iterator end() const; diff --git a/src/dbzero/object_model/tuple/Tuple.cpp b/src/dbzero/object_model/tuple/Tuple.cpp index 9dee6b31..90cbd845 100644 --- a/src/dbzero/object_model/tuple/Tuple.cpp +++ b/src/dbzero/object_model/tuple/Tuple.cpp @@ -146,7 +146,7 @@ namespace db0::object_model return !(*this == tuple); } - void Tuple::destroy() const + void Tuple::destroy() { auto fixture = this->getFixture(); for (auto &elem: this->getData()->items()) { diff --git a/src/dbzero/object_model/tuple/Tuple.hpp b/src/dbzero/object_model/tuple/Tuple.hpp index c68373f1..89b7a68c 100644 --- a/src/dbzero/object_model/tuple/Tuple.hpp +++ b/src/dbzero/object_model/tuple/Tuple.hpp @@ -95,7 +95,7 @@ DB0_PACKED_END void operator=(Tuple &&); 
bool operator!=(const Tuple &) const; - void destroy() const; + void destroy(); const o_typed_item *begin() const; const o_typed_item *end() const; From 2bbc3b147155c78f3ad80221eb6e9e0fa7ff0fc6 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Sat, 8 Nov 2025 19:22:08 +0100 Subject: [PATCH 05/11] WIP: save work --- python_tests/test_object_stress.py | 8 +- src/dbzero/bindings/python/PyToolkit.cpp | 8 +- .../bindings/python/collections/PyIndex.cpp | 12 +- .../bindings/python/collections/PyIndex.hpp | 2 +- .../core/collections/range_tree/RangeTree.hpp | 21 ++-- src/dbzero/core/memory/Memspace.cpp | 7 +- src/dbzero/core/memory/Memspace.hpp | 25 +++- src/dbzero/core/vspace/v_object.hpp | 26 ++-- src/dbzero/core/vspace/v_ptr.hpp | 15 ++- src/dbzero/core/vspace/vtypeless.cpp | 10 +- src/dbzero/object_model/ObjectBase.hpp | 10 +- src/dbzero/object_model/index/Index.cpp | 16 ++- src/dbzero/object_model/index/Index.hpp | 15 +-- .../object_model/index/IndexBuilder.hpp | 14 ++- src/dbzero/workspace/Fixture.cpp | 52 ++++---- src/dbzero/workspace/FixtureThreads.cpp | 8 +- src/dbzero/workspace/FixtureThreads.hpp | 4 +- src/dbzero/workspace/GC0.cpp | 112 ++++++++++++------ src/dbzero/workspace/GC0.hpp | 65 +++++----- 19 files changed, 267 insertions(+), 163 deletions(-) diff --git a/python_tests/test_object_stress.py b/python_tests/test_object_stress.py index 5df38c7d..2abe207b 100644 --- a/python_tests/test_object_stress.py +++ b/python_tests/test_object_stress.py @@ -63,7 +63,7 @@ def read_value(value) -> int: total_bytes += len(buf[-1].value) count += 1 if total_bytes > report_bytes: - pre_commit = datetime.now() + flush = datetime.now() print("*** next transaction ***") db0.commit() storage_stats = db0.get_storage_stats() @@ -71,7 +71,7 @@ def read_value(value) -> int: print(f"Rand DRAM I/O ops: {storage_stats['dram_io_rand_ops'] - rand_dram_io}") print(f"Rand file write ops: {storage_stats['file_rand_write_ops'] - rand_file_write_ops}") print(f"File bytes written: 
{storage_stats['file_bytes_written'] - bytes_written}") - print(f"Commit took: {datetime.now() - pre_commit}\n") + print(f"Commit took: {datetime.now() - flush}\n") rand_dram_io = storage_stats["dram_io_rand_ops"] rand_file_write_ops = storage_stats["file_rand_write_ops"] bytes_written = storage_stats["file_bytes_written"] @@ -107,7 +107,7 @@ def rand_string(max_len): total_bytes += len(buf[-1].value) count += 1 if total_bytes > report_bytes: - pre_commit = datetime.now() + flush = datetime.now() # NOTE: with each commit the size of GC0 is increasing due to large # number of objects referenced only from python db0.commit() @@ -116,7 +116,7 @@ def rand_string(max_len): print(f"Rand DRAM I/O ops: {storage_stats['dram_io_rand_ops'] - rand_dram_io}") print(f"Rand file write ops: {storage_stats['file_rand_write_ops'] - rand_file_write_ops}") print(f"File bytes written: {storage_stats['file_bytes_written'] - bytes_written}") - print(f"Commit took: {datetime.now() - pre_commit}") + print(f"Commit took: {datetime.now() - flush}") rand_dram_io = storage_stats["dram_io_rand_ops"] rand_file_write_ops = storage_stats["file_rand_write_ops"] bytes_written = storage_stats["file_bytes_written"] diff --git a/src/dbzero/bindings/python/PyToolkit.cpp b/src/dbzero/bindings/python/PyToolkit.cpp index a4f2a4ce..d40fddc1 100644 --- a/src/dbzero/bindings/python/PyToolkit.cpp +++ b/src/dbzero/bindings/python/PyToolkit.cpp @@ -259,7 +259,7 @@ namespace db0::python } return shared_py_cast(std::move(list_object)); } - + PyToolkit::ObjectSharedPtr PyToolkit::unloadByteArray(db0::swine_ptr fixture, Address address, AccessFlags access_mode) { @@ -281,9 +281,9 @@ namespace db0::python return shared_py_cast(std::move(byte_array_object)); } - PyToolkit::ObjectSharedPtr PyToolkit::unloadIndex(db0::swine_ptr fixture, + PyToolkit::ObjectSharedPtr PyToolkit::unloadIndex(db0::swine_ptr fixture, Address address, std::uint16_t, AccessFlags access_mode) - { + { // try pulling from cache first auto 
&lang_cache = fixture->getLangCache(); auto object_ptr = lang_cache.get(address); @@ -292,7 +292,7 @@ namespace db0::python return object_ptr; } - auto index_object = IndexDefaultObject_new(); + auto index_object = Py_OWN(IndexDefaultObject_new()); // retrieve actual dbzero instance index_object->unload(fixture, address, access_mode); diff --git a/src/dbzero/bindings/python/collections/PyIndex.cpp b/src/dbzero/bindings/python/collections/PyIndex.cpp index 2bef4015..eec72314 100644 --- a/src/dbzero/bindings/python/collections/PyIndex.cpp +++ b/src/dbzero/bindings/python/collections/PyIndex.cpp @@ -40,8 +40,8 @@ namespace db0::python return reinterpret_cast(type->tp_alloc(type, 0)); } - shared_py_object IndexDefaultObject_new() { - return { IndexObject_new(&IndexObjectType, NULL, NULL), false }; + IndexObject *IndexDefaultObject_new() { + return IndexObject_new(&IndexObjectType, NULL, NULL); } void PyAPI_IndexObject_del(IndexObject* index_obj) @@ -67,12 +67,12 @@ namespace db0::python IndexObject *tryMakeIndex(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { // make actual dbzero instance, use default fixture - auto index_object = IndexDefaultObject_new(); + auto py_index = Py_OWN(IndexDefaultObject_new()); db0::FixtureLock lock(PyToolkit::getPyWorkspace().getWorkspace().getCurrentFixture()); - index_object->makeNew(*lock); + auto &index = py_index->makeNew(*lock); // register newly created index with py-object cache - lock->getLangCache().add(index_object.get()->ext().getAddress(), index_object.get()); - return index_object.steal(); + lock->getLangCache().add(index.getAddress(), py_index.get()); + return py_index.steal(); } IndexObject *PyAPI_makeIndex(PyObject *self, PyObject *const *args, Py_ssize_t nargs) diff --git a/src/dbzero/bindings/python/collections/PyIndex.hpp b/src/dbzero/bindings/python/collections/PyIndex.hpp index 821b8baa..eba2f877 100644 --- a/src/dbzero/bindings/python/collections/PyIndex.hpp +++ 
b/src/dbzero/bindings/python/collections/PyIndex.hpp @@ -10,7 +10,7 @@ namespace db0::python using IndexObject = PyWrapper; IndexObject *IndexObject_new(PyTypeObject *type, PyObject *, PyObject *); - shared_py_object IndexDefaultObject_new(); + IndexObject* IndexDefaultObject_new(); void PyAPI_IndexObject_del(IndexObject* self); Py_ssize_t PyAPI_IndexObject_len(IndexObject *); diff --git a/src/dbzero/core/collections/range_tree/RangeTree.hpp b/src/dbzero/core/collections/range_tree/RangeTree.hpp index 54c97a5a..989a3523 100644 --- a/src/dbzero/core/collections/range_tree/RangeTree.hpp +++ b/src/dbzero/core/collections/range_tree/RangeTree.hpp @@ -158,6 +158,8 @@ DB0_PACKED_END if (!range.isUnbound() || begin == end) { break; } + // FIXME: log + std::cout << "!!! bulkInsert continuing to insert into new range" << std::endl; // in case of unbound ranges (i.e. the last range) append a new one and continue range = insertRange(*begin); } @@ -240,9 +242,11 @@ DB0_PACKED_END auto null_block_ptr = getNullBlock(); assert(null_block_ptr); - + // erase values from the null block directly - auto diff = null_block_ptr->bulkErase(begin, end, static_cast(nullptr), erase_callback_ptr); + auto diff = null_block_ptr->bulkErase( + begin, end, static_cast(nullptr), erase_callback_ptr + ); if (diff > 0) { this->modify().m_size -= diff; } @@ -428,14 +432,17 @@ DB0_PACKED_END m_it.modifyItem().m_first_item = first_item; } } - + // Forwards a value to the add item callback + /* FIXME: log std::function add_item_callback = [&](ItemT item) { (*add_callback_ptr)(item.m_value); - }; + }; std::function *add_item_callback_ptr = (add_callback_ptr ? 
&add_item_callback : nullptr); return (*this)->bulkInsertUnique(begin_item, end_item, add_item_callback_ptr).second; + */ + return (*this)->bulkInsertUnique(begin_item, end_item).second; } /** @@ -603,7 +610,7 @@ DB0_PACKED_END CallbackT *erase_callback_ptr = nullptr) { // erase items first - if (!m_remove_items.empty()) { + if (!m_remove_items.empty()) { std::vector items; std::copy(m_remove_items.begin(), m_remove_items.end(), std::back_inserter(items)); range_tree.bulkErase(items.begin(), items.end(), erase_callback_ptr); @@ -616,7 +623,7 @@ DB0_PACKED_END } if (!m_add_items.empty()) { std::vector items; - std::copy(m_add_items.begin(), m_add_items.end(), std::back_inserter(items)); + std::copy(m_add_items.begin(), m_add_items.end(), std::back_inserter(items)); range_tree.bulkInsert(items.begin(), items.end(), add_callback_ptr); m_add_items.clear(); } @@ -751,7 +758,7 @@ DB0_PACKED_END // retrieve existing range return { m_index, it, m_index.begin(), m_index.end(), it == m_index.begin(), true }; } - + RangeIterator insertRange(ItemT item) { BlockT new_block(this->getMemspace()); diff --git a/src/dbzero/core/memory/Memspace.cpp b/src/dbzero/core/memory/Memspace.cpp index 6273a117..49d32ffe 100644 --- a/src/dbzero/core/memory/Memspace.cpp +++ b/src/dbzero/core/memory/Memspace.cpp @@ -5,7 +5,7 @@ namespace db0 { - + Memspace::Memspace(std::shared_ptr prefix, std::shared_ptr allocator, std::optional uuid) : m_prefix(prefix) , m_storage_ptr(&prefix->getStorage()) @@ -52,6 +52,9 @@ namespace db0 bool Memspace::commit(ProcessTimer *timer) { assert(m_prefix); + m_maybe_need_flush.clear(); + m_maybe_modified.clear(); + // prepare the allocator for the next transaction getAllocatorForUpdate().commit(); auto state_num = m_prefix->getStateNum(false); @@ -70,6 +73,8 @@ namespace db0 timer = std::make_unique("Memspace::close", timer_ptr); } + m_maybe_need_flush.clear(); + m_maybe_modified.clear(); m_allocator_ptr = nullptr; m_allocator = nullptr; m_prefix->close(); diff --git 
a/src/dbzero/core/memory/Memspace.hpp b/src/dbzero/core/memory/Memspace.hpp index 8252ed00..7172bc64 100644 --- a/src/dbzero/core/memory/Memspace.hpp +++ b/src/dbzero/core/memory/Memspace.hpp @@ -13,6 +13,8 @@ namespace db0 { class ProcessTimer; + class vtypeless; + class GC0; /** * Combines application requisites, prefix related @@ -112,6 +114,14 @@ namespace db0 // NOTE: m_page_shift is 0 if page size is not a power of 2 return m_page_shift ? (address.getOffset() >> m_page_shift) : (address.getOffset() / m_page_size); } + + void collectForFlush(db0::vtypeless *vptr) { + m_maybe_need_flush.push_back(vptr); + } + + void collectModified(db0::vtypeless *vptr) { + m_maybe_modified.push_back(vptr); + } protected: std::shared_ptr m_prefix; @@ -124,12 +134,23 @@ namespace db0 bool m_atomic = false; std::size_t m_page_size = 0; unsigned int m_page_shift = 0; - + // exhaustive list of instances which may need flush + std::vector m_maybe_need_flush; + // exhaustive list of pointers to instances (may be expired!) 
modified within the current transaction + std::vector m_maybe_modified; + inline Allocator &getAllocatorForUpdate() { assert(m_allocator_ptr); return *m_allocator_ptr; } - + + const std::vector &getModified() const { + return m_maybe_modified; + } + + const std::vector &getForFlush() const { + return m_maybe_need_flush; + } }; } diff --git a/src/dbzero/core/vspace/v_object.hpp b/src/dbzero/core/vspace/v_object.hpp index bb4d96a5..3e666ffc 100644 --- a/src/dbzero/core/vspace/v_object.hpp +++ b/src/dbzero/core/vspace/v_object.hpp @@ -57,8 +57,8 @@ namespace db0 initNew( memspace, ContainerT::measure(std::get(std::forward(t))...), - std::get(std::forward(t)) ); - + std::get(std::forward(t)) + ); ContainerT::__new(reinterpret_cast(&this->modify()), std::get(std::forward(t))...); } @@ -170,18 +170,6 @@ namespace db0 return this->getMemspace().myPtr(address, access_mode); } - /* FIXME: - void commit() const - { - // NOTE: this operation assumes that only one v_object instance pointing to the same address exists - // otherwise modifications done to one instance will not be visible to the other instances - // this assumption holds true for dbzero objects but if unable to fulfill in the future, - // it must be changed to "this->detach()" - - v_this.commit(); - } - */ - // Calculate the number of DPs spanned by this object // NOTE: even small objects may span more than 1 DP if are positioned on a boundary // however allocators typically will avoid such situations @@ -222,6 +210,8 @@ namespace db0 this->m_mem_lock.modify(); this->m_resource_flags = db0::RESOURCE_AVAILABLE_FOR_READ | db0::RESOURCE_AVAILABLE_FOR_WRITE; this->m_access_mode = access_mode; + // collect as a modified instance for commit speedup + this->m_memspace_ptr->collectModified(this); } // Create a new instance using allocUnique functionality @@ -243,7 +233,9 @@ namespace db0 this->m_mem_lock.modify(); // mark as available for both write & read this->m_resource_flags = db0::RESOURCE_AVAILABLE_FOR_READ | 
db0::RESOURCE_AVAILABLE_FOR_WRITE; - this->m_access_mode = access_mode; + this->m_access_mode = access_mode; + // collect as a modified instance for commit speedup + this->m_memspace_ptr->collectModified(this); } /** @@ -262,8 +254,10 @@ namespace db0 // mark as available for read & write this->m_resource_flags = db0::RESOURCE_AVAILABLE_FOR_READ | db0::RESOURCE_AVAILABLE_FOR_WRITE; this->m_access_mode = access_mode; + // collect as a modified instance for commit speedup + this->m_memspace_ptr->collectModified(this); } - + static inline unsigned char getLocality(FlagSet access_mode) { // NOTE: use locality = 1 for no_cache allocations, 0 otherwise (undefined) return access_mode[AccessOptions::no_cache] ? 1 : 0; diff --git a/src/dbzero/core/vspace/v_ptr.hpp b/src/dbzero/core/vspace/v_ptr.hpp index f12060a1..f300e9f7 100644 --- a/src/dbzero/core/vspace/v_ptr.hpp +++ b/src/dbzero/core/vspace/v_ptr.hpp @@ -8,7 +8,7 @@ namespace db0 template class v_object; - + /** * virtual pointer to object of ContainerT */ @@ -74,9 +74,12 @@ namespace db0 // note that lock is getting updated, possibly copy-on-write is being performed // NOTE: must extract physical address for mapRange m_mem_lock = m_memspace_ptr->getPrefix().mapRange( - m_address.getOffset(), this->getSize(), m_access_mode | AccessOptions::write | AccessOptions::read); + m_address.getOffset(), this->getSize(), m_access_mode | AccessOptions::write | AccessOptions::read + ); // by calling MemLock::modify we mark the object's associated range as modified m_mem_lock.modify(); + // collect as a modified instance for commit speedup + m_memspace_ptr->collectModified(this); lock.commit_set(); break; } @@ -115,7 +118,7 @@ namespace db0 } protected: - + const ContainerT &safeConstRef(std::size_t size_of = 0) const { if (!size_of) { @@ -143,7 +146,8 @@ namespace db0 if (lock.isLocked()) { // NOTE: must extract physical address for mapRange m_mem_lock = m_memspace_ptr->getPrefix().mapRange( - m_address.getOffset(), 
this->getSize(), m_access_mode | AccessOptions::read); + m_address.getOffset(), this->getSize(), m_access_mode | AccessOptions::read + ); lock.commit_set(); break; } @@ -162,7 +166,8 @@ namespace db0 if (lock.isLocked()) { // NOTE: must extract physical address for mapRange m_mem_lock = m_memspace_ptr->getPrefix().mapRange( - m_address.getOffset(), size_of, m_access_mode | AccessOptions::read); + m_address.getOffset(), size_of, m_access_mode | AccessOptions::read + ); lock.commit_set(); break; } diff --git a/src/dbzero/core/vspace/vtypeless.cpp b/src/dbzero/core/vspace/vtypeless.cpp index 2774fefc..9c7f390e 100644 --- a/src/dbzero/core/vspace/vtypeless.cpp +++ b/src/dbzero/core/vspace/vtypeless.cpp @@ -88,7 +88,7 @@ namespace db0 unsigned int vtypeless::use_count() const { return m_mem_lock.use_count(); } - + bool vtypeless::isAttached() const { return m_mem_lock.m_buffer != nullptr; } @@ -109,9 +109,17 @@ namespace db0 void vtypeless::commit() const { + /* FIXME: + // NOTE: this operation assumes that only one v_object instance pointing to the same address exists + // otherwise modifications done to one instance will not be visible to the other instances + // this assumption holds true for dbzero objects but if unable to fulfill in the future, + // it must be changed to "this->detach()" + // commit clears the reasource available for write flag // it might still be available for read atomicResetFlags(m_resource_flags, db0::RESOURCE_AVAILABLE_FOR_WRITE); + */ + detach(); } } \ No newline at end of file diff --git a/src/dbzero/object_model/ObjectBase.hpp b/src/dbzero/object_model/ObjectBase.hpp index 61239ffe..24635340 100644 --- a/src/dbzero/object_model/ObjectBase.hpp +++ b/src/dbzero/object_model/ObjectBase.hpp @@ -221,16 +221,16 @@ namespace db0 has_fixture::init(fixture, std::forward(args)...); } } - - // member should be overridden for derived types which need pre-commit - using PreCommitFunction = void (*)(void *, bool revert); - static PreCommitFunction 
getPreCommitFunction() { + + // member should be overridden for derived types which need flush + using FlushFunction = void (*)(void *, bool revert); + static FlushFunction getFlushFunction() { return nullptr; } // called from GC0 to bind GC_Ops for this type static GC_Ops getGC_Ops() { - return { hasRefsOp, dropOp, detachOp, commitOp, getTypedAddress, dropByAddr, T::getPreCommitFunction() }; + return { hasRefsOp, dropOp, detachOp, commitOp, getTypedAddress, dropByAddr, T::getFlushFunction() }; } void operator=(ObjectBase &&other) diff --git a/src/dbzero/object_model/index/Index.cpp b/src/dbzero/object_model/index/Index.cpp index 68acb2f9..bb77f119 100644 --- a/src/dbzero/object_model/index/Index.cpp +++ b/src/dbzero/object_model/index/Index.cpp @@ -68,7 +68,7 @@ namespace db0::object_model { // in case of index we need to unregister first because otherwise // it may trigger discard of unflushed data (which has to be performed before destruction of 'builder') - unregister(); + unregister(); // after unregister object might still have unflushed data, we need to flush them if (hasInstance() && isDirty()) { @@ -198,7 +198,7 @@ namespace db0::object_model } m_builder.flush(); } - + void Index::rollback() { m_builder.rollback(); } @@ -298,6 +298,8 @@ namespace db0::object_model << " does not allow adding key type: " << LangToolkit::getTypeName(key) << THROWF_END; } + // subscribe for flush operation + getMemspace().collectForFlush(this); m_mutation_log->onDirty(); } @@ -333,6 +335,8 @@ namespace db0::object_model << " does not allow keys of type: " << LangToolkit::getTypeName(key) << THROWF_END; } + // subscribe for flush operation + getMemspace().collectForFlush(this); m_mutation_log->onDirty(); } @@ -453,6 +457,8 @@ namespace db0::object_model << "Unsupported index data type: " << static_cast(m_builder.getDataType()) << THROWF_END; } + // subscribe for flush operation + getMemspace().collectForFlush(this); m_mutation_log->onDirty(); } @@ -475,7 +481,7 @@ namespace 
db0::object_model return type_manager.extractUInt64(type_manager.getTypeId(value), value); } - void Index::preCommit(bool revert) + void Index::flush(bool revert) { if (revert) { rollback(); @@ -484,8 +490,8 @@ namespace db0::object_model } } - void Index::preCommitOp(void *ptr, bool revert) { - static_cast(ptr)->preCommit(revert); + void Index::flushOp(void *ptr, bool revert) { + static_cast(ptr)->flush(revert); } void Index::removeNull(ObjectPtr obj_ptr) diff --git a/src/dbzero/object_model/index/Index.hpp b/src/dbzero/object_model/index/Index.hpp index bc3dc7c7..54db25e2 100644 --- a/src/dbzero/object_model/index/Index.hpp +++ b/src/dbzero/object_model/index/Index.hpp @@ -61,8 +61,8 @@ namespace db0::object_model std::unique_ptr range(ObjectPtr min, ObjectPtr max, bool null_first = false) const; - static PreCommitFunction getPreCommitFunction() { - return preCommitOp; + static FlushFunction getFlushFunction() { + return flushOp; } void moveTo(db0::swine_ptr &); @@ -84,8 +84,8 @@ namespace db0::object_model // the default / provisional type using DefaultT = std::int64_t; friend struct Builder; - void preCommit(bool revert); - static void preCommitOp(void *, bool revert); + void flush(bool revert); + static void flushOp(void *, bool revert); template static constexpr IndexDataType dataTypeOf() { @@ -105,7 +105,7 @@ namespace db0::object_model // concrete data type to be assigned (only allowed to update from Auto) IndexDataType m_initial_type; IndexDataType m_new_type; - mutable std::shared_ptr m_index_builder; + mutable std::shared_ptr m_index_builder; Builder(Index &); @@ -134,7 +134,7 @@ namespace db0::object_model m_index_builder = db0::make_shared_void >(); m_new_type = Index::dataTypeOf(); } - return *static_cast*>(m_index_builder.get()); + return *static_cast*>(m_index_builder.get()); } template IndexBuilder &getExisting() const @@ -154,6 +154,8 @@ namespace db0::object_model } if (!std::is_same_v) { + // FIXME: log + std::cout << "Index builder update !!" 
<< std::endl; m_index_builder = db0::make_shared_void >( get().releaseRemoveNullItems(), get().releaseAddNullItems(), @@ -162,7 +164,6 @@ namespace db0::object_model m_new_type = Index::dataTypeOf(); } } - }; Builder m_builder; diff --git a/src/dbzero/object_model/index/IndexBuilder.hpp b/src/dbzero/object_model/index/IndexBuilder.hpp index fa7bf740..f4461f9c 100644 --- a/src/dbzero/object_model/index/IndexBuilder.hpp +++ b/src/dbzero/object_model/index/IndexBuilder.hpp @@ -24,7 +24,8 @@ namespace db0::object_model IndexBuilder(std::unordered_set &&remove_null_values, std::unordered_set &&add_null_values, std::unordered_map &&object_cache); - + ~IndexBuilder(); + void add(KeyT key, ObjectPtr obj_ptr); void remove(KeyT key, ObjectPtr obj_ptr); @@ -66,6 +67,10 @@ namespace db0::object_model { } + template IndexBuilder::~IndexBuilder() + { + } + template void IndexBuilder::add(KeyT key, ObjectPtr obj_ptr) { super_t::add(key, addToCache(obj_ptr)); } @@ -84,6 +89,7 @@ namespace db0::object_model template void IndexBuilder::flush(RangeTreeT &index) { + /* FIXME: log std::function add_callback = [&](UniqueAddress address) { auto it = m_object_cache.find(address); assert(it != m_object_cache.end()); @@ -94,13 +100,15 @@ namespace db0::object_model auto it = m_object_cache.find(address); assert(it != m_object_cache.end()); m_type_manager.extractMutableAnyObject(it->second.get()).decRef(false); - }; + }; super_t::flush(index, &add_callback, &erase_callback); + */ + super_t::flush(index); m_object_cache.clear(); } - template + template UniqueAddress IndexBuilder::addToCache(ObjectPtr obj_ptr) { auto obj_addr = m_type_manager.extractAnyObject(obj_ptr).getUniqueAddress(); diff --git a/src/dbzero/workspace/Fixture.cpp b/src/dbzero/workspace/Fixture.cpp index 2315bd90..395638fa 100644 --- a/src/dbzero/workspace/Fixture.cpp +++ b/src/dbzero/workspace/Fixture.cpp @@ -191,13 +191,13 @@ namespace db0 // prevents commit on a closed fixture std::unique_lock lock(m_close_mutex); if 
(!Memspace::isClosed()) { - // pre-commit to prepare objects which require it (e.g. Index) for commit - // NOTE: pre-commit must NOT lock the fixture's shared mutex + // flush to prepare objects which require it (e.g. Index) for commit + // NOTE: flush must NOT lock the fixture's shared mutex if (m_gc0_ptr) { - getGC0().preCommit(); + getGC0().flushAllOf(Memspace::getForFlush()); } - - // clear lang cache again since pre-commit might've released some Python instances + + // clear lang cache again since flush might've released some Python instances m_lang_cache.clear(true); // lock for exclusive access @@ -290,27 +290,31 @@ namespace db0 bool Fixture::commit() { + auto process_timer = std::make_unique("Fixture::commit"); assert(getPrefixPtr()); - // pre-commit to prepare objects which require it (e.g. Index) for commit - // NOTE: pre-commit must NOT lock the fixture's shared mutex - // NOTE: pre-commit may release some of the Python instances + // flush to prepare objects which require it (e.g. Index) for commit + // NOTE: flush must NOT lock the fixture's shared mutex + // NOTE: flush may release some of the Python instances if (m_gc0_ptr) { - getGC0().preCommit(); + getGC0().flushAllOf(Memspace::getForFlush(), process_timer.get()); } // Flush using registered flush handlers for (auto &handler: m_flush_handlers) { handler(); } - + // Clear expired instances from cache so that they're not persisted - m_lang_cache.clear(true); + // FIXME: log + // m_lang_cache.clear(true); std::unique_lock lock(m_commit_mutex); - bool result = tryCommit(lock); + bool result = tryCommit(lock, process_timer.get()); m_updated = false; auto callbacks = collectStateReachedCallbacks(); lock.unlock(); executeStateReachedCallbacks(callbacks); + // FIXME: log + process_timer->printLog(std::cout) << std::endl; return result; } @@ -328,8 +332,8 @@ namespace db0 if (!prefix_ptr) { return result; } - - std::unique_ptr gc0_ctx = m_gc0_ptr ? 
getGC0().beginCommit() : nullptr; + + std::unique_ptr gc0_ctx = m_gc0_ptr ? getGC0().beginSave() : nullptr; // NOTE: close handlers perform internal buffers flush (e.g. TagIndex) // which may result in modifications (e.g. incRef) // it's therefore important to perform this action before GC0::commitAll (which commits finalized objects) @@ -337,14 +341,15 @@ namespace db0 commit(true); } + // Commit modified only (to avoid scan over all objects) if (m_gc0_ptr) { - getGC0().commitAll(); + getGC0().commitAllOf(Memspace::getModified(), timer.get()); } - - // commit garbage collector's state + + // Save garbage collector's state // we check if gc0 exists because the unit-tests set up may not have it if (gc0_ctx) { - gc0_ctx->commit(); + gc0_ctx->save(timer.get()); } m_string_pool.commit(); m_object_catalogue.commit(); @@ -369,11 +374,11 @@ namespace db0 return {}; } - assert(!Memspace::isClosed()); - // pre-commit to prepare objects which require it (e.g. Index) for commit - // NOTE: pre-commit must NOT lock the fixture's shared mutex + assert(!Memspace::isClosed()); + // flush to prepare objects which require it (e.g. 
Index) for commit + // NOTE: flush must NOT lock the fixture's shared mutex if (m_gc0_ptr) { - getGC0().preCommit(); + getGC0().flushAllOf(Memspace::getForFlush()); } // Flush using registered flush handlers @@ -432,7 +437,8 @@ namespace db0 void Fixture::preAtomic() { - getGC0().preCommit(); + getGC0().flushAllOf(Memspace::getForFlush()); + m_maybe_need_flush.clear(); for (auto &commit: m_close_handlers) { commit(true); } diff --git a/src/dbzero/workspace/FixtureThreads.cpp b/src/dbzero/workspace/FixtureThreads.cpp index 228f3aea..b552c78c 100644 --- a/src/dbzero/workspace/FixtureThreads.cpp +++ b/src/dbzero/workspace/FixtureThreads.cpp @@ -178,14 +178,14 @@ namespace db0 /** * Acquires locks for safe execution and handles post-commit callbacks */ - class AutoCommitContext : public FixtureThreadCallbacksContext + class AutoSaveContext : public FixtureThreadCallbacksContext { std::unique_lock m_commit_lock; std::unique_lock m_locked_context_lock; std::unique_lock m_atomic_lock; public: - AutoCommitContext( + AutoSaveContext( std::unique_lock &&commit_lock, std::unique_lock &&locked_context_lock, std::unique_lock &&atomic_lock) @@ -234,13 +234,13 @@ namespace db0 std::shared_ptr AutoCommitThread::prepareContext() { - assert(!m_tmp_context.lock() && "Only one AutoCommitContext should exist at the time!"); + assert(!m_tmp_context.lock() && "Only one AutoSaveContext should exist at the time!"); auto commit_lock = std::unique_lock(m_commit_mutex); // must acquire unique lock-context's lock auto locked_context_lock = db0::LockedContext::lockUnique(); // and the atomic lock next (order is relevant here !!) 
auto atomic_lock = db0::AtomicContext::lock(); - auto context = std::make_shared(std::move(commit_lock), + auto context = std::make_shared(std::move(commit_lock), std::move(locked_context_lock), std::move(atomic_lock) ); // To collect callbacks from fixtures as we proceed with commiting diff --git a/src/dbzero/workspace/FixtureThreads.hpp b/src/dbzero/workspace/FixtureThreads.hpp index 695700c4..cbf83f2c 100644 --- a/src/dbzero/workspace/FixtureThreads.hpp +++ b/src/dbzero/workspace/FixtureThreads.hpp @@ -86,7 +86,7 @@ namespace db0 * The purpose of the AutoCommitThread is to commit changes from all read/write fixtures * after 250ms (unless configured differently) since the last modification */ - class AutoCommitContext; + class AutoSaveContext; class AutoCommitThread: public FixtureThread { public: @@ -101,7 +101,7 @@ namespace db0 private: static std::mutex m_commit_mutex; - std::weak_ptr m_tmp_context; + std::weak_ptr m_tmp_context; }; } diff --git a/src/dbzero/workspace/GC0.cpp b/src/dbzero/workspace/GC0.cpp index 7877d27c..58640856 100644 --- a/src/dbzero/workspace/GC0.cpp +++ b/src/dbzero/workspace/GC0.cpp @@ -30,23 +30,23 @@ namespace db0 { } - GC0::CommitContext::CommitContext(GC0 &gc0) + GC0::SaveContext::SaveContext(GC0 &gc0) : m_gc0(gc0) { - assert(!m_gc0.m_commit_pending); - m_gc0.m_commit_pending = true; + assert(!m_gc0.m_save_pending); + m_gc0.m_save_pending = true; } - GC0::CommitContext::~CommitContext() + GC0::SaveContext::~SaveContext() { - assert(m_gc0.m_commit_pending); - m_gc0.m_commit_pending = false; + assert(m_gc0.m_save_pending); + m_gc0.m_save_pending = false; } - void GC0::CommitContext::commit() + void GC0::SaveContext::save(ProcessTimer *timer) { - assert(m_gc0.m_commit_pending); - m_gc0.commit(); + assert(m_gc0.m_save_pending); + m_gc0.save(timer); } bool GC0::tryRemove(void *vptr, bool is_volatile) @@ -59,9 +59,9 @@ namespace db0 NoArgsFunction drop_op = nullptr; auto &ops = m_ops[it->second]; - // if type implements preCommit then 
remove it from pre-commit map as well - if (ops.preCommit) { - m_pre_commit_map.erase(vptr); + // if type implements flush then remove it from flush map as well + if (ops.flush) { + m_flush_map.erase(vptr); } // do not drop when in read-only mode (e.g. snapshot owned) @@ -70,8 +70,8 @@ namespace db0 if (!m_read_only && ops.hasRefs && ops.drop && !is_volatile && !ops.hasRefs(it->first)) { - if (m_commit_pending) { - // must schedule for deletion since unable to drop while commit is pending + if (m_save_pending) { + // must schedule for deletion since unable to drop while save is pending auto addr_pair = ops.address(it->first); m_scheduled_for_deletion[addr_pair.first] = addr_pair.second; } else { @@ -104,36 +104,76 @@ namespace db0 } } + void GC0::commitAllOf(const std::vector &vptrs, ProcessTimer *timer_ptr) + { + std::unique_ptr timer; + if (timer_ptr) { + timer = std::make_unique("GC0::commitAllOf", timer_ptr); + } + + std::unique_lock lock(m_mutex); + std::size_t count = 0; + for (auto vptr : vptrs) { + auto it = m_vptr_map.find(vptr); + if (it != m_vptr_map.end()) { + m_ops[it->second].commit(vptr); + ++count; + } + } + // FIXME: log + std::cout << "GC0::commit size: " << count << std::endl; + } + void GC0::commitAll() { + // FIXME: log + std::cout << "commitAll" << std::endl; std::unique_lock lock(m_mutex); for (auto &vptr_item : m_vptr_map) { m_ops[vptr_item.second].commit(vptr_item.first); } } - + std::size_t GC0::size() const { std::unique_lock lock(m_mutex); return m_vptr_map.size(); } - void GC0::preCommit() + void GC0::flushAllOf(const std::vector &vptrs, ProcessTimer *timer_ptr) { + std::unique_ptr timer; + if (timer_ptr) { + timer = std::make_unique("GC0::flushAllOf", timer_ptr); + } + std::unique_lock lock(m_mutex); - // collect ops first (this is necessary because preCommit can trigger "remove" calls) - std::vector> pre_commit_ops; - std::copy(m_pre_commit_map.begin(), m_pre_commit_map.end(), std::back_inserter(pre_commit_ops)); + // collect ops 
first (this is necessary because flush can trigger "remove" calls) + std::vector> flush_ops; + for (auto vptr : vptrs) { + auto it = m_flush_map.find(vptr); + if (it != m_flush_map.end()) { + flush_ops.push_back(*it); + } + } lock.unlock(); - - // call pre-commit where it's provided - for (auto &item : pre_commit_ops) { - m_ops[item.second].preCommit(item.first, false); + + // call flush where it's provided + for (auto &item : flush_ops) { + m_ops[item.second].flush(item.first, false); } + // FIXME: log + std::cout << "GC0 flushed: " << flush_ops.size() << std::endl; } - void GC0::commit() + void GC0::save(ProcessTimer *timer_ptr) { + std::unique_ptr timer; + if (timer_ptr) { + timer = std::make_unique("GC0::save", timer_ptr); + } + + // collect unreferenced instances // Important ! Collect instance addresses first because push_back can trigger "remove" calls /* FIXME: log std::vector addresses; @@ -210,28 +250,28 @@ namespace db0 tryRemove(vptr, true); } } - // call reverse pre-commit where it's provided (use revert=true) - for (auto &item : m_pre_commit_map) { - m_ops[item.second].preCommit(item.first, true); + // call reverse flush where it's provided (use revert=true) + for (auto &item : m_flush_map) { + m_ops[item.second].flush(item.first, true); } m_volatile.clear(); m_atomic = false; } - - std::unique_ptr GC0::beginCommit() { - return std::make_unique(*this); + + std::unique_ptr GC0::beginSave() { + return std::make_unique(*this); } std::optional GC0::erase(void *vptr) { - std::optional pre_commit_op; + std::optional flush_op; std::unique_lock lock(m_mutex); assert(m_vptr_map.find(vptr) != m_vptr_map.end()); m_vptr_map.erase(vptr); - auto it = m_pre_commit_map.find(vptr); - if (it != m_pre_commit_map.end()) { - pre_commit_op = it->second; - m_pre_commit_map.erase(it); + auto it = m_flush_map.find(vptr); + if (it != m_flush_map.end()) { + flush_op = it->second; + m_flush_map.erase(it); } if (m_atomic) { @@ -241,7 +281,7 @@ namespace db0 } } } - return 
pre_commit_op; + return flush_op; } } \ No newline at end of file diff --git a/src/dbzero/workspace/GC0.hpp b/src/dbzero/workspace/GC0.hpp index 2d66c78e..74450956 100644 --- a/src/dbzero/workspace/GC0.hpp +++ b/src/dbzero/workspace/GC0.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +17,7 @@ namespace db0 { class Fixture; + class ProcessTimer; using TypedAddress = db0::object_model::TypedAddress; using StorageClass = db0::object_model::StorageClass; @@ -26,7 +28,7 @@ namespace db0 using GetAddress = std::pair (*)(const void *); using StorageClass = db0::object_model::StorageClass; using DropByAddrFunction = void (*)(db0::swine_ptr &, Address); - using PreCommitFunction = void (*)(void *, bool revert); + using FlushFunction = void (*)(void *, bool revert); struct GC_Ops { @@ -37,8 +39,8 @@ namespace db0 NoArgsFunction commit = nullptr; GetAddress address = nullptr; DropByAddrFunction dropByAddr = nullptr; - // null allowed, preCommit handler is called just before fixture.commit - PreCommitFunction preCommit = nullptr; + // null allowed, flush handler is called just before fixture.commit + FlushFunction flush = nullptr; }; struct GCOps_ID @@ -81,38 +83,37 @@ namespace db0 // move instance from another GC0 template void moveFrom(GC0 &other, void *vptr); - // preCommit calls the operation on objects which implement it - void preCommit(); - /** * Unregister instance (i.e. when reference from Python was removed) * @return true if object was also dropped */ bool tryRemove(void *vptr, bool is_volatile = false); - /** - * Detach all instances held by this registry. - */ + // flush calls the operation on objects which implement it + void flushAllOf(const std::vector &, ProcessTimer * = nullptr); + + // Detach all instances held by this registry void detachAll(); - void commitAll(); + // Commit specific (e.g. 
modified) instances held by this registry + void commitAllOf(const std::vector &, ProcessTimer * = nullptr); std::size_t size() const; - struct CommitContext + struct SaveContext { GC0 &m_gc0; - CommitContext(GC0 &gc0); - ~CommitContext(); + SaveContext(GC0 &gc0); + ~SaveContext(); - void commit(); + void save(ProcessTimer * = nullptr); }; - + /** - * Commit serializes the list of unreferenced instances to the persistence layer + * Save serializes the list of unreferenced instances to the persistence layer * this is to be able to drop those instances once the corresponding references from Python expire */ - std::unique_ptr beginCommit(); + std::unique_ptr beginSave(); template static void registerTypes(); @@ -127,11 +128,11 @@ namespace db0 void cancelAtomic(); protected: - friend CommitContext; - bool m_commit_pending = false; - - void commit(); - // @return pre-commit ops-id if element was assigned it + friend SaveContext; + bool m_save_pending = false; + + void save(ProcessTimer * = nullptr); + // @return flush ops-id if element was assigned it std::optional erase(void *vptr); private: @@ -143,9 +144,9 @@ namespace db0 const bool m_read_only; // type / ops_id std::unordered_map m_vptr_map; - // the map dedicated to instances which implement preCommit + // the map dedicated to instances which implement flush // it's assumed that it's much smaller than m_vptr_map (it duplicates some of its entries) - std::unordered_map m_pre_commit_map; + std::unordered_map m_flush_map; // objects irrevocably scheduled for deletion std::unordered_map m_scheduled_for_deletion; // flag indicating atomic operation in progress @@ -154,6 +155,8 @@ namespace db0 std::vector m_volatile; mutable std::mutex m_mutex; + void commitAll(); + template static void registerSingleType() { T::m_gc_ops_id = GCOps_ID(m_ops.size()); @@ -171,9 +174,9 @@ namespace db0 assert(m_ops[T::m_gc_ops_id].detach); assert(m_ops[T::m_gc_ops_id].address); m_vptr_map[vptr] = T::m_gc_ops_id; - // if the type 
implements preCommit then also add it to the preCommit map - if (m_ops[T::m_gc_ops_id].preCommit) { - m_pre_commit_map[vptr] = T::m_gc_ops_id; + // if the type implements flush then also add it to the flush map + if (m_ops[T::m_gc_ops_id].flush) { + m_flush_map[vptr] = T::m_gc_ops_id; } if (m_atomic) { m_volatile.push_back(vptr); @@ -183,11 +186,11 @@ namespace db0 template void GC0::moveFrom(GC0 &other, void *vptr) { std::unique_lock lock(m_mutex); - auto pre_commit_op = other.erase(vptr); + auto flush_op = other.erase(vptr); m_vptr_map[vptr] = T::m_gc_ops_id; - // also move between pre-commit maps - if (pre_commit_op) { - m_pre_commit_map[vptr] = *pre_commit_op; + // also move between flush maps + if (flush_op) { + m_flush_map[vptr] = *flush_op; } if (m_atomic) { m_volatile.push_back(vptr); From 9c5e9c01156ec4ad9ad23a128ba56aac098bbf7c Mon Sep 17 00:00:00 2001 From: Wojtek Date: Sat, 8 Nov 2025 20:32:16 +0100 Subject: [PATCH 06/11] WIP: save work --- src/dbzero/object_model/index/Index.cpp | 27 ++++++++++++++++++------- src/dbzero/workspace/Fixture.cpp | 10 +++++++-- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/dbzero/object_model/index/Index.cpp b/src/dbzero/object_model/index/Index.cpp index bb77f119..9a0ca31d 100644 --- a/src/dbzero/object_model/index/Index.cpp +++ b/src/dbzero/object_model/index/Index.cpp @@ -281,6 +281,11 @@ namespace db0::object_model m_builder.update(type_manager.getTypeId(key)); } + // subscribe for flush operation + if (!isDirty()) { + getMemspace().collectForFlush(this); + } + switch (m_builder.getDataType()) { case IndexDataType::Int64: { m_builder.get().add(type_manager.extractInt64(key), value); @@ -298,8 +303,6 @@ namespace db0::object_model << " does not allow adding key type: " << LangToolkit::getTypeName(key) << THROWF_END; } - // subscribe for flush operation - getMemspace().collectForFlush(this); m_mutation_log->onDirty(); } @@ -318,6 +321,11 @@ namespace db0::object_model 
m_builder.update(type_manager.getTypeId(key)); } + // subscribe for flush operation + if (!isDirty()) { + getMemspace().collectForFlush(this); + } + switch (m_builder.getDataType()) { case IndexDataType::Int64: { m_builder.get().remove(type_manager.extractInt64(key), value); @@ -335,11 +343,9 @@ namespace db0::object_model << " does not allow keys of type: " << LangToolkit::getTypeName(key) << THROWF_END; } - // subscribe for flush operation - getMemspace().collectForFlush(this); m_mutation_log->onDirty(); } - + std::unique_ptr Index::range(ObjectPtr min, ObjectPtr max, bool null_first) const { assert(hasInstance()); @@ -435,6 +441,11 @@ namespace db0::object_model void Index::addNull(ObjectPtr obj_ptr) { assert(hasInstance()); + // subscribe for flush operation + if (!isDirty()) { + getMemspace().collectForFlush(this); + } + switch (m_builder.getDataType()) { // use provisional data type for Auto case IndexDataType::Auto: { @@ -457,8 +468,6 @@ namespace db0::object_model << "Unsupported index data type: " << static_cast(m_builder.getDataType()) << THROWF_END; } - // subscribe for flush operation - getMemspace().collectForFlush(this); m_mutation_log->onDirty(); } @@ -496,6 +505,10 @@ namespace db0::object_model void Index::removeNull(ObjectPtr obj_ptr) { + if (!isDirty()) { + getMemspace().collectForFlush(this); + } + switch (m_builder.getDataType()) { // use provisional data type for Auto case IndexDataType::Auto: { diff --git a/src/dbzero/workspace/Fixture.cpp b/src/dbzero/workspace/Fixture.cpp index 395638fa..d4bd8dc2 100644 --- a/src/dbzero/workspace/Fixture.cpp +++ b/src/dbzero/workspace/Fixture.cpp @@ -300,8 +300,14 @@ namespace db0 } // Flush using registered flush handlers - for (auto &handler: m_flush_handlers) { - handler(); + { + std::unique_ptr flush_timer; + if (process_timer) { + flush_timer = std::make_unique("Fixture::commit:flush_handlers", process_timer.get()); + } + for (auto &handler: m_flush_handlers) { + handler(); + } } // Clear expired 
instances from cache so that they're not persisted From 46cbc246889e55c1dcef308788935cc784bf4198 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Mon, 10 Nov 2025 14:15:06 +0100 Subject: [PATCH 07/11] WIP: save work --- python_tests/test_index.py | 16 +- src/dbzero/core/memory/MetaAllocator.cpp | 749 +----------------- src/dbzero/core/memory/MetaAllocator.hpp | 16 +- src/dbzero/core/memory/PageMap.hpp | 25 +- src/dbzero/core/memory/SlabAllocator.cpp | 33 +- src/dbzero/core/memory/SlabAllocator.hpp | 28 +- .../core/memory/SlabAllocatorConfig.hpp | 31 + src/dbzero/core/memory/SlabManager.cpp | 597 ++++++++++++++ src/dbzero/core/memory/SlabManager.hpp | 279 +++++++ src/dbzero/object_model/index/Index.cpp | 2 +- src/dbzero/workspace/Workspace.hpp | 10 +- tests/unit_tests/IndexTest.cpp | 21 + tests/unit_tests/VBIndexTests.cpp | 49 ++ 13 files changed, 1048 insertions(+), 808 deletions(-) create mode 100644 src/dbzero/core/memory/SlabAllocatorConfig.hpp create mode 100644 src/dbzero/core/memory/SlabManager.cpp create mode 100644 src/dbzero/core/memory/SlabManager.hpp create mode 100644 tests/unit_tests/IndexTest.cpp diff --git a/python_tests/test_index.py b/python_tests/test_index.py index e884c510..14c65966 100644 --- a/python_tests/test_index.py +++ b/python_tests/test_index.py @@ -5,6 +5,7 @@ from dbzero import find from datetime import timedelta, datetime import random +import time def test_index_instance_can_be_created_without_arguments(db0_fixture): @@ -714,4 +715,17 @@ def test_find_in_index_range_issue_1(db0_fixture): index.add(3, test_obj) assert test_obj in set(index.range()) assert list(db0.find(index.range(), test_obj)) == [test_obj] - \ No newline at end of file + + +@pytest.mark.stress_test +def test_insert_1M_keys_to_index(db0_no_autocommit): + cut = db0.index() + objects = [MemoTestClass(0) for _ in range(25000)] + start = time.perf_counter() + for i in range(1_000_000): + # add random int + cut.add(random.randint(0, 100_000_000), random.choice(objects)) + 
result = list(cut.select(0, 1)) + end = time.perf_counter() + assert len(cut) == 1_000_000 + print(f"Inserted 1M keys to index in {end - start:.2f} seconds") \ No newline at end of file diff --git a/src/dbzero/core/memory/MetaAllocator.cpp b/src/dbzero/core/memory/MetaAllocator.cpp index 60d76f8f..2b500f6d 100644 --- a/src/dbzero/core/memory/MetaAllocator.cpp +++ b/src/dbzero/core/memory/MetaAllocator.cpp @@ -2,6 +2,7 @@ #include "OneShotAllocator.hpp" #include "Memspace.hpp" #include "SlabRecycler.hpp" +#include "SlabManager.hpp" #include #include @@ -102,626 +103,6 @@ namespace db0 { } - class SlabManager - { - public: - static constexpr std::size_t NUM_REALMS = MetaAllocator::NUM_REALMS; - - SlabManager(std::shared_ptr prefix, MetaAllocator::SlabTreeT &slab_defs, - MetaAllocator::CapacityTreeT &capacity_items, SlabRecycler *recycler, std::uint32_t slab_size, std::uint32_t page_size, - std::function address_func, std::function slab_id_func, - unsigned char realm_id) - : m_prefix(prefix) - , m_realm_id(realm_id) - , m_slab_defs(slab_defs) - , m_capacity_items(capacity_items) - , m_recycler_ptr(recycler) - , m_slab_size(slab_size) - , m_page_size(page_size) - , m_slab_address_func(address_func) - , m_slab_id_func(slab_id_func) - , m_next_slab_id(fetchNextSlabId()) - { - } - - using CapacityItem = MetaAllocator::CapacityItem; - using SlabDef = MetaAllocator::SlabDef; - - struct FindResult - { - std::shared_ptr m_slab; - CapacityItem m_cap_item; - - bool operator==(std::uint32_t slab_id) const { - return m_slab && m_cap_item.m_slab_id == slab_id; - } - - bool operator==(const FindResult &rhs) const { - return *this == rhs.m_cap_item.m_slab_id; - } - - const SlabAllocator &operator*() const { - return *m_slab; - } - - inline bool operator!() const { - return !m_slab; - } - }; - - // NOTE: only localities 0 and 1 are currently supported - struct ActiveSlab: public std::array - { - bool contains(std::uint32_t slab_id) const { - return ((*this)[0] == slab_id || 
(*this)[1] == slab_id); - } - - bool contains(const FindResult &slab) const { - return ((*this)[0] == slab || (*this)[1] == slab); - } - - FindResult find(std::uint32_t slab_id) const - { - if ((*this)[0] == slab_id) { - return (*this)[0]; - } else if ((*this)[1] == slab_id) { - return (*this)[1]; - } - return {}; - } - - void erase(const FindResult &slab) - { - if ((*this)[0] == slab) { - (*this)[0] = {}; - } else if ((*this)[1] == slab) { - (*this)[1] = {}; - } else { - assert(false); - THROWF(db0::InternalException) << "Slab not found in active slabs." << THROWF_END; - } - } - }; - - /** - * Retrieves the active slab or returns nullptr if no active slab available - */ - FindResult tryGetActiveSlab(unsigned char locality) { - assert(locality < m_active_slab.size()); - return m_active_slab[locality]; - } - - void resetActiveSlab(unsigned char locality) { - assert(locality < m_active_slab.size()); - m_active_slab[locality] = {}; - } - - /** - * Retrieve the 1st slab to allocate a block of at least min_capacity - * this is only a 'hint' and if the allocation is not possible, the next slab should be attempted - */ - FindResult findFirst(std::size_t min_capacity, unsigned char locality) - { - // visit slabs starting from the largest available capacity - auto it = m_capacity_items.cbegin(); - for (;;) { - if (it.is_end() || it->m_remaining_capacity < min_capacity) { - // no existing slab has sufficient capacity - return {}; - } - - if (m_active_slab.contains(it->m_slab_id)) { - // do not include active slab in find operation - ++it; - continue; - } - auto slab = openSlab(m_slab_address_func(it->m_slab_id)); - if (!m_active_slab[locality]) { - // make the slab active - m_active_slab[locality] = slab; - } - return slab; - } - } - - // Continue after findFirst - FindResult findNext(FindResult last_result, std::size_t min_capacity, unsigned char locality) - { - for (;;) { - // this is to find the next item in order - last_result.m_cap_item.m_slab_id++; - auto it = 
m_capacity_items.upper_equal_bound(last_result.m_cap_item); - if (!it.first || it.first->m_remaining_capacity < min_capacity) { - return {}; - } - - if (m_active_slab.contains(it.first->m_slab_id)) { - // do not include active slab in find operation - continue; - } - auto slab = openSlab(m_slab_address_func(it.first->m_slab_id)); - if (!m_active_slab[locality]) { - // make the slab active - m_active_slab[locality] = slab; - } - return slab; - } - } - - unsigned int getSlabCount() const { - return (nextSlabId() - m_realm_id) / NUM_REALMS; - } - - /** - * Create a new, unregistered slab instance - */ - std::pair, std::uint32_t> createNewSlab() - { - if (!m_next_slab_id) { - m_next_slab_id = fetchNextSlabId(); - } - auto slab_id = *m_next_slab_id; - (*m_next_slab_id) += NUM_REALMS; - auto address = m_slab_address_func(slab_id); - // create the new slab - auto capacity = SlabAllocator::formatSlab(m_prefix, address, m_slab_size, m_page_size); - // NOTE: for a new slab, the initial lost capacity is 0 - auto slab = std::make_shared(m_prefix, address, m_slab_size, m_page_size, capacity, 0); - if (m_atomic) { - // if atomic operation is in progress, add to the volatile slabs - m_volatile_slabs.push_back(address); - } - - return { slab, slab_id }; - } - - // Create a new, registered slab instance - FindResult addNewSlab(unsigned char locality) - { - auto [slab, slab_id] = createNewSlab(); - auto address = m_slab_address_func(slab_id); - CapacityItem cap_item { - static_cast(slab->getRemainingCapacity()), - static_cast(slab->getLostCapacity()), - slab_id - }; - // register with slab defs - m_slab_defs.emplace(slab_id, - static_cast(cap_item.m_remaining_capacity), - static_cast(cap_item.m_lost_capacity) - ); - // register with capacity items - m_capacity_items.insert(cap_item); - // add to cache - auto cache_item = std::make_shared(slab, cap_item); - m_slabs.emplace(address, cache_item); - // capture remaining capacity before instance is closed - 
slab->setOnCloseHandler([cache_item](const SlabAllocator &alloc) { - cache_item->m_final_remaining_capacity = alloc.getRemainingCapacity(); - cache_item->m_final_lost_capacity = alloc.getLostCapacity(); - }); - - // append with the recycler - if (m_recycler_ptr) { - m_recycler_ptr->append(slab); - } - - // make the newly added slab active - m_active_slab[locality] = { slab, cap_item }; - return m_active_slab[locality]; - } - - std::uint32_t getRemainingCapacity(std::uint32_t slab_id) const - { - // look up with the cache first - auto address = m_slab_address_func(slab_id); - auto it = m_slabs.find(address); - if (it != m_slabs.end()) { - auto slab = it->second->m_slab.lock(); - if (slab) { - return slab->getRemainingCapacity(); - } - } - - // look up with the slab defs next - auto slab_def_ptr = m_slab_defs.find_equal(slab_id); - if (!slab_def_ptr.first) { - THROWF(db0::InternalException) << "Slab definition not found."; - } - return slab_def_ptr.first->m_remaining_capacity; - } - - void close() - { - m_active_slab = {}; - m_reserved_slabs.clear(); - for (auto it = m_slabs.begin(); it != m_slabs.end();) { - it = unregisterSlab(it); - } - } - - // Find existing slab by ID - FindResult tryFind(std::uint32_t slab_id) const - { - if (slab_id < nextSlabId()) { - if (m_active_slab.contains(slab_id)) { - return m_active_slab.find(slab_id); - } - // look up with the cache first - auto address = m_slab_address_func(slab_id); - auto it = m_slabs.find(address); - if (it != m_slabs.end()) { - auto slab = it->second->m_slab.lock(); - if (slab) { - return { slab, it->second->m_cap_item }; - } - } - - return tryOpenSlab(address); - } - return {}; - } - - FindResult find(std::uint32_t slab_id) const - { - auto slab = tryFind(slab_id); - if (!slab) { - THROWF(db0::BadAddressException) << "Slab " << slab_id << " not found"; - } - return slab; - } - - /** - * Erase if 'slab' is the last slab - */ - void erase(const FindResult &slab) { - erase(slab, true); - } - - bool empty() const { 
- return nextSlabId() == m_realm_id; - } - - std::shared_ptr reserveNewSlab() - { - auto [slab, slab_id] = createNewSlab(); - // internally register the slab with capacity = 0 (to avoid use in regular allocations) - CapacityItem cap_item { 0, 0, slab_id }; - // register with slab defs - m_slab_defs.emplace( - slab_id, - static_cast(cap_item.m_remaining_capacity), - static_cast(cap_item.m_lost_capacity) - ); - // register with capacity items - m_capacity_items.insert(cap_item); - return slab; - } - - std::shared_ptr openExistingSlab(const SlabDef &slab_def) - { - if (slab_def.m_slab_id >= nextSlabId()) { - THROWF(db0::InputException) << "Slab " << slab_def.m_slab_id << " does not exist"; - } - auto address = m_slab_address_func(slab_def.m_slab_id); - // look up with the cache first - auto it = m_slabs.find(address); - if (it != m_slabs.end()) { - auto slab = it->second->m_slab.lock(); - if (slab) { - return slab; - } - } - // pull through cache - return openSlab(slab_def).m_slab; - } - - /** - * Open existing slab which has been previously reserved - */ - std::shared_ptr openReservedSlab(Address address) const - { - auto slab_id = m_slab_id_func(address); - if (slab_id >= nextSlabId()) { - THROWF(db0::InputException) << "Slab " << slab_id << " does not exist"; - } - - // look up with the cache first - auto it = m_slabs.find(address); - if (it != m_slabs.end()) { - auto slab = it->second->m_slab.lock(); - if (slab) { - return slab; - } - } - - // retrieve slab definition - auto slab_def_ptr = m_slab_defs.find_equal(slab_id); - if (!slab_def_ptr.first) { - THROWF(db0::InternalException) << "Slab definition not found: " << slab_id; - } - - // pull through cache - auto result = openSlab(*slab_def_ptr.first).m_slab; - // and add for non-expiry cache - m_reserved_slabs.push_back(result); - return result; - } - - Address getFirstAddress() const { - return m_slab_address_func(m_realm_id) + SlabAllocator::getFirstAddress(); - } - - void commit() const - { - for (auto &it : 
m_slabs) { - it.second->commit(); - } - } - - void detach() const - { - // detach all cached slabs - for (auto &it : m_slabs) { - it.second->detach(); - } - // NOTE: we retain the slab element because it's detached - // invalidate cached variable - m_next_slab_id = {}; - } - - std::uint32_t nextSlabId() const - { - if (!m_next_slab_id) { - m_next_slab_id = fetchNextSlabId(); - } - return *m_next_slab_id; - } - - void beginAtomic() - { - assert(!m_atomic); - assert(m_volatile_slabs.empty()); - m_atomic = true; - } - - void endAtomic() - { - assert(m_atomic); - m_volatile_slabs.clear(); - m_atomic = false; - } - - void cancelAtomic() - { - assert(m_atomic); - // revert all volatile slabs from cache - for (auto slab_addr : m_volatile_slabs) { - auto it = m_slabs.find(slab_addr); - if (it != m_slabs.end()) { - auto slab = it->second->m_slab.lock(); - // this is to prevent the slab from materializing any updates - if (slab) { - slab->resetOnCloseHandler(); - } - m_slabs.erase(it); - } - } - m_active_slab = {}; - m_volatile_slabs.clear(); - m_atomic = false; - } - - private: - - struct CacheItem - { - std::weak_ptr m_slab; - CapacityItem m_cap_item; - // the slab's remaining capacity reflected with backend when the SlabAllocator gets destroyed - std::uint32_t m_final_remaining_capacity = 0; - std::uint32_t m_final_lost_capacity = 0; - - CacheItem(std::weak_ptr slab, CapacityItem cap) - : m_slab(slab) - , m_cap_item(cap) - { - } - - void commit() const - { - if (auto slab = m_slab.lock()) { - if (slab) { - slab->commit(); - } - } - } - - void detach() const - { - if (auto slab = m_slab.lock()) { - if (slab) { - slab->detach(); - } - } - } - - // Check if any of the properties changed when compared to "capacity item" - bool isModified() const { - return m_final_remaining_capacity != m_cap_item.m_remaining_capacity || - m_final_lost_capacity != m_cap_item.m_lost_capacity; - } - }; - - using CacheIterator = std::unordered_map >::iterator; - - std::shared_ptr m_prefix; - 
const unsigned char m_realm_id; - MetaAllocator::SlabTreeT &m_slab_defs; - MetaAllocator::CapacityTreeT &m_capacity_items; - SlabRecycler *m_recycler_ptr = nullptr; - const std::uint32_t m_slab_size; - const std::uint32_t m_page_size; - // slab cache by address - mutable std::unordered_map > m_slabs; - mutable std::vector > m_reserved_slabs; - // active slabs for each supported locality (0 or 1) - mutable ActiveSlab m_active_slab; - // address by allocation ID (from the algo-allocator) - std::function m_slab_address_func; - std::function m_slab_id_func; - mutable std::optional m_next_slab_id; - // addresses of slabs newly created during atomic operations (potentially to be reverted) - mutable std::vector m_volatile_slabs; - // the atomic operation's flag - bool m_atomic = false; - - CacheIterator unregisterSlab(CacheIterator it) const - { - auto cache_item = it->second; - if (!cache_item->m_slab.expired()) { - THROWF(db0::InternalException) - << "Slab " << static_cast(cache_item->m_cap_item.m_slab_id) << " is not closed"; - } - - auto &item = *cache_item; - // if the remaining capacity has hanged, reflect this with backend - if (item.isModified()) { - auto slab_id = item.m_cap_item.m_slab_id; - if (item.m_final_remaining_capacity != item.m_cap_item.m_remaining_capacity) { - auto it = m_capacity_items.find_equal(item.m_cap_item); - assert(!it.isEnd()); - // register under a modified key - m_capacity_items.erase(it); - m_capacity_items.emplace( - item.m_final_remaining_capacity, item.m_final_lost_capacity, slab_id - ); - } - // and update with the slab defs - auto slab_def_ptr = m_slab_defs.find_equal(slab_id); - m_slab_defs.modify(slab_def_ptr)->m_remaining_capacity = item.m_final_remaining_capacity; - m_slab_defs.modify(slab_def_ptr)->m_lost_capacity = item.m_final_lost_capacity; - } - return m_slabs.erase(it); - } - - FindResult tryOpenSlab(Address address) const - { - auto it = m_slabs.find(address); - if (it != m_slabs.end()) { - auto result = 
it->second->m_slab.lock(); - if (result) { - return { result, it->second->m_cap_item }; - } - // unregister expired slab from cache - unregisterSlab(it); - } - - auto slab_id = m_slab_id_func(address); - // retrieve slab definition - auto slab_def_ptr = m_slab_defs.find_equal(slab_id); - if (!slab_def_ptr.first) { - return {}; - } - - return openSlab(*slab_def_ptr.first); - } - - FindResult openSlab(Address address) const - { - auto slab = tryOpenSlab(address); - if (!slab) { - THROWF(db0::BadAddressException) << "Invalid address accessed"; - } - return slab; - } - - // open slab by definition and add to cache - FindResult openSlab(const SlabDef &def) const - { - auto cap_item = CapacityItem(def.m_remaining_capacity, def.m_lost_capacity, def.m_slab_id); - auto addr = m_slab_address_func(def.m_slab_id); - auto slab = std::make_shared( - m_prefix, addr, m_slab_size, m_page_size, def.m_remaining_capacity, def.m_lost_capacity - ); - // add to cache (it's safe to reference item from the unordered_map) - auto cache_item = std::make_shared(slab, cap_item); - m_slabs.emplace(addr, cache_item).first->second; - // capture remaining capacity before instance is closed - slab->setOnCloseHandler([cache_item](const SlabAllocator &alloc) { - cache_item->m_final_remaining_capacity = alloc.getRemainingCapacity(); - cache_item->m_final_lost_capacity = alloc.getLostCapacity(); - }); - - // append with the recycler - if (m_recycler_ptr) { - m_recycler_ptr->append(slab); - } - - return { slab, cap_item }; - } - - void erase(const FindResult &slab, bool cleanup) - { - // erasing the last slab - if (slab.m_cap_item.m_slab_id != nextSlabId() - NUM_REALMS) { - return; - } - - auto addr = m_slab_address_func(slab.m_cap_item.m_slab_id); - // unregister from cache - auto it = m_slabs.find(addr); - if (it != m_slabs.end()) { - m_slabs.erase(it); - } - // unregister from recycler - if (m_recycler_ptr) { - m_recycler_ptr->closeOne([&slab](const SlabAllocator &s) { - return slab.m_slab.get() == 
&s; - }); - } - // unregister if active - if (m_active_slab.contains(slab)) { - m_active_slab.erase(slab); - } - // unregister from slab defs - if (!m_slab_defs.erase_equal(slab.m_cap_item.m_slab_id).first) { - THROWF(db0::InternalException) << "Slab definition not found."; - } - // unregister from capacity items - if (!m_capacity_items.erase_equal(slab.m_cap_item).first) { - THROWF(db0::InternalException) << "Capacity item not found."; - } - if (!m_next_slab_id) { - m_next_slab_id = fetchNextSlabId(); - } - (*m_next_slab_id) -= NUM_REALMS; - // try removing other empty slabs if such exist - if (cleanup) { - while (!empty()) { - auto slab = openSlab(m_slab_address_func(nextSlabId() - NUM_REALMS)); - if (!slab.m_slab->empty()) { - break; - } - erase(slab, false); - } - } - } - - std::uint32_t fetchNextSlabId() const - { - // determine the max slab id - auto it = m_slab_defs.find_max(); - if (it.first) { - return it.first->m_slab_id + NUM_REALMS; - } else { - // first slab being created - return m_realm_id; - } - } - - }; - std::uint64_t MetaAllocator::Realm::getSlabMaxAddress() const { // take max of the 2 collections @@ -747,9 +128,8 @@ namespace db0 m_header.m_page_size ) , m_metaspace(createMetaspace()) - , m_realms(m_metaspace, m_prefix, recycler, m_header, NUM_REALMS) - , m_recycler_ptr(recycler) - , m_deferred_free(deferred_free) + , m_realms(m_metaspace, m_prefix, recycler, m_header, NUM_REALMS, deferred_free) + , m_recycler_ptr(recycler) , m_slab_id_function(getSlabIdFunction(o_meta_header::sizeOf(), m_header.m_page_size, m_header.m_slab_size)) { auto max_addr = m_realms.getSlabMaxAddress(); @@ -764,7 +144,7 @@ namespace db0 } MetaAllocator::Realm::Realm(Memspace &metaspace, std::shared_ptr prefix, SlabRecycler *slab_recycler, - o_realm realm, std::uint32_t slab_size, std::uint32_t page_size, unsigned char realm_id) + o_realm realm, std::uint32_t slab_size, std::uint32_t page_size, unsigned char realm_id, bool deferred_free) : 
m_slab_defs(metaspace.myPtr(realm.m_slab_defs_ptr), page_size) , m_capacity_items(metaspace.myPtr(realm.m_capacity_items_ptr), page_size) , m_slab_manager(std::make_unique(prefix, m_slab_defs, m_capacity_items, slab_recycler, @@ -772,7 +152,8 @@ namespace db0 page_size, getSlabAddressFunction(o_meta_header::sizeOf(), page_size, slab_size), getSlabIdFunction(o_meta_header::sizeOf(), page_size, slab_size), - realm_id + realm_id, + deferred_free )) { } @@ -843,90 +224,22 @@ namespace db0 std::uint16_t &instance_id, unsigned char realm_id, unsigned char locality) { assert(slot_num == 0); - assert(size > 0); - // try allocating from the active slab first - auto &realm = m_realms[realm_id]; - auto slab = realm.tryGetActiveSlab(locality); - bool is_first = true; - bool is_new = false; - for (;;) { - if (slab.m_slab) { - for (;;) { - auto addr = slab.m_slab->tryAlloc(size, 0, aligned); - if (!addr) { - // NOTE: since the last allocation failed, don't use this slab as "active" - realm.resetActiveSlab(locality); - break; - } - - if (!unique || slab.m_slab->tryMakeAddressUnique(*addr, instance_id)) { - return addr; - } - - // unable to make the address unique, schedule for deferred free and try again - // NOTE: the allocation is lost - deferredFree(*addr); - } - if (size > slab.m_slab->getMaxAllocSize()) { - THROWF(db0::InternalException) - << "Requested allocation size " << size << " is larger than the slab size " << slab.m_slab->getMaxAllocSize(); - } - if (is_new) { - THROWF(db0::InternalException) << "Slab is new but cannot allocate " << size; - } - } - if (is_first) { - slab = realm.findFirst(size, locality); - is_first = false; - } else { - slab = realm.findNext(slab, size, locality); - } - if (!slab.m_slab) { - slab = realm.addNewSlab(locality); - is_new = true; - } - } + assert(size > 0); + return m_realms[realm_id].tryAlloc(size, slot_num, aligned, unique, instance_id, locality); } void MetaAllocator::free(Address address) - { - 
assert(m_deferred_free_ops.find(address) == m_deferred_free_ops.end()); - if (m_deferred_free) { - deferredFree(address); - } else { - _free(address); - } - } - - void MetaAllocator::deferredFree(Address address) - { - if (m_atomic) { - m_atomic_deferred_free_ops.push_back(address); - } else { - m_deferred_free_ops.insert(address); - } - } - - void MetaAllocator::_free(Address address) { auto slab_id = m_slab_id_function(address); auto realm_id = getRealmID(slab_id); - auto slab = m_realms[realm_id].find(slab_id); - slab.m_slab->free(address); - if (slab.m_slab->empty()) { - // erase or mark as erased - m_realms[realm_id].erase(slab); - } + m_realms[realm_id].free(address, slab_id); } - + std::size_t MetaAllocator::getAllocSize(Address address) const { - if (m_deferred_free_ops.find(address) != m_deferred_free_ops.end()) { - THROWF(db0::BadAddressException) << "Address " << address << " not found (pending deferred free)"; - } auto slab_id = m_slab_id_function(address); auto realm_id = getRealmID(slab_id); - return m_realms[realm_id].find(slab_id).m_slab->getAllocSize(address); + return m_realms[realm_id].getAllocSize(address, slab_id); } std::size_t MetaAllocator::getAllocSize(Address address, unsigned char realm_id) const @@ -935,10 +248,7 @@ namespace db0 if (realm_id != getRealmID(slab_id)) { THROWF(db0::BadAddressException) << "Invalid address accessed"; } - if (m_deferred_free_ops.find(address) != m_deferred_free_ops.end()) { - THROWF(db0::BadAddressException) << "Address " << address << " not found (pending deferred free)"; - } - return m_realms[realm_id].find(slab_id).m_slab->getAllocSize(address); + return m_realms[realm_id].getAllocSize(address, slab_id); } bool MetaAllocator::isAllocated(Address address, std::size_t *size_of_result) const @@ -948,13 +258,9 @@ namespace db0 } auto slab_id = m_slab_id_function(address); auto realm_id = getRealmID(slab_id); - auto slab = m_realms[realm_id].tryFind(slab_id); - if (!slab) { - return false; - } - return 
slab.m_slab->isAllocated(address, size_of_result); + return m_realms[realm_id].isAllocated(address, slab_id, size_of_result); } - + bool MetaAllocator::isAllocated(Address address, unsigned char realm_id, std::size_t *size_of_result) const { auto slab_id = m_slab_id_function(address); @@ -964,11 +270,7 @@ namespace db0 if (m_deferred_free_ops.find(address) != m_deferred_free_ops.end()) { return false; } - auto slab = m_realms[realm_id].tryFind(slab_id); - if (!slab) { - return false; - } - return slab.m_slab->isAllocated(address, size_of_result); + return m_realms[realm_id].isAllocated(address, slab_id, size_of_result); } unsigned int MetaAllocator::getSlabCount() const @@ -1013,11 +315,11 @@ namespace db0 { auto slab_id = m_slab_id_function(address); auto realm_id = slab_id & MetaAllocator::REALM_MASK; - auto result = m_realms[realm_id].openReservedSlab(address); + auto result = m_realms[realm_id].openReservedSlab(address, slab_id); assert(result->size() == size); return result; } - + void MetaAllocator::Realm::commit() const { m_slab_defs.commit(); @@ -1050,15 +352,10 @@ namespace db0 return m_recycler_ptr; } - void MetaAllocator::Realm::forAllSlabs(std::function f) const - { - auto it = m_slab_defs.cbegin(); - for (;!it.is_end();++it) { - auto slab = m_slab_manager->openExistingSlab(*it); - f(*slab, it->m_slab_id); - } + void MetaAllocator::Realm::forAllSlabs(std::function f) const { + m_slab_manager->forAllSlabs(f); } - + void MetaAllocator::forAllSlabs(std::function f) const { m_realms.forAllSlabs(f); } @@ -1111,14 +408,14 @@ namespace db0 } MetaAllocator::RealmsVector::RealmsVector(Memspace &metaspace, std::shared_ptr prefix, SlabRecycler *slab_recycler, - o_meta_header &meta_header, unsigned int size) + o_meta_header &meta_header, unsigned int size, bool deferred_free) { reserve(size); auto slab_size = meta_header.m_slab_size; auto page_size = meta_header.m_page_size; for (unsigned int i = 0; i < size; ++i) { - emplace_back(metaspace, prefix, slab_recycler, 
meta_header.m_realms[i], - slab_size, page_size, static_cast(i) + emplace_back(metaspace, prefix, slab_recycler, meta_header.m_realms[i], slab_size, + page_size, static_cast(i), deferred_free ); } } diff --git a/src/dbzero/core/memory/MetaAllocator.hpp b/src/dbzero/core/memory/MetaAllocator.hpp index f0e76119..601d09ca 100644 --- a/src/dbzero/core/memory/MetaAllocator.hpp +++ b/src/dbzero/core/memory/MetaAllocator.hpp @@ -44,7 +44,7 @@ DB0_PACKED_BEGIN o_meta_header(std::uint32_t page_size, std::uint32_t slab_size); }; -DB0_PACKED_END +DB0_PACKED_END class MetaAllocator: public Allocator { @@ -293,7 +293,7 @@ DB0_PACKED_END std::unique_ptr m_slab_manager; Realm(Memspace &, std::shared_ptr, SlabRecycler *, o_realm, std::uint32_t slab_size, - std::uint32_t page_size, unsigned char realm_id); + std::uint32_t page_size, unsigned char realm_id, bool deferred_free); // get the max address from all underlying slabs std::uint64_t getSlabMaxAddress() const; @@ -311,8 +311,8 @@ DB0_PACKED_END struct RealmsVector: protected std::vector { RealmsVector(Memspace &, std::shared_ptr, SlabRecycler *, o_meta_header &, - unsigned int size); - + unsigned int size, bool deferred_free); + // evaluate the max address from all realms std::uint64_t getSlabMaxAddress() const; @@ -338,12 +338,10 @@ DB0_PACKED_END RealmsVector m_realms; SlabRecycler *m_recycler_ptr; - const bool m_deferred_free; - mutable std::unordered_set
m_deferred_free_ops; + const bool m_deferred_free; std::function m_slab_id_function; // flag indicating if the atomic operation is in progress bool m_atomic = false; - std::vector
m_atomic_deferred_free_ops; /** * Reads header information from the prefix @@ -357,10 +355,6 @@ DB0_PACKED_END * if not found then create a new slab */ std::shared_ptr getSlabAllocator(std::size_t min_capacity); - - // internal "free" implementation which performs the dealloc instanly - void _free(Address); - void deferredFree(Address); // NOTE: instance ID will only be populated when unique = true std::optional
tryAllocImpl(std::size_t size, std::uint32_t slot_num, bool aligned, bool unique, diff --git a/src/dbzero/core/memory/PageMap.hpp b/src/dbzero/core/memory/PageMap.hpp index 87c1cf10..c2df3f00 100644 --- a/src/dbzero/core/memory/PageMap.hpp +++ b/src/dbzero/core/memory/PageMap.hpp @@ -91,10 +91,6 @@ namespace db0 using CacheIterator = typename decltype(m_cache)::iterator; CacheIterator findImpl(std::uint64_t page_num, StateNumType state_num) const; - - // Erase ALL locks with a given page number where state < state_num - // irrespective of their use count, this is required for handling inconsistent locks problem - void eraseAll(std::uint64_t page_num, StateNumType state_num) const; }; template @@ -183,26 +179,6 @@ namespace db0 } return m_cache.end(); } - - template void PageMap::eraseAll( - std::uint64_t page_num, StateNumType state_num) const - { - if (m_cache.empty()) { - return; - } - auto it = m_cache.lower_bound({page_num, state_num}); - if (it == m_cache.end() && !m_cache.empty()) { - assert(!m_cache.empty()); - --it; - } - if (it != m_cache.begin() && (it->first.second > state_num || it->first.first != page_num)) { - --it; - } - // NOTE: we're NOT erasing locks exactly matching the state number - while (it->first.first == page_num && it->first.second < state_num) { - it = m_cache.erase(it); - } - } template void PageMap::erase(StateNumType state_num, std::shared_ptr res_lock) @@ -243,6 +219,7 @@ namespace db0 std::shared_ptr PageMap::replace( StateNumType state_num, std::shared_ptr lock, std::uint64_t page_num) { + std::unique_lock _lock(m_rw_mutex); // find exact match of the page / state auto it = m_cache.find({page_num, state_num}); if (it == m_cache.end()) { diff --git a/src/dbzero/core/memory/SlabAllocator.cpp b/src/dbzero/core/memory/SlabAllocator.cpp index d2a536f5..6fd8ffd9 100644 --- a/src/dbzero/core/memory/SlabAllocator.cpp +++ b/src/dbzero/core/memory/SlabAllocator.cpp @@ -58,9 +58,6 @@ namespace db0 SlabAllocator::~SlabAllocator() { - if 
(m_on_close_handler) { - m_on_close_handler(*this); - } } std::optional
SlabAllocator::tryAlloc(std::size_t size, std::uint32_t slot_num, @@ -113,20 +110,24 @@ namespace db0 if (size % page_size != 0) { THROWF(db0::InternalException) << "Slab size not multiple of page size: " << size << " % " << page_size; } - - // put bitspace right before the header (at the end of the slab ) - BitSpace::create(prefix, headerAddr(begin_addr, size), page_size, -1); + + // put bitspace right before the header (at the end of the slab) + BitSpace::create( + prefix, headerAddr(begin_addr, size), page_size, -1 + ); // open newly created bitspace // use offset = begin_addr (to allow storing internal addresses as 32bit) - BitSpace bitspace(prefix, headerAddr(begin_addr, size), page_size, -1); + BitSpace bitspace( + prefix, headerAddr(begin_addr, size), page_size, -1 + ); - // create the CRDT allocator data structures on top of the bitspace + // Create the CRDT allocator data structures on top of the bitspace AllocSetT allocs(bitspace, page_size); BlankSetT blanks(bitspace, page_size); AlignedBlankSetT aligned_blanks(bitspace, page_size, CompT(page_size), page_size); StripeSetT stripes(bitspace, page_size); LimitedVector alloc_counter(bitspace, page_size); - alloc_counter.reserve(SLAB_BITSPACE_SIZE()); + alloc_counter.reserve(SlabAllocatorConfig::SLAB_BITSPACE_SIZE()); // calculate size initially available to CRTD allocator std::uint32_t crdt_size = static_cast(size - admin_size - admin_margin_bytes); assert(crdt_size > 0); @@ -167,13 +168,13 @@ namespace db0 std::size_t SlabAllocator::calculateAdminSpaceSize(std::size_t page_size) { - auto result = BitSpace::sizeOf() + o_slab_header::sizeOf(); + auto result = BitSpace::sizeOf() + o_slab_header::sizeOf(); // round to full page size result = (result + page_size - 1) / page_size * page_size; // add ADMIN_SPAN pages for CRDT types (actual space initially occupied) result += page_size * ADMIN_SPAN(); // include limited vector's reserved capacity - result += LimitedVectorT::DP_REQ(SLAB_BITSPACE_SIZE(), page_size) * 
page_size; + result += LimitedVectorT::DP_REQ(SlabAllocatorConfig::SLAB_BITSPACE_SIZE(), page_size) * page_size; return result; } @@ -203,15 +204,7 @@ namespace db0 const Prefix &SlabAllocator::getPrefix() const { return *m_prefix; } - - void SlabAllocator::setOnCloseHandler(std::function handler) { - m_on_close_handler = handler; - } - - void SlabAllocator::resetOnCloseHandler() { - m_on_close_handler = {}; - } - + bool SlabAllocator::empty() const { return m_allocs.empty(); } diff --git a/src/dbzero/core/memory/SlabAllocator.hpp b/src/dbzero/core/memory/SlabAllocator.hpp index 81540f23..78fb9453 100644 --- a/src/dbzero/core/memory/SlabAllocator.hpp +++ b/src/dbzero/core/memory/SlabAllocator.hpp @@ -4,6 +4,7 @@ #include "Prefix.hpp" #include "BitSpace.hpp" #include "Memspace.hpp" +#include "SlabAllocatorConfig.hpp" #include #include #include @@ -13,8 +14,8 @@ namespace db0 { -DB0_PACKED_BEGIN - + +DB0_PACKED_BEGIN struct DB0_PACKED_ATTR o_slab_header: public db0::o_fixed { const std::uint32_t m_version = 1; @@ -42,13 +43,8 @@ DB0_PACKED_BEGIN { } }; - - static constexpr unsigned int SLAB_BITSPACE_SIZE() { - // typical configuration, sufficient for a 64MB slab - // FIXME: page_size hardcoded - return 64 * 1024 * 1024 / 4096; - } - +DB0_PACKED_END + /** * The SlabAllocator takes a fixed size address range (e.g. 
64MB) * and organizes the space with the use of BitSetAllocator/BitSpace + CRDT_Allocator @@ -122,13 +118,7 @@ DB0_PACKED_BEGIN std::size_t getLostCapacity() const; const Prefix &getPrefix() const; - - /** - * Register a handler to be called pre-destruction - */ - void setOnCloseHandler(std::function); - void resetOnCloseHandler(); - + bool empty() const; /** @@ -185,7 +175,7 @@ DB0_PACKED_BEGIN const std::uint32_t m_slab_size; Memspace m_internal_memspace; v_object m_header; - BitSpace m_bitspace; + BitSpace m_bitspace; AllocSetT m_allocs; BlankSetT m_blanks; AlignedBlankSetT m_aligned_blanks; @@ -195,11 +185,9 @@ DB0_PACKED_BEGIN CRDT_Allocator m_allocator; const std::optional m_initial_remaining_capacity; const std::optional m_initial_lost_capacity; - std::size_t m_initial_admin_size; - std::function m_on_close_handler; + std::size_t m_initial_admin_size; static Address headerAddr(Address begin_addr, std::uint32_t size); }; -DB0_PACKED_END } diff --git a/src/dbzero/core/memory/SlabAllocatorConfig.hpp b/src/dbzero/core/memory/SlabAllocatorConfig.hpp new file mode 100644 index 00000000..a4085259 --- /dev/null +++ b/src/dbzero/core/memory/SlabAllocatorConfig.hpp @@ -0,0 +1,31 @@ +#pragma once + +#include +#include +#include +#include "AccessOptions.hpp" +#include + +namespace db0 + +{ + + struct SlabAllocatorConfig + { + // 4KB pages + static constexpr std::size_t DEFAULT_PAGE_SIZE = 4096; + static constexpr std::size_t DEFAULT_SLAB_SIZE = 128u << 20; + + static constexpr unsigned int SLAB_BITSPACE_SIZE() { + // Must equal the number of data pages in the entire slab + return DEFAULT_SLAB_SIZE / DEFAULT_PAGE_SIZE; + } + + // Minimum operational capacity in bytes + // i.e. 
slabs below this capacity will not be considered for allocation + static constexpr std::size_t MIN_OP_CAPACITY() { + return DEFAULT_SLAB_SIZE / 16; + } + }; + +} \ No newline at end of file diff --git a/src/dbzero/core/memory/SlabManager.cpp b/src/dbzero/core/memory/SlabManager.cpp new file mode 100644 index 00000000..884e49b1 --- /dev/null +++ b/src/dbzero/core/memory/SlabManager.cpp @@ -0,0 +1,597 @@ +#include "SlabManager.hpp" +#include "SlabRecycler.hpp" + +namespace db0 + +{ + + SlabManager::SlabManager(std::shared_ptr prefix, MetaAllocator::SlabTreeT &slab_defs, + MetaAllocator::CapacityTreeT &capacity_items, SlabRecycler *recycler, std::uint32_t slab_size, std::uint32_t page_size, + std::function address_func, std::function slab_id_func, + unsigned char realm_id, bool deferred_free) + : m_prefix(prefix) + , m_realm_id(realm_id) + , m_slab_defs(slab_defs) + , m_capacity_items(capacity_items) + , m_recycler_ptr(recycler) + , m_slab_size(slab_size) + , m_page_size(page_size) + , m_slab_address_func(address_func) + , m_slab_id_func(slab_id_func) + , m_next_slab_id(fetchNextSlabId()) + , m_deferred_free(deferred_free) + { + } + + SlabManager::FindResult SlabManager::tryGetActiveSlab(unsigned char locality) { + assert(locality < m_active_slab.size()); + return m_active_slab[locality]; + } + + void SlabManager::resetActiveSlab(unsigned char locality) { + assert(locality < m_active_slab.size()); + m_active_slab[locality] = {}; + } + + SlabManager::FindResult SlabManager::findFirst(std::size_t size, unsigned char locality) + { + // visit slabs starting from the largest available capacity + auto min_capacity = std::max(size, SlabAllocatorConfig::MIN_OP_CAPACITY()); + auto it = m_capacity_items.cbegin(); + for (;;) { + if (it.is_end() || it->m_remaining_capacity < min_capacity) { + // no existing slab has sufficient capacity + return {}; + } + + if (m_active_slab.contains(it->m_slab_id)) { + // do not include active slab in find operation + ++it; + continue; + } + auto 
slab = openSlab(m_slab_address_func(it->m_slab_id)); + // make the slab active + m_active_slab[locality] = slab; + return slab; + } + } + + SlabManager::FindResult SlabManager::findNext(FindResult last_result, std::size_t size, + unsigned char locality) + { + auto min_capacity = std::max(size, SlabAllocatorConfig::MIN_OP_CAPACITY()); + for (;;) { + // this is to find the next item in order + last_result.m_cap_item.m_slab_id += NUM_REALMS; + auto it = m_capacity_items.upper_equal_bound(last_result.m_cap_item); + if (!it.first || it.first->m_remaining_capacity < min_capacity) { + return {}; + } + + if (m_active_slab.contains(it.first->m_slab_id)) { + // do not include active slab in find operation + continue; + } + auto slab = openSlab(m_slab_address_func(it.first->m_slab_id)); + // make the slab active and for a specific locality + m_active_slab[locality] = slab; + return slab; + } + } + + std::pair, std::uint32_t> SlabManager::createNewSlab() + { + if (!m_next_slab_id) { + m_next_slab_id = fetchNextSlabId(); + } + + auto slab_id = *m_next_slab_id; + (*m_next_slab_id) += NUM_REALMS; + auto address = m_slab_address_func(slab_id); + // create the new slab + auto capacity = SlabAllocator::formatSlab(m_prefix, address, m_slab_size, m_page_size); + // NOTE: for a new slab, the initial lost capacity is 0 + auto slab = std::make_shared(m_prefix, address, m_slab_size, m_page_size, capacity, 0); + if (m_atomic) { + // if atomic operation is in progress, add to the volatile slabs + m_volatile_slabs.push_back(address); + } + + return { slab, slab_id }; + } + + SlabManager::FindResult SlabManager::addNewSlab(unsigned char locality) + { + auto [slab, slab_id] = createNewSlab(); + auto address = m_slab_address_func(slab_id); + CapacityItem cap_item { + static_cast(slab->getRemainingCapacity()), + static_cast(slab->getLostCapacity()), + slab_id + }; + // register with slab defs + m_slab_defs.emplace(slab_id, + static_cast(cap_item.m_remaining_capacity), + 
static_cast(cap_item.m_lost_capacity) + ); + // register with capacity items + m_capacity_items.insert(cap_item); + // add to cache + auto cache_item = std::make_shared(slab, cap_item); + m_slabs.emplace(address, cache_item); + + // append with the recycler + if (m_recycler_ptr) { + m_recycler_ptr->append(slab); + } + + // make the newly added slab active + m_active_slab[locality] = { slab, cap_item }; + return m_active_slab[locality]; + } + + std::uint32_t SlabManager::getRemainingCapacity(std::uint32_t slab_id) const + { + // look up with the cache first + auto address = m_slab_address_func(slab_id); + auto it = m_slabs.find(address); + if (it != m_slabs.end()) { + auto slab = it->second->m_slab.lock(); + if (slab) { + return slab->getRemainingCapacity(); + } + } + + // look up with the slab defs next + auto slab_def_ptr = m_slab_defs.find_equal(slab_id); + if (!slab_def_ptr.first) { + THROWF(db0::InternalException) << "Slab definition not found."; + } + return slab_def_ptr.first->m_remaining_capacity; + } + + void SlabManager::close() + { + m_active_slab = {}; + m_reserved_slabs.clear(); + for (auto it = m_slabs.begin(); it != m_slabs.end();) { + it = unregisterSlab(it); + } + } + + SlabManager::FindResult SlabManager::tryFind(std::uint32_t slab_id) const + { + if (slab_id < nextSlabId()) { + if (m_active_slab.contains(slab_id)) { + return m_active_slab.find(slab_id); + } + // look up with the cache first + auto address = m_slab_address_func(slab_id); + auto it = m_slabs.find(address); + if (it != m_slabs.end()) { + auto slab = it->second->m_slab.lock(); + if (slab) { + return { slab, it->second->m_cap_item }; + } + } + + return tryOpenSlab(address); + } + return {}; + } + + SlabManager::FindResult SlabManager::find(std::uint32_t slab_id) const + { + auto slab = tryFind(slab_id); + if (!slab) { + THROWF(db0::BadAddressException) << "Slab " << slab_id << " not found"; + } + return slab; + } + + void SlabManager::erase(const FindResult &slab) { + erase(slab, 
true); + } + + bool SlabManager::empty() const { + return nextSlabId() == m_realm_id; + } + + std::shared_ptr SlabManager::reserveNewSlab() + { + auto [slab, slab_id] = createNewSlab(); + // internally register the slab with capacity = 0 (to avoid use in regular allocations) + CapacityItem cap_item { 0, 0, slab_id }; + // register with slab defs + m_slab_defs.emplace( + slab_id, + static_cast(cap_item.m_remaining_capacity), + static_cast(cap_item.m_lost_capacity) + ); + // register with capacity items + m_capacity_items.insert(cap_item); + return slab; + } + + std::shared_ptr SlabManager::openExistingSlab(const SlabDef &slab_def) + { + if (slab_def.m_slab_id >= nextSlabId()) { + THROWF(db0::InputException) << "Slab " << slab_def.m_slab_id << " does not exist"; + } + auto address = m_slab_address_func(slab_def.m_slab_id); + // look up with the cache first + auto it = m_slabs.find(address); + if (it != m_slabs.end()) { + auto slab = it->second->m_slab.lock(); + if (slab) { + return slab; + } + } + // pull through cache + return openSlab(slab_def).m_slab; + } + + std::shared_ptr SlabManager::openReservedSlab(Address address) const { + return openReservedSlab(address, m_slab_id_func(address)); + } + + std::shared_ptr SlabManager::openReservedSlab(Address address, std::uint32_t slab_id) const + { + assert(m_slab_id_func(address) == slab_id); + if (slab_id >= nextSlabId()) { + THROWF(db0::InputException) << "Slab " << slab_id << " does not exist"; + } + + // look up with the cache first + auto it = m_slabs.find(address); + if (it != m_slabs.end()) { + auto slab = it->second->m_slab.lock(); + if (slab) { + return slab; + } + } + + // retrieve slab definition + auto slab_def_ptr = m_slab_defs.find_equal(slab_id); + if (!slab_def_ptr.first) { + THROWF(db0::InternalException) << "Slab definition not found: " << slab_id; + } + + // pull through cache + auto result = openSlab(*slab_def_ptr.first).m_slab; + // and add for non-expiry cache + m_reserved_slabs.push_back(result); + 
return result; + } + + Address SlabManager::getFirstAddress() const { + return m_slab_address_func(m_realm_id) + SlabAllocator::getFirstAddress(); + } + + void SlabManager::commit() const + { + for (auto &it : m_slabs) { + it.second->commit(); + } + } + + void SlabManager::detach() const + { + // detach all cached slabs + for (auto &it : m_slabs) { + it.second->detach(); + } + // NOTE: we retain the slab element because it's detached + // invalidate cached variable + m_next_slab_id = {}; + } + + std::uint32_t SlabManager::nextSlabId() const + { + if (!m_next_slab_id) { + m_next_slab_id = fetchNextSlabId(); + } + return *m_next_slab_id; + } + + void SlabManager::beginAtomic() + { + assert(!m_atomic); + assert(m_volatile_slabs.empty()); + m_atomic = true; + } + + void SlabManager::endAtomic() + { + assert(m_atomic); + m_volatile_slabs.clear(); + m_atomic = false; + } + + void SlabManager::cancelAtomic() + { + assert(m_atomic); + // revert all volatile slabs from cache + for (auto slab_addr : m_volatile_slabs) { + auto it = m_slabs.find(slab_addr); + if (it != m_slabs.end()) { + m_slabs.erase(it); + } + } + m_active_slab = {}; + m_volatile_slabs.clear(); + m_atomic = false; + } + + void SlabManager::saveItem(CacheItem &item) const + { + // if the remaining capacity has hanged, reflect this with backend + if (item.isModified()) { + auto slab_id = item.m_cap_item.m_slab_id; + if (item.m_final_remaining_capacity != item.m_cap_item.m_remaining_capacity) { + auto it = m_capacity_items.find_equal(item.m_cap_item); + assert(!it.isEnd()); + // register under a modified key + m_capacity_items.erase(it); + m_capacity_items.emplace( + item.m_final_remaining_capacity, item.m_final_lost_capacity, slab_id + ); + } + // and update with the slab defs + auto slab_def_ptr = m_slab_defs.find_equal(slab_id); + m_slab_defs.modify(slab_def_ptr)->m_remaining_capacity = item.m_final_remaining_capacity; + m_slab_defs.modify(slab_def_ptr)->m_lost_capacity = item.m_final_lost_capacity; + + 
item.m_final_remaining_capacity = item.m_cap_item.m_remaining_capacity; + item.m_final_lost_capacity = item.m_cap_item.m_lost_capacity; + assert(!item.isModified()); + } + } + + SlabManager::CacheIterator SlabManager::unregisterSlab(CacheIterator it) const + { + auto cache_item = it->second; + if (!cache_item->m_slab.expired()) { + THROWF(db0::InternalException) + << "Slab " << static_cast(cache_item->m_cap_item.m_slab_id) << " is not closed"; + } + + // auto &item = *cache_item; + // commitItem(item); + return m_slabs.erase(it); + } + + SlabManager::FindResult SlabManager::tryOpenSlab(Address address) const + { + auto it = m_slabs.find(address); + if (it != m_slabs.end()) { + auto result = it->second->m_slab.lock(); + if (result) { + return { result, it->second->m_cap_item }; + } + // unregister expired slab from cache + unregisterSlab(it); + } + + auto slab_id = m_slab_id_func(address); + // retrieve slab definition + auto slab_def_ptr = m_slab_defs.find_equal(slab_id); + if (!slab_def_ptr.first) { + return {}; + } + + return openSlab(*slab_def_ptr.first); + } + + SlabManager::FindResult SlabManager::openSlab(Address address) const + { + auto slab = tryOpenSlab(address); + if (!slab) { + THROWF(db0::BadAddressException) << "Invalid address accessed"; + } + return slab; + } + + SlabManager::FindResult SlabManager::openSlab(const SlabDef &def) const + { + auto cap_item = CapacityItem(def.m_remaining_capacity, def.m_lost_capacity, def.m_slab_id); + auto addr = m_slab_address_func(def.m_slab_id); + auto slab = std::make_shared( + m_prefix, addr, m_slab_size, m_page_size, def.m_remaining_capacity, def.m_lost_capacity + ); + // add to cache (it's safe to reference item from the unordered_map) + auto cache_item = std::make_shared(slab, cap_item); + m_slabs.emplace(addr, cache_item).first->second; + + // append with the recycler + if (m_recycler_ptr) { + m_recycler_ptr->append(slab); + } + + return { slab, cap_item }; + } + + void SlabManager::erase(const FindResult 
&slab, bool cleanup) + { + // erasing the last slab + if (slab.m_cap_item.m_slab_id != nextSlabId() - NUM_REALMS) { + return; + } + + auto addr = m_slab_address_func(slab.m_cap_item.m_slab_id); + // unregister from cache + auto it = m_slabs.find(addr); + if (it != m_slabs.end()) { + m_slabs.erase(it); + } + // unregister from recycler + if (m_recycler_ptr) { + m_recycler_ptr->closeOne([&slab](const SlabAllocator &s) { + return slab.m_slab.get() == &s; + }); + } + // unregister if active + if (m_active_slab.contains(slab)) { + m_active_slab.erase(slab); + } + // unregister from slab defs + if (!m_slab_defs.erase_equal(slab.m_cap_item.m_slab_id).first) { + THROWF(db0::InternalException) << "Slab definition not found."; + } + // unregister from capacity items + if (!m_capacity_items.erase_equal(slab.m_cap_item).first) { + THROWF(db0::InternalException) << "Capacity item not found."; + } + if (!m_next_slab_id) { + m_next_slab_id = fetchNextSlabId(); + } + (*m_next_slab_id) -= NUM_REALMS; + // try removing other empty slabs if such exist + if (cleanup) { + while (!empty()) { + auto slab = openSlab(m_slab_address_func(nextSlabId() - NUM_REALMS)); + if (!slab.m_slab->empty()) { + break; + } + erase(slab, false); + } + } + } + + std::uint32_t SlabManager::fetchNextSlabId() const + { + // determine the max slab id + auto it = m_slab_defs.find_max(); + if (it.first) { + return it.first->m_slab_id + NUM_REALMS; + } else { + // first slab being created + return m_realm_id; + } + } + + std::optional
SlabManager::tryAlloc(std::size_t size, std::uint32_t slot_num, bool aligned, + bool unique, std::uint16_t &instance_id, unsigned char locality) + { + auto slab = tryGetActiveSlab(locality); + bool is_first = true; + bool is_new = false; + for (;;) { + if (slab.m_slab) { + for (;;) { + auto addr = slab.m_slab->tryAlloc(size, 0, aligned); + if (!addr) { + // NOTE: since the last allocation failed, don't use this slab as "active" + resetActiveSlab(locality); + break; + } + + if (!unique || slab.m_slab->tryMakeAddressUnique(*addr, instance_id)) { + return addr; + } + + // unable to make the address unique, schedule for deferred free and try again + // NOTE: the allocation is lost + deferredFree(*addr); + } + if (size > slab.m_slab->getMaxAllocSize()) { + THROWF(db0::InternalException) + << "Requested allocation size " << size << " is larger than the slab size " << slab.m_slab->getMaxAllocSize(); + } + if (is_new) { + THROWF(db0::InternalException) << "Slab is new but cannot allocate " << size; + } + } + if (is_first) { + slab = findFirst(size, locality); + is_first = false; + } else { + slab = findNext(slab, size, locality); + } + if (!slab.m_slab) { + slab = addNewSlab(locality); + is_new = true; + } + } + } + + void SlabManager::free(Address address) + { + if (m_deferred_free) { + deferredFree(address); + } else { + _free(address); + } + } + + void SlabManager::free(Address address, std::uint32_t slab_id) + { + assert(m_deferred_free_ops.find(address) == m_deferred_free_ops.end()); + if (m_deferred_free) { + deferredFree(address); + } else { + _free(address, slab_id); + } + } + + void SlabManager::_free(Address address) { + _free(address, m_slab_id_func(address)); + } + + void SlabManager::_free(Address address, std::uint32_t slab_id) + { + assert(m_slab_id_func(address) == slab_id); + auto slab = find(slab_id); + slab.m_slab->free(address); + if (slab.m_slab->empty()) { + // erase or mark as erased + erase(slab); + } + } + + std::size_t 
SlabManager::getAllocSize(Address address) const { + return getAllocSize(address, m_slab_id_func(address)); + } + + std::size_t SlabManager::getAllocSize(Address address, std::uint32_t slab_id) const + { + if (m_deferred_free_ops.find(address) != m_deferred_free_ops.end()) { + THROWF(db0::BadAddressException) << "Address " << address << " not found (pending deferred free)"; + } + + assert(m_slab_id_func(address) == slab_id); + return find(slab_id).m_slab->getAllocSize(address); + } + + bool SlabManager::isAllocated(Address address, std::size_t *size_of_result) const { + return isAllocated(address, m_slab_id_func(address), size_of_result); + } + + bool SlabManager::isAllocated(Address address, std::uint32_t slab_id, std::size_t *size_of_result) const + { + auto slab = tryFind(slab_id); + if (!slab) { + return false; + } + return slab.m_slab->isAllocated(address, size_of_result); + } + + void SlabManager::forAllSlabs(std::function f) const + { + auto it = m_slab_defs.cbegin(); + for (;!it.is_end();++it) { + auto slab = openExistingSlab(*it); + f(*slab, it->m_slab_id); + } + } + + void SlabManager::deferredFree(Address address) + { + if (m_atomic) { + m_atomic_deferred_free_ops.push_back(address); + } else { + m_deferred_free_ops.insert(address); + } + } + +} \ No newline at end of file diff --git a/src/dbzero/core/memory/SlabManager.hpp b/src/dbzero/core/memory/SlabManager.hpp new file mode 100644 index 00000000..890703d3 --- /dev/null +++ b/src/dbzero/core/memory/SlabManager.hpp @@ -0,0 +1,279 @@ +#pragma once + +#include "Allocator.hpp" +#include "Prefix.hpp" +#include "BitSpace.hpp" +#include "Memspace.hpp" +#include "SlabAllocatorConfig.hpp" +#include "MetaAllocator.hpp" +#include +#include +#include +#include +#include + +namespace db0 + +{ + + /** + * SlabManager allows efficient access to a working set of slabs + * either for read-only or read-write operations + * It's also capable of synchronizing metadata between slabs and the meta-indexes + * The following 
requirements apply: + * - it's only allowed to access slabs via the SlabCache (no direct access permitted) + * - SlabCache must be part of commit/rollback flows + * - SlabCache must be part of atomic operations + */ + class SlabManager + { + public: + static constexpr std::size_t NUM_REALMS = MetaAllocator::NUM_REALMS; + using CapacityItem = MetaAllocator::CapacityItem; + using SlabDef = MetaAllocator::SlabDef; + + SlabManager(std::shared_ptr prefix, MetaAllocator::SlabTreeT &slab_defs, + MetaAllocator::CapacityTreeT &capacity_items, SlabRecycler *recycler, std::uint32_t slab_size, std::uint32_t page_size, + std::function address_func, std::function slab_id_func, + unsigned char realm_id, bool deferred_free); + + std::optional
// Result of a slab lookup: the slab instance plus the capacity item it was found under.
// A default-constructed FindResult (null m_slab) denotes "not found".
struct FindResult
{
    // the slab instance (a SlabAllocator, see operator*); null when nothing was found
    std::shared_ptr m_slab;
    // capacity / ID record associated with the slab at lookup time
    CapacityItem m_cap_item;

    // True only for a non-empty result whose slab ID equals 'slab_id'
    bool operator==(std::uint32_t slab_id) const {
        return m_slab && m_cap_item.m_slab_id == slab_id;
    }

    // Compares by slab ID only; 'rhs.m_slab' itself is not inspected
    bool operator==(const FindResult &rhs) const {
        return *this == rhs.m_cap_item.m_slab_id;
    }

    // Access the underlying slab; must not be used on an empty result
    const SlabAllocator &operator*() const {
        return *m_slab;
    }

    // True when the result is empty (no slab)
    inline bool operator!() const {
        return !m_slab;
    }
};
find(std::uint32_t slab_id) const + { + if ((*this)[0] == slab_id) { + return (*this)[0]; + } else if ((*this)[1] == slab_id) { + return (*this)[1]; + } + return {}; + } + + void erase(const FindResult &slab) + { + if ((*this)[0] == slab) { + (*this)[0] = {}; + } else if ((*this)[1] == slab) { + (*this)[1] = {}; + } else { + assert(false); + THROWF(db0::InternalException) << "Slab not found in active slabs." << THROWF_END; + } + } + }; + + /** + * Retrieves the active slab or returns nullptr if no active slab available + */ + FindResult tryGetActiveSlab(unsigned char locality); + + void resetActiveSlab(unsigned char locality); + + /** + * Retrieve the 1st slab to allocate a block of at least min_capacity + * this is only a 'hint' and if the allocation is not possible, the next slab should be attempted + */ + FindResult findFirst(std::size_t size, unsigned char locality); + + // Continue after findFirst + FindResult findNext(FindResult last_result, std::size_t size, unsigned char locality); + + /** + * Create a new, unregistered slab instance + */ + std::pair, std::uint32_t> createNewSlab(); + + // Create a new, registered slab instance + FindResult addNewSlab(unsigned char locality); + + // Find existing slab by ID + FindResult tryFind(std::uint32_t slab_id) const; + FindResult find(std::uint32_t slab_id) const; + + /** + * Erase if 'slab' is the last slab + */ + void erase(const FindResult &slab); + + std::shared_ptr openExistingSlab(const SlabDef &); + + std::uint32_t nextSlabId() const; + + struct CacheItem + { + SlabManager &m_manager; + std::weak_ptr m_slab; + CapacityItem m_cap_item; + // the slab's remaining capacity reflected with backend when the SlabAllocator gets destroyed + std::uint32_t m_final_remaining_capacity = 0; + std::uint32_t m_final_lost_capacity = 0; + + CacheItem(SlabManager &manager, std::weak_ptr slab, CapacityItem cap) + : m_manager(manager) + , m_slab(slab) + , m_cap_item(cap) + { + } + + void save() + { + if (auto slab = m_slab.lock()) 
{ + if (slab) { + m_final_remaining_capacity = slab->getRemainingCapacity(); + m_final_lost_capacity = slab->getLostCapacity(); + } + } + // reflect changes with the backend + m_manager.saveItem(*this); + } + + void commit() const + { + // NOTE: SlabManager::commit calls back "save" to reflect & persist capacity changes + if (auto slab = m_slab.lock()) { + if (slab) { + slab->commit(); + } + } + } + + void detach() const + { + if (auto slab = m_slab.lock()) { + if (slab) { + slab->detach(); + } + } + } + + // Check if any of the properties changed when compared to "capacity item" + bool isModified() const { + return m_final_remaining_capacity != m_cap_item.m_remaining_capacity || + m_final_lost_capacity != m_cap_item.m_lost_capacity; + } + }; + + using CacheIterator = std::unordered_map >::iterator; + + std::shared_ptr m_prefix; + const unsigned char m_realm_id; + MetaAllocator::SlabTreeT &m_slab_defs; + MetaAllocator::CapacityTreeT &m_capacity_items; + SlabRecycler *m_recycler_ptr = nullptr; + const std::uint32_t m_slab_size; + const std::uint32_t m_page_size; + // slab cache by address + mutable std::unordered_map > m_slabs; + mutable std::vector > m_reserved_slabs; + // active slabs for each supported locality (0 or 1) + mutable ActiveSlab m_active_slab; + // address by allocation ID (from the algo-allocator) + std::function m_slab_address_func; + std::function m_slab_id_func; + mutable std::optional m_next_slab_id; + // addresses of slabs newly created during atomic operations (potentially to be reverted) + mutable std::vector m_volatile_slabs; + // the atomic operation's flag + bool m_atomic = false; + std::vector
m_atomic_deferred_free_ops; + const bool m_deferred_free; + mutable std::unordered_set
m_deferred_free_ops; + + // Update item changes in the backend (if modified) + void saveItem(CacheItem &item) const; + + CacheIterator unregisterSlab(CacheIterator it) const; + + FindResult tryOpenSlab(Address address) const; + + FindResult openSlab(Address address) const; + + // open slab by definition and add to cache + FindResult openSlab(const SlabDef &def) const; + + void erase(const FindResult &slab, bool cleanup); + + std::uint32_t fetchNextSlabId() const; + + void deferredFree(Address); + // internal "free" implementation which performs the dealloc instanly + void _free(Address); + }; + +} \ No newline at end of file diff --git a/src/dbzero/object_model/index/Index.cpp b/src/dbzero/object_model/index/Index.cpp index 9a0ca31d..f9b4cca8 100644 --- a/src/dbzero/object_model/index/Index.cpp +++ b/src/dbzero/object_model/index/Index.cpp @@ -111,7 +111,7 @@ namespace db0::object_model << static_cast(m_new_type) << THROWF_END; } } - + void Index::Builder::flush() { if (!m_index_builder) { diff --git a/src/dbzero/workspace/Workspace.hpp b/src/dbzero/workspace/Workspace.hpp index 5e685789..b143980a 100644 --- a/src/dbzero/workspace/Workspace.hpp +++ b/src/dbzero/workspace/Workspace.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -36,15 +37,14 @@ namespace db0 class BaseWorkspace { public: - // 4KB pages - static constexpr std::size_t DEFAULT_PAGE_SIZE = 4096; + static constexpr std::size_t DEFAULT_PAGE_SIZE = SlabAllocatorConfig::DEFAULT_PAGE_SIZE; + static constexpr std::size_t DEFAULT_SLAB_SIZE = SlabAllocatorConfig::DEFAULT_SLAB_SIZE; + // 16KB sparse index index (memory pages) static constexpr std::size_t DEFAULT_SPARSE_INDEX_NODE_SIZE = 16 * 1024 - 256; - // 64MB slabs - static constexpr std::size_t DEFAULT_SLAB_SIZE = 64 * 1024 * 1024; static constexpr std::size_t DEFAULT_CACHE_SIZE = 2u << 30; static constexpr std::size_t DEFAULT_SLAB_CACHE_SIZE = 256; - + /** * @param root_path default search path for existing 
// Timing-only test: bulk-appends 1,000,000 ascending values (0..999999) to a v_bindex
// and prints the elapsed time measured by ProcessTimer.
// NOTE: there are no assertions — the test only fails if bulkPushBack throws or crashes.
TEST_F( VBIndexTests , testVBIndexBulkPushBack1MSortedElements )
{
    auto memspace = getMemspace();
    // input is generated already sorted
    std::vector values;
    for (std::uint64_t i = 0; i < 1000000; ++i) {
        values.push_back(i);
    }

    db0::v_bindex cut(memspace, memspace.getPageSize());
    // the timer is created after the input is prepared, so only the bulk operation is measured
    db0::ProcessTimer timer("bulkPushBack");
    cut.bulkPushBack(values.begin(), values.end());
    timer.printLog(std::cout) << std::endl;
}
values.push_back(rand()); + } + cut.bulkInsert(values.begin(), values.end()); + count += batch_size; + } + timer.printLog(std::cout) << std::endl; + } + } From 5cc7fa2e2348d1aabac3bc2fbfdb74dc615fc547 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Mon, 10 Nov 2025 16:37:21 +0100 Subject: [PATCH 08/11] WIP: save work --- src/dbzero/core/memory/MetaAllocator.cpp | 56 +------ src/dbzero/core/memory/MetaAllocator.hpp | 157 ++------------------ src/dbzero/core/memory/PageMap.hpp | 26 +++- src/dbzero/core/memory/Recycler.hpp | 90 ++++++++++++ src/dbzero/core/memory/SlabItem.cpp | 40 +++++ src/dbzero/core/memory/SlabItem.hpp | 179 +++++++++++++++++++++++ src/dbzero/core/memory/SlabManager.cpp | 68 ++++++++- src/dbzero/core/memory/SlabManager.hpp | 172 ++++++---------------- src/dbzero/core/memory/SlabRecycler.cpp | 50 ------- src/dbzero/core/memory/SlabRecycler.hpp | 42 ------ 10 files changed, 451 insertions(+), 429 deletions(-) create mode 100644 src/dbzero/core/memory/Recycler.hpp create mode 100644 src/dbzero/core/memory/SlabItem.cpp create mode 100644 src/dbzero/core/memory/SlabItem.hpp delete mode 100644 src/dbzero/core/memory/SlabRecycler.cpp delete mode 100644 src/dbzero/core/memory/SlabRecycler.hpp diff --git a/src/dbzero/core/memory/MetaAllocator.cpp b/src/dbzero/core/memory/MetaAllocator.cpp index 2b500f6d..4a2ea8cf 100644 --- a/src/dbzero/core/memory/MetaAllocator.cpp +++ b/src/dbzero/core/memory/MetaAllocator.cpp @@ -1,7 +1,6 @@ #include "MetaAllocator.hpp" #include "OneShotAllocator.hpp" #include "Memspace.hpp" -#include "SlabRecycler.hpp" #include "SlabManager.hpp" #include #include @@ -11,7 +10,7 @@ namespace db0 { static constexpr double MIN_FILL_RATE = 0.25; - + inline unsigned char getRealmID(std::uint32_t slab_id) { return slab_id & MetaAllocator::REALM_MASK; } @@ -22,8 +21,8 @@ namespace db0 std::size_t max_slab_count = (std::numeric_limits::max() - MP * page_size) / slab_size - 1; // estimate the number of slabs for which the definitions can be stored 
on a single page // this is a very conservative estimate - std::size_t slab_count_1 = (std::size_t)(MIN_FILL_RATE * (double)page_size / (double)sizeof(MetaAllocator::SlabDef)); - std::size_t slab_count_2 = (std::size_t)(MIN_FILL_RATE * (double)page_size / (double)sizeof(MetaAllocator::CapacityItem)) - (2 * MP); + std::size_t slab_count_1 = (std::size_t)(MIN_FILL_RATE * (double)page_size / (double)sizeof(SlabDef)); + std::size_t slab_count_2 = (std::size_t)(MIN_FILL_RATE * (double)page_size / (double)sizeof(CapacityItem)) - (2 * MP); return std::min(max_slab_count, std::min(slab_count_1, slab_count_2)); } @@ -49,8 +48,8 @@ namespace db0 } // Construct the reverse address pool function - std::function MetaAllocator::getReverseAddressPool(std::size_t offset, std::size_t page_size, - std::size_t slab_size) + std::function MetaAllocator::getReverseAddressPool(std::size_t offset, + std::size_t page_size, std::size_t slab_size) { auto slab_count = getSlabCount(page_size, slab_size); // make offset page-aligned @@ -102,7 +101,7 @@ namespace db0 , m_slab_size(slab_size) { } - + std::uint64_t MetaAllocator::Realm::getSlabMaxAddress() const { // take max of the 2 collections @@ -253,9 +252,6 @@ namespace db0 bool MetaAllocator::isAllocated(Address address, std::size_t *size_of_result) const { - if (m_deferred_free_ops.find(address) != m_deferred_free_ops.end()) { - return false; - } auto slab_id = m_slab_id_function(address); auto realm_id = getRealmID(slab_id); return m_realms[realm_id].isAllocated(address, slab_id, size_of_result); @@ -267,9 +263,6 @@ namespace db0 if (realm_id != getRealmID(slab_id)) { THROWF(db0::BadAddressException) << "Invalid address accessed"; } - if (m_deferred_free_ops.find(address) != m_deferred_free_ops.end()) { - return false; - } return m_realms[realm_id].isAllocated(address, slab_id, size_of_result); } @@ -363,20 +356,9 @@ namespace db0 void MetaAllocator::flush() const { assert(!m_atomic); - assert(m_atomic_deferred_free_ops.empty()); - // 
perform the deferred free operations - if (!m_deferred_free_ops.empty()) { - for (auto addr : m_deferred_free_ops) { - const_cast(*this)._free(addr); - } - m_deferred_free_ops.clear(); - } + m_realms.flush(); } - std::size_t MetaAllocator::getDeferredFreeCount() const { - return m_deferred_free_ops.size(); - } - void MetaAllocator::beginAtomic() { assert(!m_atomic); @@ -388,13 +370,6 @@ namespace db0 { assert(m_atomic); m_atomic = false; - // merge atomic deferred free operations - if (!m_atomic_deferred_free_ops.empty()) { - for (auto addr : m_atomic_deferred_free_ops) { - m_deferred_free_ops.insert(addr); - } - m_atomic_deferred_free_ops.clear(); - } m_realms.endAtomic(); } @@ -402,8 +377,6 @@ namespace db0 { assert(m_atomic); m_atomic = false; - // rollback atomic deferred free operations - m_atomic_deferred_free_ops.clear(); m_realms.cancelAtomic(); } @@ -495,18 +468,3 @@ namespace db0 } } - -namespace std - -{ - ostream &operator<<(ostream &os, const db0::MetaAllocator::CapacityItem &item) { - os << "CapacityItem(capacity=" << item.m_remaining_capacity << ", slab=" << item.m_slab_id << ")"; - return os; - } - - ostream &operator<<(ostream &os, const db0::MetaAllocator::SlabDef &def) { - os << "SlabDef(slab=" << def.m_slab_id << ", capacity=" << def.m_remaining_capacity << ")"; - return os; - } - -} \ No newline at end of file diff --git a/src/dbzero/core/memory/MetaAllocator.hpp b/src/dbzero/core/memory/MetaAllocator.hpp index 601d09ca..7e5fd985 100644 --- a/src/dbzero/core/memory/MetaAllocator.hpp +++ b/src/dbzero/core/memory/MetaAllocator.hpp @@ -2,8 +2,10 @@ #include "Prefix.hpp" #include "SlabAllocator.hpp" +#include "SlabItem.hpp" #include "AlgoAllocator.hpp" #include "Allocator.hpp" +#include "Recycler.hpp" #include #include #include @@ -16,10 +18,10 @@ namespace db0 { - - class SlabRecycler; + class SlabManager; - + using SlabRecycler = db0::Recycler; + DB0_PACKED_BEGIN struct DB0_PACKED_ATTR o_realm: public o_fixed_versioned { @@ -66,136 +68,6 @@ 
DB0_PACKED_END */ static void formatPrefix(std::shared_ptr prefix, std::size_t page_size, std::size_t slab_size); -DB0_PACKED_BEGIN - struct DB0_PACKED_ATTR CapacityItem - { - // primary key - std::uint32_t m_remaining_capacity; - std::uint32_t m_lost_capacity; - // secondary key - std::uint32_t m_slab_id; - - CapacityItem() = default; - - CapacityItem(std::uint32_t remaining_capacity, std::uint32_t lost_capacity, std::uint32_t slab_id) - : m_remaining_capacity(remaining_capacity) - , m_lost_capacity(lost_capacity) - , m_slab_id(slab_id) - { - } - - static std::uint64_t getKey(const CapacityItem &item) { - return ((std::uint64_t)item.m_remaining_capacity << 32) | item.m_slab_id; - } - - // Construct key from construction args - static std::uint64_t getKey(std::uint32_t remaining_capacity, std::uint32_t, std::uint32_t slab_id) { - return ((std::uint64_t)remaining_capacity << 32) | slab_id; - } - - inline static std::uint32_t first(std::uint64_t key) { - return static_cast(key >> 32); - } - - inline static std::uint32_t second(std::uint64_t key) { - return static_cast(key & 0xFFFFFFFF); - } - - // note descending order of comparisons - struct CompT - { - inline bool operator()(const CapacityItem &lhs, const CapacityItem &rhs) const { - if (lhs.m_remaining_capacity == rhs.m_remaining_capacity) - return lhs.m_slab_id < rhs.m_slab_id; - return rhs.m_remaining_capacity < lhs.m_remaining_capacity; - } - - inline bool operator()(const CapacityItem &lhs, std::uint64_t rhs) const { - if (lhs.m_remaining_capacity == first(rhs)) - return lhs.m_slab_id < second(rhs); - return first(rhs) < lhs.m_remaining_capacity; - } - - inline bool operator()(std::uint64_t lhs, const CapacityItem &rhs) const { - if (first(lhs) == rhs.m_remaining_capacity) - return second(lhs) < rhs.m_slab_id; - return rhs.m_remaining_capacity < first(lhs); - } - }; - - struct EqualT - { - inline bool operator()(const CapacityItem &lhs, const CapacityItem &rhs) const { - return lhs.m_remaining_capacity == 
rhs.m_remaining_capacity && lhs.m_slab_id == rhs.m_slab_id; - } - - inline bool operator()(const CapacityItem &lhs, std::uint64_t rhs) const { - return lhs.m_remaining_capacity == first(rhs) && lhs.m_slab_id == second(rhs); - } - - inline bool operator()(std::uint64_t lhs, const CapacityItem &rhs) const { - return first(lhs) == rhs.m_remaining_capacity && second(lhs) == rhs.m_slab_id; - } - }; - }; -DB0_PACKED_END - -DB0_PACKED_BEGIN - struct DB0_PACKED_ATTR SlabDef - { - // primary key - std::uint32_t m_slab_id; - std::uint32_t m_remaining_capacity; - std::uint32_t m_lost_capacity; - - SlabDef(std::uint32_t slab_id, std::uint32_t remaining_capacity, std::uint32_t lost_capacity) - : m_slab_id(slab_id) - , m_remaining_capacity(remaining_capacity) - , m_lost_capacity(lost_capacity) - { - } - - static inline std::uint32_t getKey(const SlabDef &item) { - return item.m_slab_id; - } - - // Extract key from construction args - static inline std::uint32_t getKey(std::uint32_t slab_id, std::uint32_t, std::uint32_t) { - return slab_id; - } - - struct CompT - { - inline bool operator()(const SlabDef &lhs, const SlabDef &rhs) const { - return lhs.m_slab_id < rhs.m_slab_id; - } - - inline bool operator()(const SlabDef &lhs, std::uint32_t rhs) const { - return lhs.m_slab_id < rhs; - } - - inline bool operator()(std::uint32_t lhs, const SlabDef &rhs) const { - return lhs < rhs.m_slab_id; - } - }; - - struct EqualT - { - inline bool operator()(const SlabDef &lhs, const SlabDef &rhs) const { - return lhs.m_slab_id == rhs.m_slab_id; - } - - inline bool operator()(const SlabDef &lhs, std::uint32_t rhs) const { - return lhs.m_slab_id == rhs; - } - - inline bool operator()(std::uint32_t lhs, const SlabDef &rhs) const { - return lhs == rhs.m_slab_id; - } - }; - }; -DB0_PACKED_END - using CapacityTreeT = SGB_Tree; using SlabTreeT = SGB_Tree; @@ -240,7 +112,7 @@ DB0_PACKED_END * Retrieve information about the remaining space available to the Slab */ std::uint32_t 
getRemainingCapacity(std::uint32_t slab_id) const; - + /** * Retrieve a new slab reserved for private use * note that this slab will not be available for allocations from MetaAllocator and has to be used directly @@ -306,6 +178,7 @@ DB0_PACKED_END void cancelAtomic(); void forAllSlabs(std::function) const; + void flush() const; }; struct RealmsVector: protected std::vector @@ -316,6 +189,8 @@ DB0_PACKED_END // evaluate the max address from all realms std::uint64_t getSlabMaxAddress() const; + std::size_t getDeferredFreeCount() const; + inline SlabManager &operator[](unsigned char realm_id) { return *at(realm_id).m_slab_manager; } @@ -333,12 +208,12 @@ DB0_PACKED_END void endAtomic(); void cancelAtomic(); + void flush() const; void close(); }; RealmsVector m_realms; - SlabRecycler *m_recycler_ptr; - const bool m_deferred_free; + SlabRecycler *m_recycler_ptr; std::function m_slab_id_function; // flag indicating if the atomic operation is in progress bool m_atomic = false; @@ -362,13 +237,3 @@ DB0_PACKED_END }; } - -namespace std - -{ - - ostream &operator<<(ostream &os, const db0::MetaAllocator::CapacityItem &item); - - ostream &operator<<(ostream &os, const db0::MetaAllocator::SlabDef &item); - -} \ No newline at end of file diff --git a/src/dbzero/core/memory/PageMap.hpp b/src/dbzero/core/memory/PageMap.hpp index c2df3f00..b04e086a 100644 --- a/src/dbzero/core/memory/PageMap.hpp +++ b/src/dbzero/core/memory/PageMap.hpp @@ -67,6 +67,9 @@ namespace db0 // we need to only perform them from a well researched contexts friend class PrefixCache; + void insert(std::unique_lock &, StateNumType state_num, + std::shared_ptr); + // Erase lock stored under a known state number void erase(StateNumType state_num, std::shared_ptr lock); void erase(StateNumType state_num, std::uint64_t page_num); @@ -102,16 +105,23 @@ namespace db0 template void PageMap::insert(StateNumType state_num, std::shared_ptr res_lock) { - std::unique_lock lock(m_rw_mutex); + std::unique_lock 
_lock(m_rw_mutex); m_cache[{res_lock->getAddress() >> m_shift, state_num}] = res_lock; } template - void PageMap::insert(StateNumType state_num, std::shared_ptr lock, + void PageMap::insert(std::unique_lock &, StateNumType state_num, + std::shared_ptr res_lock) + { + m_cache[{res_lock->getAddress() >> m_shift, state_num}] = res_lock; + } + + template + void PageMap::insert(StateNumType state_num, std::shared_ptr res_lock, std::uint64_t page_num) { std::unique_lock _lock(m_rw_mutex); - m_cache[{page_num, state_num}] = lock; + m_cache[{page_num, state_num}] = res_lock; } template @@ -217,13 +227,13 @@ namespace db0 template std::shared_ptr PageMap::replace( - StateNumType state_num, std::shared_ptr lock, std::uint64_t page_num) + StateNumType state_num, std::shared_ptr res_lock, std::uint64_t page_num) { std::unique_lock _lock(m_rw_mutex); // find exact match of the page / state auto it = m_cache.find({page_num, state_num}); if (it == m_cache.end()) { - insert(state_num, lock); + insert(_lock, state_num, res_lock); return {}; } auto existing_lock = it->second.lock(); @@ -232,13 +242,13 @@ namespace db0 // this is fine because we're inserting under updated more recent state assert(state_num >= it->first.second); m_cache.erase(it); - insert(state_num, lock); + insert(_lock, state_num, res_lock); return {}; } - assert(existing_lock->size() == lock->size()); + assert(existing_lock->size() == res_lock->size()); // apply changes from the lock being merged (discarding changes in this lock) - existing_lock->moveFrom(*lock); + existing_lock->moveFrom(*res_lock); return existing_lock; } diff --git a/src/dbzero/core/memory/Recycler.hpp b/src/dbzero/core/memory/Recycler.hpp new file mode 100644 index 00000000..ac34cb47 --- /dev/null +++ b/src/dbzero/core/memory/Recycler.hpp @@ -0,0 +1,90 @@ +#pragma once + +#include +#include +#include "SlabAllocator.hpp" + +namespace db0 + +{ + + // The recycler class helps maintain the lifecycle of a limited number of + // shared_ptr based 
resources (e.g. SlabAllocator instances) + template class Recycler + { + public: + Recycler(unsigned int max_size = 256); + + void append(std::shared_ptr); + + /** + * Get the number of instances currently begin stored + */ + std::size_t size() const; + + /** + * Get the maximum number of instances that could be stored + */ + std::size_t capacity() const; + + /** + * Close / remove all instances that match the predicate + */ + void close(std::function predicate, bool only_first = false); + void closeOne(std::function predicate); + void clear(); + + private: + const unsigned int m_max_size; + std::deque > m_queue; + }; + + template Recycler::Recycler(unsigned int max_size) + : m_max_size(max_size) + { + } + + template void Recycler::append(std::shared_ptr instance) + { + m_queue.push_back(instance); + while (m_queue.size() > m_max_size) { + m_queue.pop_front(); + } + } + + template + std::size_t Recycler::size() const { + return m_slabs.size(); + } + + template + std::size_t Recycler::capacity() const { + return m_max_size; + } + + template + void Recycler::close(std::function predicate, bool only_first) + { + for (auto it = m_queue.begin(); it != m_queue.end();) { + if (predicate(**it)) { + it = m_queue.erase(it); + if (only_first) { + break; + } + } else { + ++it; + } + } + } + + template + void Recycler::closeOne(std::function predicate) { + close(predicate, true); + } + + template + void Recycler::clear() { + m_queue.clear(); + } + +} \ No newline at end of file diff --git a/src/dbzero/core/memory/SlabItem.cpp b/src/dbzero/core/memory/SlabItem.cpp new file mode 100644 index 00000000..549e2dac --- /dev/null +++ b/src/dbzero/core/memory/SlabItem.cpp @@ -0,0 +1,40 @@ +#include "SlabItem.hpp" + +namespace db0 + +{ + + SlabItem::SlabItem(std::shared_ptr slab, CapacityItem cap) + : m_slab(slab) + , m_cap_item(cap) + { + } + + void SlabItem::commit() const + { + assert(m_slab); + m_slab->commit(); + } + + void SlabItem::detach() const + { + assert(m_slab); + 
m_slab->detach(); + } + +} + +namespace std + +{ + ostream &operator<<(ostream &os, const db0::CapacityItem &item) { + os << "CapacityItem(capacity=" << item.m_remaining_capacity << ", slab=" << item.m_slab_id << ")"; + return os; + } + + ostream &operator<<(ostream &os, const db0::SlabDef &def) { + os << "SlabDef(slab=" << def.m_slab_id << ", capacity=" << def.m_remaining_capacity << ")"; + return os; + } + +} \ No newline at end of file diff --git a/src/dbzero/core/memory/SlabItem.hpp b/src/dbzero/core/memory/SlabItem.hpp new file mode 100644 index 00000000..43dcf824 --- /dev/null +++ b/src/dbzero/core/memory/SlabItem.hpp @@ -0,0 +1,179 @@ +#pragma once + +#include "SlabAllocator.hpp" + +namespace db0 + +{ + +DB0_PACKED_BEGIN + struct DB0_PACKED_ATTR CapacityItem + { + // primary key (high part) + std::uint32_t m_remaining_capacity; + std::uint32_t m_lost_capacity; + // primary key (low part) + std::uint32_t m_slab_id; + + CapacityItem() = default; + + CapacityItem(std::uint32_t remaining_capacity, std::uint32_t lost_capacity, std::uint32_t slab_id) + : m_remaining_capacity(remaining_capacity) + , m_lost_capacity(lost_capacity) + , m_slab_id(slab_id) + { + } + + static std::uint64_t getKey(const CapacityItem &item) { + return ((std::uint64_t)item.m_remaining_capacity << 32) | item.m_slab_id; + } + + // Construct key from construction args + static std::uint64_t getKey(std::uint32_t remaining_capacity, std::uint32_t, std::uint32_t slab_id) { + return ((std::uint64_t)remaining_capacity << 32) | slab_id; + } + + inline static std::uint32_t first(std::uint64_t key) { + return static_cast(key >> 32); + } + + inline static std::uint32_t second(std::uint64_t key) { + return static_cast(key & 0xFFFFFFFF); + } + + // note descending order of comparisons + struct CompT + { + inline bool operator()(const CapacityItem &lhs, const CapacityItem &rhs) const { + if (lhs.m_remaining_capacity == rhs.m_remaining_capacity) + return lhs.m_slab_id < rhs.m_slab_id; + return 
rhs.m_remaining_capacity < lhs.m_remaining_capacity; + } + + inline bool operator()(const CapacityItem &lhs, std::uint64_t rhs) const { + if (lhs.m_remaining_capacity == first(rhs)) + return lhs.m_slab_id < second(rhs); + return first(rhs) < lhs.m_remaining_capacity; + } + + inline bool operator()(std::uint64_t lhs, const CapacityItem &rhs) const { + if (first(lhs) == rhs.m_remaining_capacity) + return second(lhs) < rhs.m_slab_id; + return rhs.m_remaining_capacity < first(lhs); + } + }; + + struct EqualT + { + inline bool operator()(const CapacityItem &lhs, const CapacityItem &rhs) const { + return lhs.m_remaining_capacity == rhs.m_remaining_capacity && lhs.m_slab_id == rhs.m_slab_id; + } + + inline bool operator()(const CapacityItem &lhs, std::uint64_t rhs) const { + return lhs.m_remaining_capacity == first(rhs) && lhs.m_slab_id == second(rhs); + } + + inline bool operator()(std::uint64_t lhs, const CapacityItem &rhs) const { + return first(lhs) == rhs.m_remaining_capacity && second(lhs) == rhs.m_slab_id; + } + }; + }; +DB0_PACKED_END + +DB0_PACKED_BEGIN + struct DB0_PACKED_ATTR SlabDef + { + // primary key + std::uint32_t m_slab_id; + std::uint32_t m_remaining_capacity; + std::uint32_t m_lost_capacity; + + SlabDef(std::uint32_t slab_id, std::uint32_t remaining_capacity, std::uint32_t lost_capacity) + : m_slab_id(slab_id) + , m_remaining_capacity(remaining_capacity) + , m_lost_capacity(lost_capacity) + { + } + + static inline std::uint32_t getKey(const SlabDef &item) { + return item.m_slab_id; + } + + // Extract key from construction args + static inline std::uint32_t getKey(std::uint32_t slab_id, std::uint32_t, std::uint32_t) { + return slab_id; + } + + struct CompT + { + inline bool operator()(const SlabDef &lhs, const SlabDef &rhs) const { + return lhs.m_slab_id < rhs.m_slab_id; + } + + inline bool operator()(const SlabDef &lhs, std::uint32_t rhs) const { + return lhs.m_slab_id < rhs; + } + + inline bool operator()(std::uint32_t lhs, const SlabDef &rhs) const { 
+ return lhs < rhs.m_slab_id; + } + }; + + struct EqualT + { + inline bool operator()(const SlabDef &lhs, const SlabDef &rhs) const { + return lhs.m_slab_id == rhs.m_slab_id; + } + + inline bool operator()(const SlabDef &lhs, std::uint32_t rhs) const { + return lhs.m_slab_id == rhs; + } + + inline bool operator()(std::uint32_t lhs, const SlabDef &rhs) const { + return lhs == rhs.m_slab_id; + } + }; + }; +DB0_PACKED_END + + struct SlabItem + { + std::shared_ptr m_slab; + // the capacity item as last retrieved from the backend (may need update) + CapacityItem m_cap_item; + + SlabItem(std::shared_ptr slab, CapacityItem cap); + + void commit() const; + void detach() const; + + bool operator==(std::uint32_t slab_id) const { + assert(m_slab) + return m_cap_item.m_slab_id == slab_id; + } + + bool operator==(const SlabItem &rhs) const { + return *this == rhs.m_cap_item.m_slab_id; + } + + const SlabAllocator &operator*() const { + assert(m_slab); + return *m_slab; + } + + const SlabAllocator *operator->() const { + assert(m_slab); + return m_slab.get(); + } + }; + +} + +namespace std + +{ + + ostream &operator<<(ostream &os, const db0::CapacityItem &item); + ostream &operator<<(ostream &os, const db0::SlabDef &item); + +} \ No newline at end of file diff --git a/src/dbzero/core/memory/SlabManager.cpp b/src/dbzero/core/memory/SlabManager.cpp index 884e49b1..18067663 100644 --- a/src/dbzero/core/memory/SlabManager.cpp +++ b/src/dbzero/core/memory/SlabManager.cpp @@ -23,6 +23,36 @@ namespace db0 { } + bool SlabManager::ActiveSlab::contains(std::uint32_t slab_id) const { + return ((*this)[0] == slab_id || (*this)[1] == slab_id); + } + + bool SlabManager::ActiveSlab::contains(const FindResult &slab) const { + return ((*this)[0] == slab || (*this)[1] == slab); + } + + SlabManager::FindResult SlabManager::ActiveSlab::find(std::uint32_t slab_id) const + { + if ((*this)[0] == slab_id) { + return (*this)[0]; + } else if ((*this)[1] == slab_id) { + return (*this)[1]; + } + return {}; 
+ } + + void SlabManager::ActiveSlab::erase(const FindResult &slab) + { + if ((*this)[0] == slab) { + (*this)[0] = {}; + } else if ((*this)[1] == slab) { + (*this)[1] = {}; + } else { + assert(false); + THROWF(db0::InternalException) << "Slab not found in active slabs." << THROWF_END; + } + } + SlabManager::FindResult SlabManager::tryGetActiveSlab(unsigned char locality) { assert(locality < m_active_slab.size()); return m_active_slab[locality]; @@ -212,7 +242,7 @@ namespace db0 m_capacity_items.insert(cap_item); return slab; } - + std::shared_ptr SlabManager::openExistingSlab(const SlabDef &slab_def) { if (slab_def.m_slab_id >= nextSlabId()) { @@ -304,6 +334,14 @@ namespace db0 void SlabManager::endAtomic() { assert(m_atomic); + // merge atomic deferred free operations + if (!m_atomic_deferred_free_ops.empty()) { + for (auto addr : m_atomic_deferred_free_ops) { + m_deferred_free_ops.insert(addr); + } + m_atomic_deferred_free_ops.clear(); + } + m_volatile_slabs.clear(); m_atomic = false; } @@ -311,6 +349,9 @@ namespace db0 void SlabManager::cancelAtomic() { assert(m_atomic); + // rollback atomic deferred free operations + m_atomic_deferred_free_ops.clear(); + // revert all volatile slabs from cache for (auto slab_addr : m_volatile_slabs) { auto it = m_slabs.find(slab_addr); @@ -569,6 +610,10 @@ namespace db0 bool SlabManager::isAllocated(Address address, std::uint32_t slab_id, std::size_t *size_of_result) const { + if (m_deferred_free_ops.find(address) != m_deferred_free_ops.end()) { + return false; + } + auto slab = tryFind(slab_id); if (!slab) { return false; @@ -580,11 +625,11 @@ namespace db0 { auto it = m_slab_defs.cbegin(); for (;!it.is_end();++it) { - auto slab = openExistingSlab(*it); + auto slab = const_cast(*this).openExistingSlab(*it); f(*slab, it->m_slab_id); } } - + void SlabManager::deferredFree(Address address) { if (m_atomic) { @@ -594,4 +639,21 @@ namespace db0 } } + void SlabManager::flush() const + { + assert(!m_atomic); + 
assert(m_atomic_deferred_free_ops.empty()); + // perform the deferred free operations + if (!m_deferred_free_ops.empty()) { + for (auto addr : m_deferred_free_ops) { + const_cast(*this)._free(addr); + } + m_deferred_free_ops.clear(); + } + } + + std::size_t SlabManager::getDeferredFreeCount() const { + return m_deferred_free_ops.size(); + } + } \ No newline at end of file diff --git a/src/dbzero/core/memory/SlabManager.hpp b/src/dbzero/core/memory/SlabManager.hpp index 890703d3..b0a142e3 100644 --- a/src/dbzero/core/memory/SlabManager.hpp +++ b/src/dbzero/core/memory/SlabManager.hpp @@ -5,6 +5,7 @@ #include "BitSpace.hpp" #include "Memspace.hpp" #include "SlabAllocatorConfig.hpp" +#include "SlabItem.hpp" #include "MetaAllocator.hpp" #include #include @@ -54,16 +55,18 @@ namespace db0 return (nextSlabId() - m_realm_id) / NUM_REALMS; } + // NOTE: reserved slabs are not updated in the CapacityItems tree + // since they're registered with capacity = 0 (to avoid using them in regular allocations) std::shared_ptr reserveNewSlab(); - - /** - * Open an existing slab which has been previously reserved - */ + + // Open an existing reserved slab std::shared_ptr openReservedSlab(Address) const; std::shared_ptr openReservedSlab(Address, std::uint32_t slab_id) const; std::uint32_t getRemainingCapacity(std::uint32_t slab_id) const; + std::size_t getDeferredFreeCount() const; + Address getFirstAddress() const; bool empty() const; @@ -80,156 +83,59 @@ namespace db0 void forAllSlabs(std::function f) const; + void flush() const; + private: - - struct FindResult - { - std::shared_ptr m_slab; - CapacityItem m_cap_item; - - bool operator==(std::uint32_t slab_id) const { - return m_slab && m_cap_item.m_slab_id == slab_id; - } - - bool operator==(const FindResult &rhs) const { - return *this == rhs.m_cap_item.m_slab_id; - } - - const SlabAllocator &operator*() const { - return *m_slab; - } - - inline bool operator!() const { - return !m_slab; - } - }; // NOTE: only localities 0 and 1 are 
currently supported - struct ActiveSlab: public std::array + struct ActiveSlab: public std::array, 2> { - bool contains(std::uint32_t slab_id) const { - return ((*this)[0] == slab_id || (*this)[1] == slab_id); - } - - bool contains(const FindResult &slab) const { - return ((*this)[0] == slab || (*this)[1] == slab); - } + bool contains(std::uint32_t slab_id) const; + bool contains(const SlabItem &slab) const; - FindResult find(std::uint32_t slab_id) const - { - if ((*this)[0] == slab_id) { - return (*this)[0]; - } else if ((*this)[1] == slab_id) { - return (*this)[1]; - } - return {}; - } + std::shared_ptr find(std::uint32_t slab_id) const; - void erase(const FindResult &slab) - { - if ((*this)[0] == slab) { - (*this)[0] = {}; - } else if ((*this)[1] == slab) { - (*this)[1] = {}; - } else { - assert(false); - THROWF(db0::InternalException) << "Slab not found in active slabs." << THROWF_END; - } - } + void erase(const SlabItem &slab); }; /** * Retrieves the active slab or returns nullptr if no active slab available */ - FindResult tryGetActiveSlab(unsigned char locality); - + std::shared_ptr tryGetActiveSlab(unsigned char locality); void resetActiveSlab(unsigned char locality); /** * Retrieve the 1st slab to allocate a block of at least min_capacity * this is only a 'hint' and if the allocation is not possible, the next slab should be attempted */ - FindResult findFirst(std::size_t size, unsigned char locality); + std::shared_ptr findFirst(std::size_t size, unsigned char locality); // Continue after findFirst - FindResult findNext(FindResult last_result, std::size_t size, unsigned char locality); - + std::shared_ptr findNext(std::shared_ptr last_result, std::size_t size, + unsigned char locality); + /** * Create a new, unregistered slab instance */ std::pair, std::uint32_t> createNewSlab(); - - // Create a new, registered slab instance - FindResult addNewSlab(unsigned char locality); + + // Create a new, registered slab instance + std::shared_ptr addNewSlab(unsigned 
char locality); // Find existing slab by ID - FindResult tryFind(std::uint32_t slab_id) const; - FindResult find(std::uint32_t slab_id) const; - + std::shared_ptr tryFind(std::uint32_t slab_id) const; + std::shared_ptr find(std::uint32_t slab_id) const; + /** * Erase if 'slab' is the last slab */ - void erase(const FindResult &slab); + void erase(std::shared_ptr); std::shared_ptr openExistingSlab(const SlabDef &); - + std::uint32_t nextSlabId() const; - - struct CacheItem - { - SlabManager &m_manager; - std::weak_ptr m_slab; - CapacityItem m_cap_item; - // the slab's remaining capacity reflected with backend when the SlabAllocator gets destroyed - std::uint32_t m_final_remaining_capacity = 0; - std::uint32_t m_final_lost_capacity = 0; - - CacheItem(SlabManager &manager, std::weak_ptr slab, CapacityItem cap) - : m_manager(manager) - , m_slab(slab) - , m_cap_item(cap) - { - } - - void save() - { - if (auto slab = m_slab.lock()) { - if (slab) { - m_final_remaining_capacity = slab->getRemainingCapacity(); - m_final_lost_capacity = slab->getLostCapacity(); - } - } - // reflect changes with the backend - m_manager.saveItem(*this); - } - - void commit() const - { - // NOTE: SlabManager::commit calls back "save" to reflect & persist capacity changes - if (auto slab = m_slab.lock()) { - if (slab) { - slab->commit(); - } - } - } - - void detach() const - { - if (auto slab = m_slab.lock()) { - if (slab) { - slab->detach(); - } - } - } - - // Check if any of the properties changed when compared to "capacity item" - bool isModified() const { - return m_final_remaining_capacity != m_cap_item.m_remaining_capacity || - m_final_lost_capacity != m_cap_item.m_lost_capacity; - } - }; - using CacheIterator = std::unordered_map >::iterator; + using CacheIterator = std::unordered_map >::iterator; std::shared_ptr m_prefix; const unsigned char m_realm_id; @@ -239,7 +145,7 @@ namespace db0 const std::uint32_t m_slab_size; const std::uint32_t m_page_size; // slab cache by address - mutable 
std::unordered_map > m_slabs; + mutable std::unordered_map > m_slabs; mutable std::vector > m_reserved_slabs; // active slabs for each supported locality (0 or 1) mutable ActiveSlab m_active_slab; @@ -254,26 +160,30 @@ namespace db0 std::vector
m_atomic_deferred_free_ops; const bool m_deferred_free; mutable std::unordered_set
m_deferred_free_ops; + // the list of modified slabs (need backend refresh) + std::vector > m_dirty_slabs; // Update item changes in the backend (if modified) void saveItem(CacheItem &item) const; CacheIterator unregisterSlab(CacheIterator it) const; - - FindResult tryOpenSlab(Address address) const; - - FindResult openSlab(Address address) const; + + std::shared_ptr tryOpenSlab(Address address) const; + std::shared_ptr openSlab(Address address) const; + // open slab by definition and add to cache - FindResult openSlab(const SlabDef &def) const; - - void erase(const FindResult &slab, bool cleanup); - + std::shared_ptr openSlab(const SlabDef &def) const; + + void erase(std::shared_ptr, bool cleanup); + std::uint32_t fetchNextSlabId() const; void deferredFree(Address); + // internal "free" implementation which performs the dealloc instanly void _free(Address); + void _free(Address, std::uint32_t slab_id); }; } \ No newline at end of file diff --git a/src/dbzero/core/memory/SlabRecycler.cpp b/src/dbzero/core/memory/SlabRecycler.cpp deleted file mode 100644 index e8e7c144..00000000 --- a/src/dbzero/core/memory/SlabRecycler.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include "SlabRecycler.hpp" - -namespace db0 - -{ - - SlabRecycler::SlabRecycler(unsigned int max_size) - : m_max_size(max_size) - { - } - - void SlabRecycler::append(std::shared_ptr slab) - { - m_slabs.push_back(slab); - while (m_slabs.size() > m_max_size) { - m_slabs.pop_front(); - } - } - - std::size_t SlabRecycler::size() const { - return m_slabs.size(); - } - - std::size_t SlabRecycler::capacity() const { - return m_max_size; - } - - void SlabRecycler::close(std::function predicate, bool only_first) - { - for (auto it = m_slabs.begin(); it != m_slabs.end();) { - if (predicate(**it)) { - it = m_slabs.erase(it); - if (only_first) { - break; - } - } else { - ++it; - } - } - } - - void SlabRecycler::closeOne(std::function predicate) { - close(predicate, true); - } - - void SlabRecycler::clear() { - 
m_slabs.clear(); - } - -} \ No newline at end of file diff --git a/src/dbzero/core/memory/SlabRecycler.hpp b/src/dbzero/core/memory/SlabRecycler.hpp deleted file mode 100644 index a26cb3a3..00000000 --- a/src/dbzero/core/memory/SlabRecycler.hpp +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include -#include -#include "SlabAllocator.hpp" - -namespace db0 - -{ - - class SlabRecycler - { - public: - SlabRecycler(unsigned int max_size = 256); - - void append(std::shared_ptr slab); - - /** - * Get the number of slab currently begin stored - */ - std::size_t size() const; - - /** - * Get the maximum number of slab that could be stored - */ - std::size_t capacity() const; - - /** - * Close / remove all SlabAllocator instances that match the predicate - */ - void close(std::function predicate, bool only_first = false); - - void closeOne(std::function predicate); - - void clear(); - - private: - const unsigned int m_max_size; - std::deque > m_slabs; - }; - -} \ No newline at end of file From 549dd344e057cf2b424311e90b62ee60e34cb9ba Mon Sep 17 00:00:00 2001 From: Wojtek Date: Mon, 10 Nov 2025 19:18:47 +0100 Subject: [PATCH 09/11] WIP: save work --- src/dbzero/core/memory/MetaAllocator.cpp | 54 ++--- src/dbzero/core/memory/MetaAllocator.hpp | 19 +- src/dbzero/core/memory/Recycler.hpp | 4 +- src/dbzero/core/memory/SlabItem.cpp | 4 + src/dbzero/core/memory/SlabItem.hpp | 16 +- src/dbzero/core/memory/SlabManager.cpp | 278 +++++++++++++---------- src/dbzero/core/memory/SlabManager.hpp | 31 ++- src/dbzero/workspace/Workspace.hpp | 6 +- tests/unit_tests/CapacityTreeTest.cpp | 6 +- tests/unit_tests/MetaAllocatorTest.cpp | 5 +- tests/utils/TestWorkspace.hpp | 7 +- 11 files changed, 239 insertions(+), 191 deletions(-) diff --git a/src/dbzero/core/memory/MetaAllocator.cpp b/src/dbzero/core/memory/MetaAllocator.cpp index 4a2ea8cf..5567d676 100644 --- a/src/dbzero/core/memory/MetaAllocator.cpp +++ b/src/dbzero/core/memory/MetaAllocator.cpp @@ -117,7 +117,7 @@ namespace db0 } return 
max_addr; } - + MetaAllocator::MetaAllocator(std::shared_ptr prefix, SlabRecycler *recycler, bool deferred_free) : m_prefix(prefix) , m_header(getMetaHeader(prefix)) @@ -281,16 +281,12 @@ namespace db0 return m_realms[realm_id].getRemainingCapacity(slab_id); } - void MetaAllocator::Realm::close() { - m_slab_manager->close(); - } - void MetaAllocator::close() { if (m_recycler_ptr) { // unregister all owned (i.e. associated with the same prefix) slabs from the recycler - m_recycler_ptr->close([this](const SlabAllocator &slab) { - return &slab.getPrefix() == m_prefix.get(); + m_recycler_ptr->close([this](const SlabItem &slab) { + return &slab->getPrefix() == m_prefix.get(); }); } m_realms.close(); @@ -344,11 +340,7 @@ namespace db0 SlabRecycler *MetaAllocator::getSlabRecyclerPtr() const { return m_recycler_ptr; } - - void MetaAllocator::Realm::forAllSlabs(std::function f) const { - m_slab_manager->forAllSlabs(f); - } - + void MetaAllocator::forAllSlabs(std::function f) const { m_realms.forAllSlabs(f); } @@ -396,7 +388,7 @@ namespace db0 void MetaAllocator::RealmsVector::forAllSlabs(std::function f) const { for (const auto &realm: *this) { - realm.forAllSlabs(f); + realm->forAllSlabs(f); } } @@ -417,28 +409,28 @@ namespace db0 void MetaAllocator::RealmsVector::beginAtomic() { for (auto &realm: *this) { - realm.beginAtomic(); + realm->beginAtomic(); } } void MetaAllocator::RealmsVector::endAtomic() { for (auto &realm: *this) { - realm.endAtomic(); + realm->endAtomic(); } } void MetaAllocator::RealmsVector::cancelAtomic() { for (auto &realm: *this) { - realm.cancelAtomic(); + realm->cancelAtomic(); } } void MetaAllocator::RealmsVector::close() { for (auto &realm: *this) { - realm.close(); + realm->close(); } } @@ -446,25 +438,33 @@ namespace db0 { std::uint64_t max_addr = 0; for (const auto &realm : *this) { - max_addr = std::max(max_addr, realm.getSlabMaxAddress()); + max_addr = std::max(max_addr, realm.getSlabMaxAddress()); } return max_addr; } - void 
MetaAllocator::Realm::beginAtomic() { - m_slab_manager->beginAtomic(); - } - - void MetaAllocator::Realm::endAtomic() { - m_slab_manager->endAtomic(); + void MetaAllocator::RealmsVector::flush() const + { + for (const auto &realm : *this) { + realm->flush(); + } } - void MetaAllocator::Realm::cancelAtomic() { - m_slab_manager->cancelAtomic(); + std::size_t MetaAllocator::RealmsVector::getDeferredFreeCount() const + { + std::size_t result = 0; + for (const auto &realm : *this) { + result += realm->getDeferredFreeCount(); + } + return result; } std::uint32_t MetaAllocator::getSlabId(Address address) const { return m_slab_id_function(address); } + + std::size_t MetaAllocator::getDeferredFreeCount() const { + return m_realms.getDeferredFreeCount(); + } -} +} \ No newline at end of file diff --git a/src/dbzero/core/memory/MetaAllocator.hpp b/src/dbzero/core/memory/MetaAllocator.hpp index 7e5fd985..0334bb70 100644 --- a/src/dbzero/core/memory/MetaAllocator.hpp +++ b/src/dbzero/core/memory/MetaAllocator.hpp @@ -166,19 +166,19 @@ DB0_PACKED_END Realm(Memspace &, std::shared_ptr, SlabRecycler *, o_realm, std::uint32_t slab_size, std::uint32_t page_size, unsigned char realm_id, bool deferred_free); - - // get the max address from all underlying slabs + std::uint64_t getSlabMaxAddress() const; - void close(); + void commit() const; void detach() const; - - void beginAtomic(); - void endAtomic(); - void cancelAtomic(); - void forAllSlabs(std::function) const; - void flush() const; + SlabManager *operator->() { + return m_slab_manager.get(); + } + + const SlabManager *operator->() const { + return m_slab_manager.get(); + } }; struct RealmsVector: protected std::vector @@ -188,7 +188,6 @@ DB0_PACKED_END // evaluate the max address from all realms std::uint64_t getSlabMaxAddress() const; - std::size_t getDeferredFreeCount() const; inline SlabManager &operator[](unsigned char realm_id) { diff --git a/src/dbzero/core/memory/Recycler.hpp b/src/dbzero/core/memory/Recycler.hpp index 
ac34cb47..a0d26678 100644 --- a/src/dbzero/core/memory/Recycler.hpp +++ b/src/dbzero/core/memory/Recycler.hpp @@ -54,7 +54,7 @@ namespace db0 template std::size_t Recycler::size() const { - return m_slabs.size(); + return m_queue.size(); } template @@ -81,7 +81,7 @@ namespace db0 void Recycler::closeOne(std::function predicate) { close(predicate, true); } - + template void Recycler::clear() { m_queue.clear(); diff --git a/src/dbzero/core/memory/SlabItem.cpp b/src/dbzero/core/memory/SlabItem.cpp index 549e2dac..8372ccf6 100644 --- a/src/dbzero/core/memory/SlabItem.cpp +++ b/src/dbzero/core/memory/SlabItem.cpp @@ -10,6 +10,10 @@ namespace db0 { } + SlabItem::~SlabItem() { + assert(!m_is_dirty && "SlabItem destroyed while still dirty"); + } + void SlabItem::commit() const { assert(m_slab); diff --git a/src/dbzero/core/memory/SlabItem.hpp b/src/dbzero/core/memory/SlabItem.hpp index 43dcf824..907b3bcd 100644 --- a/src/dbzero/core/memory/SlabItem.hpp +++ b/src/dbzero/core/memory/SlabItem.hpp @@ -141,14 +141,16 @@ DB0_PACKED_END std::shared_ptr m_slab; // the capacity item as last retrieved from the backend (may need update) CapacityItem m_cap_item; + bool m_is_dirty = false; SlabItem(std::shared_ptr slab, CapacityItem cap); + ~SlabItem(); void commit() const; void detach() const; - + bool operator==(std::uint32_t slab_id) const { - assert(m_slab) + assert(m_slab); return m_cap_item.m_slab_id == slab_id; } @@ -156,6 +158,11 @@ DB0_PACKED_END return *this == rhs.m_cap_item.m_slab_id; } + SlabAllocator &operator*() { + assert(m_slab); + return *m_slab; + } + const SlabAllocator &operator*() const { assert(m_slab); return *m_slab; @@ -165,6 +172,11 @@ DB0_PACKED_END assert(m_slab); return m_slab.get(); } + + SlabAllocator *operator->(){ + assert(m_slab); + return m_slab.get(); + } }; } diff --git a/src/dbzero/core/memory/SlabManager.cpp b/src/dbzero/core/memory/SlabManager.cpp index 18067663..c45e0969 100644 --- a/src/dbzero/core/memory/SlabManager.cpp +++ 
b/src/dbzero/core/memory/SlabManager.cpp @@ -1,5 +1,4 @@ #include "SlabManager.hpp" -#include "SlabRecycler.hpp" namespace db0 @@ -24,24 +23,24 @@ namespace db0 } bool SlabManager::ActiveSlab::contains(std::uint32_t slab_id) const { - return ((*this)[0] == slab_id || (*this)[1] == slab_id); + return (((*this)[0] && *(*this)[0] == slab_id) || ((*this)[1] && *(*this)[1] == slab_id)); } - - bool SlabManager::ActiveSlab::contains(const FindResult &slab) const { + + bool SlabManager::ActiveSlab::contains(std::shared_ptr slab) const { return ((*this)[0] == slab || (*this)[1] == slab); } - SlabManager::FindResult SlabManager::ActiveSlab::find(std::uint32_t slab_id) const + std::shared_ptr SlabManager::ActiveSlab::find(std::uint32_t slab_id) const { - if ((*this)[0] == slab_id) { + if ((*this)[0] && *(*this)[0] == slab_id) { return (*this)[0]; - } else if ((*this)[1] == slab_id) { + } else if ((*this)[1] && *(*this)[1] == slab_id) { return (*this)[1]; } return {}; } - - void SlabManager::ActiveSlab::erase(const FindResult &slab) + + void SlabManager::ActiveSlab::erase(std::shared_ptr slab) { if ((*this)[0] == slab) { (*this)[0] = {}; @@ -52,19 +51,23 @@ namespace db0 THROWF(db0::InternalException) << "Slab not found in active slabs." 
<< THROWF_END; } } - - SlabManager::FindResult SlabManager::tryGetActiveSlab(unsigned char locality) { + + std::shared_ptr SlabManager::tryGetActiveSlab(unsigned char locality) + { assert(locality < m_active_slab.size()); return m_active_slab[locality]; } - void SlabManager::resetActiveSlab(unsigned char locality) { + void SlabManager::resetActiveSlab(unsigned char locality) + { assert(locality < m_active_slab.size()); m_active_slab[locality] = {}; } - SlabManager::FindResult SlabManager::findFirst(std::size_t size, unsigned char locality) + std::shared_ptr SlabManager::findFirst(std::size_t size, unsigned char locality) { + // NOTE: before accessing capacity items we must synchronize any updates + saveDirtySlabs(); // visit slabs starting from the largest available capacity auto min_capacity = std::max(size, SlabAllocatorConfig::MIN_OP_CAPACITY()); auto it = m_capacity_items.cbegin(); @@ -79,21 +82,23 @@ namespace db0 ++it; continue; } - auto slab = openSlab(m_slab_address_func(it->m_slab_id)); + auto slab = openSlab(m_slab_address_func(it->m_slab_id)); // make the slab active m_active_slab[locality] = slab; return slab; } } - - SlabManager::FindResult SlabManager::findNext(FindResult last_result, std::size_t size, + + std::shared_ptr SlabManager::findNext(std::shared_ptr last_result, std::size_t size, unsigned char locality) { + saveDirtySlabs(); auto min_capacity = std::max(size, SlabAllocatorConfig::MIN_OP_CAPACITY()); + auto last_key = last_result->m_cap_item; for (;;) { // this is to find the next item in order - last_result.m_cap_item.m_slab_id += NUM_REALMS; - auto it = m_capacity_items.upper_equal_bound(last_result.m_cap_item); + last_key.m_slab_id += NUM_REALMS; + auto it = m_capacity_items.upper_equal_bound(last_key); if (!it.first || it.first->m_remaining_capacity < min_capacity) { return {}; } @@ -108,7 +113,7 @@ namespace db0 return slab; } } - + std::pair, std::uint32_t> SlabManager::createNewSlab() { if (!m_next_slab_id) { @@ -129,8 +134,8 @@ 
namespace db0 return { slab, slab_id }; } - - SlabManager::FindResult SlabManager::addNewSlab(unsigned char locality) + + std::shared_ptr SlabManager::addNewSlab(unsigned char locality) { auto [slab, slab_id] = createNewSlab(); auto address = m_slab_address_func(slab_id); @@ -147,16 +152,16 @@ namespace db0 // register with capacity items m_capacity_items.insert(cap_item); // add to cache - auto cache_item = std::make_shared(slab, cap_item); + auto cache_item = std::make_shared(slab, cap_item); m_slabs.emplace(address, cache_item); - + // append with the recycler if (m_recycler_ptr) { - m_recycler_ptr->append(slab); + m_recycler_ptr->append(cache_item); } - + // make the newly added slab active - m_active_slab[locality] = { slab, cap_item }; + m_active_slab[locality] = cache_item; return m_active_slab[locality]; } @@ -166,13 +171,13 @@ namespace db0 auto address = m_slab_address_func(slab_id); auto it = m_slabs.find(address); if (it != m_slabs.end()) { - auto slab = it->second->m_slab.lock(); - if (slab) { - return slab->getRemainingCapacity(); + auto slab = it->second.lock(); + if (slab) { + return (*slab)->getRemainingCapacity(); } } - // look up with the slab defs next + // look up with the slab defs if not in cache auto slab_def_ptr = m_slab_defs.find_equal(slab_id); if (!slab_def_ptr.first) { THROWF(db0::InternalException) << "Slab definition not found."; @@ -184,12 +189,11 @@ namespace db0 { m_active_slab = {}; m_reserved_slabs.clear(); - for (auto it = m_slabs.begin(); it != m_slabs.end();) { - it = unregisterSlab(it); - } - } - - SlabManager::FindResult SlabManager::tryFind(std::uint32_t slab_id) const + saveDirtySlabs(); + m_slabs.clear(); + } + + std::shared_ptr SlabManager::tryFind(std::uint32_t slab_id) const { if (slab_id < nextSlabId()) { if (m_active_slab.contains(slab_id)) { @@ -199,10 +203,12 @@ namespace db0 auto address = m_slab_address_func(slab_id); auto it = m_slabs.find(address); if (it != m_slabs.end()) { - auto slab = 
it->second->m_slab.lock(); - if (slab) { - return { slab, it->second->m_cap_item }; + auto slab_item = it->second.lock(); + if (slab_item) { + return slab_item; } + // remove expired cache entry + m_slabs.erase(it); } return tryOpenSlab(address); @@ -210,7 +216,7 @@ namespace db0 return {}; } - SlabManager::FindResult SlabManager::find(std::uint32_t slab_id) const + std::shared_ptr SlabManager::find(std::uint32_t slab_id) const { auto slab = tryFind(slab_id); if (!slab) { @@ -219,14 +225,14 @@ namespace db0 return slab; } - void SlabManager::erase(const FindResult &slab) { + void SlabManager::erase(std::shared_ptr slab) { erase(slab, true); } - + bool SlabManager::empty() const { return nextSlabId() == m_realm_id; } - + std::shared_ptr SlabManager::reserveNewSlab() { auto [slab, slab_id] = createNewSlab(); @@ -252,13 +258,13 @@ namespace db0 // look up with the cache first auto it = m_slabs.find(address); if (it != m_slabs.end()) { - auto slab = it->second->m_slab.lock(); - if (slab) { - return slab; + auto slab_item = it->second.lock(); + if (slab_item) { + return slab_item->m_slab; } } // pull through cache - return openSlab(slab_def).m_slab; + return openSlab(slab_def)->m_slab; } std::shared_ptr SlabManager::openReservedSlab(Address address) const { @@ -275,9 +281,9 @@ namespace db0 // look up with the cache first auto it = m_slabs.find(address); if (it != m_slabs.end()) { - auto slab = it->second->m_slab.lock(); - if (slab) { - return slab; + auto slab_item = it->second.lock(); + if (slab_item) { + return slab_item->m_slab; } } @@ -288,7 +294,7 @@ namespace db0 } // pull through cache - auto result = openSlab(*slab_def_ptr.first).m_slab; + auto result = openSlab(*slab_def_ptr.first)->m_slab; // and add for non-expiry cache m_reserved_slabs.push_back(result); return result; @@ -297,25 +303,32 @@ namespace db0 Address SlabManager::getFirstAddress() const { return m_slab_address_func(m_realm_id) + SlabAllocator::getFirstAddress(); } - + void SlabManager::commit() 
const { - for (auto &it : m_slabs) { - it.second->commit(); + saveDirtySlabs(); + for (auto &item : m_slabs) { + auto slab_item = item.second.lock(); + if (slab_item) { + slab_item->commit(); + } } } - + void SlabManager::detach() const { // detach all cached slabs - for (auto &it : m_slabs) { - it.second->detach(); - } + for (auto &item : m_slabs) { + auto slab_item = item.second.lock(); + if (slab_item) { + slab_item->detach(); + } + } // NOTE: we retain the slab element because it's detached // invalidate cached variable m_next_slab_id = {}; } - + std::uint32_t SlabManager::nextSlabId() const { if (!m_next_slab_id) { @@ -330,7 +343,7 @@ namespace db0 assert(m_volatile_slabs.empty()); m_atomic = true; } - + void SlabManager::endAtomic() { assert(m_atomic); @@ -350,12 +363,16 @@ namespace db0 { assert(m_atomic); // rollback atomic deferred free operations - m_atomic_deferred_free_ops.clear(); + m_atomic_deferred_free_ops.clear(); // revert all volatile slabs from cache for (auto slab_addr : m_volatile_slabs) { auto it = m_slabs.find(slab_addr); if (it != m_slabs.end()) { + auto slab_item = it->second.lock(); + if (slab_item) { + slab_item->m_is_dirty = false; + } m_slabs.erase(it); } } @@ -363,55 +380,53 @@ namespace db0 m_volatile_slabs.clear(); m_atomic = false; } - - void SlabManager::saveItem(CacheItem &item) const + + void SlabManager::saveItem(SlabItem &item) const { // if the remaining capacity has hanged, reflect this with backend - if (item.isModified()) { + if (item.m_is_dirty) { auto slab_id = item.m_cap_item.m_slab_id; - if (item.m_final_remaining_capacity != item.m_cap_item.m_remaining_capacity) { - auto it = m_capacity_items.find_equal(item.m_cap_item); - assert(!it.isEnd()); - // register under a modified key - m_capacity_items.erase(it); - m_capacity_items.emplace( - item.m_final_remaining_capacity, item.m_final_lost_capacity, slab_id - ); - } + auto remaining_capacity = item->getRemainingCapacity(); + auto lost_capacity = item->getLostCapacity(); 
+ + auto it = m_capacity_items.find_equal(item.m_cap_item); + assert(!it.isEnd()); + + // re-register under a modified key + m_capacity_items.erase(it); + m_capacity_items.emplace( + remaining_capacity, lost_capacity, slab_id + ); + // and update with the slab defs auto slab_def_ptr = m_slab_defs.find_equal(slab_id); - m_slab_defs.modify(slab_def_ptr)->m_remaining_capacity = item.m_final_remaining_capacity; - m_slab_defs.modify(slab_def_ptr)->m_lost_capacity = item.m_final_lost_capacity; + m_slab_defs.modify(slab_def_ptr)->m_remaining_capacity = remaining_capacity; + m_slab_defs.modify(slab_def_ptr)->m_lost_capacity = lost_capacity; - item.m_final_remaining_capacity = item.m_cap_item.m_remaining_capacity; - item.m_final_lost_capacity = item.m_cap_item.m_lost_capacity; - assert(!item.isModified()); + // update cached item + item.m_cap_item.m_remaining_capacity = remaining_capacity; + item.m_cap_item.m_lost_capacity = lost_capacity; + item.m_is_dirty = false; } } - SlabManager::CacheIterator SlabManager::unregisterSlab(CacheIterator it) const + void SlabManager::saveDirtySlabs() const { - auto cache_item = it->second; - if (!cache_item->m_slab.expired()) { - THROWF(db0::InternalException) - << "Slab " << static_cast(cache_item->m_cap_item.m_slab_id) << " is not closed"; + for (auto &slab_item : m_dirty_slabs) { + saveItem(*slab_item); } - - // auto &item = *cache_item; - // commitItem(item); - return m_slabs.erase(it); + m_dirty_slabs.clear(); } - SlabManager::FindResult SlabManager::tryOpenSlab(Address address) const + std::shared_ptr SlabManager::tryOpenSlab(Address address) const { auto it = m_slabs.find(address); if (it != m_slabs.end()) { - auto result = it->second->m_slab.lock(); - if (result) { - return { result, it->second->m_cap_item }; + auto slab_item = it->second.lock(); + if (slab_item) { + return slab_item; } - // unregister expired slab from cache - unregisterSlab(it); + m_slabs.erase(it); } auto slab_id = m_slab_id_func(address); @@ -421,10 +436,10 @@ 
namespace db0 return {}; } - return openSlab(*slab_def_ptr.first); + return openSlab(*slab_def_ptr.first); } - SlabManager::FindResult SlabManager::openSlab(Address address) const + std::shared_ptr SlabManager::openSlab(Address address) const { auto slab = tryOpenSlab(address); if (!slab) { @@ -433,7 +448,7 @@ namespace db0 return slab; } - SlabManager::FindResult SlabManager::openSlab(const SlabDef &def) const + std::shared_ptr SlabManager::openSlab(const SlabDef &def) const { auto cap_item = CapacityItem(def.m_remaining_capacity, def.m_lost_capacity, def.m_slab_id); auto addr = m_slab_address_func(def.m_slab_id); @@ -441,34 +456,39 @@ namespace db0 m_prefix, addr, m_slab_size, m_page_size, def.m_remaining_capacity, def.m_lost_capacity ); // add to cache (it's safe to reference item from the unordered_map) - auto cache_item = std::make_shared(slab, cap_item); - m_slabs.emplace(addr, cache_item).first->second; + auto cache_item = std::make_shared(slab, cap_item); + m_slabs.emplace(addr, cache_item); // append with the recycler if (m_recycler_ptr) { - m_recycler_ptr->append(slab); + m_recycler_ptr->append(cache_item); } - return { slab, cap_item }; + return cache_item; } - void SlabManager::erase(const FindResult &slab, bool cleanup) + void SlabManager::erase(std::shared_ptr slab, bool cleanup) { - // erasing the last slab - if (slab.m_cap_item.m_slab_id != nextSlabId() - NUM_REALMS) { + assert(slab); + // Only the last slab can be erased + if (slab->m_cap_item.m_slab_id != nextSlabId() - NUM_REALMS) { return; } - auto addr = m_slab_address_func(slab.m_cap_item.m_slab_id); + auto slab_id = slab->m_cap_item.m_slab_id; + auto addr = m_slab_address_func(slab_id); + // clear the dirty flag since it's being erased anyway + slab->m_is_dirty = false; // unregister from cache auto it = m_slabs.find(addr); if (it != m_slabs.end()) { m_slabs.erase(it); } + // unregister from recycler if (m_recycler_ptr) { - m_recycler_ptr->closeOne([&slab](const SlabAllocator &s) { - return 
slab.m_slab.get() == &s; + m_recycler_ptr->closeOne([&slab](const SlabItem &item) { + return slab.get() == &item; }); } // unregister if active @@ -476,11 +496,11 @@ namespace db0 m_active_slab.erase(slab); } // unregister from slab defs - if (!m_slab_defs.erase_equal(slab.m_cap_item.m_slab_id).first) { + if (!m_slab_defs.erase_equal(slab_id).first) { THROWF(db0::InternalException) << "Slab definition not found."; } // unregister from capacity items - if (!m_capacity_items.erase_equal(slab.m_cap_item).first) { + if (!m_capacity_items.erase_equal(slab->m_cap_item).first) { THROWF(db0::InternalException) << "Capacity item not found."; } if (!m_next_slab_id) { @@ -491,7 +511,7 @@ namespace db0 if (cleanup) { while (!empty()) { auto slab = openSlab(m_slab_address_func(nextSlabId() - NUM_REALMS)); - if (!slab.m_slab->empty()) { + if (!((*slab)->empty())) { break; } erase(slab, false); @@ -518,16 +538,21 @@ namespace db0 bool is_first = true; bool is_new = false; for (;;) { - if (slab.m_slab) { + if (slab) { for (;;) { - auto addr = slab.m_slab->tryAlloc(size, 0, aligned); + auto addr = (*slab)->tryAlloc(size, 0, aligned); if (!addr) { // NOTE: since the last allocation failed, don't use this slab as "active" resetActiveSlab(locality); break; } - if (!unique || slab.m_slab->tryMakeAddressUnique(*addr, instance_id)) { + if (!unique || ((*slab)->tryMakeAddressUnique(*addr, instance_id))) { + // modified, add to dirty slabs + if (!slab->m_is_dirty) { + slab->m_is_dirty = true; + m_dirty_slabs.push_back(slab); + } return addr; } @@ -535,9 +560,9 @@ namespace db0 // NOTE: the allocation is lost deferredFree(*addr); } - if (size > slab.m_slab->getMaxAllocSize()) { + if (size > ((*slab)->getMaxAllocSize())) { THROWF(db0::InternalException) - << "Requested allocation size " << size << " is larger than the slab size " << slab.m_slab->getMaxAllocSize(); + << "Requested allocation size " << size << " is larger than the slab size " << (*slab)->getMaxAllocSize(); } if (is_new) { 
THROWF(db0::InternalException) << "Slab is new but cannot allocate " << size; @@ -549,7 +574,7 @@ namespace db0 } else { slab = findNext(slab, size, locality); } - if (!slab.m_slab) { + if (!slab) { slab = addNewSlab(locality); is_new = true; } @@ -564,7 +589,7 @@ namespace db0 _free(address); } } - + void SlabManager::free(Address address, std::uint32_t slab_id) { assert(m_deferred_free_ops.find(address) == m_deferred_free_ops.end()); @@ -578,15 +603,22 @@ namespace db0 void SlabManager::_free(Address address) { _free(address, m_slab_id_func(address)); } - + void SlabManager::_free(Address address, std::uint32_t slab_id) { assert(m_slab_id_func(address) == slab_id); auto slab = find(slab_id); - slab.m_slab->free(address); - if (slab.m_slab->empty()) { + assert(slab); + (*slab)->free(address); + if ((*slab)->empty()) { // erase or mark as erased erase(slab); + } else { + // modified, add to dirty slabs + if (!slab->m_is_dirty) { + slab->m_is_dirty = true; + m_dirty_slabs.push_back(slab); + } } } @@ -599,9 +631,9 @@ namespace db0 if (m_deferred_free_ops.find(address) != m_deferred_free_ops.end()) { THROWF(db0::BadAddressException) << "Address " << address << " not found (pending deferred free)"; } - + assert(m_slab_id_func(address) == slab_id); - return find(slab_id).m_slab->getAllocSize(address); + return (*find(slab_id))->getAllocSize(address); } bool SlabManager::isAllocated(Address address, std::size_t *size_of_result) const { @@ -618,7 +650,7 @@ namespace db0 if (!slab) { return false; } - return slab.m_slab->isAllocated(address, size_of_result); + return ((*slab)->isAllocated(address, size_of_result)); } void SlabManager::forAllSlabs(std::function f) const @@ -651,7 +683,7 @@ namespace db0 m_deferred_free_ops.clear(); } } - + std::size_t SlabManager::getDeferredFreeCount() const { return m_deferred_free_ops.size(); } diff --git a/src/dbzero/core/memory/SlabManager.hpp b/src/dbzero/core/memory/SlabManager.hpp index b0a142e3..e40f34cf 100644 --- 
a/src/dbzero/core/memory/SlabManager.hpp +++ b/src/dbzero/core/memory/SlabManager.hpp @@ -30,11 +30,11 @@ namespace db0 { public: static constexpr std::size_t NUM_REALMS = MetaAllocator::NUM_REALMS; - using CapacityItem = MetaAllocator::CapacityItem; - using SlabDef = MetaAllocator::SlabDef; + using SlabTreeT = MetaAllocator::SlabTreeT; + using CapacityTreeT = MetaAllocator::CapacityTreeT; - SlabManager(std::shared_ptr prefix, MetaAllocator::SlabTreeT &slab_defs, - MetaAllocator::CapacityTreeT &capacity_items, SlabRecycler *recycler, std::uint32_t slab_size, std::uint32_t page_size, + SlabManager(std::shared_ptr prefix, SlabTreeT &slab_defs, + CapacityTreeT &capacity_items, SlabRecycler *recycler, std::uint32_t slab_size, std::uint32_t page_size, std::function address_func, std::function slab_id_func, unsigned char realm_id, bool deferred_free); @@ -91,13 +91,13 @@ namespace db0 struct ActiveSlab: public std::array, 2> { bool contains(std::uint32_t slab_id) const; - bool contains(const SlabItem &slab) const; + bool contains(std::shared_ptr) const; std::shared_ptr find(std::uint32_t slab_id) const; - void erase(const SlabItem &slab); + void erase(std::shared_ptr); }; - + /** * Retrieves the active slab or returns nullptr if no active slab available */ @@ -135,12 +135,10 @@ namespace db0 std::uint32_t nextSlabId() const; - using CacheIterator = std::unordered_map >::iterator; - std::shared_ptr m_prefix; const unsigned char m_realm_id; - MetaAllocator::SlabTreeT &m_slab_defs; - MetaAllocator::CapacityTreeT &m_capacity_items; + SlabTreeT &m_slab_defs; + CapacityTreeT &m_capacity_items; SlabRecycler *m_recycler_ptr = nullptr; const std::uint32_t m_slab_size; const std::uint32_t m_page_size; @@ -161,15 +159,14 @@ namespace db0 const bool m_deferred_free; mutable std::unordered_set
m_deferred_free_ops; // the list of modified slabs (need backend refresh) - std::vector > m_dirty_slabs; + mutable std::vector > m_dirty_slabs; - // Update item changes in the backend (if modified) - void saveItem(CacheItem &item) const; - - CacheIterator unregisterSlab(CacheIterator it) const; + // Reflect item changes with the backend (if modified) + void saveItem(SlabItem &item) const; + // Save all dirty slabs to the backend + void saveDirtySlabs() const; std::shared_ptr tryOpenSlab(Address address) const; - std::shared_ptr openSlab(Address address) const; // open slab by definition and add to cache diff --git a/src/dbzero/workspace/Workspace.hpp b/src/dbzero/workspace/Workspace.hpp index b143980a..742b2124 100644 --- a/src/dbzero/workspace/Workspace.hpp +++ b/src/dbzero/workspace/Workspace.hpp @@ -10,7 +10,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -33,7 +34,8 @@ namespace db0 class LangCache; class Config; class WorkspaceView; - + using SlabRecycler = Recycler; + class BaseWorkspace { public: diff --git a/tests/unit_tests/CapacityTreeTest.cpp b/tests/unit_tests/CapacityTreeTest.cpp index 6e8ad2f4..2375f14b 100644 --- a/tests/unit_tests/CapacityTreeTest.cpp +++ b/tests/unit_tests/CapacityTreeTest.cpp @@ -1,6 +1,7 @@ #include #include -#include +#include +#include #include #include #include @@ -50,8 +51,7 @@ namespace tests TEST_F( CapacityTreeTests , testCapacityTreeInsertEraseIssue1 ) { - using CapacityTreeT = typename db0::MetaAllocator::CapacityTreeT; - using CapacityItem = typename db0::MetaAllocator::CapacityItem; + using CapacityTreeT = typename db0::MetaAllocator::CapacityTreeT; std::vector realms; realms.emplace_back(m_bitspace, page_size); diff --git a/tests/unit_tests/MetaAllocatorTest.cpp b/tests/unit_tests/MetaAllocatorTest.cpp index 3ee14587..aae06d29 100644 --- a/tests/unit_tests/MetaAllocatorTest.cpp +++ b/tests/unit_tests/MetaAllocatorTest.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include 
#include #include #include @@ -14,7 +14,8 @@ namespace tests { using namespace db0; - + using SlabRecycler = db0::Recycler; + // a proxy class to expose protected members for testing class MetaAllocatorProxy: public MetaAllocator { diff --git a/tests/utils/TestWorkspace.hpp b/tests/utils/TestWorkspace.hpp index c829ac9e..af09e0b7 100644 --- a/tests/utils/TestWorkspace.hpp +++ b/tests/utils/TestWorkspace.hpp @@ -5,7 +5,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -73,11 +74,11 @@ namespace db0 void tearDown(); std::size_t size() const override; - + private: const std::size_t m_slab_size; FixedObjectList m_shared_object_list; - SlabRecycler m_slab_recycler; + Recycler m_slab_recycler; db0::swine_ptr m_current_fixture; std::unordered_map > m_fixtures; std::unordered_map m_uuids; From c4f2eb7b99493fb569bec6fad20a0f763f848b3a Mon Sep 17 00:00:00 2001 From: Wojtek Date: Tue, 11 Nov 2025 19:43:54 +0100 Subject: [PATCH 10/11] fixes --- python_tests/test_cache.py | 3 + python_tests/test_issues_12.py | 101 ++++++++++++++++ python_tests/test_memo_no_cache.py | 26 ++--- src/dbzero/bindings/python/PyAPI.cpp | 14 +++ .../core/collections/range_tree/RangeTree.hpp | 9 +- src/dbzero/core/memory/CacheRecycler.cpp | 68 +++++++---- src/dbzero/core/memory/CacheRecycler.hpp | 21 ++-- src/dbzero/core/memory/PageMap.hpp | 30 +++-- src/dbzero/core/memory/SlabAllocator.cpp | 2 +- .../core/memory/SlabAllocatorConfig.hpp | 17 ++- src/dbzero/core/memory/SlabManager.cpp | 23 ++-- src/dbzero/object_model/index/Index.hpp | 2 - .../object_model/index/IndexBuilder.hpp | 5 +- .../object_model/value/TypedAddress.cpp | 10 +- .../object_model/value/TypedAddress.hpp | 23 +++- src/dbzero/workspace/Fixture.cpp | 24 ++-- src/dbzero/workspace/GC0.cpp | 110 +++++++----------- src/dbzero/workspace/GC0.hpp | 38 +++--- tests/unit_tests/MetaAllocatorTest.cpp | 6 +- 19 files changed, 336 insertions(+), 196 deletions(-) create mode 100644 
python_tests/test_issues_12.py diff --git a/python_tests/test_cache.py b/python_tests/test_cache.py index b2fabb72..2db5843d 100644 --- a/python_tests/test_cache.py +++ b/python_tests/test_cache.py @@ -16,15 +16,18 @@ def rand_array(max_bytes): def test_cache_size_can_be_updated_at_runtime(db0_fixture): cache_0 = db0.get_cache_stats() + print(cache_0) # create object instances to populate cache buf = [] for _ in range(1000): buf.append(MemoTestClass(get_string(1024))) cache_1 = db0.get_cache_stats() + print(cache_1) diff_1 = cache_1["size"] - cache_0["size"] # reduce cache size so that only 1/2 of objects can fit db0.set_cache_size(512 * 1024) cache_2 = db0.get_cache_stats() + print(cache_2) # make sure cache size / capacity was adjusted with at least 95% accuracy assert abs(1.0 - (512 * 1024) / cache_2["size"]) < 0.05 assert abs(1.0 - cache_2["capacity"] / cache_2["size"]) < 0.05 diff --git a/python_tests/test_issues_12.py b/python_tests/test_issues_12.py new file mode 100644 index 00000000..1a51d492 --- /dev/null +++ b/python_tests/test_issues_12.py @@ -0,0 +1,101 @@ +import dbzero as db0 +import pytest +from .conftest import DB0_DIR +from datetime import datetime +from .memo_test_types import MemoBlob +from dataclasses import dataclass +import random +import time +from typing import Dict, List + + +@db0.memo +@dataclass +class Issuer: + tax_id: int + inv_list: List + inv_index: db0.index + + +@db0.memo(no_cache=True) +@dataclass +class Invoice: + tax_id: int + issue_dt: datetime + data: bytes + + +def get_random_tax_id(tax_ids_set=set()): + tax_id = random.randint(1000000000, 9999999999) + while tax_id in tax_ids_set: + tax_id = random.randint(1000000000, 9999999999) + tax_ids_set.add(tax_id) + return tax_id + + +@pytest.mark.stress_test +@pytest.mark.parametrize("db0_slab_size", [{"slab_size": 64 << 20, "autocommit": False}], indirect=True) +def test_no_cache_allocator_issue(db0_slab_size): + db0.set_cache_size(8 << 30) + # create 25 k unique tax_id numbers + 
tax_id_count = 25000 + tax_id_numbers = set() + print(f"Generating {tax_id_count} unique tax_id numbers") + for i in range(tax_id_count): + if i % 5000 == 0: + print(f"Generated {i} tax_id numbers so far") + get_random_tax_id(tax_id_numbers) + tax_id_list = list(tax_id_numbers) + + issuers = {} + + for i, tax_id in enumerate(tax_id_list): + if i % 5000 == 0: + print(f"Created {i} issuers so far") + new_issuer = Issuer(tax_id=tax_id, inv_list=[], inv_index=db0.index()) + issuers[tax_id] = new_issuer + + execution_time = 15 + RANDOM_BYTES = b'DB0'*22000 + total_size = 0 + count_of_objects = 0 + new_objects = 0 + db0.commit() + print("Starting benchmark loop") + last_report = time.perf_counter() + start = last_report + while True: + # get random number between 0 and 100 + random_number = random.randint(0, 100) + if random_number < 90: + data_size = random.randint(500, 2000) + else: + data_size = random.randint(8000, 64000) + + random_tax_id = random.choice(tax_id_list) + issuer = issuers[random_tax_id] + invoice = Invoice(tax_id=issuer.tax_id, issue_dt=datetime.now(), data=RANDOM_BYTES[:data_size]) + issuer.inv_list.append(invoice) + issuer.inv_index.add(datetime.now(), invoice) + count_of_objects += 1 + new_objects += 1 + + total_size += data_size + # report every 3 seconds + now = time.perf_counter() + if (now - last_report) >= 3: + commit_start = time.perf_counter() + db0.commit() + commit_end = time.perf_counter() + print(f"Commit time: {(commit_end - commit_start)} seconds") + + now = time.perf_counter() + print(f"Objects / sec {float(new_objects) / (now - last_report)}, Total objects: {count_of_objects}, Total size: {total_size} bytes") + print(db0.get_storage_stats()) + print(db0.get_lang_cache_stats()) + new_objects = 0 + last_report = now + + if (now - start) > execution_time: + break + \ No newline at end of file diff --git a/python_tests/test_memo_no_cache.py b/python_tests/test_memo_no_cache.py index 40fca2c8..d3e1577a 100644 --- 
a/python_tests/test_memo_no_cache.py +++ b/python_tests/test_memo_no_cache.py @@ -53,15 +53,15 @@ def test_memo_no_cache_issue1(db0_fixture): del obj -def test_excluding_no_cache_instances_from_dbzero_cache(db0_fixture): +def test_excluding_no_cache_instances_from_P0_cache(db0_fixture): buf = db0.list() - initial_cache_size = db0.get_cache_stats()["size"] + initial_cache_size = db0.get_cache_stats()["P_size"]["P0"] for _ in range(100): - obj = MemoNoCacheClass() + obj = MemoNoCacheClass() buf.append(obj) - gc.collect() - final_cache_size = db0.get_cache_stats()["size"] + gc.collect() + final_cache_size = db0.get_cache_stats()["P_size"]["P0"] # make sure cache utilization is low assert abs(final_cache_size - initial_cache_size) < (350 << 10) @@ -80,7 +80,7 @@ def test_fetching_no_cache_objects(db0_fixture): db0.open(px_name, "r") # now fetch objects by uuid - initial_cache_size = db0.get_cache_stats()["size"] + initial_cache_size = db0.get_cache_stats()["P_size"]["P0"] total_len = 0 for id in uuid_list: # NOTE: must fetch with type, otherwise no_cache flag may not be honored @@ -88,7 +88,7 @@ def test_fetching_no_cache_objects(db0_fixture): # this forces data retrieval total_len += len(obj.data) - final_cache_size = db0.get_cache_stats()["size"] + final_cache_size = db0.get_cache_stats()["P_size"]["P0"] # make sure cache utilization is low assert abs(final_cache_size - initial_cache_size) < (300 << 10) @@ -105,14 +105,14 @@ def test_find_no_cache_objects(db0_fixture): db0.open(px_name, "r") # now retrieve objects using db0.find - initial_cache_size = db0.get_cache_stats()["size"] + initial_cache_size = db0.get_cache_stats()["P_size"]["P0"] total_len = 0 for obj in db0.find(MemoNoCacheClass): # this forces data retrieval (but not caching) total_len += len(obj.data) assert total_len > 0 - final_cache_size = db0.get_cache_stats()["size"] + final_cache_size = db0.get_cache_stats()["P_size"]["P0"] # make sure cache utilization is low assert abs(final_cache_size - 
initial_cache_size) < (300 << 10) @@ -131,7 +131,7 @@ def test_fetching_no_cache_objects(db0_fixture): db0.open(px_name, "r") # now fetch objects by uuid - initial_cache_size = db0.get_cache_stats()["size"] + initial_cache_size = db0.get_cache_stats()["P_size"]["P0"] total_len = 0 for id in uuid_list: # NOTE: must fetch with type, otherwise no_cache flag may not be honored @@ -139,7 +139,7 @@ def test_fetching_no_cache_objects(db0_fixture): # this forces data retrieval total_len += len(obj.data) - final_cache_size = db0.get_cache_stats()["size"] + final_cache_size = db0.get_cache_stats()["P_size"]["P0"] # make sure cache utilization is low assert abs(final_cache_size - initial_cache_size) < (300 << 10) @@ -156,13 +156,13 @@ def test_find_no_cache_objects(db0_fixture): db0.open(px_name, "r") # now retrieve objects using db0.find - initial_cache_size = db0.get_cache_stats()["size"] + initial_cache_size = db0.get_cache_stats()["P_size"]["P0"] total_len = 0 for obj in db0.find(MemoNoCacheClass): # this forces data retrieval (but not caching) total_len += len(obj.data) assert total_len > 0 - final_cache_size = db0.get_cache_stats()["size"] + final_cache_size = db0.get_cache_stats()["P_size"]["P0"] # make sure cache utilization is low assert abs(final_cache_size - initial_cache_size) < (350 << 10) diff --git a/src/dbzero/bindings/python/PyAPI.cpp b/src/dbzero/bindings/python/PyAPI.cpp index 09154342..10be1b04 100644 --- a/src/dbzero/bindings/python/PyAPI.cpp +++ b/src/dbzero/bindings/python/PyAPI.cpp @@ -61,6 +61,20 @@ namespace db0::python } PySafeDict_SetItemString(*dict, "size", Py_OWN(PyLong_FromLong(cache_recycler.size()))); + + { + std::vector detailed_size = cache_recycler.getDetailedSize(); + auto detailed_size_dict = Py_OWN(PyDict_New()); + unsigned int priority_index = 0; + for (auto size: detailed_size) { + std::stringstream key_str; + key_str << "P" << priority_index++; + PySafeDict_SetItemString(*detailed_size_dict, key_str.str().c_str(), 
Py_OWN(PyLong_FromLong(size))); + } + // cache size with a by-priority breakdown + PySafeDict_SetItemString(*dict, "P_size", detailed_size_dict); + } + PySafeDict_SetItemString(*dict, "capacity", Py_OWN(PyLong_FromLong(cache_recycler.getCapacity()))); PySafeDict_SetItemString(*dict, "deferred_free_count", Py_OWN(PyLong_FromLong(deferred_free_count))); PySafeDict_SetItemString(*dict, "lang_cache_size", Py_OWN(PyLong_FromLong(lang_cache_size))); diff --git a/src/dbzero/core/collections/range_tree/RangeTree.hpp b/src/dbzero/core/collections/range_tree/RangeTree.hpp index 989a3523..66fcd91f 100644 --- a/src/dbzero/core/collections/range_tree/RangeTree.hpp +++ b/src/dbzero/core/collections/range_tree/RangeTree.hpp @@ -158,8 +158,6 @@ DB0_PACKED_END if (!range.isUnbound() || begin == end) { break; } - // FIXME: log - std::cout << "!!! bulkInsert continuing to insert into new range" << std::endl; // in case of unbound ranges (i.e. the last range) append a new one and continue range = insertRange(*begin); } @@ -433,18 +431,15 @@ DB0_PACKED_END } } - // Forwards a value to the add item callback - /* FIXME: log + // Forwards a value to the add item callback std::function add_item_callback = [&](ItemT item) { (*add_callback_ptr)(item.m_value); }; std::function *add_item_callback_ptr = (add_callback_ptr ? 
&add_item_callback : nullptr); return (*this)->bulkInsertUnique(begin_item, end_item, add_item_callback_ptr).second; - */ - return (*this)->bulkInsertUnique(begin_item, end_item).second; } - + /** * Erase existing elements, ignore non-existing ones * @return number of erased elements diff --git a/src/dbzero/core/memory/CacheRecycler.cpp b/src/dbzero/core/memory/CacheRecycler.cpp index 8d06cc97..2851ffab 100644 --- a/src/dbzero/core/memory/CacheRecycler.cpp +++ b/src/dbzero/core/memory/CacheRecycler.cpp @@ -7,6 +7,7 @@ namespace db0 { + // Calculate target capacity for specific priority std::size_t getCapacity(std::size_t total_capacity, int priority) { auto result = total_capacity; @@ -27,8 +28,9 @@ namespace db0 std::optional flush_size, std::function flush_dirty, std::function flush_callback) - : m_capacity { db0::getCapacity(capacity, 0), db0::getCapacity(capacity, 1) } - , m_res_bufs { getMaxSize(m_capacity[0]), getMaxSize(m_capacity[1]) } + : m_capacity(capacity) + // NOTE: buffers are overprovisioned + , m_res_bufs { getMaxSize(m_capacity), getMaxSize(m_capacity) } , m_dirty_meter(dirty_meter) // assign default flush size , m_flush_size(flush_size.value_or(DEFAULT_FLUSH_SIZE)) @@ -116,20 +118,19 @@ namespace db0 } else { // add new resource (if to be cached) auto lock_size = res_lock->usedMem(); - auto &res_buf = m_res_bufs[priority]; - auto capacity = m_capacity[priority]; - if (lock_size > capacity) { + auto &res_buf = m_res_bufs[priority]; + if (lock_size > m_capacity) { // Cache size is too small to keep this resource // (or is uninitialized) return; } m_current_size[priority] += lock_size; - if (m_current_size[priority] > capacity) { + if (getCurrentSize() > m_capacity) { // try reducing cache utilization to capacity minus flush size - auto flush_size = std::min(capacity >> 1, m_flush_size); - updateSize(lock, priority, capacity - flush_size); + auto flush_size = std::min(m_capacity >> 1, m_flush_size); + updateSize(lock, m_capacity - flush_size); 
flushed = true; - flush_result = m_current_size[priority] <= (capacity - flush_size); + flush_result = m_current_size[priority] <= (m_capacity - flush_size); } // resize is a costly operation but cannot be avoided if the number of locked // resources exceeds the assumed limit @@ -140,7 +141,7 @@ namespace db0 // Update self-iterators in all cached locks for (auto it = res_buf.begin(), end = res_buf.end(); it != end; ++it) { (*it)->m_recycle_it = it; - } + } } res_buf.push_back(res_lock); res_lock->m_recycle_it = std::prev(res_buf.end()); @@ -157,31 +158,44 @@ namespace db0 void CacheRecycler::clear() { std::unique_lock lock(m_mutex); - // try releasing all locks + // try releasing all locks without changing capacity updateSize(lock, 0, 0); updateSize(lock, 1, 0); } - void CacheRecycler::resize(std::size_t new_size) + void CacheRecycler::resize(std::size_t new_capacity) { - resize(db0::getCapacity(new_size, 0), 0); - resize(db0::getCapacity(new_size, 1), 1); + std::unique_lock lock(m_mutex); + bool resize = (new_capacity < m_capacity); + m_capacity = new_capacity; + if (resize) { + // try reducing cache utilization to new capacity + updateSize(lock, new_capacity); + } } - void CacheRecycler::resize(std::size_t new_size, int priority) + void CacheRecycler::updateSize(std::unique_lock &_lock, std::size_t expected_size) + { + // try keeping priority = 1 below its target capacity + auto new_size_1 = std::min(db0::getCapacity(expected_size, 1), m_current_size[1]); + resize(_lock, new_size_1, 1); + // priority = 0 may excteed its target capacity when there's sufficient free space + resize(_lock, std::min(expected_size - new_size_1, m_current_size[0]), 0); + } + + void CacheRecycler::resize(std::unique_lock &_lock, std::size_t new_size, int priority) { - std::unique_lock lock(m_mutex); - if (new_size == m_capacity[priority]) { + if (m_current_size[priority] <= new_size) { + // target size already satisfied return; } - m_capacity[priority] = new_size; // try releasing 
excess locks - updateSize(lock, priority, m_capacity[priority]); + updateSize(_lock, priority, new_size); auto &res_buf = m_res_bufs[priority]; // new capacity of the fixed list should allow storing existing locks - auto new_max_size = std::max((m_capacity[priority] - 1) / MIN_PAGE_SIZE + 1, res_buf.size()); - if (new_max_size != res_buf.max_size()) { + auto new_max_size = std::max((m_capacity - 1) / MIN_PAGE_SIZE + 1, res_buf.size()); + if (new_max_size > res_buf.max_size()) { // After resize, all iterators to cached elements will be invalidated!! res_buf.resize(new_max_size); @@ -202,7 +216,9 @@ namespace db0 } } - std::size_t CacheRecycler::size() const { + std::size_t CacheRecycler::size() const + { + std::unique_lock lock(m_mutex); return getCurrentSize(); } @@ -220,7 +236,13 @@ namespace db0 std::size_t CacheRecycler::getCapacity() const { std::unique_lock lock(m_mutex); - return m_capacity[0] + m_capacity[1]; + return m_capacity; } + std::vector CacheRecycler::getDetailedSize() const + { + std::unique_lock lock(m_mutex); + return { m_current_size[0], m_current_size[1] }; + } + } \ No newline at end of file diff --git a/src/dbzero/core/memory/CacheRecycler.hpp b/src/dbzero/core/memory/CacheRecycler.hpp index 340982b0..fae00a1e 100644 --- a/src/dbzero/core/memory/CacheRecycler.hpp +++ b/src/dbzero/core/memory/CacheRecycler.hpp @@ -17,7 +17,7 @@ namespace db0 class CacheRecycler { public: - static constexpr std::size_t DEFAULT_FLUSH_SIZE = 128 << 20u; + static constexpr std::size_t DEFAULT_FLUSH_SIZE = 256u << 20; /** * Holds resource locks and recycles based on LRU policy @@ -45,10 +45,10 @@ namespace db0 void clear(); /** - * Modify cache size - * @param new_size as byte count + * Change cache capacity at runtime + * @param new_capacity as byte size */ - void resize(std::size_t new_size); + void resize(std::size_t new_capacity); void setFlushSize(unsigned int); @@ -63,6 +63,9 @@ namespace db0 * Get current cache utilization */ std::size_t size() const; + + // 
@return current cache size with a by-priority breakdown + std::vector getDetailedSize() const; std::size_t getCapacity() const; @@ -75,8 +78,8 @@ namespace db0 using list_t = db0::FixedList >; using iterator = list_t::iterator; - // cache capacities as number of bytes (priority 0 and 1) - std::array m_capacity; + // total cache capacity + std::size_t m_capacity; // buffers for priority cache (#0) and secondary cache (#1) std::array m_res_bufs; std::array m_current_size = {0, 0}; @@ -87,8 +90,8 @@ namespace db0 std::function m_flush_dirty; std::function m_flush_callback; std::pair m_last_flush_callback_result = {true, false}; - - void resize(std::size_t new_size, int priority); + + void resize(std::unique_lock &, std::size_t new_size, int priority); /** * Adjusts cache size after updates, collect locks to unlock (can be unlocked off main thread) @@ -99,6 +102,8 @@ namespace db0 std::size_t adjustSize(std::unique_lock &, list_t &res_buf, std::size_t release_size); void adjustSize(std::unique_lock &, std::size_t release_size); void updateSize(std::unique_lock &, int priority, std::size_t expected_size); + // update overall size + void updateSize(std::unique_lock &, std::size_t expected_size); inline std::size_t getCurrentSize() const { return m_current_size[0] + m_current_size[1]; diff --git a/src/dbzero/core/memory/PageMap.hpp b/src/dbzero/core/memory/PageMap.hpp index b04e086a..5230e249 100644 --- a/src/dbzero/core/memory/PageMap.hpp +++ b/src/dbzero/core/memory/PageMap.hpp @@ -158,9 +158,8 @@ namespace db0 template std::weak_ptr *PageMap::find(StateNumType state_num, std::uint64_t page_num, StateNumType &read_state_num) const - { - // needs to be unique locked due to potential m_cache::erase operation - std::unique_lock lock(m_rw_mutex); + { + std::shared_lock lock(m_rw_mutex); auto it = findImpl(page_num, state_num); if (it == m_cache.end()) { return nullptr; @@ -176,17 +175,27 @@ namespace db0 if (m_cache.empty()) { return m_cache.end(); } + + // Find the first 
element with key >= {page_num, state_num} auto it = m_cache.lower_bound({page_num, state_num}); - if (it == m_cache.end()) { - assert(!m_cache.empty()); - --it; + + // If we found exact match or an element with same page_num and state <= state_num + if (it != m_cache.end() && it->first.first == page_num && it->first.second <= state_num) { + return it; } - if (it != m_cache.begin() && (it->first.second > state_num || it->first.first != page_num)) { - --it; + + // Look backwards for the largest state <= state_num with same page_num + if (it == m_cache.begin()) { + return m_cache.end(); // No valid element found } + + --it; // Safe because we checked it != m_cache.begin() + + // Check if this element matches our criteria if (it->first.first == page_num && it->first.second <= state_num) { return it; } + return m_cache.end(); } @@ -197,8 +206,11 @@ namespace db0 auto page_num = res_lock->getAddress() >> m_shift; auto it = findImpl(page_num, state_num); assert(it != m_cache.end()); + if (it == m_cache.end()) { + THROWF(db0::InternalException) << "Attempt to erase non-existing lock from PageMap"; + } assert(it->second.lock() == res_lock); - m_cache.erase(it); + m_cache.erase(it); } template void PageMap::clear() diff --git a/src/dbzero/core/memory/SlabAllocator.cpp b/src/dbzero/core/memory/SlabAllocator.cpp index 6fd8ffd9..7e61fcf8 100644 --- a/src/dbzero/core/memory/SlabAllocator.cpp +++ b/src/dbzero/core/memory/SlabAllocator.cpp @@ -126,7 +126,7 @@ namespace db0 BlankSetT blanks(bitspace, page_size); AlignedBlankSetT aligned_blanks(bitspace, page_size, CompT(page_size), page_size); StripeSetT stripes(bitspace, page_size); - LimitedVector alloc_counter(bitspace, page_size); + LimitedVector alloc_counter(bitspace, page_size); alloc_counter.reserve(SlabAllocatorConfig::SLAB_BITSPACE_SIZE()); // calculate size initially available to CRTD allocator std::uint32_t crdt_size = static_cast(size - admin_size - admin_margin_bytes); diff --git 
a/src/dbzero/core/memory/SlabAllocatorConfig.hpp b/src/dbzero/core/memory/SlabAllocatorConfig.hpp index a4085259..3ce582fa 100644 --- a/src/dbzero/core/memory/SlabAllocatorConfig.hpp +++ b/src/dbzero/core/memory/SlabAllocatorConfig.hpp @@ -13,19 +13,24 @@ namespace db0 struct SlabAllocatorConfig { // 4KB pages - static constexpr std::size_t DEFAULT_PAGE_SIZE = 4096; - static constexpr std::size_t DEFAULT_SLAB_SIZE = 128u << 20; + static constexpr std::size_t DEFAULT_PAGE_SIZE = 4096; + static constexpr std::size_t DEFAULT_SLAB_SIZE = 64u << 20; static constexpr unsigned int SLAB_BITSPACE_SIZE() { - // Must equal the number of data pages in the entire slab + // Must equal the number of data pages in the entire slab return DEFAULT_SLAB_SIZE / DEFAULT_PAGE_SIZE; } // Minimum operational capacity in bytes - // i.e. slabs below this capacity will not be considered for allocation - static constexpr std::size_t MIN_OP_CAPACITY() { - return DEFAULT_SLAB_SIZE / 16; + // i.e. slabs with remaining capacity below this value will not be considered for allocation + static std::size_t MIN_OP_CAPACITY(std::size_t slab_size) { + // NOTE: 1/2 may seem very high but it helps improve performance under heavy fragmentation + return slab_size / 2; } + + // The number of alloc attempts from existing slabs before + // resorting to adding a new slab + static constexpr int NUM_EXISTING_SLAB_ALLOC_ATTEMPTS = 2; }; } \ No newline at end of file diff --git a/src/dbzero/core/memory/SlabManager.cpp b/src/dbzero/core/memory/SlabManager.cpp index c45e0969..f8610702 100644 --- a/src/dbzero/core/memory/SlabManager.cpp +++ b/src/dbzero/core/memory/SlabManager.cpp @@ -69,7 +69,7 @@ namespace db0 // NOTE: before accessing capacity items we must synchronize any updates saveDirtySlabs(); // visit slabs starting from the largest available capacity - auto min_capacity = std::max(size, SlabAllocatorConfig::MIN_OP_CAPACITY()); + auto min_capacity = std::max(size, 
SlabAllocatorConfig::MIN_OP_CAPACITY(m_slab_size)); auto it = m_capacity_items.cbegin(); for (;;) { if (it.is_end() || it->m_remaining_capacity < min_capacity) { @@ -93,8 +93,8 @@ namespace db0 unsigned char locality) { saveDirtySlabs(); - auto min_capacity = std::max(size, SlabAllocatorConfig::MIN_OP_CAPACITY()); - auto last_key = last_result->m_cap_item; + auto min_capacity = std::max(size, SlabAllocatorConfig::MIN_OP_CAPACITY(m_slab_size)); + auto last_key = last_result->m_cap_item; for (;;) { // this is to find the next item in order last_key.m_slab_id += NUM_REALMS; @@ -104,10 +104,11 @@ namespace db0 } if (m_active_slab.contains(it.first->m_slab_id)) { + last_key = *(it.first); // do not include active slab in find operation continue; } - auto slab = openSlab(m_slab_address_func(it.first->m_slab_id)); + auto slab = openSlab(m_slab_address_func(it.first->m_slab_id)); // make the slab active and for a specific locality m_active_slab[locality] = slab; return slab; @@ -537,17 +538,20 @@ namespace db0 auto slab = tryGetActiveSlab(locality); bool is_first = true; bool is_new = false; + // The number of alloc attempts from existing slabs before + // resorting to adding a new slab + int num_remaining_attempts = SlabAllocatorConfig::NUM_EXISTING_SLAB_ALLOC_ATTEMPTS; for (;;) { if (slab) { for (;;) { auto addr = (*slab)->tryAlloc(size, 0, aligned); if (!addr) { // NOTE: since the last allocation failed, don't use this slab as "active" - resetActiveSlab(locality); + resetActiveSlab(locality); break; } - if (!unique || ((*slab)->tryMakeAddressUnique(*addr, instance_id))) { + if (!unique || ((*slab)->tryMakeAddressUnique(*addr, instance_id))) { // modified, add to dirty slabs if (!slab->m_is_dirty) { slab->m_is_dirty = true; @@ -571,9 +575,14 @@ namespace db0 if (is_first) { slab = findFirst(size, locality); is_first = false; - } else { + --num_remaining_attempts; + } else if (num_remaining_attempts-- > 0) { slab = findNext(slab, size, locality); + } else { + slab = {}; } 
+ // Create if unable to allocate from existing slabs + // or the number of attempts has been exhausted if (!slab) { slab = addNewSlab(locality); is_new = true; diff --git a/src/dbzero/object_model/index/Index.hpp b/src/dbzero/object_model/index/Index.hpp index 54db25e2..95268257 100644 --- a/src/dbzero/object_model/index/Index.hpp +++ b/src/dbzero/object_model/index/Index.hpp @@ -154,8 +154,6 @@ namespace db0::object_model } if (!std::is_same_v) { - // FIXME: log - std::cout << "Index builder update !!" << std::endl; m_index_builder = db0::make_shared_void >( get().releaseRemoveNullItems(), get().releaseAddNullItems(), diff --git a/src/dbzero/object_model/index/IndexBuilder.hpp b/src/dbzero/object_model/index/IndexBuilder.hpp index f4461f9c..6478a79c 100644 --- a/src/dbzero/object_model/index/IndexBuilder.hpp +++ b/src/dbzero/object_model/index/IndexBuilder.hpp @@ -88,8 +88,7 @@ namespace db0::object_model } template void IndexBuilder::flush(RangeTreeT &index) - { - /* FIXME: log + { std::function add_callback = [&](UniqueAddress address) { auto it = m_object_cache.find(address); assert(it != m_object_cache.end()); @@ -103,8 +102,6 @@ namespace db0::object_model }; super_t::flush(index, &add_callback, &erase_callback); - */ - super_t::flush(index); m_object_cache.clear(); } diff --git a/src/dbzero/object_model/value/TypedAddress.cpp b/src/dbzero/object_model/value/TypedAddress.cpp index b339a8a8..ca3b4073 100644 --- a/src/dbzero/object_model/value/TypedAddress.cpp +++ b/src/dbzero/object_model/value/TypedAddress.cpp @@ -4,10 +4,6 @@ namespace db0::object_model { - bool TypedAddress::operator==(const TypedAddress &other) const { - return m_value == other.m_value; - } - void TypedAddress::setAddress(Address address) { m_value = (m_value & 0xFFFC000000000000) | address.getOffset(); } @@ -15,11 +11,7 @@ namespace db0::object_model void TypedAddress::setType(StorageClass type) { m_value = (m_value & 0x0003FFFFFFFFFFFF) | (static_cast(type) << 50); } - - bool 
TypedAddress::operator<(const TypedAddress &other) const { - return m_value < other.m_value; - } - + TypedAddress toTypedAddress(const std::pair &addr_with_type) { return { addr_with_type.second, addr_with_type.first.getAddress() }; } diff --git a/src/dbzero/object_model/value/TypedAddress.hpp b/src/dbzero/object_model/value/TypedAddress.hpp index b7e7aa65..dfafbaeb 100644 --- a/src/dbzero/object_model/value/TypedAddress.hpp +++ b/src/dbzero/object_model/value/TypedAddress.hpp @@ -45,12 +45,31 @@ DB0_PACKED_BEGIN void setAddress(Address); void setType(StorageClass type); - bool operator==(const TypedAddress &other) const; - bool operator<(const TypedAddress &other) const; + inline bool operator==(const TypedAddress &other) const { + return m_value == other.m_value; + } + + inline bool operator<(const TypedAddress &other) const { + return m_value < other.m_value; + } }; TypedAddress toTypedAddress(const std::pair &); DB0_PACKED_END +} + +namespace std + +{ + + template <> + struct hash + { + std::size_t operator()(const db0::object_model::TypedAddress& k) const { + return std::hash()(k.m_value); + } + }; + } \ No newline at end of file diff --git a/src/dbzero/workspace/Fixture.cpp b/src/dbzero/workspace/Fixture.cpp index d4bd8dc2..80582486 100644 --- a/src/dbzero/workspace/Fixture.cpp +++ b/src/dbzero/workspace/Fixture.cpp @@ -290,7 +290,8 @@ namespace db0 bool Fixture::commit() { - auto process_timer = std::make_unique("Fixture::commit"); + std::unique_ptr process_timer; + // process_timer = std::make_unique("Fixture::commit"); assert(getPrefixPtr()); // flush to prepare objects which require it (e.g. 
Index) for commit // NOTE: flush must NOT lock the fixture's shared mutex @@ -310,17 +311,14 @@ namespace db0 } } - // Clear expired instances from cache so that they're not persisted - // FIXME: log - // m_lang_cache.clear(true); + // Clear Python-side expired instances from cache so that they're not persisted + m_lang_cache.clear(true); std::unique_lock lock(m_commit_mutex); bool result = tryCommit(lock, process_timer.get()); m_updated = false; auto callbacks = collectStateReachedCallbacks(); lock.unlock(); executeStateReachedCallbacks(callbacks); - // FIXME: log - process_timer->printLog(std::cout) << std::endl; return result; } @@ -338,8 +336,8 @@ namespace db0 if (!prefix_ptr) { return result; } - - std::unique_ptr gc0_ctx = m_gc0_ptr ? getGC0().beginSave() : nullptr; + + std::unique_ptr ctx = m_gc0_ptr ? m_gc0_ptr->beginCommit() : nullptr; // NOTE: close handlers perform internal buffers flush (e.g. TagIndex) // which may result in modifications (e.g. incRef) // it's therefore important to perform this action before GC0::commitAll (which commits finalized objects) @@ -348,15 +346,11 @@ namespace db0 } // Commit modified only (to avoid scan over all objects) - if (m_gc0_ptr) { - getGC0().commitAllOf(Memspace::getModified(), timer.get()); + if (ctx) { + ctx->commitAllOf(Memspace::getModified(), timer.get()); + ctx = nullptr; } - // Save garbage collector's state - // we check if gc0 exists because the unit-tests set up may not have it - if (gc0_ctx) { - gc0_ctx->save(timer.get()); - } m_string_pool.commit(); m_object_catalogue.commit(); m_v_object_cache.commit(); diff --git a/src/dbzero/workspace/GC0.cpp b/src/dbzero/workspace/GC0.cpp index 58640856..76534e40 100644 --- a/src/dbzero/workspace/GC0.cpp +++ b/src/dbzero/workspace/GC0.cpp @@ -30,25 +30,6 @@ namespace db0 { } - GC0::SaveContext::SaveContext(GC0 &gc0) - : m_gc0(gc0) - { - assert(!m_gc0.m_save_pending); - m_gc0.m_save_pending = true; - } - - GC0::SaveContext::~SaveContext() - { - 
assert(m_gc0.m_save_pending); - m_gc0.m_save_pending = false; - } - - void GC0::SaveContext::save(ProcessTimer *timer) - { - assert(m_gc0.m_save_pending); - m_gc0.save(timer); - } - bool GC0::tryRemove(void *vptr, bool is_volatile) { std::unique_lock lock(m_mutex); @@ -70,7 +51,7 @@ namespace db0 if (!m_read_only && ops.hasRefs && ops.drop && !is_volatile && !ops.hasRefs(it->first)) { - if (m_save_pending) { + if (m_commit_pending) { // must schedule for deletion since unable to drop while save is pending auto addr_pair = ops.address(it->first); m_scheduled_for_deletion[addr_pair.first] = addr_pair.second; @@ -110,24 +91,40 @@ namespace db0 if (timer_ptr) { timer = std::make_unique("GC0::commitAllOf", timer_ptr); } - + + // Commit & collect unreferenced instances + // Important ! Collect instance addresses first because push_back can trigger "remove" calls std::unique_lock lock(m_mutex); + std::unordered_set addresses; std::size_t count = 0; for (auto vptr : vptrs) { auto it = m_vptr_map.find(vptr); if (it != m_vptr_map.end()) { - m_ops[it->second].commit(vptr); + auto &ops = m_ops[it->second]; + ops.commit(vptr); + if (ops.hasRefs && !ops.hasRefs(vptr)) { + addresses.insert(toTypedAddress(ops.address(vptr))); + } ++count; } } - // FIXME: log - std::cout << "GC0::commit size: " << count << std::endl; + + lock.unlock(); + + super_t::clear(); + for (auto addr: addresses) { + super_t::push_back(addr); + } + // also registered instances scheduled for deletion + for (auto &addr_pair: m_scheduled_for_deletion) { + super_t::push_back(toTypedAddress(addr_pair)); + } + m_scheduled_for_deletion.clear(); + super_t::commit(); } void GC0::commitAll() { - // FIXME: log - std::cout << "commitAll" << std::endl; std::unique_lock lock(m_mutex); for (auto &vptr_item : m_vptr_map) { m_ops[vptr_item.second].commit(vptr_item.first); @@ -162,41 +159,6 @@ namespace db0 for (auto &item : flush_ops) { m_ops[item.second].flush(item.first, false); } - // FIXME: log - std::cout << "GC0 
flushed: " << flush_ops.size() << std::endl; - } - - void GC0::save(ProcessTimer *timer_ptr) - { - std::unique_ptr timer; - if (timer_ptr) { - timer = std::make_unique("GC0::save", timer_ptr); - } - - // collect unreferenced instances - // Important ! Collect instance addresses first because push_back can trigger "remove" calls - /* FIXME: log - std::vector addresses; - std::unique_lock lock(m_mutex); - for (auto &vptr_item : m_vptr_map) { - auto &ops = m_ops[vptr_item.second]; - if (ops.hasRefs && !ops.hasRefs(vptr_item.first)) { - addresses.push_back(toTypedAddress(ops.address(vptr_item.first))); - } - } - lock.unlock(); - - super_t::clear(); - for (auto addr: addresses) { - super_t::push_back(addr); - } - // also registered instances scheduled for deletion - for (auto &addr_pair: m_scheduled_for_deletion) { - super_t::push_back(toTypedAddress(addr_pair)); - } - m_scheduled_for_deletion.clear(); - */ - super_t::commit(); } void GC0::collect() @@ -258,10 +220,6 @@ namespace db0 m_atomic = false; } - std::unique_ptr GC0::beginSave() { - return std::make_unique(*this); - } - std::optional GC0::erase(void *vptr) { std::optional flush_op; @@ -284,4 +242,26 @@ namespace db0 return flush_op; } + GC0::CommitContext::CommitContext(GC0 &gc0) + : m_gc0(gc0) + { + assert(!m_gc0.m_commit_pending); + m_gc0.m_commit_pending = true; + } + + GC0::CommitContext::~CommitContext() + { + assert(m_gc0.m_commit_pending); + m_gc0.m_commit_pending = false; + } + + void GC0::CommitContext::commitAllOf(const std::vector &vec, ProcessTimer *timer) + { + assert(m_gc0.m_commit_pending); + m_gc0.commitAllOf(vec, timer); + } + + std::unique_ptr GC0::beginCommit() { + return std::make_unique(*this); + } } \ No newline at end of file diff --git a/src/dbzero/workspace/GC0.hpp b/src/dbzero/workspace/GC0.hpp index 74450956..585972ad 100644 --- a/src/dbzero/workspace/GC0.hpp +++ b/src/dbzero/workspace/GC0.hpp @@ -94,26 +94,8 @@ namespace db0 // Detach all instances held by this registry void 
detachAll(); - // Commit specific (e.g. modified) instances held by this registry - void commitAllOf(const std::vector &, ProcessTimer * = nullptr); - - std::size_t size() const; - - struct SaveContext - { - GC0 &m_gc0; - - SaveContext(GC0 &gc0); - ~SaveContext(); - void save(ProcessTimer * = nullptr); - }; - - /** - * Save serializes the list of unreferenced instances to the persistence layer - * this is to be able to drop those instances once the corresponding references from Python expire - */ - std::unique_ptr beginSave(); + std::size_t size() const; template static void registerTypes(); @@ -126,12 +108,24 @@ namespace db0 void beginAtomic(); void endAtomic(); void cancelAtomic(); + + struct CommitContext + { + GC0 &m_gc0; + + CommitContext(GC0 &gc0); + ~CommitContext(); + + void commitAllOf(const std::vector &, ProcessTimer * = nullptr); + }; + + std::unique_ptr beginCommit(); protected: - friend SaveContext; - bool m_save_pending = false; + bool m_commit_pending = false; - void save(ProcessTimer * = nullptr); + // Commit specific (e.g. 
modified) instances held by this registry + void commitAllOf(const std::vector &, ProcessTimer * = nullptr); // @return flush ops-id if element was assigned it std::optional erase(void *vptr); diff --git a/tests/unit_tests/MetaAllocatorTest.cpp b/tests/unit_tests/MetaAllocatorTest.cpp index aae06d29..c783c4a2 100644 --- a/tests/unit_tests/MetaAllocatorTest.cpp +++ b/tests/unit_tests/MetaAllocatorTest.cpp @@ -59,9 +59,9 @@ namespace tests protected: // in bytes static constexpr std::size_t PAGE_SIZE = 4096; - static constexpr std::size_t SLAB_SIZE = 4 * 1024 * 1024; + static constexpr std::size_t SLAB_SIZE = 4u << 20; static constexpr std::size_t SMALL_SLAB_SIZE = 64 * 4096; - + std::atomic m_dirty_meter = 0; CacheRecycler m_recycler; std::shared_ptr m_prefix; @@ -157,7 +157,7 @@ namespace tests auto ptr = cut.alloc(100); // the allocation should be in the same slab ASSERT_EQ(cut.getSlabId(ptr), 0); - } + } TEST_F( MetaAllocatorTests , testMetaAllocatorCanAllocateFromMultipleExistingSlabs ) { From 420dd7df6fe1932cfb581ea366771a9c778914db Mon Sep 17 00:00:00 2001 From: Wojtek Date: Tue, 11 Nov 2025 19:50:39 +0100 Subject: [PATCH 11/11] cleanup --- python_tests/test_cache.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/python_tests/test_cache.py b/python_tests/test_cache.py index 2db5843d..3182d086 100644 --- a/python_tests/test_cache.py +++ b/python_tests/test_cache.py @@ -15,19 +15,16 @@ def rand_array(max_bytes): def test_cache_size_can_be_updated_at_runtime(db0_fixture): - cache_0 = db0.get_cache_stats() - print(cache_0) + cache_0 = db0.get_cache_stats() # create object instances to populate cache buf = [] for _ in range(1000): buf.append(MemoTestClass(get_string(1024))) - cache_1 = db0.get_cache_stats() - print(cache_1) + cache_1 = db0.get_cache_stats() diff_1 = cache_1["size"] - cache_0["size"] # reduce cache size so that only 1/2 of objects can fit db0.set_cache_size(512 * 1024) - cache_2 = db0.get_cache_stats() - print(cache_2) + 
cache_2 = db0.get_cache_stats() # make sure cache size / capacity was adjusted with at least 95% accuracy assert abs(1.0 - (512 * 1024) / cache_2["size"]) < 0.05 assert abs(1.0 - cache_2["capacity"] / cache_2["size"]) < 0.05