diff --git a/python_tests/test_cache.py b/python_tests/test_cache.py index b2fabb72..3182d086 100644 --- a/python_tests/test_cache.py +++ b/python_tests/test_cache.py @@ -15,16 +15,16 @@ def rand_array(max_bytes): def test_cache_size_can_be_updated_at_runtime(db0_fixture): - cache_0 = db0.get_cache_stats() + cache_0 = db0.get_cache_stats() # create object instances to populate cache buf = [] for _ in range(1000): buf.append(MemoTestClass(get_string(1024))) - cache_1 = db0.get_cache_stats() + cache_1 = db0.get_cache_stats() diff_1 = cache_1["size"] - cache_0["size"] # reduce cache size so that only 1/2 of objects can fit db0.set_cache_size(512 * 1024) - cache_2 = db0.get_cache_stats() + cache_2 = db0.get_cache_stats() # make sure cache size / capacity was adjusted with at least 95% accuracy assert abs(1.0 - (512 * 1024) / cache_2["size"]) < 0.05 assert abs(1.0 - cache_2["capacity"] / cache_2["size"]) < 0.05 diff --git a/python_tests/test_index.py b/python_tests/test_index.py index e884c510..14c65966 100644 --- a/python_tests/test_index.py +++ b/python_tests/test_index.py @@ -5,6 +5,7 @@ from dbzero import find from datetime import timedelta, datetime import random +import time def test_index_instance_can_be_created_without_arguments(db0_fixture): @@ -714,4 +715,17 @@ def test_find_in_index_range_issue_1(db0_fixture): index.add(3, test_obj) assert test_obj in set(index.range()) assert list(db0.find(index.range(), test_obj)) == [test_obj] - \ No newline at end of file + + +@pytest.mark.stress_test +def test_insert_1M_keys_to_index(db0_no_autocommit): + cut = db0.index() + objects = [MemoTestClass(0) for _ in range(25000)] + start = time.perf_counter() + for i in range(1_000_000): + # add random int + cut.add(random.randint(0, 100_000_000), random.choice(objects)) + result = list(cut.select(0, 1)) + end = time.perf_counter() + assert len(cut) == 1_000_000 + print(f"Inserted 1M keys to index in {end - start:.2f} seconds") \ No newline at end of file diff --git 
a/python_tests/test_issues_12.py b/python_tests/test_issues_12.py new file mode 100644 index 00000000..1a51d492 --- /dev/null +++ b/python_tests/test_issues_12.py @@ -0,0 +1,101 @@ +import dbzero as db0 +import pytest +from .conftest import DB0_DIR +from datetime import datetime +from .memo_test_types import MemoBlob +from dataclasses import dataclass +import random +import time +from typing import Dict, List + + +@db0.memo +@dataclass +class Issuer: + tax_id: int + inv_list: List + inv_index: db0.index + + +@db0.memo(no_cache=True) +@dataclass +class Invoice: + tax_id: int + issue_dt: datetime + data: bytes + + +def get_random_tax_id(tax_ids_set=set()): + tax_id = random.randint(1000000000, 9999999999) + while tax_id in tax_ids_set: + tax_id = random.randint(1000000000, 9999999999) + tax_ids_set.add(tax_id) + return tax_id + + +@pytest.mark.stress_test +@pytest.mark.parametrize("db0_slab_size", [{"slab_size": 64 << 20, "autocommit": False}], indirect=True) +def test_no_cache_allocator_issue(db0_slab_size): + db0.set_cache_size(8 << 30) + # create 25 k unique tax_id numbers + tax_id_count = 25000 + tax_id_numbers = set() + print(f"Generating {tax_id_count} unique tax_id numbers") + for i in range(tax_id_count): + if i % 5000 == 0: + print(f"Generated {i} tax_id numbers so far") + get_random_tax_id(tax_id_numbers) + tax_id_list = list(tax_id_numbers) + + issuers = {} + + for i, tax_id in enumerate(tax_id_list): + if i % 5000 == 0: + print(f"Created {i} issuers so far") + new_issuer = Issuer(tax_id=tax_id, inv_list=[], inv_index=db0.index()) + issuers[tax_id] = new_issuer + + execution_time = 15 + RANDOM_BYTES = b'DB0'*22000 + total_size = 0 + count_of_objects = 0 + new_objects = 0 + db0.commit() + print("Starting benchmark loop") + last_report = time.perf_counter() + start = last_report + while True: + # get random number between 0 and 100 + random_number = random.randint(0, 100) + if random_number < 90: + data_size = random.randint(500, 2000) + else: + data_size = 
random.randint(8000, 64000) + + random_tax_id = random.choice(tax_id_list) + issuer = issuers[random_tax_id] + invoice = Invoice(tax_id=issuer.tax_id, issue_dt=datetime.now(), data=RANDOM_BYTES[:data_size]) + issuer.inv_list.append(invoice) + issuer.inv_index.add(datetime.now(), invoice) + count_of_objects += 1 + new_objects += 1 + + total_size += data_size + # report every 3 seconds + now = time.perf_counter() + if (now - last_report) >= 3: + commit_start = time.perf_counter() + db0.commit() + commit_end = time.perf_counter() + print(f"Commit time: {(commit_end - commit_start)} seconds") + + now = time.perf_counter() + print(f"Objects / sec {float(new_objects) / (now - last_report)}, Total objects: {count_of_objects}, Total size: {total_size} bytes") + print(db0.get_storage_stats()) + print(db0.get_lang_cache_stats()) + new_objects = 0 + last_report = now + + if (now - start) > execution_time: + break + \ No newline at end of file diff --git a/python_tests/test_memo_no_cache.py b/python_tests/test_memo_no_cache.py index 40fca2c8..d3e1577a 100644 --- a/python_tests/test_memo_no_cache.py +++ b/python_tests/test_memo_no_cache.py @@ -53,15 +53,15 @@ def test_memo_no_cache_issue1(db0_fixture): del obj -def test_excluding_no_cache_instances_from_dbzero_cache(db0_fixture): +def test_excluding_no_cache_instances_from_P0_cache(db0_fixture): buf = db0.list() - initial_cache_size = db0.get_cache_stats()["size"] + initial_cache_size = db0.get_cache_stats()["P_size"]["P0"] for _ in range(100): - obj = MemoNoCacheClass() + obj = MemoNoCacheClass() buf.append(obj) - gc.collect() - final_cache_size = db0.get_cache_stats()["size"] + gc.collect() + final_cache_size = db0.get_cache_stats()["P_size"]["P0"] # make sure cache utilization is low assert abs(final_cache_size - initial_cache_size) < (350 << 10) @@ -80,7 +80,7 @@ def test_fetching_no_cache_objects(db0_fixture): db0.open(px_name, "r") # now fetch objects by uuid - initial_cache_size = db0.get_cache_stats()["size"] + 
initial_cache_size = db0.get_cache_stats()["P_size"]["P0"] total_len = 0 for id in uuid_list: # NOTE: must fetch with type, otherwise no_cache flag may not be honored @@ -88,7 +88,7 @@ def test_fetching_no_cache_objects(db0_fixture): # this forces data retrieval total_len += len(obj.data) - final_cache_size = db0.get_cache_stats()["size"] + final_cache_size = db0.get_cache_stats()["P_size"]["P0"] # make sure cache utilization is low assert abs(final_cache_size - initial_cache_size) < (300 << 10) @@ -105,14 +105,14 @@ def test_find_no_cache_objects(db0_fixture): db0.open(px_name, "r") # now retrieve objects using db0.find - initial_cache_size = db0.get_cache_stats()["size"] + initial_cache_size = db0.get_cache_stats()["P_size"]["P0"] total_len = 0 for obj in db0.find(MemoNoCacheClass): # this forces data retrieval (but not caching) total_len += len(obj.data) assert total_len > 0 - final_cache_size = db0.get_cache_stats()["size"] + final_cache_size = db0.get_cache_stats()["P_size"]["P0"] # make sure cache utilization is low assert abs(final_cache_size - initial_cache_size) < (300 << 10) @@ -131,7 +131,7 @@ def test_fetching_no_cache_objects(db0_fixture): db0.open(px_name, "r") # now fetch objects by uuid - initial_cache_size = db0.get_cache_stats()["size"] + initial_cache_size = db0.get_cache_stats()["P_size"]["P0"] total_len = 0 for id in uuid_list: # NOTE: must fetch with type, otherwise no_cache flag may not be honored @@ -139,7 +139,7 @@ def test_fetching_no_cache_objects(db0_fixture): # this forces data retrieval total_len += len(obj.data) - final_cache_size = db0.get_cache_stats()["size"] + final_cache_size = db0.get_cache_stats()["P_size"]["P0"] # make sure cache utilization is low assert abs(final_cache_size - initial_cache_size) < (300 << 10) @@ -156,13 +156,13 @@ def test_find_no_cache_objects(db0_fixture): db0.open(px_name, "r") # now retrieve objects using db0.find - initial_cache_size = db0.get_cache_stats()["size"] + initial_cache_size = 
db0.get_cache_stats()["P_size"]["P0"] total_len = 0 for obj in db0.find(MemoNoCacheClass): # this forces data retrieval (but not caching) total_len += len(obj.data) assert total_len > 0 - final_cache_size = db0.get_cache_stats()["size"] + final_cache_size = db0.get_cache_stats()["P_size"]["P0"] # make sure cache utilization is low assert abs(final_cache_size - initial_cache_size) < (350 << 10) diff --git a/python_tests/test_object_stress.py b/python_tests/test_object_stress.py index 5df38c7d..2abe207b 100644 --- a/python_tests/test_object_stress.py +++ b/python_tests/test_object_stress.py @@ -63,7 +63,7 @@ def read_value(value) -> int: total_bytes += len(buf[-1].value) count += 1 if total_bytes > report_bytes: - pre_commit = datetime.now() + flush = datetime.now() print("*** next transaction ***") db0.commit() storage_stats = db0.get_storage_stats() @@ -71,7 +71,7 @@ def read_value(value) -> int: print(f"Rand DRAM I/O ops: {storage_stats['dram_io_rand_ops'] - rand_dram_io}") print(f"Rand file write ops: {storage_stats['file_rand_write_ops'] - rand_file_write_ops}") print(f"File bytes written: {storage_stats['file_bytes_written'] - bytes_written}") - print(f"Commit took: {datetime.now() - pre_commit}\n") + print(f"Commit took: {datetime.now() - flush}\n") rand_dram_io = storage_stats["dram_io_rand_ops"] rand_file_write_ops = storage_stats["file_rand_write_ops"] bytes_written = storage_stats["file_bytes_written"] @@ -107,7 +107,7 @@ def rand_string(max_len): total_bytes += len(buf[-1].value) count += 1 if total_bytes > report_bytes: - pre_commit = datetime.now() + flush = datetime.now() # NOTE: with each commit the size of GC0 is increasing due to large # number of objects referenced only from python db0.commit() @@ -116,7 +116,7 @@ def rand_string(max_len): print(f"Rand DRAM I/O ops: {storage_stats['dram_io_rand_ops'] - rand_dram_io}") print(f"Rand file write ops: {storage_stats['file_rand_write_ops'] - rand_file_write_ops}") print(f"File bytes written: 
{storage_stats['file_bytes_written'] - bytes_written}") - print(f"Commit took: {datetime.now() - pre_commit}") + print(f"Commit took: {datetime.now() - flush}") rand_dram_io = storage_stats["dram_io_rand_ops"] rand_file_write_ops = storage_stats["file_rand_write_ops"] bytes_written = storage_stats["file_bytes_written"] diff --git a/src/dbzero/bindings/python/Memo.cpp b/src/dbzero/bindings/python/Memo.cpp index 6f576fea..8c8ab214 100644 --- a/src/dbzero/bindings/python/Memo.cpp +++ b/src/dbzero/bindings/python/Memo.cpp @@ -211,6 +211,7 @@ namespace db0::python { using Class = db0::object_model::Class; using TagIndex = db0::object_model::TagIndex; + using ExtT = typename MemoImplT::ExtT; PY_API_FUNC // the instance may already exist (e.g. if this is a singleton) @@ -230,7 +231,8 @@ namespace db0::python auto type = self->ext().getClassPtr(); if (type->isExistingSingleton(fixture_uuid)) { // drop existing instance - self->ext().destroy(); + // NOTE: may use ext() because destroy does not mutate the instance itself + const_cast(self->ext()).destroy(); // unload singleton from a different fixture if (!type->unloadSingleton(&self->modifyExt(), fixture_uuid)) { PyErr_SetString(PyExc_RuntimeError, "Unloading singleton failed"); diff --git a/src/dbzero/bindings/python/PyAPI.cpp b/src/dbzero/bindings/python/PyAPI.cpp index 09154342..10be1b04 100644 --- a/src/dbzero/bindings/python/PyAPI.cpp +++ b/src/dbzero/bindings/python/PyAPI.cpp @@ -61,6 +61,20 @@ namespace db0::python } PySafeDict_SetItemString(*dict, "size", Py_OWN(PyLong_FromLong(cache_recycler.size()))); + + { + std::vector detailed_size = cache_recycler.getDetailedSize(); + auto detailed_size_dict = Py_OWN(PyDict_New()); + unsigned int priority_index = 0; + for (auto size: detailed_size) { + std::stringstream key_str; + key_str << "P" << priority_index++; + PySafeDict_SetItemString(*detailed_size_dict, key_str.str().c_str(), Py_OWN(PyLong_FromLong(size))); + } + // cache size with a by-priority breakdown + 
PySafeDict_SetItemString(*dict, "P_size", detailed_size_dict); + } + PySafeDict_SetItemString(*dict, "capacity", Py_OWN(PyLong_FromLong(cache_recycler.getCapacity()))); PySafeDict_SetItemString(*dict, "deferred_free_count", Py_OWN(PyLong_FromLong(deferred_free_count))); PySafeDict_SetItemString(*dict, "lang_cache_size", Py_OWN(PyLong_FromLong(lang_cache_size))); diff --git a/src/dbzero/bindings/python/PyToolkit.cpp b/src/dbzero/bindings/python/PyToolkit.cpp index a4f2a4ce..d40fddc1 100644 --- a/src/dbzero/bindings/python/PyToolkit.cpp +++ b/src/dbzero/bindings/python/PyToolkit.cpp @@ -259,7 +259,7 @@ namespace db0::python } return shared_py_cast(std::move(list_object)); } - + PyToolkit::ObjectSharedPtr PyToolkit::unloadByteArray(db0::swine_ptr fixture, Address address, AccessFlags access_mode) { @@ -281,9 +281,9 @@ namespace db0::python return shared_py_cast(std::move(byte_array_object)); } - PyToolkit::ObjectSharedPtr PyToolkit::unloadIndex(db0::swine_ptr fixture, + PyToolkit::ObjectSharedPtr PyToolkit::unloadIndex(db0::swine_ptr fixture, Address address, std::uint16_t, AccessFlags access_mode) - { + { // try pulling from cache first auto &lang_cache = fixture->getLangCache(); auto object_ptr = lang_cache.get(address); @@ -292,7 +292,7 @@ namespace db0::python return object_ptr; } - auto index_object = IndexDefaultObject_new(); + auto index_object = Py_OWN(IndexDefaultObject_new()); // retrieve actual dbzero instance index_object->unload(fixture, address, access_mode); diff --git a/src/dbzero/bindings/python/collections/PyIndex.cpp b/src/dbzero/bindings/python/collections/PyIndex.cpp index 2bef4015..eec72314 100644 --- a/src/dbzero/bindings/python/collections/PyIndex.cpp +++ b/src/dbzero/bindings/python/collections/PyIndex.cpp @@ -40,8 +40,8 @@ namespace db0::python return reinterpret_cast(type->tp_alloc(type, 0)); } - shared_py_object IndexDefaultObject_new() { - return { IndexObject_new(&IndexObjectType, NULL, NULL), false }; + IndexObject 
*IndexDefaultObject_new() { + return IndexObject_new(&IndexObjectType, NULL, NULL); } void PyAPI_IndexObject_del(IndexObject* index_obj) @@ -67,12 +67,12 @@ namespace db0::python IndexObject *tryMakeIndex(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { // make actual dbzero instance, use default fixture - auto index_object = IndexDefaultObject_new(); + auto py_index = Py_OWN(IndexDefaultObject_new()); db0::FixtureLock lock(PyToolkit::getPyWorkspace().getWorkspace().getCurrentFixture()); - index_object->makeNew(*lock); + auto &index = py_index->makeNew(*lock); // register newly created index with py-object cache - lock->getLangCache().add(index_object.get()->ext().getAddress(), index_object.get()); - return index_object.steal(); + lock->getLangCache().add(index.getAddress(), py_index.get()); + return py_index.steal(); } IndexObject *PyAPI_makeIndex(PyObject *self, PyObject *const *args, Py_ssize_t nargs) diff --git a/src/dbzero/bindings/python/collections/PyIndex.hpp b/src/dbzero/bindings/python/collections/PyIndex.hpp index 821b8baa..eba2f877 100644 --- a/src/dbzero/bindings/python/collections/PyIndex.hpp +++ b/src/dbzero/bindings/python/collections/PyIndex.hpp @@ -10,7 +10,7 @@ namespace db0::python using IndexObject = PyWrapper; IndexObject *IndexObject_new(PyTypeObject *type, PyObject *, PyObject *); - shared_py_object IndexDefaultObject_new(); + IndexObject* IndexDefaultObject_new(); void PyAPI_IndexObject_del(IndexObject* self); Py_ssize_t PyAPI_IndexObject_len(IndexObject *); diff --git a/src/dbzero/core/collections/SGB_Tree/SGB_Tree.hpp b/src/dbzero/core/collections/SGB_Tree/SGB_Tree.hpp index eef6eadb..b7b9767b 100644 --- a/src/dbzero/core/collections/SGB_Tree/SGB_Tree.hpp +++ b/src/dbzero/core/collections/SGB_Tree/SGB_Tree.hpp @@ -20,7 +20,7 @@ namespace db0 using CompT = typename TypesT::CompT; using AddressT = typename TypesT::AddressT; using NodeT = typename TypesT::NodeT; - using NodePtrT = typename NodeT::ptr_t; + using NodePtrT = NodeT; 
using node_iterator = typename TypesT::o_sgb_node_t::iterator; using node_const_iterator = typename TypesT::o_sgb_node_t::const_iterator; using sg_tree_const_iterator = typename super_t::const_iterator; @@ -547,13 +547,13 @@ namespace db0 const std::size_t m_node_capacity; const NodeItemCompT m_item_comp; const HeapCompT m_heap_comp; - + template ItemIterator emplace_to_empty(Args&&... args) { - super_t::modify().m_sgb_size++; + ++super_t::modify().m_sgb_size; // create the root node which shares the same allocation as the 'head' node // obtain mutable mem lock first - auto mem_lock = this->get_v_ptr().modifyMappedRange(); + auto mem_lock = this->modifyMappedRange(); // calculate residual capacity auto residual_capacity = (*this)->sizeOf() - (*this)->trueSizeOf(); // use the remaining capacity to initialize the root node diff --git a/src/dbzero/core/collections/SGB_Tree/sgb_types.hpp b/src/dbzero/core/collections/SGB_Tree/sgb_types.hpp index c1caec48..152e8b98 100644 --- a/src/dbzero/core/collections/SGB_Tree/sgb_types.hpp +++ b/src/dbzero/core/collections/SGB_Tree/sgb_types.hpp @@ -75,11 +75,11 @@ namespace db0 } inline operator ptr_t&() { - return this->v_this; + return *this; } inline operator const ptr_t&() const { - return this->v_this; + return *this; } }; diff --git a/src/dbzero/core/collections/b_index/mb_index.hpp b/src/dbzero/core/collections/b_index/mb_index.hpp index e8f51f41..1570f773 100644 --- a/src/dbzero/core/collections/b_index/mb_index.hpp +++ b/src/dbzero/core/collections/b_index/mb_index.hpp @@ -613,7 +613,7 @@ namespace db0 /** * Destroy existing instance */ - void destroy() const { + void destroy() { m_interface.destroy(*m_memspace_ptr); } diff --git a/src/dbzero/core/collections/b_index/v_bindex.hpp b/src/dbzero/core/collections/b_index/v_bindex.hpp index 0304b692..5bca7993 100644 --- a/src/dbzero/core/collections/b_index/v_bindex.hpp +++ b/src/dbzero/core/collections/b_index/v_bindex.hpp @@ -121,7 +121,7 @@ namespace db0 return 
b_index.getAddress(); } - void destroy() const + void destroy() { // must clear all nodes (item destroy) assert(!m_item_destroy_func && "Operation not implemented"); diff --git a/src/dbzero/core/collections/pools/RC_LimitedPool.hpp b/src/dbzero/core/collections/pools/RC_LimitedPool.hpp index 515fa071..ab99cd6a 100644 --- a/src/dbzero/core/collections/pools/RC_LimitedPool.hpp +++ b/src/dbzero/core/collections/pools/RC_LimitedPool.hpp @@ -193,14 +193,14 @@ DB0_PACKED_END void RC_LimitedPool::commit() const { m_pool_map.commit(); - db0::v_object::commit(); + db0::vtypeless::commit(); } template void RC_LimitedPool::detach() const { m_pool_map.detach(); - db0::v_object::detach(); + db0::vtypeless::detach(); } template diff --git a/src/dbzero/core/collections/range_tree/RangeTree.hpp b/src/dbzero/core/collections/range_tree/RangeTree.hpp index 54c97a5a..66fcd91f 100644 --- a/src/dbzero/core/collections/range_tree/RangeTree.hpp +++ b/src/dbzero/core/collections/range_tree/RangeTree.hpp @@ -240,9 +240,11 @@ DB0_PACKED_END auto null_block_ptr = getNullBlock(); assert(null_block_ptr); - + // erase values from the null block directly - auto diff = null_block_ptr->bulkErase(begin, end, static_cast(nullptr), erase_callback_ptr); + auto diff = null_block_ptr->bulkErase( + begin, end, static_cast(nullptr), erase_callback_ptr + ); if (diff > 0) { this->modify().m_size -= diff; } @@ -428,16 +430,16 @@ DB0_PACKED_END m_it.modifyItem().m_first_item = first_item; } } - - // Forwards a value to the add item callback + + // Forwards a value to the add item callback std::function add_item_callback = [&](ItemT item) { (*add_callback_ptr)(item.m_value); - }; + }; std::function *add_item_callback_ptr = (add_callback_ptr ? 
&add_item_callback : nullptr); return (*this)->bulkInsertUnique(begin_item, end_item, add_item_callback_ptr).second; } - + /** * Erase existing elements, ignore non-existing ones * @return number of erased elements @@ -603,7 +605,7 @@ DB0_PACKED_END CallbackT *erase_callback_ptr = nullptr) { // erase items first - if (!m_remove_items.empty()) { + if (!m_remove_items.empty()) { std::vector items; std::copy(m_remove_items.begin(), m_remove_items.end(), std::back_inserter(items)); range_tree.bulkErase(items.begin(), items.end(), erase_callback_ptr); @@ -616,7 +618,7 @@ DB0_PACKED_END } if (!m_add_items.empty()) { std::vector items; - std::copy(m_add_items.begin(), m_add_items.end(), std::back_inserter(items)); + std::copy(m_add_items.begin(), m_add_items.end(), std::back_inserter(items)); range_tree.bulkInsert(items.begin(), items.end(), add_callback_ptr); m_add_items.clear(); } @@ -751,7 +753,7 @@ DB0_PACKED_END // retrieve existing range return { m_index, it, m_index.begin(), m_index.end(), it == m_index.begin(), true }; } - + RangeIterator insertRange(ItemT item) { BlockT new_block(this->getMemspace()); diff --git a/src/dbzero/core/collections/sgtree/intrusive_node.hpp b/src/dbzero/core/collections/sgtree/intrusive_node.hpp index 5e2cb50c..bd6187b2 100644 --- a/src/dbzero/core/collections/sgtree/intrusive_node.hpp +++ b/src/dbzero/core/collections/sgtree/intrusive_node.hpp @@ -2,24 +2,24 @@ #include "v_sgtree.hpp" #include -#include +#include namespace db0 { - + /** * VSPACE node type compliant with intrusive containers * c_type - node container type * comp_t - node pointer comparer type */ - template > class intrusive_node - : public v_object + template > + class intrusive_node: public v_object { public : using super = v_object; using c_type = T; - using ptr_t = typename super::ptr_t; + using ptr_t = typename v_object::ptr_t; using comp_t = comp_t_; // type compliant with intrusive NodeTraits requirements using traits_t = base_traits_t, ptr_t>; @@ -33,7 +33,7 @@ 
namespace db0 // Copy constructor struct tag_copy {}; intrusive_node(tag_copy, Memspace &memspace, Memspace &other_memspace, const ptr_t &other) - : super(memspace, memspace, other_memspace, *other.get()) + : super(memspace, memspace, other_memspace, *other.getData()) { } @@ -46,14 +46,14 @@ namespace db0 * Cast to pointer */ inline operator ptr_t&() { - return this->v_this; + return *this; } /** * Cast to const-pointer */ inline operator const ptr_t&() const { - return this->v_this; + return *this; } }; diff --git a/src/dbzero/core/collections/sgtree/sgtree_node.hpp b/src/dbzero/core/collections/sgtree/sgtree_node.hpp index c17353bf..0361f4bf 100644 --- a/src/dbzero/core/collections/sgtree/sgtree_node.hpp +++ b/src/dbzero/core/collections/sgtree/sgtree_node.hpp @@ -79,12 +79,13 @@ DB0_PACKED_BEGIN data_t m_data; }; DB0_PACKED_END - - template class o_sgtree_node_traits + + template + class o_sgtree_node_traits { - public : - typedef typename data_t::Initializer Initializer; - typedef typename v_object >::ptr_t node_ptr_t; + public: + using Initializer = typename data_t::Initializer; + using node_ptr_t = typename v_object >::ptr_t; struct comp_t { diff --git a/src/dbzero/core/collections/sgtree/v_sgtree.hpp b/src/dbzero/core/collections/sgtree/v_sgtree.hpp index f63e242c..17d0a1ef 100644 --- a/src/dbzero/core/collections/sgtree/v_sgtree.hpp +++ b/src/dbzero/core/collections/sgtree/v_sgtree.hpp @@ -121,7 +121,7 @@ DB0_PACKED_END { public: using super = typename node_t::tree_base_t; - using c_type = typename super::c_type; + using c_type = typename super::ContainerT; using comp_t = typename node_t::comp_t; using node_ptr_t = typename node_t::ptr_t; using ptr_t = typename super::ptr_t; @@ -226,28 +226,23 @@ DB0_PACKED_END using const_iterator = iterator; iterator begin() { - // cast to node_prt_t - return _Tree::begin_node(node_ptr_t(this->get_v_ptr())); + return _Tree::begin_node(node_ptr_t(*this)); } iterator end() { - // cast to node_prt_t - return 
_Tree::end_node(node_ptr_t(this->get_v_ptr())); + return _Tree::end_node(node_ptr_t(*this)); } - iterator begin() const { - // cast to node_prt_t - return _Tree::begin_node(node_ptr_t(this->get_v_ptr())); + iterator begin() const { + return _Tree::begin_node(node_ptr_t(*this)); } iterator end() const { - // cast to node_prt_t - return _Tree::end_node(node_ptr_t(this->get_v_ptr())); + return _Tree::end_node(node_ptr_t(*this)); } bool empty() const { - // cast to node_prt_t - return (_Tree::begin_node(node_ptr_t(this->get_v_ptr())) == _Tree::end_node(node_ptr_t(this->get_v_ptr()))); + return (_Tree::begin_node(node_ptr_t(*this))) == _Tree::end_node(node_ptr_t(*this)); } // This method allows constructing an iterator from a previously saved address @@ -271,7 +266,8 @@ DB0_PACKED_END * KeyInitializer - node key initializer type * args - data initializers */ - template iterator insert_equal(const KeyInitializer &key, Args&&... args) + template + iterator insert_equal(const KeyInitializer &key, Args&&... 
args) { std::size_t depth; link_data ld; @@ -282,7 +278,7 @@ DB0_PACKED_END SG_Tree::link(this->head(), new_node, ld); SG_Tree::rebalance_after_insertion(new_node, depth, this->modify().size++, _alpha); this->updateMaxTreeSize(); - return new_node.get_v_ptr(); + return new_node; } /** @@ -300,7 +296,7 @@ DB0_PACKED_END SG_Tree::link(this->head(), new_node, ld); SG_Tree::rebalance_after_insertion(new_node, depth, ++this->modify().size, _alpha); this->updateMaxTreeSize(); - return new_node.get_v_ptr(); + return new_node; } /** @@ -335,7 +331,7 @@ DB0_PACKED_END this->head(), new_node, commit_data, this->modify().size++, _alpha ); this->updateMaxTreeSize(); - return std::make_pair(new_node.get_v_ptr(), true); + return std::make_pair(new_node, true); } /** @@ -436,7 +432,7 @@ DB0_PACKED_END /** * Destroy SG-Tree and all its nodes (v-objects) */ - void destroy() const + void destroy() { // destroy SG-Tree starting from the "head" element destroyHeadNode(this->head()); @@ -466,16 +462,14 @@ DB0_PACKED_END #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstrict-aliasing" #endif - node_ptr_t &head() - { + node_ptr_t &head() { // cast to head - return (node_ptr_t&)(this->v_this); + return reinterpret_cast(*this); } - - const node_ptr_t &head() const - { + + const node_ptr_t &head() const { // cast to head - return (const node_ptr_t&)(this->v_this); + return reinterpret_cast(*this); } #ifdef __linux__ @@ -562,7 +556,7 @@ DB0_PACKED_END * Join / use specialized comparer */ template bool join(join_stack &it, const KeyT &key, - NodePtrKeyComp key_comp, int direction) const + NodePtrKeyComp key_comp, int direction) const { if (direction > 0) { // initialize join stack @@ -601,11 +595,10 @@ DB0_PACKED_END /// joinBound implementation with dedicated key comparator template void joinBound(join_stack &it, const KeyT &key, - NodePtrKeyComp key_comp) const + NodePtrKeyComp key_comp) const { // initialize join stack - if (it.empty()) - { + if (it.empty()) { 
SG_Tree::beginJoinBackward(this->head(),it); } SG_Tree::joinBound(it,key, key_comp); diff --git a/src/dbzero/core/collections/vector/v_bvector.hpp b/src/dbzero/core/collections/vector/v_bvector.hpp index ebdebd0a..98881151 100644 --- a/src/dbzero/core/collections/vector/v_bvector.hpp +++ b/src/dbzero/core/collections/vector/v_bvector.hpp @@ -260,7 +260,7 @@ DB0_PACKED_END } } - void destroy() const + void destroy() { destroyAllBlocks(); m_pb_cache.clear(); diff --git a/src/dbzero/core/collections/vector/v_sorted_sequence.hpp b/src/dbzero/core/collections/vector/v_sorted_sequence.hpp index 8809b51e..16d344a1 100644 --- a/src/dbzero/core/collections/vector/v_sorted_sequence.hpp +++ b/src/dbzero/core/collections/vector/v_sorted_sequence.hpp @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/src/dbzero/core/collections/vector/v_sorted_vector.hpp b/src/dbzero/core/collections/vector/v_sorted_vector.hpp index 2b74f2f3..c5baf02f 100644 --- a/src/dbzero/core/collections/vector/v_sorted_vector.hpp +++ b/src/dbzero/core/collections/vector/v_sorted_vector.hpp @@ -834,7 +834,7 @@ DB0_PACKED_BEGIN return ((*this)->m_size == 0); } - void destroy() const + void destroy() { // container destroy (*this)->destroy(this->getMemspace(), m_item_destroy_func); @@ -1031,7 +1031,7 @@ DB0_PACKED_BEGIN // delete VSPACE "this" this->destroy(); // claim new identity - this->v_this = new_vector.get_v_ptr(); + (*this) = new_vector; return true; } else { return false; @@ -1057,7 +1057,7 @@ DB0_PACKED_BEGIN // delete VSPACE "this" this->destroy(); // claim new identity - this->v_this = new_vector.get_v_ptr(); + (*this) = new_vector; return true; } else { return false; diff --git a/src/dbzero/core/memory/Address.hpp b/src/dbzero/core/memory/Address.hpp index 6dddaa48..3473db50 100644 --- a/src/dbzero/core/memory/Address.hpp +++ b/src/dbzero/core/memory/Address.hpp @@ -9,8 +9,8 @@ namespace db0 { -DB0_PACKED_BEGIN +DB0_PACKED_BEGIN template class DB0_PACKED_ATTR 
AddressType { public: @@ -79,9 +79,10 @@ DB0_PACKED_BEGIN }; using Address = AddressType; - + // The UniqueAddress combines memory offset and instance ID // by definition the UniqueAddress will not be assigned more than once throughut the lifetime of the prefix +DB0_PACKED_BEGIN class DB0_PACKED_ATTR UniqueAddress { public: @@ -173,10 +174,10 @@ DB0_PACKED_BEGIN { } }; - +DB0_PACKED_END + UniqueAddress makeUniqueAddr(std::uint64_t offset, std::uint16_t id); -DB0_PACKED_END } namespace std diff --git a/src/dbzero/core/memory/CacheRecycler.cpp b/src/dbzero/core/memory/CacheRecycler.cpp index c4dfea26..2851ffab 100644 --- a/src/dbzero/core/memory/CacheRecycler.cpp +++ b/src/dbzero/core/memory/CacheRecycler.cpp @@ -7,12 +7,30 @@ namespace db0 { + // Calculate target capacity for specific priority + std::size_t getCapacity(std::size_t total_capacity, int priority) + { + auto result = total_capacity; + auto low_result = total_capacity >> 3; // 12.5% for low priority + if (priority == 0) { + result -= low_result; + } else { + result = low_result; + } + return result; + } + + std::size_t getMaxSize(std::size_t capacity) { + return (capacity > 0) ? 
((capacity - 1) / MIN_PAGE_SIZE + 1) : 0; + } + CacheRecycler::CacheRecycler(std::size_t capacity, const std::atomic &dirty_meter, std::optional flush_size, std::function flush_dirty, std::function flush_callback) - : m_res_buf((capacity > 0)?((capacity - 1) / MIN_PAGE_SIZE + 1):0) - , m_capacity(capacity) + : m_capacity(capacity) + // NOTE: buffers are overprovisioned + , m_res_bufs { getMaxSize(m_capacity), getMaxSize(m_capacity) } , m_dirty_meter(dirty_meter) // assign default flush size , m_flush_size(flush_size.value_or(DEFAULT_FLUSH_SIZE)) @@ -21,46 +39,61 @@ namespace db0 { } - void CacheRecycler::adjustSize(std::unique_lock &, std::size_t requested_release_size) + std::size_t CacheRecycler::adjustSize(std::unique_lock &, list_t &res_buf, + std::size_t requested_release_size) { // calculate size to be released from the dirty locks // so that they occupy <50% of the cache // NOTE: this has to be done before actual size adjustment - if (m_flush_dirty && m_dirty_meter > ((m_current_size - requested_release_size) >> 1)) { - std::int64_t limit = m_dirty_meter - ((m_current_size - requested_release_size) >> 1); + if (m_flush_dirty && m_dirty_meter > ((getCurrentSize() - requested_release_size) >> 1)) { + std::int64_t limit = m_dirty_meter - ((getCurrentSize() - requested_release_size) >> 1); // request flushing (and releasing) specific volume of dirty locks m_flush_dirty(limit); } std::size_t released_size = 0; // try flushing 'requested_release_size' number of excess elements - auto it = m_res_buf.begin(), end = m_res_buf.end(); + auto it = res_buf.begin(), end = res_buf.end(); while (it != end && released_size < requested_release_size) { // only release locks with no active external references (other than the CacheRecycler itself) // NOTE: dirty locks are relased by m_flush_dirty callback if ((*it).use_count() == 1 && !(*it)->isDirty()) { released_size += (*it)->usedMem(); - it = m_res_buf.erase(it); + it = res_buf.erase(it); } else { ++it; } } - // update 
current size - m_current_size -= released_size; + return released_size; } - void CacheRecycler::updateSize(std::unique_lock &lock, std::size_t expected_size) - { + void CacheRecycler::adjustSize(std::unique_lock &lock, std::size_t release_size) + { + // release from low-priority cache first + auto released_size = adjustSize(lock, m_res_bufs[1], release_size); + // update current size + m_current_size[1] -= released_size; + release_size -= released_size; + if (release_size > 0) { + released_size = adjustSize(lock, m_res_bufs[0], release_size); + m_current_size[0] -= released_size; + } + } + + void CacheRecycler::updateSize(std::unique_lock &lock, int priority, std::size_t expected_size) + { + assert(priority == 0 || priority == 1); // we make 2 iterations because dependent locks (i.e. owned by the boundary lock) // will be released only during the second pass for (int i = 0; i < 2; ++i) { - if (m_current_size <= expected_size) { + if (m_current_size[priority] <= expected_size) { break; } // release excess locks plus flush size - adjustSize(lock, m_current_size - expected_size); + auto released_size = adjustSize(lock, m_res_bufs[priority], m_current_size[priority] - expected_size); + m_current_size[priority] -= released_size; } } @@ -78,38 +111,40 @@ namespace db0 if (res_lock) { // access existing resource std::unique_lock lock(m_mutex); + int priority = res_lock->isCached() ? 
0 : 1; if (res_lock->isRecycled()) { // resource already in cache, just bring to back (lowest priority for removal) - m_res_buf.splice(m_res_buf.end(), res_lock->m_recycle_it); - } else if (res_lock->isCached()) { + m_res_bufs[priority].splice(m_res_bufs[priority].end(), res_lock->m_recycle_it); + } else { // add new resource (if to be cached) auto lock_size = res_lock->usedMem(); + auto &res_buf = m_res_bufs[priority]; if (lock_size > m_capacity) { // Cache size is too small to keep this resource // (or is uninitialized) return; } - m_current_size += lock_size; - if (m_current_size > m_capacity) { + m_current_size[priority] += lock_size; + if (getCurrentSize() > m_capacity) { // try reducing cache utilization to capacity minus flush size auto flush_size = std::min(m_capacity >> 1, m_flush_size); updateSize(lock, m_capacity - flush_size); flushed = true; - flush_result = m_current_size <= (m_capacity - flush_size); + flush_result = m_current_size[priority] <= (m_capacity - flush_size); } // resize is a costly operation but cannot be avoided if the number of locked // resources exceeds the assumed limit // note that this operation does not change the configured cache capacity - if (m_res_buf.size() == m_res_buf.max_size()) { + if (res_buf.size() == res_buf.max_size()) { // After resize, all iterators to cached elements will be invalidated!! 
- m_res_buf.resize(m_res_buf.size() * 2); + res_buf.resize(res_buf.size() * 2); // Update self-iterators in all cached locks - for (auto it = m_res_buf.begin(), end = m_res_buf.end(); it != end; ++it) { + for (auto it = res_buf.begin(), end = res_buf.end(); it != end; ++it) { (*it)->m_recycle_it = it; - } + } } - m_res_buf.push_back(res_lock); - res_lock->m_recycle_it = std::prev(m_res_buf.end()); + res_buf.push_back(res_lock); + res_lock->m_recycle_it = std::prev(res_buf.end()); res_lock->setRecycled(true); } } @@ -123,28 +158,49 @@ namespace db0 void CacheRecycler::clear() { std::unique_lock lock(m_mutex); - // try releasing all locks - updateSize(lock, 0); + // try releasing all locks without changing capacity + updateSize(lock, 0, 0); + updateSize(lock, 1, 0); } - void CacheRecycler::resize(std::size_t new_size) - { + void CacheRecycler::resize(std::size_t new_capacity) + { std::unique_lock lock(m_mutex); - if (new_size == m_capacity) { + bool resize = (new_capacity < m_capacity); + m_capacity = new_capacity; + if (resize) { + // try reducing cache utilization to new capacity + updateSize(lock, new_capacity); + } + } + + void CacheRecycler::updateSize(std::unique_lock &_lock, std::size_t expected_size) + { + // try keeping priority = 1 below its target capacity + auto new_size_1 = std::min(db0::getCapacity(expected_size, 1), m_current_size[1]); + resize(_lock, new_size_1, 1); + // priority = 0 may excteed its target capacity when there's sufficient free space + resize(_lock, std::min(expected_size - new_size_1, m_current_size[0]), 0); + } + + void CacheRecycler::resize(std::unique_lock &_lock, std::size_t new_size, int priority) + { + if (m_current_size[priority] <= new_size) { + // target size already satisfied return; } - m_capacity = new_size; // try releasing excess locks - updateSize(lock, m_capacity); + updateSize(_lock, priority, new_size); + auto &res_buf = m_res_bufs[priority]; // new capacity of the fixed list should allow storing existing locks - 
auto new_max_size = std::max((m_capacity - 1) / MIN_PAGE_SIZE + 1, m_res_buf.size()); - if (new_max_size != m_res_buf.max_size()) { + auto new_max_size = std::max((m_capacity - 1) / MIN_PAGE_SIZE + 1, res_buf.size()); + if (new_max_size > res_buf.max_size()) { // After resize, all iterators to cached elements will be invalidated!! - m_res_buf.resize(new_max_size); - + res_buf.resize(new_max_size); + // Update self-iterators in all cached locks - for (auto it = m_res_buf.begin(), end = m_res_buf.end(); it != end; ++it) { + for (auto it = res_buf.begin(), end = res_buf.end(); it != end; ++it) { (*it)->m_recycle_it = it; } } @@ -154,25 +210,39 @@ namespace db0 { if (res.isRecycled()) { res.setRecycled(false); - m_current_size -= res.size(); - m_res_buf.erase(res.m_recycle_it); + int priority = res.isCached() ? 0 : 1; + m_current_size[priority] -= res.size(); + m_res_bufs[priority].erase(res.m_recycle_it); } } - std::size_t CacheRecycler::size() const { - return m_current_size; + std::size_t CacheRecycler::size() const + { + std::unique_lock lock(m_mutex); + return getCurrentSize(); } - std::size_t CacheRecycler::getCapacity() const { - return m_capacity; - } - void CacheRecycler::forEach(std::function)> f) const { std::unique_lock lock(m_mutex); - for (const auto &p: m_res_buf) { + for (const auto &p: m_res_bufs[0]) { + f(p); + } + for (const auto &p: m_res_bufs[1]) { f(p); } } + + std::size_t CacheRecycler::getCapacity() const + { + std::unique_lock lock(m_mutex); + return m_capacity; + } + std::vector CacheRecycler::getDetailedSize() const + { + std::unique_lock lock(m_mutex); + return { m_current_size[0], m_current_size[1] }; + } + } \ No newline at end of file diff --git a/src/dbzero/core/memory/CacheRecycler.hpp b/src/dbzero/core/memory/CacheRecycler.hpp index 688e4ac1..fae00a1e 100644 --- a/src/dbzero/core/memory/CacheRecycler.hpp +++ b/src/dbzero/core/memory/CacheRecycler.hpp @@ -16,8 +16,8 @@ namespace db0 class CacheRecycler { - public : - static constexpr 
std::size_t DEFAULT_FLUSH_SIZE = 128 << 20u; + public: + static constexpr std::size_t DEFAULT_FLUSH_SIZE = 256u << 20; /** * Holds resource locks and recycles based on LRU policy @@ -45,10 +45,10 @@ namespace db0 void clear(); /** - * Modify cache size - * @param new_size as byte count + * Change cache capacity at runtime + * @param new_capacity as byte size */ - void resize(std::size_t new_size); + void resize(std::size_t new_capacity); void setFlushSize(unsigned int); @@ -63,6 +63,9 @@ namespace db0 * Get current cache utilization */ std::size_t size() const; + + // @return current cache size with a by-priority breakdown + std::vector getDetailedSize() const; std::size_t getCapacity() const; @@ -71,14 +74,15 @@ namespace db0 */ void forEach(std::function)>) const; - private : + private: using list_t = db0::FixedList >; using iterator = list_t::iterator; - - list_t m_res_buf; - std::size_t m_current_size = 0; - // cache capacity as number of bytes + + // total cache capacity std::size_t m_capacity; + // buffers for priority cache (#0) and secondary cache (#1) + std::array m_res_bufs; + std::array m_current_size = {0, 0}; const std::atomic &m_dirty_meter; // number of locks to be flushed at once std::size_t m_flush_size; @@ -87,13 +91,23 @@ namespace db0 std::function m_flush_callback; std::pair m_last_flush_callback_result = {true, false}; + void resize(std::unique_lock &, std::size_t new_size, int priority); + /** * Adjusts cache size after updates, collect locks to unlock (can be unlocked off main thread) * @param released_locks locks to be released * @param release_size total number of bytes to be released + * @return number of bytes actually released */ - void adjustSize(std::unique_lock &, std::size_t release_size); + std::size_t adjustSize(std::unique_lock &, list_t &res_buf, std::size_t release_size); + void adjustSize(std::unique_lock &, std::size_t release_size); + void updateSize(std::unique_lock &, int priority, std::size_t expected_size); + // update 
overall size void updateSize(std::unique_lock &, std::size_t expected_size); + + inline std::size_t getCurrentSize() const { + return m_current_size[0] + m_current_size[1]; + } }; } \ No newline at end of file diff --git a/src/dbzero/core/memory/Memspace.cpp b/src/dbzero/core/memory/Memspace.cpp index 6273a117..49d32ffe 100644 --- a/src/dbzero/core/memory/Memspace.cpp +++ b/src/dbzero/core/memory/Memspace.cpp @@ -5,7 +5,7 @@ namespace db0 { - + Memspace::Memspace(std::shared_ptr prefix, std::shared_ptr allocator, std::optional uuid) : m_prefix(prefix) , m_storage_ptr(&prefix->getStorage()) @@ -52,6 +52,9 @@ namespace db0 bool Memspace::commit(ProcessTimer *timer) { assert(m_prefix); + m_maybe_need_flush.clear(); + m_maybe_modified.clear(); + // prepare the allocator for the next transaction getAllocatorForUpdate().commit(); auto state_num = m_prefix->getStateNum(false); @@ -70,6 +73,8 @@ namespace db0 timer = std::make_unique("Memspace::close", timer_ptr); } + m_maybe_need_flush.clear(); + m_maybe_modified.clear(); m_allocator_ptr = nullptr; m_allocator = nullptr; m_prefix->close(); diff --git a/src/dbzero/core/memory/Memspace.hpp b/src/dbzero/core/memory/Memspace.hpp index 8252ed00..7172bc64 100644 --- a/src/dbzero/core/memory/Memspace.hpp +++ b/src/dbzero/core/memory/Memspace.hpp @@ -13,6 +13,8 @@ namespace db0 { class ProcessTimer; + class vtypeless; + class GC0; /** * Combines application requisites, prefix related @@ -112,6 +114,14 @@ namespace db0 // NOTE: m_page_shift is 0 if page size is not a power of 2 return m_page_shift ? 
(address.getOffset() >> m_page_shift) : (address.getOffset() / m_page_size); } + + void collectForFlush(db0::vtypeless *vptr) { + m_maybe_need_flush.push_back(vptr); + } + + void collectModified(db0::vtypeless *vptr) { + m_maybe_modified.push_back(vptr); + } protected: std::shared_ptr m_prefix; @@ -124,12 +134,23 @@ namespace db0 bool m_atomic = false; std::size_t m_page_size = 0; unsigned int m_page_shift = 0; - + // exhaustive list of instances which may need flush + std::vector m_maybe_need_flush; + // exhaustive list of pointers to instances (may be expired!) modified within the current transaction + std::vector m_maybe_modified; + inline Allocator &getAllocatorForUpdate() { assert(m_allocator_ptr); return *m_allocator_ptr; } - + + const std::vector &getModified() const { + return m_maybe_modified; + } + + const std::vector &getForFlush() const { + return m_maybe_need_flush; + } }; } diff --git a/src/dbzero/core/memory/MetaAllocator.cpp b/src/dbzero/core/memory/MetaAllocator.cpp index 60d76f8f..5567d676 100644 --- a/src/dbzero/core/memory/MetaAllocator.cpp +++ b/src/dbzero/core/memory/MetaAllocator.cpp @@ -1,7 +1,7 @@ #include "MetaAllocator.hpp" #include "OneShotAllocator.hpp" #include "Memspace.hpp" -#include "SlabRecycler.hpp" +#include "SlabManager.hpp" #include #include @@ -10,7 +10,7 @@ namespace db0 { static constexpr double MIN_FILL_RATE = 0.25; - + inline unsigned char getRealmID(std::uint32_t slab_id) { return slab_id & MetaAllocator::REALM_MASK; } @@ -21,8 +21,8 @@ namespace db0 std::size_t max_slab_count = (std::numeric_limits::max() - MP * page_size) / slab_size - 1; // estimate the number of slabs for which the definitions can be stored on a single page // this is a very conservative estimate - std::size_t slab_count_1 = (std::size_t)(MIN_FILL_RATE * (double)page_size / (double)sizeof(MetaAllocator::SlabDef)); - std::size_t slab_count_2 = (std::size_t)(MIN_FILL_RATE * (double)page_size / (double)sizeof(MetaAllocator::CapacityItem)) - (2 * MP); + 
std::size_t slab_count_1 = (std::size_t)(MIN_FILL_RATE * (double)page_size / (double)sizeof(SlabDef)); + std::size_t slab_count_2 = (std::size_t)(MIN_FILL_RATE * (double)page_size / (double)sizeof(CapacityItem)) - (2 * MP); return std::min(max_slab_count, std::min(slab_count_1, slab_count_2)); } @@ -48,8 +48,8 @@ namespace db0 } // Construct the reverse address pool function - std::function MetaAllocator::getReverseAddressPool(std::size_t offset, std::size_t page_size, - std::size_t slab_size) + std::function MetaAllocator::getReverseAddressPool(std::size_t offset, + std::size_t page_size, std::size_t slab_size) { auto slab_count = getSlabCount(page_size, slab_size); // make offset page-aligned @@ -101,626 +101,6 @@ namespace db0 , m_slab_size(slab_size) { } - - class SlabManager - { - public: - static constexpr std::size_t NUM_REALMS = MetaAllocator::NUM_REALMS; - - SlabManager(std::shared_ptr prefix, MetaAllocator::SlabTreeT &slab_defs, - MetaAllocator::CapacityTreeT &capacity_items, SlabRecycler *recycler, std::uint32_t slab_size, std::uint32_t page_size, - std::function address_func, std::function slab_id_func, - unsigned char realm_id) - : m_prefix(prefix) - , m_realm_id(realm_id) - , m_slab_defs(slab_defs) - , m_capacity_items(capacity_items) - , m_recycler_ptr(recycler) - , m_slab_size(slab_size) - , m_page_size(page_size) - , m_slab_address_func(address_func) - , m_slab_id_func(slab_id_func) - , m_next_slab_id(fetchNextSlabId()) - { - } - - using CapacityItem = MetaAllocator::CapacityItem; - using SlabDef = MetaAllocator::SlabDef; - - struct FindResult - { - std::shared_ptr m_slab; - CapacityItem m_cap_item; - - bool operator==(std::uint32_t slab_id) const { - return m_slab && m_cap_item.m_slab_id == slab_id; - } - - bool operator==(const FindResult &rhs) const { - return *this == rhs.m_cap_item.m_slab_id; - } - - const SlabAllocator &operator*() const { - return *m_slab; - } - - inline bool operator!() const { - return !m_slab; - } - }; - - // NOTE: only 
localities 0 and 1 are currently supported - struct ActiveSlab: public std::array - { - bool contains(std::uint32_t slab_id) const { - return ((*this)[0] == slab_id || (*this)[1] == slab_id); - } - - bool contains(const FindResult &slab) const { - return ((*this)[0] == slab || (*this)[1] == slab); - } - - FindResult find(std::uint32_t slab_id) const - { - if ((*this)[0] == slab_id) { - return (*this)[0]; - } else if ((*this)[1] == slab_id) { - return (*this)[1]; - } - return {}; - } - - void erase(const FindResult &slab) - { - if ((*this)[0] == slab) { - (*this)[0] = {}; - } else if ((*this)[1] == slab) { - (*this)[1] = {}; - } else { - assert(false); - THROWF(db0::InternalException) << "Slab not found in active slabs." << THROWF_END; - } - } - }; - - /** - * Retrieves the active slab or returns nullptr if no active slab available - */ - FindResult tryGetActiveSlab(unsigned char locality) { - assert(locality < m_active_slab.size()); - return m_active_slab[locality]; - } - - void resetActiveSlab(unsigned char locality) { - assert(locality < m_active_slab.size()); - m_active_slab[locality] = {}; - } - - /** - * Retrieve the 1st slab to allocate a block of at least min_capacity - * this is only a 'hint' and if the allocation is not possible, the next slab should be attempted - */ - FindResult findFirst(std::size_t min_capacity, unsigned char locality) - { - // visit slabs starting from the largest available capacity - auto it = m_capacity_items.cbegin(); - for (;;) { - if (it.is_end() || it->m_remaining_capacity < min_capacity) { - // no existing slab has sufficient capacity - return {}; - } - - if (m_active_slab.contains(it->m_slab_id)) { - // do not include active slab in find operation - ++it; - continue; - } - auto slab = openSlab(m_slab_address_func(it->m_slab_id)); - if (!m_active_slab[locality]) { - // make the slab active - m_active_slab[locality] = slab; - } - return slab; - } - } - - // Continue after findFirst - FindResult findNext(FindResult last_result, 
std::size_t min_capacity, unsigned char locality) - { - for (;;) { - // this is to find the next item in order - last_result.m_cap_item.m_slab_id++; - auto it = m_capacity_items.upper_equal_bound(last_result.m_cap_item); - if (!it.first || it.first->m_remaining_capacity < min_capacity) { - return {}; - } - - if (m_active_slab.contains(it.first->m_slab_id)) { - // do not include active slab in find operation - continue; - } - auto slab = openSlab(m_slab_address_func(it.first->m_slab_id)); - if (!m_active_slab[locality]) { - // make the slab active - m_active_slab[locality] = slab; - } - return slab; - } - } - - unsigned int getSlabCount() const { - return (nextSlabId() - m_realm_id) / NUM_REALMS; - } - - /** - * Create a new, unregistered slab instance - */ - std::pair, std::uint32_t> createNewSlab() - { - if (!m_next_slab_id) { - m_next_slab_id = fetchNextSlabId(); - } - auto slab_id = *m_next_slab_id; - (*m_next_slab_id) += NUM_REALMS; - auto address = m_slab_address_func(slab_id); - // create the new slab - auto capacity = SlabAllocator::formatSlab(m_prefix, address, m_slab_size, m_page_size); - // NOTE: for a new slab, the initial lost capacity is 0 - auto slab = std::make_shared(m_prefix, address, m_slab_size, m_page_size, capacity, 0); - if (m_atomic) { - // if atomic operation is in progress, add to the volatile slabs - m_volatile_slabs.push_back(address); - } - - return { slab, slab_id }; - } - - // Create a new, registered slab instance - FindResult addNewSlab(unsigned char locality) - { - auto [slab, slab_id] = createNewSlab(); - auto address = m_slab_address_func(slab_id); - CapacityItem cap_item { - static_cast(slab->getRemainingCapacity()), - static_cast(slab->getLostCapacity()), - slab_id - }; - // register with slab defs - m_slab_defs.emplace(slab_id, - static_cast(cap_item.m_remaining_capacity), - static_cast(cap_item.m_lost_capacity) - ); - // register with capacity items - m_capacity_items.insert(cap_item); - // add to cache - auto cache_item = 
std::make_shared(slab, cap_item); - m_slabs.emplace(address, cache_item); - // capture remaining capacity before instance is closed - slab->setOnCloseHandler([cache_item](const SlabAllocator &alloc) { - cache_item->m_final_remaining_capacity = alloc.getRemainingCapacity(); - cache_item->m_final_lost_capacity = alloc.getLostCapacity(); - }); - - // append with the recycler - if (m_recycler_ptr) { - m_recycler_ptr->append(slab); - } - - // make the newly added slab active - m_active_slab[locality] = { slab, cap_item }; - return m_active_slab[locality]; - } - - std::uint32_t getRemainingCapacity(std::uint32_t slab_id) const - { - // look up with the cache first - auto address = m_slab_address_func(slab_id); - auto it = m_slabs.find(address); - if (it != m_slabs.end()) { - auto slab = it->second->m_slab.lock(); - if (slab) { - return slab->getRemainingCapacity(); - } - } - - // look up with the slab defs next - auto slab_def_ptr = m_slab_defs.find_equal(slab_id); - if (!slab_def_ptr.first) { - THROWF(db0::InternalException) << "Slab definition not found."; - } - return slab_def_ptr.first->m_remaining_capacity; - } - - void close() - { - m_active_slab = {}; - m_reserved_slabs.clear(); - for (auto it = m_slabs.begin(); it != m_slabs.end();) { - it = unregisterSlab(it); - } - } - - // Find existing slab by ID - FindResult tryFind(std::uint32_t slab_id) const - { - if (slab_id < nextSlabId()) { - if (m_active_slab.contains(slab_id)) { - return m_active_slab.find(slab_id); - } - // look up with the cache first - auto address = m_slab_address_func(slab_id); - auto it = m_slabs.find(address); - if (it != m_slabs.end()) { - auto slab = it->second->m_slab.lock(); - if (slab) { - return { slab, it->second->m_cap_item }; - } - } - - return tryOpenSlab(address); - } - return {}; - } - - FindResult find(std::uint32_t slab_id) const - { - auto slab = tryFind(slab_id); - if (!slab) { - THROWF(db0::BadAddressException) << "Slab " << slab_id << " not found"; - } - return slab; - } - - 
/** - * Erase if 'slab' is the last slab - */ - void erase(const FindResult &slab) { - erase(slab, true); - } - - bool empty() const { - return nextSlabId() == m_realm_id; - } - - std::shared_ptr reserveNewSlab() - { - auto [slab, slab_id] = createNewSlab(); - // internally register the slab with capacity = 0 (to avoid use in regular allocations) - CapacityItem cap_item { 0, 0, slab_id }; - // register with slab defs - m_slab_defs.emplace( - slab_id, - static_cast(cap_item.m_remaining_capacity), - static_cast(cap_item.m_lost_capacity) - ); - // register with capacity items - m_capacity_items.insert(cap_item); - return slab; - } - - std::shared_ptr openExistingSlab(const SlabDef &slab_def) - { - if (slab_def.m_slab_id >= nextSlabId()) { - THROWF(db0::InputException) << "Slab " << slab_def.m_slab_id << " does not exist"; - } - auto address = m_slab_address_func(slab_def.m_slab_id); - // look up with the cache first - auto it = m_slabs.find(address); - if (it != m_slabs.end()) { - auto slab = it->second->m_slab.lock(); - if (slab) { - return slab; - } - } - // pull through cache - return openSlab(slab_def).m_slab; - } - - /** - * Open existing slab which has been previously reserved - */ - std::shared_ptr openReservedSlab(Address address) const - { - auto slab_id = m_slab_id_func(address); - if (slab_id >= nextSlabId()) { - THROWF(db0::InputException) << "Slab " << slab_id << " does not exist"; - } - - // look up with the cache first - auto it = m_slabs.find(address); - if (it != m_slabs.end()) { - auto slab = it->second->m_slab.lock(); - if (slab) { - return slab; - } - } - - // retrieve slab definition - auto slab_def_ptr = m_slab_defs.find_equal(slab_id); - if (!slab_def_ptr.first) { - THROWF(db0::InternalException) << "Slab definition not found: " << slab_id; - } - - // pull through cache - auto result = openSlab(*slab_def_ptr.first).m_slab; - // and add for non-expiry cache - m_reserved_slabs.push_back(result); - return result; - } - - Address getFirstAddress() 
const { - return m_slab_address_func(m_realm_id) + SlabAllocator::getFirstAddress(); - } - - void commit() const - { - for (auto &it : m_slabs) { - it.second->commit(); - } - } - - void detach() const - { - // detach all cached slabs - for (auto &it : m_slabs) { - it.second->detach(); - } - // NOTE: we retain the slab element because it's detached - // invalidate cached variable - m_next_slab_id = {}; - } - - std::uint32_t nextSlabId() const - { - if (!m_next_slab_id) { - m_next_slab_id = fetchNextSlabId(); - } - return *m_next_slab_id; - } - - void beginAtomic() - { - assert(!m_atomic); - assert(m_volatile_slabs.empty()); - m_atomic = true; - } - - void endAtomic() - { - assert(m_atomic); - m_volatile_slabs.clear(); - m_atomic = false; - } - - void cancelAtomic() - { - assert(m_atomic); - // revert all volatile slabs from cache - for (auto slab_addr : m_volatile_slabs) { - auto it = m_slabs.find(slab_addr); - if (it != m_slabs.end()) { - auto slab = it->second->m_slab.lock(); - // this is to prevent the slab from materializing any updates - if (slab) { - slab->resetOnCloseHandler(); - } - m_slabs.erase(it); - } - } - m_active_slab = {}; - m_volatile_slabs.clear(); - m_atomic = false; - } - - private: - - struct CacheItem - { - std::weak_ptr m_slab; - CapacityItem m_cap_item; - // the slab's remaining capacity reflected with backend when the SlabAllocator gets destroyed - std::uint32_t m_final_remaining_capacity = 0; - std::uint32_t m_final_lost_capacity = 0; - - CacheItem(std::weak_ptr slab, CapacityItem cap) - : m_slab(slab) - , m_cap_item(cap) - { - } - - void commit() const - { - if (auto slab = m_slab.lock()) { - if (slab) { - slab->commit(); - } - } - } - - void detach() const - { - if (auto slab = m_slab.lock()) { - if (slab) { - slab->detach(); - } - } - } - - // Check if any of the properties changed when compared to "capacity item" - bool isModified() const { - return m_final_remaining_capacity != m_cap_item.m_remaining_capacity || - m_final_lost_capacity 
!= m_cap_item.m_lost_capacity; - } - }; - - using CacheIterator = std::unordered_map >::iterator; - - std::shared_ptr m_prefix; - const unsigned char m_realm_id; - MetaAllocator::SlabTreeT &m_slab_defs; - MetaAllocator::CapacityTreeT &m_capacity_items; - SlabRecycler *m_recycler_ptr = nullptr; - const std::uint32_t m_slab_size; - const std::uint32_t m_page_size; - // slab cache by address - mutable std::unordered_map > m_slabs; - mutable std::vector > m_reserved_slabs; - // active slabs for each supported locality (0 or 1) - mutable ActiveSlab m_active_slab; - // address by allocation ID (from the algo-allocator) - std::function m_slab_address_func; - std::function m_slab_id_func; - mutable std::optional m_next_slab_id; - // addresses of slabs newly created during atomic operations (potentially to be reverted) - mutable std::vector m_volatile_slabs; - // the atomic operation's flag - bool m_atomic = false; - - CacheIterator unregisterSlab(CacheIterator it) const - { - auto cache_item = it->second; - if (!cache_item->m_slab.expired()) { - THROWF(db0::InternalException) - << "Slab " << static_cast(cache_item->m_cap_item.m_slab_id) << " is not closed"; - } - - auto &item = *cache_item; - // if the remaining capacity has hanged, reflect this with backend - if (item.isModified()) { - auto slab_id = item.m_cap_item.m_slab_id; - if (item.m_final_remaining_capacity != item.m_cap_item.m_remaining_capacity) { - auto it = m_capacity_items.find_equal(item.m_cap_item); - assert(!it.isEnd()); - // register under a modified key - m_capacity_items.erase(it); - m_capacity_items.emplace( - item.m_final_remaining_capacity, item.m_final_lost_capacity, slab_id - ); - } - // and update with the slab defs - auto slab_def_ptr = m_slab_defs.find_equal(slab_id); - m_slab_defs.modify(slab_def_ptr)->m_remaining_capacity = item.m_final_remaining_capacity; - m_slab_defs.modify(slab_def_ptr)->m_lost_capacity = item.m_final_lost_capacity; - } - return m_slabs.erase(it); - } - - FindResult 
tryOpenSlab(Address address) const - { - auto it = m_slabs.find(address); - if (it != m_slabs.end()) { - auto result = it->second->m_slab.lock(); - if (result) { - return { result, it->second->m_cap_item }; - } - // unregister expired slab from cache - unregisterSlab(it); - } - - auto slab_id = m_slab_id_func(address); - // retrieve slab definition - auto slab_def_ptr = m_slab_defs.find_equal(slab_id); - if (!slab_def_ptr.first) { - return {}; - } - - return openSlab(*slab_def_ptr.first); - } - - FindResult openSlab(Address address) const - { - auto slab = tryOpenSlab(address); - if (!slab) { - THROWF(db0::BadAddressException) << "Invalid address accessed"; - } - return slab; - } - - // open slab by definition and add to cache - FindResult openSlab(const SlabDef &def) const - { - auto cap_item = CapacityItem(def.m_remaining_capacity, def.m_lost_capacity, def.m_slab_id); - auto addr = m_slab_address_func(def.m_slab_id); - auto slab = std::make_shared( - m_prefix, addr, m_slab_size, m_page_size, def.m_remaining_capacity, def.m_lost_capacity - ); - // add to cache (it's safe to reference item from the unordered_map) - auto cache_item = std::make_shared(slab, cap_item); - m_slabs.emplace(addr, cache_item).first->second; - // capture remaining capacity before instance is closed - slab->setOnCloseHandler([cache_item](const SlabAllocator &alloc) { - cache_item->m_final_remaining_capacity = alloc.getRemainingCapacity(); - cache_item->m_final_lost_capacity = alloc.getLostCapacity(); - }); - - // append with the recycler - if (m_recycler_ptr) { - m_recycler_ptr->append(slab); - } - - return { slab, cap_item }; - } - - void erase(const FindResult &slab, bool cleanup) - { - // erasing the last slab - if (slab.m_cap_item.m_slab_id != nextSlabId() - NUM_REALMS) { - return; - } - - auto addr = m_slab_address_func(slab.m_cap_item.m_slab_id); - // unregister from cache - auto it = m_slabs.find(addr); - if (it != m_slabs.end()) { - m_slabs.erase(it); - } - // unregister from 
recycler - if (m_recycler_ptr) { - m_recycler_ptr->closeOne([&slab](const SlabAllocator &s) { - return slab.m_slab.get() == &s; - }); - } - // unregister if active - if (m_active_slab.contains(slab)) { - m_active_slab.erase(slab); - } - // unregister from slab defs - if (!m_slab_defs.erase_equal(slab.m_cap_item.m_slab_id).first) { - THROWF(db0::InternalException) << "Slab definition not found."; - } - // unregister from capacity items - if (!m_capacity_items.erase_equal(slab.m_cap_item).first) { - THROWF(db0::InternalException) << "Capacity item not found."; - } - if (!m_next_slab_id) { - m_next_slab_id = fetchNextSlabId(); - } - (*m_next_slab_id) -= NUM_REALMS; - // try removing other empty slabs if such exist - if (cleanup) { - while (!empty()) { - auto slab = openSlab(m_slab_address_func(nextSlabId() - NUM_REALMS)); - if (!slab.m_slab->empty()) { - break; - } - erase(slab, false); - } - } - } - - std::uint32_t fetchNextSlabId() const - { - // determine the max slab id - auto it = m_slab_defs.find_max(); - if (it.first) { - return it.first->m_slab_id + NUM_REALMS; - } else { - // first slab being created - return m_realm_id; - } - } - - }; std::uint64_t MetaAllocator::Realm::getSlabMaxAddress() const { @@ -737,7 +117,7 @@ namespace db0 } return max_addr; } - + MetaAllocator::MetaAllocator(std::shared_ptr prefix, SlabRecycler *recycler, bool deferred_free) : m_prefix(prefix) , m_header(getMetaHeader(prefix)) @@ -747,9 +127,8 @@ namespace db0 m_header.m_page_size ) , m_metaspace(createMetaspace()) - , m_realms(m_metaspace, m_prefix, recycler, m_header, NUM_REALMS) - , m_recycler_ptr(recycler) - , m_deferred_free(deferred_free) + , m_realms(m_metaspace, m_prefix, recycler, m_header, NUM_REALMS, deferred_free) + , m_recycler_ptr(recycler) , m_slab_id_function(getSlabIdFunction(o_meta_header::sizeOf(), m_header.m_page_size, m_header.m_slab_size)) { auto max_addr = m_realms.getSlabMaxAddress(); @@ -764,7 +143,7 @@ namespace db0 } MetaAllocator::Realm::Realm(Memspace 
&metaspace, std::shared_ptr prefix, SlabRecycler *slab_recycler, - o_realm realm, std::uint32_t slab_size, std::uint32_t page_size, unsigned char realm_id) + o_realm realm, std::uint32_t slab_size, std::uint32_t page_size, unsigned char realm_id, bool deferred_free) : m_slab_defs(metaspace.myPtr(realm.m_slab_defs_ptr), page_size) , m_capacity_items(metaspace.myPtr(realm.m_capacity_items_ptr), page_size) , m_slab_manager(std::make_unique(prefix, m_slab_defs, m_capacity_items, slab_recycler, @@ -772,7 +151,8 @@ namespace db0 page_size, getSlabAddressFunction(o_meta_header::sizeOf(), page_size, slab_size), getSlabIdFunction(o_meta_header::sizeOf(), page_size, slab_size), - realm_id + realm_id, + deferred_free )) { } @@ -843,90 +223,22 @@ namespace db0 std::uint16_t &instance_id, unsigned char realm_id, unsigned char locality) { assert(slot_num == 0); - assert(size > 0); - // try allocating from the active slab first - auto &realm = m_realms[realm_id]; - auto slab = realm.tryGetActiveSlab(locality); - bool is_first = true; - bool is_new = false; - for (;;) { - if (slab.m_slab) { - for (;;) { - auto addr = slab.m_slab->tryAlloc(size, 0, aligned); - if (!addr) { - // NOTE: since the last allocation failed, don't use this slab as "active" - realm.resetActiveSlab(locality); - break; - } - - if (!unique || slab.m_slab->tryMakeAddressUnique(*addr, instance_id)) { - return addr; - } - - // unable to make the address unique, schedule for deferred free and try again - // NOTE: the allocation is lost - deferredFree(*addr); - } - if (size > slab.m_slab->getMaxAllocSize()) { - THROWF(db0::InternalException) - << "Requested allocation size " << size << " is larger than the slab size " << slab.m_slab->getMaxAllocSize(); - } - if (is_new) { - THROWF(db0::InternalException) << "Slab is new but cannot allocate " << size; - } - } - if (is_first) { - slab = realm.findFirst(size, locality); - is_first = false; - } else { - slab = realm.findNext(slab, size, locality); - } - if 
(!slab.m_slab) { - slab = realm.addNewSlab(locality); - is_new = true; - } - } + assert(size > 0); + return m_realms[realm_id].tryAlloc(size, slot_num, aligned, unique, instance_id, locality); } void MetaAllocator::free(Address address) - { - assert(m_deferred_free_ops.find(address) == m_deferred_free_ops.end()); - if (m_deferred_free) { - deferredFree(address); - } else { - _free(address); - } - } - - void MetaAllocator::deferredFree(Address address) - { - if (m_atomic) { - m_atomic_deferred_free_ops.push_back(address); - } else { - m_deferred_free_ops.insert(address); - } - } - - void MetaAllocator::_free(Address address) { auto slab_id = m_slab_id_function(address); auto realm_id = getRealmID(slab_id); - auto slab = m_realms[realm_id].find(slab_id); - slab.m_slab->free(address); - if (slab.m_slab->empty()) { - // erase or mark as erased - m_realms[realm_id].erase(slab); - } + m_realms[realm_id].free(address, slab_id); } - + std::size_t MetaAllocator::getAllocSize(Address address) const { - if (m_deferred_free_ops.find(address) != m_deferred_free_ops.end()) { - THROWF(db0::BadAddressException) << "Address " << address << " not found (pending deferred free)"; - } auto slab_id = m_slab_id_function(address); auto realm_id = getRealmID(slab_id); - return m_realms[realm_id].find(slab_id).m_slab->getAllocSize(address); + return m_realms[realm_id].getAllocSize(address, slab_id); } std::size_t MetaAllocator::getAllocSize(Address address, unsigned char realm_id) const @@ -935,40 +247,23 @@ namespace db0 if (realm_id != getRealmID(slab_id)) { THROWF(db0::BadAddressException) << "Invalid address accessed"; } - if (m_deferred_free_ops.find(address) != m_deferred_free_ops.end()) { - THROWF(db0::BadAddressException) << "Address " << address << " not found (pending deferred free)"; - } - return m_realms[realm_id].find(slab_id).m_slab->getAllocSize(address); + return m_realms[realm_id].getAllocSize(address, slab_id); } bool MetaAllocator::isAllocated(Address address, std::size_t 
*size_of_result) const { - if (m_deferred_free_ops.find(address) != m_deferred_free_ops.end()) { - return false; - } auto slab_id = m_slab_id_function(address); auto realm_id = getRealmID(slab_id); - auto slab = m_realms[realm_id].tryFind(slab_id); - if (!slab) { - return false; - } - return slab.m_slab->isAllocated(address, size_of_result); + return m_realms[realm_id].isAllocated(address, slab_id, size_of_result); } - + bool MetaAllocator::isAllocated(Address address, unsigned char realm_id, std::size_t *size_of_result) const { auto slab_id = m_slab_id_function(address); if (realm_id != getRealmID(slab_id)) { THROWF(db0::BadAddressException) << "Invalid address accessed"; } - if (m_deferred_free_ops.find(address) != m_deferred_free_ops.end()) { - return false; - } - auto slab = m_realms[realm_id].tryFind(slab_id); - if (!slab) { - return false; - } - return slab.m_slab->isAllocated(address, size_of_result); + return m_realms[realm_id].isAllocated(address, slab_id, size_of_result); } unsigned int MetaAllocator::getSlabCount() const @@ -986,16 +281,12 @@ namespace db0 return m_realms[realm_id].getRemainingCapacity(slab_id); } - void MetaAllocator::Realm::close() { - m_slab_manager->close(); - } - void MetaAllocator::close() { if (m_recycler_ptr) { // unregister all owned (i.e. 
associated with the same prefix) slabs from the recycler - m_recycler_ptr->close([this](const SlabAllocator &slab) { - return &slab.getPrefix() == m_prefix.get(); + m_recycler_ptr->close([this](const SlabItem &slab) { + return &slab->getPrefix() == m_prefix.get(); }); } m_realms.close(); @@ -1013,11 +304,11 @@ namespace db0 { auto slab_id = m_slab_id_function(address); auto realm_id = slab_id & MetaAllocator::REALM_MASK; - auto result = m_realms[realm_id].openReservedSlab(address); + auto result = m_realms[realm_id].openReservedSlab(address, slab_id); assert(result->size() == size); return result; } - + void MetaAllocator::Realm::commit() const { m_slab_defs.commit(); @@ -1049,16 +340,7 @@ namespace db0 SlabRecycler *MetaAllocator::getSlabRecyclerPtr() const { return m_recycler_ptr; } - - void MetaAllocator::Realm::forAllSlabs(std::function f) const - { - auto it = m_slab_defs.cbegin(); - for (;!it.is_end();++it) { - auto slab = m_slab_manager->openExistingSlab(*it); - f(*slab, it->m_slab_id); - } - } - + void MetaAllocator::forAllSlabs(std::function f) const { m_realms.forAllSlabs(f); } @@ -1066,20 +348,9 @@ namespace db0 void MetaAllocator::flush() const { assert(!m_atomic); - assert(m_atomic_deferred_free_ops.empty()); - // perform the deferred free operations - if (!m_deferred_free_ops.empty()) { - for (auto addr : m_deferred_free_ops) { - const_cast(*this)._free(addr); - } - m_deferred_free_ops.clear(); - } + m_realms.flush(); } - std::size_t MetaAllocator::getDeferredFreeCount() const { - return m_deferred_free_ops.size(); - } - void MetaAllocator::beginAtomic() { assert(!m_atomic); @@ -1091,13 +362,6 @@ namespace db0 { assert(m_atomic); m_atomic = false; - // merge atomic deferred free operations - if (!m_atomic_deferred_free_ops.empty()) { - for (auto addr : m_atomic_deferred_free_ops) { - m_deferred_free_ops.insert(addr); - } - m_atomic_deferred_free_ops.clear(); - } m_realms.endAtomic(); } @@ -1105,20 +369,18 @@ namespace db0 { assert(m_atomic); m_atomic 
= false; - // rollback atomic deferred free operations - m_atomic_deferred_free_ops.clear(); m_realms.cancelAtomic(); } MetaAllocator::RealmsVector::RealmsVector(Memspace &metaspace, std::shared_ptr prefix, SlabRecycler *slab_recycler, - o_meta_header &meta_header, unsigned int size) + o_meta_header &meta_header, unsigned int size, bool deferred_free) { reserve(size); auto slab_size = meta_header.m_slab_size; auto page_size = meta_header.m_page_size; for (unsigned int i = 0; i < size; ++i) { - emplace_back(metaspace, prefix, slab_recycler, meta_header.m_realms[i], - slab_size, page_size, static_cast(i) + emplace_back(metaspace, prefix, slab_recycler, meta_header.m_realms[i], slab_size, + page_size, static_cast(i), deferred_free ); } } @@ -1126,7 +388,7 @@ namespace db0 void MetaAllocator::RealmsVector::forAllSlabs(std::function f) const { for (const auto &realm: *this) { - realm.forAllSlabs(f); + realm->forAllSlabs(f); } } @@ -1147,28 +409,28 @@ namespace db0 void MetaAllocator::RealmsVector::beginAtomic() { for (auto &realm: *this) { - realm.beginAtomic(); + realm->beginAtomic(); } } void MetaAllocator::RealmsVector::endAtomic() { for (auto &realm: *this) { - realm.endAtomic(); + realm->endAtomic(); } } void MetaAllocator::RealmsVector::cancelAtomic() { for (auto &realm: *this) { - realm.cancelAtomic(); + realm->cancelAtomic(); } } void MetaAllocator::RealmsVector::close() { for (auto &realm: *this) { - realm.close(); + realm->close(); } } @@ -1176,40 +438,33 @@ namespace db0 { std::uint64_t max_addr = 0; for (const auto &realm : *this) { - max_addr = std::max(max_addr, realm.getSlabMaxAddress()); + max_addr = std::max(max_addr, realm.getSlabMaxAddress()); } return max_addr; } - void MetaAllocator::Realm::beginAtomic() { - m_slab_manager->beginAtomic(); - } - - void MetaAllocator::Realm::endAtomic() { - m_slab_manager->endAtomic(); + void MetaAllocator::RealmsVector::flush() const + { + for (const auto &realm : *this) { + realm->flush(); + } } - void 
MetaAllocator::Realm::cancelAtomic() { - m_slab_manager->cancelAtomic(); + std::size_t MetaAllocator::RealmsVector::getDeferredFreeCount() const + { + std::size_t result = 0; + for (const auto &realm : *this) { + result += realm->getDeferredFreeCount(); + } + return result; } std::uint32_t MetaAllocator::getSlabId(Address address) const { return m_slab_id_function(address); } - -} - -namespace std - -{ - ostream &operator<<(ostream &os, const db0::MetaAllocator::CapacityItem &item) { - os << "CapacityItem(capacity=" << item.m_remaining_capacity << ", slab=" << item.m_slab_id << ")"; - return os; - } - - ostream &operator<<(ostream &os, const db0::MetaAllocator::SlabDef &def) { - os << "SlabDef(slab=" << def.m_slab_id << ", capacity=" << def.m_remaining_capacity << ")"; - return os; + + std::size_t MetaAllocator::getDeferredFreeCount() const { + return m_realms.getDeferredFreeCount(); } } \ No newline at end of file diff --git a/src/dbzero/core/memory/MetaAllocator.hpp b/src/dbzero/core/memory/MetaAllocator.hpp index f0e76119..0334bb70 100644 --- a/src/dbzero/core/memory/MetaAllocator.hpp +++ b/src/dbzero/core/memory/MetaAllocator.hpp @@ -2,8 +2,10 @@ #include "Prefix.hpp" #include "SlabAllocator.hpp" +#include "SlabItem.hpp" #include "AlgoAllocator.hpp" #include "Allocator.hpp" +#include "Recycler.hpp" #include #include #include @@ -16,10 +18,10 @@ namespace db0 { - - class SlabRecycler; + class SlabManager; - + using SlabRecycler = db0::Recycler; + DB0_PACKED_BEGIN struct DB0_PACKED_ATTR o_realm: public o_fixed_versioned { @@ -44,7 +46,7 @@ DB0_PACKED_BEGIN o_meta_header(std::uint32_t page_size, std::uint32_t slab_size); }; -DB0_PACKED_END +DB0_PACKED_END class MetaAllocator: public Allocator { @@ -66,136 +68,6 @@ DB0_PACKED_END */ static void formatPrefix(std::shared_ptr prefix, std::size_t page_size, std::size_t slab_size); -DB0_PACKED_BEGIN - struct DB0_PACKED_ATTR CapacityItem - { - // primary key - std::uint32_t m_remaining_capacity; - std::uint32_t 
m_lost_capacity; - // secondary key - std::uint32_t m_slab_id; - - CapacityItem() = default; - - CapacityItem(std::uint32_t remaining_capacity, std::uint32_t lost_capacity, std::uint32_t slab_id) - : m_remaining_capacity(remaining_capacity) - , m_lost_capacity(lost_capacity) - , m_slab_id(slab_id) - { - } - - static std::uint64_t getKey(const CapacityItem &item) { - return ((std::uint64_t)item.m_remaining_capacity << 32) | item.m_slab_id; - } - - // Construct key from construction args - static std::uint64_t getKey(std::uint32_t remaining_capacity, std::uint32_t, std::uint32_t slab_id) { - return ((std::uint64_t)remaining_capacity << 32) | slab_id; - } - - inline static std::uint32_t first(std::uint64_t key) { - return static_cast(key >> 32); - } - - inline static std::uint32_t second(std::uint64_t key) { - return static_cast(key & 0xFFFFFFFF); - } - - // note descending order of comparisons - struct CompT - { - inline bool operator()(const CapacityItem &lhs, const CapacityItem &rhs) const { - if (lhs.m_remaining_capacity == rhs.m_remaining_capacity) - return lhs.m_slab_id < rhs.m_slab_id; - return rhs.m_remaining_capacity < lhs.m_remaining_capacity; - } - - inline bool operator()(const CapacityItem &lhs, std::uint64_t rhs) const { - if (lhs.m_remaining_capacity == first(rhs)) - return lhs.m_slab_id < second(rhs); - return first(rhs) < lhs.m_remaining_capacity; - } - - inline bool operator()(std::uint64_t lhs, const CapacityItem &rhs) const { - if (first(lhs) == rhs.m_remaining_capacity) - return second(lhs) < rhs.m_slab_id; - return rhs.m_remaining_capacity < first(lhs); - } - }; - - struct EqualT - { - inline bool operator()(const CapacityItem &lhs, const CapacityItem &rhs) const { - return lhs.m_remaining_capacity == rhs.m_remaining_capacity && lhs.m_slab_id == rhs.m_slab_id; - } - - inline bool operator()(const CapacityItem &lhs, std::uint64_t rhs) const { - return lhs.m_remaining_capacity == first(rhs) && lhs.m_slab_id == second(rhs); - } - - inline bool 
operator()(std::uint64_t lhs, const CapacityItem &rhs) const { - return first(lhs) == rhs.m_remaining_capacity && second(lhs) == rhs.m_slab_id; - } - }; - }; -DB0_PACKED_END - -DB0_PACKED_BEGIN - struct DB0_PACKED_ATTR SlabDef - { - // primary key - std::uint32_t m_slab_id; - std::uint32_t m_remaining_capacity; - std::uint32_t m_lost_capacity; - - SlabDef(std::uint32_t slab_id, std::uint32_t remaining_capacity, std::uint32_t lost_capacity) - : m_slab_id(slab_id) - , m_remaining_capacity(remaining_capacity) - , m_lost_capacity(lost_capacity) - { - } - - static inline std::uint32_t getKey(const SlabDef &item) { - return item.m_slab_id; - } - - // Extract key from construction args - static inline std::uint32_t getKey(std::uint32_t slab_id, std::uint32_t, std::uint32_t) { - return slab_id; - } - - struct CompT - { - inline bool operator()(const SlabDef &lhs, const SlabDef &rhs) const { - return lhs.m_slab_id < rhs.m_slab_id; - } - - inline bool operator()(const SlabDef &lhs, std::uint32_t rhs) const { - return lhs.m_slab_id < rhs; - } - - inline bool operator()(std::uint32_t lhs, const SlabDef &rhs) const { - return lhs < rhs.m_slab_id; - } - }; - - struct EqualT - { - inline bool operator()(const SlabDef &lhs, const SlabDef &rhs) const { - return lhs.m_slab_id == rhs.m_slab_id; - } - - inline bool operator()(const SlabDef &lhs, std::uint32_t rhs) const { - return lhs.m_slab_id == rhs; - } - - inline bool operator()(std::uint32_t lhs, const SlabDef &rhs) const { - return lhs == rhs.m_slab_id; - } - }; - }; -DB0_PACKED_END - using CapacityTreeT = SGB_Tree; using SlabTreeT = SGB_Tree; @@ -240,7 +112,7 @@ DB0_PACKED_END * Retrieve information about the remaining space available to the Slab */ std::uint32_t getRemainingCapacity(std::uint32_t slab_id) const; - + /** * Retrieve a new slab reserved for private use * note that this slab will not be available for allocations from MetaAllocator and has to be used directly @@ -293,28 +165,30 @@ DB0_PACKED_END std::unique_ptr 
m_slab_manager; Realm(Memspace &, std::shared_ptr, SlabRecycler *, o_realm, std::uint32_t slab_size, - std::uint32_t page_size, unsigned char realm_id); - - // get the max address from all underlying slabs + std::uint32_t page_size, unsigned char realm_id, bool deferred_free); + std::uint64_t getSlabMaxAddress() const; - void close(); + void commit() const; void detach() const; - - void beginAtomic(); - void endAtomic(); - void cancelAtomic(); - void forAllSlabs(std::function) const; + SlabManager *operator->() { + return m_slab_manager.get(); + } + + const SlabManager *operator->() const { + return m_slab_manager.get(); + } }; struct RealmsVector: protected std::vector { RealmsVector(Memspace &, std::shared_ptr, SlabRecycler *, o_meta_header &, - unsigned int size); - + unsigned int size, bool deferred_free); + // evaluate the max address from all realms std::uint64_t getSlabMaxAddress() const; + std::size_t getDeferredFreeCount() const; inline SlabManager &operator[](unsigned char realm_id) { return *at(realm_id).m_slab_manager; @@ -333,17 +207,15 @@ DB0_PACKED_END void endAtomic(); void cancelAtomic(); + void flush() const; void close(); }; RealmsVector m_realms; - SlabRecycler *m_recycler_ptr; - const bool m_deferred_free; - mutable std::unordered_set
m_deferred_free_ops; + SlabRecycler *m_recycler_ptr; std::function m_slab_id_function; // flag indicating if the atomic operation is in progress bool m_atomic = false; - std::vector
m_atomic_deferred_free_ops; /** * Reads header information from the prefix @@ -357,10 +229,6 @@ DB0_PACKED_END * if not found then create a new slab */ std::shared_ptr getSlabAllocator(std::size_t min_capacity); - - // internal "free" implementation which performs the dealloc instanly - void _free(Address); - void deferredFree(Address); // NOTE: instance ID will only be populated when unique = true std::optional
tryAllocImpl(std::size_t size, std::uint32_t slot_num, bool aligned, bool unique, @@ -368,13 +236,3 @@ DB0_PACKED_END }; } - -namespace std - -{ - - ostream &operator<<(ostream &os, const db0::MetaAllocator::CapacityItem &item); - - ostream &operator<<(ostream &os, const db0::MetaAllocator::SlabDef &item); - -} \ No newline at end of file diff --git a/src/dbzero/core/memory/PageMap.hpp b/src/dbzero/core/memory/PageMap.hpp index b10b1a9f..5230e249 100644 --- a/src/dbzero/core/memory/PageMap.hpp +++ b/src/dbzero/core/memory/PageMap.hpp @@ -67,6 +67,9 @@ namespace db0 // we need to only perform them from a well researched contexts friend class PrefixCache; + void insert(std::unique_lock &, StateNumType state_num, + std::shared_ptr); + // Erase lock stored under a known state number void erase(StateNumType state_num, std::shared_ptr lock); void erase(StateNumType state_num, std::uint64_t page_num); @@ -90,11 +93,7 @@ namespace db0 mutable std::map, CompT> m_cache; using CacheIterator = typename decltype(m_cache)::iterator; - CacheIterator find(std::uint64_t page_num, StateNumType state_num) const; - - // Erase ALL locks with a given page number where state < state_num - // irrespective of their use count, this is required for handling inconsistent locks problem - void eraseAll(std::uint64_t page_num, StateNumType state_num) const; + CacheIterator findImpl(std::uint64_t page_num, StateNumType state_num) const; }; template @@ -106,16 +105,23 @@ namespace db0 template void PageMap::insert(StateNumType state_num, std::shared_ptr res_lock) { - std::unique_lock lock(m_rw_mutex); + std::unique_lock _lock(m_rw_mutex); m_cache[{res_lock->getAddress() >> m_shift, state_num}] = res_lock; } template - void PageMap::insert(StateNumType state_num, std::shared_ptr lock, + void PageMap::insert(std::unique_lock &, StateNumType state_num, + std::shared_ptr res_lock) + { + m_cache[{res_lock->getAddress() >> m_shift, state_num}] = res_lock; + } + + template + void 
PageMap::insert(StateNumType state_num, std::shared_ptr res_lock, std::uint64_t page_num) { std::unique_lock _lock(m_rw_mutex); - m_cache[{page_num, state_num}] = lock; + m_cache[{page_num, state_num}] = res_lock; } template @@ -146,16 +152,15 @@ namespace db0 bool PageMap::exists(StateNumType state_num, std::uint64_t page_num) const { std::shared_lock _lock(m_rw_mutex); - return find(page_num, state_num) != m_cache.end(); + return findImpl(page_num, state_num) != m_cache.end(); } template std::weak_ptr *PageMap::find(StateNumType state_num, std::uint64_t page_num, StateNumType &read_state_num) const - { - // needs to be unique locked due to potential m_cache::erase operation - std::unique_lock lock(m_rw_mutex); - auto it = find(page_num, state_num); + { + std::shared_lock lock(m_rw_mutex); + auto it = findImpl(page_num, state_num); if (it == m_cache.end()) { return nullptr; } @@ -164,55 +169,48 @@ namespace db0 } template - typename PageMap::CacheIterator PageMap::find( + typename PageMap::CacheIterator PageMap::findImpl( std::uint64_t page_num, StateNumType state_num) const { if (m_cache.empty()) { return m_cache.end(); } + + // Find the first element with key >= {page_num, state_num} auto it = m_cache.lower_bound({page_num, state_num}); - if (it == m_cache.end()) { - assert(!m_cache.empty()); - --it; + + // If we found exact match or an element with same page_num and state <= state_num + if (it != m_cache.end() && it->first.first == page_num && it->first.second <= state_num) { + return it; } - if (it != m_cache.begin() && (it->first.second > state_num || it->first.first != page_num)) { - --it; + + // Look backwards for the largest state <= state_num with same page_num + if (it == m_cache.begin()) { + return m_cache.end(); // No valid element found } + + --it; // Safe because we checked it != m_cache.begin() + + // Check if this element matches our criteria if (it->first.first == page_num && it->first.second <= state_num) { return it; } + return m_cache.end(); } 
- - template void PageMap::eraseAll( - std::uint64_t page_num, StateNumType state_num) const - { - if (m_cache.empty()) { - return; - } - auto it = m_cache.lower_bound({page_num, state_num}); - if (it == m_cache.end() && !m_cache.empty()) { - assert(!m_cache.empty()); - --it; - } - if (it != m_cache.begin() && (it->first.second > state_num || it->first.first != page_num)) { - --it; - } - // NOTE: we're NOT erasing locks exactly matching the state number - while (it->first.first == page_num && it->first.second < state_num) { - it = m_cache.erase(it); - } - } template void PageMap::erase(StateNumType state_num, std::shared_ptr res_lock) { std::unique_lock lock(m_rw_mutex); auto page_num = res_lock->getAddress() >> m_shift; - auto it = find(page_num, state_num); + auto it = findImpl(page_num, state_num); assert(it != m_cache.end()); + if (it == m_cache.end()) { + THROWF(db0::InternalException) << "Attempt to erase non-existing lock from PageMap"; + } assert(it->second.lock() == res_lock); - m_cache.erase(it); + m_cache.erase(it); } template void PageMap::clear() @@ -220,8 +218,8 @@ namespace db0 std::unique_lock lock(m_rw_mutex); m_cache.clear(); } - - template bool PageMap::empty() const + + template bool PageMap::empty() const { std::shared_lock lock(m_rw_mutex); return m_cache.empty(); @@ -241,12 +239,13 @@ namespace db0 template std::shared_ptr PageMap::replace( - StateNumType state_num, std::shared_ptr lock, std::uint64_t page_num) + StateNumType state_num, std::shared_ptr res_lock, std::uint64_t page_num) { + std::unique_lock _lock(m_rw_mutex); // find exact match of the page / state auto it = m_cache.find({page_num, state_num}); if (it == m_cache.end()) { - insert(state_num, lock); + insert(_lock, state_num, res_lock); return {}; } auto existing_lock = it->second.lock(); @@ -255,13 +254,13 @@ namespace db0 // this is fine because we're inserting under updated more recent state assert(state_num >= it->first.second); m_cache.erase(it); - insert(state_num, lock); 
+ insert(_lock, state_num, res_lock); return {}; } - assert(existing_lock->size() == lock->size()); + assert(existing_lock->size() == res_lock->size()); // apply changes from the lock being merged (discarding changes in this lock) - existing_lock->moveFrom(*lock); + existing_lock->moveFrom(*res_lock); return existing_lock; } diff --git a/src/dbzero/core/memory/PrefixImpl.hpp b/src/dbzero/core/memory/PrefixImpl.hpp index f8f598db..b750402b 100644 --- a/src/dbzero/core/memory/PrefixImpl.hpp +++ b/src/dbzero/core/memory/PrefixImpl.hpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/dbzero/core/memory/Recycler.hpp b/src/dbzero/core/memory/Recycler.hpp new file mode 100644 index 00000000..a0d26678 --- /dev/null +++ b/src/dbzero/core/memory/Recycler.hpp @@ -0,0 +1,90 @@ +#pragma once + +#include +#include +#include "SlabAllocator.hpp" + +namespace db0 + +{ + + // The recycler class helps maintain the lifecycle of a limited number of + // shared_ptr based resources (e.g. 
SlabAllocator instances) + template class Recycler + { + public: + Recycler(unsigned int max_size = 256); + + void append(std::shared_ptr); + + /** + * Get the number of instances currently begin stored + */ + std::size_t size() const; + + /** + * Get the maximum number of instances that could be stored + */ + std::size_t capacity() const; + + /** + * Close / remove all instances that match the predicate + */ + void close(std::function predicate, bool only_first = false); + void closeOne(std::function predicate); + void clear(); + + private: + const unsigned int m_max_size; + std::deque > m_queue; + }; + + template Recycler::Recycler(unsigned int max_size) + : m_max_size(max_size) + { + } + + template void Recycler::append(std::shared_ptr instance) + { + m_queue.push_back(instance); + while (m_queue.size() > m_max_size) { + m_queue.pop_front(); + } + } + + template + std::size_t Recycler::size() const { + return m_queue.size(); + } + + template + std::size_t Recycler::capacity() const { + return m_max_size; + } + + template + void Recycler::close(std::function predicate, bool only_first) + { + for (auto it = m_queue.begin(); it != m_queue.end();) { + if (predicate(**it)) { + it = m_queue.erase(it); + if (only_first) { + break; + } + } else { + ++it; + } + } + } + + template + void Recycler::closeOne(std::function predicate) { + close(predicate, true); + } + + template + void Recycler::clear() { + m_queue.clear(); + } + +} \ No newline at end of file diff --git a/src/dbzero/core/memory/ResourceLock.cpp b/src/dbzero/core/memory/ResourceLock.cpp index abcec796..0a0b6efc 100644 --- a/src/dbzero/core/memory/ResourceLock.cpp +++ b/src/dbzero/core/memory/ResourceLock.cpp @@ -77,11 +77,7 @@ namespace db0 atomicResetFlags(m_resource_flags, RESOURCE_RECYCLED); } } - - bool ResourceLock::isCached() const { - return !m_access_mode[AccessOptions::no_cache]; - } - + bool ResourceLock::resetDirtyFlag() { using MutexT = ResourceDirtyMutexT; diff --git 
a/src/dbzero/core/memory/ResourceLock.hpp b/src/dbzero/core/memory/ResourceLock.hpp index 67384012..70d5bbf8 100644 --- a/src/dbzero/core/memory/ResourceLock.hpp +++ b/src/dbzero/core/memory/ResourceLock.hpp @@ -109,7 +109,11 @@ namespace db0 inline bool isRecycled() const { return m_resource_flags & db0::RESOURCE_RECYCLED; } - + + inline bool isCached() const { + return !m_access_mode[AccessOptions::no_cache]; + } + // Mark lock as dirty without range specification void setDirty(); @@ -121,7 +125,7 @@ namespace db0 // Sets the RESOURCE_FREEZE flag void freeze(); - bool isCached() const; + #ifndef NDEBUG bool isVolatile() const; diff --git a/src/dbzero/core/memory/SlabAllocator.cpp b/src/dbzero/core/memory/SlabAllocator.cpp index d2a536f5..7e61fcf8 100644 --- a/src/dbzero/core/memory/SlabAllocator.cpp +++ b/src/dbzero/core/memory/SlabAllocator.cpp @@ -58,9 +58,6 @@ namespace db0 SlabAllocator::~SlabAllocator() { - if (m_on_close_handler) { - m_on_close_handler(*this); - } } std::optional
SlabAllocator::tryAlloc(std::size_t size, std::uint32_t slot_num, @@ -113,20 +110,24 @@ namespace db0 if (size % page_size != 0) { THROWF(db0::InternalException) << "Slab size not multiple of page size: " << size << " % " << page_size; } - - // put bitspace right before the header (at the end of the slab ) - BitSpace::create(prefix, headerAddr(begin_addr, size), page_size, -1); + + // put bitspace right before the header (at the end of the slab) + BitSpace::create( + prefix, headerAddr(begin_addr, size), page_size, -1 + ); // open newly created bitspace // use offset = begin_addr (to allow storing internal addresses as 32bit) - BitSpace bitspace(prefix, headerAddr(begin_addr, size), page_size, -1); + BitSpace bitspace( + prefix, headerAddr(begin_addr, size), page_size, -1 + ); - // create the CRDT allocator data structures on top of the bitspace + // Create the CRDT allocator data structures on top of the bitspace AllocSetT allocs(bitspace, page_size); BlankSetT blanks(bitspace, page_size); AlignedBlankSetT aligned_blanks(bitspace, page_size, CompT(page_size), page_size); StripeSetT stripes(bitspace, page_size); - LimitedVector alloc_counter(bitspace, page_size); - alloc_counter.reserve(SLAB_BITSPACE_SIZE()); + LimitedVector alloc_counter(bitspace, page_size); + alloc_counter.reserve(SlabAllocatorConfig::SLAB_BITSPACE_SIZE()); // calculate size initially available to CRTD allocator std::uint32_t crdt_size = static_cast(size - admin_size - admin_margin_bytes); assert(crdt_size > 0); @@ -167,13 +168,13 @@ namespace db0 std::size_t SlabAllocator::calculateAdminSpaceSize(std::size_t page_size) { - auto result = BitSpace::sizeOf() + o_slab_header::sizeOf(); + auto result = BitSpace::sizeOf() + o_slab_header::sizeOf(); // round to full page size result = (result + page_size - 1) / page_size * page_size; // add ADMIN_SPAN pages for CRDT types (actual space initially occupied) result += page_size * ADMIN_SPAN(); // include limited vector's reserved capacity - result += 
LimitedVectorT::DP_REQ(SLAB_BITSPACE_SIZE(), page_size) * page_size; + result += LimitedVectorT::DP_REQ(SlabAllocatorConfig::SLAB_BITSPACE_SIZE(), page_size) * page_size; return result; } @@ -203,15 +204,7 @@ namespace db0 const Prefix &SlabAllocator::getPrefix() const { return *m_prefix; } - - void SlabAllocator::setOnCloseHandler(std::function handler) { - m_on_close_handler = handler; - } - - void SlabAllocator::resetOnCloseHandler() { - m_on_close_handler = {}; - } - + bool SlabAllocator::empty() const { return m_allocs.empty(); } diff --git a/src/dbzero/core/memory/SlabAllocator.hpp b/src/dbzero/core/memory/SlabAllocator.hpp index 81540f23..78fb9453 100644 --- a/src/dbzero/core/memory/SlabAllocator.hpp +++ b/src/dbzero/core/memory/SlabAllocator.hpp @@ -4,6 +4,7 @@ #include "Prefix.hpp" #include "BitSpace.hpp" #include "Memspace.hpp" +#include "SlabAllocatorConfig.hpp" #include #include #include @@ -13,8 +14,8 @@ namespace db0 { -DB0_PACKED_BEGIN - + +DB0_PACKED_BEGIN struct DB0_PACKED_ATTR o_slab_header: public db0::o_fixed { const std::uint32_t m_version = 1; @@ -42,13 +43,8 @@ DB0_PACKED_BEGIN { } }; - - static constexpr unsigned int SLAB_BITSPACE_SIZE() { - // typical configuration, sufficient for a 64MB slab - // FIXME: page_size hardcoded - return 64 * 1024 * 1024 / 4096; - } - +DB0_PACKED_END + /** * The SlabAllocator takes a fixed size address range (e.g. 
64MB) * and organizes the space with the use of BitSetAllocator/BitSpace + CRDT_Allocator @@ -122,13 +118,7 @@ DB0_PACKED_BEGIN std::size_t getLostCapacity() const; const Prefix &getPrefix() const; - - /** - * Register a handler to be called pre-destruction - */ - void setOnCloseHandler(std::function); - void resetOnCloseHandler(); - + bool empty() const; /** @@ -185,7 +175,7 @@ DB0_PACKED_BEGIN const std::uint32_t m_slab_size; Memspace m_internal_memspace; v_object m_header; - BitSpace m_bitspace; + BitSpace m_bitspace; AllocSetT m_allocs; BlankSetT m_blanks; AlignedBlankSetT m_aligned_blanks; @@ -195,11 +185,9 @@ DB0_PACKED_BEGIN CRDT_Allocator m_allocator; const std::optional m_initial_remaining_capacity; const std::optional m_initial_lost_capacity; - std::size_t m_initial_admin_size; - std::function m_on_close_handler; + std::size_t m_initial_admin_size; static Address headerAddr(Address begin_addr, std::uint32_t size); }; -DB0_PACKED_END } diff --git a/src/dbzero/core/memory/SlabAllocatorConfig.hpp b/src/dbzero/core/memory/SlabAllocatorConfig.hpp new file mode 100644 index 00000000..3ce582fa --- /dev/null +++ b/src/dbzero/core/memory/SlabAllocatorConfig.hpp @@ -0,0 +1,36 @@ +#pragma once + +#include +#include +#include +#include "AccessOptions.hpp" +#include + +namespace db0 + +{ + + struct SlabAllocatorConfig + { + // 4KB pages + static constexpr std::size_t DEFAULT_PAGE_SIZE = 4096; + static constexpr std::size_t DEFAULT_SLAB_SIZE = 64u << 20; + + static constexpr unsigned int SLAB_BITSPACE_SIZE() { + // Must equal the number of data pages in the entire slab + return DEFAULT_SLAB_SIZE / DEFAULT_PAGE_SIZE; + } + + // Minimum operational capacity in bytes + // i.e. 
slabs with remaining capacity below this value will not be considered for allocation + static std::size_t MIN_OP_CAPACITY(std::size_t slab_size) { + // NOTE: 1/2 may seem very high but it helps improve performance under heavy fragmentation + return slab_size / 2; + } + + // The number of alloc attempts from existing slabs before + // resorting to adding a new slab + static constexpr int NUM_EXISTING_SLAB_ALLOC_ATTEMPTS = 2; + }; + +} \ No newline at end of file diff --git a/src/dbzero/core/memory/SlabItem.cpp b/src/dbzero/core/memory/SlabItem.cpp new file mode 100644 index 00000000..8372ccf6 --- /dev/null +++ b/src/dbzero/core/memory/SlabItem.cpp @@ -0,0 +1,44 @@ +#include "SlabItem.hpp" + +namespace db0 + +{ + + SlabItem::SlabItem(std::shared_ptr slab, CapacityItem cap) + : m_slab(slab) + , m_cap_item(cap) + { + } + + SlabItem::~SlabItem() { + assert(!m_is_dirty && "SlabItem destroyed while still dirty"); + } + + void SlabItem::commit() const + { + assert(m_slab); + m_slab->commit(); + } + + void SlabItem::detach() const + { + assert(m_slab); + m_slab->detach(); + } + +} + +namespace std + +{ + ostream &operator<<(ostream &os, const db0::CapacityItem &item) { + os << "CapacityItem(capacity=" << item.m_remaining_capacity << ", slab=" << item.m_slab_id << ")"; + return os; + } + + ostream &operator<<(ostream &os, const db0::SlabDef &def) { + os << "SlabDef(slab=" << def.m_slab_id << ", capacity=" << def.m_remaining_capacity << ")"; + return os; + } + +} \ No newline at end of file diff --git a/src/dbzero/core/memory/SlabItem.hpp b/src/dbzero/core/memory/SlabItem.hpp new file mode 100644 index 00000000..907b3bcd --- /dev/null +++ b/src/dbzero/core/memory/SlabItem.hpp @@ -0,0 +1,191 @@ +#pragma once + +#include "SlabAllocator.hpp" + +namespace db0 + +{ + +DB0_PACKED_BEGIN + struct DB0_PACKED_ATTR CapacityItem + { + // primary key (high part) + std::uint32_t m_remaining_capacity; + std::uint32_t m_lost_capacity; + // primary key (low part) + std::uint32_t m_slab_id; + 
+ CapacityItem() = default; + + CapacityItem(std::uint32_t remaining_capacity, std::uint32_t lost_capacity, std::uint32_t slab_id) + : m_remaining_capacity(remaining_capacity) + , m_lost_capacity(lost_capacity) + , m_slab_id(slab_id) + { + } + + static std::uint64_t getKey(const CapacityItem &item) { + return ((std::uint64_t)item.m_remaining_capacity << 32) | item.m_slab_id; + } + + // Construct key from construction args + static std::uint64_t getKey(std::uint32_t remaining_capacity, std::uint32_t, std::uint32_t slab_id) { + return ((std::uint64_t)remaining_capacity << 32) | slab_id; + } + + inline static std::uint32_t first(std::uint64_t key) { + return static_cast(key >> 32); + } + + inline static std::uint32_t second(std::uint64_t key) { + return static_cast(key & 0xFFFFFFFF); + } + + // note descending order of comparisons + struct CompT + { + inline bool operator()(const CapacityItem &lhs, const CapacityItem &rhs) const { + if (lhs.m_remaining_capacity == rhs.m_remaining_capacity) + return lhs.m_slab_id < rhs.m_slab_id; + return rhs.m_remaining_capacity < lhs.m_remaining_capacity; + } + + inline bool operator()(const CapacityItem &lhs, std::uint64_t rhs) const { + if (lhs.m_remaining_capacity == first(rhs)) + return lhs.m_slab_id < second(rhs); + return first(rhs) < lhs.m_remaining_capacity; + } + + inline bool operator()(std::uint64_t lhs, const CapacityItem &rhs) const { + if (first(lhs) == rhs.m_remaining_capacity) + return second(lhs) < rhs.m_slab_id; + return rhs.m_remaining_capacity < first(lhs); + } + }; + + struct EqualT + { + inline bool operator()(const CapacityItem &lhs, const CapacityItem &rhs) const { + return lhs.m_remaining_capacity == rhs.m_remaining_capacity && lhs.m_slab_id == rhs.m_slab_id; + } + + inline bool operator()(const CapacityItem &lhs, std::uint64_t rhs) const { + return lhs.m_remaining_capacity == first(rhs) && lhs.m_slab_id == second(rhs); + } + + inline bool operator()(std::uint64_t lhs, const CapacityItem &rhs) const { + 
return first(lhs) == rhs.m_remaining_capacity && second(lhs) == rhs.m_slab_id; + } + }; + }; +DB0_PACKED_END + +DB0_PACKED_BEGIN + struct DB0_PACKED_ATTR SlabDef + { + // primary key + std::uint32_t m_slab_id; + std::uint32_t m_remaining_capacity; + std::uint32_t m_lost_capacity; + + SlabDef(std::uint32_t slab_id, std::uint32_t remaining_capacity, std::uint32_t lost_capacity) + : m_slab_id(slab_id) + , m_remaining_capacity(remaining_capacity) + , m_lost_capacity(lost_capacity) + { + } + + static inline std::uint32_t getKey(const SlabDef &item) { + return item.m_slab_id; + } + + // Extract key from construction args + static inline std::uint32_t getKey(std::uint32_t slab_id, std::uint32_t, std::uint32_t) { + return slab_id; + } + + struct CompT + { + inline bool operator()(const SlabDef &lhs, const SlabDef &rhs) const { + return lhs.m_slab_id < rhs.m_slab_id; + } + + inline bool operator()(const SlabDef &lhs, std::uint32_t rhs) const { + return lhs.m_slab_id < rhs; + } + + inline bool operator()(std::uint32_t lhs, const SlabDef &rhs) const { + return lhs < rhs.m_slab_id; + } + }; + + struct EqualT + { + inline bool operator()(const SlabDef &lhs, const SlabDef &rhs) const { + return lhs.m_slab_id == rhs.m_slab_id; + } + + inline bool operator()(const SlabDef &lhs, std::uint32_t rhs) const { + return lhs.m_slab_id == rhs; + } + + inline bool operator()(std::uint32_t lhs, const SlabDef &rhs) const { + return lhs == rhs.m_slab_id; + } + }; + }; +DB0_PACKED_END + + struct SlabItem + { + std::shared_ptr m_slab; + // the capacity item as last retrieved from the backend (may need update) + CapacityItem m_cap_item; + bool m_is_dirty = false; + + SlabItem(std::shared_ptr slab, CapacityItem cap); + ~SlabItem(); + + void commit() const; + void detach() const; + + bool operator==(std::uint32_t slab_id) const { + assert(m_slab); + return m_cap_item.m_slab_id == slab_id; + } + + bool operator==(const SlabItem &rhs) const { + return *this == rhs.m_cap_item.m_slab_id; + } + + 
SlabAllocator &operator*() { + assert(m_slab); + return *m_slab; + } + + const SlabAllocator &operator*() const { + assert(m_slab); + return *m_slab; + } + + const SlabAllocator *operator->() const { + assert(m_slab); + return m_slab.get(); + } + + SlabAllocator *operator->(){ + assert(m_slab); + return m_slab.get(); + } + }; + +} + +namespace std + +{ + + ostream &operator<<(ostream &os, const db0::CapacityItem &item); + ostream &operator<<(ostream &os, const db0::SlabDef &item); + +} \ No newline at end of file diff --git a/src/dbzero/core/memory/SlabManager.cpp b/src/dbzero/core/memory/SlabManager.cpp new file mode 100644 index 00000000..f8610702 --- /dev/null +++ b/src/dbzero/core/memory/SlabManager.cpp @@ -0,0 +1,700 @@ +#include "SlabManager.hpp" + +namespace db0 + +{ + + SlabManager::SlabManager(std::shared_ptr prefix, MetaAllocator::SlabTreeT &slab_defs, + MetaAllocator::CapacityTreeT &capacity_items, SlabRecycler *recycler, std::uint32_t slab_size, std::uint32_t page_size, + std::function address_func, std::function slab_id_func, + unsigned char realm_id, bool deferred_free) + : m_prefix(prefix) + , m_realm_id(realm_id) + , m_slab_defs(slab_defs) + , m_capacity_items(capacity_items) + , m_recycler_ptr(recycler) + , m_slab_size(slab_size) + , m_page_size(page_size) + , m_slab_address_func(address_func) + , m_slab_id_func(slab_id_func) + , m_next_slab_id(fetchNextSlabId()) + , m_deferred_free(deferred_free) + { + } + + bool SlabManager::ActiveSlab::contains(std::uint32_t slab_id) const { + return (((*this)[0] && *(*this)[0] == slab_id) || ((*this)[1] && *(*this)[1] == slab_id)); + } + + bool SlabManager::ActiveSlab::contains(std::shared_ptr slab) const { + return ((*this)[0] == slab || (*this)[1] == slab); + } + + std::shared_ptr SlabManager::ActiveSlab::find(std::uint32_t slab_id) const + { + if ((*this)[0] && *(*this)[0] == slab_id) { + return (*this)[0]; + } else if ((*this)[1] && *(*this)[1] == slab_id) { + return (*this)[1]; + } + return {}; + } + + 
void SlabManager::ActiveSlab::erase(std::shared_ptr slab) + { + if ((*this)[0] == slab) { + (*this)[0] = {}; + } else if ((*this)[1] == slab) { + (*this)[1] = {}; + } else { + assert(false); + THROWF(db0::InternalException) << "Slab not found in active slabs." << THROWF_END; + } + } + + std::shared_ptr SlabManager::tryGetActiveSlab(unsigned char locality) + { + assert(locality < m_active_slab.size()); + return m_active_slab[locality]; + } + + void SlabManager::resetActiveSlab(unsigned char locality) + { + assert(locality < m_active_slab.size()); + m_active_slab[locality] = {}; + } + + std::shared_ptr SlabManager::findFirst(std::size_t size, unsigned char locality) + { + // NOTE: before accessing capacity items we must synchronize any updates + saveDirtySlabs(); + // visit slabs starting from the largest available capacity + auto min_capacity = std::max(size, SlabAllocatorConfig::MIN_OP_CAPACITY(m_slab_size)); + auto it = m_capacity_items.cbegin(); + for (;;) { + if (it.is_end() || it->m_remaining_capacity < min_capacity) { + // no existing slab has sufficient capacity + return {}; + } + + if (m_active_slab.contains(it->m_slab_id)) { + // do not include active slab in find operation + ++it; + continue; + } + auto slab = openSlab(m_slab_address_func(it->m_slab_id)); + // make the slab active + m_active_slab[locality] = slab; + return slab; + } + } + + std::shared_ptr SlabManager::findNext(std::shared_ptr last_result, std::size_t size, + unsigned char locality) + { + saveDirtySlabs(); + auto min_capacity = std::max(size, SlabAllocatorConfig::MIN_OP_CAPACITY(m_slab_size)); + auto last_key = last_result->m_cap_item; + for (;;) { + // this is to find the next item in order + last_key.m_slab_id += NUM_REALMS; + auto it = m_capacity_items.upper_equal_bound(last_key); + if (!it.first || it.first->m_remaining_capacity < min_capacity) { + return {}; + } + + if (m_active_slab.contains(it.first->m_slab_id)) { + last_key = *(it.first); + // do not include active slab in find 
operation + continue; + } + auto slab = openSlab(m_slab_address_func(it.first->m_slab_id)); + // make the slab active and for a specific locality + m_active_slab[locality] = slab; + return slab; + } + } + + std::pair, std::uint32_t> SlabManager::createNewSlab() + { + if (!m_next_slab_id) { + m_next_slab_id = fetchNextSlabId(); + } + + auto slab_id = *m_next_slab_id; + (*m_next_slab_id) += NUM_REALMS; + auto address = m_slab_address_func(slab_id); + // create the new slab + auto capacity = SlabAllocator::formatSlab(m_prefix, address, m_slab_size, m_page_size); + // NOTE: for a new slab, the initial lost capacity is 0 + auto slab = std::make_shared(m_prefix, address, m_slab_size, m_page_size, capacity, 0); + if (m_atomic) { + // if atomic operation is in progress, add to the volatile slabs + m_volatile_slabs.push_back(address); + } + + return { slab, slab_id }; + } + + std::shared_ptr SlabManager::addNewSlab(unsigned char locality) + { + auto [slab, slab_id] = createNewSlab(); + auto address = m_slab_address_func(slab_id); + CapacityItem cap_item { + static_cast(slab->getRemainingCapacity()), + static_cast(slab->getLostCapacity()), + slab_id + }; + // register with slab defs + m_slab_defs.emplace(slab_id, + static_cast(cap_item.m_remaining_capacity), + static_cast(cap_item.m_lost_capacity) + ); + // register with capacity items + m_capacity_items.insert(cap_item); + // add to cache + auto cache_item = std::make_shared(slab, cap_item); + m_slabs.emplace(address, cache_item); + + // append with the recycler + if (m_recycler_ptr) { + m_recycler_ptr->append(cache_item); + } + + // make the newly added slab active + m_active_slab[locality] = cache_item; + return m_active_slab[locality]; + } + + std::uint32_t SlabManager::getRemainingCapacity(std::uint32_t slab_id) const + { + // look up with the cache first + auto address = m_slab_address_func(slab_id); + auto it = m_slabs.find(address); + if (it != m_slabs.end()) { + auto slab = it->second.lock(); + if (slab) { + return 
(*slab)->getRemainingCapacity(); + } + } + + // look up with the slab defs if not in cache + auto slab_def_ptr = m_slab_defs.find_equal(slab_id); + if (!slab_def_ptr.first) { + THROWF(db0::InternalException) << "Slab definition not found."; + } + return slab_def_ptr.first->m_remaining_capacity; + } + + void SlabManager::close() + { + m_active_slab = {}; + m_reserved_slabs.clear(); + saveDirtySlabs(); + m_slabs.clear(); + } + + std::shared_ptr SlabManager::tryFind(std::uint32_t slab_id) const + { + if (slab_id < nextSlabId()) { + if (m_active_slab.contains(slab_id)) { + return m_active_slab.find(slab_id); + } + // look up with the cache first + auto address = m_slab_address_func(slab_id); + auto it = m_slabs.find(address); + if (it != m_slabs.end()) { + auto slab_item = it->second.lock(); + if (slab_item) { + return slab_item; + } + // remove expired cache entry + m_slabs.erase(it); + } + + return tryOpenSlab(address); + } + return {}; + } + + std::shared_ptr SlabManager::find(std::uint32_t slab_id) const + { + auto slab = tryFind(slab_id); + if (!slab) { + THROWF(db0::BadAddressException) << "Slab " << slab_id << " not found"; + } + return slab; + } + + void SlabManager::erase(std::shared_ptr slab) { + erase(slab, true); + } + + bool SlabManager::empty() const { + return nextSlabId() == m_realm_id; + } + + std::shared_ptr SlabManager::reserveNewSlab() + { + auto [slab, slab_id] = createNewSlab(); + // internally register the slab with capacity = 0 (to avoid use in regular allocations) + CapacityItem cap_item { 0, 0, slab_id }; + // register with slab defs + m_slab_defs.emplace( + slab_id, + static_cast(cap_item.m_remaining_capacity), + static_cast(cap_item.m_lost_capacity) + ); + // register with capacity items + m_capacity_items.insert(cap_item); + return slab; + } + + std::shared_ptr SlabManager::openExistingSlab(const SlabDef &slab_def) + { + if (slab_def.m_slab_id >= nextSlabId()) { + THROWF(db0::InputException) << "Slab " << slab_def.m_slab_id << " does not 
exist"; + } + auto address = m_slab_address_func(slab_def.m_slab_id); + // look up with the cache first + auto it = m_slabs.find(address); + if (it != m_slabs.end()) { + auto slab_item = it->second.lock(); + if (slab_item) { + return slab_item->m_slab; + } + } + // pull through cache + return openSlab(slab_def)->m_slab; + } + + std::shared_ptr SlabManager::openReservedSlab(Address address) const { + return openReservedSlab(address, m_slab_id_func(address)); + } + + std::shared_ptr SlabManager::openReservedSlab(Address address, std::uint32_t slab_id) const + { + assert(m_slab_id_func(address) == slab_id); + if (slab_id >= nextSlabId()) { + THROWF(db0::InputException) << "Slab " << slab_id << " does not exist"; + } + + // look up with the cache first + auto it = m_slabs.find(address); + if (it != m_slabs.end()) { + auto slab_item = it->second.lock(); + if (slab_item) { + return slab_item->m_slab; + } + } + + // retrieve slab definition + auto slab_def_ptr = m_slab_defs.find_equal(slab_id); + if (!slab_def_ptr.first) { + THROWF(db0::InternalException) << "Slab definition not found: " << slab_id; + } + + // pull through cache + auto result = openSlab(*slab_def_ptr.first)->m_slab; + // and add for non-expiry cache + m_reserved_slabs.push_back(result); + return result; + } + + Address SlabManager::getFirstAddress() const { + return m_slab_address_func(m_realm_id) + SlabAllocator::getFirstAddress(); + } + + void SlabManager::commit() const + { + saveDirtySlabs(); + for (auto &item : m_slabs) { + auto slab_item = item.second.lock(); + if (slab_item) { + slab_item->commit(); + } + } + } + + void SlabManager::detach() const + { + // detach all cached slabs + for (auto &item : m_slabs) { + auto slab_item = item.second.lock(); + if (slab_item) { + slab_item->detach(); + } + } + // NOTE: we retain the slab element because it's detached + // invalidate cached variable + m_next_slab_id = {}; + } + + std::uint32_t SlabManager::nextSlabId() const + { + if (!m_next_slab_id) { + 
m_next_slab_id = fetchNextSlabId(); + } + return *m_next_slab_id; + } + + void SlabManager::beginAtomic() + { + assert(!m_atomic); + assert(m_volatile_slabs.empty()); + m_atomic = true; + } + + void SlabManager::endAtomic() + { + assert(m_atomic); + // merge atomic deferred free operations + if (!m_atomic_deferred_free_ops.empty()) { + for (auto addr : m_atomic_deferred_free_ops) { + m_deferred_free_ops.insert(addr); + } + m_atomic_deferred_free_ops.clear(); + } + + m_volatile_slabs.clear(); + m_atomic = false; + } + + void SlabManager::cancelAtomic() + { + assert(m_atomic); + // rollback atomic deferred free operations + m_atomic_deferred_free_ops.clear(); + + // revert all volatile slabs from cache + for (auto slab_addr : m_volatile_slabs) { + auto it = m_slabs.find(slab_addr); + if (it != m_slabs.end()) { + auto slab_item = it->second.lock(); + if (slab_item) { + slab_item->m_is_dirty = false; + } + m_slabs.erase(it); + } + } + m_active_slab = {}; + m_volatile_slabs.clear(); + m_atomic = false; + } + + void SlabManager::saveItem(SlabItem &item) const + { + // if the remaining capacity has changed, reflect this with backend + if (item.m_is_dirty) { + auto slab_id = item.m_cap_item.m_slab_id; + auto remaining_capacity = item->getRemainingCapacity(); + auto lost_capacity = item->getLostCapacity(); + + auto it = m_capacity_items.find_equal(item.m_cap_item); + assert(!it.isEnd()); + + // re-register under a modified key + m_capacity_items.erase(it); + m_capacity_items.emplace( + remaining_capacity, lost_capacity, slab_id + ); + + // and update with the slab defs + auto slab_def_ptr = m_slab_defs.find_equal(slab_id); + m_slab_defs.modify(slab_def_ptr)->m_remaining_capacity = remaining_capacity; + m_slab_defs.modify(slab_def_ptr)->m_lost_capacity = lost_capacity; + + // update cached item + item.m_cap_item.m_remaining_capacity = remaining_capacity; + item.m_cap_item.m_lost_capacity = lost_capacity; + item.m_is_dirty = false; + } + } + + void 
SlabManager::saveDirtySlabs() const + { + for (auto &slab_item : m_dirty_slabs) { + saveItem(*slab_item); + } + m_dirty_slabs.clear(); + } + + std::shared_ptr SlabManager::tryOpenSlab(Address address) const + { + auto it = m_slabs.find(address); + if (it != m_slabs.end()) { + auto slab_item = it->second.lock(); + if (slab_item) { + return slab_item; + } + m_slabs.erase(it); + } + + auto slab_id = m_slab_id_func(address); + // retrieve slab definition + auto slab_def_ptr = m_slab_defs.find_equal(slab_id); + if (!slab_def_ptr.first) { + return {}; + } + + return openSlab(*slab_def_ptr.first); + } + + std::shared_ptr SlabManager::openSlab(Address address) const + { + auto slab = tryOpenSlab(address); + if (!slab) { + THROWF(db0::BadAddressException) << "Invalid address accessed"; + } + return slab; + } + + std::shared_ptr SlabManager::openSlab(const SlabDef &def) const + { + auto cap_item = CapacityItem(def.m_remaining_capacity, def.m_lost_capacity, def.m_slab_id); + auto addr = m_slab_address_func(def.m_slab_id); + auto slab = std::make_shared( + m_prefix, addr, m_slab_size, m_page_size, def.m_remaining_capacity, def.m_lost_capacity + ); + // add to cache (it's safe to reference item from the unordered_map) + auto cache_item = std::make_shared(slab, cap_item); + m_slabs.emplace(addr, cache_item); + + // append with the recycler + if (m_recycler_ptr) { + m_recycler_ptr->append(cache_item); + } + + return cache_item; + } + + void SlabManager::erase(std::shared_ptr slab, bool cleanup) + { + assert(slab); + // Only the last slab can be erased + if (slab->m_cap_item.m_slab_id != nextSlabId() - NUM_REALMS) { + return; + } + + auto slab_id = slab->m_cap_item.m_slab_id; + auto addr = m_slab_address_func(slab_id); + // clear the dirty flag since it's being erased anyway + slab->m_is_dirty = false; + // unregister from cache + auto it = m_slabs.find(addr); + if (it != m_slabs.end()) { + m_slabs.erase(it); + } + + // unregister from recycler + if (m_recycler_ptr) { + 
m_recycler_ptr->closeOne([&slab](const SlabItem &item) { + return slab.get() == &item; + }); + } + // unregister if active + if (m_active_slab.contains(slab)) { + m_active_slab.erase(slab); + } + // unregister from slab defs + if (!m_slab_defs.erase_equal(slab_id).first) { + THROWF(db0::InternalException) << "Slab definition not found."; + } + // unregister from capacity items + if (!m_capacity_items.erase_equal(slab->m_cap_item).first) { + THROWF(db0::InternalException) << "Capacity item not found."; + } + if (!m_next_slab_id) { + m_next_slab_id = fetchNextSlabId(); + } + (*m_next_slab_id) -= NUM_REALMS; + // try removing other empty slabs if such exist + if (cleanup) { + while (!empty()) { + auto slab = openSlab(m_slab_address_func(nextSlabId() - NUM_REALMS)); + if (!((*slab)->empty())) { + break; + } + erase(slab, false); + } + } + } + + std::uint32_t SlabManager::fetchNextSlabId() const + { + // determine the max slab id + auto it = m_slab_defs.find_max(); + if (it.first) { + return it.first->m_slab_id + NUM_REALMS; + } else { + // first slab being created + return m_realm_id; + } + } + + std::optional
SlabManager::tryAlloc(std::size_t size, std::uint32_t slot_num, bool aligned, + bool unique, std::uint16_t &instance_id, unsigned char locality) + { + auto slab = tryGetActiveSlab(locality); + bool is_first = true; + bool is_new = false; + // The number of alloc attempts from existing slabs before + // resorting to adding a new slab + int num_remaining_attempts = SlabAllocatorConfig::NUM_EXISTING_SLAB_ALLOC_ATTEMPTS; + for (;;) { + if (slab) { + for (;;) { + auto addr = (*slab)->tryAlloc(size, 0, aligned); + if (!addr) { + // NOTE: since the last allocation failed, don't use this slab as "active" + resetActiveSlab(locality); + break; + } + + if (!unique || ((*slab)->tryMakeAddressUnique(*addr, instance_id))) { + // modified, add to dirty slabs + if (!slab->m_is_dirty) { + slab->m_is_dirty = true; + m_dirty_slabs.push_back(slab); + } + return addr; + } + + // unable to make the address unique, schedule for deferred free and try again + // NOTE: the allocation is lost + deferredFree(*addr); + } + if (size > ((*slab)->getMaxAllocSize())) { + THROWF(db0::InternalException) + << "Requested allocation size " << size << " is larger than the slab size " << (*slab)->getMaxAllocSize(); + } + if (is_new) { + THROWF(db0::InternalException) << "Slab is new but cannot allocate " << size; + } + } + if (is_first) { + slab = findFirst(size, locality); + is_first = false; + --num_remaining_attempts; + } else if (num_remaining_attempts-- > 0) { + slab = findNext(slab, size, locality); + } else { + slab = {}; + } + // Create if unable to allocate from existing slabs + // or the number of attempts has been exhausted + if (!slab) { + slab = addNewSlab(locality); + is_new = true; + } + } + } + + void SlabManager::free(Address address) + { + if (m_deferred_free) { + deferredFree(address); + } else { + _free(address); + } + } + + void SlabManager::free(Address address, std::uint32_t slab_id) + { + assert(m_deferred_free_ops.find(address) == m_deferred_free_ops.end()); + if 
(m_deferred_free) { + deferredFree(address); + } else { + _free(address, slab_id); + } + } + + void SlabManager::_free(Address address) { + _free(address, m_slab_id_func(address)); + } + + void SlabManager::_free(Address address, std::uint32_t slab_id) + { + assert(m_slab_id_func(address) == slab_id); + auto slab = find(slab_id); + assert(slab); + (*slab)->free(address); + if ((*slab)->empty()) { + // erase or mark as erased + erase(slab); + } else { + // modified, add to dirty slabs + if (!slab->m_is_dirty) { + slab->m_is_dirty = true; + m_dirty_slabs.push_back(slab); + } + } + } + + std::size_t SlabManager::getAllocSize(Address address) const { + return getAllocSize(address, m_slab_id_func(address)); + } + + std::size_t SlabManager::getAllocSize(Address address, std::uint32_t slab_id) const + { + if (m_deferred_free_ops.find(address) != m_deferred_free_ops.end()) { + THROWF(db0::BadAddressException) << "Address " << address << " not found (pending deferred free)"; + } + + assert(m_slab_id_func(address) == slab_id); + return (*find(slab_id))->getAllocSize(address); + } + + bool SlabManager::isAllocated(Address address, std::size_t *size_of_result) const { + return isAllocated(address, m_slab_id_func(address), size_of_result); + } + + bool SlabManager::isAllocated(Address address, std::uint32_t slab_id, std::size_t *size_of_result) const + { + if (m_deferred_free_ops.find(address) != m_deferred_free_ops.end()) { + return false; + } + + auto slab = tryFind(slab_id); + if (!slab) { + return false; + } + return ((*slab)->isAllocated(address, size_of_result)); + } + + void SlabManager::forAllSlabs(std::function f) const + { + auto it = m_slab_defs.cbegin(); + for (;!it.is_end();++it) { + auto slab = const_cast(*this).openExistingSlab(*it); + f(*slab, it->m_slab_id); + } + } + + void SlabManager::deferredFree(Address address) + { + if (m_atomic) { + m_atomic_deferred_free_ops.push_back(address); + } else { + m_deferred_free_ops.insert(address); + } + } + + void 
SlabManager::flush() const + { + assert(!m_atomic); + assert(m_atomic_deferred_free_ops.empty()); + // perform the deferred free operations + if (!m_deferred_free_ops.empty()) { + for (auto addr : m_deferred_free_ops) { + const_cast(*this)._free(addr); + } + m_deferred_free_ops.clear(); + } + } + + std::size_t SlabManager::getDeferredFreeCount() const { + return m_deferred_free_ops.size(); + } + +} \ No newline at end of file diff --git a/src/dbzero/core/memory/SlabManager.hpp b/src/dbzero/core/memory/SlabManager.hpp new file mode 100644 index 00000000..e40f34cf --- /dev/null +++ b/src/dbzero/core/memory/SlabManager.hpp @@ -0,0 +1,186 @@ +#pragma once + +#include "Allocator.hpp" +#include "Prefix.hpp" +#include "BitSpace.hpp" +#include "Memspace.hpp" +#include "SlabAllocatorConfig.hpp" +#include "SlabItem.hpp" +#include "MetaAllocator.hpp" +#include +#include +#include +#include +#include + +namespace db0 + +{ + + /** + * SlabManager allows efficient access to a working set of slabs + * either for read-only or read-write operations + * It's also capable of synchronizing metadata between slabs and the meta-indexes + * The following requirements apply: + * - it's only allowed to access slabs via the SlabCache (no direct access permitted) + * - SlabCache must be part of commit/rollback flows + * - SlabCache must be part of atomic operations + */ + class SlabManager + { + public: + static constexpr std::size_t NUM_REALMS = MetaAllocator::NUM_REALMS; + using SlabTreeT = MetaAllocator::SlabTreeT; + using CapacityTreeT = MetaAllocator::CapacityTreeT; + + SlabManager(std::shared_ptr prefix, SlabTreeT &slab_defs, + CapacityTreeT &capacity_items, SlabRecycler *recycler, std::uint32_t slab_size, std::uint32_t page_size, + std::function address_func, std::function slab_id_func, + unsigned char realm_id, bool deferred_free); + + std::optional
tryAlloc(std::size_t size, std::uint32_t slot_num, bool aligned, bool unique, + std::uint16_t &instance_id, unsigned char locality); + + void free(Address address); + // @param slab_id must match the one calculated from the address + void free(Address address, std::uint32_t slab_id); + + std::size_t getAllocSize(Address address) const; + std::size_t getAllocSize(Address address, std::uint32_t slab_id) const; + + bool isAllocated(Address address, std::size_t *size_of_result) const; + bool isAllocated(Address address, std::uint32_t slab_id, std::size_t *size_of_result) const; + + unsigned int getSlabCount() const { + return (nextSlabId() - m_realm_id) / NUM_REALMS; + } + + // NOTE: reserved slabs are not updated in the CapacityItems tree + // since they're registered with capacity = 0 (to avoid using them in regular allocations) + std::shared_ptr reserveNewSlab(); + + // Open an existing reserved slab + std::shared_ptr openReservedSlab(Address) const; + std::shared_ptr openReservedSlab(Address, std::uint32_t slab_id) const; + + std::uint32_t getRemainingCapacity(std::uint32_t slab_id) const; + + std::size_t getDeferredFreeCount() const; + + Address getFirstAddress() const; + + bool empty() const; + + void commit() const; + + void detach() const; + + void beginAtomic(); + void endAtomic(); + void cancelAtomic(); + + void close(); + + void forAllSlabs(std::function f) const; + + void flush() const; + + private: + + // NOTE: only localities 0 and 1 are currently supported + struct ActiveSlab: public std::array, 2> + { + bool contains(std::uint32_t slab_id) const; + bool contains(std::shared_ptr) const; + + std::shared_ptr find(std::uint32_t slab_id) const; + + void erase(std::shared_ptr); + }; + + /** + * Retrieves the active slab or returns nullptr if no active slab available + */ + std::shared_ptr tryGetActiveSlab(unsigned char locality); + void resetActiveSlab(unsigned char locality); + + /** + * Retrieve the 1st slab to allocate a block of at least min_capacity + * 
this is only a 'hint' and if the allocation is not possible, the next slab should be attempted + */ + std::shared_ptr findFirst(std::size_t size, unsigned char locality); + + // Continue after findFirst + std::shared_ptr findNext(std::shared_ptr last_result, std::size_t size, + unsigned char locality); + + /** + * Create a new, unregistered slab instance + */ + std::pair, std::uint32_t> createNewSlab(); + + // Create a new, registered slab instance + std::shared_ptr addNewSlab(unsigned char locality); + + // Find existing slab by ID + std::shared_ptr tryFind(std::uint32_t slab_id) const; + std::shared_ptr find(std::uint32_t slab_id) const; + + /** + * Erase if 'slab' is the last slab + */ + void erase(std::shared_ptr); + + std::shared_ptr openExistingSlab(const SlabDef &); + + std::uint32_t nextSlabId() const; + + std::shared_ptr m_prefix; + const unsigned char m_realm_id; + SlabTreeT &m_slab_defs; + CapacityTreeT &m_capacity_items; + SlabRecycler *m_recycler_ptr = nullptr; + const std::uint32_t m_slab_size; + const std::uint32_t m_page_size; + // slab cache by address + mutable std::unordered_map > m_slabs; + mutable std::vector > m_reserved_slabs; + // active slabs for each supported locality (0 or 1) + mutable ActiveSlab m_active_slab; + // address by allocation ID (from the algo-allocator) + std::function m_slab_address_func; + std::function m_slab_id_func; + mutable std::optional m_next_slab_id; + // addresses of slabs newly created during atomic operations (potentially to be reverted) + mutable std::vector m_volatile_slabs; + // the atomic operation's flag + bool m_atomic = false; + std::vector
m_atomic_deferred_free_ops; + const bool m_deferred_free; + mutable std::unordered_set
m_deferred_free_ops; + // the list of modified slabs (need backend refresh) + mutable std::vector > m_dirty_slabs; + + // Reflect item changes with the backend (if modified) + void saveItem(SlabItem &item) const; + // Save all dirty slabs to the backend + void saveDirtySlabs() const; + + std::shared_ptr tryOpenSlab(Address address) const; + std::shared_ptr openSlab(Address address) const; + + // open slab by definition and add to cache + std::shared_ptr openSlab(const SlabDef &def) const; + + void erase(std::shared_ptr, bool cleanup); + + std::uint32_t fetchNextSlabId() const; + + void deferredFree(Address); + + // internal "free" implementation which performs the dealloc instantly + void _free(Address); + void _free(Address, std::uint32_t slab_id); + }; + +} \ No newline at end of file diff --git a/src/dbzero/core/memory/SlabRecycler.cpp b/src/dbzero/core/memory/SlabRecycler.cpp deleted file mode 100644 index e8e7c144..00000000 --- a/src/dbzero/core/memory/SlabRecycler.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include "SlabRecycler.hpp" - -namespace db0 - -{ - - SlabRecycler::SlabRecycler(unsigned int max_size) - : m_max_size(max_size) - { - } - - void SlabRecycler::append(std::shared_ptr slab) - { - m_slabs.push_back(slab); - while (m_slabs.size() > m_max_size) { - m_slabs.pop_front(); - } - } - - std::size_t SlabRecycler::size() const { - return m_slabs.size(); - } - - std::size_t SlabRecycler::capacity() const { - return m_max_size; - } - - void SlabRecycler::close(std::function predicate, bool only_first) - { - for (auto it = m_slabs.begin(); it != m_slabs.end();) { - if (predicate(**it)) { - it = m_slabs.erase(it); - if (only_first) { - break; - } - } else { - ++it; - } - } - } - - void SlabRecycler::closeOne(std::function predicate) { - close(predicate, true); - } - - void SlabRecycler::clear() { - m_slabs.clear(); - } - -} \ No newline at end of file diff --git a/src/dbzero/core/memory/SlabRecycler.hpp b/src/dbzero/core/memory/SlabRecycler.hpp deleted file mode 
100644 index a26cb3a3..00000000 --- a/src/dbzero/core/memory/SlabRecycler.hpp +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include -#include -#include "SlabAllocator.hpp" - -namespace db0 - -{ - - class SlabRecycler - { - public: - SlabRecycler(unsigned int max_size = 256); - - void append(std::shared_ptr slab); - - /** - * Get the number of slab currently begin stored - */ - std::size_t size() const; - - /** - * Get the maximum number of slab that could be stored - */ - std::size_t capacity() const; - - /** - * Close / remove all SlabAllocator instances that match the predicate - */ - void close(std::function predicate, bool only_first = false); - - void closeOne(std::function predicate); - - void clear(); - - private: - const unsigned int m_max_size; - std::deque > m_slabs; - }; - -} \ No newline at end of file diff --git a/src/dbzero/core/vspace/db0_ptr.hpp b/src/dbzero/core/vspace/db0_ptr.hpp index 3e6c75ba..55b12577 100644 --- a/src/dbzero/core/vspace/db0_ptr.hpp +++ b/src/dbzero/core/vspace/db0_ptr.hpp @@ -1,6 +1,5 @@ #pragma once -#include "v_ptr.hpp" #include #include #include diff --git a/src/dbzero/core/vspace/v_object.hpp b/src/dbzero/core/vspace/v_object.hpp index 21917248..3e666ffc 100644 --- a/src/dbzero/core/vspace/v_object.hpp +++ b/src/dbzero/core/vspace/v_object.hpp @@ -1,6 +1,6 @@ #pragma once -#include +#include "v_ptr.hpp" #include #include @@ -9,39 +9,40 @@ namespace db0 { struct tag_verified {}; - + /** * Base class for vspace-mapped objects * @tparam T container object type */ template - class v_object + class v_object: public v_ptr { public: - using c_type = T; - using ptr_t = v_ptr; + using ContainerT = T; + // for compatibility with intrusive containers (e.g. v_sgtree) + using ptr_t = v_ptr; v_object() = default; - + v_object(const ptr_t &ptr) - : v_this(ptr) + : ptr_t(ptr) { - } - + } + v_object(mptr ptr, FlagSet access_mode = {}) - : v_this(ptr, access_mode) + : ptr_t(ptr, access_mode) { } // Construct a verified instance - i.e. 
backed by a valid db0 address with a known size v_object(db0::tag_verified, mptr ptr, std::size_t size_of = 0, FlagSet access_mode = {}) - : v_this(ptr, access_mode) + : ptr_t(ptr, access_mode) { - v_this.safeConstRef(size_of); + ptr_t::safeConstRef(size_of); } v_object(const v_object &other) - : v_this(other.v_this) + : ptr_t(other) { } @@ -52,49 +53,49 @@ namespace db0 private: template::value-1> v_object(Memspace &memspace, Tuple&& t, int_seq) - : v_this(ptr_t::makeNew( - memspace, - c_type::measure(std::get(std::forward(t))...), - std::get(std::forward(t)) ) - ) { - c_type::__new(reinterpret_cast(&v_this.modify()), std::get(std::forward(t))...); + initNew( + memspace, + ContainerT::measure(std::get(std::forward(t))...), + std::get(std::forward(t)) + ); + ContainerT::__new(reinterpret_cast(&this->modify()), std::get(std::forward(t))...); } /// Pre-locked constructor struct tag_prelocked {}; template::value-1> - v_object(Memspace &memspace, tag_prelocked, Tuple&& t, int_seq) - : v_this(ptr_t::makeNew(memspace, std::move(std::get(std::forward(t))))) + v_object(Memspace &memspace, tag_prelocked, Tuple&& t, int_seq) { + initNew(memspace, std::move(std::get(std::forward(t)))); // placement new syntax - c_type::__new(reinterpret_cast(&v_this.modify()), std::get(std::forward(t))...); + ContainerT::__new(reinterpret_cast(&this->modify()), std::get(std::forward(t))...); } template::value-1> void init(Memspace &memspace, Tuple&& t, int_seq) { - v_this = ptr_t::makeNew( + initNew( memspace, - c_type::measure(std::get(std::forward(t))...), + ContainerT::measure(std::get(std::forward(t))...), // access options (the last argument) std::get(std::forward(t)) ); - c_type::__new(reinterpret_cast(&v_this.modify()), std::get(std::forward(t))...); + ContainerT::__new(reinterpret_cast(&this->modify()), std::get(std::forward(t))...); } template::value-1> std::uint16_t initUnique(Memspace &memspace, Tuple&& t, int_seq) { std::uint16_t instance_id; - v_this = ptr_t::makeNewUnique( + 
initNewUnique( memspace, instance_id, - c_type::measure(std::get(std::forward(t))...), + ContainerT::measure(std::get(std::forward(t))...), // access options (the last argument) std::get(std::forward(t)) ); - c_type::__new(reinterpret_cast(&v_this.modify()), std::get(std::forward(t))...); + ContainerT::__new(reinterpret_cast(&this->modify()), std::get(std::forward(t))...); return instance_id; } @@ -148,20 +149,11 @@ namespace db0 return initUnique(memspace, std::forward(args)..., FlagSet {}); } - // Construct from v-pointer - v_object(ptr_t &&ptr) - : v_this(std::move(ptr)) - { - } - v_object(v_object &&other) - : v_this(std::move(other.v_this)) + : ptr_t(std::move(other)) { } - /** - * static V-Space allocator - */ template static std::uint64_t makeNew(Memspace &memspace, Args&&... args) { @@ -169,146 +161,107 @@ namespace db0 return new_object.getAddress(); } - void operator=(const v_object &other) { - v_this = other.v_this; - } - - void operator=(v_object &&other) - { - v_this = std::move(other.v_this); - other.v_this = {}; - } - - /** - * Readonly data access operator - */ - inline const c_type *operator->() const { - return v_this.get(); - } - - inline const c_type *getData() const { - return v_this.get(); - } - - /** - * Reference data container for read - */ - inline const c_type &const_ref() const { - return *(v_this.get()); - } - - const c_type& safeRef() const { - return v_this.safeRef(); - } - - const c_type& safeRef(std::uint32_t access_mode) const { - return v_this.safeRef(access_mode); - } - - /** - * Reference data container for update - */ - inline c_type &modify() { - return v_this.modify(); - } - - // Mark specific range as modified - // NOTE: even if the range is not updated it will be forced-diff - void modify(std::size_t offset, std::size_t size) { - v_this.modify(offset, size); + // Reference data container for read + inline const ContainerT &const_ref() const { + return *this->getData(); } - inline Address getAddress() const { - return 
v_this.getAddress(); - } - - inline const ptr_t &get_v_ptr() const { - return this->v_this; - } - - inline ptr_t &get_v_ptr() { - return this->v_this; + mptr myPtr(Address address, FlagSet access_mode = {}) const { + return this->getMemspace().myPtr(address, access_mode); } - void destroy() const + // Calculate the number of DPs spanned by this object + // NOTE: even small objects may span more than 1 DP if are positioned on a boundary + // however allocators typically will avoid such situations + unsigned int span() const { - if (v_this) { - v_this.destroy(); - v_this = {}; - } - } - - inline Memspace &getMemspace() const { - return v_this.getMemspace(); + auto first_dp = this->getMemspace().getPageNum(this->m_address); + auto last_dp = this->getMemspace().getPageNum(this->m_address + (*this)->sizeOf()); + return last_dp - first_dp + 1; } - inline bool isNull() const { - return v_this.isNull(); - } - - /** - * instance compare - */ - bool operator==(const v_object &other) const { - return (v_this==other.v_this); + v_object &operator=(v_object &&other) + { + vtypeless::operator=(std::move(other)); + return *this; } - explicit operator bool() const { - return !v_this.isNull(); + v_object &operator=(v_object const &other) + { + vtypeless::operator=(other); + return *this; } - bool operator!() const { - return v_this.isNull(); - } - - mptr myPtr(Address address, FlagSet access_mode = {}) const { - return v_this.getMemspace().myPtr(address, access_mode); - } + private: - /** - * Get use count of the underlying lock - */ - unsigned int use_count() const { - return v_this.use_count(); - } - - void detach() const { - v_this.detach(); - } - - void commit() const + // Create a new instance + void initNew(Memspace &memspace, std::size_t size, FlagSet access_mode = {}) { - // FIXME: optimization - // potentially we could call v_this.commit() here BUT - // if there exist 2 instances of v_object and one of them gets modified - // then the "read-only" instance will not see the 
updates - - v_this.detach(); + // read not allowed for instance creation + assert(!access_mode[AccessOptions::read]); + this->m_memspace_ptr = &memspace; + this->m_address = memspace.alloc(size, SLOT_NUM, REALM_ID, getLocality(access_mode)); + // lock for create & write + // NOTE: must extract physical address for mapRange + this->m_mem_lock = memspace.getPrefix().mapRange( + this->m_address, size, access_mode | AccessOptions::write + ); + // mark the entire writable area as modified + this->m_mem_lock.modify(); + this->m_resource_flags = db0::RESOURCE_AVAILABLE_FOR_READ | db0::RESOURCE_AVAILABLE_FOR_WRITE; + this->m_access_mode = access_mode; + // collect as a modified instance for commit speedup + this->m_memspace_ptr->collectModified(this); } - // Calculate the number of DPs spanned by this object - // NOTE: even small objects may span more than 1 DP if are positioned on a boundary - // however allocators typically will avoid such situations - unsigned int span() const + // Create a new instance using allocUnique functionality + void initNewUnique(Memspace &memspace, std::uint16_t &instance_id, std::size_t size, + FlagSet access_mode = {}) { - auto first_dp = v_this.getMemspace().getPageNum(v_this.getAddress()); - auto last_dp = v_this.getMemspace().getPageNum(v_this.getAddress() + v_this->sizeOf()); - return last_dp - first_dp + 1; - } - - // Check if the underlying resource is available as mutable - // i.e. 
was already access for read/write - bool isModified() const { - return v_this.isModified(); + // read not allowed for instance creation + assert(!access_mode[AccessOptions::read]); + this->m_memspace_ptr = &memspace; + auto unique_address = memspace.allocUnique(size, SLOT_NUM, REALM_ID, getLocality(access_mode)); + instance_id = unique_address.getInstanceId(); + // lock for create & write + // NOTE: must extract physical address for mapRange + this->m_address = unique_address; + this->m_mem_lock = memspace.getPrefix().mapRange( + unique_address.getOffset(), size, access_mode | AccessOptions::write + ); + // mark the entire writable area as modified + this->m_mem_lock.modify(); + // mark as available for both write & read + this->m_resource_flags = db0::RESOURCE_AVAILABLE_FOR_READ | db0::RESOURCE_AVAILABLE_FOR_WRITE; + this->m_access_mode = access_mode; + // collect as a modified instance for commit speedup + this->m_memspace_ptr->collectModified(this); } - bool isNoCache() const { - return v_this.isNoCache(); + /** + * Create a new instance from the mapped address + * @param memspace the memspace to use + * @param mapped_addr the mapped address + * @param access_mode additional access mode flags + */ + void initNew(Memspace &memspace, MappedAddress &&mapped_addr, FlagSet access_mode = {}) + { + this->m_memspace_ptr = &memspace; + // mark the entire writable area as modified + mapped_addr.m_mem_lock.modify(); + this->m_address = mapped_addr.m_address; + this->m_mem_lock = std::move(mapped_addr.m_mem_lock); + // mark as available for read & write + this->m_resource_flags = db0::RESOURCE_AVAILABLE_FOR_READ | db0::RESOURCE_AVAILABLE_FOR_WRITE; + this->m_access_mode = access_mode; + // collect as a modified instance for commit speedup + this->m_memspace_ptr->collectModified(this); } - protected: - // container reference - mutable ptr_t v_this; + static inline unsigned char getLocality(FlagSet access_mode) { + // NOTE: use locality = 1 for no_cache allocations, 0 
otherwise (undefined) + return access_mode[AccessOptions::no_cache] ? 1 : 0; + } }; // Utility function to safely mutate a v_object's fixed-size member diff --git a/src/dbzero/core/vspace/v_ptr.hpp b/src/dbzero/core/vspace/v_ptr.hpp index fb33bb32..f300e9f7 100644 --- a/src/dbzero/core/vspace/v_ptr.hpp +++ b/src/dbzero/core/vspace/v_ptr.hpp @@ -1,18 +1,6 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "MappedAddress.hpp" -#include "safe_buf_t.hpp" +#include "vtypeless.hpp" namespace db0 @@ -20,150 +8,6 @@ namespace db0 template class v_object; - - class vtypeless - { - protected : - using ResourceReadMutexT = ROWO_Mutex< - std::uint16_t, - db0::RESOURCE_AVAILABLE_FOR_READ, - db0::RESOURCE_AVAILABLE_FOR_READ, - db0::RESOURCE_LOCK >; - - using ResourceReadWriteMutexT = ROWO_Mutex< - std::uint16_t, - db0::RESOURCE_AVAILABLE_FOR_WRITE, - db0::RESOURCE_AVAILABLE_FOR_RW, - db0::RESOURCE_LOCK >; - - // detach checks either R/W flags and clears both of them - using ResourceDetachMutexT = ROWO_Mutex< - std::uint16_t, - db0::RESOURCE_AVAILABLE_FOR_RW, - db0::RESOURCE_AVAILABLE_FOR_RW, - db0::RESOURCE_LOCK >; - - /** - * Within-prefix address of this object - */ - Address m_address = {}; - Memspace *m_memspace_ptr = nullptr; - mutable std::atomic m_resource_flags = 0; - // initial access flags (e.g. read / write / create) - FlagSet m_access_mode; - // NOTE: cached size may speed-up updates but also is relevant for existing vptr's reinterpret casts - mutable std::optional m_cached_size; - - /** - * Memory mapped range corresponding to this object - */ - mutable MemLock m_mem_lock; - - public: - vtypeless() = default; - - vtypeless(Memspace &, Address address, FlagSet); - - /** - * Create mem-locked with specific flags (e.g. 
read/ write) - */ - vtypeless(Memspace &, Address address, MemLock &&, std::uint16_t resource_flags, - FlagSet); - - vtypeless(const vtypeless& other); - vtypeless(vtypeless&&); - - /** - * @param access_mode additional flags / modes to use - */ - inline vtypeless(mptr ptr, FlagSet access_mode = {}) - : m_address(ptr.m_address) - , m_memspace_ptr(&ptr.m_memspace.get()) - , m_access_mode(ptr.m_access_mode | access_mode) - { - assertFlags(); - } - - inline FlagSet getAccessMode() const { - return m_access_mode; - } - - vtypeless &operator=(const vtypeless &other); - void operator=(vtypeless &&); - - /** - * Instance compare - */ - inline bool operator==(const vtypeless &ptr) const { - return (m_memspace_ptr == ptr.m_memspace_ptr && m_address == ptr.m_address); - } - - inline bool operator!=(const vtypeless &ptr) const { - return (m_memspace_ptr != ptr.m_memspace_ptr || m_address != ptr.m_address); - } - - inline bool isNull() const { - return !m_address.isValid(); - } - - inline operator bool() const { - return m_address.isValid(); - } - - inline Address getAddress() const { - return m_address; - } - - inline Memspace &getMemspace() const { - assert(m_memspace_ptr); - return *m_memspace_ptr; - } - - inline Memspace *getMemspacePtr() const { - return m_memspace_ptr; - } - - inline bool isNoCache() const { - return m_access_mode[AccessOptions::no_cache]; - } - - /** - * Get use count of the underlying lock - */ - unsigned int use_count() const; - - /** - * Check if the underlying resource is available in local memory - */ - bool isAttached() const; - - /** - * Detach underlying resource lock (i.e. mark resource as not available in local memory) - */ - void detach(); - - /** - * Commit by marking the write as final. 
- * The subsequent modify() will need to refresh the underlying lock - */ - void commit(); - - /** - * Cast to a specific concrete type - * @return pointer which may be null if the underlying lock does not exist - */ - template const T *castTo() const { - return reinterpret_cast(m_mem_lock.m_buffer); - } - - private: - inline void assertFlags() - { - // read / write / create flags are disallowed since they're assigned dynamically - assert(!m_access_mode[AccessOptions::read]); - assert(!m_access_mode[AccessOptions::write]); - } - }; /** * virtual pointer to object of ContainerT @@ -230,9 +74,12 @@ namespace db0 // note that lock is getting updated, possibly copy-on-write is being performed // NOTE: must extract physical address for mapRange m_mem_lock = m_memspace_ptr->getPrefix().mapRange( - m_address.getOffset(), this->getSize(), m_access_mode | AccessOptions::write | AccessOptions::read); + m_address.getOffset(), this->getSize(), m_access_mode | AccessOptions::write | AccessOptions::read + ); // by calling MemLock::modify we mark the object's associated range as modified m_mem_lock.modify(); + // collect as a modified instance for commit speedup + m_memspace_ptr->collectModified(this); lock.commit_set(); break; } @@ -252,24 +99,7 @@ namespace db0 bool isModified() const { return ResourceReadWriteMutexT::__ref(m_resource_flags).get(); } - - const ContainerT &safeConstRef(std::size_t size_of = 0) const - { - if (!size_of) { - size_of = this->getSize(); - } - assureInitialized(size_of); - return ContainerT::__safe_const_ref( - safe_buf_t((std::byte*)m_mem_lock.m_buffer, (std::byte*)m_mem_lock.m_buffer + size_of) - ); - } - - const ContainerT *get() const - { - assureInitialized(); - return reinterpret_cast(m_mem_lock.m_buffer); - } - + const ContainerT *getData() const { assureInitialized(); @@ -277,74 +107,29 @@ namespace db0 } inline const ContainerT *operator->() const { - return get(); - } - - static self_t makeNew(Memspace &memspace, std::size_t size, FlagSet 
access_mode = {}) - { - // read not allowed for instance creation - assert(!access_mode[AccessOptions::read]); - auto address = memspace.alloc(size, SLOT_NUM, REALM_ID, getLocality(access_mode)); - // lock for create & write - // NOTE: must extract physical address for mapRange - auto mem_lock = memspace.getPrefix().mapRange(address, size, access_mode | AccessOptions::write); - // mark the entire writable area as modified - mem_lock.modify(); - // mark as available for both write & read - return self_t( - memspace, address, std::move(mem_lock), - db0::RESOURCE_AVAILABLE_FOR_READ | db0::RESOURCE_AVAILABLE_FOR_WRITE, access_mode - ); + return this->getData(); } - // Create a new instance using allocUnique functionality - static self_t makeNewUnique(Memspace &memspace, std::uint16_t &instance_id, std::size_t size, - FlagSet access_mode = {}) - { - // read not allowed for instance creation - assert(!access_mode[AccessOptions::read]); - auto unique_address = memspace.allocUnique(size, SLOT_NUM, REALM_ID, getLocality(access_mode)); - instance_id = unique_address.getInstanceId(); - // lock for create & write - // NOTE: must extract physical address for mapRange - auto mem_lock = memspace.getPrefix().mapRange( - unique_address.getOffset(), size, access_mode | AccessOptions::write - ); - // mark the entire writable area as modified - mem_lock.modify(); - // mark as available for both write & read - return self_t( - memspace, unique_address, std::move(mem_lock), - db0::RESOURCE_AVAILABLE_FOR_READ | db0::RESOURCE_AVAILABLE_FOR_WRITE, access_mode - ); - } - - /** - * Create a new instance from the mapped address - * @param memspace the memspace to use - * @param mapped_addr the mapped address - * @param access_mode additional access mode flags - */ - static self_t makeNew(Memspace &memspace, MappedAddress &&mapped_addr, FlagSet access_mode = {}) - { - // mark the entire writable area as modified - mapped_addr.m_mem_lock.modify(); - return self_t(memspace, 
mapped_addr.m_address, - std::move(mapped_addr.m_mem_lock), - // mark as available for read & write - db0::RESOURCE_AVAILABLE_FOR_READ | db0::RESOURCE_AVAILABLE_FOR_WRITE, access_mode - ); - } - - /** - * Get the underlying mapped range (for mutation) - */ + // Get the underlying mapped range (for mutation) MemLock modifyMappedRange() { modify(); return this->m_mem_lock; } + protected: + + const ContainerT &safeConstRef(std::size_t size_of = 0) const + { + if (!size_of) { + size_of = this->getSize(); + } + assureInitialized(size_of); + return ContainerT::__safe_const_ref( + safe_buf_t((std::byte*)m_mem_lock.m_buffer, (std::byte*)m_mem_lock.m_buffer + size_of) + ); + } + private: static inline unsigned char getLocality(FlagSet access_mode) { @@ -361,7 +146,8 @@ namespace db0 if (lock.isLocked()) { // NOTE: must extract physical address for mapRange m_mem_lock = m_memspace_ptr->getPrefix().mapRange( - m_address.getOffset(), this->getSize(), m_access_mode | AccessOptions::read); + m_address.getOffset(), this->getSize(), m_access_mode | AccessOptions::read + ); lock.commit_set(); break; } @@ -380,7 +166,8 @@ namespace db0 if (lock.isLocked()) { // NOTE: must extract physical address for mapRange m_mem_lock = m_memspace_ptr->getPrefix().mapRange( - m_address.getOffset(), size_of, m_access_mode | AccessOptions::read); + m_address.getOffset(), size_of, m_access_mode | AccessOptions::read + ); lock.commit_set(); break; } @@ -413,6 +200,6 @@ namespace db0 } return *m_cached_size; } - }; + }; } diff --git a/src/dbzero/core/vspace/v_ptr.cpp b/src/dbzero/core/vspace/vtypeless.cpp similarity index 88% rename from src/dbzero/core/vspace/v_ptr.cpp rename to src/dbzero/core/vspace/vtypeless.cpp index 703f9ffb..9c7f390e 100644 --- a/src/dbzero/core/vspace/v_ptr.cpp +++ b/src/dbzero/core/vspace/vtypeless.cpp @@ -1,4 +1,4 @@ -#include +#include "vtypeless.hpp" namespace db0 @@ -88,12 +88,12 @@ namespace db0 unsigned int vtypeless::use_count() const { return m_mem_lock.use_count(); } 
- + bool vtypeless::isAttached() const { return m_mem_lock.m_buffer != nullptr; } - void vtypeless::detach() + void vtypeless::detach() const { // detaching clears the reasource available for read flag while (ResourceDetachMutexT::__ref(m_resource_flags).get()) { @@ -107,11 +107,19 @@ namespace db0 } } - void vtypeless::commit() + void vtypeless::commit() const { + /* FIXME: + // NOTE: this operation assumes that only one v_object instance pointing to the same address exists + // otherwise modifications done to one instance will not be visible to the other instances + // this assumption holds true for dbzero objects but if unable to fulfill in the future, + // it must be changed to "this->detach()" + // commit clears the reasource available for write flag // it might still be available for read atomicResetFlags(m_resource_flags, db0::RESOURCE_AVAILABLE_FOR_WRITE); + */ + detach(); } } \ No newline at end of file diff --git a/src/dbzero/core/vspace/vtypeless.hpp b/src/dbzero/core/vspace/vtypeless.hpp new file mode 100644 index 00000000..1d1e5409 --- /dev/null +++ b/src/dbzero/core/vspace/vtypeless.hpp @@ -0,0 +1,166 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "MappedAddress.hpp" +#include "safe_buf_t.hpp" + +namespace db0 + +{ + + class vtypeless + { + protected: + using ResourceReadMutexT = ROWO_Mutex< + std::uint16_t, + db0::RESOURCE_AVAILABLE_FOR_READ, + db0::RESOURCE_AVAILABLE_FOR_READ, + db0::RESOURCE_LOCK >; + + using ResourceReadWriteMutexT = ROWO_Mutex< + std::uint16_t, + db0::RESOURCE_AVAILABLE_FOR_WRITE, + db0::RESOURCE_AVAILABLE_FOR_RW, + db0::RESOURCE_LOCK >; + + // detach checks either R/W flags and clears both of them + using ResourceDetachMutexT = ROWO_Mutex< + std::uint16_t, + db0::RESOURCE_AVAILABLE_FOR_RW, + db0::RESOURCE_AVAILABLE_FOR_RW, + db0::RESOURCE_LOCK >; + + /** + * Within-prefix address of this object + */ + Address m_address = {}; + Memspace 
*m_memspace_ptr = nullptr; + mutable std::atomic m_resource_flags = 0; + // initial access flags (e.g. read / write / create) + FlagSet m_access_mode; + // NOTE: cached size may speed-up updates but also is relevant for existing vptr's reinterpret casts + mutable std::optional m_cached_size; + + // Memory mapped range corresponding to this object + mutable MemLock m_mem_lock; + + public: + vtypeless() = default; + + vtypeless(Memspace &, Address address, FlagSet); + + /** + * Create mem-locked with specific flags (e.g. read/ write) + */ + vtypeless(Memspace &, Address address, MemLock &&, std::uint16_t resource_flags, + FlagSet); + + vtypeless(const vtypeless& other); + vtypeless(vtypeless&&); + + /** + * @param access_mode additional flags / modes to use + */ + inline vtypeless(mptr ptr, FlagSet access_mode = {}) + : m_address(ptr.m_address) + , m_memspace_ptr(&ptr.m_memspace.get()) + , m_access_mode(ptr.m_access_mode | access_mode) + { + assertFlags(); + } + + inline FlagSet getAccessMode() const { + return m_access_mode; + } + + vtypeless &operator=(const vtypeless &other); + void operator=(vtypeless &&); + + /** + * Instance compare + */ + inline bool operator==(const vtypeless &ptr) const { + return (m_memspace_ptr == ptr.m_memspace_ptr && m_address == ptr.m_address); + } + + inline bool operator!=(const vtypeless &ptr) const { + return (m_memspace_ptr != ptr.m_memspace_ptr || m_address != ptr.m_address); + } + + inline bool isNull() const { + return !m_address.isValid(); + } + + inline operator bool() const { + return m_address.isValid(); + } + + bool operator!() const { + return !m_address.isValid(); + } + + inline Address getAddress() const { + return m_address; + } + + inline Memspace &getMemspace() const { + assert(m_memspace_ptr); + return *m_memspace_ptr; + } + + inline Memspace *getMemspacePtr() const { + return m_memspace_ptr; + } + + inline bool isNoCache() const { + return m_access_mode[AccessOptions::no_cache]; + } + + // Get use count of the 
underlying lock + unsigned int use_count() const; + + /** + * Check if the underlying resource is available in local memory + */ + bool isAttached() const; + + /** + * Detach underlying resource lock (i.e. mark resource as not available in local memory) + */ + void detach() const; + + /** + * Commit by marking the write as final. + * The subsequent modify() will need to refresh the underlying lock + */ + void commit() const; + + /** + * Cast to a specific concrete type + * @return pointer which may be null if the underlying lock does not exist + */ + template const T *castTo() const { + return reinterpret_cast(m_mem_lock.m_buffer); + } + + private: + + inline void assertFlags() + { + // read / write / create flags are disallowed since they're assigned dynamically + assert(!m_access_mode[AccessOptions::read]); + assert(!m_access_mode[AccessOptions::write]); + } + }; + +} diff --git a/src/dbzero/object_model/ObjectBase.hpp b/src/dbzero/object_model/ObjectBase.hpp index c7cef626..24635340 100644 --- a/src/dbzero/object_model/ObjectBase.hpp +++ b/src/dbzero/object_model/ObjectBase.hpp @@ -205,7 +205,7 @@ namespace db0 // Get access flags to propagate to members (e.g. 
no_cache) AccessFlags getMemberFlags() const { - return this->v_this.getAccessMode() & AccessOptions::no_cache; + return this->getAccessMode() & AccessOptions::no_cache; } protected: @@ -221,16 +221,16 @@ namespace db0 has_fixture::init(fixture, std::forward(args)...); } } - - // member should be overridden for derived types which need pre-commit - using PreCommitFunction = void (*)(void *, bool revert); - static PreCommitFunction getPreCommitFunction() { + + // member should be overridden for derived types which need flush + using FlushFunction = void (*)(void *, bool revert); + static FlushFunction getFlushFunction() { return nullptr; } // called from GC0 to bind GC_Ops for this type static GC_Ops getGC_Ops() { - return { hasRefsOp, dropOp, detachOp, commitOp, getTypedAddress, dropByAddr, T::getPreCommitFunction() }; + return { hasRefsOp, dropOp, detachOp, commitOp, getTypedAddress, dropByAddr, T::getFlushFunction() }; } void operator=(ObjectBase &&other) diff --git a/src/dbzero/object_model/dict/Dict.cpp b/src/dbzero/object_model/dict/Dict.cpp index 311e90be..ae617738 100644 --- a/src/dbzero/object_model/dict/Dict.cpp +++ b/src/dbzero/object_model/dict/Dict.cpp @@ -269,7 +269,7 @@ namespace db0::object_model return m_index.end(); } - void Dict::destroy() const + void Dict::destroy() { unrefMembers(); m_index.destroy(); diff --git a/src/dbzero/object_model/dict/Dict.hpp b/src/dbzero/object_model/dict/Dict.hpp index 8c007ecf..87fb1928 100644 --- a/src/dbzero/object_model/dict/Dict.hpp +++ b/src/dbzero/object_model/dict/Dict.hpp @@ -89,7 +89,7 @@ DB0_PACKED_END void unrefMembers() const; - void destroy() const; + void destroy(); std::shared_ptr getIterator(ObjectPtr lang_dict) const; diff --git a/src/dbzero/object_model/has_fixture.hpp b/src/dbzero/object_model/has_fixture.hpp index 22f000d6..32cd76a5 100644 --- a/src/dbzero/object_model/has_fixture.hpp +++ b/src/dbzero/object_model/has_fixture.hpp @@ -17,8 +17,7 @@ namespace db0 */ template class has_fixture: 
public BaseT { - public: - using ptr_t = typename BaseT::ptr_t; + public: has_fixture() = default; // create new instance @@ -51,7 +50,7 @@ namespace db0 template void init(db0::swine_ptr &fixture, Args &&... args) { // must release existing weak ref - Fixture *raw_ptr = reinterpret_cast(this->v_this.getMemspacePtr()); + Fixture *raw_ptr = reinterpret_cast(this->getMemspacePtr()); if (raw_ptr) { // release weak ref of the Fixture db0::swine_ptr::release_weak(raw_ptr); @@ -65,7 +64,7 @@ namespace db0 std::uint16_t initUnique(db0::swine_ptr &fixture, Args &&... args) { // must release existing weak ref - Fixture *raw_ptr = reinterpret_cast(this->v_this.getMemspacePtr()); + Fixture *raw_ptr = reinterpret_cast(this->getMemspacePtr()); if (raw_ptr) { // release weak ref of the Fixture db0::swine_ptr::release_weak(raw_ptr); @@ -78,7 +77,7 @@ namespace db0 ~has_fixture() { - Fixture *raw_ptr = reinterpret_cast(this->v_this.getMemspacePtr()); + Fixture *raw_ptr = reinterpret_cast(this->getMemspacePtr()); if (raw_ptr) { // release weak ref of the Fixture db0::swine_ptr::release_weak(raw_ptr); @@ -87,7 +86,7 @@ namespace db0 db0::swine_ptr tryGetFixture() const { - Fixture *raw_ptr = reinterpret_cast(this->v_this.getMemspacePtr()); + Fixture *raw_ptr = reinterpret_cast(this->getMemspacePtr()); if (raw_ptr) { // construct swine_ptr from raw ptr return db0::swine_ptr::lock_weak(raw_ptr); @@ -109,7 +108,7 @@ namespace db0 void operator=(const has_fixture &other) { // must release existing weak ref and take from the copied object - Fixture *raw_ptr = reinterpret_cast(this->v_this.getMemspacePtr()); + Fixture *raw_ptr = reinterpret_cast(this->getMemspacePtr()); if (raw_ptr) { // release weak ref of the Fixture db0::swine_ptr::release_weak(raw_ptr); @@ -120,10 +119,10 @@ namespace db0 other_fixture.take_weak(); } - void operator=(has_fixture &&other) + void operator=(has_fixture &&other) { // must release existing weak ref and take from the copied object - Fixture *raw_ptr = 
reinterpret_cast(this->v_this.getMemspacePtr()); + Fixture *raw_ptr = reinterpret_cast(this->getMemspacePtr()); if (raw_ptr) { // release weak ref of the Fixture db0::swine_ptr::release_weak(raw_ptr); diff --git a/src/dbzero/object_model/index/Index.cpp b/src/dbzero/object_model/index/Index.cpp index f2a6f5c9..f9b4cca8 100644 --- a/src/dbzero/object_model/index/Index.cpp +++ b/src/dbzero/object_model/index/Index.cpp @@ -68,7 +68,7 @@ namespace db0::object_model { // in case of index we need to unregister first because otherwise // it may trigger discard of unflushed data (which has to be performed before destruction of 'builder') - unregister(); + unregister(); // after unregister object might still have unflushed data, we need to flush them if (hasInstance() && isDirty()) { @@ -111,7 +111,7 @@ namespace db0::object_model << static_cast(m_new_type) << THROWF_END; } } - + void Index::Builder::flush() { if (!m_index_builder) { @@ -198,7 +198,7 @@ namespace db0::object_model } m_builder.flush(); } - + void Index::rollback() { m_builder.rollback(); } @@ -281,6 +281,11 @@ namespace db0::object_model m_builder.update(type_manager.getTypeId(key)); } + // subscribe for flush operation + if (!isDirty()) { + getMemspace().collectForFlush(this); + } + switch (m_builder.getDataType()) { case IndexDataType::Int64: { m_builder.get().add(type_manager.extractInt64(key), value); @@ -316,6 +321,11 @@ namespace db0::object_model m_builder.update(type_manager.getTypeId(key)); } + // subscribe for flush operation + if (!isDirty()) { + getMemspace().collectForFlush(this); + } + switch (m_builder.getDataType()) { case IndexDataType::Int64: { m_builder.get().remove(type_manager.extractInt64(key), value); @@ -335,7 +345,7 @@ namespace db0::object_model } m_mutation_log->onDirty(); } - + std::unique_ptr Index::range(ObjectPtr min, ObjectPtr max, bool null_first) const { assert(hasInstance()); @@ -431,6 +441,11 @@ namespace db0::object_model void Index::addNull(ObjectPtr obj_ptr) { 
assert(hasInstance()); + // subscribe for flush operation + if (!isDirty()) { + getMemspace().collectForFlush(this); + } + switch (m_builder.getDataType()) { // use provisional data type for Auto case IndexDataType::Auto: { @@ -475,7 +490,7 @@ namespace db0::object_model return type_manager.extractUInt64(type_manager.getTypeId(value), value); } - void Index::preCommit(bool revert) + void Index::flush(bool revert) { if (revert) { rollback(); @@ -484,12 +499,16 @@ namespace db0::object_model } } - void Index::preCommitOp(void *ptr, bool revert) { - static_cast(ptr)->preCommit(revert); + void Index::flushOp(void *ptr, bool revert) { + static_cast(ptr)->flush(revert); } void Index::removeNull(ObjectPtr obj_ptr) { + if (!isDirty()) { + getMemspace().collectForFlush(this); + } + switch (m_builder.getDataType()) { // use provisional data type for Auto case IndexDataType::Auto: { @@ -577,7 +596,7 @@ namespace db0::object_model super_t::detach(); } - void Index::destroy() const + void Index::destroy() { m_mutation_log = nullptr; // discard any pending changes diff --git a/src/dbzero/object_model/index/Index.hpp b/src/dbzero/object_model/index/Index.hpp index 250a9e3f..95268257 100644 --- a/src/dbzero/object_model/index/Index.hpp +++ b/src/dbzero/object_model/index/Index.hpp @@ -61,8 +61,8 @@ namespace db0::object_model std::unique_ptr range(ObjectPtr min, ObjectPtr max, bool null_first = false) const; - static PreCommitFunction getPreCommitFunction() { - return preCommitOp; + static FlushFunction getFlushFunction() { + return flushOp; } void moveTo(db0::swine_ptr &); @@ -73,7 +73,7 @@ namespace db0::object_model void detach() const; - void destroy() const; + void destroy(); // remove any cached updates / revert void rollback(); @@ -84,8 +84,8 @@ namespace db0::object_model // the default / provisional type using DefaultT = std::int64_t; friend struct Builder; - void preCommit(bool revert); - static void preCommitOp(void *, bool revert); + void flush(bool revert); + static 
void flushOp(void *, bool revert); template static constexpr IndexDataType dataTypeOf() { @@ -105,7 +105,7 @@ namespace db0::object_model // concrete data type to be assigned (only allowed to update from Auto) IndexDataType m_initial_type; IndexDataType m_new_type; - mutable std::shared_ptr m_index_builder; + mutable std::shared_ptr m_index_builder; Builder(Index &); @@ -134,7 +134,7 @@ namespace db0::object_model m_index_builder = db0::make_shared_void >(); m_new_type = Index::dataTypeOf(); } - return *static_cast*>(m_index_builder.get()); + return *static_cast*>(m_index_builder.get()); } template IndexBuilder &getExisting() const @@ -162,7 +162,6 @@ namespace db0::object_model m_new_type = Index::dataTypeOf(); } } - }; Builder m_builder; @@ -225,7 +224,7 @@ namespace db0::object_model this->modify().m_index_addr = new_range_tree.getAddress(); } - template const typename db0::RangeTree &getExistingRangeTree() const + template typename db0::RangeTree &getExistingRangeTree() const { assert(hasRangeTree()); return const_cast(this)->getRangeTree(); diff --git a/src/dbzero/object_model/index/IndexBuilder.hpp b/src/dbzero/object_model/index/IndexBuilder.hpp index fa7bf740..6478a79c 100644 --- a/src/dbzero/object_model/index/IndexBuilder.hpp +++ b/src/dbzero/object_model/index/IndexBuilder.hpp @@ -24,7 +24,8 @@ namespace db0::object_model IndexBuilder(std::unordered_set &&remove_null_values, std::unordered_set &&add_null_values, std::unordered_map &&object_cache); - + ~IndexBuilder(); + void add(KeyT key, ObjectPtr obj_ptr); void remove(KeyT key, ObjectPtr obj_ptr); @@ -66,6 +67,10 @@ namespace db0::object_model { } + template IndexBuilder::~IndexBuilder() + { + } + template void IndexBuilder::add(KeyT key, ObjectPtr obj_ptr) { super_t::add(key, addToCache(obj_ptr)); } @@ -83,7 +88,7 @@ namespace db0::object_model } template void IndexBuilder::flush(RangeTreeT &index) - { + { std::function add_callback = [&](UniqueAddress address) { auto it = 
m_object_cache.find(address); assert(it != m_object_cache.end()); @@ -94,13 +99,13 @@ namespace db0::object_model auto it = m_object_cache.find(address); assert(it != m_object_cache.end()); m_type_manager.extractMutableAnyObject(it->second.get()).decRef(false); - }; + }; super_t::flush(index, &add_callback, &erase_callback); m_object_cache.clear(); } - template + template UniqueAddress IndexBuilder::addToCache(ObjectPtr obj_ptr) { auto obj_addr = m_type_manager.extractAnyObject(obj_ptr).getUniqueAddress(); diff --git a/src/dbzero/object_model/list/List.cpp b/src/dbzero/object_model/list/List.cpp index 06764cad..26a3f215 100644 --- a/src/dbzero/object_model/list/List.cpp +++ b/src/dbzero/object_model/list/List.cpp @@ -183,7 +183,7 @@ namespace db0::object_model super_t::moveTo(fixture); } - void List::destroy() const + void List::destroy() { clearMembers(); super_t::destroy(); diff --git a/src/dbzero/object_model/list/List.hpp b/src/dbzero/object_model/list/List.hpp index 71cc34e4..1f857117 100644 --- a/src/dbzero/object_model/list/List.hpp +++ b/src/dbzero/object_model/list/List.hpp @@ -64,7 +64,7 @@ namespace db0::object_model void moveTo(db0::swine_ptr &); - void destroy() const; + void destroy(); void clearMembers() const; diff --git a/src/dbzero/object_model/object/ObjectImplBase.cpp b/src/dbzero/object_model/object/ObjectImplBase.cpp index d0963a29..3b67394d 100644 --- a/src/dbzero/object_model/object/ObjectImplBase.cpp +++ b/src/dbzero/object_model/object/ObjectImplBase.cpp @@ -744,7 +744,7 @@ namespace db0::object_model } template - void ObjectImplBase::destroy() const + void ObjectImplBase::destroy() { if (this->hasInstance()) { // associated class type (may require unloading) diff --git a/src/dbzero/object_model/object/ObjectImplBase.hpp b/src/dbzero/object_model/object/ObjectImplBase.hpp index 6dcf5ce6..e6aa28b7 100644 --- a/src/dbzero/object_model/object/ObjectImplBase.hpp +++ b/src/dbzero/object_model/object/ObjectImplBase.hpp @@ -101,7 +101,7 @@ 
namespace db0::object_model // Get description of the field layout FieldLayout getFieldLayout() const; - void destroy() const; + void destroy(); // execute the function for all members (until false is returned from the input lambda) void forAll(std::function) const; diff --git a/src/dbzero/object_model/set/Set.cpp b/src/dbzero/object_model/set/Set.cpp index b9543d2e..1d0378e0 100644 --- a/src/dbzero/object_model/set/Set.cpp +++ b/src/dbzero/object_model/set/Set.cpp @@ -195,7 +195,7 @@ namespace db0::object_model return nullptr; } - void Set::destroy() const + void Set::destroy() { unrefMembers(); m_index.destroy(); diff --git a/src/dbzero/object_model/set/Set.hpp b/src/dbzero/object_model/set/Set.hpp index b13b975c..4121489d 100644 --- a/src/dbzero/object_model/set/Set.hpp +++ b/src/dbzero/object_model/set/Set.hpp @@ -82,7 +82,7 @@ DB0_PACKED_END void detach() const; // drop underlying dbzero representation - void destroy() const; + void destroy(); const_iterator begin() const; const_iterator end() const; diff --git a/src/dbzero/object_model/tuple/Tuple.cpp b/src/dbzero/object_model/tuple/Tuple.cpp index 9dee6b31..90cbd845 100644 --- a/src/dbzero/object_model/tuple/Tuple.cpp +++ b/src/dbzero/object_model/tuple/Tuple.cpp @@ -146,7 +146,7 @@ namespace db0::object_model return !(*this == tuple); } - void Tuple::destroy() const + void Tuple::destroy() { auto fixture = this->getFixture(); for (auto &elem: this->getData()->items()) { diff --git a/src/dbzero/object_model/tuple/Tuple.hpp b/src/dbzero/object_model/tuple/Tuple.hpp index c68373f1..89b7a68c 100644 --- a/src/dbzero/object_model/tuple/Tuple.hpp +++ b/src/dbzero/object_model/tuple/Tuple.hpp @@ -95,7 +95,7 @@ DB0_PACKED_END void operator=(Tuple &&); bool operator!=(const Tuple &) const; - void destroy() const; + void destroy(); const o_typed_item *begin() const; const o_typed_item *end() const; diff --git a/src/dbzero/object_model/value/TypedAddress.cpp b/src/dbzero/object_model/value/TypedAddress.cpp index 
b339a8a8..ca3b4073 100644 --- a/src/dbzero/object_model/value/TypedAddress.cpp +++ b/src/dbzero/object_model/value/TypedAddress.cpp @@ -4,10 +4,6 @@ namespace db0::object_model { - bool TypedAddress::operator==(const TypedAddress &other) const { - return m_value == other.m_value; - } - void TypedAddress::setAddress(Address address) { m_value = (m_value & 0xFFFC000000000000) | address.getOffset(); } @@ -15,11 +11,7 @@ namespace db0::object_model void TypedAddress::setType(StorageClass type) { m_value = (m_value & 0x0003FFFFFFFFFFFF) | (static_cast(type) << 50); } - - bool TypedAddress::operator<(const TypedAddress &other) const { - return m_value < other.m_value; - } - + TypedAddress toTypedAddress(const std::pair &addr_with_type) { return { addr_with_type.second, addr_with_type.first.getAddress() }; } diff --git a/src/dbzero/object_model/value/TypedAddress.hpp b/src/dbzero/object_model/value/TypedAddress.hpp index b7e7aa65..dfafbaeb 100644 --- a/src/dbzero/object_model/value/TypedAddress.hpp +++ b/src/dbzero/object_model/value/TypedAddress.hpp @@ -45,12 +45,31 @@ DB0_PACKED_BEGIN void setAddress(Address); void setType(StorageClass type); - bool operator==(const TypedAddress &other) const; - bool operator<(const TypedAddress &other) const; + inline bool operator==(const TypedAddress &other) const { + return m_value == other.m_value; + } + + inline bool operator<(const TypedAddress &other) const { + return m_value < other.m_value; + } }; TypedAddress toTypedAddress(const std::pair &); DB0_PACKED_END +} + +namespace std + +{ + + template <> + struct hash + { + std::size_t operator()(const db0::object_model::TypedAddress& k) const { + return std::hash()(k.m_value); + } + }; + } \ No newline at end of file diff --git a/src/dbzero/workspace/Fixture.cpp b/src/dbzero/workspace/Fixture.cpp index 2315bd90..80582486 100644 --- a/src/dbzero/workspace/Fixture.cpp +++ b/src/dbzero/workspace/Fixture.cpp @@ -191,13 +191,13 @@ namespace db0 // prevents commit on a closed fixture 
std::unique_lock lock(m_close_mutex); if (!Memspace::isClosed()) { - // pre-commit to prepare objects which require it (e.g. Index) for commit - // NOTE: pre-commit must NOT lock the fixture's shared mutex + // flush to prepare objects which require it (e.g. Index) for commit + // NOTE: flush must NOT lock the fixture's shared mutex if (m_gc0_ptr) { - getGC0().preCommit(); + getGC0().flushAllOf(Memspace::getForFlush()); } - - // clear lang cache again since pre-commit might've released some Python instances + + // clear lang cache again since flush might've released some Python instances m_lang_cache.clear(true); // lock for exclusive access @@ -290,23 +290,31 @@ namespace db0 bool Fixture::commit() { + std::unique_ptr process_timer; + // process_timer = std::make_unique("Fixture::commit"); assert(getPrefixPtr()); - // pre-commit to prepare objects which require it (e.g. Index) for commit - // NOTE: pre-commit must NOT lock the fixture's shared mutex - // NOTE: pre-commit may release some of the Python instances + // flush to prepare objects which require it (e.g. 
Index) for commit + // NOTE: flush must NOT lock the fixture's shared mutex + // NOTE: flush may release some of the Python instances if (m_gc0_ptr) { - getGC0().preCommit(); + getGC0().flushAllOf(Memspace::getForFlush(), process_timer.get()); } // Flush using registered flush handlers - for (auto &handler: m_flush_handlers) { - handler(); + { + std::unique_ptr flush_timer; + if (process_timer) { + flush_timer = std::make_unique("Fixture::commit:flush_handlers", process_timer.get()); + } + for (auto &handler: m_flush_handlers) { + handler(); + } } - - // Clear expired instances from cache so that they're not persisted + + // Clear Python-side expired instances from cache so that they're not persisted m_lang_cache.clear(true); std::unique_lock lock(m_commit_mutex); - bool result = tryCommit(lock); + bool result = tryCommit(lock, process_timer.get()); m_updated = false; auto callbacks = collectStateReachedCallbacks(); lock.unlock(); @@ -329,7 +337,7 @@ namespace db0 return result; } - std::unique_ptr gc0_ctx = m_gc0_ptr ? getGC0().beginCommit() : nullptr; + std::unique_ptr ctx = m_gc0_ptr ? m_gc0_ptr->beginCommit() : nullptr; // NOTE: close handlers perform internal buffers flush (e.g. TagIndex) // which may result in modifications (e.g. incRef) // it's therefore important to perform this action before GC0::commitAll (which commits finalized objects) @@ -337,15 +345,12 @@ namespace db0 commit(true); } - if (m_gc0_ptr) { - getGC0().commitAll(); - } - - // commit garbage collector's state - // we check if gc0 exists because the unit-tests set up may not have it - if (gc0_ctx) { - gc0_ctx->commit(); + // Commit modified only (to avoid scan over all objects) + if (ctx) { + ctx->commitAllOf(Memspace::getModified(), timer.get()); + ctx = nullptr; } + m_string_pool.commit(); m_object_catalogue.commit(); m_v_object_cache.commit(); @@ -369,11 +374,11 @@ namespace db0 return {}; } - assert(!Memspace::isClosed()); - // pre-commit to prepare objects which require it (e.g. 
Index) for commit - // NOTE: pre-commit must NOT lock the fixture's shared mutex + assert(!Memspace::isClosed()); + // flush to prepare objects which require it (e.g. Index) for commit + // NOTE: flush must NOT lock the fixture's shared mutex if (m_gc0_ptr) { - getGC0().preCommit(); + getGC0().flushAllOf(Memspace::getForFlush()); } // Flush using registered flush handlers @@ -432,7 +437,8 @@ namespace db0 void Fixture::preAtomic() { - getGC0().preCommit(); + getGC0().flushAllOf(Memspace::getForFlush()); + m_maybe_need_flush.clear(); for (auto &commit: m_close_handlers) { commit(true); } diff --git a/src/dbzero/workspace/Fixture.hpp b/src/dbzero/workspace/Fixture.hpp index 099caef1..f8620e08 100644 --- a/src/dbzero/workspace/Fixture.hpp +++ b/src/dbzero/workspace/Fixture.hpp @@ -287,7 +287,7 @@ DB0_PACKED_BEGIN SlotAllocator &m_slot_allocator; MetaAllocator &m_meta_allocator; const std::uint64_t m_UUID; - // the registry holds active v_ptr instances (important for refresh) + // the registry holds active v_object instances (important for refresh) // and cleanup of the "hanging" references db0::GC0 *m_gc0_ptr = nullptr; StringPoolT m_string_pool; diff --git a/src/dbzero/workspace/FixtureThreads.cpp b/src/dbzero/workspace/FixtureThreads.cpp index 228f3aea..b552c78c 100644 --- a/src/dbzero/workspace/FixtureThreads.cpp +++ b/src/dbzero/workspace/FixtureThreads.cpp @@ -178,14 +178,14 @@ namespace db0 /** * Acquires locks for safe execution and handles post-commit callbacks */ - class AutoCommitContext : public FixtureThreadCallbacksContext + class AutoSaveContext : public FixtureThreadCallbacksContext { std::unique_lock m_commit_lock; std::unique_lock m_locked_context_lock; std::unique_lock m_atomic_lock; public: - AutoCommitContext( + AutoSaveContext( std::unique_lock &&commit_lock, std::unique_lock &&locked_context_lock, std::unique_lock &&atomic_lock) @@ -234,13 +234,13 @@ namespace db0 std::shared_ptr AutoCommitThread::prepareContext() { - assert(!m_tmp_context.lock() 
&& "Only one AutoCommitContext should exist at the time!"); + assert(!m_tmp_context.lock() && "Only one AutoSaveContext should exist at the time!"); auto commit_lock = std::unique_lock(m_commit_mutex); // must acquire unique lock-context's lock auto locked_context_lock = db0::LockedContext::lockUnique(); // and the atomic lock next (order is relevant here !!) auto atomic_lock = db0::AtomicContext::lock(); - auto context = std::make_shared(std::move(commit_lock), + auto context = std::make_shared(std::move(commit_lock), std::move(locked_context_lock), std::move(atomic_lock) ); // To collect callbacks from fixtures as we proceed with commiting diff --git a/src/dbzero/workspace/FixtureThreads.hpp b/src/dbzero/workspace/FixtureThreads.hpp index 695700c4..cbf83f2c 100644 --- a/src/dbzero/workspace/FixtureThreads.hpp +++ b/src/dbzero/workspace/FixtureThreads.hpp @@ -86,7 +86,7 @@ namespace db0 * The purpose of the AutoCommitThread is to commit changes from all read/write fixtures * after 250ms (unless configured differently) since the last modification */ - class AutoCommitContext; + class AutoSaveContext; class AutoCommitThread: public FixtureThread { public: @@ -101,7 +101,7 @@ namespace db0 private: static std::mutex m_commit_mutex; - std::weak_ptr m_tmp_context; + std::weak_ptr m_tmp_context; }; } diff --git a/src/dbzero/workspace/GC0.cpp b/src/dbzero/workspace/GC0.cpp index 9f3adfc9..76534e40 100644 --- a/src/dbzero/workspace/GC0.cpp +++ b/src/dbzero/workspace/GC0.cpp @@ -30,25 +30,6 @@ namespace db0 { } - GC0::CommitContext::CommitContext(GC0 &gc0) - : m_gc0(gc0) - { - assert(!m_gc0.m_commit_pending); - m_gc0.m_commit_pending = true; - } - - GC0::CommitContext::~CommitContext() - { - assert(m_gc0.m_commit_pending); - m_gc0.m_commit_pending = false; - } - - void GC0::CommitContext::commit() - { - assert(m_gc0.m_commit_pending); - m_gc0.commit(); - } - bool GC0::tryRemove(void *vptr, bool is_volatile) { std::unique_lock lock(m_mutex); @@ -59,9 +40,9 @@ namespace db0 
NoArgsFunction drop_op = nullptr; auto &ops = m_ops[it->second]; - // if type implements preCommit then remove it from pre-commit map as well - if (ops.preCommit) { - m_pre_commit_map.erase(vptr); + // if type implements flush then remove it from flush map as well + if (ops.flush) { + m_flush_map.erase(vptr); } // do not drop when in read-only mode (e.g. snapshot owned) @@ -71,7 +52,7 @@ namespace db0 && !ops.hasRefs(it->first)) { if (m_commit_pending) { - // must schedule for deletion since unable to drop while commit is pending + // must schedule for deletion since unable to drop while save is pending auto addr_pair = ops.address(it->first); m_scheduled_for_deletion[addr_pair.first] = addr_pair.second; } else { @@ -104,6 +85,44 @@ namespace db0 } } + void GC0::commitAllOf(const std::vector &vptrs, ProcessTimer *timer_ptr) + { + std::unique_ptr timer; + if (timer_ptr) { + timer = std::make_unique("GC0::commitAllOf", timer_ptr); + } + + // Commit & collect unreferenced instances + // Important ! 
Collect instance addresses first because push_back can trigger "remove" calls + std::unique_lock lock(m_mutex); + std::unordered_set addresses; + std::size_t count = 0; + for (auto vptr : vptrs) { + auto it = m_vptr_map.find(vptr); + if (it != m_vptr_map.end()) { + auto &ops = m_ops[it->second]; + ops.commit(vptr); + if (ops.hasRefs && !ops.hasRefs(vptr)) { + addresses.insert(toTypedAddress(ops.address(vptr))); + } + ++count; + } + } + + lock.unlock(); + + super_t::clear(); + for (auto addr: addresses) { + super_t::push_back(addr); + } + // also registered instances scheduled for deletion + for (auto &addr_pair: m_scheduled_for_deletion) { + super_t::push_back(toTypedAddress(addr_pair)); + } + m_scheduled_for_deletion.clear(); + super_t::commit(); + } + void GC0::commitAll() { std::unique_lock lock(m_mutex); @@ -111,50 +130,35 @@ namespace db0 m_ops[vptr_item.second].commit(vptr_item.first); } } - + std::size_t GC0::size() const { std::unique_lock lock(m_mutex); return m_vptr_map.size(); } - void GC0::preCommit() + void GC0::flushAllOf(const std::vector &vptrs, ProcessTimer *timer_ptr) { - std::unique_lock lock(m_mutex); - // collect ops first (this is necessary because preCommit can trigger "remove" calls) - std::vector> pre_commit_ops; - std::copy(m_pre_commit_map.begin(), m_pre_commit_map.end(), std::back_inserter(pre_commit_ops)); - lock.unlock(); - - // call pre-commit where it's provided - for (auto &item : pre_commit_ops) { - m_ops[item.second].preCommit(item.first, false); + std::unique_ptr timer; + if (timer_ptr) { + timer = std::make_unique("GC0::flushAllOf", timer_ptr); } - } - - void GC0::commit() - { - // Important ! 
Collect instance addresses first because push_back can trigger "remove" calls - std::vector addresses; + std::unique_lock lock(m_mutex); - for (auto &vptr_item : m_vptr_map) { - auto &ops = m_ops[vptr_item.second]; - if (ops.hasRefs && !ops.hasRefs(vptr_item.first)) { - addresses.push_back(toTypedAddress(ops.address(vptr_item.first))); + // collect ops first (this is necessary because flush can trigger "remove" calls) + std::vector> flush_ops; + for (auto vptr : vptrs) { + auto it = m_flush_map.find(vptr); + if (it != m_flush_map.end()) { + flush_ops.push_back(*it); } } lock.unlock(); - super_t::clear(); - for (auto addr: addresses) { - super_t::push_back(addr); + // call flush where it's provided + for (auto &item : flush_ops) { + m_ops[item.second].flush(item.first, false); } - // also registered instances scheduled for deletion - for (auto &addr_pair: m_scheduled_for_deletion) { - super_t::push_back(toTypedAddress(addr_pair)); - } - m_scheduled_for_deletion.clear(); - super_t::commit(); } void GC0::collect() @@ -208,28 +212,24 @@ namespace db0 tryRemove(vptr, true); } } - // call reverse pre-commit where it's provided (use revert=true) - for (auto &item : m_pre_commit_map) { - m_ops[item.second].preCommit(item.first, true); + // call reverse flush where it's provided (use revert=true) + for (auto &item : m_flush_map) { + m_ops[item.second].flush(item.first, true); } m_volatile.clear(); m_atomic = false; } - - std::unique_ptr GC0::beginCommit() { - return std::make_unique(*this); - } - + std::optional GC0::erase(void *vptr) { - std::optional pre_commit_op; + std::optional flush_op; std::unique_lock lock(m_mutex); assert(m_vptr_map.find(vptr) != m_vptr_map.end()); m_vptr_map.erase(vptr); - auto it = m_pre_commit_map.find(vptr); - if (it != m_pre_commit_map.end()) { - pre_commit_op = it->second; - m_pre_commit_map.erase(it); + auto it = m_flush_map.find(vptr); + if (it != m_flush_map.end()) { + flush_op = it->second; + m_flush_map.erase(it); } if (m_atomic) { @@ 
-239,7 +239,29 @@ namespace db0 } } } - return pre_commit_op; + return flush_op; + } + + GC0::CommitContext::CommitContext(GC0 &gc0) + : m_gc0(gc0) + { + assert(!m_gc0.m_commit_pending); + m_gc0.m_commit_pending = true; } + GC0::CommitContext::~CommitContext() + { + assert(m_gc0.m_commit_pending); + m_gc0.m_commit_pending = false; + } + + void GC0::CommitContext::commitAllOf(const std::vector &vec, ProcessTimer *timer) + { + assert(m_gc0.m_commit_pending); + m_gc0.commitAllOf(vec, timer); + } + + std::unique_ptr GC0::beginCommit() { + return std::make_unique(*this); + } } \ No newline at end of file diff --git a/src/dbzero/workspace/GC0.hpp b/src/dbzero/workspace/GC0.hpp index 2bdd822d..585972ad 100644 --- a/src/dbzero/workspace/GC0.hpp +++ b/src/dbzero/workspace/GC0.hpp @@ -4,7 +4,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -16,6 +17,7 @@ namespace db0 { class Fixture; + class ProcessTimer; using TypedAddress = db0::object_model::TypedAddress; using StorageClass = db0::object_model::StorageClass; @@ -26,7 +28,7 @@ namespace db0 using GetAddress = std::pair (*)(const void *); using StorageClass = db0::object_model::StorageClass; using DropByAddrFunction = void (*)(db0::swine_ptr &, Address); - using PreCommitFunction = void (*)(void *, bool revert); + using FlushFunction = void (*)(void *, bool revert); struct GC_Ops { @@ -37,8 +39,8 @@ namespace db0 NoArgsFunction commit = nullptr; GetAddress address = nullptr; DropByAddrFunction dropByAddr = nullptr; - // null allowed, preCommit handler is called just before fixture.commit - PreCommitFunction preCommit = nullptr; + // null allowed, flush handler is called just before fixture.commit + FlushFunction flush = nullptr; }; struct GCOps_ID @@ -61,9 +63,9 @@ namespace db0 static GCOps_ID m_gc_ops_id; #define GC0_Define(T) GCOps_ID T::m_gc_ops_id; - + /** - * GC0 keeps track of all "live" v_ptr instances. + * GC0 keeps track of all "live" v_object instances. 
* and drops associated dbzero instances once they are no longer referenced from Python * GC0 has also a persistence layer to keep track of unreferenced instances as long as * the corresponding Python objects are still alive. @@ -81,38 +83,19 @@ namespace db0 // move instance from another GC0 template void moveFrom(GC0 &other, void *vptr); - // preCommit calls the operation on objects which implement it - void preCommit(); - /** * Unregister instance (i.e. when reference from Python was removed) * @return true if object was also dropped */ bool tryRemove(void *vptr, bool is_volatile = false); - /** - * Detach all instances held by this registry. - */ - void detachAll(); - void commitAll(); - - std::size_t size() const; + // flush calls the operation on objects which implement it + void flushAllOf(const std::vector &, ProcessTimer * = nullptr); - struct CommitContext - { - GC0 &m_gc0; - - CommitContext(GC0 &gc0); - ~CommitContext(); - - void commit(); - }; + // Detach all instances held by this registry + void detachAll(); - /** - * Commit serializes the list of unreferenced instances to the persistence layer - * this is to be able to drop those instances once the corresponding references from Python expire - */ - std::unique_ptr beginCommit(); + std::size_t size() const; template static void registerTypes(); @@ -125,13 +108,25 @@ namespace db0 void beginAtomic(); void endAtomic(); void cancelAtomic(); + + struct CommitContext + { + GC0 &m_gc0; + + CommitContext(GC0 &gc0); + ~CommitContext(); + + void commitAllOf(const std::vector &, ProcessTimer * = nullptr); + }; + + std::unique_ptr beginCommit(); protected: - friend CommitContext; bool m_commit_pending = false; - - void commit(); - // @return pre-commit ops-id if element was assigned it + + // Commit specific (e.g. 
modified) instances held by this registry + void commitAllOf(const std::vector &, ProcessTimer * = nullptr); + // @return flush ops-id if element was assigned it std::optional erase(void *vptr); private: @@ -143,9 +138,9 @@ namespace db0 const bool m_read_only; // type / ops_id std::unordered_map m_vptr_map; - // the map dedicated to instances which implement preCommit + // the map dedicated to instances which implement flush // it's assumed that it's much smaller than m_vptr_map (it duplicates some of its entries) - std::unordered_map m_pre_commit_map; + std::unordered_map m_flush_map; // objects irrevocably scheduled for deletion std::unordered_map m_scheduled_for_deletion; // flag indicating atomic operation in progress @@ -154,6 +149,8 @@ namespace db0 std::vector m_volatile; mutable std::mutex m_mutex; + void commitAll(); + template static void registerSingleType() { T::m_gc_ops_id = GCOps_ID(m_ops.size()); @@ -171,9 +168,9 @@ namespace db0 assert(m_ops[T::m_gc_ops_id].detach); assert(m_ops[T::m_gc_ops_id].address); m_vptr_map[vptr] = T::m_gc_ops_id; - // if the type implements preCommit then also add it to the preCommit map - if (m_ops[T::m_gc_ops_id].preCommit) { - m_pre_commit_map[vptr] = T::m_gc_ops_id; + // if the type implements flush then also add it to the flush map + if (m_ops[T::m_gc_ops_id].flush) { + m_flush_map[vptr] = T::m_gc_ops_id; } if (m_atomic) { m_volatile.push_back(vptr); @@ -183,11 +180,11 @@ namespace db0 template void GC0::moveFrom(GC0 &other, void *vptr) { std::unique_lock lock(m_mutex); - auto pre_commit_op = other.erase(vptr); + auto flush_op = other.erase(vptr); m_vptr_map[vptr] = T::m_gc_ops_id; - // also move between pre-commit maps - if (pre_commit_op) { - m_pre_commit_map[vptr] = *pre_commit_op; + // also move between flush maps + if (flush_op) { + m_flush_map[vptr] = *flush_op; } if (m_atomic) { m_volatile.push_back(vptr); diff --git a/src/dbzero/workspace/Workspace.hpp b/src/dbzero/workspace/Workspace.hpp index 
5e685789..742b2124 100644 --- a/src/dbzero/workspace/Workspace.hpp +++ b/src/dbzero/workspace/Workspace.hpp @@ -10,7 +10,9 @@ #include #include #include -#include +#include +#include +#include #include #include #include @@ -32,19 +34,19 @@ namespace db0 class LangCache; class Config; class WorkspaceView; - + using SlabRecycler = Recycler; + class BaseWorkspace { public: - // 4KB pages - static constexpr std::size_t DEFAULT_PAGE_SIZE = 4096; + static constexpr std::size_t DEFAULT_PAGE_SIZE = SlabAllocatorConfig::DEFAULT_PAGE_SIZE; + static constexpr std::size_t DEFAULT_SLAB_SIZE = SlabAllocatorConfig::DEFAULT_SLAB_SIZE; + // 16KB sparse index index (memory pages) static constexpr std::size_t DEFAULT_SPARSE_INDEX_NODE_SIZE = 16 * 1024 - 256; - // 64MB slabs - static constexpr std::size_t DEFAULT_SLAB_SIZE = 64 * 1024 * 1024; static constexpr std::size_t DEFAULT_CACHE_SIZE = 2u << 30; static constexpr std::size_t DEFAULT_SLAB_CACHE_SIZE = 256; - + /** * @param root_path default search path for existing prefixes and storage for new ones (pass "" for current directory) **/ diff --git a/tests/unit_tests/CapacityTreeTest.cpp b/tests/unit_tests/CapacityTreeTest.cpp index 6e8ad2f4..2375f14b 100644 --- a/tests/unit_tests/CapacityTreeTest.cpp +++ b/tests/unit_tests/CapacityTreeTest.cpp @@ -1,6 +1,7 @@ #include #include -#include +#include +#include #include #include #include @@ -50,8 +51,7 @@ namespace tests TEST_F( CapacityTreeTests , testCapacityTreeInsertEraseIssue1 ) { - using CapacityTreeT = typename db0::MetaAllocator::CapacityTreeT; - using CapacityItem = typename db0::MetaAllocator::CapacityItem; + using CapacityTreeT = typename db0::MetaAllocator::CapacityTreeT; std::vector realms; realms.emplace_back(m_bitspace, page_size); diff --git a/tests/unit_tests/IndexTest.cpp b/tests/unit_tests/IndexTest.cpp new file mode 100644 index 00000000..11490ccd --- /dev/null +++ b/tests/unit_tests/IndexTest.cpp @@ -0,0 +1,21 @@ +#include +#include +#include +#include +#include 
+#include +#include +#include + +namespace tests + +{ + + using namespace db0; + + class IndexTest: public MemspaceTestBase + { + public: + }; + +} diff --git a/tests/unit_tests/MetaAllocatorTest.cpp b/tests/unit_tests/MetaAllocatorTest.cpp index 3ee14587..c783c4a2 100644 --- a/tests/unit_tests/MetaAllocatorTest.cpp +++ b/tests/unit_tests/MetaAllocatorTest.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -14,7 +14,8 @@ namespace tests { using namespace db0; - + using SlabRecycler = db0::Recycler; + // a proxy class to expose protected members for testing class MetaAllocatorProxy: public MetaAllocator { @@ -58,9 +59,9 @@ namespace tests protected: // in bytes static constexpr std::size_t PAGE_SIZE = 4096; - static constexpr std::size_t SLAB_SIZE = 4 * 1024 * 1024; + static constexpr std::size_t SLAB_SIZE = 4u << 20; static constexpr std::size_t SMALL_SLAB_SIZE = 64 * 4096; - + std::atomic m_dirty_meter = 0; CacheRecycler m_recycler; std::shared_ptr m_prefix; @@ -156,7 +157,7 @@ namespace tests auto ptr = cut.alloc(100); // the allocation should be in the same slab ASSERT_EQ(cut.getSlabId(ptr), 0); - } + } TEST_F( MetaAllocatorTests , testMetaAllocatorCanAllocateFromMultipleExistingSlabs ) { diff --git a/tests/unit_tests/VBIndexTests.cpp b/tests/unit_tests/VBIndexTests.cpp index addaeff3..4376bb56 100644 --- a/tests/unit_tests/VBIndexTests.cpp +++ b/tests/unit_tests/VBIndexTests.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -126,6 +127,54 @@ namespace tests ++it2; } } + + TEST_F( VBIndexTests , testVBIndexBulkPushBack1MSortedElements ) + { + auto memspace = getMemspace(); + std::vector values; + for (std::uint64_t i = 0; i < 1000000; ++i) { + values.push_back(i); + } + + db0::v_bindex cut(memspace, memspace.getPageSize()); + db0::ProcessTimer timer("bulkPushBack"); + cut.bulkPushBack(values.begin(), values.end()); + timer.printLog(std::cout) << std::endl; + } + + TEST_F( VBIndexTests , 
testVBIndexBulkPushBack1MRandomElements ) + { + auto memspace = getMemspace(); + std::vector values; + for (std::uint64_t i = 0; i < 1000000; ++i) { + values.push_back(rand()); + } + + db0::v_bindex cut(memspace, memspace.getPageSize()); + db0::ProcessTimer timer("bulkPushBack (random)"); + cut.bulkPushBack(values.begin(), values.end()); + timer.printLog(std::cout) << std::endl; + } + TEST_F( VBIndexTests , testVBIndexBulkInsert1MRandomElementsInBatches ) + { + auto memspace = getMemspace(); + auto total_count = 1000000u; + auto batch_size = 1000u; + + db0::v_bindex cut(memspace, memspace.getPageSize()); + db0::ProcessTimer timer("bulkInsert (random)"); + std::size_t count = 0; + while (count < total_count) { + std::vector values; + for (std::size_t i = 0; i < batch_size; ++i) { + values.push_back(rand()); + } + cut.bulkInsert(values.begin(), values.end()); + count += batch_size; + } + timer.printLog(std::cout) << std::endl; + } + } diff --git a/tests/unit_tests/VSpaceTests.cpp b/tests/unit_tests/VSpaceTests.cpp index 2a9853a6..1c451576 100644 --- a/tests/unit_tests/VSpaceTests.cpp +++ b/tests/unit_tests/VSpaceTests.cpp @@ -53,31 +53,7 @@ namespace tests db0::v_object i2(std::move(i1)); ASSERT_EQ(i2->size(), 4096); } - - TEST_F( VSpaceTests , testMoveConstructorForVPtr ) - { - auto memspace = getMemspace(); - - auto vptr_1 = db0::v_ptr::makeNew(memspace, o_binary::measure(4096), {}); - o_binary::__new(reinterpret_cast(&vptr_1.modify()), 4096); - ASSERT_TRUE(vptr_1.isAttached()); - db0::v_ptr vptr_2(std::move(vptr_1)); - - ASSERT_EQ(vptr_2->size(), 4096); - } - - TEST_F( VSpaceTests , testVPtrImplementsDetach ) - { - auto memspace = getMemspace(); - auto vptr_1 = db0::v_ptr::makeNew(memspace, o_binary::measure(4096), {}); - o_binary::__new(reinterpret_cast(&vptr_1.modify()), 4096); - ASSERT_TRUE(vptr_1.isAttached()); - - vptr_1.detach(); - ASSERT_FALSE(vptr_1.isAttached()); - } - TEST_F( VSpaceTests , testVObjectCanBeAccessedAfterDetach ) { auto memspace = 
getMemspace(); diff --git a/tests/utils/TestWorkspace.hpp b/tests/utils/TestWorkspace.hpp index c829ac9e..af09e0b7 100644 --- a/tests/utils/TestWorkspace.hpp +++ b/tests/utils/TestWorkspace.hpp @@ -5,7 +5,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -73,11 +74,11 @@ namespace db0 void tearDown(); std::size_t size() const override; - + private: const std::size_t m_slab_size; FixedObjectList m_shared_object_list; - SlabRecycler m_slab_recycler; + Recycler m_slab_recycler; db0::swine_ptr m_current_fixture; std::unordered_map > m_fixtures; std::unordered_map m_uuids;