diff --git a/AGENTS.md b/AGENTS.md index 18f734ab..176e7afb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -47,6 +47,10 @@ Types derived from `v_object` should follow the project-wide constructor pattern - New durable instances are constructed from `Memspace &` plus any type-specific creation arguments. - Existing durable instances are reopened from `mptr` plus any type-specific runtime dependencies. +### Overlaid type inheritance + +Variable-size overlaid types that derive from another overlaid type must use `db0::o_ext` rather than directly inheriting from an `o_base`-derived overlay such as `o_list`. Direct inheritance bypasses `o_ext` sizing, version, and dynamic-area handling and can corrupt overlaid layout assumptions. + ### C++ style - Use camelCase for local helper variables, lambdas, and method names in C++ code. diff --git a/design/IMMUTABLE_OBJECTS_DESIGN.md b/design/IMMUTABLE_OBJECTS_DESIGN.md index f15aaafa..09e9dfcb 100644 --- a/design/IMMUTABLE_OBJECTS_DESIGN.md +++ b/design/IMMUTABLE_OBJECTS_DESIGN.md @@ -106,10 +106,13 @@ Implementation requirements: - Field retrieval returns an object view of the root object that exposes only the nested fields for read access. - The view must maintain the lock or lifetime guard of the top-level object while nested fields are accessed. - References to embedded objects point to a memory location inside the root allocation and also carry the nested member offset. The offset may be deeply nested. +- Embedded object offsets are byte offsets relative to the persisted `o_immutable_object` data structure overlaid on the root allocation, not relative to the C++ `ObjectImmutableImpl` wrapper instance. - The lifecycle of an embedded object is tied to the root instance because the root owns the allocation containing the full embedded tree. - The embedded member is identified by its own address, but that address is inside the allocation and is not the allocation start. - The allocator must be able to recover allocation metadata from an inner address. This allows embedded object addresses to use the same 50-bit representation as regular object addresses. - A parent object can still be referenced by the parent allocation address. +- Root immutable objects store an exact compact index of valid nested embedded-object offsets. Lookup by offset must validate against this index and raise a bad-address error for invalid, out-of-range, or non-object offsets. +- The offset index uses packed integer encoding grouped by packed size class so offsets remain compact while supporting logarithmic exact membership checks. Most offsets are expected to fit in 3-4 packed bytes, but the representation must support larger offsets. ## Object Views diff --git a/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp b/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp index 4e81d904..73792e86 100644 --- a/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp +++ b/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -629,6 +630,7 @@ namespace db0::python if (PyObject_GC_IsTracked(object)) { PyObject_GC_UnTrack(object); } + InitManager::instance.tryCloseInitializer(object->ext()); object->destroy(); new ((void *)const_cast(&object->ext())) EmbeddedObjectRef(rootObject, &embeddedObject, std::move(type)); diff --git a/src/dbzero/core/serialization/Types.cpp b/src/dbzero/core/serialization/Types.cpp index 05bdeba8..06a24074 100644 --- a/src/dbzero/core/serialization/Types.cpp +++ b/src/dbzero/core/serialization/Types.cpp @@ -32,10 +32,23 @@ namespace db0 std::copy(data.data(), data.data() + m_bytes, &m_buf); } - o_binary::o_binary(std::size_t size, void (*write)(void *, const void *), const void *source) + o_binary::o_binary( + std::size_t size, + void (*write)(void *, const void *, db0::object_model::EmbeddedObjectOffsetCollector *), + const void *source + ) + : o_binary(size, write, source, nullptr) + { + } + + o_binary::o_binary( + std::size_t size, + void (*write)(void *, const void *, db0::object_model::EmbeddedObjectOffsetCollector *), + const void *source, db0::object_model::EmbeddedObjectOffsetCollector *context + ) : m_bytes(size) { - write(&m_buf, source); + write(&m_buf, source, context); } o_binary &o_binary::operator=(const o_binary &binary) diff --git a/src/dbzero/core/serialization/Types.hpp b/src/dbzero/core/serialization/Types.hpp index 8302e1ac..2f178012 100644 --- a/src/dbzero/core/serialization/Types.hpp +++ b/src/dbzero/core/serialization/Types.hpp @@ -8,6 +8,11 @@ #include #include +namespace db0::object_model +{ + struct EmbeddedObjectOffsetCollector; +} + namespace db0 { @@ -116,7 +121,19 @@ DB0_PACKED_BEGIN o_binary(const std::vector &); - o_binary(std::size_t size, void (*write)(void *, const void *), const void *source); + // Optional context lets writer-backed payloads receive construction-only state. + // A null context is used by measurement and ordinary copy paths. + o_binary( + std::size_t size, + void (*write)(void *, const void *, db0::object_model::EmbeddedObjectOffsetCollector *), + const void *source + ); + + o_binary( + std::size_t size, + void (*write)(void *, const void *, db0::object_model::EmbeddedObjectOffsetCollector *), + const void *source, db0::object_model::EmbeddedObjectOffsetCollector *context + ); public: /** diff --git a/src/dbzero/core/serialization/list.hpp b/src/dbzero/core/serialization/list.hpp index efd8a280..eab4d1ab 100644 --- a/src/dbzero/core/serialization/list.hpp +++ b/src/dbzero/core/serialization/list.hpp @@ -4,28 +4,49 @@ #pragma once #include "Base.hpp" +#include "packed_int.hpp" #include +#include #include - + namespace db0 { DB0_PACKED_BEGIN - - template class DB0_PACKED_ATTR o_list: public o_base, 0, false> + + template class DB0_PACKED_ATTR o_list_header + { + }; + + template <> class DB0_PACKED_ATTR o_list_header + { + public: + std::uint32_t size_of; + // number of list elements + std::uint32_t count; + }; + + template + class DB0_PACKED_ATTR o_list: + public o_base, 0, false>, + public o_list_header { protected : - using self_t = o_list; - using super_t = o_base, 0, false>; + using self_t = o_list; + using super_t = o_base, 0, false>; friend super_t; /** * Constructs empty list instance */ explicit o_list() - : count(0) { - size_of = self_t::arrangeMembers(); + if constexpr (compact) { + writeCompactSize(self_t::measure()); + } else { + this->count = 0; + this->size_of = self_t::arrangeMembers(); + } } o_list(const self_t &other) @@ -36,16 +57,22 @@ DB0_PACKED_BEGIN } template explicit o_list(const sequence_t &data, Args&& ...args) - : count(data.size()) { - auto arranger = self_t::arrangeMembers(); + if constexpr (!compact) { + this->count = data.size(); + } + + auto arranger = makeArrangerFor(data, std::forward(args)...); auto it = data.begin(), end = data.end(); while (it != end) { arranger = arranger(T::type(), *it, std::forward(args)...); ++it; } - size_of = arranger; + + if constexpr (!compact) { + this->size_of = arranger; + } } public : @@ -53,7 +80,11 @@ DB0_PACKED_BEGIN * */ static std::size_t measure() { - return self_t::measureMembers(); + if constexpr (compact) { + return compactSizeFromElementBytes(0); + } else { + return self_t::measureMembers(); + } } static std::size_t measure(const self_t &other) { @@ -62,37 +93,62 @@ DB0_PACKED_BEGIN template static std::size_t measure(const SequenceT &data, Args&& ...args) { - auto meter = self_t::measureMembers(); - auto it = data.begin(), end = data.end(); - while (it != end) - { - meter = meter(T::type(), *it, std::forward(args)...); - ++it; + if constexpr (compact) { + return compactSizeFromElementBytes(measureElementBytes(data, std::forward(args)...)); + } else { + auto meter = self_t::measureMembers(); + auto it = data.begin(), end = data.end(); + while (it != end) + { + meter = meter(T::type(), *it, std::forward(args)...); + ++it; + } + return meter; } - return meter; } - + std::size_t sizeOf () const { - return static_cast(size_of); + if constexpr (compact) { + return compactSizeMember().value(); + } else { + return static_cast(this->size_of); + } } - + template static std::size_t safeSizeOf(buf_t at) { - std::uint32_t count = self_t::__const_ref(at).count; - auto meter = self_t::sizeOfMembers(at); - for (unsigned i = 0;i < count;++i) - { - meter = meter(T::type()); + if constexpr (compact) { + const std::byte *cursor = at; + auto result = packed_uint32::read(cursor); + at += result; + return result; + } else { + auto result = self_t::__const_ref(at).size_of; + at += result; + return result; } - return meter; } - + inline std::uint32_t size() const { - return this->count; + if constexpr (compact) { + std::uint32_t result = 0; + auto it = begin(), stop = end(); + while (it != stop) { + ++result; + ++it; + } + return result; + } else { + return this->count; + } } - + bool empty() const { - return this->count==0; + if constexpr (compact) { + return begin() == end(); + } else { + return this->count==0; + } } class const_iterator @@ -101,51 +157,113 @@ DB0_PACKED_BEGIN // as invalid const_iterator() = default; const_iterator(const T *item) - : item(item) + : item(item) { } - + const T *operator->() const { return this->item; } - + const T &operator*() const { return *this->item; } - + const_iterator &operator++() { item = (const T*)((char*)item + item->sizeOf()); return *this; } - + bool operator==(const const_iterator &it) const { return (item==it.item); } - + bool operator!=(const const_iterator &it) const { return (item!=it.item); } - + protected : const T *item = nullptr; }; - + const_iterator begin() const { - return const_iterator(reinterpret_cast(self_t::beginOfDynamicArea())); + if constexpr (compact) { + return const_iterator(reinterpret_cast(beginOfItems())); + } else { + return const_iterator(reinterpret_cast(self_t::beginOfDynamicArea())); + } } - + const_iterator end() const { // past the end of data - return const_iterator (reinterpret_cast(self_t::beginOfMemberArea() + size_of)); + return const_iterator (reinterpret_cast(self_t::beginOfMemberArea() + sizeOf())); } - public : - std::uint32_t size_of; - // number of list elements - std::uint32_t count; + private: + const packed_uint32 &compactSizeMember() const + { + static_assert(compact); + return packed_uint32::__const_ref(self_t::beginOfDynamicArea()); + } + + std::byte *beginOfItems() + { + if constexpr (compact) { + return self_t::beginOfDynamicArea() + packed_uint32::measure(static_cast(sizeOf())); + } else { + return self_t::beginOfDynamicArea(); + } + } + + const std::byte *beginOfItems() const + { + return const_cast(this)->beginOfItems(); + } + + void writeCompactSize(std::size_t size) + { + auto cursor = self_t::beginOfDynamicArea(); + packed_uint32::write(cursor, static_cast(size)); + } + + template + Foundation::Arranger makeArrangerFor(const SequenceT &data, Args&& ...args) + { + if constexpr (compact) { + auto size = self_t::measure(data, std::forward(args)...); + writeCompactSize(size); + return Foundation::Arranger(reinterpret_cast(this), beginOfItems()); + } else { + return self_t::arrangeMembers(); + } + } + + template + static std::size_t measureElementBytes(const SequenceT &data, Args&& ...args) + { + auto meter = Foundation::Meter(0); + auto it = data.begin(), end = data.end(); + while (it != end) + { + meter = meter(T::type(), *it, std::forward(args)...); + ++it; + } + return meter; + } + + static std::size_t compactSizeFromElementBytes(std::size_t elementBytes) + { + auto size = elementBytes + packed_uint32::measure(static_cast(elementBytes)); + while (true) { + auto nextSize = elementBytes + packed_uint32::measure(static_cast(size)); + if (nextSize == size) { + return size; + } + size = nextSize; + } + } }; - + DB0_PACKED_END } - diff --git a/src/dbzero/core/serialization/packed_int.hpp b/src/dbzero/core/serialization/packed_int.hpp index 19c03f13..7c1fbe2c 100644 --- a/src/dbzero/core/serialization/packed_int.hpp +++ b/src/dbzero/core/serialization/packed_int.hpp @@ -222,6 +222,7 @@ DB0_PACKED_END using packed_int32 = o_packed_int; using packed_int64 = o_packed_int; + using packed_uint32 = o_packed_int; using nullable_packed_int32 = o_packed_int; using nullable_packed_int64 = o_packed_int; diff --git a/src/dbzero/object_model/dict/o_dict.cpp b/src/dbzero/object_model/dict/o_dict.cpp index b6de7b31..88159465 100644 --- a/src/dbzero/object_model/dict/o_dict.cpp +++ b/src/dbzero/object_model/dict/o_dict.cpp @@ -602,7 +602,10 @@ namespace db0::object_model case StorageClass::EMBEDDED_OBJECT: { if (element.m_payload.m_bytes_value.m_writer) { std::vector payload(element.bytesSize()); - element.m_payload.m_bytes_value.m_writer(payload.data(), element.m_payload.m_bytes_value.m_source); + element.m_payload.m_bytes_value.m_writer( + payload.data(), element.m_payload.m_bytes_value.m_source, + element.m_payload.m_bytes_value.m_context + ); return hashBytes(payload.data(), payload.size(), seed); } return hashBytes(element.bytesData(), element.bytesSize(), seed); diff --git a/src/dbzero/object_model/dict/o_py_dict.cpp b/src/dbzero/object_model/dict/o_py_dict.cpp index 439fd0a3..a9c13396 100644 --- a/src/dbzero/object_model/dict/o_py_dict.cpp +++ b/src/dbzero/object_model/dict/o_py_dict.cpp @@ -20,19 +20,34 @@ namespace db0::object_model { namespace { - void writePyTuple(void *buf, const void *source) + void writePyTuple(void *buf, const void *source, EmbeddedObjectOffsetCollector *context) { - o_py_tuple::__new(buf, const_cast(static_cast(source))); + auto *pyObject = const_cast(static_cast(source)); + if (context) { + o_py_tuple::__new(buf, pyObject, *context); + } else { + o_py_tuple::__new(buf, pyObject); + } } - void writePySet(void *buf, const void *source) + void writePySet(void *buf, const void *source, EmbeddedObjectOffsetCollector *context) { - o_py_set::__new(buf, const_cast(static_cast(source))); + auto *pyObject = const_cast(static_cast(source)); + if (context) { + o_py_set::__new(buf, pyObject, *context); + } else { + o_py_set::__new(buf, pyObject); + } } - void writePyDict(void *buf, const void *source) + void writePyDict(void *buf, const void *source, EmbeddedObjectOffsetCollector *context) { - o_py_dict::__new(buf, const_cast(static_cast(source))); + auto *pyObject = const_cast(static_cast(source)); + if (context) { + o_py_dict::__new(buf, pyObject, *context); + } else { + o_py_dict::__new(buf, pyObject); + } } const ImmutableObjectInitializer &getInitializer(PyObject *pyObject) @@ -58,16 +73,53 @@ namespace db0::object_model return *initializer; } - void writeEmbeddedObject(void *buf, const void *source) + void writeEmbeddedObject(void *buf, const void *source, EmbeddedObjectOffsetCollector *context) { auto *pyObject = const_cast(static_cast(source)); const auto &initializer = getInitializer(pyObject); - o_embedded_object::__new(buf, initializer.getClassPtr()->getClassRef(), initializer); + if (context) { + context->add(buf); + o_embedded_object::__new(buf, initializer.getClassPtr()->getClassRef(), initializer, *context); + } else { + o_embedded_object::__new(buf, initializer.getClassPtr()->getClassRef(), initializer); + } } } o_py_dict::o_py_dict(PyObject *dict) : o_dict() + { + std::uint32_t count = 0; + std::uint32_t pairsByteSize = 0; + std::size_t capacity = 0; + std::uint32_t bucketByteSize = 0; + count = dictSize(dict); + pairsByteSize = checkedUint32Size(measurePairs(dict), "Python dict pairs byte size"); + capacity = hashIndexCapacity(count); + bucketByteSize = checkedUint32Size( + measureCollisionBuckets(dict, capacity), "Python dict bucket byte size" + ); + + auto arranger = arrangeDictMembers(count, pairsByteSize, bucketByteSize); + auto iterator = Py_OWN(PyObject_GetIter(dict)); + if (!iterator) { + PyErr_Clear(); + THROWF(db0::InputException) << "o_py_dict expects a Python dict"; + } + + Py_FOR(key, iterator) { + arranger = arranger(Pair::type(), elementFromPythonObject(*key), valueFromPythonDict(dict, *key)); + } + if (PyErr_Occurred()) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to iterate Python dict"; + } + + finishDictConstruction(arranger.ptr(), pairsByteSize, capacity, bucketByteSize); + } + + o_py_dict::o_py_dict(PyObject *dict, EmbeddedObjectOffsetCollector &offsetCollector) + : o_dict() { auto count = dictSize(dict); auto pairsByteSize = checkedUint32Size(measurePairs(dict), "Python dict pairs byte size"); @@ -84,7 +136,11 @@ namespace db0::object_model } Py_FOR(key, iterator) { - arranger = arranger(Pair::type(), elementFromPythonObject(*key), valueFromPythonDict(dict, *key)); + arranger = arranger( + Pair::type(), + elementFromPythonObject(*key, &offsetCollector), + valueFromPythonDict(dict, *key, &offsetCollector) + ); } if (PyErr_Occurred()) { PyErr_Clear(); @@ -124,6 +180,13 @@ namespace db0::object_model } o_py_dict::Element o_py_dict::elementFromPythonObject(PyObject *object) + { + return elementFromPythonObject(object, nullptr); + } + + o_py_dict::Element o_py_dict::elementFromPythonObject( + PyObject *object, EmbeddedObjectOffsetCollector *offsetCollector + ) { auto &typeManager = db0::python::PyToolkit::getTypeManager(); auto typeId = typeManager.getTypeId(object); @@ -164,15 +227,15 @@ namespace db0::object_model } case db0::bindings::TypeId::LIST: case db0::bindings::TypeId::TUPLE: - return Element::embeddedTuple(o_py_tuple::measure(object), writePyTuple, object); + return Element::embeddedTuple(o_py_tuple::measure(object), writePyTuple, object, offsetCollector); case db0::bindings::TypeId::SET: - return Element::embeddedSet(o_py_set::measure(object), writePySet, object); + return Element::embeddedSet(o_py_set::measure(object), writePySet, object, offsetCollector); case db0::bindings::TypeId::DICT: - return Element::embeddedDict(o_py_dict::measure(object), writePyDict, object); + return Element::embeddedDict(o_py_dict::measure(object), writePyDict, object, offsetCollector); case db0::bindings::TypeId::MEMO_IMMUTABLE_OBJECT: { const auto &initializer = getInitializer(object); auto size = o_embedded_object::measure(initializer.getClassPtr()->getClassRef(), initializer); - return Element::embeddedObject(size, writeEmbeddedObject, object); + return Element::embeddedObject(size, writeEmbeddedObject, object, offsetCollector); } default: break; @@ -183,6 +246,13 @@ namespace db0::object_model } o_py_dict::Element o_py_dict::valueFromPythonDict(PyObject *dict, PyObject *key) + { + return valueFromPythonDict(dict, key, nullptr); + } + + o_py_dict::Element o_py_dict::valueFromPythonDict( + PyObject *dict, PyObject *key, EmbeddedObjectOffsetCollector *offsetCollector + ) { auto *value = PyDict_GetItemWithError(dict, key); if (!value) { @@ -191,7 +261,7 @@ namespace db0::object_model } THROWF(db0::InputException) << "Unable to read Python dict value"; } - return elementFromPythonObject(value); + return elementFromPythonObject(value, offsetCollector); } std::uint32_t o_py_dict::dictSize(PyObject *dict) diff --git a/src/dbzero/object_model/dict/o_py_dict.hpp b/src/dbzero/object_model/dict/o_py_dict.hpp index 12769e8e..850ec635 100644 --- a/src/dbzero/object_model/dict/o_py_dict.hpp +++ b/src/dbzero/object_model/dict/o_py_dict.hpp @@ -14,12 +14,14 @@ using PyObject = _object; namespace db0::object_model { + struct EmbeddedObjectOffsetCollector; DB0_PACKED_BEGIN class DB0_PACKED_ATTR o_py_dict: public o_dict { public: explicit o_py_dict(PyObject *dict); + o_py_dict(PyObject *dict, EmbeddedObjectOffsetCollector &offsetCollector); static std::size_t measure(PyObject *dict); static Element elementFromPythonObject(PyObject *object); @@ -40,7 +42,13 @@ DB0_PACKED_BEGIN static db0::Foundation::Type type(); private: + static Element elementFromPythonObject( + PyObject *object, EmbeddedObjectOffsetCollector *offsetCollector + ); static Element valueFromPythonDict(PyObject *dict, PyObject *key); + static Element valueFromPythonDict( + PyObject *dict, PyObject *key, EmbeddedObjectOffsetCollector *offsetCollector + ); static std::uint32_t dictSize(PyObject *dict); static std::size_t measurePairs(PyObject *dict); static std::size_t measureCollisionBuckets(PyObject *dict, std::size_t capacity); diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp index b7389e90..03cd0011 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,25 @@ namespace db0::object_model return type.isNoCache() ? FlagSet { AccessOptions::no_cache } : FlagSet {}; } + ObjectImmutableImpl::ObjectSharedPtr makeEmbeddedObjectView( + db0::swine_ptr &fixture, ObjectImmutableImpl::ObjectPtr rootObject, + const o_embedded_object &embeddedObject + ) + { + if (!rootObject) { + THROWF(db0::InternalException) + << "Embedded object retrieval requires an initialized root language object"; + } + + auto &classFactory = fixture->get(); + auto type = classFactory.getTypeByClassRef(embeddedObject.getClassRef()).m_class; + auto memoType = classFactory.getLangType(*type); + if (memoType.get()) { + return python::makeEmbeddedMemoObject(rootObject, embeddedObject, std::move(type), memoType.get()); + } + return python::makeEmbeddedObject(rootObject, embeddedObject, std::move(type)); + } + std::uint8_t safeNumTypeTags(unsigned int value) { if (value > std::numeric_limits::max()) { @@ -136,7 +156,8 @@ namespace db0::object_model auto *embeddedValue = object->variableValue(value.m_loc.first); assert(embeddedValue); - if (value.m_storage_class == StorageClass::EMBEDDED_OBJECT) { + if (value.m_storage_class == StorageClass::OBJECT_REF + || value.m_storage_class == StorageClass::EMBEDDED_OBJECT) { assert(embeddedValue->itemKind() == StorageClass::EMBEDDED_OBJECT); const auto &embeddedObject = o_embedded_object::__const_ref( embeddedValue->embeddedPayload().begin() @@ -174,6 +195,7 @@ namespace db0::object_model } } } + } void ObjectImmutableImpl::postInit(FixtureLock &fixture) @@ -274,6 +296,22 @@ namespace db0::object_model return tryGet(this->findField(fieldName), isAutoGenerated); } + ObjectImmutableImpl::ObjectSharedPtr ObjectImmutableImpl::getEmbeddedInstanceAtOffset( + std::uint64_t offset + ) const + { + if (!this->hasInstance() || !(*this)->embeddedObjectOffsets().contains(offset)) { + THROWF(db0::BadAddressException) << "Invalid embedded immutable object offset: " << offset; + } + + auto fixture = this->getFixture(); + auto rootObject = getLangObject(); + + const auto *root = reinterpret_cast(this->operator->()); + const auto &embeddedObject = o_embedded_object::__const_ref(root + offset); + return makeEmbeddedObjectView(fixture, rootObject, embeddedObject); + } + ObjectImmutableImpl::ObjectSharedPtr ObjectImmutableImpl::tryGetEmbeddedField( const FieldInfo &fieldInfo ) const @@ -408,7 +446,7 @@ namespace db0::object_model void ObjectImmutableImpl::dropMembers(db0::swine_ptr &fixture, Class &classRef) const { super_t::dropMembers(fixture, classRef); - unrefNestedEmbeddedObjects(fixture, (*this)->embeddedObject()); + unrefEmbeddedObject(fixture, (*this)->embeddedObject()); } } diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.hpp b/src/dbzero/object_model/object/ObjectImmutableImpl.hpp index 7507e632..c35134ea 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.hpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.hpp @@ -29,6 +29,7 @@ namespace db0::object_model ObjectSharedPtr tryGet(MemberLoc, bool *is_auto_generated = nullptr) const; ObjectSharedPtr tryGet(const char *field_name, bool *is_auto_generated = nullptr) const; ObjectSharedPtr get(const char *field_name) const; + ObjectSharedPtr getEmbeddedInstanceAtOffset(std::uint64_t offset) const; void postInit(FixtureLock &); void setLangObject(ObjectPtr) const; diff --git a/src/dbzero/object_model/object/ObjectInitializer.hpp b/src/dbzero/object_model/object/ObjectInitializer.hpp index 9c63fec9..48c40f2c 100644 --- a/src/dbzero/object_model/object/ObjectInitializer.hpp +++ b/src/dbzero/object_model/object/ObjectInitializer.hpp @@ -280,6 +280,13 @@ namespace db0::object_model ObjectInitializer >; + for (auto i = 0u; i < m_active_count; ++i) { + if (m_initializers[i]->operator==(object)) { + closeAt(i); + break; + } + } + auto initAt = [&](std::uint32_t loc) { if (m_initializers[loc] && typeid(*m_initializers[loc]) == typeid(InitializerT)) { static_cast(m_initializers[loc].get())->init(object, std::forward(args)...); diff --git a/src/dbzero/object_model/object/o_embedded_object.cpp b/src/dbzero/object_model/object/o_embedded_object.cpp index fa5b98ae..2c8bf0f9 100644 --- a/src/dbzero/object_model/object/o_embedded_object.cpp +++ b/src/dbzero/object_model/object/o_embedded_object.cpp @@ -17,19 +17,34 @@ namespace db0::object_model { constexpr std::uint64_t PACK2_MASK = 0x3; - void writePyTuple(void *buf, const void *source) + void writePyTuple(void *buf, const void *source, EmbeddedObjectOffsetCollector *context) { - o_py_tuple::__new(buf, const_cast(static_cast(source))); + auto *pyObject = const_cast(static_cast(source)); + if (context) { + o_py_tuple::__new(buf, pyObject, *context); + } else { + o_py_tuple::__new(buf, pyObject); + } } - void writePySet(void *buf, const void *source) + void writePySet(void *buf, const void *source, EmbeddedObjectOffsetCollector *context) { - o_py_set::__new(buf, const_cast(static_cast(source))); + auto *pyObject = const_cast(static_cast(source)); + if (context) { + o_py_set::__new(buf, pyObject, *context); + } else { + o_py_set::__new(buf, pyObject); + } } - void writePyDict(void *buf, const void *source) + void writePyDict(void *buf, const void *source, EmbeddedObjectOffsetCollector *context) { - o_py_dict::__new(buf, const_cast(static_cast(source))); + auto *pyObject = const_cast(static_cast(source)); + if (context) { + o_py_dict::__new(buf, pyObject, *context); + } else { + o_py_dict::__new(buf, pyObject); + } } const ImmutableObjectInitializer &getInitializer(PyObject *pyObject) @@ -55,15 +70,21 @@ namespace db0::object_model return *initializer; } - void writeEmbeddedObject(void *buf, const void *source) + void writeEmbeddedObject(void *buf, const void *source, EmbeddedObjectOffsetCollector *context) { auto *pyObject = const_cast(static_cast(source)); const auto &initializer = getInitializer(pyObject); - o_embedded_object::__new(buf, initializer.getClassPtr()->getClassRef(), initializer); + if (context) { + context->add(buf); + o_embedded_object::__new(buf, initializer.getClassPtr()->getClassRef(), initializer, *context); + } else { + o_embedded_object::__new(buf, initializer.getClassPtr()->getClassRef(), initializer); + } } o_dict::Element fieldMapElementFromObject( - StorageClass storageClass, ImmutableObjectInitializer::ObjectSharedPtr object + StorageClass storageClass, ImmutableObjectInitializer::ObjectSharedPtr object, + EmbeddedObjectOffsetCollector *offsetCollector ) { auto *pyObject = object.get(); @@ -84,21 +105,21 @@ namespace db0::object_model case StorageClass::DB0_LIST: case StorageClass::DB0_TUPLE: { auto size = o_py_tuple::measure(pyObject); - return o_dict::Element::embeddedTuple(size, writePyTuple, pyObject); + return o_dict::Element::embeddedTuple(size, writePyTuple, pyObject, offsetCollector); } case StorageClass::DB0_SET: { auto size = o_py_set::measure(pyObject); - return o_dict::Element::embeddedSet(size, writePySet, pyObject); + return o_dict::Element::embeddedSet(size, writePySet, pyObject, offsetCollector); } case StorageClass::DB0_DICT: { auto size = o_py_dict::measure(pyObject); - return o_dict::Element::embeddedDict(size, writePyDict, pyObject); + return o_dict::Element::embeddedDict(size, writePyDict, pyObject, offsetCollector); } case StorageClass::OBJECT_REF: case StorageClass::EMBEDDED_OBJECT: { const auto &initializer = getInitializer(pyObject); auto size = o_embedded_object::measure(initializer.getClassPtr()->getClassRef(), initializer); - return o_dict::Element::embeddedObject(size, writeEmbeddedObject, pyObject); + return o_dict::Element::embeddedObject(size, writeEmbeddedObject, pyObject, offsetCollector); } default: THROWF(db0::InputException) @@ -107,7 +128,9 @@ namespace db0::object_model return o_dict::Element::none(); } - o_dict::ElementMap buildEmbeddedFieldMap(const ImmutableObjectInitializer &initializer) + o_dict::ElementMap buildEmbeddedFieldMap( + const ImmutableObjectInitializer &initializer, EmbeddedObjectOffsetCollector *offsetCollector + ) { o_dict::ElementMap fieldMap; for (const auto &value: initializer.objects()) { @@ -116,7 +139,9 @@ namespace db0::object_model fieldMap.erase(key); } else { assert(value.m_loc.second == 0 && "Variable-length embedded fields must use default fidelity"); - fieldMap[key] = fieldMapElementFromObject(value.m_storage_class, value.m_object); + fieldMap[key] = fieldMapElementFromObject( + value.m_storage_class, value.m_object, offsetCollector + ); } } return fieldMap; @@ -158,11 +183,27 @@ namespace db0::object_model o_embedded_object::o_embedded_object( std::uint32_t classRefValue, const ImmutableObjectInitializer &initializer ) + { + construct(classRefValue, initializer, nullptr); + } + + o_embedded_object::o_embedded_object( + std::uint32_t classRefValue, const ImmutableObjectInitializer &initializer, + EmbeddedObjectOffsetCollector &offsetCollector + ) + { + construct(classRefValue, initializer, &offsetCollector); + } + + void o_embedded_object::construct( + std::uint32_t classRefValue, const ImmutableObjectInitializer &initializer, + EmbeddedObjectOffsetCollector *offsetCollector + ) { PosVT::Data posVtData; unsigned int posVtOffset = 0; auto indexVtData = initializer.getData(posVtData, posVtOffset); - auto fieldMap = buildEmbeddedFieldMap(initializer); + auto fieldMap = buildEmbeddedFieldMap(initializer, offsetCollector); arrangeMembers() (db0::packed_int32::type(), classRefValue) (PosVT::type(), posVtData, posVtOffset) @@ -245,7 +286,7 @@ namespace db0::object_model PosVT::Data posVtData; unsigned int posVtOffset = 0; auto indexVtData = initializer.getData(posVtData, posVtOffset); - auto fieldMap = buildEmbeddedFieldMap(initializer); + auto fieldMap = buildEmbeddedFieldMap(initializer, nullptr); return measureMembers() (db0::packed_int32::type(), classRefValue) (PosVT::type(), posVtData, posVtOffset) diff --git a/src/dbzero/object_model/object/o_embedded_object.hpp b/src/dbzero/object_model/object/o_embedded_object.hpp index e1b8ae68..cd7c0da8 100644 --- a/src/dbzero/object_model/object/o_embedded_object.hpp +++ b/src/dbzero/object_model/object/o_embedded_object.hpp @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -17,6 +18,22 @@ namespace db0::object_model { + struct EmbeddedObjectOffsetCollector + { + const std::byte *m_root = nullptr; + std::vector *m_offsets = nullptr; + + void add(const void *object) const + { + assert(m_root); + assert(m_offsets); + auto offset = static_cast( + reinterpret_cast(object) - m_root + ); + assert(m_offsets->empty() || m_offsets->back() < offset); + m_offsets->push_back(offset); + } + }; struct FixedValue { @@ -41,6 +58,10 @@ DB0_PACKED_BEGIN using Element = o_tuple_item::Element; o_embedded_object(std::uint32_t classRef, const ImmutableObjectInitializer &initializer); + o_embedded_object( + std::uint32_t classRef, const ImmutableObjectInitializer &initializer, + EmbeddedObjectOffsetCollector &offsetCollector + ); o_embedded_object( std::uint32_t classRef, const PosVT::Data &posVtData, unsigned int posVtOffset, const XValue *indexVtBegin = nullptr, const XValue *indexVtEnd = nullptr @@ -78,6 +99,10 @@ DB0_PACKED_BEGIN o_embedded_object() = default; private: + void construct( + std::uint32_t classRef, const ImmutableObjectInitializer &initializer, + EmbeddedObjectOffsetCollector *offsetCollector + ); const db0::packed_int32 &classRef() const; }; DB0_PACKED_END diff --git a/src/dbzero/object_model/object/o_immutable_object.cpp b/src/dbzero/object_model/object/o_immutable_object.cpp index af09f898..f9bf19cf 100644 --- a/src/dbzero/object_model/object/o_immutable_object.cpp +++ b/src/dbzero/object_model/object/o_immutable_object.cpp @@ -2,14 +2,364 @@ // Copyright (c) 2025 DBZero Software sp. z o.o. #include "o_immutable_object.hpp" +#include +#include #include #include #include +#include +#include +#include #include +#include +#include +#include + namespace db0::object_model { + namespace + { + struct MeasureScratch + { + o_tuple_item::Element embeddedTuple(std::size_t size) + { + return makeEmbedded(size, StorageClass::EMBEDDED_TUPLE); + } + + o_tuple_item::Element embeddedSet(std::size_t size) + { + return makeEmbedded(size, StorageClass::EMBEDDED_SET); + } + + o_tuple_item::Element embeddedDict(std::size_t size) + { + return makeEmbedded(size, StorageClass::EMBEDDED_DICT); + } + + o_tuple_item::Element embeddedObject(std::size_t size) + { + return makeEmbedded(size, StorageClass::EMBEDDED_OBJECT); + } + + private: + o_tuple_item::Element makeEmbedded(std::size_t size, StorageClass kind) + { + auto buffer = std::make_unique(size == 0 ? 1 : size); + if (size > 0) { + auto salt = m_nextSalt++; + for (std::size_t i = 0; i < sizeof(salt) && i < size; ++i) { + buffer[i] = static_cast((salt >> (i * 8)) & 0xffU); + } + } + auto *data = buffer.get(); + m_buffers.push_back(std::move(buffer)); + + switch (kind) { + case StorageClass::EMBEDDED_TUPLE: + return o_tuple_item::Element::embeddedTuple(data, size); + case StorageClass::EMBEDDED_SET: + return o_tuple_item::Element::embeddedSet(data, size); + case StorageClass::EMBEDDED_DICT: + return o_tuple_item::Element::embeddedDict(data, size); + case StorageClass::EMBEDDED_OBJECT: + return o_tuple_item::Element::embeddedObject(data, size); + default: + THROWF(db0::InternalException) << "Unsupported embedded measure kind"; + } + return o_tuple_item::Element::none(); + } + + std::vector> m_buffers; + std::uint64_t m_nextSalt = 1; + }; + + struct EmbeddedObjectMeter: o_embedded_object + { + using o_embedded_object::measureMembers; + }; + + std::size_t countEmbeddedMemoObjects(PyObject *object); + + const ImmutableObjectInitializer *tryGetImmutableInitializer(PyObject *object) + { + using MemoImmutableObject = db0::python::PyToolkit::TypeManager::MemoImmutableObject; + if (!db0::python::PyToolkit::isMemoImmutableObject(object)) { + return nullptr; + } + const auto &memo = db0::python::PyToolkit::getTypeManager() + .template extractObject(object); + return dynamic_cast(InitManager::instance.findInitializer(memo)); + } + + std::size_t countEmbeddedMemoObjectsInInitializer(const ImmutableObjectInitializer &initializer) + { + std::size_t result = 0; + for (const auto &value: initializer.objects()) { + if (!value.m_object || value.m_storage_class == StorageClass::DELETED) { + continue; + } + result += countEmbeddedMemoObjects(value.m_object.get()); + } + return result; + } + + std::size_t countEmbeddedMemoObjects(PyObject *object) + { + if (!object) { + return 0; + } + if (auto *initializer = tryGetImmutableInitializer(object)) { + return 1 + countEmbeddedMemoObjectsInInitializer(*initializer); + } + if (PyTuple_Check(object)) { + std::size_t result = 0; + auto size = PyTuple_GET_SIZE(object); + for (Py_ssize_t i = 0; i < size; ++i) { + result += countEmbeddedMemoObjects(PyTuple_GET_ITEM(object, i)); + } + return result; + } + if (PyList_Check(object)) { + std::size_t result = 0; + auto size = PyList_GET_SIZE(object); + for (Py_ssize_t i = 0; i < size; ++i) { + result += countEmbeddedMemoObjects(PyList_GET_ITEM(object, i)); + } + return result; + } + if (PySet_Check(object)) { + std::size_t result = 0; + auto iterator = Py_OWN(PyObject_GetIter(object)); + if (!iterator.get()) { + PyErr_Clear(); + return 0; + } + Py_FOR(item, iterator) { + result += countEmbeddedMemoObjects(*item); + } + PyErr_Clear(); + return result; + } + if (PyDict_Check(object)) { + std::size_t result = 0; + PyObject *key = nullptr; + PyObject *value = nullptr; + Py_ssize_t pos = 0; + while (PyDict_Next(object, &pos, &key, &value)) { + result += countEmbeddedMemoObjects(key); + result += countEmbeddedMemoObjects(value); + } + return result; + } + return 0; + } + + std::vector worstCaseOffsetIndexValues(std::size_t count) + { + std::vector result; + result.reserve(count); + constexpr std::uint64_t BASE = std::numeric_limits::max() / 2; + for (std::size_t i = 0; i < count; ++i) { + result.push_back(BASE + i); + } + return result; + } + + std::size_t measureEmbeddedObjectNoWriters( + std::uint32_t classRef, const ImmutableObjectInitializer &initializer, MeasureScratch &scratch + ); + + o_tuple_item::Element elementFromPythonObjectNoWriters(PyObject *object, MeasureScratch &scratch); + + std::size_t measurePyTupleNoWriters(PyObject *sequence, MeasureScratch &scratch) + { + std::size_t count = 0; + if (PyTuple_Check(sequence)) { + count = static_cast(PyTuple_GET_SIZE(sequence)); + } else if (PyList_Check(sequence)) { + count = static_cast(PyList_GET_SIZE(sequence)); + } else { + THROWF(db0::InputException) << "o_py_tuple expects a Python tuple or list"; + } + + std::size_t elementsByteSize = 0; + for (std::size_t i = 0; i < count; ++i) { + auto *item = PyTuple_Check(sequence) + ? PyTuple_GET_ITEM(sequence, static_cast(i)) + : PyList_GET_ITEM(sequence, static_cast(i)); + elementsByteSize += o_tuple_item::measure(elementFromPythonObjectNoWriters(item, scratch)); + } + return o_tuple<>::Builder::measure( + static_cast(count), static_cast(elementsByteSize) + ); + } + + std::size_t measurePySetNoWriters(PyObject *setObject, MeasureScratch &scratch) + { + o_set::ElementSet elements; + auto iterator = Py_OWN(PyObject_GetIter(setObject)); + if (!iterator.get()) { + THROWF(db0::InputException) << "o_py_set expects an iterable"; + } + Py_FOR(item, iterator) { + elements.insert(elementFromPythonObjectNoWriters(*item, scratch)); + } + if (PyErr_Occurred()) { + THROWF(db0::InputException) << "o_py_set iteration failed"; + } + return o_set::measure(elements); + } + + std::size_t measurePyDictNoWriters(PyObject *dictObject, MeasureScratch &scratch) + { + if (!PyDict_Check(dictObject)) { + THROWF(db0::InputException) << "o_py_dict expects a Python dict"; + } + + o_dict::ElementMap elements; + PyObject *key = nullptr; + PyObject *value = nullptr; + Py_ssize_t pos = 0; + while (PyDict_Next(dictObject, &pos, &key, &value)) { + elements[ + elementFromPythonObjectNoWriters(key, scratch) + ] = elementFromPythonObjectNoWriters(value, scratch); + } + return o_dict::measure(elements); + } + + o_tuple_item::Element objectValueElementNoWriters( + StorageClass storageClass, PyObject *object, MeasureScratch &scratch + ) + { + auto &typeManager = db0::python::PyToolkit::getTypeManager(); + switch (storageClass) { + case StorageClass::STRING_REF: + case StorageClass::POOLED_STRING: + case StorageClass::STR64: + return o_tuple_item::Element::string(typeManager.extractString(object)); + case StorageClass::DB0_BYTES: + case StorageClass::DB0_BYTES_ARRAY: { + auto bytes = typeManager.extractBytes(object); + return o_tuple_item::Element::bytes(bytes.m_data, bytes.m_size); + } + case StorageClass::DB0_LIST: + case StorageClass::DB0_TUPLE: + return scratch.embeddedTuple(measurePyTupleNoWriters(object, scratch)); + case StorageClass::DB0_SET: + return scratch.embeddedSet(measurePySetNoWriters(object, scratch)); + case StorageClass::DB0_DICT: + return scratch.embeddedDict(measurePyDictNoWriters(object, scratch)); + case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_OBJECT: { + auto *nestedInitializer = tryGetImmutableInitializer(object); + if (!nestedInitializer) { + THROWF(db0::InputException) + << "Only non-materialized immutable memo objects can be embedded"; + } + auto classRef = nestedInitializer->getClassPtr()->getClassRef(); + return scratch.embeddedObject( + measureEmbeddedObjectNoWriters(classRef, *nestedInitializer, scratch) + ); + } + default: + THROWF(db0::InputException) + << "Storage class cannot be stored in embedded field map: " << storageClass; + } + return o_tuple_item::Element::none(); + } + + o_tuple_item::Element elementFromPythonObjectNoWriters(PyObject *object, MeasureScratch &scratch) + { + auto &typeManager = db0::python::PyToolkit::getTypeManager(); + auto typeId = typeManager.getTypeId(object); + switch (typeId) { + case db0::bindings::TypeId::NONE: + return o_tuple_item::Element::none(); + case db0::bindings::TypeId::BOOLEAN: + return o_tuple_item::Element::boolean(object == Py_True); + case db0::bindings::TypeId::INTEGER: + return o_tuple_item::Element::integer(PyLong_AsLongLong(object)); + case db0::bindings::TypeId::FLOAT: + return o_tuple_item::Element::floating(PyFloat_AsDouble(object)); + case db0::bindings::TypeId::DATE: + return o_tuple_item::Element::date(typeManager.extractUInt64(typeId, object)); + case db0::bindings::TypeId::DATETIME: + return o_tuple_item::Element::datetime(typeManager.extractUInt64(typeId, object)); + case db0::bindings::TypeId::DATETIME_TZ: + return o_tuple_item::Element::datetimeTz(typeManager.extractUInt64(typeId, object)); + case db0::bindings::TypeId::TIME: + return o_tuple_item::Element::time(typeManager.extractUInt64(typeId, object)); + case db0::bindings::TypeId::TIME_TZ: + return o_tuple_item::Element::timeTz(typeManager.extractUInt64(typeId, object)); + case db0::bindings::TypeId::DECIMAL: + return o_tuple_item::Element::decimal(typeManager.extractUInt64(typeId, object)); + case db0::bindings::TypeId::STRING: + return o_tuple_item::Element::string(typeManager.extractString(object)); + case db0::bindings::TypeId::BYTES: { + auto bytes = typeManager.extractBytes(object); + return o_tuple_item::Element::bytes(bytes.m_data, bytes.m_size); + } + case db0::bindings::TypeId::LIST: + case db0::bindings::TypeId::TUPLE: + return scratch.embeddedTuple(measurePyTupleNoWriters(object, scratch)); + case db0::bindings::TypeId::SET: + return scratch.embeddedSet(measurePySetNoWriters(object, scratch)); + case db0::bindings::TypeId::DICT: + return scratch.embeddedDict(measurePyDictNoWriters(object, scratch)); + case db0::bindings::TypeId::MEMO_IMMUTABLE_OBJECT: { + auto *nestedInitializer = tryGetImmutableInitializer(object); + if (!nestedInitializer) { + THROWF(db0::InputException) + << "Only non-materialized immutable memo objects can be embedded"; + } + auto classRef = nestedInitializer->getClassPtr()->getClassRef(); + return scratch.embeddedObject( + measureEmbeddedObjectNoWriters(classRef, *nestedInitializer, scratch) + ); + } + default: + break; + } + + THROWF(db0::InputException) << "Unsupported embedded Python object type: " << Py_TYPE(object)->tp_name; + return o_tuple_item::Element::none(); + } + + o_dict::ElementMap buildMeasureFieldMapNoWriters( + const ImmutableObjectInitializer &initializer, MeasureScratch &scratch + ) + { + o_dict::ElementMap fieldMap; + for (const auto &value: initializer.objects()) { + auto key = o_tuple_item::Element::integer(value.m_loc.first); + if (!value.m_object || value.m_storage_class == StorageClass::DELETED) { + fieldMap.erase(key); + continue; + } + assert(value.m_loc.second == 0 && "Variable-length embedded fields must use default fidelity"); + fieldMap[key] = objectValueElementNoWriters(value.m_storage_class, value.m_object.get(), scratch); + } + return fieldMap; + } + + std::size_t measureEmbeddedObjectNoWriters( + std::uint32_t classRef, const ImmutableObjectInitializer &initializer, MeasureScratch &scratch + ) + { + PosVT::Data posVtData; + unsigned int posVtOffset = 0; + auto indexVtData = initializer.getData(posVtData, posVtOffset); + auto fieldMap = buildMeasureFieldMapNoWriters(initializer, scratch); + return EmbeddedObjectMeter::measureMembers() + (db0::packed_int32::type(), classRef) + (PosVT::type(), posVtData, posVtOffset) + (IndexVT::type(), indexVtData.first, indexVtData.second) + (o_dict::type(), fieldMap); + } + } o_immutable_object::o_immutable_object(std::uint32_t class_ref, std::pair ref_counts, std::uint8_t num_type_tags, @@ -17,8 +367,11 @@ namespace db0::object_model : m_header(ref_counts) , m_num_type_tags(num_type_tags) { - arrangeMembers() - (o_embedded_object::type(), class_ref, initializer); + std::vector offsets; + EmbeddedObjectOffsetCollector offsetCollector{ reinterpret_cast(this), &offsets }; + auto arranger = arrangeMembers(); + arranger = arranger(o_embedded_object::type(), class_ref, initializer, offsetCollector); + arranger(o_packed_offset_index::type(), offsets); } o_immutable_object::o_immutable_object(std::uint32_t class_ref, @@ -27,15 +380,19 @@ namespace db0::object_model : m_header(ref_counts) , m_num_type_tags(num_type_tags) { - arrangeMembers() - (o_embedded_object::type(), class_ref, pos_vt_data, pos_vt_offset, index_vt_begin, index_vt_end); + auto arranger = arrangeMembers(); + arranger = arranger(o_embedded_object::type(), class_ref, pos_vt_data, pos_vt_offset, index_vt_begin, index_vt_end); + arranger(o_packed_offset_index::type(), std::vector()); } std::size_t o_immutable_object::measure(std::uint32_t class_ref, std::pair, std::uint8_t, const ImmutableObjectInitializer &initializer) { + MeasureScratch scratch; + auto offsets = worstCaseOffsetIndexValues(countEmbeddedMemoObjectsInInitializer(initializer)); return super_t::measureMembers() - (o_embedded_object::type(), class_ref, initializer); + (measureEmbeddedObjectNoWriters(class_ref, initializer, scratch)) + (o_packed_offset_index::type(), offsets); } std::size_t o_immutable_object::measure(std::uint32_t class_ref, @@ -43,7 +400,8 @@ namespace db0::object_model const XValue *index_vt_begin, const XValue *index_vt_end) { return super_t::measureMembers() - (o_embedded_object::type(), class_ref, pos_vt_data, pos_vt_offset, index_vt_begin, index_vt_end); + (o_embedded_object::type(), class_ref, pos_vt_data, pos_vt_offset, index_vt_begin, index_vt_end) + (o_packed_offset_index::type(), std::vector()); } o_embedded_object &o_immutable_object::embeddedObject() @@ -56,6 +414,11 @@ namespace db0::object_model return getDynFirst(o_embedded_object::type()); } + const o_packed_offset_index &o_immutable_object::embeddedObjectOffsets() const + { + return getDynAfter(embeddedObject(), o_packed_offset_index::type()); + } + const PosVT &o_immutable_object::pos_vt() const { return embeddedObject().pos_vt(); } diff --git a/src/dbzero/object_model/object/o_immutable_object.hpp b/src/dbzero/object_model/object/o_immutable_object.hpp index bcf4c23f..8b2c7424 100644 --- a/src/dbzero/object_model/object/o_immutable_object.hpp +++ b/src/dbzero/object_model/object/o_immutable_object.hpp @@ -6,6 +6,7 @@ #include #include #include "o_embedded_object.hpp" +#include "o_packed_offset_index.hpp" #include namespace db0::object_model @@ -27,6 +28,7 @@ DB0_PACKED_BEGIN o_embedded_object &embeddedObject(); const o_embedded_object &embeddedObject() const; + const o_packed_offset_index &embeddedObjectOffsets() const; PosVT &pos_vt(); const PosVT &pos_vt() const; @@ -60,7 +62,18 @@ DB0_PACKED_BEGIN template static std::size_t safeSizeOf(BufT buf) { return super_t::sizeOfMembers(buf) - (o_embedded_object::type()); + (o_embedded_object::type()) + (o_packed_offset_index::type()); + } + + static constexpr std::size_t dynamicOffset() + { + return super_t::baseSize(); + } + + static std::size_t measureBaseWithEmbeddedSize(std::size_t embeddedSize) + { + return super_t::baseSize() + embeddedSize; } void incRef(bool is_tag); diff --git a/src/dbzero/object_model/object/o_packed_offset_index.cpp b/src/dbzero/object_model/object/o_packed_offset_index.cpp new file mode 100644 index 00000000..1d5717c2 --- /dev/null +++ b/src/dbzero/object_model/object/o_packed_offset_index.cpp @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#include "o_packed_offset_index.hpp" + +#include +#include +#include + +#include + +namespace db0::object_model +{ + namespace + { + std::uint32_t checkedUint32(std::size_t value, const char *name) + { + if (value > std::numeric_limits::max()) { + THROWF(db0::InternalException) << name << " exceeds uint32 range"; + } + return static_cast(value); + } + + std::uint8_t checkedPackedSize(std::size_t value) + { + if (value == 0 || value > db0::packed_int64::max_len()) { + THROWF(db0::InternalException) << "Packed offset width is invalid"; + } + return static_cast(value); + } + + std::uint64_t maxForPackedSize(std::uint8_t packedSize) + { + checkedPackedSize(packedSize); + if (packedSize == db0::packed_int64::max_len()) { + return std::numeric_limits::max(); + } + return (std::uint64_t { 1 } << (packedSize * 7)) - 1; + } + + void writePacked64WithWidth(std::byte *&cursor, std::uint64_t value, std::uint8_t width) + { + maxForPackedSize(width); + for (std::uint8_t byteIndex = 0; byteIndex < width; ++byteIndex) { + auto shift = (width - byteIndex - 1) * 7; + auto byteValue = static_cast((value >> shift) & 0x7f); + if (byteIndex + 1 < width) { + byteValue |= 0x80; + } + *cursor = static_cast(byteValue); + ++cursor; + } + } + + std::uint64_t readPacked64WithWidth( + const std::byte *members, std::uint8_t packedSize, std::uint32_t index + ) + { + auto *cursor = members + (static_cast(packedSize) * index); + return db0::packed_int64::read(cursor); + } + + std::uint8_t packedSizeOfGroup(const std::uint64_t *begin, const std::uint64_t *end) + { + if (begin == nullptr || end == nullptr || begin >= end) { + THROWF(db0::InternalException) << "Offset index group data range is invalid"; + } + return checkedPackedSize(db0::packed_int64::measure(*begin)); + } + + const std::uint64_t *scanInputGroup(const std::uint64_t *&cursor, const std::uint64_t *end) + { + auto begin = cursor; + auto packedSize = checkedPackedSize(db0::packed_int64::measure(*cursor)); + auto groupMax = maxForPackedSize(packedSize); +#ifndef NDEBUG + auto previous = *cursor; +#endif + ++cursor; + while (cursor < end && *cursor <= groupMax) { +#ifndef NDEBUG + assert(*cursor > previous && "Offset index input must be sorted and unique"); + previous = *cursor; +#endif + ++cursor; + } +#ifndef NDEBUG + assert((cursor >= end || *cursor > previous) && "Offset index input must be sorted and unique"); +#endif + return begin; + } + + class OffsetGroupRanges + { + public: + class const_iterator + { + public: + const_iterator() = default; + + const_iterator(const std::uint64_t *cursor, const std::uint64_t *end) + : m_cursor(cursor) + , m_end(end) + { + advance(); + } + + o_packed_offset_group_range operator*() const + { + return m_range; + } + + const_iterator &operator++() + { + advance(); + return *this; + } + + bool operator==(const const_iterator &other) const + { + return m_range.begin == other.m_range.begin; + } + + bool operator!=(const const_iterator &other) const + { + return !(*this == other); + } + + private: + void advance() + { + if (m_cursor >= m_end) { + m_range = { m_end, m_end }; + return; + } + auto *rangeBegin = scanInputGroup(m_cursor, m_end); + m_range = { rangeBegin, m_cursor }; + } + + const std::uint64_t *m_cursor = nullptr; + const std::uint64_t *m_end = nullptr; + o_packed_offset_group_range m_range; + }; + + explicit OffsetGroupRanges(const std::vector &offsets) + : m_begin(offsets.data()) + , m_end(offsets.data() + offsets.size()) + { + } + + const_iterator begin() const + { + return const_iterator(m_begin, m_end); + } + + const_iterator end() const + { + return const_iterator(m_end, m_end); + } + + private: + const std::uint64_t *m_begin = nullptr; + const std::uint64_t *m_end = nullptr; + }; + } + + o_packed_offset_group::o_packed_offset_group(const std::uint64_t *begin, const std::uint64_t *end) + { + auto packedSize = packedSizeOfGroup(begin, end); + auto count = checkedUint32(end - begin, "Offset index group count"); + auto arranger = arrangeMembers() + (db0::packed_int32::type(), packedSize) + (db0::packed_int32::type(), count); + auto *cursor = arranger.ptr(); + for (auto *it = begin; it != end; ++it) { + writePacked64WithWidth(cursor, *it, packedSize); + } + } + + o_packed_offset_group::o_packed_offset_group(o_packed_offset_group_range range) + : o_packed_offset_group(range.begin, range.end) + { + } + + std::uint32_t o_packed_offset_group::size() const + { + return countMember().value(); + } + + bool o_packed_offset_group::empty() const + { + return size() == 0; + } + + std::uint8_t o_packed_offset_group::elementSize() const + { + return checkedPackedSize(packedSizeMember().value()); + } + + std::uint64_t o_packed_offset_group::first() const + { + if (empty()) { + THROWF(db0::InternalException) << "Offset index group is empty"; + } + return at(0); + } + + std::uint64_t o_packed_offset_group::last() const + { + if (empty()) { + THROWF(db0::InternalException) << "Offset index group is empty"; + } + return at(size() - 1); + } + + std::uint64_t o_packed_offset_group::at(std::uint32_t index) const + { + auto count = size(); + if (index >= count) { + THROWF(db0::InternalException) << "Offset index group element is out of range"; + } + auto *cursor = members() + (static_cast(elementSize()) * index); + return db0::packed_int64::read(cursor); + } + + bool o_packed_offset_group::contains(std::uint64_t value) const + { + auto groupElementSize = elementSize(); + if (checkedPackedSize(db0::packed_int64::measure(value)) != groupElementSize) { + return false; + } + + const auto *groupMembers = members(); + auto count = size(); + std::uint32_t begin = 0; + auto end = count; + while (begin < end) { + auto mid = begin + ((end - begin) / 2); + auto candidate = readPacked64WithWidth(groupMembers, groupElementSize, mid); + if (candidate < value) { + begin = mid + 1; + } else { + end = mid; + } + } + return begin < count && readPacked64WithWidth(groupMembers, groupElementSize, begin) == value; + } + + std::size_t o_packed_offset_group::sizeOf() const + { + return members() - reinterpret_cast(this) + + (static_cast(elementSize()) * size()); + } + + std::size_t o_packed_offset_group::measure(const std::uint64_t *begin, const std::uint64_t *end) + { + auto packedSize = packedSizeOfGroup(begin, end); + auto count = checkedUint32(end - begin, "Offset index group count"); + return measureMembers() + (db0::packed_int32::type(), packedSize) + (db0::packed_int32::type(), count) + (static_cast(packedSize) * count); + } + + std::size_t o_packed_offset_group::measure(o_packed_offset_group_range range) + { + return measure(range.begin, range.end); + } + + const db0::packed_int32 &o_packed_offset_group::packedSizeMember() const + { + return getDynFirst(db0::packed_int32::type()); + } + + const db0::packed_int32 &o_packed_offset_group::countMember() const + { + return getDynAfter(packedSizeMember(), db0::packed_int32::type()); + } + + const std::byte *o_packed_offset_group::members() const + { + return reinterpret_cast(&countMember()) + countMember().sizeOf(); + } + + std::byte *o_packed_offset_group::members() + { + return const_cast(static_cast(this)->members()); + } + + o_packed_offset_index::o_packed_offset_index(const std::vector &offsets) + : super_t(OffsetGroupRanges(offsets)) + { + } + + std::size_t o_packed_offset_index::size() const + { + std::size_t result = 0; + for (auto it = begin(); it != end(); ++it) { + result += it->size(); + } + return result; + } + + bool o_packed_offset_index::contains(std::uint64_t value) const + { + if (getSuper().empty()) { + return false; + } + + auto targetPackedSize = checkedPackedSize(db0::packed_int64::measure(value)); + for (auto it = begin(); it != end(); ++it) { + if (targetPackedSize < it->elementSize()) { + return false; + } + if (targetPackedSize > it->elementSize()) { + continue; + } + return it->contains(value); + } + return false; + } + + std::size_t o_packed_offset_index::measure(const std::vector &offsets) + { + return list_t::measure(OffsetGroupRanges(offsets)); + } +} diff --git a/src/dbzero/object_model/object/o_packed_offset_index.hpp b/src/dbzero/object_model/object/o_packed_offset_index.hpp new file mode 100644 index 00000000..084d0eff --- /dev/null +++ b/src/dbzero/object_model/object/o_packed_offset_index.hpp @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +namespace db0::object_model +{ + +DB0_PACKED_BEGIN + struct o_packed_offset_group_range + { + const std::uint64_t *begin = nullptr; + const std::uint64_t *end = nullptr; + }; + + class DB0_PACKED_ATTR o_packed_offset_group: public db0::o_base + { + protected: + using super_t = db0::o_base; + friend super_t; + + public: + o_packed_offset_group(const std::uint64_t *begin, const std::uint64_t *end); + explicit o_packed_offset_group(o_packed_offset_group_range range); + + std::uint32_t size() const; + bool empty() const; + std::uint8_t elementSize() const; + std::uint64_t first() const; + std::uint64_t last() const; + std::uint64_t at(std::uint32_t index) const; + bool contains(std::uint64_t value) const; + + std::size_t sizeOf() const; + + static std::size_t measure(const std::uint64_t *begin, const std::uint64_t *end); + static std::size_t measure(o_packed_offset_group_range range); + + template static std::size_t safeSizeOf(BufT buf) + { + const auto &group = __const_ref(buf); + auto result = group.sizeOf(); + buf += result; + return result; + } + + protected: + o_packed_offset_group() = default; + + private: + const db0::packed_int32 &packedSizeMember() const; + const db0::packed_int32 &countMember() const; + const std::byte *members() const; + std::byte *members(); + }; +DB0_PACKED_END + +DB0_PACKED_BEGIN + class DB0_PACKED_ATTR o_packed_offset_index: + public db0::o_ext, 0, false> + { + public: + using list_t = db0::o_list; + using super_t = db0::o_ext; + using const_iterator = list_t::const_iterator; + + friend super_t; + + std::size_t size() const; + bool contains(std::uint64_t value) const; + + static std::size_t measure(const std::vector &offsets); + + protected: + explicit o_packed_offset_index(const std::vector &offsets); + }; +DB0_PACKED_END + +} diff --git a/src/dbzero/object_model/set/o_py_set.cpp b/src/dbzero/object_model/set/o_py_set.cpp index 17d2a86f..0571d2b4 100644 --- a/src/dbzero/object_model/set/o_py_set.cpp +++ b/src/dbzero/object_model/set/o_py_set.cpp @@ -20,19 +20,34 @@ namespace db0::object_model { namespace { - void writePyTuple(void *buf, const void *source) + void writePyTuple(void *buf, const void *source, EmbeddedObjectOffsetCollector *context) { - o_py_tuple::__new(buf, const_cast(static_cast(source))); + auto *pyObject = const_cast(static_cast(source)); + if (context) { + o_py_tuple::__new(buf, pyObject, *context); + } else { + o_py_tuple::__new(buf, pyObject); + } } - void writePySet(void *buf, const void *source) + void writePySet(void *buf, const void *source, EmbeddedObjectOffsetCollector *context) { - o_py_set::__new(buf, const_cast(static_cast(source))); + auto *pyObject = const_cast(static_cast(source)); + if (context) { + o_py_set::__new(buf, pyObject, *context); + } else { + o_py_set::__new(buf, pyObject); + } } - void writePyDict(void *buf, const void *source) + void writePyDict(void *buf, const void *source, EmbeddedObjectOffsetCollector *context) { - o_py_dict::__new(buf, const_cast(static_cast(source))); + auto *pyObject = const_cast(static_cast(source)); + if (context) { + o_py_dict::__new(buf, pyObject, *context); + } else { + o_py_dict::__new(buf, pyObject); + } } const ImmutableObjectInitializer &getInitializer(PyObject *pyObject) @@ -58,16 +73,53 @@ namespace db0::object_model return *initializer; } - void writeEmbeddedObject(void *buf, const void *source) + void writeEmbeddedObject(void *buf, const void *source, EmbeddedObjectOffsetCollector *context) { auto *pyObject = const_cast(static_cast(source)); const auto &initializer = getInitializer(pyObject); - o_embedded_object::__new(buf, initializer.getClassPtr()->getClassRef(), initializer); + if (context) { + context->add(buf); + o_embedded_object::__new(buf, initializer.getClassPtr()->getClassRef(), initializer, *context); + } else { + o_embedded_object::__new(buf, initializer.getClassPtr()->getClassRef(), initializer); + } } } o_py_set::o_py_set(PyObject *iterable) : o_set() + { + std::uint32_t count = 0; + std::uint32_t elementsByteSize = 0; + std::size_t capacity = 0; + std::uint32_t bucketByteSize = 0; + count = setSize(iterable); + elementsByteSize = checkedUint32Size(measureElements(iterable), "Python set elements byte size"); + capacity = hashIndexCapacity(count); + bucketByteSize = checkedUint32Size( + measureCollisionBuckets(iterable, capacity), "Python set bucket byte size" + ); + + auto arranger = arrangeSetMembers(count, elementsByteSize, bucketByteSize); + auto iterator = Py_OWN(PyObject_GetIter(iterable)); + if (!iterator) { + PyErr_Clear(); + THROWF(db0::InputException) << "o_py_set expects a Python set"; + } + + Py_FOR(item, iterator) { + arranger = arranger(Item::type(), elementFromPythonObject(*item)); + } + if (PyErr_Occurred()) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to iterate Python set"; + } + + finishSetConstruction(arranger.ptr(), elementsByteSize, capacity, bucketByteSize); + } + + o_py_set::o_py_set(PyObject *iterable, EmbeddedObjectOffsetCollector &offsetCollector) + : o_set() { auto count = setSize(iterable); auto elementsByteSize = checkedUint32Size(measureElements(iterable), "Python set elements byte size"); @@ -84,7 +136,7 @@ namespace db0::object_model } Py_FOR(item, iterator) { - arranger = arranger(Item::type(), elementFromPythonObject(*item)); + arranger = arranger(Item::type(), elementFromPythonObject(*item, &offsetCollector)); } if (PyErr_Occurred()) { PyErr_Clear(); @@ -124,6 +176,13 @@ namespace db0::object_model } o_py_set::Element o_py_set::elementFromPythonObject(PyObject *object) + { + return elementFromPythonObject(object, nullptr); + } + + o_py_set::Element o_py_set::elementFromPythonObject( + PyObject *object, EmbeddedObjectOffsetCollector *offsetCollector + ) { auto &typeManager = db0::python::PyToolkit::getTypeManager(); auto typeId = typeManager.getTypeId(object); @@ -164,15 +223,15 @@ namespace db0::object_model } case db0::bindings::TypeId::LIST: case db0::bindings::TypeId::TUPLE: - return Element::embeddedTuple(o_py_tuple::measure(object), writePyTuple, object); + return Element::embeddedTuple(o_py_tuple::measure(object), writePyTuple, object, offsetCollector); case db0::bindings::TypeId::SET: - return Element::embeddedSet(o_py_set::measure(object), writePySet, object); + return Element::embeddedSet(o_py_set::measure(object), writePySet, object, offsetCollector); case db0::bindings::TypeId::DICT: - return Element::embeddedDict(o_py_dict::measure(object), writePyDict, object); + return Element::embeddedDict(o_py_dict::measure(object), writePyDict, object, offsetCollector); case db0::bindings::TypeId::MEMO_IMMUTABLE_OBJECT: { const auto &initializer = getInitializer(object); auto size = o_embedded_object::measure(initializer.getClassPtr()->getClassRef(), initializer); - return Element::embeddedObject(size, writeEmbeddedObject, object); + return Element::embeddedObject(size, writeEmbeddedObject, object, offsetCollector); } default: break; diff --git a/src/dbzero/object_model/set/o_py_set.hpp b/src/dbzero/object_model/set/o_py_set.hpp index f4298f15..904651f5 100644 --- a/src/dbzero/object_model/set/o_py_set.hpp +++ b/src/dbzero/object_model/set/o_py_set.hpp @@ -14,12 +14,14 @@ using PyObject = _object; namespace db0::object_model { + struct EmbeddedObjectOffsetCollector; DB0_PACKED_BEGIN class DB0_PACKED_ATTR o_py_set: public o_set { public: explicit o_py_set(PyObject *iterable); + o_py_set(PyObject *iterable, EmbeddedObjectOffsetCollector &offsetCollector); static std::size_t measure(PyObject *iterable); @@ -40,6 +42,9 @@ DB0_PACKED_BEGIN private: static Element elementFromPythonObject(PyObject *object); + static Element elementFromPythonObject( + PyObject *object, EmbeddedObjectOffsetCollector *offsetCollector + ); static std::uint32_t setSize(PyObject *set); static std::size_t measureElements(PyObject *set); static std::size_t measureCollisionBuckets(PyObject *set, std::size_t capacity); diff --git a/src/dbzero/object_model/set/o_set.cpp b/src/dbzero/object_model/set/o_set.cpp index d770e115..0e5d1b3e 100644 --- a/src/dbzero/object_model/set/o_set.cpp +++ b/src/dbzero/object_model/set/o_set.cpp @@ -505,7 +505,10 @@ namespace db0::object_model case StorageClass::EMBEDDED_OBJECT: { if (element.m_payload.m_bytes_value.m_writer) { std::vector payload(element.bytesSize()); - element.m_payload.m_bytes_value.m_writer(payload.data(), element.m_payload.m_bytes_value.m_source); + element.m_payload.m_bytes_value.m_writer( + payload.data(), element.m_payload.m_bytes_value.m_source, + element.m_payload.m_bytes_value.m_context + ); return hashBytes(payload.data(), payload.size(), seed); } return hashBytes(element.bytesData(), element.bytesSize(), seed); diff --git a/src/dbzero/object_model/tuple/o_py_tuple.cpp b/src/dbzero/object_model/tuple/o_py_tuple.cpp index a67ac29a..241cb5c0 100644 --- a/src/dbzero/object_model/tuple/o_py_tuple.cpp +++ b/src/dbzero/object_model/tuple/o_py_tuple.cpp @@ -17,19 +17,34 @@ namespace db0::object_model { namespace { - void writePyTuple(void *buf, const void *source) + void writePyTuple(void *buf, const void *source, EmbeddedObjectOffsetCollector *context) { - o_py_tuple::__new(buf, const_cast(static_cast(source))); + auto *pyObject = const_cast(static_cast(source)); + if (context) { + o_py_tuple::__new(buf, pyObject, *context); + } else { + o_py_tuple::__new(buf, pyObject); + } } - void writePySet(void *buf, const void *source) + void writePySet(void *buf, const void *source, EmbeddedObjectOffsetCollector *context) { - o_py_set::__new(buf, const_cast(static_cast(source))); + auto *pyObject = const_cast(static_cast(source)); + if (context) { + o_py_set::__new(buf, pyObject, *context); + } else { + o_py_set::__new(buf, pyObject); + } } - void writePyDict(void *buf, const void *source) + void writePyDict(void *buf, const void *source, EmbeddedObjectOffsetCollector *context) { - o_py_dict::__new(buf, const_cast(static_cast(source))); + auto *pyObject = const_cast(static_cast(source)); + if (context) { + o_py_dict::__new(buf, pyObject, *context); + } else { + o_py_dict::__new(buf, pyObject); + } } const ImmutableObjectInitializer &getInitializer(PyObject *pyObject) @@ -55,11 +70,16 @@ namespace db0::object_model return *initializer; } - void writeEmbeddedObject(void *buf, const void *source) + void writeEmbeddedObject(void *buf, const void *source, EmbeddedObjectOffsetCollector *context) { auto *pyObject = const_cast(static_cast(source)); const auto &initializer = getInitializer(pyObject); - o_embedded_object::__new(buf, initializer.getClassPtr()->getClassRef(), initializer); + if (context) { + context->add(buf); + o_embedded_object::__new(buf, initializer.getClassPtr()->getClassRef(), initializer, *context); + } else { + o_embedded_object::__new(buf, initializer.getClassPtr()->getClassRef(), initializer); + } } } @@ -77,6 +97,22 @@ namespace db0::object_model } } + o_py_tuple::o_py_tuple(PyObject *sequence, EmbeddedObjectOffsetCollector &offsetCollector) + : o_tuple<>() + { + auto count = static_cast(sequenceSize(sequence)); + auto elementsByteSize = static_cast(measureElements(sequence)); + + auto arranger = arrangeMembers(); + arranger = arranger(db0::packed_int32::type(), count); + arranger = arranger(db0::packed_int32::type(), elementsByteSize); + for (std::size_t i = 0; i < count; ++i) { + arranger = arranger( + o_tuple_item::type(), elementFromPythonObject(sequenceItem(sequence, i), &offsetCollector) + ); + } + } + std::size_t o_py_tuple::measure(PyObject *sequence) { auto count = static_cast(sequenceSize(sequence)); @@ -103,6 +139,13 @@ namespace db0::object_model } o_py_tuple::Element o_py_tuple::elementFromPythonObject(PyObject *object) + { + return elementFromPythonObject(object, nullptr); + } + + o_py_tuple::Element o_py_tuple::elementFromPythonObject( + PyObject *object, EmbeddedObjectOffsetCollector *offsetCollector + ) { auto &typeManager = db0::python::PyToolkit::getTypeManager(); auto typeId = typeManager.getTypeId(object); @@ -143,15 +186,15 @@ namespace db0::object_model } case db0::bindings::TypeId::LIST: case db0::bindings::TypeId::TUPLE: - return Element::embeddedTuple(o_py_tuple::measure(object), writePyTuple, object); + return Element::embeddedTuple(o_py_tuple::measure(object), writePyTuple, object, offsetCollector); case db0::bindings::TypeId::SET: - return Element::embeddedSet(o_py_set::measure(object), writePySet, object); + return Element::embeddedSet(o_py_set::measure(object), writePySet, object, offsetCollector); case db0::bindings::TypeId::DICT: - return Element::embeddedDict(o_py_dict::measure(object), writePyDict, object); + return Element::embeddedDict(o_py_dict::measure(object), writePyDict, object, offsetCollector); case db0::bindings::TypeId::MEMO_IMMUTABLE_OBJECT: { const auto &initializer = getInitializer(object); auto size = o_embedded_object::measure(initializer.getClassPtr()->getClassRef(), initializer); - return Element::embeddedObject(size, writeEmbeddedObject, object); + return Element::embeddedObject(size, writeEmbeddedObject, object, offsetCollector); } default: break; diff --git a/src/dbzero/object_model/tuple/o_py_tuple.hpp b/src/dbzero/object_model/tuple/o_py_tuple.hpp index 9a64d4c5..73c9c082 100644 --- a/src/dbzero/object_model/tuple/o_py_tuple.hpp +++ b/src/dbzero/object_model/tuple/o_py_tuple.hpp @@ -13,12 +13,14 @@ using PyObject = _object; namespace db0::object_model { + struct EmbeddedObjectOffsetCollector; DB0_PACKED_BEGIN class DB0_PACKED_ATTR o_py_tuple: public o_tuple<> { public: explicit o_py_tuple(PyObject *sequence); + o_py_tuple(PyObject *sequence, EmbeddedObjectOffsetCollector &offsetCollector); static std::size_t measure(PyObject *sequence); @@ -39,6 +41,9 @@ DB0_PACKED_BEGIN private: static Element elementFromPythonObject(PyObject *object); + static Element elementFromPythonObject( + PyObject *object, EmbeddedObjectOffsetCollector *offsetCollector + ); static std::size_t sequenceSize(PyObject *sequence); static PyObject *sequenceItem(PyObject *sequence, std::size_t index); static std::size_t measureElements(PyObject *sequence); diff --git a/src/dbzero/object_model/tuple/o_tuple.cpp b/src/dbzero/object_model/tuple/o_tuple.cpp index e7dbbaf8..22598199 100644 --- a/src/dbzero/object_model/tuple/o_tuple.cpp +++ b/src/dbzero/object_model/tuple/o_tuple.cpp @@ -158,42 +158,46 @@ namespace db0::object_model } o_tuple_item::Element o_tuple_item::Element::embeddedTuple( - std::size_t size, BytesView::Writer writer, const void *source + std::size_t size, BytesView::Writer writer, const void *source, + EmbeddedObjectOffsetCollector *context ) { Element result; result.m_kind = StorageClass::EMBEDDED_TUPLE; - result.m_payload.m_bytes_value = { nullptr, size, writer, source }; + result.m_payload.m_bytes_value = { nullptr, size, writer, source, context }; return result; } o_tuple_item::Element o_tuple_item::Element::embeddedSet( - std::size_t size, BytesView::Writer writer, const void *source + std::size_t size, BytesView::Writer writer, const void *source, + EmbeddedObjectOffsetCollector *context ) { Element result; result.m_kind = StorageClass::EMBEDDED_SET; - result.m_payload.m_bytes_value = { nullptr, size, writer, source }; + result.m_payload.m_bytes_value = { nullptr, size, writer, source, context }; return result; } o_tuple_item::Element o_tuple_item::Element::embeddedDict( - std::size_t size, BytesView::Writer writer, const void *source + std::size_t size, BytesView::Writer writer, const void *source, + EmbeddedObjectOffsetCollector *context ) { Element result; result.m_kind = StorageClass::EMBEDDED_DICT; - result.m_payload.m_bytes_value = { nullptr, size, writer, source }; + result.m_payload.m_bytes_value = { nullptr, size, writer, source, context }; return result; } o_tuple_item::Element o_tuple_item::Element::embeddedObject( - std::size_t size, BytesView::Writer writer, const void *source + std::size_t size, BytesView::Writer writer, const void *source, + EmbeddedObjectOffsetCollector *context ) { Element result; result.m_kind = StorageClass::EMBEDDED_OBJECT; - result.m_payload.m_bytes_value = { nullptr, size, writer, source }; + result.m_payload.m_bytes_value = { nullptr, size, writer, source, context }; return result; } @@ -352,7 +356,7 @@ namespace db0::object_model if (element.m_payload.m_bytes_value.m_writer) { arrangeMembers()( o_binary::type(), element.bytesSize(), element.m_payload.m_bytes_value.m_writer, - element.m_payload.m_bytes_value.m_source + element.m_payload.m_bytes_value.m_source, element.m_payload.m_bytes_value.m_context ); } else { arrangeMembers()(o_binary::type(), element.bytesData(), element.bytesSize()); diff --git a/src/dbzero/object_model/tuple/o_tuple.hpp b/src/dbzero/object_model/tuple/o_tuple.hpp index 73c8997a..0e503a2c 100644 --- a/src/dbzero/object_model/tuple/o_tuple.hpp +++ b/src/dbzero/object_model/tuple/o_tuple.hpp @@ -17,6 +17,7 @@ namespace db0::object_model { + struct EmbeddedObjectOffsetCollector; DB0_PACKED_BEGIN class DB0_PACKED_ATTR o_tuple_item: public db0::o_base @@ -30,12 +31,15 @@ DB0_PACKED_BEGIN { struct BytesView { - using Writer = void (*)(void *, const void *); + using Writer = void (*)(void *, const void *, EmbeddedObjectOffsetCollector *); const std::byte *m_data = nullptr; std::size_t m_size = 0; Writer m_writer = nullptr; const void *m_source = nullptr; + // Optional construction context. Immutable object creation uses it to pass + // the embedded-offset collector; measurement leaves it null. + EmbeddedObjectOffsetCollector *m_context = nullptr; }; StorageClass m_kind = StorageClass::UNDEFINED; @@ -69,10 +73,22 @@ DB0_PACKED_BEGIN static Element embeddedSet(const void *data, std::size_t size); static Element embeddedDict(const void *data, std::size_t size); static Element embeddedObject(const void *data, std::size_t size); - static Element embeddedTuple(std::size_t size, BytesView::Writer writer, const void *source); - static Element embeddedSet(std::size_t size, BytesView::Writer writer, const void *source); - static Element embeddedDict(std::size_t size, BytesView::Writer writer, const void *source); - static Element embeddedObject(std::size_t size, BytesView::Writer writer, const void *source); + static Element embeddedTuple( + std::size_t size, BytesView::Writer writer, const void *source, + EmbeddedObjectOffsetCollector *context = nullptr + ); + static Element embeddedSet( + std::size_t size, BytesView::Writer writer, const void *source, + EmbeddedObjectOffsetCollector *context = nullptr + ); + static Element embeddedDict( + std::size_t size, BytesView::Writer writer, const void *source, + EmbeddedObjectOffsetCollector *context = nullptr + ); + static Element embeddedObject( + std::size_t size, BytesView::Writer writer, const void *source, + EmbeddedObjectOffsetCollector *context = nullptr + ); std::int64_t intValue() const; std::uint64_t uint64Value() const; diff --git a/tests/unit_tests/ObjectInitializerTest.cpp b/tests/unit_tests/ObjectInitializerTest.cpp index 01b270ac..0392f24e 100644 --- a/tests/unit_tests/ObjectInitializerTest.cpp +++ b/tests/unit_tests/ObjectInitializerTest.cpp @@ -41,10 +41,18 @@ namespace tests static constexpr const char *file_name = "my-test-prefix_1.db0"; void SetUp() override { + if (Py_IsInitialized()) { + PyErr_Clear(); + PyGC_Collect(); + } drop(file_name); } void TearDown() override { + if (Py_IsInitialized()) { + PyErr_Clear(); + PyGC_Collect(); + } drop(file_name); } }; @@ -96,6 +104,16 @@ namespace tests return pyMemo; } + static std::uint64_t offsetOfEmbeddedObject( + const o_immutable_object &root, const o_tuple_item &embeddedItem + ) + { + const auto &embeddedObject = o_embedded_object::__const_ref(embeddedItem.embeddedPayload().begin()); + return static_cast( + reinterpret_cast(&embeddedObject) - reinterpret_cast(&root) + ); + } + static void assertDestroyImmutableRootUnrefsEmbeddedCollectionReference( const char *fieldName, StorageClass fieldStorageClass, const std::function( @@ -808,6 +826,176 @@ namespace tests workspace.close(); } + TEST_F( ObjectInitializerTest, testImmutableRetrievesEmbeddedMemoByOffset ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefix_name); + auto rootMemoType = makeImmutableMemoType(); + auto nestedMemoType = makeImmutableMemoType(); + ASSERT_TRUE(rootMemoType.get()); + ASSERT_TRUE(nestedMemoType.get()); + auto rootClass = fixture->get().getOrCreateType(rootMemoType.get()); + auto nestedClass = fixture->get().getOrCreateType(nestedMemoType.get()); + auto rootLoc = rootClass->addField("inner", 0).get(0).getIndexAndOffset(); + auto nestedLoc = nestedClass->addField("value", 0).get(0).getIndexAndOffset(); + rootClass->flush(); + nestedClass->flush(); + + auto pyRoot = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(rootMemoType.get()) + )); + pyRoot->makeNew(rootClass); + auto pyNested = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(nestedMemoType.get()) + )); + pyNested->makeNew(nestedClass); + + auto *nestedInitializer = dynamic_cast( + InitManager::instance.findInitializer(pyNested->ext()) + ); + ASSERT_NE(nestedInitializer, nullptr); + nestedInitializer->set(nestedLoc, StorageClass::INT64, Value(123)); + + auto *rootInitializer = dynamic_cast( + InitManager::instance.findInitializer(pyRoot->ext()) + ); + ASSERT_NE(rootInitializer, nullptr); + rootInitializer->setObject( + rootLoc, StorageClass::OBJECT_REF, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(reinterpret_cast(pyNested.get())) + ); + + { + db0::FixtureLock lock(fixture); + auto &root = pyRoot->modifyExt(); + root.setLangObject(reinterpret_cast(pyRoot.get())); + root.postInit(lock); + fixture->getLangCache().add(root.getAddress(), reinterpret_cast(pyRoot.get())); + } + + ASSERT_TRUE(nestedInitializer->closed()); + + const auto &root = pyRoot->ext(); + auto *embeddedValue = root->variableValue(rootLoc.first); + ASSERT_NE(embeddedValue, nullptr); + ASSERT_EQ(embeddedValue->itemKind(), StorageClass::EMBEDDED_OBJECT); + auto offset = offsetOfEmbeddedObject(*root.operator->(), *embeddedValue); + ASSERT_TRUE(root->embeddedObjectOffsets().contains(offset)); + + auto embedded = root.getEmbeddedInstanceAtOffset(offset); + ASSERT_TRUE(embedded.get()); + auto value = Py_OWN(PyObject_GetAttrString(embedded.get(), "value")); + ASSERT_TRUE(value.get()); + ASSERT_EQ(PyLong_AsLong(value.get()), 123); + + ASSERT_THROW(root.getEmbeddedInstanceAtOffset(0), db0::BadAddressException); + ASSERT_THROW(root.getEmbeddedInstanceAtOffset(offset + 1), db0::BadAddressException); + + value.reset(); + embedded.reset(); + fixture->getLangCache().erase(root.getAddress()); + const_cast(root).setLangObject(nullptr); + pyNested.reset(); + pyRoot.reset(); + + rootClass.reset(); + nestedClass.reset(); + workspace.close(); + } + + TEST_F( ObjectInitializerTest, testImmutableRetrievesDeepEmbeddedMemoByOffset ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefix_name); + auto rootMemoType = makeImmutableMemoType(); + auto nestedMemoType = makeImmutableMemoType(); + ASSERT_TRUE(rootMemoType.get()); + ASSERT_TRUE(nestedMemoType.get()); + auto rootClass = fixture->get().getOrCreateType(rootMemoType.get()); + auto nestedClass = fixture->get().getOrCreateType(nestedMemoType.get()); + auto rootLoc = rootClass->addField("outer", 0).get(0).getIndexAndOffset(); + auto outerLoc = nestedClass->addField("inner", 0).get(0).getIndexAndOffset(); + auto innerLoc = nestedClass->addField("value", 0).get(0).getIndexAndOffset(); + rootClass->flush(); + nestedClass->flush(); + + auto pyRoot = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(rootMemoType.get()) + )); + pyRoot->makeNew(rootClass); + auto pyOuter = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(nestedMemoType.get()) + )); + pyOuter->makeNew(nestedClass); + auto pyInner = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(nestedMemoType.get()) + )); + pyInner->makeNew(nestedClass); + + auto *innerInitializer = dynamic_cast( + InitManager::instance.findInitializer(pyInner->ext()) + ); + ASSERT_NE(innerInitializer, nullptr); + innerInitializer->set(innerLoc, StorageClass::INT64, Value(456)); + + auto *outerInitializer = dynamic_cast( + InitManager::instance.findInitializer(pyOuter->ext()) + ); + ASSERT_NE(outerInitializer, nullptr); + outerInitializer->setObject( + outerLoc, StorageClass::OBJECT_REF, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(reinterpret_cast(pyInner.get())) + ); + + auto *rootInitializer = dynamic_cast( + InitManager::instance.findInitializer(pyRoot->ext()) + ); + ASSERT_NE(rootInitializer, nullptr); + rootInitializer->setObject( + rootLoc, StorageClass::OBJECT_REF, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(reinterpret_cast(pyOuter.get())) + ); + + { + db0::FixtureLock lock(fixture); + auto &root = pyRoot->modifyExt(); + root.setLangObject(reinterpret_cast(pyRoot.get())); + root.postInit(lock); + fixture->getLangCache().add(root.getAddress(), reinterpret_cast(pyRoot.get())); + } + + const auto &root = pyRoot->ext(); + auto *outerValue = root->variableValue(rootLoc.first); + ASSERT_NE(outerValue, nullptr); + const auto &outerObject = o_embedded_object::__const_ref(outerValue->embeddedPayload().begin()); + auto *innerValue = outerObject.variableValue(outerLoc.first); + ASSERT_NE(innerValue, nullptr); + auto innerOffset = offsetOfEmbeddedObject(*root.operator->(), *innerValue); + ASSERT_TRUE(root->embeddedObjectOffsets().contains(innerOffset)); + + auto embedded = root.getEmbeddedInstanceAtOffset(innerOffset); + ASSERT_TRUE(embedded.get()); + auto value = Py_OWN(PyObject_GetAttrString(embedded.get(), "value")); + ASSERT_TRUE(value.get()); + ASSERT_EQ(PyLong_AsLong(value.get()), 456); + + value.reset(); + embedded.reset(); + fixture->getLangCache().erase(root.getAddress()); + const_cast(root).setLangObject(nullptr); + pyInner.reset(); + pyOuter.reset(); + pyRoot.reset(); + + rootClass.reset(); + nestedClass.reset(); + workspace.close(); + } + TEST_F( ObjectInitializerTest, testDestroyImmutableRootUnrefsEmbeddedNestedObjectMembers ) { Py_Initialize(); diff --git a/tests/unit_tests/PackedOffsetIndexTest.cpp b/tests/unit_tests/PackedOffsetIndexTest.cpp new file mode 100644 index 00000000..c89e47f4 --- /dev/null +++ b/tests/unit_tests/PackedOffsetIndexTest.cpp @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace tests +{ + using namespace db0; + using namespace db0::object_model; + + static_assert( + std::is_base_of< + db0::o_ext, 0, false>, + o_packed_offset_index + >::value, + "Derived overlaid types must use o_ext" + ); + + class PackedOffsetIndexTest: public MemspaceTestBase + { + }; + + static void throwDecodeError() + { + throw std::runtime_error("decode error"); + } + + static void assertIndexConstructionStaysWithinMeasuredSize(const std::vector &offsets) + { + constexpr auto guardSize = std::size_t { 64 }; + constexpr auto guardByte = std::byte { 0xa5 }; + auto measured = o_packed_offset_index::measure(offsets); + std::vector storage(guardSize + measured + guardSize, guardByte); + auto *objectBegin = storage.data() + guardSize; + auto *objectEnd = objectBegin + measured; + + auto &index = o_packed_offset_index::__new(objectBegin, offsets); + + ASSERT_EQ(index.sizeOf(), measured); + ASSERT_TRUE(std::all_of(storage.data(), objectBegin, [](std::byte value) { + return value == guardByte; + })); + ASSERT_TRUE(std::all_of(objectEnd, storage.data() + storage.size(), [](std::byte value) { + return value == guardByte; + })); + } + + static void assertGroupConstructionStaysWithinMeasuredSize(const std::vector &values) + { + constexpr auto guardSize = std::size_t { 64 }; + constexpr auto guardByte = std::byte { 0xa5 }; + auto measured = o_packed_offset_group::measure(values.data(), values.data() + values.size()); + std::vector storage(guardSize + measured + guardSize, guardByte); + auto *objectBegin = storage.data() + guardSize; + auto *objectEnd = objectBegin + measured; + + auto &group = o_packed_offset_group::__new(objectBegin, values.data(), values.data() + values.size()); + + ASSERT_EQ(group.sizeOf(), measured); + ASSERT_TRUE(std::all_of(storage.data(), objectBegin, [](std::byte value) { + return value == guardByte; + })); + ASSERT_TRUE(std::all_of(objectEnd, storage.data() + storage.size(), [](std::byte value) { + return value == guardByte; + })); + } + + TEST_F( PackedOffsetIndexTest, testEmptyIndex ) + { + auto memspace = getMemspace(); + v_object index(memspace, std::vector {}); + + ASSERT_EQ(index->size(), 0u); + ASSERT_TRUE(index->empty()); + ASSERT_FALSE(index->contains(1)); + ASSERT_EQ(index->sizeOf(), o_packed_offset_index::measure({})); + } + + TEST_F( PackedOffsetIndexTest, testSingleAndMisses ) + { + auto memspace = getMemspace(); + v_object index(memspace, std::vector { 42 }); + + ASSERT_EQ(index->size(), 1u); + ASSERT_TRUE(index->contains(42)); + ASSERT_FALSE(index->contains(41)); + ASSERT_FALSE(index->contains(43)); + } + + TEST_F( PackedOffsetIndexTest, testMultiGroupExactLookup ) + { + std::vector offsets { + 1, 2, 3, 127, 128, 16'383, 16'384, 2'097'151, 2'097'152, 268'435'455, 268'435'456 + }; + auto memspace = getMemspace(); + v_object index(memspace, offsets); + + ASSERT_EQ(index->size(), offsets.size()); + for (auto offset: offsets) { + ASSERT_TRUE(index->contains(offset)) << offset; + } + ASSERT_FALSE(index->contains(0)); + ASSERT_FALSE(index->contains(129)); + ASSERT_FALSE(index->contains(2'097'153)); + ASSERT_FALSE(index->contains(999'999'999)); + } + + TEST_F( PackedOffsetIndexTest, testWidthBoundaryGroupsUseSortedInput ) + { + std::vector offsets { + 1, + 127, + 128, + 16'383, + 16'384, + 2'097'151, + 2'097'152, + std::numeric_limits::max() + }; + auto memspace = getMemspace(); + v_object index(memspace, offsets); + + for (auto offset: offsets) { + ASSERT_TRUE(index->contains(offset)) << offset; + } + ASSERT_FALSE(index->contains(0)); + ASSERT_FALSE(index->contains(126)); + ASSERT_FALSE(index->contains(129)); + ASSERT_FALSE(index->contains(16'385)); + ASSERT_FALSE(index->contains(std::numeric_limits::max() - 1)); + } + + TEST_F( PackedOffsetIndexTest, testContainsUsesWidthSelectedGroup ) + { + std::vector offsets { + 1, 3, 127, + 128, 130, 16'383, + 16'384, 16'386, 2'097'151, + 2'097'152, 2'097'154 + }; + auto memspace = getMemspace(); + v_object index(memspace, offsets); + + ASSERT_FALSE(index->contains(2)); + ASSERT_FALSE(index->contains(129)); + ASSERT_FALSE(index->contains(16'385)); + ASSERT_FALSE(index->contains(2'097'153)); + ASSERT_TRUE(index->contains(127)); + ASSERT_TRUE(index->contains(128)); + ASSERT_TRUE(index->contains(16'384)); + ASSERT_TRUE(index->contains(2'097'154)); + } + + TEST_F( PackedOffsetIndexTest, testGroupContainsMatchesLargeCollectionRandomLookups ) + { + std::vector values; + values.reserve(160); + for (std::uint64_t i = 0; i < 160; ++i) { + values.push_back(128 + (i * 2)); + } + auto memspace = getMemspace(); + v_object group(memspace, values.data(), values.data() + values.size()); + std::mt19937 rng(0xDB0); + std::uniform_int_distribution hitDistribution(0, values.size() - 1); + std::uniform_int_distribution missDistribution(0, values.size() - 2); + + ASSERT_FALSE(group->contains(127)); + ASSERT_FALSE(group->contains(16'384)); + + for (auto i = 0; i < 512; ++i) { + auto hitIndex = hitDistribution(rng); + ASSERT_TRUE(group->contains(values[hitIndex])) << values[hitIndex]; + + auto missValue = values[missDistribution(rng)] + 1; + ASSERT_FALSE(group->contains(missValue)) << missValue; + } + } + + TEST_F( PackedOffsetIndexTest, testConstructionDoesNotWriteOutsideMeasuredSize ) + { + assertIndexConstructionStaysWithinMeasuredSize({}); + assertIndexConstructionStaysWithinMeasuredSize({ 42 }); + assertIndexConstructionStaysWithinMeasuredSize({ + 1, + 127, + 128, + 16'383, + 16'384, + 2'097'151, + 2'097'152, + 268'435'455, + 268'435'456, + std::numeric_limits::max() + }); + + std::vector denseOffsets; + denseOffsets.reserve(512); + for (std::uint64_t i = 0; i < 512; ++i) { + denseOffsets.push_back(128 + (i * 3)); + } + assertGroupConstructionStaysWithinMeasuredSize(denseOffsets); + assertIndexConstructionStaysWithinMeasuredSize(denseOffsets); + } + + TEST_F( PackedOffsetIndexTest, testCompactRepresentation ) + { + std::vector oneByteOffsets { 1, 2, 3, 4 }; + std::vector mixedOffsets { 1, 128, 16'384, 2'097'152, 268'435'456 }; + auto memspace = getMemspace(); + + v_object oneByteIndex(memspace, oneByteOffsets); + v_object mixedIndex(memspace, mixedOffsets); + + ASSERT_EQ(oneByteIndex->sizeOf(), 7u); + ASSERT_LT(oneByteIndex->sizeOf(), mixedIndex->sizeOf()); + ASSERT_EQ(oneByteIndex->sizeOf(), o_packed_offset_index::safeSizeOf( + reinterpret_cast(oneByteIndex.getData()) + )); + ASSERT_EQ(mixedIndex->sizeOf(), o_packed_offset_index::safeSizeOf( + reinterpret_cast(mixedIndex.getData()) + )); + } + + TEST_F( PackedOffsetIndexTest, testSafeSizeOfValidatesTruncatedData ) + { + std::vector offsets { 5, 128, 129 }; + auto memspace = getMemspace(); + v_object index(memspace, offsets); + auto begin = reinterpret_cast(index.getData()); + auto size = index->sizeOf(); + + ASSERT_EQ(o_packed_offset_index::safeSizeOf(const_bounded_buf_t(throwDecodeError, begin, begin + size)), size); + ASSERT_THROW( + o_packed_offset_index::safeSizeOf(const_bounded_buf_t(throwDecodeError, begin, begin + size - 1)), + std::runtime_error + ); + } +} diff --git a/tests/unit_tests/SerializationTest.cpp b/tests/unit_tests/SerializationTest.cpp index def06435..c7ce46b7 100644 --- a/tests/unit_tests/SerializationTest.cpp +++ b/tests/unit_tests/SerializationTest.cpp @@ -3,8 +3,14 @@ #include #include +#include +#include +#include +#include #include +#include + namespace tests { @@ -26,5 +32,94 @@ namespace tests // reading past the end of the buffer should throw ASSERT_ANY_THROW(db0::serial::read(iter, end)); } + + TEST( SerializationTest , testRegularOListKeepsFixedSizeHeader ) + { + using List = db0::o_list >; + std::vector values { 10, 20, 30 }; + std::vector buf(List::measure(values)); + + auto &list = List::__new(buf.data(), values); + + ASSERT_EQ(db0::true_size_of(), 8u); + ASSERT_EQ(list.size(), 3u); + ASSERT_EQ(list.sizeOf(), 8u + values.size() * sizeof(std::uint32_t)); + ASSERT_EQ(List::safeSizeOf(buf.data()), list.sizeOf()); + + auto it = list.begin(); + ASSERT_EQ((*it).value(), 10u); + ++it; + ASSERT_EQ((*it).value(), 20u); + ++it; + ASSERT_EQ((*it).value(), 30u); + ++it; + ASSERT_EQ(it, list.end()); + } + + TEST( SerializationTest , testRegularOListSafeSizeOfUsesDeclaredSize ) + { + using List = db0::o_list >; + std::vector values { 10, 20, 30 }; + std::vector buf(List::measure(values)); + auto &list = List::__new(buf.data(), values); + list.count = 4; + + auto throwOutOfBounds = []() { + throw std::runtime_error("decode error"); + }; + db0::const_bounded_buf_t bounded(throwOutOfBounds, buf.data(), buf.data() + buf.size()); + + ASSERT_EQ(List::safeSizeOf(bounded), list.sizeOf()); + } + + TEST( SerializationTest , testCompactOListStoresOnlyPackedSize ) + { + using List = db0::o_list, true>; + std::vector values { 10, 20, 30 }; + std::vector buf(List::measure(values)); + + auto &list = List::__new(buf.data(), values); + + ASSERT_EQ(db0::true_size_of(), 0u); + ASSERT_EQ(list.size(), 3u); + ASSERT_EQ(list.sizeOf(), 1u + values.size() * sizeof(std::uint32_t)); + ASSERT_EQ(List::safeSizeOf(buf.data()), list.sizeOf()); + + auto it = list.begin(); + ASSERT_EQ((*it).value(), 10u); + ++it; + ASSERT_EQ((*it).value(), 20u); + ++it; + ASSERT_EQ((*it).value(), 30u); + ++it; + ASSERT_EQ(it, list.end()); + } + + TEST( SerializationTest , testCompactOListEmptyUsesOneByte ) + { + using List = db0::o_list, true>; + std::vector buf(List::measure()); + + auto &list = List::__new(buf.data()); + + ASSERT_EQ(list.sizeOf(), 1u); + ASSERT_EQ(list.size(), 0u); + ASSERT_TRUE(list.empty()); + ASSERT_EQ(list.begin(), list.end()); + ASSERT_EQ(List::safeSizeOf(buf.data()), 1u); + } + + TEST( SerializationTest , testCompactOListMeasureHandlesPackedSizeBoundary ) + { + using List = db0::o_list, true>; + std::vector values(32, 7); + std::vector buf(List::measure(values)); + + auto &list = List::__new(buf.data(), values); + + ASSERT_EQ(list.sizeOf(), 2u + values.size() * sizeof(std::uint32_t)); + ASSERT_EQ(list.size(), values.size()); + ASSERT_EQ(List::safeSizeOf(buf.data()), list.sizeOf()); + } -} \ No newline at end of file +}