From ea08eab682e9026ba1460691b24d85e6585b572c Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 20 May 2026 10:34:01 +0200 Subject: [PATCH 1/8] ObjectImmutableImpl refactor / preparation for integration --- src/dbzero/object_model/object/Object.cpp | 17 +- src/dbzero/object_model/object/Object.hpp | 6 + .../object_model/object/ObjectImplBase.cpp | 61 +++++-- .../object_model/object/ObjectImplBase.hpp | 5 +- .../object_model/object/ObjectInitializer.cpp | 52 +++--- .../object_model/object/ObjectInitializer.hpp | 15 +- .../object_model/object/o_embedded_object.cpp | 42 ++++- .../object_model/object/o_embedded_object.hpp | 10 ++ .../object/o_immutable_object.cpp | 66 ++++++-- .../object/o_immutable_object.hpp | 23 ++- tests/unit_tests/EmbeddedObjectTest.cpp | 93 +++++++++++ tests/unit_tests/ObjectInitializerTest.cpp | 156 +++++++++++++++++- 12 files changed, 471 insertions(+), 75 deletions(-) diff --git a/src/dbzero/object_model/object/Object.cpp b/src/dbzero/object_model/object/Object.cpp index 0d8c914f..1e2fff1a 100644 --- a/src/dbzero/object_model/object/Object.cpp +++ b/src/dbzero/object_model/object/Object.cpp @@ -89,6 +89,21 @@ namespace db0::object_model return getType().isSingleton(); } + void Object::detach() const + { + // invalidate since detach is not supported by the MorphingBIndex + m_kv_index = nullptr; + super_t::detach(); + } + + void Object::commit() const + { + if (m_kv_index) { + m_kv_index->commit(); + } + super_t::commit(); + } + bool Object::tryFindMemberAt(std::pair field_info, std::pair &result, std::pair &find_result) const { @@ -614,4 +629,4 @@ namespace db0::object_model new ((void*)this) Object(tag_as_dropped(), unique_addr, ext_refs); } -} \ No newline at end of file +} diff --git a/src/dbzero/object_model/object/Object.hpp b/src/dbzero/object_model/object/Object.hpp index 7b1af955..a8ee7603 100644 --- a/src/dbzero/object_model/object/Object.hpp +++ b/src/dbzero/object_model/object/Object.hpp @@ -37,9 +37,15 @@ namespace db0::object_model // Destroys an existing instance and constructs a "null" placeholder void dropInstance(FixtureLock &); + void detach() const; + void commit() const; + protected: friend super_t; + // local kv-index instance cache (created at first use) + mutable std::unique_ptr m_kv_index; + bool tryFindMemberAt(std::pair field_info, std::pair &result, std::pair &find_result) const; diff --git a/src/dbzero/object_model/object/ObjectImplBase.cpp b/src/dbzero/object_model/object/ObjectImplBase.cpp index 9877c4a3..e660d3f1 100644 --- a/src/dbzero/object_model/object/ObjectImplBase.cpp +++ b/src/dbzero/object_model/object/ObjectImplBase.cpp @@ -31,6 +31,25 @@ namespace db0::object_model } return static_cast(value); } + + bool isEmbeddableType(TypeId typeId, StorageClass storageClass) + { + switch (storageClass) { + case StorageClass::STRING_REF: + case StorageClass::DB0_BYTES: + return true; + case StorageClass::DB0_LIST: + return typeId == TypeId::LIST; + case StorageClass::DB0_TUPLE: + return typeId == TypeId::TUPLE; + case StorageClass::DB0_SET: + return typeId == TypeId::SET; + case StorageClass::DB0_DICT: + return typeId == TypeId::DICT; + default: + return false; + } + } template ObjectImplBase::ObjectImplBase(tag_as_dropped, UniqueAddress addr, unsigned int ext_refs) @@ -149,11 +168,19 @@ namespace db0::object_model assert(this->m_type); auto &type = *this->m_type; - super_t::init(*fixture, type.getClassRef(), initializer.getRefCounts(), - safeCast(type.getNumBases() + 1, "Too many base classes"), - pos_vt_data, pos_vt_offset, index_vt_data.first, index_vt_data.second, - getAccessOptions(type) - ); + auto numTypeTags = safeCast(type.getNumBases() + 1, "Too many base classes"); + if constexpr (std::is_same_v) { + auto *immutableInitializer = dynamic_cast(&initializer); + assert(immutableInitializer); + super_t::init(*fixture, type.getClassRef(), initializer.getRefCounts(), numTypeTags, + *immutableInitializer, getAccessOptions(type) + ); + } else { + super_t::init(*fixture, type.getClassRef(), initializer.getRefCounts(), numTypeTags, + pos_vt_data, pos_vt_offset, index_vt_data.first, index_vt_data.second, + getAccessOptions(type) + ); + } // reference associated class type.incRef(false); @@ -250,9 +277,19 @@ namespace db0::object_model // register a regular member with the initializer // NOTE: a new member receives the no-cache flag if set (at the type level) auto member_flags = type.isNoCache() ? AccessFlags { AccessOptions::no_cache } : AccessFlags(); - initializer.set(member_id.get(0).getIndexAndOffset(), storage_class, - createMember(fixture, type_id, storage_class, obj_ptr, member_flags) - ); + auto loc = member_id.get(0).getIndexAndOffset(); + if constexpr (std::is_same_v) { + if (isEmbeddableType(type_id, storage_class)) { + auto &immutableInitializer = dynamic_cast(initializer); + immutableInitializer.setObject(loc, storage_class, {}, ObjectSharedPtr(obj_ptr)); + } else { + auto value = createMember(fixture, type_id, storage_class, obj_ptr, member_flags); + initializer.set(loc, storage_class, value); + } + } else { + auto value = createMember(fixture, type_id, storage_class, obj_ptr, member_flags); + initializer.set(loc, storage_class, value); + } } else { if (member_id.hasFidelity(0)) { // remove any existing regular initialization @@ -266,7 +303,8 @@ namespace db0::object_model auto value = lofi_store<2>::create(loc.second, createMember(fixture, type_id, storage_class, obj_ptr, {}).m_store); // register a lo-fi member with the initializer (using mask) - initializer.set(loc, storage_class, value, lofi_store<2>::mask(loc.second)); + auto mask = lofi_store<2>::mask(loc.second); + initializer.set(loc, storage_class, value, mask); } } @@ -1027,8 +1065,6 @@ namespace db0::object_model void ObjectImplBase::detach() const { this->m_type->detach(); - // invalidate since detach is not supported by the MorphingBIndex - this->m_kv_index = nullptr; super_t::detach(); } @@ -1036,9 +1072,6 @@ namespace db0::object_model void ObjectImplBase::commit() const { this->m_type->commit(); - if (m_kv_index) { - m_kv_index->commit(); - } super_t::commit(); // reset the silent-mutation flag this->m_touched = false; diff --git a/src/dbzero/object_model/object/ObjectImplBase.hpp b/src/dbzero/object_model/object/ObjectImplBase.hpp index 149f79a6..54b6f4eb 100644 --- a/src/dbzero/object_model/object/ObjectImplBase.hpp +++ b/src/dbzero/object_model/object/ObjectImplBase.hpp @@ -135,10 +135,7 @@ namespace db0::object_model // of the use of num_type_tags property bool hasRefs() const; - protected: - // local kv-index instance cache (created at first use) - mutable std::unique_ptr m_kv_index; - + protected: void setType(std::shared_ptr); // adjusts to actual type if the type hint is a base class // @return true if type was changed (type hint hit) diff --git a/src/dbzero/object_model/object/ObjectInitializer.cpp b/src/dbzero/object_model/object/ObjectInitializer.cpp index 181ffe04..f370b9ca 100644 --- a/src/dbzero/object_model/object/ObjectInitializer.cpp +++ b/src/dbzero/object_model/object/ObjectInitializer.cpp @@ -2,7 +2,6 @@ // Copyright (c) 2025 DBZero Software sp. z o.o. #include "ObjectInitializer.hpp" -#include #include #include @@ -192,27 +191,36 @@ namespace db0::object_model ObjectSharedPtr object, std::uint64_t mask ) { - set(loc, storage_class, value, mask); if (isFixedStorageClass(storage_class)) { - eraseObjectAt(loc); + ObjectInitializer::set(loc, storage_class, value, mask); + appendObjectTombstone(loc); return; } - eraseObjectAt(loc); m_objects.push_back({ loc, storage_class, std::move(object) }); } + void ImmutableObjectInitializer::set( + std::pair loc, StorageClass storage_class, Value value, std::uint64_t mask + ) + { + appendObjectTombstone(loc); + ObjectInitializer::set(loc, storage_class, value, mask); + } + bool ImmutableObjectInitializer::remove(std::pair loc, std::uint64_t mask) { - eraseObjectAt(loc); - return ObjectInitializer::remove(loc, mask); + auto hadObject = hasObjectAt(loc); + appendObjectTombstone(loc); + return ObjectInitializer::remove(loc, mask) || hadObject; } bool ImmutableObjectInitializer::tryGetObjectAt( std::pair loc, ObjectSharedPtr &object ) const { - for (const auto &value: m_objects) { + for (auto it = m_objects.rbegin(); it != m_objects.rend(); ++it) { + const auto &value = *it; if (value.m_loc == loc) { object = value.m_object; return object.get() != nullptr; @@ -225,23 +233,15 @@ namespace db0::object_model PosVT::Data &data, unsigned int &offset ) const { - m_values.sortAndMerge(); - m_fixed_values.clear(); - m_fixed_values.reserve(m_values.size()); - for (const auto &value: m_values) { - m_fixed_values.push_back(value); - } for (const auto &value: m_objects) { assert(value.m_loc.second == 0 && "Variable-length embedded fields must use default fidelity"); - m_fixed_values.remove(value.m_loc.first); } - return getDataFrom(m_fixed_values, data, offset); + return getDataFrom(m_values, data, offset); } void ImmutableObjectInitializer::resetObjects() { m_objects.clear(); - m_fixed_values.clear(); } const std::vector &ImmutableObjectInitializer::objects() const @@ -249,14 +249,20 @@ namespace db0::object_model return m_objects; } - void ImmutableObjectInitializer::eraseObjectAt(std::pair loc) + bool ImmutableObjectInitializer::empty() const + { + return ObjectInitializer::empty() && m_objects.empty(); + } + + void ImmutableObjectInitializer::appendObjectTombstone(std::pair loc) + { + m_objects.push_back({ loc, StorageClass::DELETED, {} }); + } + + bool ImmutableObjectInitializer::hasObjectAt(std::pair loc) const { - m_objects.erase( - std::remove_if(m_objects.begin(), m_objects.end(), [&](const auto &value) { - return value.m_loc == loc; - }), - m_objects.end() - ); + ObjectSharedPtr object; + return tryGetObjectAt(loc, object); } bool ObjectInitializer::trySetFixture(db0::swine_ptr &new_fixture) diff --git a/src/dbzero/object_model/object/ObjectInitializer.hpp b/src/dbzero/object_model/object/ObjectInitializer.hpp index 12389fcc..ee65f301 100644 --- a/src/dbzero/object_model/object/ObjectInitializer.hpp +++ b/src/dbzero/object_model/object/ObjectInitializer.hpp @@ -148,9 +148,9 @@ namespace db0::object_model } // @param mask required for lo-fi types (pack-2) - void set(std::pair loc, StorageClass storage_class, Value value, + virtual void set(std::pair loc, StorageClass storage_class, Value value, std::uint64_t mask = 0); - bool remove(std::pair loc, std::uint64_t mask = 0); + virtual bool remove(std::pair loc, std::uint64_t mask = 0); // Allows migrating initialization to other fixture (only for empty ObjectInitializer) // @return false if operation failed (exception not thrown) @@ -194,7 +194,7 @@ namespace db0::object_model // performs a deferred incRef on an actual object instance (the ref-count reflected upon creation) void incRef(bool is_tag); - bool empty() const; + virtual bool empty() const; protected: friend class ObjectInitializerManager; @@ -237,10 +237,13 @@ namespace db0::object_model std::pair loc, StorageClass storage_class, Value value, ObjectSharedPtr object, std::uint64_t mask = 0 ); - bool remove(std::pair loc, std::uint64_t mask = 0); + void set(std::pair loc, StorageClass storage_class, Value value, + std::uint64_t mask = 0) override; + bool remove(std::pair loc, std::uint64_t mask = 0) override; bool tryGetObjectAt(std::pair loc, ObjectSharedPtr &object) const; std::pair getData(PosVT::Data &data, unsigned int &pos_vt_offset) const; void resetObjects(); + bool empty() const override; static bool isFixedStorageClass(StorageClass storage_class); @@ -255,9 +258,9 @@ namespace db0::object_model private: std::vector m_objects; - mutable XValuesVector m_fixed_values; - void eraseObjectAt(std::pair loc); + void appendObjectTombstone(std::pair loc); + bool hasObjectAt(std::pair loc) const; }; template diff --git a/src/dbzero/object_model/object/o_embedded_object.cpp b/src/dbzero/object_model/object/o_embedded_object.cpp index 3d12daed..9882803f 100644 --- a/src/dbzero/object_model/object/o_embedded_object.cpp +++ b/src/dbzero/object_model/object/o_embedded_object.cpp @@ -74,8 +74,12 @@ namespace db0::object_model o_dict::ElementMap fieldMap; for (const auto &value: initializer.objects()) { assert(value.m_loc.second == 0 && "Variable-length embedded fields must use default fidelity"); - fieldMap[o_dict::Element::integer(value.m_loc.first)] = - fieldMapElementFromObject(value.m_storage_class, value.m_object); + auto key = o_dict::Element::integer(value.m_loc.first); + if (!value.m_object) { + fieldMap.erase(key); + } else { + fieldMap[key] = fieldMapElementFromObject(value.m_storage_class, value.m_object); + } } return fieldMap; } @@ -128,6 +132,18 @@ namespace db0::object_model (o_dict::type(), fieldMap); } + o_embedded_object::o_embedded_object( + std::uint32_t classRefValue, const PosVT::Data &posVtData, unsigned int posVtOffset, + const XValue *indexVtBegin, const XValue *indexVtEnd + ) + { + arrangeMembers() + (db0::packed_int32::type(), classRefValue) + (PosVT::type(), posVtData, posVtOffset) + (IndexVT::type(), indexVtBegin, indexVtEnd) + (o_dict::type(), o_dict::ElementMap()); + } + std::uint32_t o_embedded_object::getClassRef() const { return classRef().value(); @@ -138,11 +154,21 @@ namespace db0::object_model return getDynAfter(classRef(), PosVT::type()); } + PosVT &o_embedded_object::pos_vt() + { + return getDynAfter(classRef(), PosVT::type()); + } + const IndexVT &o_embedded_object::index_vt() const { return getDynAfter(pos_vt(), IndexVT::type()); } + IndexVT &o_embedded_object::index_vt() + { + return getDynAfter(pos_vt(), IndexVT::type()); + } + const o_dict &o_embedded_object::field_map() const { return getDynAfter(index_vt(), o_dict::type()); @@ -189,6 +215,18 @@ namespace db0::object_model (o_dict::type(), fieldMap); } + std::size_t o_embedded_object::measure( + std::uint32_t classRefValue, const PosVT::Data &posVtData, unsigned int posVtOffset, + const XValue *indexVtBegin, const XValue *indexVtEnd + ) + { + return measureMembers() + (db0::packed_int32::type(), classRefValue) + (PosVT::type(), posVtData, posVtOffset) + (IndexVT::type(), indexVtBegin, indexVtEnd) + (o_dict::type(), o_dict::ElementMap()); + } + const db0::packed_int32 &o_embedded_object::classRef() const { return getDynFirst(db0::packed_int32::type()); diff --git a/src/dbzero/object_model/object/o_embedded_object.hpp b/src/dbzero/object_model/object/o_embedded_object.hpp index a96d8eb6..e1b8ae68 100644 --- a/src/dbzero/object_model/object/o_embedded_object.hpp +++ b/src/dbzero/object_model/object/o_embedded_object.hpp @@ -41,16 +41,26 @@ DB0_PACKED_BEGIN using Element = o_tuple_item::Element; o_embedded_object(std::uint32_t classRef, const ImmutableObjectInitializer &initializer); + o_embedded_object( + std::uint32_t classRef, const PosVT::Data &posVtData, unsigned int posVtOffset, + const XValue *indexVtBegin = nullptr, const XValue *indexVtEnd = nullptr + ); std::uint32_t getClassRef() const; const PosVT &pos_vt() const; + PosVT &pos_vt(); const IndexVT &index_vt() const; + IndexVT &index_vt(); const o_dict &field_map() const; std::optional fixedValue(std::uint32_t index, unsigned int fidelityOffset = 0) const; const o_tuple_item *variableValue(std::uint32_t index) const; std::size_t sizeOf() const; static std::size_t measure(std::uint32_t classRef, const ImmutableObjectInitializer &initializer); + static std::size_t measure( + std::uint32_t classRef, const PosVT::Data &posVtData, unsigned int posVtOffset, + const XValue *indexVtBegin = nullptr, const XValue *indexVtEnd = nullptr + ); template static std::size_t safeSizeOf(BufT buf) { diff --git a/src/dbzero/object_model/object/o_immutable_object.cpp b/src/dbzero/object_model/object/o_immutable_object.cpp index c5ec1ae7..af09f898 100644 --- a/src/dbzero/object_model/object/o_immutable_object.cpp +++ b/src/dbzero/object_model/object/o_immutable_object.cpp @@ -11,6 +11,16 @@ namespace db0::object_model { + o_immutable_object::o_immutable_object(std::uint32_t class_ref, + std::pair ref_counts, std::uint8_t num_type_tags, + const ImmutableObjectInitializer &initializer) + : m_header(ref_counts) + , m_num_type_tags(num_type_tags) + { + arrangeMembers() + (o_embedded_object::type(), class_ref, initializer); + } + o_immutable_object::o_immutable_object(std::uint32_t class_ref, std::pair ref_counts, std::uint8_t num_type_tags, const PosVT::Data &pos_vt_data, unsigned int pos_vt_offset, const XValue *index_vt_begin, const XValue *index_vt_end) @@ -18,9 +28,14 @@ namespace db0::object_model , m_num_type_tags(num_type_tags) { arrangeMembers() - (PosVT::type(), pos_vt_data, pos_vt_offset) - (packed_int32::type(), class_ref) - (IndexVT::type(), index_vt_begin, index_vt_end); + (o_embedded_object::type(), class_ref, pos_vt_data, pos_vt_offset, index_vt_begin, index_vt_end); + } + + std::size_t o_immutable_object::measure(std::uint32_t class_ref, + std::pair, std::uint8_t, const ImmutableObjectInitializer &initializer) + { + return super_t::measureMembers() + (o_embedded_object::type(), class_ref, initializer); } std::size_t o_immutable_object::measure(std::uint32_t class_ref, @@ -28,33 +43,52 @@ namespace db0::object_model const XValue *index_vt_begin, const XValue *index_vt_end) { return super_t::measureMembers() - (PosVT::type(), pos_vt_data, pos_vt_offset) - (packed_int32::type(), class_ref) - (IndexVT::type(), index_vt_begin, index_vt_end); + (o_embedded_object::type(), class_ref, pos_vt_data, pos_vt_offset, index_vt_begin, index_vt_end); } - const PosVT &o_immutable_object::pos_vt() const { - return getDynFirst(PosVT::type()); + o_embedded_object &o_immutable_object::embeddedObject() + { + return getDynFirst(o_embedded_object::type()); } - PosVT &o_immutable_object::pos_vt() { - return getDynFirst(PosVT::type()); + const o_embedded_object &o_immutable_object::embeddedObject() const + { + return getDynFirst(o_embedded_object::type()); } - const packed_int32 &o_immutable_object::classRef() const { - return getDynAfter(pos_vt(), packed_int32::type()); + const PosVT &o_immutable_object::pos_vt() const { + return embeddedObject().pos_vt(); + } + + PosVT &o_immutable_object::pos_vt() { + return embeddedObject().pos_vt(); } std::uint32_t o_immutable_object::getClassRef() const { - return classRef().value(); + return embeddedObject().getClassRef(); } const IndexVT &o_immutable_object::index_vt() const { - return getDynAfter(classRef(), IndexVT::type()); + return embeddedObject().index_vt(); } IndexVT &o_immutable_object::index_vt() { - return getDynAfter(classRef(), IndexVT::type()); + return embeddedObject().index_vt(); + } + + const o_dict &o_immutable_object::field_map() const + { + return embeddedObject().field_map(); + } + + std::optional o_immutable_object::fixedValue(std::uint32_t index, unsigned int fidelityOffset) const + { + return embeddedObject().fixedValue(index, fidelityOffset); + } + + const o_tuple_item *o_immutable_object::variableValue(std::uint32_t index) const + { + return embeddedObject().variableValue(index); } void o_immutable_object::incRef(bool is_tag) { @@ -74,4 +108,4 @@ namespace db0::object_model return m_header.hasRefs(); } -} \ No newline at end of file +} diff --git a/src/dbzero/object_model/object/o_immutable_object.hpp b/src/dbzero/object_model/object/o_immutable_object.hpp index 8f4ca1f4..bcf4c23f 100644 --- a/src/dbzero/object_model/object/o_immutable_object.hpp +++ b/src/dbzero/object_model/object/o_immutable_object.hpp @@ -5,7 +5,7 @@ #include #include -#include "ValueTable.hpp" +#include "o_embedded_object.hpp" #include namespace db0::object_model @@ -24,22 +24,35 @@ DB0_PACKED_BEGIN o_unique_header m_header; // number of auto-assigned type tags std::uint8_t m_num_type_tags = 0; + + o_embedded_object &embeddedObject(); + const o_embedded_object &embeddedObject() const; PosVT &pos_vt(); const PosVT &pos_vt() const; - const packed_int32 &classRef() const; std::uint32_t getClassRef() const; const IndexVT &index_vt() const; IndexVT &index_vt(); + const o_dict &field_map() const; + std::optional fixedValue(std::uint32_t index, unsigned int fidelityOffset = 0) const; + const o_tuple_item *variableValue(std::uint32_t index) const; // ref_counts - the initial reference counts (tags / objects) inherited from the initializer + o_immutable_object( + std::uint32_t class_ref, std::pair ref_counts, + std::uint8_t num_type_tags, const ImmutableObjectInitializer &initializer + ); o_immutable_object(std::uint32_t class_ref, std::pair ref_counts, std::uint8_t num_type_tags, const PosVT::Data &pos_vt_data, unsigned int pos_vt_offset, const XValue *index_vt_begin = nullptr, const XValue *index_vt_end = nullptr); + static std::size_t measure( + std::uint32_t, std::pair, std::uint8_t num_type_tags, + const ImmutableObjectInitializer &initializer + ); static std::size_t measure(std::uint32_t, std::pair, std::uint8_t num_type_tags, const PosVT::Data &pos_vt_data, unsigned int pos_vt_offset, const XValue *index_vt_begin = nullptr, const XValue *index_vt_end = nullptr); @@ -47,9 +60,7 @@ DB0_PACKED_BEGIN template static std::size_t safeSizeOf(BufT buf) { return super_t::sizeOfMembers(buf) - (PosVT::type()) - (packed_int32::type()) - (IndexVT::type()); + (o_embedded_object::type()); } void incRef(bool is_tag); @@ -58,4 +69,4 @@ DB0_PACKED_BEGIN }; DB0_PACKED_END -} \ No newline at end of file +} diff --git a/tests/unit_tests/EmbeddedObjectTest.cpp b/tests/unit_tests/EmbeddedObjectTest.cpp index eee26230..b745eccf 100644 --- a/tests/unit_tests/EmbeddedObjectTest.cpp +++ b/tests/unit_tests/EmbeddedObjectTest.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -90,6 +91,40 @@ namespace tests ASSERT_FALSE(object->fixedValue(999).has_value()); } + TEST_F( EmbeddedObjectTest , testImmutableRootEncapsulatesEmbeddedObjectStorage ) + { + Py_Initialize(); + + auto memspace = getMemspace(); + int sourceObject = 0; + ObjectInitializerManager manager; + auto &initializer = makeInitializer(manager, sourceObject); + initializer.set({0, 0}, StorageClass::INT64, Value(42)); + + auto pyString = Py_OWN(PyUnicode_FromString("root variable string")); + initializer.setObject( + {300, 0}, StorageClass::STRING_REF, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(pyString.get()) + ); + + v_object object(memspace, 88u, std::make_pair(1u, 2u), 1u, initializer); + + ASSERT_EQ(object->getClassRef(), 88u); + ASSERT_TRUE(object->hasAnyRefs()); + auto fixedValue = object->fixedValue(0); + ASSERT_TRUE(fixedValue.has_value()); + ASSERT_EQ(fixedValue->m_kind, StorageClass::INT64); + ASSERT_EQ(fixedValue->m_value, 42u); + + auto *variableValue = object->variableValue(300); + ASSERT_NE(variableValue, nullptr); + ASSERT_EQ(variableValue->itemKind(), StorageClass::STRING_REF); + ASSERT_EQ(variableValue->stringPayload().toString(), "root variable string"); + ASSERT_EQ(&object->embeddedObject().pos_vt(), &object->pos_vt()); + ASSERT_EQ(&object->embeddedObject().index_vt(), &object->index_vt()); + ASSERT_EQ(&object->embeddedObject().field_map(), &object->field_map()); + } + TEST_F( EmbeddedObjectTest , testEmbeddedObjectStoresVariableFieldsInDictMap ) { Py_Initialize(); @@ -127,6 +162,64 @@ namespace tests ASSERT_EQ(object->variableValue(999), nullptr); } + TEST_F( EmbeddedObjectTest , testEmbeddedObjectUsesLatestVariableFieldMapValue ) + { + Py_Initialize(); + + auto memspace = getMemspace(); + int sourceObject = 0; + ObjectInitializerManager manager; + auto &initializer = makeInitializer(manager, sourceObject); + auto pyString1 = Py_OWN(PyUnicode_FromString("old value")); + auto pyString2 = Py_OWN(PyUnicode_FromString("new value")); + initializer.setObject( + {300, 0}, StorageClass::STRING_REF, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(pyString1.get()) + ); + initializer.setObject( + {300, 0}, StorageClass::STRING_REF, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(pyString2.get()) + ); + + ImmutableObjectInitializer::ObjectSharedPtr storedObject; + ASSERT_TRUE(initializer.tryGetObjectAt({300, 0}, storedObject)); + ASSERT_EQ(storedObject.get(), pyString2.get()); + + v_object object(memspace, 88u, initializer); + + auto *stringValue = object->variableValue(300); + ASSERT_NE(stringValue, nullptr); + ASSERT_EQ(stringValue->itemKind(), StorageClass::STRING_REF); + ASSERT_EQ(stringValue->stringPayload().toString(), "new value"); + } + + TEST_F( EmbeddedObjectTest , testEmbeddedObjectTombstoneRemovesVariableFieldMapValue ) + { + Py_Initialize(); + + auto memspace = getMemspace(); + int sourceObject = 0; + ObjectInitializerManager manager; + auto &initializer = makeInitializer(manager, sourceObject); + auto pyString = Py_OWN(PyUnicode_FromString("old variable value")); + initializer.setObject( + {300, 0}, StorageClass::STRING_REF, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(pyString.get()) + ); + initializer.set({300, 0}, StorageClass::INT64, Value(91)); + + ImmutableObjectInitializer::ObjectSharedPtr storedObject; + ASSERT_FALSE(initializer.tryGetObjectAt({300, 0}, storedObject)); + + v_object object(memspace, 88u, initializer); + + ASSERT_EQ(object->variableValue(300), nullptr); + auto fixedValue = object->fixedValue(300); + ASSERT_TRUE(fixedValue.has_value()); + ASSERT_EQ(fixedValue->m_kind, StorageClass::INT64); + ASSERT_EQ(fixedValue->m_value, 91u); + } + TEST_F( EmbeddedObjectTest , testEmbeddedObjectStoresNestedTuplePayload ) { Py_Initialize(); diff --git a/tests/unit_tests/ObjectInitializerTest.cpp b/tests/unit_tests/ObjectInitializerTest.cpp index 56a18f83..2f66e0a4 100644 --- a/tests/unit_tests/ObjectInitializerTest.cpp +++ b/tests/unit_tests/ObjectInitializerTest.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -163,11 +164,10 @@ namespace tests auto py_value = Py_OWN(PyLong_FromLong(42)); ImmutableObjectInitializer::ObjectSharedPtr object_value(py_value.get()); initializer->setObject({9, 0}, StorageClass::STRING_REF, Value(123), object_value); + ASSERT_FALSE(initializer->empty()); std::pair stored_value; - ASSERT_TRUE(initializer->tryGetAt({9, 0}, stored_value)); - ASSERT_EQ(stored_value.first, StorageClass::STRING_REF); - ASSERT_EQ(stored_value.second, Value(123)); + ASSERT_FALSE(initializer->tryGetAt({9, 0}, stored_value)); ImmutableObjectInitializer::ObjectSharedPtr stored_object; ASSERT_TRUE(initializer->tryGetObjectAt({9, 0}, stored_object)); @@ -334,6 +334,156 @@ namespace tests workspace.close(); } + TEST_F( ObjectInitializerTest, testObjectImmutableImplPostInitUsesEmbeddedStorageAndNoKVIndex ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefix_name); + std::shared_ptr mock_class = getTestClass(fixture); + + { + ObjectImmutableImpl object(mock_class); + auto py_int = Py_OWN(PyLong_FromLong(42)); + object.setPreInit("value", db0::bindings::TypeId::INTEGER, py_int.get()); + auto py_string = Py_OWN(PyUnicode_FromString("immutable payload")); + object.setPreInit("name", db0::bindings::TypeId::STRING, py_string.get()); + + { + db0::FixtureLock lock(fixture); + object.postInit(lock); + } + + auto layout = object.getFieldLayout(); + ASSERT_TRUE(layout.m_kv_index_fields.empty()); + ASSERT_FALSE(layout.m_pos_vt_fields.empty()); + + std::optional fixedValue; + for (std::uint32_t index = 0; index < 32 && !fixedValue.has_value(); ++index) { + auto candidate = object->fixedValue(index); + if (candidate && candidate->m_value == 42u) { + fixedValue = candidate; + } + } + ASSERT_TRUE(fixedValue.has_value()); + + const o_tuple_item *variableValue = nullptr; + for (std::uint32_t index = 0; index < 32 && !variableValue; ++index) { + auto *candidate = object->variableValue(index); + if (candidate && candidate->itemKind() == StorageClass::STRING_REF) { + variableValue = candidate; + } + } + ASSERT_NE(variableValue, nullptr); + ASSERT_EQ(variableValue->stringPayload().toString(), "immutable payload"); + } + + workspace.close(); + } + + TEST_F( ObjectInitializerTest, testImmutablePreInitEmbeddableValueDoesNotCreateDurableMemberValue ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefix_name); + std::shared_ptr mock_class = getTestClass(fixture); + + { + ObjectImmutableImpl object(mock_class); + auto py_string = Py_OWN(PyUnicode_FromString("embedded without durable side object")); + object.setPreInit("name", db0::bindings::TypeId::STRING, py_string.get()); + + auto *initializer = dynamic_cast(InitManager::instance.findInitializer(object)); + ASSERT_NE(initializer, nullptr); + ASSERT_FALSE(initializer->objects().empty()); + auto loc = initializer->objects().back().m_loc; + ASSERT_EQ(initializer->objects().back().m_storage_class, StorageClass::STRING_REF); + + std::pair storedValue; + ASSERT_FALSE(initializer->tryGetAt(loc, storedValue)); + + ImmutableObjectInitializer::ObjectSharedPtr storedObject; + ASSERT_TRUE(initializer->tryGetObjectAt(loc, storedObject)); + ASSERT_EQ(storedObject.get(), py_string.get()); + } + + workspace.close(); + } + + TEST_F( ObjectInitializerTest, testImmutablePreInitChangingRegularValueToLoFiClearsEmbeddedObject ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefix_name); + std::shared_ptr mock_class = getTestClass(fixture); + + { + ObjectImmutableImpl object(mock_class); + auto pyString = Py_OWN(PyUnicode_FromString("stale embedded object")); + object.setPreInit("name", db0::bindings::TypeId::STRING, pyString.get()); + + auto *initializer = dynamic_cast(InitManager::instance.findInitializer(object)); + ASSERT_NE(initializer, nullptr); + ASSERT_FALSE(initializer->objects().empty()); + auto regularLoc = initializer->objects().back().m_loc; + + object.setPreInit("name", db0::bindings::TypeId::BOOLEAN, Py_True); + + ImmutableObjectInitializer::ObjectSharedPtr storedObject; + ASSERT_FALSE(initializer->tryGetObjectAt(regularLoc, storedObject)); + + std::pair storedValue; + ASSERT_FALSE(initializer->tryGetAt(regularLoc, storedValue)); + + auto [memberId, isInitVar] = mock_class->findField("name"); + (void)isInitVar; + ASSERT_TRUE(memberId); + ASSERT_TRUE(memberId.hasFidelity(2)); + auto lofiLoc = memberId.get(2).getIndexAndOffset(); + + ASSERT_TRUE(initializer->tryGetAt(lofiLoc, storedValue)); + ASSERT_EQ(storedValue.first, StorageClass::PACK_2); + ASSERT_TRUE(lofi_store<2>::fromValue(storedValue.second).isSet(lofiLoc.second)); + ASSERT_EQ(lofi_store<2>::fromValue(storedValue.second).get(lofiLoc.second), Value::TRUE); + } + + workspace.close(); + } + + TEST_F( ObjectInitializerTest, testImmutableRemovePreInitClearsEmbeddedObject ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefix_name); + std::shared_ptr mock_class = getTestClass(fixture); + + { + ObjectImmutableImpl object(mock_class); + auto pyString = Py_OWN(PyUnicode_FromString("removed embedded object")); + object.setPreInit("name", db0::bindings::TypeId::STRING, pyString.get()); + + auto *initializer = dynamic_cast(InitManager::instance.findInitializer(object)); + ASSERT_NE(initializer, nullptr); + ASSERT_FALSE(initializer->objects().empty()); + auto regularLoc = initializer->objects().back().m_loc; + + object.removePreInit("name"); + + ImmutableObjectInitializer::ObjectSharedPtr storedObject; + ASSERT_FALSE(initializer->tryGetObjectAt(regularLoc, storedObject)); + + std::pair storedValue; + ASSERT_TRUE(initializer->tryGetAt(regularLoc, storedValue)); + ASSERT_EQ(storedValue.first, StorageClass::DELETED); + ASSERT_EQ(storedValue.second, Value()); + } + + workspace.close(); + } + TEST_F( ObjectInitializerTest, testPosVTLoFiExclusive ) { Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); From 779f0799420c959adb388e1996516cd80f5676e6 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 20 May 2026 11:35:40 +0200 Subject: [PATCH 2/8] embedding measure --- AGENTS.md | 1 + src/dbzero/bindings/python/PyToolkit.cpp | 9 + src/dbzero/bindings/python/PyToolkit.hpp | 1 + .../object_model/object/EmbeddingMeasure.cpp | 268 +++++++++++++ .../object_model/object/EmbeddingMeasure.hpp | 38 ++ tests/unit_tests/EmbeddingMeasureTest.cpp | 354 ++++++++++++++++++ 6 files changed, 671 insertions(+) create mode 100644 src/dbzero/object_model/object/EmbeddingMeasure.cpp create mode 100644 src/dbzero/object_model/object/EmbeddingMeasure.hpp create mode 100644 tests/unit_tests/EmbeddingMeasureTest.cpp diff --git a/AGENTS.md b/AGENTS.md index 96bb49f0..0d8fbae0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -45,6 +45,7 @@ Types derived from `v_object` should follow the project-wide constructor pattern ### C++ style - Use camelCase for local helper variables, lambdas, and method names in C++ code. +- Project types often avoid implicit bool conversion because it can hide subtle ownership, state, and null-check bugs. Use explicit double-negation checks such as `if (!!obj)` or `while (!!item)` when a type supports `operator!()`. ### Python binding wrapper access diff --git a/src/dbzero/bindings/python/PyToolkit.cpp b/src/dbzero/bindings/python/PyToolkit.cpp index dcf58e62..382ecdef 100644 --- a/src/dbzero/bindings/python/PyToolkit.cpp +++ b/src/dbzero/bindings/python/PyToolkit.cpp @@ -650,6 +650,15 @@ namespace db0::python } return item; } + + PyToolkit::ObjectSharedPtr PyToolkit::getMappingItem(ObjectPtr py_object, ObjectPtr key) + { + auto item = Py_OWN(PyObject_GetItem(py_object, key)); + if (!item) { + THROWF(db0::InputException) << "Unable to get mapping item"; + } + return item; + } bool PyToolkit::isSingleton(TypeObjectPtr py_type) { return PyMemoType_IsSingleton(py_type); diff --git a/src/dbzero/bindings/python/PyToolkit.hpp b/src/dbzero/bindings/python/PyToolkit.hpp index fc53424a..216f190a 100644 --- a/src/dbzero/bindings/python/PyToolkit.hpp +++ b/src/dbzero/bindings/python/PyToolkit.hpp @@ -189,6 +189,7 @@ namespace db0::python static ObjectSharedPtr next(ObjectPtr py_object); static std::size_t length(ObjectPtr py_object); static ObjectSharedPtr getItem(ObjectPtr py_object, std::size_t i); + static ObjectSharedPtr getMappingItem(ObjectPtr py_object, ObjectPtr key); // Get value associated fixture UUID (e.g. enum value) static std::uint64_t getFixtureUUID(ObjectPtr py_object); // Get scoped type's associated fixture UUID (or 0x0) diff --git a/src/dbzero/object_model/object/EmbeddingMeasure.cpp b/src/dbzero/object_model/object/EmbeddingMeasure.cpp new file mode 100644 index 00000000..21f35f1a --- /dev/null +++ b/src/dbzero/object_model/object/EmbeddingMeasure.cpp @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#include "EmbeddingMeasure.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace db0::object_model +{ + namespace + { + using TypeId = db0::bindings::TypeId; + using LangToolkit = LangConfig::LangToolkit; + using ObjectPtr = LangConfig::ObjectPtr; + using MemoImmutableObject = typename LangToolkit::TypeManager::MemoImmutableObject; + + EmbeddingMeasure makeMeasure( + StorageClass storageClass, std::size_t embeddedBytes, std::uint32_t allocationsAvoided = 0, + bool requiresObjectView = false, bool requiresCollectionView = false + ) + { + return { + storageClass, + embeddedBytes, + 0, + allocationsAvoided, + requiresObjectView, + requiresCollectionView + }; + } + + o_tuple_item::Element stringElement(ObjectPtr value) + { + auto &typeManager = LangToolkit::getTypeManager(); + return o_tuple_item::Element::string(typeManager.extractString(value)); + } + + o_tuple_item::Element bytesElement(ObjectPtr value) + { + auto &typeManager = LangToolkit::getTypeManager(); + auto bytes = typeManager.extractBytes(value); + return o_tuple_item::Element::bytes(bytes.m_data, bytes.m_size); + } + + std::uint32_t saturatedAdd(std::uint32_t left, std::uint32_t right) + { + if (right > std::numeric_limits::max() - left) { + return std::numeric_limits::max(); + } + return left + right; + } + + std::uint32_t allocationsForBytes(std::size_t bytes) + { + return static_cast(std::max( + 1, (bytes + SlabAllocatorConfig::DEFAULT_PAGE_SIZE - 1) / SlabAllocatorConfig::DEFAULT_PAGE_SIZE + )); + } + + std::uint32_t listRootAllocationsAvoided(std::size_t itemCount) + { + if (itemCount == 0) { + return 1; + } + + auto dataBlockCapacity = std::size_t{1} + << db0::o_block_data::shift(SlabAllocatorConfig::DEFAULT_PAGE_SIZE); + return static_cast( + 1 + (itemCount - 1) / dataBlockCapacity + ); + } + + std::uint32_t tupleRootAllocationsAvoided(std::size_t itemCount) + { + return allocationsForBytes(o_db0_tuple::measure(itemCount)); + } + + std::uint32_t nestedAllocationsAvoided(TypeId typeId, ObjectPtr value); + + std::uint32_t iterableAllocationsAvoided(ObjectPtr value) + { + std::uint32_t result = 0; + auto &typeManager = LangToolkit::getTypeManager(); + auto iterator = LangToolkit::getIterator(value); + auto item = LangToolkit::next(iterator.get()); + while (!!item) { + result = saturatedAdd(result, nestedAllocationsAvoided(typeManager.getTypeId(item.get()), item.get())); + item = LangToolkit::next(iterator.get()); + } + return result; + } + + std::uint32_t dictAllocationsAvoided(ObjectPtr value) + { + std::uint32_t result = 0; + auto &typeManager = LangToolkit::getTypeManager(); + auto iterator = LangToolkit::getIterator(value); + auto key = LangToolkit::next(iterator.get()); + while (!!key) { + auto dictValue = LangToolkit::getMappingItem(value, key.get()); + result = saturatedAdd(result, nestedAllocationsAvoided(typeManager.getTypeId(key.get()), key.get())); + result = saturatedAdd( + result, nestedAllocationsAvoided(typeManager.getTypeId(dictValue.get()), dictValue.get()) + ); + key = LangToolkit::next(iterator.get()); + } + return result; + } + + std::uint32_t immutableMemoAllocationsAvoided(ObjectPtr value); + + std::uint32_t nestedAllocationsAvoided(TypeId typeId, ObjectPtr value) + { + switch (typeId) { + case TypeId::STRING: + case TypeId::BYTES: + case TypeId::BYTES_ARRAY: + return 1; + case TypeId::LIST: + return saturatedAdd(listRootAllocationsAvoided(LangToolkit::length(value)), iterableAllocationsAvoided(value)); + case TypeId::TUPLE: + return saturatedAdd(tupleRootAllocationsAvoided(LangToolkit::length(value)), iterableAllocationsAvoided(value)); + case TypeId::SET: + return saturatedAdd(1, iterableAllocationsAvoided(value)); + case TypeId::DICT: + return saturatedAdd(1, dictAllocationsAvoided(value)); + case TypeId::MEMO_IMMUTABLE_OBJECT: + return immutableMemoAllocationsAvoided(value); + default: + return 0; + } + } + + std::uint32_t initializerObjectAllocationsAvoided(const ImmutableObjectInitializer &initializer) + { + std::uint32_t result = 0; + auto &typeManager = LangToolkit::getTypeManager(); + for (const auto &objectValue: initializer.objects()) { + if (!objectValue.m_object) { + continue; + } + result = saturatedAdd( + result, nestedAllocationsAvoided( + typeManager.getTypeId(objectValue.m_object.get()), objectValue.m_object.get() + ) + ); + } + return result; + } + + std::uint32_t immutableMemoAllocationsAvoided(ObjectPtr value) + { + if (!LangToolkit::isMemoImmutableObject(value)) { + return 0; + } + + const auto &object = LangToolkit::getTypeManager().template extractObject(value); + if (object.hasInstance()) { + return 0; + } + + auto *initializer = dynamic_cast( + InitManager::instance.findInitializer(object) + ); + if (!initializer) { + return 0; + } + + return saturatedAdd(1, initializerObjectAllocationsAvoided(*initializer)); + } + + } + + std::optional tryMeasureEmbeddingValue( + TypeId typeId, StorageClass storageClass, ObjectPtr value + ) + { + if (!value) { + return std::nullopt; + } + + switch (storageClass) { + case StorageClass::STRING_REF: + case StorageClass::POOLED_STRING: + case StorageClass::STR64: + if (typeId != TypeId::STRING) { + return std::nullopt; + } + return makeMeasure(storageClass, o_tuple_item::measure(stringElement(value)), 1); + + case StorageClass::DB0_BYTES: + case StorageClass::DB0_BYTES_ARRAY: + if (typeId != TypeId::BYTES && typeId != TypeId::BYTES_ARRAY) { + return std::nullopt; + } + return makeMeasure(storageClass, o_tuple_item::measure(bytesElement(value)), 1); + + case StorageClass::DB0_LIST: + case StorageClass::DB0_TUPLE: + if (typeId != TypeId::LIST && typeId != TypeId::TUPLE) { + return std::nullopt; + } + return makeMeasure( + storageClass, o_py_tuple::measure(value), nestedAllocationsAvoided(typeId, value), false, true + ); + + case StorageClass::DB0_SET: + if (typeId != TypeId::SET) { + return std::nullopt; + } + return makeMeasure( + storageClass, o_py_set::measure(value), nestedAllocationsAvoided(typeId, value), false, true + ); + + case StorageClass::DB0_DICT: + if (typeId != TypeId::DICT) { + return std::nullopt; + } + return makeMeasure( + storageClass, o_py_dict::measure(value), nestedAllocationsAvoided(typeId, value), false, true + ); + + case StorageClass::OBJECT_REF: { + if (typeId != TypeId::MEMO_IMMUTABLE_OBJECT || !LangToolkit::isMemoImmutableObject(value)) { + return std::nullopt; + } + + const auto &object = LangToolkit::getTypeManager().template extractObject(value); + if (object.hasInstance()) { + return std::nullopt; + } + + auto *initializer = dynamic_cast( + InitManager::instance.findInitializer(object) + ); + if (!initializer) { + return std::nullopt; + } + + auto classRef = initializer->getClassPtr()->getClassRef(); + return makeMeasure( + storageClass, o_embedded_object::measure(classRef, *initializer), + saturatedAdd(1, initializerObjectAllocationsAvoided(*initializer)), true, false + ); + } + + default: + return std::nullopt; + } + } + +} diff --git a/src/dbzero/object_model/object/EmbeddingMeasure.hpp b/src/dbzero/object_model/object/EmbeddingMeasure.hpp new file mode 100644 index 00000000..d3b1e4e7 --- /dev/null +++ b/src/dbzero/object_model/object/EmbeddingMeasure.hpp @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace db0::object_model +{ + + struct EmbeddingMeasure + { + // Storage class of the value being measured. + StorageClass m_storageClass = StorageClass::UNDEFINED; + // Bytes required by the embedded representation under consideration. + std::size_t m_embeddedBytes = 0; + // Bytes required if stored separately; zero until a caller supplies that comparison. + std::size_t m_separateStorageBytes = 0; + // Heuristic count of durable root/member allocations avoided by embedding this value. + std::uint32_t m_allocationsAvoided = 0; + // True when measurement depended on a memo/object wrapper view. + bool m_requiresObjectView = false; + // True when measurement depended on a collection wrapper/view. + bool m_requiresCollectionView = false; + }; + + // Returns std::nullopt when the value cannot be embedded by this measurement path. + std::optional tryMeasureEmbeddingValue( + db0::bindings::TypeId typeId, StorageClass storageClass, LangConfig::ObjectPtr value + ); + +} diff --git a/tests/unit_tests/EmbeddingMeasureTest.cpp b/tests/unit_tests/EmbeddingMeasureTest.cpp new file mode 100644 index 00000000..7bf9f6cf --- /dev/null +++ b/tests/unit_tests/EmbeddingMeasureTest.cpp @@ -0,0 +1,354 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace db0; +using namespace db0::bindings; +using namespace db0::object_model; +using namespace db0::tests; + +namespace tests +{ + + class EmbeddingMeasureTest: public testing::Test + { + public: + static constexpr const char *prefixName = "embedding-measure-prefix"; + static constexpr const char *fileName = "embedding-measure-prefix.db0"; + + void SetUp() override + { + Py_Initialize(); + drop(fileName); + } + + void TearDown() override + { + drop(fileName); + } + }; + + db0::python::shared_py_object makeMemoType(bool immutable) + { + static std::uint64_t memoTypeIndex = 0; + auto className = std::string("EmbeddingMeasure") + (immutable ? "Immutable" : "Regular") + + "Memo" + std::to_string(memoTypeIndex); + auto typeId = "tests/" + className; + ++memoTypeIndex; + + if (PyRun_SimpleString(("class " + className + ": pass\n").c_str()) != 0) { + return {}; + } + + auto mainModule = Py_BORROW(PyImport_AddModule("__main__")); + if (!mainModule.get()) { + return {}; + } + + auto pyClass = Py_OWN(PyObject_GetAttrString(*mainModule, className.c_str())); + auto args = Py_OWN(PyTuple_Pack(1, pyClass.get())); + auto kwargs = Py_OWN(PyDict_New()); + auto pyTypeId = Py_OWN(PyUnicode_FromString(typeId.c_str())); + if (!pyClass.get() || !args.get() || !kwargs.get() || !pyTypeId.get()) { + return {}; + } + db0::python::PySafeDict_SetItemString(kwargs.get(), "id", std::move(pyTypeId)); + if (immutable) { + auto pyImmutable = Py_OWN(PyBool_FromLong(1)); + if (!pyImmutable.get()) { + return {}; + } + db0::python::PySafeDict_SetItemString(kwargs.get(), "immutable", std::move(pyImmutable)); + } + + return db0::python::shared_py_object( + reinterpret_cast(db0::python::PyAPI_wrapPyClass(nullptr, args.get(), kwargs.get())), + false + ); + } + + db0::python::shared_py_object makeUnsupportedValue() + { + static std::uint64_t unsupportedTypeIndex = 0; + auto className = std::string("UnsupportedEmbeddingMeasureValue") + std::to_string(unsupportedTypeIndex); + ++unsupportedTypeIndex; + + if (PyRun_SimpleString(("class " + className + ": pass\n").c_str()) != 0) { + return {}; + } + + auto mainModule = Py_BORROW(PyImport_AddModule("__main__")); + if (!mainModule.get()) { + return {}; + } + + auto pyClass = Py_OWN(PyObject_GetAttrString(mainModule.get(), className.c_str())); + if (!pyClass.get()) { + return {}; + } + + return Py_OWN(PyObject_CallNoArgs(pyClass.get())); + } + + TEST_F( EmbeddingMeasureTest, testMeasuresStringAndBytesValues ) + { + auto pyString = Py_OWN(PyUnicode_FromString("embedded-string")); + auto stringMeasure = tryMeasureEmbeddingValue(TypeId::STRING, StorageClass::STRING_REF, pyString.get()); + ASSERT_TRUE(stringMeasure.has_value()); + ASSERT_EQ(stringMeasure->m_storageClass, StorageClass::STRING_REF); + ASSERT_EQ( + stringMeasure->m_embeddedBytes, + o_tuple_item::measure(o_tuple_item::Element::string("embedded-string")) + ); + ASSERT_FALSE(stringMeasure->m_requiresObjectView); + ASSERT_FALSE(stringMeasure->m_requiresCollectionView); + ASSERT_EQ(stringMeasure->m_allocationsAvoided, 1u); + + const char bytes[] = { 'a', 'b', 'c' }; + auto pyBytes = Py_OWN(PyBytes_FromStringAndSize(bytes, sizeof(bytes))); + auto bytesMeasure = tryMeasureEmbeddingValue(TypeId::BYTES, StorageClass::DB0_BYTES, pyBytes.get()); + ASSERT_TRUE(bytesMeasure.has_value()); + ASSERT_EQ( + bytesMeasure->m_embeddedBytes, + o_tuple_item::measure(o_tuple_item::Element::bytes( + reinterpret_cast(bytes), sizeof(bytes) + )) + ); + ASSERT_EQ(bytesMeasure->m_allocationsAvoided, 1u); + } + + TEST_F( EmbeddingMeasureTest, testMeasuresPythonCollectionValues ) + { + auto pyList = Py_OWN(PyList_New(2)); + db0::python::PySafeList_SetItem(pyList.get(), 0, Py_OWN(PyLong_FromLong(7))); + db0::python::PySafeList_SetItem(pyList.get(), 1, Py_OWN(PyUnicode_FromString("seven"))); + + auto listMeasure = tryMeasureEmbeddingValue(TypeId::LIST, StorageClass::DB0_LIST, pyList.get()); + ASSERT_TRUE(listMeasure.has_value()); + ASSERT_GT(listMeasure->m_embeddedBytes, 0u); + ASSERT_TRUE(listMeasure->m_requiresCollectionView); + ASSERT_EQ(listMeasure->m_allocationsAvoided, 2u); + + auto pyTuple = Py_OWN(PyTuple_New(1)); + db0::python::PySafeTuple_SetItem(pyTuple.get(), 0, Py_OWN(PyLong_FromLong(42))); + auto tupleMeasure = tryMeasureEmbeddingValue(TypeId::TUPLE, StorageClass::DB0_TUPLE, pyTuple.get()); + ASSERT_TRUE(tupleMeasure.has_value()); + ASSERT_GT(tupleMeasure->m_embeddedBytes, 0u); + ASSERT_EQ(tupleMeasure->m_allocationsAvoided, 1u); + + auto pySet = Py_OWN(PySet_New(nullptr)); + db0::python::PySafeSet_Add(pySet.get(), Py_OWN(PyUnicode_FromString("item"))); + auto setMeasure = tryMeasureEmbeddingValue(TypeId::SET, StorageClass::DB0_SET, pySet.get()); + ASSERT_TRUE(setMeasure.has_value()); + ASSERT_GT(setMeasure->m_embeddedBytes, 0u); + ASSERT_EQ(setMeasure->m_allocationsAvoided, 2u); + + auto pyDict = Py_OWN(PyDict_New()); + db0::python::PySafeDict_SetItemString(pyDict.get(), "key", Py_OWN(PyLong_FromLong(99))); + auto dictMeasure = tryMeasureEmbeddingValue(TypeId::DICT, StorageClass::DB0_DICT, pyDict.get()); + ASSERT_TRUE(dictMeasure.has_value()); + ASSERT_GT(dictMeasure->m_embeddedBytes, 0u); + ASSERT_EQ(dictMeasure->m_allocationsAvoided, 2u); + } + + TEST_F( EmbeddingMeasureTest, testCollectionAllocationHeuristicCountsNestedMemberStorage ) + { + auto nestedTuple = Py_OWN(PyTuple_New(2)); + db0::python::PySafeTuple_SetItem(nestedTuple.get(), 0, Py_OWN(PyUnicode_FromString("nested"))); + db0::python::PySafeTuple_SetItem(nestedTuple.get(), 1, Py_OWN(PyBytes_FromStringAndSize("b", 1))); + + auto pyList = Py_OWN(PyList_New(2)); + db0::python::PySafeList_SetItem(pyList.get(), 0, Py_OWN(PyUnicode_FromString("root"))); + db0::python::PySafeList_SetItem(pyList.get(), 1, std::move(nestedTuple)); + + auto measure = tryMeasureEmbeddingValue(TypeId::LIST, StorageClass::DB0_LIST, pyList.get()); + ASSERT_TRUE(measure.has_value()); + ASSERT_EQ(measure->m_allocationsAvoided, 5u); + } + + TEST_F( EmbeddingMeasureTest, testCollectionAllocationHeuristicCountsLargeRoots ) + { + constexpr std::size_t largeCount = 10000; + auto smallTuple = Py_OWN(PyTuple_New(1)); + db0::python::PySafeTuple_SetItem(smallTuple.get(), 0, Py_OWN(PyLong_FromLong(0))); + auto largeTuple = Py_OWN(PyTuple_New(static_cast(largeCount))); + for (std::size_t i = 0; i < largeCount; ++i) { + db0::python::PySafeTuple_SetItem( + largeTuple.get(), static_cast(i), Py_OWN(PyLong_FromLong(static_cast(i))) + ); + } + + auto smallTupleMeasure = tryMeasureEmbeddingValue(TypeId::TUPLE, StorageClass::DB0_TUPLE, smallTuple.get()); + auto largeTupleMeasure = tryMeasureEmbeddingValue(TypeId::TUPLE, StorageClass::DB0_TUPLE, largeTuple.get()); + ASSERT_TRUE(smallTupleMeasure.has_value()); + ASSERT_TRUE(largeTupleMeasure.has_value()); + ASSERT_GT(largeTupleMeasure->m_allocationsAvoided, smallTupleMeasure->m_allocationsAvoided); + + auto smallList = Py_OWN(PyList_New(1)); + db0::python::PySafeList_SetItem(smallList.get(), 0, Py_OWN(PyLong_FromLong(0))); + auto largeList = Py_OWN(PyList_New(static_cast(largeCount))); + for (std::size_t i = 0; i < largeCount; ++i) { + db0::python::PySafeList_SetItem( + largeList.get(), static_cast(i), Py_OWN(PyLong_FromLong(static_cast(i))) + ); + } + + auto smallListMeasure = tryMeasureEmbeddingValue(TypeId::LIST, StorageClass::DB0_LIST, smallList.get()); + auto largeListMeasure = tryMeasureEmbeddingValue(TypeId::LIST, StorageClass::DB0_LIST, largeList.get()); + ASSERT_TRUE(smallListMeasure.has_value()); + ASSERT_TRUE(largeListMeasure.has_value()); + ASSERT_GT(largeListMeasure->m_allocationsAvoided, smallListMeasure->m_allocationsAvoided); + } + + TEST_F( EmbeddingMeasureTest, testUnsupportedCollectionElementsThrow ) + { + auto pyList = Py_OWN(PyList_New(1)); + auto unsupportedListValue = makeUnsupportedValue(); + ASSERT_TRUE(unsupportedListValue.get()); + db0::python::PySafeList_SetItem(pyList.get(), 0, std::move(unsupportedListValue)); + ASSERT_THROW( + tryMeasureEmbeddingValue(TypeId::LIST, StorageClass::DB0_LIST, pyList.get()), + db0::InputException + ); + + auto pyTuple = Py_OWN(PyTuple_New(1)); + auto unsupportedTupleValue = makeUnsupportedValue(); + ASSERT_TRUE(unsupportedTupleValue.get()); + db0::python::PySafeTuple_SetItem(pyTuple.get(), 0, std::move(unsupportedTupleValue)); + ASSERT_THROW( + tryMeasureEmbeddingValue(TypeId::TUPLE, StorageClass::DB0_TUPLE, pyTuple.get()), + db0::InputException + ); + + auto pySet = Py_OWN(PySet_New(nullptr)); + auto unsupportedSetValue = makeUnsupportedValue(); + ASSERT_TRUE(unsupportedSetValue.get()); + db0::python::PySafeSet_Add(pySet.get(), std::move(unsupportedSetValue)); + ASSERT_THROW( + tryMeasureEmbeddingValue(TypeId::SET, StorageClass::DB0_SET, pySet.get()), + db0::InputException + ); + } + + TEST_F( EmbeddingMeasureTest, testUnsupportedDictElementsThrow ) + { + auto dictWithUnsupportedKey = Py_OWN(PyDict_New()); + auto unsupportedKey = makeUnsupportedValue(); + ASSERT_TRUE(unsupportedKey.get()); + db0::python::PySafeDict_SetItem( + dictWithUnsupportedKey.get(), std::move(unsupportedKey), Py_OWN(PyLong_FromLong(3)) + ); + ASSERT_THROW( + tryMeasureEmbeddingValue(TypeId::DICT, StorageClass::DB0_DICT, dictWithUnsupportedKey.get()), + db0::InputException + ); + + auto dictWithUnsupportedValue = Py_OWN(PyDict_New()); + auto unsupportedValue = makeUnsupportedValue(); + ASSERT_TRUE(unsupportedValue.get()); + db0::python::PySafeDict_SetItemString( + dictWithUnsupportedValue.get(), "key", std::move(unsupportedValue) + ); + ASSERT_THROW( + tryMeasureEmbeddingValue(TypeId::DICT, StorageClass::DB0_DICT, dictWithUnsupportedValue.get()), + db0::InputException + ); + } + + TEST_F( EmbeddingMeasureTest, testUnsupportedNestedCollectionElementThrows ) + { + auto nestedList = Py_OWN(PyList_New(1)); + auto unsupportedValue = makeUnsupportedValue(); + ASSERT_TRUE(unsupportedValue.get()); + db0::python::PySafeList_SetItem(nestedList.get(), 0, std::move(unsupportedValue)); + + auto pyList = Py_OWN(PyList_New(2)); + db0::python::PySafeList_SetItem(pyList.get(), 0, Py_OWN(PyUnicode_FromString("ok"))); + db0::python::PySafeList_SetItem(pyList.get(), 1, std::move(nestedList)); + + ASSERT_THROW( + tryMeasureEmbeddingValue(TypeId::LIST, StorageClass::DB0_LIST, pyList.get()), + db0::InputException + ); + } + + TEST_F( EmbeddingMeasureTest, testUnsupportedScalarValueReturnsNullopt ) + { + auto pyInt = Py_OWN(PyLong_FromLong(7)); + ASSERT_FALSE(tryMeasureEmbeddingValue(TypeId::INTEGER, StorageClass::INT64, pyInt.get()).has_value()); + } + + TEST_F( EmbeddingMeasureTest, testDeferredImmutableMemoObjectMeasuresEmbeddedObjectOnly ) + { + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefixName); + auto mockClass = getTestClass(fixture); + auto pyMemoType = makeMemoType(true); + ASSERT_TRUE(pyMemoType.get()); + + auto pyMemo = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(pyMemoType.get()) + )); + pyMemo->makeNew(mockClass); + auto &object = pyMemo->ext(); + auto *initializer = dynamic_cast(InitManager::instance.findInitializer(object)); + ASSERT_NE(initializer, nullptr); + initializer->set({0, 0}, StorageClass::INT64, Value(17)); + + auto measure = tryMeasureEmbeddingValue( + TypeId::MEMO_IMMUTABLE_OBJECT, StorageClass::OBJECT_REF, + reinterpret_cast(pyMemo.get()) + ); + ASSERT_TRUE(measure.has_value()); + ASSERT_EQ(measure->m_embeddedBytes, o_embedded_object::measure(mockClass->getClassRef(), *initializer)); + ASSERT_TRUE(measure->m_requiresObjectView); + ASSERT_FALSE(measure->m_requiresCollectionView); + + workspace.close(); + } + + TEST_F( EmbeddingMeasureTest, testMaterializedMemoObjectReturnsNullopt ) + { + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefixName); + auto mockClass = getTestClass(fixture); + auto pyMemoType = makeMemoType(false); + ASSERT_TRUE(pyMemoType.get()); + + auto pyMemo = Py_OWN(db0::python::MemoObjectStub_new(pyMemoType.get())); + pyMemo->makeNew(mockClass); + { + FixtureLock lock(fixture); + pyMemo->modifyExt().postInit(lock); + } + + auto measure = tryMeasureEmbeddingValue( + TypeId::MEMO_OBJECT, StorageClass::OBJECT_REF, reinterpret_cast(pyMemo.get()) + ); + ASSERT_FALSE(measure.has_value()); + // This synthetic wrapper is only needed to prove the measurement contract. + // Avoid unrelated deallocation paths after the durable object is created. + pyMemo.steal(); + + workspace.close(); + } + +} From 1eaa75b48086721d881d78d60843b48428cc2a0b Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 20 May 2026 13:07:30 +0200 Subject: [PATCH 3/8] first immutable getters --- python_tests/test_memo_immutable.py | 72 +++++++- src/dbzero/bindings/python/PyAPI.cpp | 4 + src/dbzero/bindings/python/PyAPI.hpp | 1 + src/dbzero/bindings/python/PyToolkit.cpp | 110 +++++++++++- src/dbzero/bindings/python/PyToolkit.hpp | 2 + src/dbzero/bindings/python/Types.cpp | 14 +- .../object_model/object/EmbeddingMeasure.cpp | 52 +++++- .../object_model/object/EmbeddingMeasure.hpp | 4 + .../object/ObjectImmutableImpl.cpp | 105 ++++++++++- .../object/ObjectImmutableImpl.hpp | 10 ++ .../object_model/object/ObjectImplBase.cpp | 34 +++- tests/unit_tests/EmbeddingMeasureTest.cpp | 24 +++ tests/unit_tests/ObjectInitializerTest.cpp | 167 ++++++++++++++++++ 13 files changed, 573 insertions(+), 26 deletions(-) diff --git a/python_tests/test_memo_immutable.py b/python_tests/test_memo_immutable.py index 7a15f9b3..1690af6f 100644 --- a/python_tests/test_memo_immutable.py +++ b/python_tests/test_memo_immutable.py @@ -7,6 +7,7 @@ from dataclasses import dataclass from .conftest import DB0_DIR import random +import gc @db0.memo(immutable=True, no_default_tags=True) @@ -14,6 +15,27 @@ class MemoImmutableClass1: data: str value: int = 0 + + +@db0.memo(immutable=True, no_default_tags=True) +@dataclass +class MemoImmutableBytesClass: + data: bytes + + +@db0.memo(immutable=True, no_default_tags=True) +@dataclass +class MemoImmutableLargePayloadClass: + data: object + + +@db0.memo(immutable=True, no_default_tags=True) +class MemoImmutableReadInConstructor: + def __init__(self, data, payload): + self.data = data + self.payload = payload + self.seen_data = self.data + self.seen_payload = self.payload def test_create_memo_immutable(db0_fixture): _ = MemoImmutableClass1(data="immutable data", value=42) @@ -23,4 +45,52 @@ def test_tag_and_find_immutable_instance(db0_fixture): obj_1 = MemoImmutableClass1(data="immutable data", value=42) db0.tags(obj_1).add("tag1", "tag2") assert list(db0.find("tag1")) == [obj_1] - \ No newline at end of file + + +def test_read_embedded_immutable_string_after_reopen(db0_fixture): + obj = MemoImmutableClass1(data="small embedded string", value=7) + db0.tags(obj).add("keep-embedded-string") + obj_id = db0.uuid(obj) + assert obj.data == "small embedded string" + assert db0.fetch(obj_id).data == "small embedded string" + + del obj + gc.collect() + db0.commit() + db0.close() + db0.init(DB0_DIR) + db0.open("my-test-prefix", "rw") + + reopened = db0.fetch(obj_id) + assert reopened.data == "small embedded string" + assert reopened.value == 7 + del reopened + gc.collect() + + +def test_read_embedded_immutable_bytes(db0_fixture): + payload = b"a\x00b embedded bytes" + obj = MemoImmutableBytesClass(payload) + assert obj.data == payload + + +def test_large_immutable_string_and_bytes_fallback_read(db0_fixture): + large_text = "x" * (12 * 1024) + large_bytes = b"y" * (12 * 1024) + + text_obj = MemoImmutableLargePayloadClass(large_text) + bytes_obj = MemoImmutableLargePayloadClass(large_bytes) + + assert text_obj.data == large_text + assert bytes_obj.data == large_bytes + + +def test_read_embedded_immutable_values_inside_constructor(db0_fixture): + payload = b"constructor\x00bytes" + obj = MemoImmutableReadInConstructor("constructor string", payload) + + assert obj.seen_data == "constructor string" + assert obj.seen_payload == payload + assert obj.data == "constructor string" + assert obj.payload == payload + diff --git a/src/dbzero/bindings/python/PyAPI.cpp b/src/dbzero/bindings/python/PyAPI.cpp index d2ee6281..e82463cf 100644 --- a/src/dbzero/bindings/python/PyAPI.cpp +++ b/src/dbzero/bindings/python/PyAPI.cpp @@ -1113,6 +1113,10 @@ namespace db0::python template <> db0::object_model::StorageClass getStorageClass() { return db0::object_model::StorageClass::OBJECT_REF; } + + template <> db0::object_model::StorageClass getStorageClass() { + return db0::object_model::StorageClass::OBJECT_REF; + } template <> db0::object_model::StorageClass getStorageClass() { return db0::object_model::StorageClass::DB0_LIST; diff --git a/src/dbzero/bindings/python/PyAPI.hpp b/src/dbzero/bindings/python/PyAPI.hpp index b4ff4865..ff047379 100644 --- a/src/dbzero/bindings/python/PyAPI.hpp +++ b/src/dbzero/bindings/python/PyAPI.hpp @@ -203,6 +203,7 @@ namespace db0::python template db0::object_model::StorageClass getStorageClass(); template <> db0::object_model::StorageClass getStorageClass(); + template <> db0::object_model::StorageClass getStorageClass(); template <> db0::object_model::StorageClass getStorageClass(); } diff --git a/src/dbzero/bindings/python/PyToolkit.cpp b/src/dbzero/bindings/python/PyToolkit.cpp index 382ecdef..5e346274 100644 --- a/src/dbzero/bindings/python/PyToolkit.cpp +++ b/src/dbzero/bindings/python/PyToolkit.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -38,6 +39,59 @@ namespace db0::python PyToolkit::PyWorkspace PyToolkit::m_py_workspace; SafeRMutex PyToolkit::m_api_mutex; + + namespace + { + std::uint16_t getMemoInstanceId(PyObject *pyObject) + { + if (PyMemo_Check(pyObject)) { + return reinterpret_cast(pyObject)->ext().getInstanceId(); + } + if (PyMemo_Check(pyObject)) { + return reinterpret_cast(pyObject)->ext().getInstanceId(); + } + return reinterpret_cast(pyObject)->ext().getInstanceId(); + } + + bool memoHasRefs(PyObject *pyObject) + { + if (PyMemo_Check(pyObject)) { + return reinterpret_cast(pyObject)->ext().hasRefs(); + } + if (PyMemo_Check(pyObject)) { + return reinterpret_cast(pyObject)->ext().hasRefs(); + } + return reinterpret_cast(pyObject)->ext().hasRefs(); + } + } + + PyToolkit::ObjectSharedPtr PyToolkit::unloadEmbeddedInstance(const db0::object_model::o_tuple_item &item) + { + switch (item.itemKind()) { + case StorageClass::STRING_REF: { + auto str = item.stringPayload().get(); + auto result = Py_OWN(PyUnicode_FromStringAndSize(str.get_raw(), str.size())); + if (!result) { + THROWF(db0::InputException) << "Failed to convert embedded string"; + } + return result; + } + case StorageClass::DB0_BYTES: { + const auto &bytes = item.bytesPayload(); + auto result = Py_OWN(PyBytes_FromStringAndSize( + reinterpret_cast(bytes.getBuffer()), bytes.size() + )); + if (!result) { + THROWF(db0::InputException) << "Failed to convert embedded bytes"; + } + return result; + } + default: + THROWF(db0::InputException) + << "Unsupported embedded immutable member storage class: " << item.itemKind(); + } + return {}; + } void PyToolkit::throwErrorWithPyErrorCheck(const std::string& message, const std::string& error_detail) { if (PyErr_Occurred()) { @@ -240,19 +294,18 @@ namespace db0::python if (obj_ptr.get()) { // only validate instance ID if provided - auto &memo = reinterpret_cast(obj_ptr.get())->ext(); if (instance_id) { // NOTE: we first must check if this is really a memo object if (!isAnyMemoObject(obj_ptr.get())) { return false; } - if (memo.getInstanceId() != instance_id) { + if (getMemoInstanceId(obj_ptr.get()) != instance_id) { return false; } } // NOTE: objects with no references (either from dbzero or other lang types) are considered deleted - return PyToolkit::hasLangRefs(*obj_ptr) || memo.hasRefs(); + return PyToolkit::hasLangRefs(*obj_ptr) || memoHasRefs(obj_ptr.get()); } // Check if object's stem can be unloaded (and has refs) @@ -274,7 +327,7 @@ namespace db0::python if (!isAnyMemoObject(obj_ptr.get())) { return {}; } - if (reinterpret_cast(obj_ptr.get())->ext().getInstanceId() != instance_id) { + if (getMemoInstanceId(obj_ptr.get()) != instance_id) { return {}; } } @@ -282,6 +335,51 @@ namespace db0::python return obj_ptr; } + std::shared_ptr type; + shared_py_object lang_type; + auto immutableStem = [&]() { + try { + auto stem = db0::object_model::ObjectImmutableImpl::tryUnloadStem( + fixture, address, instance_id, access_mode + ); + if (!stem) { + return decltype(stem)(); + } + auto typeInfo = class_factory.getTypeByClassRef(stem->getClassRef()); + if (!typeInfo.m_class->isImmutable()) { + return decltype(stem)(); + } + type = typeInfo.m_class; + lang_type = typeInfo.m_lang_type; + return stem; + } catch (...) { + return db0::object_model::ObjectImmutableImpl::ObjectStem(); + } + }(); + + if (!!immutableStem) { + if (!lang_type_ptr) { + if (!lang_type) { + lang_type = class_factory.getLangType(*type); + } + lang_type_ptr = lang_type.get(); + } + + if (!lang_type_ptr) { + lang_type_ptr = PyToolkit::getTypeManager().getMemoBaseType().get(); + } + + auto *memo_ptr = reinterpret_cast(lang_type_ptr->tp_alloc(lang_type_ptr, 0)); + memo_ptr->unload( + fixture, std::move(immutableStem), type, db0::object_model::ObjectImmutableImpl::with_type_hint{} + ); + obj_ptr = Py_OWN(reinterpret_cast(memo_ptr)); + if (!memo_ptr->ext().isNoCache()) { + lang_cache.add(address, obj_ptr.get()); + } + return obj_ptr; + } + // Unload from backend otherwise auto stem = db0::object_model::Object::tryUnloadStem( fixture, address, instance_id, access_mode @@ -290,7 +388,9 @@ namespace db0::python // object not found return {}; } - auto [type, lang_type] = class_factory.getTypeByClassRef(stem->getClassRef()); + auto typeInfo = class_factory.getTypeByClassRef(stem->getClassRef()); + type = typeInfo.m_class; + lang_type = typeInfo.m_lang_type; if (!lang_type_ptr) { if (!lang_type) { diff --git a/src/dbzero/bindings/python/PyToolkit.hpp b/src/dbzero/bindings/python/PyToolkit.hpp index 216f190a..bd7d0de3 100644 --- a/src/dbzero/bindings/python/PyToolkit.hpp +++ b/src/dbzero/bindings/python/PyToolkit.hpp @@ -29,6 +29,7 @@ namespace db0::object_model { + class o_tuple_item; class Object; class Class; class ClassFactory; @@ -132,6 +133,7 @@ namespace db0::python static ObjectSharedPtr unloadWeakSet(db0::swine_ptr, Address, std::uint16_t instance_id = 0, AccessFlags = {}); static ObjectSharedPtr unloadDict(db0::swine_ptr, Address, std::uint16_t instance_id = 0, AccessFlags = {}); static ObjectSharedPtr unloadTuple(db0::swine_ptr, Address, std::uint16_t instance_id = 0, AccessFlags = {}); + static ObjectSharedPtr unloadEmbeddedInstance(const db0::object_model::o_tuple_item &); // Unload dbzero block instance static ObjectSharedPtr unloadBlock(db0::swine_ptr, Address, std::uint16_t instance_id = 0, AccessFlags = {}); diff --git a/src/dbzero/bindings/python/Types.cpp b/src/dbzero/bindings/python/Types.cpp index 7f349f58..5a0c4c7e 100644 --- a/src/dbzero/bindings/python/Types.cpp +++ b/src/dbzero/bindings/python/Types.cpp @@ -32,6 +32,11 @@ namespace db0::python return reinterpret_cast(py_value)->ext().getFixture(); } + // IMMUTABLE OBJECT specialization + template <> db0::swine_ptr getFixtureOf(PyObject *py_value) { + return reinterpret_cast(py_value)->ext().getFixture(); + } + // LIST specialization template <> db0::swine_ptr getFixtureOf(PyObject *py_value) { return reinterpret_cast(py_value)->ext().getFixture(); @@ -77,6 +82,7 @@ namespace db0::python functions.resize(static_cast(TypeId::COUNT)); std::fill(functions.begin(), functions.end(), nullptr); functions[static_cast(TypeId::MEMO_OBJECT)] = getFixtureOf; + functions[static_cast(TypeId::MEMO_IMMUTABLE_OBJECT)] = getFixtureOf; functions[static_cast(TypeId::DB0_LIST)] = getFixtureOf; functions[static_cast(TypeId::DB0_DICT)] = getFixtureOf; functions[static_cast(TypeId::DB0_SET)] = getFixtureOf; @@ -139,6 +145,11 @@ namespace db0::python return tryGetUUIDOf(reinterpret_cast(py_value)); } + // IMMUTABLE OBJECT specialization + template <> PyObject *tryGetUUID(PyObject *py_value) { + return tryGetUUIDOf(reinterpret_cast(py_value)); + } + // OBJECT_ITERABLE specialization template <> PyObject *tryGetUUID(PyObject *py_value) { return tryGetSerializableUUID(&reinterpret_cast(py_value)->ext()); @@ -165,6 +176,7 @@ namespace db0::python std::fill(functions.begin(), functions.end(), nullptr); // NOTE: for security reasons we only allow UUID retrieval for a strictly limited set of types functions[static_cast(TypeId::MEMO_OBJECT)] = tryGetUUID; + functions[static_cast(TypeId::MEMO_IMMUTABLE_OBJECT)] = tryGetUUID; // the purpose of UUID here is to find identical queries functions[static_cast(TypeId::OBJECT_ITERABLE)] = tryGetUUID; // for expired refs UUIDs are still available @@ -190,4 +202,4 @@ namespace db0::python return try_get_uuid_functions[static_cast(type_id)](py_value); } -} \ No newline at end of file +} diff --git a/src/dbzero/object_model/object/EmbeddingMeasure.cpp b/src/dbzero/object_model/object/EmbeddingMeasure.cpp index 21f35f1a..aa364ffd 100644 --- a/src/dbzero/object_model/object/EmbeddingMeasure.cpp +++ b/src/dbzero/object_model/object/EmbeddingMeasure.cpp @@ -30,15 +30,18 @@ namespace db0::object_model using ObjectPtr = LangConfig::ObjectPtr; using MemoImmutableObject = typename LangToolkit::TypeManager::MemoImmutableObject; + constexpr std::size_t ALLOCATION_COST = 64; + constexpr std::size_t PAGE_FETCH_COST = SlabAllocatorConfig::DEFAULT_PAGE_SIZE / 2; + EmbeddingMeasure makeMeasure( - StorageClass storageClass, std::size_t embeddedBytes, std::uint32_t allocationsAvoided = 0, - bool requiresObjectView = false, bool requiresCollectionView = false + StorageClass storageClass, std::size_t embeddedBytes, std::size_t separateStorageBytes = 0, + std::uint32_t allocationsAvoided = 0, bool requiresObjectView = false, bool requiresCollectionView = false ) { return { storageClass, embeddedBytes, - 0, + separateStorageBytes, allocationsAvoided, requiresObjectView, requiresCollectionView @@ -73,6 +76,11 @@ namespace db0::object_model )); } + std::size_t extraPagesFetched(std::size_t embeddedBytes) + { + return embeddedBytes / SlabAllocatorConfig::DEFAULT_PAGE_SIZE; + } + std::uint32_t listRootAllocationsAvoided(std::size_t itemCount) { if (itemCount == 0) { @@ -202,14 +210,23 @@ namespace db0::object_model if (typeId != TypeId::STRING) { return std::nullopt; } - return makeMeasure(storageClass, o_tuple_item::measure(stringElement(value)), 1); + return makeMeasure( + storageClass, o_tuple_item::measure(stringElement(value)), + db0::o_string::measure(LangToolkit::getTypeManager().extractString(value)), 1 + ); case StorageClass::DB0_BYTES: case StorageClass::DB0_BYTES_ARRAY: if (typeId != TypeId::BYTES && typeId != TypeId::BYTES_ARRAY) { return std::nullopt; } - return makeMeasure(storageClass, o_tuple_item::measure(bytesElement(value)), 1); + { + auto bytes = LangToolkit::getTypeManager().extractBytes(value); + return makeMeasure( + storageClass, o_tuple_item::measure(bytesElement(value)), + db0::o_binary::measure(bytes.m_data, bytes.m_size), 1 + ); + } case StorageClass::DB0_LIST: case StorageClass::DB0_TUPLE: @@ -217,7 +234,7 @@ namespace db0::object_model return std::nullopt; } return makeMeasure( - storageClass, o_py_tuple::measure(value), nestedAllocationsAvoided(typeId, value), false, true + storageClass, o_py_tuple::measure(value), 0, nestedAllocationsAvoided(typeId, value), false, true ); case StorageClass::DB0_SET: @@ -225,7 +242,7 @@ namespace db0::object_model return std::nullopt; } return makeMeasure( - storageClass, o_py_set::measure(value), nestedAllocationsAvoided(typeId, value), false, true + storageClass, o_py_set::measure(value), 0, nestedAllocationsAvoided(typeId, value), false, true ); case StorageClass::DB0_DICT: @@ -233,7 +250,7 @@ namespace db0::object_model return std::nullopt; } return makeMeasure( - storageClass, o_py_dict::measure(value), nestedAllocationsAvoided(typeId, value), false, true + storageClass, o_py_dict::measure(value), 0, nestedAllocationsAvoided(typeId, value), false, true ); case StorageClass::OBJECT_REF: { @@ -256,7 +273,7 @@ namespace db0::object_model auto classRef = initializer->getClassPtr()->getClassRef(); return makeMeasure( storageClass, o_embedded_object::measure(classRef, *initializer), - saturatedAdd(1, initializerObjectAllocationsAvoided(*initializer)), true, false + 0, saturatedAdd(1, initializerObjectAllocationsAvoided(*initializer)), true, false ); } @@ -265,4 +282,21 @@ namespace db0::object_model } } + bool shouldEmbedValue(TypeId typeId, StorageClass storageClass, ObjectPtr value) + { + auto measure = tryMeasureEmbeddingValue(typeId, storageClass, value); + if (!measure) { + return false; + } + + if (measure->m_separateStorageBytes == 0 && measure->m_allocationsAvoided == 0) { + return false; + } + + auto savedCost = measure->m_separateStorageBytes + measure->m_allocationsAvoided * ALLOCATION_COST; + auto embeddedCost = measure->m_embeddedBytes + + extraPagesFetched(measure->m_embeddedBytes) * PAGE_FETCH_COST; + return savedCost > embeddedCost; + } + } diff --git a/src/dbzero/object_model/object/EmbeddingMeasure.hpp b/src/dbzero/object_model/object/EmbeddingMeasure.hpp index d3b1e4e7..636b99b6 100644 --- a/src/dbzero/object_model/object/EmbeddingMeasure.hpp +++ b/src/dbzero/object_model/object/EmbeddingMeasure.hpp @@ -35,4 +35,8 @@ namespace db0::object_model db0::bindings::TypeId typeId, StorageClass storageClass, LangConfig::ObjectPtr value ); + bool shouldEmbedValue( + db0::bindings::TypeId typeId, StorageClass storageClass, LangConfig::ObjectPtr value + ); + } diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp index 8b561879..2f493162 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp @@ -3,10 +3,111 @@ #include "ObjectImmutableImpl.hpp" +#include +#include +#include + namespace db0::object_model { - GC0_Define(ObjectImmutableImpl) + + ObjectImmutableImpl::ObjectSharedPtr ObjectImmutableImpl::tryGet( + MemberLoc memberLoc, bool *isAutoGenerated + ) const + { + bool baseIsAutoGenerated = false; + auto result = super_t::tryGet(memberLoc, &baseIsAutoGenerated); + if (result.get() && !baseIsAutoGenerated) { + if (isAutoGenerated) { + *isAutoGenerated = false; + } + return result; + } + + const auto &memberId = memberLoc.first; + if (!memberId) { + if (result.get() && isAutoGenerated) { + *isAutoGenerated = baseIsAutoGenerated; + } + return result; + } + + for (const auto &fieldInfo: memberId) { + auto object = tryGetEmbeddedField(fieldInfo); + if (object.get()) { + if (isAutoGenerated) { + *isAutoGenerated = false; + } + return object; + } + } + + if (result.get() && isAutoGenerated) { + *isAutoGenerated = baseIsAutoGenerated; + } + return result; + } + + ObjectImmutableImpl::ObjectSharedPtr ObjectImmutableImpl::tryGet( + const char *fieldName, bool *isAutoGenerated + ) const + { + return tryGet(this->findField(fieldName), isAutoGenerated); + } + + ObjectImmutableImpl::ObjectSharedPtr ObjectImmutableImpl::tryGetEmbeddedField( + const FieldInfo &fieldInfo + ) const + { + const auto &[fieldId, fidelity] = fieldInfo; + if (!fieldId || fidelity != 0) { + return {}; + } + + if (this->hasInstance()) { + auto *embeddedValue = (*this)->variableValue(fieldId.getIndex()); + return embeddedValue ? python::PyToolkit::unloadEmbeddedInstance(*embeddedValue) : ObjectSharedPtr(); + } + + auto *initializer = dynamic_cast( + InitManager::instance.findInitializer(*this) + ); + if (!initializer) { + return {}; + } + + ObjectSharedPtr object; + return initializer->tryGetObjectAt(fieldId.getIndexAndOffset(), object) ? object : ObjectSharedPtr(); + } + + ObjectImmutableImpl::ObjectSharedPtr ObjectImmutableImpl::get(const char *fieldName) const + { + auto obj = tryGet(fieldName); + if (!obj) { + if (this->isDropped()) { + THROWF(db0::InputException) << "Object is no longer accessible"; + } + THROWF(db0::InputException) << "Attribute not found: " << fieldName; + } + return obj; + } + + void ObjectImmutableImpl::getMembersImpl(std::unordered_set &result) const + { + super_t::getMembersImpl(result); + auto &objType = this->getType(); + for (const auto &entry: (*this)->field_map()) { + std::uint32_t index = 0; + if (entry.key().itemKind() == StorageClass::PACKED_INT32) { + index = entry.key().packedIntPayload().value(); + } else if (entry.key().itemKind() == StorageClass::INT64) { + index = static_cast(entry.key().intPayload().value()); + } else { + continue; + } + result.insert(objType.getMember(FieldID::fromIndex(index)).m_name); + } + } -} \ No newline at end of file +} diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.hpp b/src/dbzero/object_model/object/ObjectImmutableImpl.hpp index 549c1c01..ee252056 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.hpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.hpp @@ -23,6 +23,16 @@ namespace db0::object_model : super_t(std::forward(args)...) { } + + ObjectSharedPtr tryGet(MemberLoc, bool *is_auto_generated = nullptr) const; + ObjectSharedPtr tryGet(const char *field_name, bool *is_auto_generated = nullptr) const; + ObjectSharedPtr get(const char *field_name) const; + + protected: + friend super_t; + + ObjectSharedPtr tryGetEmbeddedField(const FieldInfo &) const; + void getMembersImpl(std::unordered_set &) const; }; } diff --git a/src/dbzero/object_model/object/ObjectImplBase.cpp b/src/dbzero/object_model/object/ObjectImplBase.cpp index e660d3f1..9a990101 100644 --- a/src/dbzero/object_model/object/ObjectImplBase.cpp +++ b/src/dbzero/object_model/object/ObjectImplBase.cpp @@ -4,11 +4,13 @@ #include "ObjectImplBase.hpp" #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -32,25 +34,29 @@ namespace db0::object_model return static_cast(value); } - bool isEmbeddableType(TypeId typeId, StorageClass storageClass) + bool canStorePreInitEmbeddedValue(StorageClass storageClass) { switch (storageClass) { case StorageClass::STRING_REF: + case StorageClass::POOLED_STRING: + case StorageClass::STR64: case StorageClass::DB0_BYTES: - return true; + case StorageClass::DB0_BYTES_ARRAY: case StorageClass::DB0_LIST: - return typeId == TypeId::LIST; case StorageClass::DB0_TUPLE: - return typeId == TypeId::TUPLE; case StorageClass::DB0_SET: - return typeId == TypeId::SET; case StorageClass::DB0_DICT: - return typeId == TypeId::DICT; + return true; default: return false; } } - + + bool shouldEmbedd(TypeId typeId, StorageClass storageClass, LangConfig::ObjectPtr value) + { + return canStorePreInitEmbeddedValue(storageClass) && shouldEmbedValue(typeId, storageClass, value); + } + template ObjectImplBase::ObjectImplBase(tag_as_dropped, UniqueAddress addr, unsigned int ext_refs) : super_t(tag_as_dropped(), addr, ext_refs) @@ -175,6 +181,18 @@ namespace db0::object_model super_t::init(*fixture, type.getClassRef(), initializer.getRefCounts(), numTypeTags, *immutableInitializer, getAccessOptions(type) ); + std::unordered_map embeddedSchemaTypes; + for (const auto &objectValue: immutableInitializer->objects()) { + auto index = objectValue.m_loc.first; + if (!objectValue.m_object) { + embeddedSchemaTypes.erase(index); + } else { + embeddedSchemaTypes[index] = objectValue.m_storage_class; + } + } + for (const auto &[index, storageClass]: embeddedSchemaTypes) { + type.addToSchema(index, storageClass, {}); + } } else { super_t::init(*fixture, type.getClassRef(), initializer.getRefCounts(), numTypeTags, pos_vt_data, pos_vt_offset, index_vt_data.first, index_vt_data.second, @@ -279,7 +297,7 @@ namespace db0::object_model auto member_flags = type.isNoCache() ? AccessFlags { AccessOptions::no_cache } : AccessFlags(); auto loc = member_id.get(0).getIndexAndOffset(); if constexpr (std::is_same_v) { - if (isEmbeddableType(type_id, storage_class)) { + if (shouldEmbedd(type_id, storage_class, obj_ptr)) { auto &immutableInitializer = dynamic_cast(initializer); immutableInitializer.setObject(loc, storage_class, {}, ObjectSharedPtr(obj_ptr)); } else { diff --git a/tests/unit_tests/EmbeddingMeasureTest.cpp b/tests/unit_tests/EmbeddingMeasureTest.cpp index 7bf9f6cf..253ca157 100644 --- a/tests/unit_tests/EmbeddingMeasureTest.cpp +++ b/tests/unit_tests/EmbeddingMeasureTest.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +119,7 @@ namespace tests stringMeasure->m_embeddedBytes, o_tuple_item::measure(o_tuple_item::Element::string("embedded-string")) ); + ASSERT_EQ(stringMeasure->m_separateStorageBytes, db0::o_string::measure("embedded-string")); ASSERT_FALSE(stringMeasure->m_requiresObjectView); ASSERT_FALSE(stringMeasure->m_requiresCollectionView); ASSERT_EQ(stringMeasure->m_allocationsAvoided, 1u); @@ -132,9 +134,31 @@ namespace tests reinterpret_cast(bytes), sizeof(bytes) )) ); + ASSERT_EQ( + bytesMeasure->m_separateStorageBytes, + db0::o_binary::measure(reinterpret_cast(bytes), sizeof(bytes)) + ); ASSERT_EQ(bytesMeasure->m_allocationsAvoided, 1u); } + TEST_F( EmbeddingMeasureTest, testStringAndBytesEmbeddingDecisionUsesCostRule ) + { + auto smallString = Py_OWN(PyUnicode_FromString("small embedded string")); + ASSERT_TRUE(shouldEmbedValue(TypeId::STRING, StorageClass::STRING_REF, smallString.get())); + + std::string largeString(3 * SlabAllocatorConfig::DEFAULT_PAGE_SIZE, 'x'); + auto largePyString = Py_OWN(PyUnicode_FromStringAndSize(largeString.data(), largeString.size())); + ASSERT_FALSE(shouldEmbedValue(TypeId::STRING, StorageClass::STRING_REF, largePyString.get())); + + const char smallBytes[] = { 'a', '\0', 'b' }; + auto smallPyBytes = Py_OWN(PyBytes_FromStringAndSize(smallBytes, sizeof(smallBytes))); + ASSERT_TRUE(shouldEmbedValue(TypeId::BYTES, StorageClass::DB0_BYTES, smallPyBytes.get())); + + std::string largeBytes(3 * SlabAllocatorConfig::DEFAULT_PAGE_SIZE, 'y'); + auto largePyBytes = Py_OWN(PyBytes_FromStringAndSize(largeBytes.data(), largeBytes.size())); + ASSERT_FALSE(shouldEmbedValue(TypeId::BYTES, StorageClass::DB0_BYTES, largePyBytes.get())); + } + TEST_F( EmbeddingMeasureTest, testMeasuresPythonCollectionValues ) { auto pyList = Py_OWN(PyList_New(2)); diff --git a/tests/unit_tests/ObjectInitializerTest.cpp b/tests/unit_tests/ObjectInitializerTest.cpp index 2f66e0a4..486455d4 100644 --- a/tests/unit_tests/ObjectInitializerTest.cpp +++ b/tests/unit_tests/ObjectInitializerTest.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,8 @@ #include #include +#include + using namespace std; using namespace db0; using namespace db0::tests; @@ -381,6 +384,120 @@ namespace tests workspace.close(); } + TEST_F( ObjectInitializerTest, testImmutableGetRetrievesEmbeddedStringAndBytes ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefix_name); + std::shared_ptr mock_class = getTestClass(fixture); + + { + ObjectImmutableImpl object(mock_class); + auto pyString = Py_OWN(PyUnicode_FromString("embedded read string")); + const char rawBytes[] = { 'a', '\0', 'z' }; + auto pyBytes = Py_OWN(PyBytes_FromStringAndSize(rawBytes, sizeof(rawBytes))); + object.setPreInit("name", db0::bindings::TypeId::STRING, pyString.get()); + object.setPreInit("payload", db0::bindings::TypeId::BYTES, pyBytes.get()); + + { + db0::FixtureLock lock(fixture); + object.postInit(lock); + } + + auto stringResult = object.get("name"); + ASSERT_STREQ(PyUnicode_AsUTF8(stringResult.get()), "embedded read string"); + + auto bytesResult = object.get("payload"); + ASSERT_TRUE(PyBytes_Check(bytesResult.get())); + ASSERT_EQ(PyBytes_GET_SIZE(bytesResult.get()), static_cast(sizeof(rawBytes))); + ASSERT_EQ(std::memcmp(PyBytes_AsString(bytesResult.get()), rawBytes, sizeof(rawBytes)), 0); + } + + workspace.close(); + } + + TEST_F( ObjectInitializerTest, testImmutablePreInitGetRetrievesEmbeddedStringAndBytes ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefix_name); + std::shared_ptr mock_class = getTestClass(fixture); + + { + ObjectImmutableImpl object(mock_class); + auto pyString = Py_OWN(PyUnicode_FromString("pre-init embedded string")); + const char rawBytes[] = { 'p', '\0', 'i' }; + auto pyBytes = Py_OWN(PyBytes_FromStringAndSize(rawBytes, sizeof(rawBytes))); + object.setPreInit("name", db0::bindings::TypeId::STRING, pyString.get()); + object.setPreInit("payload", db0::bindings::TypeId::BYTES, pyBytes.get()); + + auto stringResult = object.get("name"); + ASSERT_EQ(stringResult.get(), pyString.get()); + + auto bytesResult = object.get("payload"); + ASSERT_EQ(bytesResult.get(), pyBytes.get()); + } + + workspace.close(); + } + + TEST_F( ObjectInitializerTest, testImmutableLargeStringAndBytesUseDurableFallback ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefix_name); + std::shared_ptr mock_class = getTestClass(fixture); + + { + ObjectImmutableImpl object(mock_class); + std::string largeString(3 * SlabAllocatorConfig::DEFAULT_PAGE_SIZE, 'x'); + std::string largeBytes(3 * SlabAllocatorConfig::DEFAULT_PAGE_SIZE, 'y'); + auto pyString = Py_OWN(PyUnicode_FromStringAndSize(largeString.data(), largeString.size())); + auto pyBytes = Py_OWN(PyBytes_FromStringAndSize(largeBytes.data(), largeBytes.size())); + object.setPreInit("name", db0::bindings::TypeId::STRING, pyString.get()); + object.setPreInit("payload", db0::bindings::TypeId::BYTES, pyBytes.get()); + + auto *initializer = dynamic_cast(InitManager::instance.findInitializer(object)); + ASSERT_NE(initializer, nullptr); + + auto [nameMemberId, nameIsInitVar] = mock_class->findField("name"); + (void)nameIsInitVar; + ASSERT_TRUE(nameMemberId); + auto nameLoc = nameMemberId.get(0).getIndexAndOffset(); + ImmutableObjectInitializer::ObjectSharedPtr storedObject; + ASSERT_FALSE(initializer->tryGetObjectAt(nameLoc, storedObject)); + std::pair storedValue; + ASSERT_TRUE(initializer->tryGetAt(nameLoc, storedValue)); + ASSERT_EQ(storedValue.first, StorageClass::STRING_REF); + + auto [payloadMemberId, payloadIsInitVar] = mock_class->findField("payload"); + (void)payloadIsInitVar; + ASSERT_TRUE(payloadMemberId); + auto payloadLoc = payloadMemberId.get(0).getIndexAndOffset(); + ASSERT_FALSE(initializer->tryGetObjectAt(payloadLoc, storedObject)); + ASSERT_TRUE(initializer->tryGetAt(payloadLoc, storedValue)); + ASSERT_EQ(storedValue.first, StorageClass::DB0_BYTES); + + { + db0::FixtureLock lock(fixture); + object.postInit(lock); + } + + ASSERT_EQ(object->variableValue(nameLoc.first), nullptr); + ASSERT_EQ(object->variableValue(payloadLoc.first), nullptr); + auto stringResult = object.get("name"); + ASSERT_EQ(std::string(PyUnicode_AsUTF8(stringResult.get())), largeString); + auto bytesResult = object.get("payload"); + ASSERT_EQ(PyBytes_GET_SIZE(bytesResult.get()), static_cast(largeBytes.size())); + ASSERT_EQ(std::memcmp(PyBytes_AsString(bytesResult.get()), largeBytes.data(), largeBytes.size()), 0); + } + + workspace.close(); + } + TEST_F( ObjectInitializerTest, testImmutablePreInitEmbeddableValueDoesNotCreateDurableMemberValue ) { Py_Initialize(); @@ -411,6 +528,56 @@ namespace tests workspace.close(); } + TEST_F( ObjectInitializerTest, testImmutablePreInitEmbedsPythonList ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefix_name); + std::shared_ptr mock_class = getTestClass(fixture); + + { + ObjectImmutableImpl object(mock_class); + auto pyList = Py_OWN(PyList_New(2)); + db0::python::PySafeList_SetItem(pyList.get(), 0, Py_OWN(PyLong_FromLong(7))); + db0::python::PySafeList_SetItem(pyList.get(), 1, Py_OWN(PyUnicode_FromString("seven"))); + object.setPreInit("items", db0::bindings::TypeId::LIST, pyList.get()); + + auto *initializer = dynamic_cast(InitManager::instance.findInitializer(object)); + ASSERT_NE(initializer, nullptr); + + auto [memberId, isInitVar] = mock_class->findField("items"); + (void)isInitVar; + ASSERT_TRUE(memberId); + auto loc = memberId.get(0).getIndexAndOffset(); + + std::pair storedValue; + ASSERT_FALSE(initializer->tryGetAt(loc, storedValue)); + + ImmutableObjectInitializer::ObjectSharedPtr storedObject; + ASSERT_TRUE(initializer->tryGetObjectAt(loc, storedObject)); + ASSERT_EQ(storedObject.get(), pyList.get()); + + { + db0::FixtureLock lock(fixture); + object.postInit(lock); + } + + auto *embeddedValue = object->variableValue(loc.first); + ASSERT_NE(embeddedValue, nullptr); + ASSERT_EQ(embeddedValue->itemKind(), StorageClass::DB0_TUPLE); + + const auto &tuple = o_tuple<>::__const_ref(embeddedValue->embeddedPayload().begin()); + ASSERT_EQ(tuple.size(), 2u); + ASSERT_EQ(tuple.item(0).itemKind(), StorageClass::PACKED_INT32); + ASSERT_EQ(tuple.item(0).packedIntPayload().value(), 7u); + ASSERT_EQ(tuple.item(1).itemKind(), StorageClass::STRING_REF); + ASSERT_EQ(tuple.item(1).stringPayload().toString(), "seven"); + } + + workspace.close(); + } + TEST_F( ObjectInitializerTest, testImmutablePreInitChangingRegularValueToLoFiClearsEmbeddedObject ) { Py_Initialize(); From 1a737c8f0c279cb07130adfee89d30ce6f630ba5 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 20 May 2026 14:36:13 +0200 Subject: [PATCH 4/8] embedding other instances --- src/dbzero/bindings/python/PyToolkit.cpp | 6 +- src/dbzero/object_model/dict/o_dict.cpp | 76 ++++++++++----- .../object_model/object/ObjectImplBase.cpp | 1 + .../object_model/object/o_embedded_object.cpp | 37 +++++++ src/dbzero/object_model/set/o_set.cpp | 76 ++++++++++----- src/dbzero/object_model/tuple/o_tuple.cpp | 58 +++++++---- src/dbzero/object_model/tuple/o_tuple.hpp | 11 ++- .../object_model/value/StorageClass.cpp | 9 ++ .../object_model/value/StorageClass.hpp | 8 ++ tests/unit_tests/EmbeddedDictTest.cpp | 4 + tests/unit_tests/EmbeddedObjectTest.cpp | 96 +++++++++++++++++-- tests/unit_tests/EmbeddedSetTest.cpp | 10 +- tests/unit_tests/EmbeddedTupleTest.cpp | 32 +++---- tests/unit_tests/ObjectInitializerTest.cpp | 95 ++++++++++++++++-- 14 files changed, 412 insertions(+), 107 deletions(-) diff --git a/src/dbzero/bindings/python/PyToolkit.cpp b/src/dbzero/bindings/python/PyToolkit.cpp index 5e346274..33a57580 100644 --- a/src/dbzero/bindings/python/PyToolkit.cpp +++ b/src/dbzero/bindings/python/PyToolkit.cpp @@ -68,7 +68,8 @@ namespace db0::python PyToolkit::ObjectSharedPtr PyToolkit::unloadEmbeddedInstance(const db0::object_model::o_tuple_item &item) { switch (item.itemKind()) { - case StorageClass::STRING_REF: { + case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: { auto str = item.stringPayload().get(); auto result = Py_OWN(PyUnicode_FromStringAndSize(str.get_raw(), str.size())); if (!result) { @@ -76,7 +77,8 @@ namespace db0::python } return result; } - case StorageClass::DB0_BYTES: { + case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: { const auto &bytes = item.bytesPayload(); auto result = Py_OWN(PyBytes_FromStringAndSize( reinterpret_cast(bytes.getBuffer()), bytes.size() diff --git a/src/dbzero/object_model/dict/o_dict.cpp b/src/dbzero/object_model/dict/o_dict.cpp index d9225514..b6de7b31 100644 --- a/src/dbzero/object_model/dict/o_dict.cpp +++ b/src/dbzero/object_model/dict/o_dict.cpp @@ -364,7 +364,12 @@ namespace db0::object_model { auto lhsIsInt = lhs.m_kind == StorageClass::INT64 || lhs.m_kind == StorageClass::PACKED_INT32; auto rhsIsInt = rhs.m_kind == StorageClass::INT64 || rhs.m_kind == StorageClass::PACKED_INT32; - if (lhs.m_kind != rhs.m_kind && !(lhsIsInt && rhsIsInt)) { + auto lhsIsString = lhs.m_kind == StorageClass::STRING_REF || lhs.m_kind == StorageClass::EMBEDDED_STRING; + auto rhsIsString = rhs.m_kind == StorageClass::STRING_REF || rhs.m_kind == StorageClass::EMBEDDED_STRING; + auto lhsIsBytes = lhs.m_kind == StorageClass::DB0_BYTES || lhs.m_kind == StorageClass::EMBEDDED_BYTES; + auto rhsIsBytes = rhs.m_kind == StorageClass::DB0_BYTES || rhs.m_kind == StorageClass::EMBEDDED_BYTES; + if (lhs.m_kind != rhs.m_kind && !(lhsIsInt && rhsIsInt) && !(lhsIsString && rhsIsString) + && !(lhsIsBytes && rhsIsBytes)) { return false; } @@ -379,13 +384,15 @@ namespace db0::object_model case StorageClass::FP_NUMERIC64: return lhs.doubleValue() == rhs.doubleValue(); case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: return lhs.m_payload.m_string_value == rhs.m_payload.m_string_value; case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: return lhs.bytesSize() == rhs.bytesSize() && bytesEqual(lhs.bytesData(), rhs.bytesData(), lhs.bytesSize()); - case StorageClass::DB0_TUPLE: - case StorageClass::DB0_SET: - case StorageClass::DB0_DICT: - case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_TUPLE: + case StorageClass::EMBEDDED_SET: + case StorageClass::EMBEDDED_DICT: + case StorageClass::EMBEDDED_OBJECT: return lhs.bytesSize() == rhs.bytesSize() && bytesEqual(lhs.bytesData(), rhs.bytesData(), lhs.bytesSize()); case StorageClass::PTIME64: case StorageClass::DATE: @@ -405,7 +412,16 @@ namespace db0::object_model { auto itemIsInt = item.itemKind() == StorageClass::INT64 || item.itemKind() == StorageClass::PACKED_INT32; auto elementIsInt = element.m_kind == StorageClass::INT64 || element.m_kind == StorageClass::PACKED_INT32; - if (item.itemKind() != element.m_kind && !(itemIsInt && elementIsInt)) { + auto itemIsString = item.itemKind() == StorageClass::STRING_REF + || item.itemKind() == StorageClass::EMBEDDED_STRING; + auto elementIsString = element.m_kind == StorageClass::STRING_REF + || element.m_kind == StorageClass::EMBEDDED_STRING; + auto itemIsBytes = item.itemKind() == StorageClass::DB0_BYTES + || item.itemKind() == StorageClass::EMBEDDED_BYTES; + auto elementIsBytes = element.m_kind == StorageClass::DB0_BYTES + || element.m_kind == StorageClass::EMBEDDED_BYTES; + if (item.itemKind() != element.m_kind && !(itemIsInt && elementIsInt) + && !(itemIsString && elementIsString) && !(itemIsBytes && elementIsBytes)) { return false; } @@ -424,14 +440,16 @@ namespace db0::object_model case StorageClass::FP_NUMERIC64: return item.doublePayload().value() == element.doubleValue(); case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: return item.stringPayload().toString() == element.stringValue(); case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: return item.bytesPayload().size() == element.bytesSize() && bytesEqual(item.bytesPayload().begin(), element.bytesData(), element.bytesSize()); - case StorageClass::DB0_TUPLE: - case StorageClass::DB0_SET: - case StorageClass::DB0_DICT: - case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_TUPLE: + case StorageClass::EMBEDDED_SET: + case StorageClass::EMBEDDED_DICT: + case StorageClass::EMBEDDED_OBJECT: return item.embeddedPayload().size() == element.bytesSize() && bytesEqual(item.embeddedPayload().begin(), element.bytesData(), element.bytesSize()); case StorageClass::PTIME64: @@ -518,19 +536,21 @@ namespace db0::object_model return Element::integer(static_cast(item.packedIntPayload().value())); case StorageClass::FP_NUMERIC64: return Element::floating(item.doublePayload().value()); - case StorageClass::STRING_REF: { + case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: { auto str = item.stringPayload().get(); return Element::string(std::string_view(str.get_raw(), str.size())); } case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: return Element::bytes(item.bytesPayload().begin(), item.bytesPayload().size()); - case StorageClass::DB0_TUPLE: + case StorageClass::EMBEDDED_TUPLE: return Element::embeddedTuple(item.embeddedPayload().begin(), item.embeddedPayload().size()); - case StorageClass::DB0_SET: + case StorageClass::EMBEDDED_SET: return Element::embeddedSet(item.embeddedPayload().begin(), item.embeddedPayload().size()); - case StorageClass::DB0_DICT: + case StorageClass::EMBEDDED_DICT: return Element::embeddedDict(item.embeddedPayload().begin(), item.embeddedPayload().size()); - case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_OBJECT: return Element::embeddedObject(item.embeddedPayload().begin(), item.embeddedPayload().size()); case StorageClass::PTIME64: return Element::timestamp(item.uint64Payload().value()); @@ -555,6 +575,8 @@ namespace db0::object_model std::uint32_t o_dict::elementHash(const Element &element) { auto seedKind = element.m_kind == StorageClass::PACKED_INT32 ? StorageClass::INT64 : element.m_kind; + seedKind = seedKind == StorageClass::EMBEDDED_STRING ? StorageClass::STRING_REF : seedKind; + seedKind = seedKind == StorageClass::EMBEDDED_BYTES ? StorageClass::DB0_BYTES : seedKind; auto seed = 0x9e3779b9U ^ static_cast(seedKind); switch (element.m_kind) { case StorageClass::NONE: @@ -567,15 +589,17 @@ namespace db0::object_model case StorageClass::FP_NUMERIC64: return hashBytes(&element.m_payload.m_double_value, sizeof(element.m_payload.m_double_value), seed); case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: return hashBytes( element.m_payload.m_string_value.data(), element.m_payload.m_string_value.size(), seed ); case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: return hashBytes(element.bytesData(), element.bytesSize(), seed); - case StorageClass::DB0_TUPLE: - case StorageClass::DB0_SET: - case StorageClass::DB0_DICT: - case StorageClass::OBJECT_REF: { + case StorageClass::EMBEDDED_TUPLE: + case StorageClass::EMBEDDED_SET: + case StorageClass::EMBEDDED_DICT: + case StorageClass::EMBEDDED_OBJECT: { if (element.m_payload.m_bytes_value.m_writer) { std::vector payload(element.bytesSize()); element.m_payload.m_bytes_value.m_writer(payload.data(), element.m_payload.m_bytes_value.m_source); @@ -600,6 +624,8 @@ namespace db0::object_model std::uint32_t o_dict::itemHash(const Item &item) { auto seedKind = item.itemKind() == StorageClass::PACKED_INT32 ? StorageClass::INT64 : item.itemKind(); + seedKind = seedKind == StorageClass::EMBEDDED_STRING ? StorageClass::STRING_REF : seedKind; + seedKind = seedKind == StorageClass::EMBEDDED_BYTES ? StorageClass::DB0_BYTES : seedKind; auto seed = 0x9e3779b9U ^ static_cast(seedKind); switch (item.itemKind()) { case StorageClass::NONE: @@ -620,16 +646,18 @@ namespace db0::object_model auto value = item.doublePayload().value(); return hashBytes(&value, sizeof(value), seed); } - case StorageClass::STRING_REF: { + case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: { auto str = item.stringPayload().get(); return hashBytes(str.get_raw(), str.size(), seed); } case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: return hashBytes(item.bytesPayload().begin(), item.bytesPayload().size(), seed); - case StorageClass::DB0_TUPLE: - case StorageClass::DB0_SET: - case StorageClass::DB0_DICT: - case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_TUPLE: + case StorageClass::EMBEDDED_SET: + case StorageClass::EMBEDDED_DICT: + case StorageClass::EMBEDDED_OBJECT: return hashBytes(item.embeddedPayload().begin(), item.embeddedPayload().size(), seed); case StorageClass::PTIME64: case StorageClass::DATE: diff --git a/src/dbzero/object_model/object/ObjectImplBase.cpp b/src/dbzero/object_model/object/ObjectImplBase.cpp index 9a990101..4d46f21e 100644 --- a/src/dbzero/object_model/object/ObjectImplBase.cpp +++ b/src/dbzero/object_model/object/ObjectImplBase.cpp @@ -46,6 +46,7 @@ namespace db0::object_model case StorageClass::DB0_TUPLE: case StorageClass::DB0_SET: case StorageClass::DB0_DICT: + case StorageClass::OBJECT_REF: return true; default: return false; diff --git a/src/dbzero/object_model/object/o_embedded_object.cpp b/src/dbzero/object_model/object/o_embedded_object.cpp index 9882803f..e8d7866a 100644 --- a/src/dbzero/object_model/object/o_embedded_object.cpp +++ b/src/dbzero/object_model/object/o_embedded_object.cpp @@ -5,7 +5,9 @@ #include #include +#include #include +#include #include #include @@ -30,6 +32,36 @@ namespace db0::object_model o_py_dict::__new(buf, const_cast(static_cast(source))); } + const ImmutableObjectInitializer &getInitializer(PyObject *pyObject) + { + using MemoImmutableObject = db0::python::PyToolkit::TypeManager::MemoImmutableObject; + + assert(db0::python::PyToolkit::isMemoImmutableObject(pyObject)); + + const auto &object = db0::python::PyToolkit::getTypeManager() + .template extractObject(pyObject); + if (object.hasInstance()) { + THROWF(db0::InputException) + << "Only non-materialized immutable memo objects can be embedded"; + } + + auto *initializer = dynamic_cast( + InitManager::instance.findInitializer(object) + ); + if (!initializer) { + THROWF(db0::InputException) + << "Non-materialized immutable memo object has no active initializer"; + } + return *initializer; + } + + void writeEmbeddedObject(void *buf, const void *source) + { + auto *pyObject = const_cast(static_cast(source)); + const auto &initializer = getInitializer(pyObject); + o_embedded_object::__new(buf, initializer.getClassPtr()->getClassRef(), initializer); + } + o_dict::Element fieldMapElementFromObject( StorageClass storageClass, ImmutableObjectInitializer::ObjectSharedPtr object ) @@ -62,6 +94,11 @@ namespace db0::object_model auto size = o_py_dict::measure(pyObject); return o_dict::Element::embeddedDict(size, writePyDict, pyObject); } + case StorageClass::OBJECT_REF: { + const auto &initializer = getInitializer(pyObject); + auto size = o_embedded_object::measure(initializer.getClassPtr()->getClassRef(), initializer); + return o_dict::Element::embeddedObject(size, writeEmbeddedObject, pyObject); + } default: THROWF(db0::InputException) << "Storage class cannot be stored in embedded field map: " << storageClass; diff --git a/src/dbzero/object_model/set/o_set.cpp b/src/dbzero/object_model/set/o_set.cpp index b75243d7..d770e115 100644 --- a/src/dbzero/object_model/set/o_set.cpp +++ b/src/dbzero/object_model/set/o_set.cpp @@ -273,7 +273,12 @@ namespace db0::object_model { auto lhsIsInt = lhs.m_kind == StorageClass::INT64 || lhs.m_kind == StorageClass::PACKED_INT32; auto rhsIsInt = rhs.m_kind == StorageClass::INT64 || rhs.m_kind == StorageClass::PACKED_INT32; - if (lhs.m_kind != rhs.m_kind && !(lhsIsInt && rhsIsInt)) { + auto lhsIsString = lhs.m_kind == StorageClass::STRING_REF || lhs.m_kind == StorageClass::EMBEDDED_STRING; + auto rhsIsString = rhs.m_kind == StorageClass::STRING_REF || rhs.m_kind == StorageClass::EMBEDDED_STRING; + auto lhsIsBytes = lhs.m_kind == StorageClass::DB0_BYTES || lhs.m_kind == StorageClass::EMBEDDED_BYTES; + auto rhsIsBytes = rhs.m_kind == StorageClass::DB0_BYTES || rhs.m_kind == StorageClass::EMBEDDED_BYTES; + if (lhs.m_kind != rhs.m_kind && !(lhsIsInt && rhsIsInt) && !(lhsIsString && rhsIsString) + && !(lhsIsBytes && rhsIsBytes)) { return false; } @@ -288,13 +293,15 @@ namespace db0::object_model case StorageClass::FP_NUMERIC64: return lhs.doubleValue() == rhs.doubleValue(); case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: return lhs.m_payload.m_string_value == rhs.m_payload.m_string_value; case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: return lhs.bytesSize() == rhs.bytesSize() && bytesEqual(lhs.bytesData(), rhs.bytesData(), lhs.bytesSize()); - case StorageClass::DB0_TUPLE: - case StorageClass::DB0_SET: - case StorageClass::DB0_DICT: - case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_TUPLE: + case StorageClass::EMBEDDED_SET: + case StorageClass::EMBEDDED_DICT: + case StorageClass::EMBEDDED_OBJECT: return lhs.bytesSize() == rhs.bytesSize() && bytesEqual(lhs.bytesData(), rhs.bytesData(), lhs.bytesSize()); case StorageClass::PTIME64: case StorageClass::DATE: @@ -314,7 +321,16 @@ namespace db0::object_model { auto itemIsInt = item.itemKind() == StorageClass::INT64 || item.itemKind() == StorageClass::PACKED_INT32; auto elementIsInt = element.m_kind == StorageClass::INT64 || element.m_kind == StorageClass::PACKED_INT32; - if (item.itemKind() != element.m_kind && !(itemIsInt && elementIsInt)) { + auto itemIsString = item.itemKind() == StorageClass::STRING_REF + || item.itemKind() == StorageClass::EMBEDDED_STRING; + auto elementIsString = element.m_kind == StorageClass::STRING_REF + || element.m_kind == StorageClass::EMBEDDED_STRING; + auto itemIsBytes = item.itemKind() == StorageClass::DB0_BYTES + || item.itemKind() == StorageClass::EMBEDDED_BYTES; + auto elementIsBytes = element.m_kind == StorageClass::DB0_BYTES + || element.m_kind == StorageClass::EMBEDDED_BYTES; + if (item.itemKind() != element.m_kind && !(itemIsInt && elementIsInt) + && !(itemIsString && elementIsString) && !(itemIsBytes && elementIsBytes)) { return false; } @@ -333,14 +349,16 @@ namespace db0::object_model case StorageClass::FP_NUMERIC64: return item.doublePayload().value() == element.doubleValue(); case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: return item.stringPayload().toString() == element.stringValue(); case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: return item.bytesPayload().size() == element.bytesSize() && bytesEqual(item.bytesPayload().begin(), element.bytesData(), element.bytesSize()); - case StorageClass::DB0_TUPLE: - case StorageClass::DB0_SET: - case StorageClass::DB0_DICT: - case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_TUPLE: + case StorageClass::EMBEDDED_SET: + case StorageClass::EMBEDDED_DICT: + case StorageClass::EMBEDDED_OBJECT: return item.embeddedPayload().size() == element.bytesSize() && bytesEqual(item.embeddedPayload().begin(), element.bytesData(), element.bytesSize()); case StorageClass::PTIME64: @@ -421,19 +439,21 @@ namespace db0::object_model return Element::integer(static_cast(item.packedIntPayload().value())); case StorageClass::FP_NUMERIC64: return Element::floating(item.doublePayload().value()); - case StorageClass::STRING_REF: { + case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: { auto str = item.stringPayload().get(); return Element::string(std::string_view(str.get_raw(), str.size())); } case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: return Element::bytes(item.bytesPayload().begin(), item.bytesPayload().size()); - case StorageClass::DB0_TUPLE: + case StorageClass::EMBEDDED_TUPLE: return Element::embeddedTuple(item.embeddedPayload().begin(), item.embeddedPayload().size()); - case StorageClass::DB0_SET: + case StorageClass::EMBEDDED_SET: return Element::embeddedSet(item.embeddedPayload().begin(), item.embeddedPayload().size()); - case StorageClass::DB0_DICT: + case StorageClass::EMBEDDED_DICT: return Element::embeddedDict(item.embeddedPayload().begin(), item.embeddedPayload().size()); - case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_OBJECT: return Element::embeddedObject(item.embeddedPayload().begin(), item.embeddedPayload().size()); case StorageClass::PTIME64: return Element::timestamp(item.uint64Payload().value()); @@ -458,6 +478,8 @@ namespace db0::object_model std::uint32_t o_set::elementHash(const Element &element) { auto seedKind = element.m_kind == StorageClass::PACKED_INT32 ? StorageClass::INT64 : element.m_kind; + seedKind = seedKind == StorageClass::EMBEDDED_STRING ? StorageClass::STRING_REF : seedKind; + seedKind = seedKind == StorageClass::EMBEDDED_BYTES ? StorageClass::DB0_BYTES : seedKind; auto seed = 0x9e3779b9U ^ static_cast(seedKind); switch (element.m_kind) { case StorageClass::NONE: @@ -470,15 +492,17 @@ namespace db0::object_model case StorageClass::FP_NUMERIC64: return hashBytes(&element.m_payload.m_double_value, sizeof(element.m_payload.m_double_value), seed); case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: return hashBytes( element.m_payload.m_string_value.data(), element.m_payload.m_string_value.size(), seed ); case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: return hashBytes(element.bytesData(), element.bytesSize(), seed); - case StorageClass::DB0_TUPLE: - case StorageClass::DB0_SET: - case StorageClass::DB0_DICT: - case StorageClass::OBJECT_REF: { + case StorageClass::EMBEDDED_TUPLE: + case StorageClass::EMBEDDED_SET: + case StorageClass::EMBEDDED_DICT: + case StorageClass::EMBEDDED_OBJECT: { if (element.m_payload.m_bytes_value.m_writer) { std::vector payload(element.bytesSize()); element.m_payload.m_bytes_value.m_writer(payload.data(), element.m_payload.m_bytes_value.m_source); @@ -503,6 +527,8 @@ namespace db0::object_model std::uint32_t o_set::itemHash(const Item &item) { auto seedKind = item.itemKind() == StorageClass::PACKED_INT32 ? StorageClass::INT64 : item.itemKind(); + seedKind = seedKind == StorageClass::EMBEDDED_STRING ? StorageClass::STRING_REF : seedKind; + seedKind = seedKind == StorageClass::EMBEDDED_BYTES ? StorageClass::DB0_BYTES : seedKind; auto seed = 0x9e3779b9U ^ static_cast(seedKind); switch (item.itemKind()) { case StorageClass::NONE: @@ -523,16 +549,18 @@ namespace db0::object_model auto value = item.doublePayload().value(); return hashBytes(&value, sizeof(value), seed); } - case StorageClass::STRING_REF: { + case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: { auto str = item.stringPayload().get(); return hashBytes(str.get_raw(), str.size(), seed); } case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: return hashBytes(item.bytesPayload().begin(), item.bytesPayload().size(), seed); - case StorageClass::DB0_TUPLE: - case StorageClass::DB0_SET: - case StorageClass::DB0_DICT: - case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_TUPLE: + case StorageClass::EMBEDDED_SET: + case StorageClass::EMBEDDED_DICT: + case StorageClass::EMBEDDED_OBJECT: return hashBytes(item.embeddedPayload().begin(), item.embeddedPayload().size(), seed); case StorageClass::PTIME64: case StorageClass::DATE: diff --git a/src/dbzero/object_model/tuple/o_tuple.cpp b/src/dbzero/object_model/tuple/o_tuple.cpp index 784fa06a..e7dbbaf8 100644 --- a/src/dbzero/object_model/tuple/o_tuple.cpp +++ b/src/dbzero/object_model/tuple/o_tuple.cpp @@ -51,7 +51,7 @@ namespace db0::object_model o_tuple_item::Element o_tuple_item::Element::string(std::string_view value) { Element result; - result.m_kind = StorageClass::STRING_REF; + result.m_kind = StorageClass::EMBEDDED_STRING; result.m_payload.m_string_value = value; return result; } @@ -59,7 +59,7 @@ namespace db0::object_model o_tuple_item::Element o_tuple_item::Element::bytes(const std::byte *data, std::size_t size) { Element result; - result.m_kind = StorageClass::DB0_BYTES; + result.m_kind = StorageClass::EMBEDDED_BYTES; result.m_payload.m_bytes_value = { data, size }; return result; } @@ -128,7 +128,7 @@ namespace db0::object_model o_tuple_item::Element o_tuple_item::Element::embeddedTuple(const void *data, std::size_t size) { Element result; - result.m_kind = StorageClass::DB0_TUPLE; + result.m_kind = StorageClass::EMBEDDED_TUPLE; result.m_payload.m_bytes_value = { reinterpret_cast(data), size }; return result; } @@ -136,7 +136,7 @@ namespace db0::object_model o_tuple_item::Element o_tuple_item::Element::embeddedSet(const void *data, std::size_t size) { Element result; - result.m_kind = StorageClass::DB0_SET; + result.m_kind = StorageClass::EMBEDDED_SET; result.m_payload.m_bytes_value = { reinterpret_cast(data), size }; return result; } @@ -144,7 +144,7 @@ namespace db0::object_model o_tuple_item::Element o_tuple_item::Element::embeddedDict(const void *data, std::size_t size) { Element result; - result.m_kind = StorageClass::DB0_DICT; + result.m_kind = StorageClass::EMBEDDED_DICT; result.m_payload.m_bytes_value = { reinterpret_cast(data), size }; return result; } @@ -152,7 +152,7 @@ namespace db0::object_model o_tuple_item::Element o_tuple_item::Element::embeddedObject(const void *data, std::size_t size) { Element result; - result.m_kind = StorageClass::OBJECT_REF; + result.m_kind = StorageClass::EMBEDDED_OBJECT; result.m_payload.m_bytes_value = { reinterpret_cast(data), size }; return result; } @@ -162,7 +162,7 @@ namespace db0::object_model ) { Element result; - result.m_kind = StorageClass::DB0_TUPLE; + result.m_kind = StorageClass::EMBEDDED_TUPLE; result.m_payload.m_bytes_value = { nullptr, size, writer, source }; return result; } @@ -172,7 +172,7 @@ namespace db0::object_model ) { Element result; - result.m_kind = StorageClass::DB0_SET; + result.m_kind = StorageClass::EMBEDDED_SET; result.m_payload.m_bytes_value = { nullptr, size, writer, source }; return result; } @@ -182,7 +182,17 @@ namespace db0::object_model ) { Element result; - result.m_kind = StorageClass::DB0_DICT; + result.m_kind = StorageClass::EMBEDDED_DICT; + result.m_payload.m_bytes_value = { nullptr, size, writer, source }; + return result; + } + + o_tuple_item::Element o_tuple_item::Element::embeddedObject( + std::size_t size, BytesView::Writer writer, const void *source + ) + { + Element result; + result.m_kind = StorageClass::EMBEDDED_OBJECT; result.m_payload.m_bytes_value = { nullptr, size, writer, source }; return result; } @@ -247,13 +257,15 @@ namespace db0::object_model case StorageClass::FP_NUMERIC64: return sizeOfMembers()(o_simple::type()); case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: return sizeOfMembers()(o_string::type()); case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: return sizeOfMembers()(o_binary::type()); - case StorageClass::DB0_TUPLE: - case StorageClass::DB0_SET: - case StorageClass::DB0_DICT: - case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_TUPLE: + case StorageClass::EMBEDDED_SET: + case StorageClass::EMBEDDED_DICT: + case StorageClass::EMBEDDED_OBJECT: return sizeOfMembers()(o_binary::type()); case StorageClass::PTIME64: case StorageClass::DATE: @@ -283,13 +295,15 @@ namespace db0::object_model case StorageClass::FP_NUMERIC64: return measureMembers()(o_simple::type(), element.doubleValue()); case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: return measureMembers()(o_string::type(), element.stringValue()); case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: return measureMembers()(o_binary::type(), element.bytesData(), element.bytesSize()); - case StorageClass::DB0_TUPLE: - case StorageClass::DB0_SET: - case StorageClass::DB0_DICT: - case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_TUPLE: + case StorageClass::EMBEDDED_SET: + case StorageClass::EMBEDDED_DICT: + case StorageClass::EMBEDDED_OBJECT: return measureMembers()(o_binary::type(), element.bytesData(), element.bytesSize()); case StorageClass::PTIME64: case StorageClass::DATE: @@ -324,15 +338,17 @@ namespace db0::object_model arrangeMembers()(o_simple::type(), element.doubleValue()); return; case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: arrangeMembers()(o_string::type(), element.stringValue()); return; case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: arrangeMembers()(o_binary::type(), element.bytesData(), element.bytesSize()); return; - case StorageClass::DB0_TUPLE: - case StorageClass::DB0_SET: - case StorageClass::DB0_DICT: - case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_TUPLE: + case StorageClass::EMBEDDED_SET: + case StorageClass::EMBEDDED_DICT: + case StorageClass::EMBEDDED_OBJECT: if (element.m_payload.m_bytes_value.m_writer) { arrangeMembers()( o_binary::type(), element.bytesSize(), element.m_payload.m_bytes_value.m_writer, diff --git a/src/dbzero/object_model/tuple/o_tuple.hpp b/src/dbzero/object_model/tuple/o_tuple.hpp index 6147e6f7..73c8997a 100644 --- a/src/dbzero/object_model/tuple/o_tuple.hpp +++ b/src/dbzero/object_model/tuple/o_tuple.hpp @@ -72,6 +72,7 @@ DB0_PACKED_BEGIN static Element embeddedTuple(std::size_t size, BytesView::Writer writer, const void *source); static Element embeddedSet(std::size_t size, BytesView::Writer writer, const void *source); static Element embeddedDict(std::size_t size, BytesView::Writer writer, const void *source); + static Element embeddedObject(std::size_t size, BytesView::Writer writer, const void *source); std::int64_t intValue() const; std::uint64_t uint64Value() const; @@ -132,15 +133,17 @@ DB0_PACKED_BEGIN cursor += o_simple::safeSizeOf(cursor); return; case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: cursor += o_string::safeSizeOf(cursor); return; case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: cursor += o_binary::safeSizeOf(cursor); return; - case StorageClass::DB0_TUPLE: - case StorageClass::DB0_SET: - case StorageClass::DB0_DICT: - case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_TUPLE: + case StorageClass::EMBEDDED_SET: + case StorageClass::EMBEDDED_DICT: + case StorageClass::EMBEDDED_OBJECT: cursor += o_binary::safeSizeOf(cursor); return; case StorageClass::PTIME64: diff --git a/src/dbzero/object_model/value/StorageClass.cpp b/src/dbzero/object_model/value/StorageClass.cpp index c9a2d109..65bd9d0a 100644 --- a/src/dbzero/object_model/value/StorageClass.cpp +++ b/src/dbzero/object_model/value/StorageClass.cpp @@ -61,6 +61,9 @@ namespace db0::object_model // determine string type dynamically return PreStorageClass::STRING_REF; } + if (type_id == TypeId::MEMO_IMMUTABLE_OBJECT) { + return PreStorageClass::OBJECT_REF; + } auto storage_map = allow_packed ? &m_storage_class_packed_map : &m_storage_class_map; auto int_id = static_cast(type_id); @@ -165,6 +168,12 @@ namespace std case StorageClass::PACK_2: return os << "PACK_2"; case StorageClass::OBJECT_WEAK_REF: return os << "OBJECT_WEAK_REF"; case StorageClass::OBJECT_LONG_WEAK_REF: return os << "OBJECT_LONG_WEAK_REF"; + case StorageClass::EMBEDDED_STRING: return os << "EMBEDDED_STRING"; + case StorageClass::EMBEDDED_BYTES: return os << "EMBEDDED_BYTES"; + case StorageClass::EMBEDDED_TUPLE: return os << "EMBEDDED_TUPLE"; + case StorageClass::EMBEDDED_SET: return os << "EMBEDDED_SET"; + case StorageClass::EMBEDDED_DICT: return os << "EMBEDDED_DICT"; + case StorageClass::EMBEDDED_OBJECT: return os << "EMBEDDED_OBJECT"; case StorageClass::PACKED_INT32: return os << "PACKED_INT32"; case StorageClass::INVALID: return os << "INVALID"; default: return os << "ERROR!"; diff --git a/src/dbzero/object_model/value/StorageClass.hpp b/src/dbzero/object_model/value/StorageClass.hpp index 7effdb45..010e7e93 100644 --- a/src/dbzero/object_model/value/StorageClass.hpp +++ b/src/dbzero/object_model/value/StorageClass.hpp @@ -116,6 +116,14 @@ namespace db0::object_model DELETED = static_cast(PreStorageClass::DELETED), CALLABLE = static_cast(PreStorageClass::CALLABLE), DB0_WEAK_SET = static_cast(PreStorageClass::DB0_WEAK_SET), + // Embedded variable-length payloads stored inside another immutable object. + // Count down from the top range so future PreStorageClass values can grow upward without collision. + EMBEDDED_STRING = std::numeric_limits::max() - 8, + EMBEDDED_BYTES = std::numeric_limits::max() - 7, + EMBEDDED_TUPLE = std::numeric_limits::max() - 6, + EMBEDDED_SET = std::numeric_limits::max() - 5, + EMBEDDED_DICT = std::numeric_limits::max() - 4, + EMBEDDED_OBJECT = std::numeric_limits::max() - 3, // Embedded immutable integer encoded with packed-int storage. PACKED_INT32 = std::numeric_limits::max() - 2, // weak reference to other (Memo) instance from a foreign prefix diff --git a/tests/unit_tests/EmbeddedDictTest.cpp b/tests/unit_tests/EmbeddedDictTest.cpp index b5ee5174..b2a54387 100644 --- a/tests/unit_tests/EmbeddedDictTest.cpp +++ b/tests/unit_tests/EmbeddedDictTest.cpp @@ -91,9 +91,11 @@ namespace tests key << std::setprecision(17) << element.doubleValue(); break; case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: key << element.stringValue(); break; case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: key << bytesKey(element.bytesData(), element.bytesSize()); break; case StorageClass::PTIME64: @@ -132,9 +134,11 @@ namespace tests key << std::setprecision(17) << item.doublePayload().value(); break; case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: key << item.stringPayload().toString(); break; case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: key << bytesKey(item.bytesPayload().begin(), item.bytesPayload().size()); break; case StorageClass::PTIME64: diff --git a/tests/unit_tests/EmbeddedObjectTest.cpp b/tests/unit_tests/EmbeddedObjectTest.cpp index b745eccf..acdd7aed 100644 --- a/tests/unit_tests/EmbeddedObjectTest.cpp +++ b/tests/unit_tests/EmbeddedObjectTest.cpp @@ -4,15 +4,20 @@ #include #include #include +#include +#include +#include #include #include #include #include +#include #include #include #include #include #include +#include #include @@ -41,6 +46,36 @@ namespace tests return *initializer; } + static db0::python::shared_py_object makeMemoType() + { + static std::uint64_t memoTypeIndex = 0; + auto className = std::string("EmbeddedObjectNestedImmutable") + std::to_string(memoTypeIndex); + auto typeId = "tests/" + className; + ++memoTypeIndex; + + if (PyRun_SimpleString(("class " + className + ": pass\n").c_str()) != 0) { + return {}; + } + + auto mainModule = Py_BORROW(PyImport_AddModule("__main__")); + auto pyClass = Py_OWN(PyObject_GetAttrString(mainModule.get(), className.c_str())); + auto args = Py_OWN(PyTuple_Pack(1, pyClass.get())); + auto kwargs = Py_OWN(PyDict_New()); + auto pyTypeId = Py_OWN(PyUnicode_FromString(typeId.c_str())); + auto pyImmutable = Py_OWN(PyBool_FromLong(1)); + if (!mainModule.get() || !pyClass.get() || !args.get() || !kwargs.get() + || !pyTypeId.get() || !pyImmutable.get()) { + return {}; + } + db0::python::PySafeDict_SetItemString(kwargs.get(), "id", std::move(pyTypeId)); + db0::python::PySafeDict_SetItemString(kwargs.get(), "immutable", std::move(pyImmutable)); + + return db0::python::shared_py_object( + reinterpret_cast(db0::python::PyAPI_wrapPyClass(nullptr, args.get(), kwargs.get())), + false + ); + } + TEST_F( EmbeddedObjectTest , testEmbeddedObjectStoresInitializerPlannedFixedTables ) { auto memspace = getMemspace(); @@ -118,7 +153,7 @@ namespace tests auto *variableValue = object->variableValue(300); ASSERT_NE(variableValue, nullptr); - ASSERT_EQ(variableValue->itemKind(), StorageClass::STRING_REF); + ASSERT_EQ(variableValue->itemKind(), StorageClass::EMBEDDED_STRING); ASSERT_EQ(variableValue->stringPayload().toString(), "root variable string"); ASSERT_EQ(&object->embeddedObject().pos_vt(), &object->pos_vt()); ASSERT_EQ(&object->embeddedObject().index_vt(), &object->index_vt()); @@ -150,11 +185,11 @@ namespace tests ASSERT_EQ(object->getClassRef(), 88u); auto *stringValue = object->variableValue(300); ASSERT_NE(stringValue, nullptr); - ASSERT_EQ(stringValue->itemKind(), StorageClass::STRING_REF); + ASSERT_EQ(stringValue->itemKind(), StorageClass::EMBEDDED_STRING); ASSERT_EQ(stringValue->stringPayload().toString(), "variable string"); auto *bytesValue = object->variableValue(301); ASSERT_NE(bytesValue, nullptr); - ASSERT_EQ(bytesValue->itemKind(), StorageClass::DB0_BYTES); + ASSERT_EQ(bytesValue->itemKind(), StorageClass::EMBEDDED_BYTES); ASSERT_EQ(bytesValue->bytesPayload().size(), 3u); ASSERT_EQ(bytesValue->bytesPayload().begin()[0], std::byte{0x01}); ASSERT_EQ(bytesValue->bytesPayload().begin()[1], std::byte{0x02}); @@ -189,7 +224,7 @@ namespace tests auto *stringValue = object->variableValue(300); ASSERT_NE(stringValue, nullptr); - ASSERT_EQ(stringValue->itemKind(), StorageClass::STRING_REF); + ASSERT_EQ(stringValue->itemKind(), StorageClass::EMBEDDED_STRING); ASSERT_EQ(stringValue->stringPayload().toString(), "new value"); } @@ -240,7 +275,7 @@ namespace tests auto *tupleValue = object->variableValue(400); ASSERT_NE(tupleValue, nullptr); - ASSERT_EQ(tupleValue->itemKind(), StorageClass::DB0_TUPLE); + ASSERT_EQ(tupleValue->itemKind(), StorageClass::EMBEDDED_TUPLE); const auto &payload = tupleValue->embeddedPayload(); const auto &tuple = o_tuple<>::__const_ref(payload.begin()); ASSERT_EQ(tuple.size(), 2u); @@ -304,7 +339,7 @@ namespace tests auto *listValue = object->variableValue(100); ASSERT_NE(listValue, nullptr); - ASSERT_EQ(listValue->itemKind(), StorageClass::DB0_TUPLE); + ASSERT_EQ(listValue->itemKind(), StorageClass::EMBEDDED_TUPLE); const auto &embeddedTuple = o_tuple<>::__const_ref(listValue->embeddedPayload().begin()); ASSERT_EQ(embeddedTuple.size(), 3u); ASSERT_EQ(embeddedTuple.item(0).packedIntPayload().value(), 7u); @@ -313,7 +348,7 @@ namespace tests auto *setValue = object->variableValue(101); ASSERT_NE(setValue, nullptr); - ASSERT_EQ(setValue->itemKind(), StorageClass::DB0_SET); + ASSERT_EQ(setValue->itemKind(), StorageClass::EMBEDDED_SET); const auto &embeddedSet = o_set::__const_ref(setValue->embeddedPayload().begin()); ASSERT_EQ(embeddedSet.size(), 2u); ASSERT_TRUE(embeddedSet.contains(o_set::Element::integer(10))); @@ -321,7 +356,7 @@ namespace tests auto *dictValue = object->variableValue(102); ASSERT_NE(dictValue, nullptr); - ASSERT_EQ(dictValue->itemKind(), StorageClass::DB0_DICT); + ASSERT_EQ(dictValue->itemKind(), StorageClass::EMBEDDED_DICT); const auto &embeddedDict = o_dict::__const_ref(dictValue->embeddedPayload().begin()); ASSERT_EQ(embeddedDict.size(), 2u); auto *nameValue = embeddedDict.get(o_dict::Element::string("name")); @@ -332,6 +367,51 @@ namespace tests ASSERT_EQ(countValue->packedIntPayload().value(), 3u); } + TEST_F( EmbeddedObjectTest , testEmbeddedObjectStoresNestedImmutableMemoPayload ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture("embedded-object-nested-memo"); + auto nestedClass = getTestClass(fixture); + auto pyMemoType = makeMemoType(); + ASSERT_TRUE(pyMemoType.get()); + + auto pyMemo = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(pyMemoType.get()) + )); + pyMemo->makeNew(nestedClass); + auto *nestedInitializer = dynamic_cast( + InitManager::instance.findInitializer(pyMemo->ext()) + ); + ASSERT_NE(nestedInitializer, nullptr); + nestedInitializer->set({0, 0}, StorageClass::INT64, Value(17)); + + auto memspace = getMemspace(); + int sourceObject = 0; + ObjectInitializerManager manager; + auto &initializer = makeInitializer(manager, sourceObject); + initializer.setObject( + {500, 0}, StorageClass::OBJECT_REF, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(reinterpret_cast(pyMemo.get())) + ); + + v_object object(memspace, 88u, initializer); + + auto *nestedValue = object->variableValue(500); + ASSERT_NE(nestedValue, nullptr); + ASSERT_EQ(nestedValue->itemKind(), StorageClass::EMBEDDED_OBJECT); + + const auto &nestedObject = o_embedded_object::__const_ref(nestedValue->embeddedPayload().begin()); + ASSERT_EQ(nestedObject.getClassRef(), nestedClass->getClassRef()); + auto fixedValue = nestedObject.fixedValue(0); + ASSERT_TRUE(fixedValue.has_value()); + ASSERT_EQ(fixedValue->m_kind, StorageClass::INT64); + ASSERT_EQ(fixedValue->m_value, 17u); + + workspace.close(); + } + TEST_F( EmbeddedObjectTest , testEmbeddedObjectMeasureSizeOfAndSafeSizeOf ) { Py_Initialize(); diff --git a/tests/unit_tests/EmbeddedSetTest.cpp b/tests/unit_tests/EmbeddedSetTest.cpp index 75f4d5b7..f473a365 100644 --- a/tests/unit_tests/EmbeddedSetTest.cpp +++ b/tests/unit_tests/EmbeddedSetTest.cpp @@ -48,6 +48,8 @@ namespace tests static std::uint32_t testElementHash(const o_set::Element &element) { auto seedKind = element.m_kind == StorageClass::PACKED_INT32 ? StorageClass::INT64 : element.m_kind; + seedKind = seedKind == StorageClass::EMBEDDED_STRING ? StorageClass::STRING_REF : seedKind; + seedKind = seedKind == StorageClass::EMBEDDED_BYTES ? StorageClass::DB0_BYTES : seedKind; auto seed = 0x9e3779b9U ^ static_cast(seedKind); switch (element.m_kind) { case StorageClass::NONE: @@ -68,11 +70,13 @@ namespace tests auto value = element.doubleValue(); return testHashBytes(&value, sizeof(value), seed); } - case StorageClass::STRING_REF: { + case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: { auto value = element.m_payload.m_string_value; return testHashBytes(value.data(), value.size(), seed); } case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: return testHashBytes(element.bytesData(), element.bytesSize(), seed); case StorageClass::PTIME64: case StorageClass::DATE: @@ -130,9 +134,11 @@ namespace tests key << std::setprecision(17) << element.doubleValue(); break; case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: key << element.stringValue(); break; case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: key << bytesKey(element.bytesData(), element.bytesSize()); break; case StorageClass::PTIME64: @@ -171,9 +177,11 @@ namespace tests key << std::setprecision(17) << item.doublePayload().value(); break; case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: key << item.stringPayload().toString(); break; case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: key << bytesKey(item.bytesPayload().begin(), item.bytesPayload().size()); break; case StorageClass::PTIME64: diff --git a/tests/unit_tests/EmbeddedTupleTest.cpp b/tests/unit_tests/EmbeddedTupleTest.cpp index bc8a71e0..0d508a8e 100644 --- a/tests/unit_tests/EmbeddedTupleTest.cpp +++ b/tests/unit_tests/EmbeddedTupleTest.cpp @@ -98,11 +98,11 @@ namespace tests ASSERT_EQ(tuple->elementsByteSize(), expectedElementsSize); ASSERT_EQ(tuple->item(0).itemKind(), StorageClass::PACKED_INT32); ASSERT_EQ(asInt64(tuple->item(0)), 42); - ASSERT_EQ(tuple->item(1).itemKind(), StorageClass::STRING_REF); + ASSERT_EQ(tuple->item(1).itemKind(), StorageClass::EMBEDDED_STRING); ASSERT_EQ(asString(tuple->item(1)), "alpha"); ASSERT_EQ(tuple->item(2).itemKind(), StorageClass::BOOLEAN); ASSERT_TRUE(asBool(tuple->item(2))); - ASSERT_EQ(tuple->item(3).itemKind(), StorageClass::DB0_BYTES); + ASSERT_EQ(tuple->item(3).itemKind(), StorageClass::EMBEDDED_BYTES); ASSERT_EQ(asBytes(tuple->item(3)), (std::vector{ std::byte{0x01}, std::byte{0x02}, std::byte{0xff} })); } @@ -335,13 +335,13 @@ namespace tests ASSERT_EQ(tuple->size(), 5u); ASSERT_EQ(tuple->item(0).itemKind(), StorageClass::PACKED_INT32); ASSERT_EQ(asInt64(tuple->item(0)), 123); - ASSERT_EQ(tuple->item(1).itemKind(), StorageClass::STRING_REF); + ASSERT_EQ(tuple->item(1).itemKind(), StorageClass::EMBEDDED_STRING); ASSERT_EQ(asString(tuple->item(1)), "python"); ASSERT_EQ(tuple->item(2).itemKind(), StorageClass::BOOLEAN); ASSERT_TRUE(asBool(tuple->item(2))); ASSERT_EQ(tuple->item(3).itemKind(), StorageClass::FP_NUMERIC64); ASSERT_EQ(asDouble(tuple->item(3)), 4.5); - ASSERT_EQ(tuple->item(4).itemKind(), StorageClass::DB0_BYTES); + ASSERT_EQ(tuple->item(4).itemKind(), StorageClass::EMBEDDED_BYTES); ASSERT_EQ(asBytes(tuple->item(4)), (std::vector{ std::byte{0x01}, std::byte{0x02} })); } @@ -358,7 +358,7 @@ namespace tests ASSERT_EQ(o_py_tuple::measure(*pyList), tuple->sizeOf()); ASSERT_EQ(tuple->size(), 2u); ASSERT_EQ(tuple->item(0).itemKind(), StorageClass::NONE); - ASSERT_EQ(tuple->item(1).itemKind(), StorageClass::STRING_REF); + ASSERT_EQ(tuple->item(1).itemKind(), StorageClass::EMBEDDED_STRING); ASSERT_EQ(asString(tuple->item(1)), "list item"); } @@ -423,37 +423,37 @@ namespace tests ASSERT_EQ(o_py_tuple::measure(*pyRoot), tuple->sizeOf()); ASSERT_EQ(tuple->size(), 3u); - ASSERT_EQ(tuple->item(0).itemKind(), StorageClass::DB0_TUPLE); + ASSERT_EQ(tuple->item(0).itemKind(), StorageClass::EMBEDDED_TUPLE); const auto &nestedList = o_tuple<>::__const_ref(tuple->item(0).embeddedPayload().begin()); ASSERT_EQ(nestedList.size(), 2u); ASSERT_EQ(asInt64(nestedList.item(0)), 11); - ASSERT_EQ(nestedList.item(1).itemKind(), StorageClass::DB0_DICT); + ASSERT_EQ(nestedList.item(1).itemKind(), StorageClass::EMBEDDED_DICT); const auto &innerDict = o_dict::__const_ref(nestedList.item(1).embeddedPayload().begin()); auto *innerTupleItem = innerDict.get(o_dict::Element::string("tuple")); ASSERT_NE(innerTupleItem, nullptr); - ASSERT_EQ(innerTupleItem->itemKind(), StorageClass::DB0_TUPLE); + ASSERT_EQ(innerTupleItem->itemKind(), StorageClass::EMBEDDED_TUPLE); const auto &innerTuple = o_tuple<>::__const_ref(innerTupleItem->embeddedPayload().begin()); ASSERT_EQ(innerTuple.size(), 2u); ASSERT_EQ(asInt64(innerTuple.item(0)), 22); - ASSERT_EQ(innerTuple.item(1).itemKind(), StorageClass::DB0_TUPLE); + ASSERT_EQ(innerTuple.item(1).itemKind(), StorageClass::EMBEDDED_TUPLE); const auto &deepList = o_tuple<>::__const_ref(innerTuple.item(1).embeddedPayload().begin()); ASSERT_EQ(deepList.size(), 2u); ASSERT_EQ(asString(deepList.item(0)), "deep"); - ASSERT_EQ(deepList.item(1).itemKind(), StorageClass::DB0_DICT); + ASSERT_EQ(deepList.item(1).itemKind(), StorageClass::EMBEDDED_DICT); const auto &deepDict = o_dict::__const_ref(deepList.item(1).embeddedPayload().begin()); auto *answer = deepDict.get(o_dict::Element::string("answer")); ASSERT_NE(answer, nullptr); ASSERT_EQ(asInt64(*answer), 42); - ASSERT_EQ(tuple->item(1).itemKind(), StorageClass::DB0_DICT); + ASSERT_EQ(tuple->item(1).itemKind(), StorageClass::EMBEDDED_DICT); const auto &rootDict = o_dict::__const_ref(tuple->item(1).embeddedPayload().begin()); auto *numbersItem = rootDict.get(o_dict::Element::string("numbers")); ASSERT_NE(numbersItem, nullptr); - ASSERT_EQ(numbersItem->itemKind(), StorageClass::DB0_TUPLE); + ASSERT_EQ(numbersItem->itemKind(), StorageClass::EMBEDDED_TUPLE); const auto &numbers = o_tuple<>::__const_ref(numbersItem->embeddedPayload().begin()); ASSERT_EQ(numbers.size(), 2u); ASSERT_EQ(asInt64(numbers.item(0)), 3); @@ -461,13 +461,13 @@ namespace tests auto *flagsItem = rootDict.get(o_dict::Element::string("flags")); ASSERT_NE(flagsItem, nullptr); - ASSERT_EQ(flagsItem->itemKind(), StorageClass::DB0_SET); + ASSERT_EQ(flagsItem->itemKind(), StorageClass::EMBEDDED_SET); const auto &flags = o_set::__const_ref(flagsItem->embeddedPayload().begin()); ASSERT_EQ(flags.size(), 2u); ASSERT_TRUE(flags.contains(o_set::Element::boolean(true))); ASSERT_TRUE(flags.contains(o_set::Element::string("ok"))); - ASSERT_EQ(tuple->item(2).itemKind(), StorageClass::DB0_SET); + ASSERT_EQ(tuple->item(2).itemKind(), StorageClass::EMBEDDED_SET); const auto &rootSet = o_set::__const_ref(tuple->item(2).embeddedPayload().begin()); ASSERT_EQ(rootSet.size(), 3u); ASSERT_TRUE(rootSet.contains(o_set::Element::string("root-set"))); @@ -475,7 +475,7 @@ namespace tests const o_tuple_item *setTupleItem = nullptr; for (auto it = rootSet.begin(); it != rootSet.end(); ++it) { - if (it->itemKind() == StorageClass::DB0_TUPLE) { + if (it->itemKind() == StorageClass::EMBEDDED_TUPLE) { setTupleItem = &*it; break; } @@ -546,7 +546,7 @@ namespace tests ASSERT_EQ(asUint64(tuple->item(2)), db0::python::pyTimeToUint64(PyTuple_GET_ITEM(*pyTuple, 2))); ASSERT_EQ(tuple->item(3).itemKind(), StorageClass::DECIMAL); ASSERT_EQ(asUint64(tuple->item(3)), db0::python::pyDecimalToUint64(PyTuple_GET_ITEM(*pyTuple, 3))); - ASSERT_EQ(tuple->item(4).itemKind(), StorageClass::STRING_REF); + ASSERT_EQ(tuple->item(4).itemKind(), StorageClass::EMBEDDED_STRING); ASSERT_EQ(asString(tuple->item(4)), "tail"); } diff --git a/tests/unit_tests/ObjectInitializerTest.cpp b/tests/unit_tests/ObjectInitializerTest.cpp index 486455d4..44d7963a 100644 --- a/tests/unit_tests/ObjectInitializerTest.cpp +++ b/tests/unit_tests/ObjectInitializerTest.cpp @@ -6,11 +6,14 @@ #include #include #include +#include +#include #include #include #include #include #include +#include #include #include #include @@ -45,6 +48,36 @@ namespace tests } }; + static db0::python::shared_py_object makeImmutableMemoType() + { + static std::uint64_t memoTypeIndex = 0; + auto className = std::string("ObjectInitializerNestedImmutable") + std::to_string(memoTypeIndex); + auto typeId = "tests/" + className; + ++memoTypeIndex; + + if (PyRun_SimpleString(("class " + className + ": pass\n").c_str()) != 0) { + return {}; + } + + auto mainModule = Py_BORROW(PyImport_AddModule("__main__")); + auto pyClass = Py_OWN(PyObject_GetAttrString(mainModule.get(), className.c_str())); + auto args = Py_OWN(PyTuple_Pack(1, pyClass.get())); + auto kwargs = Py_OWN(PyDict_New()); + auto pyTypeId = Py_OWN(PyUnicode_FromString(typeId.c_str())); + auto pyImmutable = Py_OWN(PyBool_FromLong(1)); + if (!mainModule.get() || !pyClass.get() || !args.get() || !kwargs.get() + || !pyTypeId.get() || !pyImmutable.get()) { + return {}; + } + db0::python::PySafeDict_SetItemString(kwargs.get(), "id", std::move(pyTypeId)); + db0::python::PySafeDict_SetItemString(kwargs.get(), "immutable", std::move(pyImmutable)); + + return db0::python::shared_py_object( + reinterpret_cast(db0::python::PyAPI_wrapPyClass(nullptr, args.get(), kwargs.get())), + false + ); + } + TEST_F( ObjectInitializerTest, testIncompletePosVT ) { Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); @@ -239,7 +272,7 @@ namespace tests auto *variable_value = embedded_object.variableValue(4); ASSERT_NE(variable_value, nullptr); - ASSERT_EQ(variable_value->itemKind(), StorageClass::STRING_REF); + ASSERT_EQ(variable_value->itemKind(), StorageClass::EMBEDDED_STRING); ASSERT_EQ(variable_value->stringPayload().toString(), "variable-value"); workspace.close(); @@ -275,13 +308,13 @@ namespace tests ASSERT_FALSE(embedded_object.fixedValue(8).has_value()); auto *variable_value = embedded_object.variableValue(8); ASSERT_NE(variable_value, nullptr); - ASSERT_EQ(variable_value->itemKind(), StorageClass::DB0_TUPLE); + ASSERT_EQ(variable_value->itemKind(), StorageClass::EMBEDDED_TUPLE); const auto &tuple = o_tuple<>::__const_ref(variable_value->embeddedPayload().begin()); ASSERT_EQ(tuple.size(), 2u); ASSERT_EQ(tuple.item(0).itemKind(), StorageClass::PACKED_INT32); ASSERT_EQ(tuple.item(0).packedIntPayload().value(), 7u); - ASSERT_EQ(tuple.item(1).itemKind(), StorageClass::STRING_REF); + ASSERT_EQ(tuple.item(1).itemKind(), StorageClass::EMBEDDED_STRING); ASSERT_EQ(tuple.item(1).stringPayload().toString(), "seven"); workspace.close(); @@ -328,7 +361,7 @@ namespace tests auto *list_value = embedded_object.variableValue(8); ASSERT_NE(list_value, nullptr); - ASSERT_EQ(list_value->itemKind(), StorageClass::DB0_TUPLE); + ASSERT_EQ(list_value->itemKind(), StorageClass::EMBEDDED_TUPLE); const auto &tuple = o_tuple<>::__const_ref(list_value->embeddedPayload().begin()); ASSERT_EQ(tuple.size(), 2u); ASSERT_EQ(tuple.item(0).packedIntPayload().value(), 7u); @@ -373,7 +406,7 @@ namespace tests const o_tuple_item *variableValue = nullptr; for (std::uint32_t index = 0; index < 32 && !variableValue; ++index) { auto *candidate = object->variableValue(index); - if (candidate && candidate->itemKind() == StorageClass::STRING_REF) { + if (candidate && candidate->itemKind() == StorageClass::EMBEDDED_STRING) { variableValue = candidate; } } @@ -565,19 +598,67 @@ namespace tests auto *embeddedValue = object->variableValue(loc.first); ASSERT_NE(embeddedValue, nullptr); - ASSERT_EQ(embeddedValue->itemKind(), StorageClass::DB0_TUPLE); + ASSERT_EQ(embeddedValue->itemKind(), StorageClass::EMBEDDED_TUPLE); const auto &tuple = o_tuple<>::__const_ref(embeddedValue->embeddedPayload().begin()); ASSERT_EQ(tuple.size(), 2u); ASSERT_EQ(tuple.item(0).itemKind(), StorageClass::PACKED_INT32); ASSERT_EQ(tuple.item(0).packedIntPayload().value(), 7u); - ASSERT_EQ(tuple.item(1).itemKind(), StorageClass::STRING_REF); + ASSERT_EQ(tuple.item(1).itemKind(), StorageClass::EMBEDDED_STRING); ASSERT_EQ(tuple.item(1).stringPayload().toString(), "seven"); } workspace.close(); } + TEST_F( ObjectInitializerTest, testImmutablePreInitEmbedsNonMaterializedNestedMemo ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefix_name); + auto mockClass = getTestClass(fixture); + auto pyMemoType = makeImmutableMemoType(); + ASSERT_TRUE(pyMemoType.get()); + auto nestedClass = fixture->get().getOrCreateType(pyMemoType.get()); + + { + ObjectImmutableImpl object(mockClass); + auto pyMemo = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(pyMemoType.get()) + )); + pyMemo->makeNew(nestedClass); + auto *nestedInitializer = dynamic_cast( + InitManager::instance.findInitializer(pyMemo->ext()) + ); + ASSERT_NE(nestedInitializer, nullptr); + nestedInitializer->set({0, 0}, StorageClass::INT64, Value(17)); + + object.setPreInit( + "inner", db0::bindings::TypeId::MEMO_IMMUTABLE_OBJECT, reinterpret_cast(pyMemo.get()) + ); + + auto *initializer = dynamic_cast(InitManager::instance.findInitializer(object)); + ASSERT_NE(initializer, nullptr); + + auto [memberId, isInitVar] = mockClass->findField("inner"); + (void)isInitVar; + ASSERT_TRUE(memberId); + auto loc = memberId.get(0).getIndexAndOffset(); + + std::pair storedValue; + ASSERT_FALSE(initializer->tryGetAt(loc, storedValue)); + + ImmutableObjectInitializer::ObjectSharedPtr storedObject; + ASSERT_TRUE(initializer->tryGetObjectAt(loc, storedObject)); + ASSERT_EQ(storedObject.get(), reinterpret_cast(pyMemo.get())); + } + + mockClass.reset(); + nestedClass.reset(); + workspace.close(); + } + TEST_F( ObjectInitializerTest, testImmutablePreInitChangingRegularValueToLoFiClearsEmbeddedObject ) { Py_Initialize(); From 4b1a141d1216fafd43ce26cdb6f2a1d9fbc86459 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 20 May 2026 15:02:44 +0200 Subject: [PATCH 5/8] unref from embedded instances --- src/dbzero/object_model/ObjectBase.hpp | 6 +- .../object/ObjectImmutableImpl.cpp | 50 +++++++ .../object/ObjectImmutableImpl.hpp | 1 + .../object_model/object/ObjectImplBase.cpp | 2 + tests/unit_tests/ObjectInitializerTest.cpp | 133 ++++++++++++++++++ 5 files changed, 189 insertions(+), 3 deletions(-) diff --git a/src/dbzero/object_model/ObjectBase.hpp b/src/dbzero/object_model/ObjectBase.hpp index 584a7497..e4ab16a0 100644 --- a/src/dbzero/object_model/ObjectBase.hpp +++ b/src/dbzero/object_model/ObjectBase.hpp @@ -87,13 +87,13 @@ namespace db0 } // Unregister must be called pre-destruction - void unregister() const + void unregister(bool noDrop = false) const { // remove from the registry (on condition the underlying instance & fixture still exists) if (m_gc_registered && hasInstance()) { auto fixture = this->tryGetFixture(); if (fixture) { - fixture->getGC0().tryRemove((void*)this); + fixture->getGC0().tryRemove((void*)this, noDrop); } m_gc_registered = false; } @@ -311,4 +311,4 @@ namespace db0 *this = std::move(new_instance); } -} \ No newline at end of file +} diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp index 2f493162..a0b2d3da 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp @@ -6,12 +6,56 @@ #include #include #include +#include namespace db0::object_model { GC0_Define(ObjectImmutableImpl) + namespace + { + void unrefNestedEmbeddedObjects(db0::swine_ptr &fixture, const o_embedded_object &embeddedObject); + + void unrefEmbeddedObjectTables(db0::swine_ptr &fixture, const o_embedded_object &embeddedObject) + { + auto &types = embeddedObject.pos_vt().types(); + auto &values = embeddedObject.pos_vt().values(); + auto value = values.begin(); + for (auto type = types.begin(); type != types.end(); ++type, ++value) { + if (*type == StorageClass::DELETED || *type == StorageClass::UNDEFINED) { + continue; + } + unrefMember(fixture, *type, *value); + } + + for (const auto &xvalue: embeddedObject.index_vt().xvalues()) { + if (xvalue.m_type == StorageClass::DELETED || xvalue.m_type == StorageClass::UNDEFINED) { + continue; + } + unrefMember(fixture, xvalue.m_type, xvalue.m_value); + } + } + + void unrefEmbeddedObject(db0::swine_ptr &fixture, const o_embedded_object &embeddedObject) + { + unrefEmbeddedObjectTables(fixture, embeddedObject); + unrefNestedEmbeddedObjects(fixture, embeddedObject); + } + + void unrefNestedEmbeddedObjects(db0::swine_ptr &fixture, const o_embedded_object &embeddedObject) + { + for (const auto &entry: embeddedObject.field_map()) { + const auto &value = entry.value(); + if (value.itemKind() != StorageClass::EMBEDDED_OBJECT) { + // Embedded collection traversal is intentionally left for a later implementation. + continue; + } + unrefEmbeddedObject(fixture, o_embedded_object::__const_ref(value.embeddedPayload().begin())); + } + } + } + ObjectImmutableImpl::ObjectSharedPtr ObjectImmutableImpl::tryGet( MemberLoc memberLoc, bool *isAutoGenerated ) const @@ -109,5 +153,11 @@ namespace db0::object_model result.insert(objType.getMember(FieldID::fromIndex(index)).m_name); } } + + void ObjectImmutableImpl::dropMembers(db0::swine_ptr &fixture, Class &classRef) const + { + super_t::dropMembers(fixture, classRef); + unrefNestedEmbeddedObjects(fixture, (*this)->embeddedObject()); + } } diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.hpp b/src/dbzero/object_model/object/ObjectImmutableImpl.hpp index ee252056..aaa9f037 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.hpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.hpp @@ -33,6 +33,7 @@ namespace db0::object_model ObjectSharedPtr tryGetEmbeddedField(const FieldInfo &) const; void getMembersImpl(std::unordered_set &) const; + void dropMembers(db0::swine_ptr &, Class &) const; }; } diff --git a/src/dbzero/object_model/object/ObjectImplBase.cpp b/src/dbzero/object_model/object/ObjectImplBase.cpp index 4d46f21e..3771954f 100644 --- a/src/dbzero/object_model/object/ObjectImplBase.cpp +++ b/src/dbzero/object_model/object/ObjectImplBase.cpp @@ -828,6 +828,8 @@ namespace db0::object_model template void ObjectImplBase::destroy() { + // Explicit destroy already owns the drop path; avoid recursive GC0 drop while unregistering. + this->unregister(true); if (this->hasInstance()) { // associated class type (may require unloading) auto type = this->m_type; diff --git a/tests/unit_tests/ObjectInitializerTest.cpp b/tests/unit_tests/ObjectInitializerTest.cpp index 44d7963a..3002e857 100644 --- a/tests/unit_tests/ObjectInitializerTest.cpp +++ b/tests/unit_tests/ObjectInitializerTest.cpp @@ -659,6 +659,139 @@ namespace tests workspace.close(); } + TEST_F( ObjectInitializerTest, testDestroyImmutableRootUnrefsEmbeddedNestedObjectMembers ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefix_name); + auto rootClass = getTestClass(fixture); + auto referencedClass = getTestClass(fixture); + auto pyMemoType = makeImmutableMemoType(); + ASSERT_TRUE(pyMemoType.get()); + auto nestedClass = fixture->get().getOrCreateType(pyMemoType.get()); + auto rootLoc = rootClass->addField("inner", 0).get(0).getIndexAndOffset(); + auto nestedLoc = nestedClass->addField("held", 0).get(0).getIndexAndOffset(); + + { + Object referenced(referencedClass); + { + db0::FixtureLock lock(fixture); + referenced.postInit(lock); + } + referenced.incRef(false); + referenced.incRef(false); + ASSERT_EQ(referenced.getRefCounts().second, 2u); + + ObjectImmutableImpl root(rootClass); + auto pyMemo = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(pyMemoType.get()) + )); + pyMemo->makeNew(nestedClass); + auto *nestedInitializer = dynamic_cast( + InitManager::instance.findInitializer(pyMemo->ext()) + ); + ASSERT_NE(nestedInitializer, nullptr); + nestedInitializer->set(nestedLoc, StorageClass::OBJECT_REF, Value(referenced.getAddress())); + + auto *rootInitializer = dynamic_cast( + InitManager::instance.findInitializer(root) + ); + ASSERT_NE(rootInitializer, nullptr); + rootInitializer->setObject( + rootLoc, StorageClass::OBJECT_REF, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(reinterpret_cast(pyMemo.get())) + ); + + { + db0::FixtureLock lock(fixture); + root.postInit(lock); + } + + ASSERT_TRUE(fixture->isAddressValid(root.getAddress(), ObjectImmutableImpl::REALM_ID)); + root.destroy(); + ASSERT_EQ(referenced.getRefCounts().second, 1u); + } + + rootClass.reset(); + referencedClass.reset(); + nestedClass.reset(); + workspace.close(); + } + + TEST_F( ObjectInitializerTest, testDestroyImmutableRootUnrefsRecursivelyEmbeddedNestedObjectMembers ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefix_name); + auto rootClass = getTestClass(fixture); + auto referencedClass = getTestClass(fixture); + auto pyMemoType = makeImmutableMemoType(); + ASSERT_TRUE(pyMemoType.get()); + auto nestedClass = fixture->get().getOrCreateType(pyMemoType.get()); + auto rootLoc = rootClass->addField("outer", 0).get(0).getIndexAndOffset(); + auto outerLoc = nestedClass->addField("inner", 0).get(0).getIndexAndOffset(); + auto innerLoc = nestedClass->addField("held", 0).get(0).getIndexAndOffset(); + + { + Object referenced(referencedClass); + { + db0::FixtureLock lock(fixture); + referenced.postInit(lock); + } + referenced.incRef(false); + referenced.incRef(false); + ASSERT_EQ(referenced.getRefCounts().second, 2u); + + auto pyInnerMemo = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(pyMemoType.get()) + )); + pyInnerMemo->makeNew(nestedClass); + auto *innerInitializer = dynamic_cast( + InitManager::instance.findInitializer(pyInnerMemo->ext()) + ); + ASSERT_NE(innerInitializer, nullptr); + innerInitializer->set(innerLoc, StorageClass::OBJECT_REF, Value(referenced.getAddress())); + + auto pyOuterMemo = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(pyMemoType.get()) + )); + pyOuterMemo->makeNew(nestedClass); + auto *outerInitializer = dynamic_cast( + InitManager::instance.findInitializer(pyOuterMemo->ext()) + ); + ASSERT_NE(outerInitializer, nullptr); + outerInitializer->setObject( + outerLoc, StorageClass::OBJECT_REF, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(reinterpret_cast(pyInnerMemo.get())) + ); + + ObjectImmutableImpl root(rootClass); + auto *rootInitializer = dynamic_cast( + InitManager::instance.findInitializer(root) + ); + ASSERT_NE(rootInitializer, nullptr); + rootInitializer->setObject( + rootLoc, StorageClass::OBJECT_REF, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(reinterpret_cast(pyOuterMemo.get())) + ); + + { + db0::FixtureLock lock(fixture); + root.postInit(lock); + } + + root.destroy(); + ASSERT_EQ(referenced.getRefCounts().second, 1u); + } + + rootClass.reset(); + referencedClass.reset(); + nestedClass.reset(); + workspace.close(); + } + TEST_F( ObjectInitializerTest, testImmutablePreInitChangingRegularValueToLoFiClearsEmbeddedObject ) { Py_Initialize(); From 0cce1711439eb9edd23be1413cae3bccbcd8795c Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 20 May 2026 17:08:26 +0200 Subject: [PATCH 6/8] lifetime management fixes --- src/dbzero/object_model/class/Class.cpp | 33 ++++++++++++++++++++++- src/dbzero/object_model/object/Object.cpp | 19 +++++++++---- src/dbzero/object_model/object/Object.hpp | 5 ++++ 3 files changed, 51 insertions(+), 6 deletions(-) diff --git a/src/dbzero/object_model/class/Class.cpp b/src/dbzero/object_model/class/Class.cpp index 6e2cd832..16e781b4 100644 --- a/src/dbzero/object_model/class/Class.cpp +++ b/src/dbzero/object_model/class/Class.cpp @@ -1059,7 +1059,38 @@ namespace db0::object_model FieldID Class::getPrimaryKey(unsigned int index) const { - assert(index < m_unique_keys.size()); + auto resolvePrimaryKey = [this, index]() -> FieldID { + for (const auto &entry: m_index) { + const auto &memberId = entry.second.first; + if (!memberId) { + continue; + } + if (memberId.size() > 1 && memberId.secondary().first.getIndex() == index) { + return memberId.primary().first; + } + if (memberId.primary().first.getIndex() == index) { + return memberId.primary().first; + } + } + return {}; + }; + + if (index >= m_unique_keys.size() || !m_unique_keys[index]) { + m_member_cache.refresh(); + } + if (index >= m_unique_keys.size() || !m_unique_keys[index]) { + auto primaryKey = resolvePrimaryKey(); + if (!!primaryKey) { + if (m_unique_keys.size() <= index) { + m_unique_keys.resize(index + 1); + } + m_unique_keys[index] = primaryKey; + } + } + if (index >= m_unique_keys.size() || !m_unique_keys[index]) { + // Destruction/schema cleanup must not read past the cache if the primary-key cache is stale. + return FieldID::fromIndex(index); + } return m_unique_keys[index]; } diff --git a/src/dbzero/object_model/object/Object.cpp b/src/dbzero/object_model/object/Object.cpp index 1e2fff1a..37ec57fd 100644 --- a/src/dbzero/object_model/object/Object.cpp +++ b/src/dbzero/object_model/object/Object.cpp @@ -276,7 +276,11 @@ namespace db0::object_model KV_Index *Object::tryGetKV_Index() const { // if KV index address has changed, update the cached instance - if (!m_kv_index || m_kv_index->getAddress() != (*this)->m_kv_address) { + auto shouldOpenIndex = !m_kv_index; + if (!shouldOpenIndex && m_kv_index->getIndexType() != bindex::type::itty) { + shouldOpenIndex = m_kv_index->getAddress() != (*this)->m_kv_address; + } + if (shouldOpenIndex) { if ((*this)->m_kv_address) { m_kv_index = std::make_unique( std::make_pair(&getMemspace(), (*this)->m_kv_address), (*this)->m_kv_type @@ -305,10 +309,11 @@ namespace db0::object_model lofi_store<2>::fromValue(kv_value).set(field_id.getOffset(), value.m_store); xvalue.m_value = kv_value; kv_index_ptr->updateExisting(xvalue); - // in case of the IttyIndex updating an element changes the address + // in case of the IttyIndex updating an element changes the address/type // which needs to be updated in the object if (kv_index_ptr->getIndexType() == bindex::type::itty) { this->modify().m_kv_address = kv_index_ptr->getAddress(); + this->modify().m_kv_type = kv_index_ptr->getIndexType(); } } else { if (kv_index_ptr->insert(xvalue)) { @@ -340,10 +345,11 @@ namespace db0::object_model // mark as deleted in kv-index xvalue.m_type = StorageClass::DELETED; kv_index_ptr->updateExisting(xvalue); - // in case of the IttyIndex updating an element changes the address + // in case of the IttyIndex updating an element changes the address/type // which needs to be updated in the object if (kv_index_ptr->getIndexType() == bindex::type::itty) { this->modify().m_kv_address = kv_index_ptr->getAddress(); + this->modify().m_kv_type = kv_index_ptr->getIndexType(); } } else { auto old_addr = kv_index_ptr->getAddress(); @@ -372,10 +378,11 @@ namespace db0::object_model } xvalue.m_value = value; kv_index_ptr->updateExisting(xvalue); - // in case of the IttyIndex updating an element changes the address + // in case of the IttyIndex updating an element changes the address/type // which needs to be updated in the object if (kv_index_ptr->getIndexType() == bindex::type::itty) { this->modify().m_kv_address = kv_index_ptr->getAddress(); + this->modify().m_kv_type = kv_index_ptr->getIndexType(); } m_type->removeFromSchema(field_id, fidelity, old_type_id); @@ -440,10 +447,11 @@ namespace db0::object_model auto new_type_id = getSchemaTypeId(storage_class, value); m_type->updateSchema(field_id, fidelity, old_type_id, new_type_id); } - // in case of the IttyIndex updating an element changes the address + // in case of the IttyIndex updating an element changes the address/type // which needs to be updated in the object if (kv_index_ptr->getIndexType() == bindex::type::itty) { this->modify().m_kv_address = kv_index_ptr->getAddress(); + this->modify().m_kv_type = kv_index_ptr->getIndexType(); } } else { if (kv_index_ptr->insert(xvalue)) { @@ -624,6 +632,7 @@ namespace db0::object_model { auto unique_addr = this->getUniqueAddress(); auto ext_refs = this->getExtRefs(); + this->destroy(); this->~Object(); // construct a null placeholder new ((void*)this) Object(tag_as_dropped(), unique_addr, ext_refs); diff --git a/src/dbzero/object_model/object/Object.hpp b/src/dbzero/object_model/object/Object.hpp index a8ee7603..d3968b73 100644 --- a/src/dbzero/object_model/object/Object.hpp +++ b/src/dbzero/object_model/object/Object.hpp @@ -23,6 +23,11 @@ namespace db0::object_model : super_t(std::forward(args)...) { } + + ~Object() + { + this->unregister(); + } // Convert singleton into a regular instance void unSingleton(FixtureLock &); From 2b709fd6aae10588fbcc19ea1c86e162c5b39a5e Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 20 May 2026 21:35:09 +0200 Subject: [PATCH 7/8] embedded immutable objects --- dbzero/dbzero/dbzero.py | 2 +- python_tests/test_memo_immutable.py | 56 ++ src/dbzero/bindings/python/EmbeddedObject.cpp | 503 ++++++++++++++++++ src/dbzero/bindings/python/EmbeddedObject.hpp | 65 +++ src/dbzero/bindings/python/Memo.cpp | 25 +- src/dbzero/bindings/python/PyInternalAPI.cpp | 71 ++- src/dbzero/bindings/python/PyInternalAPI.hpp | 8 +- .../bindings/python/PyObjectTagManager.cpp | 11 + src/dbzero/bindings/python/PyToolkit.cpp | 115 ++++ src/dbzero/bindings/python/PyToolkit.hpp | 13 +- src/dbzero/bindings/python/PyTypeManager.cpp | 39 +- src/dbzero/bindings/python/PyTypeManager.hpp | 7 + src/dbzero/bindings/python/dbzero.cpp | 2 + src/dbzero/object_model/ObjectModel.cpp | 5 +- .../object_model/object/ObjectAnyBase.hpp | 2 +- .../object/ObjectImmutableImpl.cpp | 159 +++++- .../object/ObjectImmutableImpl.hpp | 17 + .../object_model/object/ObjectImplBase.cpp | 47 +- .../object_model/object/ObjectInitializer.cpp | 43 ++ .../object_model/object/ObjectInitializer.hpp | 4 +- .../object_model/object/o_embedded_object.cpp | 5 +- src/dbzero/object_model/tags/TagIndex.cpp | 15 +- src/dbzero/object_model/tags/TagIndex.hpp | 5 +- tests/unit_tests/ObjectInitializerTest.cpp | 72 +++ 24 files changed, 1222 insertions(+), 69 deletions(-) create mode 100644 src/dbzero/bindings/python/EmbeddedObject.cpp create mode 100644 src/dbzero/bindings/python/EmbeddedObject.hpp diff --git a/dbzero/dbzero/dbzero.py b/dbzero/dbzero/dbzero.py index c9e4f4dc..21899e3d 100644 --- a/dbzero/dbzero/dbzero.py +++ b/dbzero/dbzero/dbzero.py @@ -10,7 +10,7 @@ def load_dynamic(name, path): def __bootstrap__(): global __bootstrap__, __loader__, __file__ - paths = [os.path.join(os.path.split(__file__)[0]), "/src/dev/build/release", "/usr/local/lib/python3/dist-packages/dbzero/"] + paths = [os.path.join(os.path.split(__file__)[0]), "/src/dev/build/debug", "/usr/local/lib/python3/dist-packages/dbzero/"] __file__ = None for path in paths: if os.path.isdir(path): diff --git a/python_tests/test_memo_immutable.py b/python_tests/test_memo_immutable.py index 1690af6f..5a0b4b6d 100644 --- a/python_tests/test_memo_immutable.py +++ b/python_tests/test_memo_immutable.py @@ -29,6 +29,27 @@ class MemoImmutableLargePayloadClass: data: object +@db0.memo(immutable=True, no_default_tags=True) +@dataclass +class MemoImmutableNestedPayload: + name: str + count: int + + +@db0.memo(immutable=True, no_default_tags=True) +class MemoImmutableNestedHolder: + def __init__(self, name, count, label): + self.nested = MemoImmutableNestedPayload(name=name, count=count) + self.label = label + + +@db0.memo(immutable=True, no_default_tags=True) +class MemoImmutablePreboundNestedHolder: + def __init__(self, nested, label): + self.nested = nested + self.label = label + + @db0.memo(immutable=True, no_default_tags=True) class MemoImmutableReadInConstructor: def __init__(self, data, payload): @@ -93,4 +114,39 @@ def test_read_embedded_immutable_values_inside_constructor(db0_fixture): assert obj.seen_payload == payload assert obj.data == "constructor string" assert obj.payload == payload + + +def test_read_embedded_immutable_nested_object_after_reopen(db0_fixture): + obj = MemoImmutableNestedHolder(name="embedded child", count=5, label="root") + db0.tags(obj).add("keep-embedded-nested") + obj_id = db0.uuid(obj) + + assert obj.nested.name == "embedded child" + assert obj.nested.count == 5 + + del obj + gc.collect() + db0.commit() + db0.close() + db0.init(DB0_DIR) + db0.open("my-test-prefix", "rw") + + reopened = db0.fetch(obj_id) + assert reopened.nested.name == "embedded child" + assert reopened.nested.count == 5 + assert isinstance(reopened.nested, MemoImmutableNestedPayload) + + +def test_prebound_immutable_nested_object_embeds_into_owner(db0_fixture): + inner = MemoImmutableNestedPayload(name="prebound child", count=8) + obj = MemoImmutablePreboundNestedHolder(inner, "root") + db0.tags(obj).add("keep-prebound-embedded") + + assert obj.nested.name == "prebound child" + assert inner.name == "prebound child" + assert inner.count == 8 + assert isinstance(inner, MemoImmutableNestedPayload) + assert db0.is_memo(inner) + with pytest.raises(Exception): + db0.uuid(inner) diff --git a/src/dbzero/bindings/python/EmbeddedObject.cpp b/src/dbzero/bindings/python/EmbeddedObject.cpp new file mode 100644 index 00000000..5ba1d452 --- /dev/null +++ b/src/dbzero/bindings/python/EmbeddedObject.cpp @@ -0,0 +1,503 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#include "EmbeddedObject.hpp" + +#include "MemoObject.hpp" +#include "PyInternalAPI.hpp" +#include "PySafeAPI.hpp" +#include "PyToolkit.hpp" +#include "Utils.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace db0::python +{ + using ObjectSharedPtr = PyTypes::ObjectSharedPtr; + using namespace db0::object_model; + + static_assert(EmbeddedObject::sizeOf() > sizeof(PyObject), "EmbeddedObject must allocate storage for its view state"); + static_assert( + sizeof(EmbeddedObjectRef) <= sizeof(MemoImmutableObject::ExtT), + "EmbeddedObjectRef must fit in MemoImmutableObject native storage for in-place morphing" + ); + static_assert( + alignof(EmbeddedObjectRef) <= alignof(MemoImmutableObject::ExtT), + "EmbeddedObjectRef alignment must be compatible with MemoImmutableObject native storage" + ); + + EmbeddedObjectRef::EmbeddedObjectRef( + PyObject *rootObject, const o_embedded_object *embeddedObject, std::shared_ptr type + ) + : m_root_object(rootObject) + , m_embedded_object(embeddedObject) + , m_type(std::move(type)) + { + Py_XINCREF(m_root_object); + } + + EmbeddedObjectRef::~EmbeddedObjectRef() + { + Py_XDECREF(m_root_object); + } + + PyObject *EmbeddedObjectRef::rootObject() const + { + return m_root_object; + } + + const o_embedded_object &EmbeddedObjectRef::embeddedObject() const + { + return *m_embedded_object; + } + + Class &EmbeddedObjectRef::type() const + { + return *m_type; + } + + namespace + { + EmbeddedObjectRef &embeddedMemoRef(MemoImmutableObject *object) + { + return *reinterpret_cast(const_cast(&object->ext())); + } + + db0::swine_ptr getRootFixture(PyObject *rootObject) + { + return reinterpret_cast(rootObject)->ext().getFixture(); + } + + ObjectSharedPtr unloadMember(EmbeddedObjectRef &embeddedRef, const FieldInfo &fieldInfo) + { + auto fixture = getRootFixture(embeddedRef.rootObject()); + return ObjectImmutableImpl::tryGetEmbeddedField( + fixture, embeddedRef.rootObject(), embeddedRef.embeddedObject(), fieldInfo, + reinterpret_cast(embeddedRef.rootObject())->ext().getMemberFlags() + ); + } + + ObjectSharedPtr tryGetMember(EmbeddedObjectRef &embeddedRef, const char *attrName) + { + auto memberLoc = embeddedRef.type().findField(attrName); + if (!memberLoc.first) { + return {}; + } + for (const auto &fieldInfo: memberLoc.first) { + auto result = unloadMember(embeddedRef, fieldInfo); + if (result.get()) { + return result; + } + } + return {}; + } + + std::unordered_set getEmbeddedMemberNames( + const o_embedded_object &embeddedObject, Class &type + ) + { + std::unordered_set result; + auto &types = embeddedObject.pos_vt().types(); + unsigned int index = types.offset(); + for (unsigned int pos = 0; pos < types.size(); ++pos, ++index) { + if (types[pos] == StorageClass::DELETED || types[pos] == StorageClass::UNDEFINED) { + continue; + } + result.insert(type.getMember(FieldID::fromIndex(index)).m_name); + } + + for (const auto &xvalue: embeddedObject.index_vt().xvalues()) { + if (xvalue.m_type == StorageClass::DELETED || xvalue.m_type == StorageClass::UNDEFINED) { + continue; + } + result.insert(type.getMember(FieldID::fromIndex(xvalue.getIndex())).m_name); + } + + for (const auto &entry: embeddedObject.field_map()) { + const auto &value = entry.value(); + if (value.itemKind() == StorageClass::DELETED || value.itemKind() == StorageClass::UNDEFINED) { + continue; + } + std::uint32_t memberIndex = 0; + if (entry.key().itemKind() == StorageClass::PACKED_INT32) { + memberIndex = entry.key().packedIntPayload().value(); + } else if (entry.key().itemKind() == StorageClass::INT64) { + memberIndex = static_cast(entry.key().intPayload().value()); + } else { + continue; + } + result.insert(type.getMember(FieldID::fromIndex(memberIndex)).m_name); + } + return result; + } + + PyObject *tryEmbeddedObjectGetAttr(EmbeddedObject *self, PyObject *attr) + { + const char *attrName = PyUnicode_AsUTF8(attr); + if (!attrName) { + PyErr_SetString(PyExc_AttributeError, "Invalid attribute name"); + return nullptr; + } + + if (!(attrName[0] == '_' && attrName[1] == 'X' && attrName[2] == '_' && attrName[3] == '_')) { + auto fixture = getRootFixture(self->ext().rootObject()); + fixture->refreshIfUpdated(); + auto member = tryGetMember(self->modifyExt(), attrName); + if (member.get()) { + return member.steal(); + } + } + + return PyObject_GenericGetAttr(reinterpret_cast(self), attr); + } + + PyObject *PyAPI_EmbeddedObject_getattro(EmbeddedObject *self, PyObject *attr) + { + PY_API_FUNC + return runSafe(tryEmbeddedObjectGetAttr, self, attr); + } + + PyObject *tryEmbeddedMemoGetAttr(MemoImmutableObject *self, PyObject *attr) + { + const char *attrName = PyUnicode_AsUTF8(attr); + if (!attrName) { + PyErr_SetString(PyExc_AttributeError, "Invalid attribute name"); + return nullptr; + } + + if (!(attrName[0] == '_' && attrName[1] == 'X' && attrName[2] == '_' && attrName[3] == '_')) { + auto &embeddedRef = embeddedMemoRef(self); + auto fixture = getRootFixture(embeddedRef.rootObject()); + fixture->refreshIfUpdated(); + auto member = tryGetMember(embeddedRef, attrName); + if (member.get()) { + return member.steal(); + } + } + + return PyObject_GenericGetAttr(reinterpret_cast(self), attr); + } + + PyObject *PyAPI_EmbeddedMemo_getattro(MemoImmutableObject *self, PyObject *attr) + { + PY_API_FUNC + return runSafe(tryEmbeddedMemoGetAttr, self, attr); + } + + int PyAPI_EmbeddedMemo_setattro(MemoImmutableObject *, PyObject *, PyObject *) + { + PY_API_FUNC + PyErr_SetString(PyExc_AttributeError, "Cannot modify an embedded immutable memo object"); + return -1; + } + + PyObject *tryEmbeddedObjectStr(EmbeddedObject *self) + { + std::stringstream str; + str << ""; + return PyUnicode_FromString(str.str().c_str()); + } + + PyObject *PyAPI_EmbeddedObject_str(EmbeddedObject *self) + { + PY_API_FUNC + return runSafe(tryEmbeddedObjectStr, self); + } + + void PyAPI_EmbeddedObject_del(EmbeddedObject *self) + { + PY_API_FUNC + if (PyObject_GC_IsTracked(self)) { + PyObject_GC_UnTrack(self); + } + self->destroy(); + Py_TYPE(self)->tp_free(reinterpret_cast(self)); + } + + void PyAPI_EmbeddedMemo_del(MemoImmutableObject *self) + { + PY_API_FUNC + if (Py_IsInitialized()) { + if (PyObject_GC_IsTracked(self)) { + PyObject_GC_UnTrack(self); + } + embeddedMemoRef(self).~EmbeddedObjectRef(); + Py_TYPE(self)->tp_free(reinterpret_cast(self)); + } + } + + int EmbeddedObject_traverse(EmbeddedObject *self, visitproc visit, void *arg) + { + Py_VISIT(self->ext().rootObject()); + return 0; + } + + [[maybe_unused]] int EmbeddedMemo_traverse(MemoImmutableObject *self, visitproc visit, void *arg) + { + Py_VISIT(embeddedMemoRef(self).rootObject()); + return 0; + } + + [[maybe_unused]] int EmbeddedMemo_clear(MemoImmutableObject *) + { + return 0; + } + + PyObject *tryEmbeddedMemoStr(MemoImmutableObject *self) + { + std::stringstream str; + str << "<" << Py_TYPE(self)->tp_base->tp_name + << " embedded instance type=" << embeddedMemoRef(self).type().getName() << ">"; + return PyUnicode_FromString(str.str().c_str()); + } + + PyObject *PyAPI_EmbeddedMemo_str(MemoImmutableObject *self) + { + PY_API_FUNC + return runSafe(tryEmbeddedMemoStr, self); + } + + PyObject *PyAPI_EmbeddedMemo_dir(MemoImmutableObject *self, PyObject *) + { + PY_API_FUNC + auto result = Py_OWN(PyObject_CallMethod( + reinterpret_cast(&PyBaseObject_Type), "__dir__", + "O", reinterpret_cast(self) + )); + if (!result) { + return nullptr; + } + + auto &type = embeddedMemoRef(self).type(); + for (const auto &name: getEmbeddedMemberNames(embeddedMemoRef(self).embeddedObject(), type)) { + auto pyName = Py_OWN(PyUnicode_FromString(name.c_str())); + if (!pyName || PySequence_Contains(*result, *pyName) == 1) { + continue; + } + if (PyList_Append(*result, *pyName) < 0) { + return nullptr; + } + } + return result.steal(); + } + + PyObject *PyAPI_EmbeddedMemo_get_dict(MemoImmutableObject *self, void *) + { + PY_API_FUNC + auto result = Py_OWN(PyDict_New()); + if (!result) { + return nullptr; + } + + auto &type = embeddedMemoRef(self).type(); + for (const auto &name: getEmbeddedMemberNames(embeddedMemoRef(self).embeddedObject(), type)) { + auto value = tryGetMember(embeddedMemoRef(self), name.c_str()); + if (!value.get()) { + continue; + } + auto pyName = Py_OWN(PyUnicode_FromString(name.c_str())); + if (!pyName || PyDict_SetItem(*result, *pyName, *value) < 0) { + return nullptr; + } + } + return result.steal(); + } + + Py_hash_t PyAPI_EmbeddedMemo_hash(MemoImmutableObject *) + { + PY_API_FUNC + PyErr_SetString(PyExc_TypeError, "Embedded immutable memo objects do not have durable identity"); + return -1; + } + + static PyMethodDef EmbeddedMemo_methods[] = { + {"__dir__", (PyCFunction)PyAPI_EmbeddedMemo_dir, METH_NOARGS, nullptr}, + {NULL} + }; + + static PyGetSetDef EmbeddedMemo_getsets[] = { + {"__dict__", (getter)PyAPI_EmbeddedMemo_get_dict, nullptr, nullptr, nullptr}, + {nullptr} + }; + + std::string consumePyErrorMessage(); + + PyTypeObject *createEmbeddedMemoType(PyTypeObject *memoType) + { + std::vector slots = { + {Py_tp_dealloc, reinterpret_cast(PyAPI_EmbeddedMemo_del)}, + {Py_tp_getattro, reinterpret_cast(PyAPI_EmbeddedMemo_getattro)}, + {Py_tp_setattro, reinterpret_cast(PyAPI_EmbeddedMemo_setattro)}, + {Py_tp_methods, reinterpret_cast(EmbeddedMemo_methods)}, + {Py_tp_getset, reinterpret_cast(EmbeddedMemo_getsets)}, + {Py_tp_hash, reinterpret_cast(PyAPI_EmbeddedMemo_hash)}, + {Py_tp_repr, reinterpret_cast(PyAPI_EmbeddedMemo_str)}, + {Py_tp_str, reinterpret_cast(PyAPI_EmbeddedMemo_str)}, + {0, 0} + }; + if (memoType->tp_flags & Py_TPFLAGS_HAVE_GC) { + slots.insert(slots.end() - 1, { + {Py_tp_traverse, reinterpret_cast(EmbeddedMemo_traverse)}, + {Py_tp_clear, reinterpret_cast(EmbeddedMemo_clear)} + }); + } + + std::stringstream typeName; + typeName << memoType->tp_name << ".__dbzero_embedded_view__"; + const char *safeName = PyToolkit::getTypeManager().getPooledString(typeName.str()); + std::uint32_t flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE; + if (memoType->tp_flags & Py_TPFLAGS_HAVE_GC) { + flags |= Py_TPFLAGS_HAVE_GC; + } + flags &= ~Py_TPFLAGS_MANAGED_DICT; + + auto spec = PyType_Spec { + .name = safeName, + .basicsize = static_cast(memoType->tp_basicsize), + .itemsize = 0, + .flags = flags, + .slots = slots.data() + }; + auto bases = Py_OWN(PySafeTuple_Pack(Py_BORROW(memoType))); + auto shadowType = reinterpret_cast(PyType_FromSpecWithBases(&spec, *bases)); + if (!shadowType) { + return nullptr; + } + + shadowType->tp_weaklistoffset = memoType->tp_weaklistoffset; + shadowType->tp_dictoffset = memoType->tp_dictoffset; + if (shadowType->tp_basicsize != memoType->tp_basicsize) { + Py_DECREF(shadowType); + PyErr_SetString(PyExc_RuntimeError, "Embedded memo shadow type size mismatch"); + return nullptr; + } + if ((shadowType->tp_flags & Py_TPFLAGS_HAVE_GC) != (memoType->tp_flags & Py_TPFLAGS_HAVE_GC)) { + Py_DECREF(shadowType); + PyErr_SetString(PyExc_RuntimeError, "Embedded memo shadow type GC flag mismatch"); + return nullptr; + } + + return shadowType; + } + + PyTypeObject *getEmbeddedMemoType(PyTypeObject *memoType) + { + auto *embeddedType = PyToolkit::getTypeManager().getEmbeddedMemoType(memoType, createEmbeddedMemoType); + if (!embeddedType) { + THROWF(db0::InternalException) + << "Unable to create embedded memo shadow type: " << consumePyErrorMessage(); + } + return embeddedType; + } + + std::string consumePyErrorMessage() + { + if (!PyErr_Occurred()) { + return "unknown Python error"; + } + PyObject *ptype = nullptr; + PyObject *pvalue = nullptr; + PyObject *ptraceback = nullptr; + PyErr_Fetch(&ptype, &pvalue, &ptraceback); + PyErr_NormalizeException(&ptype, &pvalue, &ptraceback); + auto str = Py_OWN(PyObject_Str(pvalue ? pvalue : Py_None)); + std::string result = str.get() ? PyUnicode_AsUTF8(*str) : "unable to format Python error"; + Py_XDECREF(ptype); + Py_XDECREF(pvalue); + Py_XDECREF(ptraceback); + return result; + } + } + + PyTypeObject EmbeddedObjectType = { + PyVarObject_HEAD_INIT(nullptr, 0) + .tp_name = "dbzero.EmbeddedObject", + .tp_basicsize = static_cast(EmbeddedObject::sizeOf()), + .tp_itemsize = 0, + .tp_dealloc = reinterpret_cast(PyAPI_EmbeddedObject_del), + .tp_vectorcall_offset = 0, + .tp_getattr = nullptr, + .tp_setattr = nullptr, + .tp_as_async = nullptr, + .tp_repr = reinterpret_cast(PyAPI_EmbeddedObject_str), + .tp_as_number = nullptr, + .tp_as_sequence = nullptr, + .tp_as_mapping = nullptr, + .tp_hash = nullptr, + .tp_call = nullptr, + .tp_str = reinterpret_cast(PyAPI_EmbeddedObject_str), + .tp_getattro = reinterpret_cast(PyAPI_EmbeddedObject_getattro), + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_doc = "dbzero embedded immutable object view", + .tp_traverse = reinterpret_cast(EmbeddedObject_traverse), + .tp_alloc = PyType_GenericAlloc, + .tp_free = PyObject_GC_Del, + }; + + ObjectSharedPtr makeEmbeddedObject( + PyObject *rootObject, const o_embedded_object &embeddedObject, std::shared_ptr type + ) + { + auto *pyObject = reinterpret_cast(EmbeddedObjectType.tp_alloc(&EmbeddedObjectType, 0)); + if (!pyObject) { + return {}; + } + pyObject->makeNew(rootObject, &embeddedObject, std::move(type)); + return Py_OWN(reinterpret_cast(pyObject)); + } + + ObjectSharedPtr makeEmbeddedMemoObject( + PyObject *rootObject, const o_embedded_object &embeddedObject, std::shared_ptr type, + PyTypeObject *memoType + ) + { + auto *embeddedType = getEmbeddedMemoType(memoType); + auto *pyObject = reinterpret_cast(embeddedType->tp_alloc(embeddedType, 0)); + if (!pyObject) { + return {}; + } + new ((void *)const_cast(&pyObject->ext())) + EmbeddedObjectRef(rootObject, &embeddedObject, std::move(type)); + return Py_OWN(reinterpret_cast(pyObject)); + } + + void transformMemoImmutableObjectToEmbedded( + MemoImmutableObject *object, PyObject *rootObject, const o_embedded_object &embeddedObject, + std::shared_ptr type + ) + { + auto *oldType = Py_TYPE(object); + auto *embeddedType = getEmbeddedMemoType(oldType); + if (PyObject_GC_IsTracked(object)) { + PyObject_GC_UnTrack(object); + } + object->destroy(); + new ((void *)const_cast(&object->ext())) + EmbeddedObjectRef(rootObject, &embeddedObject, std::move(type)); + Py_INCREF(embeddedType); + Py_SET_TYPE(object, embeddedType); + Py_DECREF(oldType); + if (Py_TYPE(object)->tp_flags & Py_TPFLAGS_HAVE_GC) { + PyObject_GC_Track(object); + } + } + + bool PyEmbeddedMemoType_Check(PyTypeObject *type) + { + return PyToolkit::getTypeManager().isEmbeddedMemoType(type); + } + + bool PyEmbeddedMemo_Check(PyObject *object) + { + return object && PyEmbeddedMemoType_Check(Py_TYPE(object)); + } +} diff --git a/src/dbzero/bindings/python/EmbeddedObject.hpp b/src/dbzero/bindings/python/EmbeddedObject.hpp new file mode 100644 index 00000000..52a8cdf1 --- /dev/null +++ b/src/dbzero/bindings/python/EmbeddedObject.hpp @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#pragma once + +#include + +#include + +#include "MemoObject.hpp" +#include "PyTypes.hpp" +#include "PyWrapper.hpp" + +namespace db0::object_model +{ + class Class; + class o_embedded_object; +} + +namespace db0::python +{ + class EmbeddedObjectRef + { + public: + EmbeddedObjectRef( + PyObject *rootObject, const db0::object_model::o_embedded_object *embeddedObject, + std::shared_ptr type + ); + ~EmbeddedObjectRef(); + + EmbeddedObjectRef(const EmbeddedObjectRef &) = delete; + EmbeddedObjectRef &operator=(const EmbeddedObjectRef &) = delete; + + PyObject *rootObject() const; + const db0::object_model::o_embedded_object &embeddedObject() const; + db0::object_model::Class &type() const; + + private: + PyObject *m_root_object = nullptr; + const db0::object_model::o_embedded_object *m_embedded_object = nullptr; + std::shared_ptr m_type; + }; + + using EmbeddedObject = PyWrapper; + + extern PyTypeObject EmbeddedObjectType; + + PyTypes::ObjectSharedPtr makeEmbeddedObject( + PyObject *rootObject, const db0::object_model::o_embedded_object &embeddedObject, + std::shared_ptr type + ); + + PyTypes::ObjectSharedPtr makeEmbeddedMemoObject( + PyObject *rootObject, const db0::object_model::o_embedded_object &embeddedObject, + std::shared_ptr type, PyTypeObject *memoType + ); + + void transformMemoImmutableObjectToEmbedded( + MemoImmutableObject *object, PyObject *rootObject, const db0::object_model::o_embedded_object &embeddedObject, + std::shared_ptr type + ); + + bool PyEmbeddedMemo_Check(PyObject *object); + bool PyEmbeddedMemoType_Check(PyTypeObject *type); +} diff --git a/src/dbzero/bindings/python/Memo.cpp b/src/dbzero/bindings/python/Memo.cpp index 4a920a4c..93e20834 100644 --- a/src/dbzero/bindings/python/Memo.cpp +++ b/src/dbzero/bindings/python/Memo.cpp @@ -2,9 +2,11 @@ // Copyright (c) 2025 DBZero Software sp. z o.o. #include "Memo.hpp" +#include "EmbeddedObject.hpp" #include "PyToolkit.hpp" #include #include +#include #include "PySnapshot.hpp" #include "PyInternalAPI.hpp" #include "Utils.hpp" @@ -282,10 +284,24 @@ namespace db0::python return -1; } + if constexpr (std::is_same_v) { + auto &initializer = db0::object_model::InitManager::instance.getInitializer(self->ext()); + const Class *class_ptr = &initializer.getClass(); + // Keep a freshly-created immutable object deferred while it has no durable references. + // It will be materialized later when it is referenced or embedded; default type tags are + // assigned only after materialization and must not force it here. + if (!class_ptr->isSingleton() && initializer.getRefCounts() == std::make_pair(0u, 0u)) { + return 0; + } + } + // invoke post-init on associated dbzero object auto &object = self->modifyExt(); db0::FixtureLock fixture(object.getFixture()); object.postInit(fixture); + if constexpr (std::is_same_v) { + object.setLangObject(reinterpret_cast(self)); + } // need to call modifyExt again after postInit because the instance has just been created // and potentially needs to be included in the AtomicContext @@ -629,12 +645,6 @@ namespace db0::python PY_API_FUNC // assign value to a dbzero attribute try { - // must materialize the object before setting as an attribute - if (value && !db0::object_model::isMaterialized(value)) { - db0::FixtureLock lock(self->ext().getFixture()); - db0::object_model::materialize(lock, value); - } - if (self->ext().hasInstance()) { PyErr_SetString(PyExc_AttributeError, "Cannot modify an immutable memo object"); return -1; @@ -1161,7 +1171,8 @@ namespace db0::python PyObject *tryPyMemoCheck(PyObject *py_obj) { - if (PyAnyMemo_Check(py_obj) || (PyType_Check(py_obj) && PyAnyMemoType_Check(reinterpret_cast(py_obj)))) { + if (PyAnyMemo_Check(py_obj) || PyEmbeddedMemo_Check(py_obj) + || (PyType_Check(py_obj) && PyAnyMemoType_Check(reinterpret_cast(py_obj)))) { Py_RETURN_TRUE; } Py_RETURN_FALSE; diff --git a/src/dbzero/bindings/python/PyInternalAPI.cpp b/src/dbzero/bindings/python/PyInternalAPI.cpp index 9dd9558e..d8bec903 100644 --- a/src/dbzero/bindings/python/PyInternalAPI.cpp +++ b/src/dbzero/bindings/python/PyInternalAPI.cpp @@ -2,11 +2,14 @@ // Copyright (c) 2025 DBZero Software sp. z o.o. #include "PyInternalAPI.hpp" +#include "EmbeddedObject.hpp" #include "PyToolkit.hpp" #include "Memo.hpp" #include #include #include +#include +#include #include #include #include @@ -796,20 +799,79 @@ namespace db0::python } } - template - PyObject *getMaterializedMemoObject(MemoImplT *memo_obj) + PyObject *materializeMemoObject(MemoObject *memo_obj) { if (memo_obj->ext().hasInstance()) { Py_INCREF(memo_obj); return memo_obj; } - + db0::FixtureLock lock(memo_obj->ext().getFixture()); // materialize by calling postInit memo_obj->modifyExt().postInit(lock); + if (!memo_obj->ext().getType().isNoCache()) { + memo_obj->ext().getFixture()->getLangCache().add(memo_obj->ext().getAddress(), memo_obj); + } Py_INCREF(memo_obj); return memo_obj; } + + PyObject *materializeMemoObject(MemoImmutableObject *memo_obj) + { + if (memo_obj->ext().hasInstance()) { + Py_INCREF(memo_obj); + return memo_obj; + } + + auto fixture = memo_obj->ext().getFixture(); + db0::FixtureLock lock(fixture); + // materialize by calling postInit + memo_obj->modifyExt().postInit(lock, [&](const auto &initializer) { + auto &classFactory = fixture->get(); + for (const auto &value: initializer.objects()) { + if (value.m_storage_class == db0::object_model::StorageClass::DELETED) { + continue; + } + if (value.m_storage_class != db0::object_model::StorageClass::EMBEDDED_OBJECT) { + continue; + } + assert(value.m_object.get()); + + auto *pyObject = value.m_object.get(); + assert(PyMemo_Check(pyObject)); + + auto *embeddedValue = (memo_obj->ext())->variableValue(value.m_loc.first); + assert(embeddedValue); + assert(embeddedValue->itemKind() == db0::object_model::StorageClass::EMBEDDED_OBJECT); + const auto &embeddedObject = db0::object_model::o_embedded_object::__const_ref( + embeddedValue->embeddedPayload().begin() + ); + auto type = classFactory.getTypeByClassRef(embeddedObject.getClassRef()).m_class; + auto *embeddedMemo = reinterpret_cast(pyObject); + transformMemoImmutableObjectToEmbedded( + embeddedMemo, reinterpret_cast(memo_obj), embeddedObject, std::move(type) + ); + } + }); + memo_obj->modifyExt().setLangObject(reinterpret_cast(memo_obj)); + if (!memo_obj->ext().getType().isNoCache()) { + fixture->getLangCache().add(memo_obj->ext().getAddress(), memo_obj); + } + Py_INCREF(memo_obj); + return memo_obj; + } + + template <> + PyObject *getMaterializedMemoObject(MemoObject *memo_obj) + { + return materializeMemoObject(memo_obj); + } + + template <> + PyObject *getMaterializedMemoObject(MemoImmutableObject *memo_obj) + { + return materializeMemoObject(memo_obj); + } shared_py_object tryUnloadObjectFromCache(LangCacheView &lang_cache, Address address, std::shared_ptr expected_type) @@ -1097,7 +1159,4 @@ namespace db0::python } #endif - template PyObject *getMaterializedMemoObject(MemoObject *); - template PyObject *getMaterializedMemoObject(MemoImmutableObject *); - } diff --git a/src/dbzero/bindings/python/PyInternalAPI.hpp b/src/dbzero/bindings/python/PyInternalAPI.hpp index 85b1b7ea..ab45d2d6 100644 --- a/src/dbzero/bindings/python/PyInternalAPI.hpp +++ b/src/dbzero/bindings/python/PyInternalAPI.hpp @@ -208,6 +208,10 @@ namespace db0::python template PyObject *getMaterializedMemoObject(MemoImplT *py_obj); + template <> + PyObject *getMaterializedMemoObject(MemoObject *py_obj); + template <> + PyObject *getMaterializedMemoObject(MemoImmutableObject *py_obj); // Retrieve prefix (its Fixture objects) from the optional argument "prefix" db0::swine_ptr getOptionalPrefixFromArg(db0::Snapshot &workspace, const char *prefix_name); @@ -249,8 +253,4 @@ namespace db0::python std::optional page_io_step_size = {}); PyObject *tryCopyPrefix(PyObject *args, PyObject *kwargs); - extern template PyObject *getMaterializedMemoObject(MemoObject *); - extern template PyObject *getMaterializedMemoObject(MemoImmutableObject *); - } - diff --git a/src/dbzero/bindings/python/PyObjectTagManager.cpp b/src/dbzero/bindings/python/PyObjectTagManager.cpp index c39ecc87..d222c9ef 100644 --- a/src/dbzero/bindings/python/PyObjectTagManager.cpp +++ b/src/dbzero/bindings/python/PyObjectTagManager.cpp @@ -106,6 +106,17 @@ namespace db0::python if (!PyAnyMemo_Check(args[i])) { THROWF(db0::InputException) << "All arguments must be dbzero memo objects"; } + if (PyMemo_Check(args[i])) { + auto *memoObject = reinterpret_cast(args[i]); + if (!memoObject->ext().hasInstance()) { + auto materialized = Py_OWN(getMaterializedMemoObject(memoObject)); + } + } else if (PyMemo_Check(args[i])) { + auto *memoObject = reinterpret_cast(args[i]); + if (!memoObject->ext().hasInstance()) { + auto materialized = Py_OWN(getMaterializedMemoObject(memoObject)); + } + } } auto tags_obj = Py_OWN(PyObjectTagManager_new(&PyObjectTagManagerType, NULL, NULL)); diff --git a/src/dbzero/bindings/python/PyToolkit.cpp b/src/dbzero/bindings/python/PyToolkit.cpp index 33a57580..cb9d8016 100644 --- a/src/dbzero/bindings/python/PyToolkit.cpp +++ b/src/dbzero/bindings/python/PyToolkit.cpp @@ -2,6 +2,7 @@ // Copyright (c) 2025 DBZero Software sp. z o.o. #include "PyToolkit.hpp" +#include "EmbeddedObject.hpp" #include "Memo.hpp" #include "MemoExpiredRef.hpp" #include "PyInternalAPI.hpp" @@ -94,6 +95,101 @@ namespace db0::python } return {}; } + + PyToolkit::ObjectSharedPtr PyToolkit::unloadEmbeddedInstance( + db0::swine_ptr &fixture, ObjectPtr rootObject, const db0::object_model::o_tuple_item &item + ) + { + switch (item.itemKind()) { + case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: { + auto str = item.stringPayload().get(); + auto result = Py_OWN(PyUnicode_FromStringAndSize(str.get_raw(), str.size())); + if (!result) { + THROWF(db0::InputException) << "Failed to convert embedded string"; + } + return result; + } + case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: { + const auto &bytes = item.bytesPayload(); + auto result = Py_OWN(PyBytes_FromStringAndSize( + reinterpret_cast(bytes.getBuffer()), bytes.size() + )); + if (!result) { + THROWF(db0::InputException) << "Failed to convert embedded bytes"; + } + return result; + } + case StorageClass::EMBEDDED_OBJECT: { + if (!rootObject) { + THROWF(db0::InputException) << "Embedded object retrieval requires a root memo object"; + } + const auto &embeddedObject = db0::object_model::o_embedded_object::__const_ref( + item.embeddedPayload().begin() + ); + auto &classFactory = fixture->get(); + auto type = classFactory.getTypeByClassRef(embeddedObject.getClassRef()).m_class; + auto memoType = classFactory.getLangType(*type); + if (memoType.get()) { + return makeEmbeddedMemoObject(rootObject, embeddedObject, std::move(type), memoType.get()); + } + return makeEmbeddedObject(rootObject, embeddedObject, std::move(type)); + } + default: + THROWF(db0::InputException) + << "Unsupported embedded immutable member storage class: " << item.itemKind(); + } + return {}; + } + + bool PyToolkit::hasMemoInstance(ObjectPtr pyObject) + { + if (PyMemo_Check(pyObject)) { + return reinterpret_cast(pyObject)->ext().hasInstance(); + } + return getTypeManager().extractAnyObject(pyObject).hasInstance(); + } + + UniqueAddress PyToolkit::getMemoUniqueAddress(ObjectPtr pyObject) + { + if (PyMemo_Check(pyObject)) { + return reinterpret_cast(pyObject)->ext().getUniqueAddress(); + } + return getTypeManager().extractAnyObject(pyObject).getUniqueAddress(); + } + + bool PyToolkit::isMemoDead(ObjectPtr pyObject) + { + if (PyMemo_Check(pyObject)) { + return reinterpret_cast(pyObject)->ext().isDead(); + } + return getTypeManager().extractAnyObject(pyObject).isDead(); + } + + bool PyToolkit::isMemoDropped(ObjectPtr pyObject) + { + if (PyMemo_Check(pyObject)) { + return reinterpret_cast(pyObject)->ext().isDropped(); + } + return getTypeManager().extractAnyObject(pyObject).isDropped(); + } + + bool PyToolkit::hasMemoAnyRefs(ObjectPtr pyObject) + { + if (PyMemo_Check(pyObject)) { + return reinterpret_cast(pyObject)->ext().hasAnyRefs(); + } + return getTypeManager().extractAnyObject(pyObject).hasAnyRefs(); + } + + const object_model::Class &PyToolkit::getMemoType(ObjectPtr pyObject) + { + if (PyMemo_Check(pyObject)) { + return reinterpret_cast(pyObject)->ext().getType(); + } + return getTypeManager().extractAnyObject(pyObject).getType(); + } void PyToolkit::throwErrorWithPyErrorCheck(const std::string& message, const std::string& error_detail) { if (PyErr_Occurred()) { @@ -375,6 +471,7 @@ namespace db0::python memo_ptr->unload( fixture, std::move(immutableStem), type, db0::object_model::ObjectImmutableImpl::with_type_hint{} ); + memo_ptr->ext().setLangObject(reinterpret_cast(memo_ptr)); obj_ptr = Py_OWN(reinterpret_cast(memo_ptr)); if (!memo_ptr->ext().isNoCache()) { lang_cache.add(address, obj_ptr.get()); @@ -1121,6 +1218,24 @@ namespace db0::python return py_object != nullptr; } + template + void incRefMemoImpl(bool is_tag, MemoImplT *memo_obj) + { + memo_obj->modifyExt().incRef(is_tag); + } + + void PyToolkit::incRefMemo(bool is_tag, ObjectPtr py_object) + { + if (PyMemo_Check(py_object)) { + incRefMemoImpl(is_tag, reinterpret_cast(py_object)); + } else if (PyMemo_Check(py_object)) { + incRefMemoImpl(is_tag, reinterpret_cast(py_object)); + } else { + assert(false); + THROWF(db0::InputException) << "Invalid memo object type for incRefMemo" << THROWF_END; + } + } + template bool decRefMemoImpl(bool is_tag, MemoImplT *memo_obj) { diff --git a/src/dbzero/bindings/python/PyToolkit.hpp b/src/dbzero/bindings/python/PyToolkit.hpp index bd7d0de3..2a33d633 100644 --- a/src/dbzero/bindings/python/PyToolkit.hpp +++ b/src/dbzero/bindings/python/PyToolkit.hpp @@ -30,6 +30,7 @@ namespace db0::object_model { class o_tuple_item; + class o_embedded_object; class Object; class Class; class ClassFactory; @@ -134,6 +135,9 @@ namespace db0::python static ObjectSharedPtr unloadDict(db0::swine_ptr, Address, std::uint16_t instance_id = 0, AccessFlags = {}); static ObjectSharedPtr unloadTuple(db0::swine_ptr, Address, std::uint16_t instance_id = 0, AccessFlags = {}); static ObjectSharedPtr unloadEmbeddedInstance(const db0::object_model::o_tuple_item &); + static ObjectSharedPtr unloadEmbeddedInstance( + db0::swine_ptr &, ObjectPtr root_object, const db0::object_model::o_tuple_item & + ); // Unload dbzero block instance static ObjectSharedPtr unloadBlock(db0::swine_ptr, Address, std::uint16_t instance_id = 0, AccessFlags = {}); @@ -210,6 +214,12 @@ namespace db0::python static bool isImmutable(TypeObjectPtr); static bool isProtectFields(TypeObjectPtr); static FlagSet getMemoFlags(TypeObjectPtr); + static bool hasMemoInstance(ObjectPtr); + static UniqueAddress getMemoUniqueAddress(ObjectPtr); + static bool isMemoDead(ObjectPtr); + static bool isMemoDropped(ObjectPtr); + static bool hasMemoAnyRefs(ObjectPtr); + static const object_model::Class &getMemoType(ObjectPtr); inline static void incRef(ObjectPtr py_object) { Py_INCREF(py_object); @@ -271,7 +281,8 @@ namespace db0::python // NOTE: returns nullptr if Python not initialized / defunct static std::unique_ptr ensureLocked(); - // decRef operation for memo objects + // ref-count operations for memo objects + static void incRefMemo(bool is_tag, ObjectPtr py_object); // @return true if reference count was decremented to zero (!hasRefs) static bool decRefMemo(bool is_tag, ObjectPtr py_object); diff --git a/src/dbzero/bindings/python/PyTypeManager.cpp b/src/dbzero/bindings/python/PyTypeManager.cpp index 0c3b24d2..af46c66e 100644 --- a/src/dbzero/bindings/python/PyTypeManager.cpp +++ b/src/dbzero/bindings/python/PyTypeManager.cpp @@ -117,7 +117,10 @@ namespace db0::python } for (auto &pair: m_enum_cache) { pair.second.steal(); - } + } + for (auto &pair: m_embedded_memo_types) { + pair.second.steal(); + } m_py_bad_prefix_error.steal(); m_py_class_not_found_error.steal(); m_py_reference_error.steal(); @@ -559,6 +562,37 @@ namespace db0::python for (auto &item: m_type_registry) { item.second.close(); } + m_embedded_memo_types.clear(); + } + + PyTypeManager::TypeObjectPtr PyTypeManager::getEmbeddedMemoType( + TypeObjectPtr memo_type, const std::function &create_type + ) + { + auto existing = m_embedded_memo_types.find(memo_type); + if (existing != m_embedded_memo_types.end()) { + return existing->second.get(); + } + + auto *embedded_type = create_type(memo_type); + if (!embedded_type) { + return nullptr; + } + m_embedded_memo_types[memo_type] = TypeObjectSharedPtr(embedded_type, false); + return embedded_type; + } + + bool PyTypeManager::isEmbeddedMemoType(TypeObjectPtr type) const + { + if (!type) { + return false; + } + for (const auto &[_, embedded_type]: m_embedded_memo_types) { + if (embedded_type.get() == type) { + return true; + } + } + return false; } PyTypeManager::ObjectPtr PyTypeManager::getBadPrefixError() const { @@ -602,8 +636,7 @@ namespace db0::python bool PyTypeManager::isMemoBase(TypeObjectPtr py_type) const { - assert(m_memo_base_type); - return py_type == m_memo_base_type; + return m_memo_base_type && py_type == m_memo_base_type; } bool PyTypeManager::isdbzeroTypeId(TypeId type_id) const { diff --git a/src/dbzero/bindings/python/PyTypeManager.hpp b/src/dbzero/bindings/python/PyTypeManager.hpp index 7164a8df..e95ba951 100644 --- a/src/dbzero/bindings/python/PyTypeManager.hpp +++ b/src/dbzero/bindings/python/PyTypeManager.hpp @@ -206,6 +206,11 @@ namespace db0::python bool isSimplePyType(ObjectPtr) const; bool isSimplePyTypeId(TypeId type_id) const; + + TypeObjectPtr getEmbeddedMemoType( + TypeObjectPtr memo_type, const std::function &create_type + ); + bool isEmbeddedMemoType(TypeObjectPtr type) const; // Decode either of: None, False or True from a lo-fi code ObjectSharedPtr getLangConstant(unsigned int) const; @@ -223,6 +228,8 @@ namespace db0::python std::unordered_map m_type_cache; // lang enums by name variant std::unordered_map m_enum_cache; + // Heap shadow types used to expose embedded immutable memo objects as instances of their Python memo class. + std::unordered_map m_embedded_memo_types; mutable ObjectSharedPtr m_py_bad_prefix_error; // error associated with missing / invalid type accessed (e.g. missing import) mutable ObjectSharedPtr m_py_class_not_found_error; diff --git a/src/dbzero/bindings/python/dbzero.cpp b/src/dbzero/bindings/python/dbzero.cpp index abbeb243..3846add9 100644 --- a/src/dbzero/bindings/python/dbzero.cpp +++ b/src/dbzero/bindings/python/dbzero.cpp @@ -7,6 +7,7 @@ #include "PyInternalAPI.hpp" #include "PyTagsAPI.hpp" #include "PyObjectTagManager.hpp" +#include "EmbeddedObject.hpp" #include "PySnapshot.hpp" #include "PyTagSet.hpp" #include "PyAtomic.hpp" @@ -202,6 +203,7 @@ PyMODINIT_FUNC PyInit_dbzero(void) &py::DictObjectType, &py::DictIteratorObjectType, &py::PyObjectTagManagerType, + &py::EmbeddedObjectType, &py::PySnapshotObjectType, &py::PyObjectIterableType, &py::PyObjectIteratorType, diff --git a/src/dbzero/object_model/ObjectModel.cpp b/src/dbzero/object_model/ObjectModel.cpp index 4d9401b6..2a8532e6 100644 --- a/src/dbzero/object_model/ObjectModel.cpp +++ b/src/dbzero/object_model/ObjectModel.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -38,7 +39,7 @@ namespace db0::object_model return [](db0::swine_ptr &fixture, bool is_new, bool read_only, bool is_snapshot) { // static GC0 bindings initialization - GC0::registerTypes(); + GC0::registerTypes(); auto &oc = fixture->getObjectCatalogue(); if (is_new) { assert(!is_snapshot); @@ -145,4 +146,4 @@ namespace db0::object_model }; } -} \ No newline at end of file +} diff --git a/src/dbzero/object_model/object/ObjectAnyBase.hpp b/src/dbzero/object_model/object/ObjectAnyBase.hpp index eeddb8a4..381236ea 100644 --- a/src/dbzero/object_model/object/ObjectAnyBase.hpp +++ b/src/dbzero/object_model/object/ObjectAnyBase.hpp @@ -164,4 +164,4 @@ namespace db0::object_model } -DECLARE_ENUM_VALUES(db0::object_model::ObjectOptions, 2) \ No newline at end of file +DECLARE_ENUM_VALUES(db0::object_model::ObjectOptions, 2) diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp index a0b2d3da..c87b93ef 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp @@ -6,8 +6,12 @@ #include #include #include +#include #include +#include +#include + namespace db0::object_model { @@ -15,6 +19,19 @@ namespace db0::object_model namespace { + FlagSet getAccessOptions(const Class &type) + { + return type.isNoCache() ? FlagSet { AccessOptions::no_cache } : FlagSet {}; + } + + std::uint8_t safeNumTypeTags(unsigned int value) + { + if (value > std::numeric_limits::max()) { + THROWF(db0::InputException) << "Too many base classes"; + } + return static_cast(value); + } + void unrefNestedEmbeddedObjects(db0::swine_ptr &fixture, const o_embedded_object &embeddedObject); void unrefEmbeddedObjectTables(db0::swine_ptr &fixture, const o_embedded_object &embeddedObject) @@ -56,6 +73,69 @@ namespace db0::object_model } } + void ObjectImmutableImpl::postInit(FixtureLock &fixture) + { + postInit(fixture, {}); + } + + void ObjectImmutableImpl::postInit( + FixtureLock &fixture, const std::function &preClose + ) + { + if (!this->hasInstance()) { + auto &initializer = InitManager::instance.getInitializer(*this); + auto *immutableInitializer = dynamic_cast(&initializer); + assert(immutableInitializer); + + PosVT::Data posVtData; + unsigned int posVtOffset = 0; + auto indexVtData = initializer.getData(posVtData, posVtOffset); + + this->m_type = initializer.getClassPtr(); + assert(this->m_type); + + auto &type = *this->m_type; + auto numTypeTags = safeNumTypeTags(type.getNumBases() + 1); + + this->init(*fixture, type.getClassRef(), initializer.getRefCounts(), numTypeTags, + *immutableInitializer, getAccessOptions(type) + ); + for (const auto &objectValue: immutableInitializer->objects()) { + type.addToSchema(objectValue.m_loc.first, objectValue.m_storage_class, {}); + } + + type.incRef(false); + type.updateSchema(posVtOffset, posVtData.m_types, posVtData.m_values); + type.updateSchema(indexVtData.first, indexVtData.second); + + if (type.isSingleton()) { + type.setSingletonAddress(*this); + } + if (preClose) { + preClose(*immutableInitializer); + } + initializer.close(); + } + + assert(this->hasInstance()); + } + + void ObjectImmutableImpl::setLangObject(ObjectPtr object) const + { + m_lang_object = object; + } + + ObjectImmutableImpl::ObjectPtr ObjectImmutableImpl::getLangObject() const + { + return m_lang_object; + } + + void ObjectImmutableImpl::destroy() + { + m_lang_object = nullptr; + super_t::destroy(); + } + ObjectImmutableImpl::ObjectSharedPtr ObjectImmutableImpl::tryGet( MemberLoc memberLoc, bool *isAutoGenerated ) const @@ -111,7 +191,17 @@ namespace db0::object_model if (this->hasInstance()) { auto *embeddedValue = (*this)->variableValue(fieldId.getIndex()); - return embeddedValue ? python::PyToolkit::unloadEmbeddedInstance(*embeddedValue) : ObjectSharedPtr(); + if (!embeddedValue) { + return {}; + } + auto fixture = this->getFixture(); + auto rootObject = getLangObject(); + LangConfig::LangToolkit::ObjectSharedExtPtr cachedRootObject; + if (!rootObject) { + cachedRootObject = fixture->getLangCache().get(this->getAddress()); + rootObject = cachedRootObject.get(); + } + return python::PyToolkit::unloadEmbeddedInstance(fixture, rootObject, *embeddedValue); } auto *initializer = dynamic_cast( @@ -125,6 +215,33 @@ namespace db0::object_model return initializer->tryGetObjectAt(fieldId.getIndexAndOffset(), object) ? object : ObjectSharedPtr(); } + ObjectImmutableImpl::ObjectSharedPtr ObjectImmutableImpl::tryGetEmbeddedField( + db0::swine_ptr &fixture, ObjectPtr rootObject, const o_embedded_object &embeddedObject, + const FieldInfo &fieldInfo, AccessFlags memberFlags + ) + { + const auto &[fieldId, fidelity] = fieldInfo; + if (!fieldId) { + return {}; + } + + if (auto fixedValue = embeddedObject.fixedValue(fieldId.getIndex(), fieldId.getOffset())) { + return unloadMember( + fixture, fixedValue->m_kind, Value(fixedValue->m_value), 0, memberFlags + ); + } + + if (fidelity != 0) { + return {}; + } + + auto *embeddedValue = embeddedObject.variableValue(fieldId.getIndex()); + if (!embeddedValue) { + return {}; + } + return python::PyToolkit::unloadEmbeddedInstance(fixture, rootObject, *embeddedValue); + } + ObjectImmutableImpl::ObjectSharedPtr ObjectImmutableImpl::get(const char *fieldName) const { auto obj = tryGet(fieldName); @@ -139,6 +256,46 @@ namespace db0::object_model void ObjectImmutableImpl::getMembersImpl(std::unordered_set &result) const { + if (!this->hasInstance()) { + auto *initializer = dynamic_cast( + InitManager::instance.findInitializer(*this) + ); + if (!initializer) { + return; + } + + auto &objType = initializer->getClass(); + PosVT::Data posVtData; + unsigned int posVtOffset = 0; + auto indexVtData = initializer->getData(posVtData, posVtOffset); + + for (std::size_t pos = 0; pos < posVtData.size(); ++pos) { + getMembersFrom( + objType, static_cast(pos + posVtOffset), posVtData.m_types[pos], + posVtData.m_values[pos], result + ); + } + for (auto xvalue = indexVtData.first; xvalue != indexVtData.second; ++xvalue) { + getMembersFrom(objType, xvalue->getIndex(), xvalue->m_type, xvalue->m_value, result); + } + + std::unordered_map embeddedObjectMembers; + for (const auto &objectValue: initializer->objects()) { + if (objectValue.m_loc.second != 0) { + continue; + } + if (!objectValue.m_object || objectValue.m_storage_class == StorageClass::DELETED) { + embeddedObjectMembers.erase(objectValue.m_loc.first); + } else { + embeddedObjectMembers[objectValue.m_loc.first] = true; + } + } + for (const auto &[index, _]: embeddedObjectMembers) { + result.insert(objType.getMember(FieldID::fromIndex(index)).m_name); + } + return; + } + super_t::getMembersImpl(result); auto &objType = this->getType(); for (const auto &entry: (*this)->field_map()) { diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.hpp b/src/dbzero/object_model/object/ObjectImmutableImpl.hpp index aaa9f037..0e7737b6 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.hpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.hpp @@ -6,6 +6,8 @@ #include "ObjectImplBase.hpp" #include "o_immutable_object.hpp" +#include + namespace db0::object_model { @@ -28,12 +30,27 @@ namespace db0::object_model ObjectSharedPtr tryGet(const char *field_name, bool *is_auto_generated = nullptr) const; ObjectSharedPtr get(const char *field_name) const; + void postInit(FixtureLock &); + void postInit(FixtureLock &, const std::function &); + void setLangObject(ObjectPtr) const; + void destroy(); + + static ObjectSharedPtr tryGetEmbeddedField( + db0::swine_ptr &, ObjectPtr root_object, const o_embedded_object &, + const FieldInfo &, AccessFlags member_flags + ); + protected: friend super_t; ObjectSharedPtr tryGetEmbeddedField(const FieldInfo &) const; void getMembersImpl(std::unordered_set &) const; void dropMembers(db0::swine_ptr &, Class &) const; + + private: + ObjectPtr getLangObject() const; + + mutable ObjectPtr m_lang_object = nullptr; }; } diff --git a/src/dbzero/object_model/object/ObjectImplBase.cpp b/src/dbzero/object_model/object/ObjectImplBase.cpp index 3771954f..4752d9c9 100644 --- a/src/dbzero/object_model/object/ObjectImplBase.cpp +++ b/src/dbzero/object_model/object/ObjectImplBase.cpp @@ -4,7 +4,6 @@ #include "ObjectImplBase.hpp" #include #include -#include #include #include #include @@ -163,6 +162,11 @@ namespace db0::object_model template void ObjectImplBase::postInit(FixtureLock &fixture) { + if constexpr (std::is_same_v) { + assert(false && "ObjectImmutableImpl::postInit must be used for immutable objects"); + return; + } + if (!this->hasInstance()) { auto &initializer = InitManager::instance.getInitializer(*this); PosVT::Data pos_vt_data; @@ -176,30 +180,10 @@ namespace db0::object_model auto &type = *this->m_type; auto numTypeTags = safeCast(type.getNumBases() + 1, "Too many base classes"); - if constexpr (std::is_same_v) { - auto *immutableInitializer = dynamic_cast(&initializer); - assert(immutableInitializer); - super_t::init(*fixture, type.getClassRef(), initializer.getRefCounts(), numTypeTags, - *immutableInitializer, getAccessOptions(type) - ); - std::unordered_map embeddedSchemaTypes; - for (const auto &objectValue: immutableInitializer->objects()) { - auto index = objectValue.m_loc.first; - if (!objectValue.m_object) { - embeddedSchemaTypes.erase(index); - } else { - embeddedSchemaTypes[index] = objectValue.m_storage_class; - } - } - for (const auto &[index, storageClass]: embeddedSchemaTypes) { - type.addToSchema(index, storageClass, {}); - } - } else { - super_t::init(*fixture, type.getClassRef(), initializer.getRefCounts(), numTypeTags, - pos_vt_data, pos_vt_offset, index_vt_data.first, index_vt_data.second, - getAccessOptions(type) - ); - } + super_t::init(*fixture, type.getClassRef(), initializer.getRefCounts(), numTypeTags, + pos_vt_data, pos_vt_offset, index_vt_data.first, index_vt_data.second, + getAccessOptions(type) + ); // reference associated class type.incRef(false); @@ -276,7 +260,11 @@ namespace db0::object_model auto fixture = initializer.getFixture(); auto &type = initializer.getClass(); auto storage_class = recognizeType(*fixture, type_id, obj_ptr); - auto storage_fidelity = getStorageFidelity(storage_class); + bool embedValue = false; + if constexpr (std::is_same_v) { + embedValue = shouldEmbedd(type_id, storage_class, obj_ptr); + } + auto storage_fidelity = embedValue ? 0 : getStorageFidelity(storage_class); // Find an already existing field index auto [member_id, is_init_var] = type.findField(field_name); @@ -298,9 +286,12 @@ namespace db0::object_model auto member_flags = type.isNoCache() ? AccessFlags { AccessOptions::no_cache } : AccessFlags(); auto loc = member_id.get(0).getIndexAndOffset(); if constexpr (std::is_same_v) { - if (shouldEmbedd(type_id, storage_class, obj_ptr)) { + if (embedValue) { auto &immutableInitializer = dynamic_cast(initializer); - immutableInitializer.setObject(loc, storage_class, {}, ObjectSharedPtr(obj_ptr)); + auto embeddedStorageClass = storage_class == StorageClass::OBJECT_REF + ? StorageClass::EMBEDDED_OBJECT + : storage_class; + immutableInitializer.setObject(loc, embeddedStorageClass, {}, ObjectSharedPtr(obj_ptr)); } else { auto value = createMember(fixture, type_id, storage_class, obj_ptr, member_flags); initializer.set(loc, storage_class, value); diff --git a/src/dbzero/object_model/object/ObjectInitializer.cpp b/src/dbzero/object_model/object/ObjectInitializer.cpp index f370b9ca..778ef4a2 100644 --- a/src/dbzero/object_model/object/ObjectInitializer.cpp +++ b/src/dbzero/object_model/object/ObjectInitializer.cpp @@ -4,6 +4,7 @@ #include "ObjectInitializer.hpp" #include #include +#include namespace db0::object_model @@ -197,6 +198,7 @@ namespace db0::object_model return; } + m_objects_compacted = false; m_objects.push_back({ loc, storage_class, std::move(object) }); } @@ -234,18 +236,58 @@ namespace db0::object_model ) const { for (const auto &value: m_objects) { + if (!value.m_object || value.m_storage_class == StorageClass::DELETED) { + continue; + } assert(value.m_loc.second == 0 && "Variable-length embedded fields must use default fidelity"); } return getDataFrom(m_values, data, offset); } + void ImmutableObjectInitializer::compactObjects() const + { + if (m_objects_compacted) { + return; + } + + std::stable_sort(m_objects.begin(), m_objects.end(), + [](const ObjectValue &lhs, const ObjectValue &rhs) { + return lhs.m_loc.first < rhs.m_loc.first; + } + ); + + std::size_t writePos = 0; + for (std::size_t groupBegin = 0; groupBegin < m_objects.size();) { + auto index = m_objects[groupBegin].m_loc.first; + auto groupEnd = groupBegin + 1; + while (groupEnd < m_objects.size() && m_objects[groupEnd].m_loc.first == index) { + ++groupEnd; + } + + auto &value = m_objects[groupEnd - 1]; + if (!!value.m_object && value.m_storage_class != StorageClass::DELETED) { + assert(value.m_loc.second == 0 && "Variable-length embedded fields must use default fidelity"); + if (writePos != groupEnd - 1) { + m_objects[writePos] = std::move(value); + } + ++writePos; + } + + groupBegin = groupEnd; + } + m_objects.erase(m_objects.begin() + writePos, m_objects.end()); + m_objects_compacted = true; + } + void ImmutableObjectInitializer::resetObjects() { m_objects.clear(); + m_objects_compacted = true; } const std::vector &ImmutableObjectInitializer::objects() const { + compactObjects(); return m_objects; } @@ -256,6 +298,7 @@ namespace db0::object_model void ImmutableObjectInitializer::appendObjectTombstone(std::pair loc) { + m_objects_compacted = false; m_objects.push_back({ loc, StorageClass::DELETED, {} }); } diff --git a/src/dbzero/object_model/object/ObjectInitializer.hpp b/src/dbzero/object_model/object/ObjectInitializer.hpp index ee65f301..9c63fec9 100644 --- a/src/dbzero/object_model/object/ObjectInitializer.hpp +++ b/src/dbzero/object_model/object/ObjectInitializer.hpp @@ -257,8 +257,10 @@ namespace db0::object_model const std::vector &objects() const; private: - std::vector m_objects; + mutable std::vector m_objects; + mutable bool m_objects_compacted = true; + void compactObjects() const; void appendObjectTombstone(std::pair loc); bool hasObjectAt(std::pair loc) const; }; diff --git a/src/dbzero/object_model/object/o_embedded_object.cpp b/src/dbzero/object_model/object/o_embedded_object.cpp index e8d7866a..fa5b98ae 100644 --- a/src/dbzero/object_model/object/o_embedded_object.cpp +++ b/src/dbzero/object_model/object/o_embedded_object.cpp @@ -94,7 +94,8 @@ namespace db0::object_model auto size = o_py_dict::measure(pyObject); return o_dict::Element::embeddedDict(size, writePyDict, pyObject); } - case StorageClass::OBJECT_REF: { + case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_OBJECT: { const auto &initializer = getInitializer(pyObject); auto size = o_embedded_object::measure(initializer.getClassPtr()->getClassRef(), initializer); return o_dict::Element::embeddedObject(size, writeEmbeddedObject, pyObject); @@ -110,11 +111,11 @@ namespace db0::object_model { o_dict::ElementMap fieldMap; for (const auto &value: initializer.objects()) { - assert(value.m_loc.second == 0 && "Variable-length embedded fields must use default fidelity"); auto key = o_dict::Element::integer(value.m_loc.first); if (!value.m_object) { fieldMap.erase(key); } else { + assert(value.m_loc.second == 0 && "Variable-length embedded fields must use default fidelity"); fieldMap[key] = fieldMapElementFromObject(value.m_storage_class, value.m_object); } } diff --git a/src/dbzero/object_model/tags/TagIndex.cpp b/src/dbzero/object_model/tags/TagIndex.cpp index c0bb1800..c55d709d 100644 --- a/src/dbzero/object_model/tags/TagIndex.cpp +++ b/src/dbzero/object_model/tags/TagIndex.cpp @@ -480,12 +480,10 @@ namespace db0::object_model // this is to resolve addresses of incomplete objects (must be done before flushing) buildActiveValues(); - auto &type_manager = LangToolkit::getTypeManager(); // NOTE: some object might've been dropped in the meantime, need to be reverted from batch operations for (const auto &item: m_object_cache) { auto obj_ptr = item.second.get(); - auto &memo = type_manager.extractAnyObject(obj_ptr); - if (memo.isDead()) { + if (LangToolkit::isMemoDead(obj_ptr)) { revert(obj_ptr); } } @@ -497,7 +495,7 @@ namespace db0::object_model auto it = m_object_cache.find(obj_addr); assert(it != m_object_cache.end()); // NOTE: inc-ref as tag - type_manager.extractMutableAnyObject(it->second.get()).incRef(true); + LangToolkit::incRefMemo(true, it->second.get()); }; // add_index_callback adds reference to tags (string pool tokens) @@ -514,11 +512,10 @@ namespace db0::object_model auto obj_ptr = it->second.get(); // NOTE: we check for acutal language references (excluding LangCache + TagIndex) if (LangToolkit::decRefMemo(true, obj_ptr) && !LangToolkit::hasAnyLangRefs(obj_ptr, 2)) { - auto &memo = type_manager.extractAnyObject(obj_ptr); // if object is pending deletion, remove all type tags as well // we might skip this operation and leave it to Object's dropTags function // but it will be more efficient to do it here - const Class *type_ptr = &memo.getType(); + const Class *type_ptr = &LangToolkit::getMemoType(obj_ptr); while (type_ptr) { batch_op_types->removeTag({ obj_addr, nullptr }, type_ptr->getAddress().getOffset()); type_ptr = type_ptr->getBaseClassPtr(); @@ -558,11 +555,11 @@ namespace db0::object_model // Mid-init objects are in m_active_pre_cache, not m_object_cache, so they are unaffected. for (const auto &item: m_object_cache) { auto obj_ptr = item.second.get(); - auto &memo = type_manager.extractAnyObject(obj_ptr); // NOTE: dropped instances should've already been reverted by now // NOTE: we check for actual language references (excluding LangCache + TagIndex) - if (!memo.isDropped() && !memo.hasAnyRefs() && !LangToolkit::hasAnyLangRefs(obj_ptr, 2)) { - m_batch_op_types->revert(memo.getUniqueAddress()); + if (!LangToolkit::isMemoDropped(obj_ptr) && !LangToolkit::hasMemoAnyRefs(obj_ptr) + && !LangToolkit::hasAnyLangRefs(obj_ptr, 2)) { + m_batch_op_types->revert(LangToolkit::getMemoUniqueAddress(obj_ptr)); } } // flush all type-tag updates diff --git a/src/dbzero/object_model/tags/TagIndex.hpp b/src/dbzero/object_model/tags/TagIndex.hpp index dc456534..031eaf47 100644 --- a/src/dbzero/object_model/tags/TagIndex.hpp +++ b/src/dbzero/object_model/tags/TagIndex.hpp @@ -281,10 +281,9 @@ DB0_PACKED_END { // prepare the active value only if it's not yet initialized if (!result.first.isValid() && !result.second) { - auto &memo = LangToolkit::getTypeManager().extractAnyObject(memo_ptr); // NOTE: that memo object may not have address before fully initialized (before postInit) - if (memo.hasInstance()) { - auto object_addr = memo.getUniqueAddress(); + if (LangToolkit::hasMemoInstance(memo_ptr)) { + auto object_addr = LangToolkit::getMemoUniqueAddress(memo_ptr); // cache object locally if (m_object_cache.find(object_addr) == m_object_cache.end()) { m_object_cache.emplace(object_addr, memo_ptr); diff --git a/tests/unit_tests/ObjectInitializerTest.cpp b/tests/unit_tests/ObjectInitializerTest.cpp index 3002e857..217586ee 100644 --- a/tests/unit_tests/ObjectInitializerTest.cpp +++ b/tests/unit_tests/ObjectInitializerTest.cpp @@ -215,6 +215,72 @@ namespace tests workspace.close(); } + TEST_F( ObjectInitializerTest, testImmutableInitializerCompactsEmbeddedObjectsInPlace ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefix_name); + + int object = 0; + std::shared_ptr mock_class = getTestClass(fixture); + ObjectInitializerManager manager; + manager.addInitializerFor(object, mock_class); + auto *initializer = dynamic_cast(manager.findInitializer(object)); + ASSERT_NE(initializer, nullptr); + + auto pyFirst = Py_OWN(PyLong_FromLong(1)); + auto pyRemoved = Py_OWN(PyLong_FromLong(2)); + auto pyFinal = Py_OWN(PyLong_FromLong(3)); + auto pyOther = Py_OWN(PyLong_FromLong(4)); + auto pyUpdated = Py_OWN(PyLong_FromLong(5)); + initializer->setObject( + {9, 0}, StorageClass::DB0_BYTES, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(pyOther.get()) + ); + initializer->setObject( + {4, 0}, StorageClass::STRING_REF, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(pyFirst.get()) + ); + initializer->setObject( + {7, 0}, StorageClass::DB0_BYTES, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(pyRemoved.get()) + ); + initializer->setObject( + {4, 0}, StorageClass::DB0_LIST, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(pyFinal.get()) + ); + initializer->setObject( + {9, 0}, StorageClass::DB0_SET, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(pyUpdated.get()) + ); + ASSERT_TRUE(initializer->remove({7, 0})); + + ASSERT_EQ(initializer->objects().size(), 2u); + ASSERT_EQ(initializer->objects()[0].m_loc, std::make_pair(4u, 0u)); + ASSERT_EQ(initializer->objects()[0].m_storage_class, StorageClass::DB0_LIST); + ASSERT_EQ(initializer->objects()[0].m_object.get(), pyFinal.get()); + ASSERT_EQ(initializer->objects()[1].m_loc, std::make_pair(9u, 0u)); + ASSERT_EQ(initializer->objects()[1].m_storage_class, StorageClass::DB0_SET); + ASSERT_EQ(initializer->objects()[1].m_object.get(), pyUpdated.get()); + ASSERT_EQ(initializer->objects().size(), 2u); + + auto pyLatest = Py_OWN(PyLong_FromLong(6)); + initializer->setObject( + {4, 0}, StorageClass::DB0_DICT, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(pyLatest.get()) + ); + ASSERT_EQ(initializer->objects().size(), 2u); + ASSERT_EQ(initializer->objects()[0].m_loc, std::make_pair(4u, 0u)); + ASSERT_EQ(initializer->objects()[0].m_storage_class, StorageClass::DB0_DICT); + ASSERT_EQ(initializer->objects()[0].m_object.get(), pyLatest.get()); + ASSERT_EQ(initializer->objects()[1].m_loc, std::make_pair(9u, 0u)); + ASSERT_EQ(initializer->objects()[1].m_storage_class, StorageClass::DB0_SET); + ASSERT_EQ(initializer->objects()[1].m_object.get(), pyUpdated.get()); + + workspace.close(); + } + TEST_F( ObjectInitializerTest, testImmutableInitializerDoesNotStoreObjectForFixedValues ) { Py_Initialize(); @@ -672,6 +738,8 @@ namespace tests auto nestedClass = fixture->get().getOrCreateType(pyMemoType.get()); auto rootLoc = rootClass->addField("inner", 0).get(0).getIndexAndOffset(); auto nestedLoc = nestedClass->addField("held", 0).get(0).getIndexAndOffset(); + rootClass->flush(); + nestedClass->flush(); { Object referenced(referencedClass); @@ -710,6 +778,7 @@ namespace tests ASSERT_TRUE(fixture->isAddressValid(root.getAddress(), ObjectImmutableImpl::REALM_ID)); root.destroy(); + rootClass->flush(); ASSERT_EQ(referenced.getRefCounts().second, 1u); } @@ -733,6 +802,8 @@ namespace tests auto rootLoc = rootClass->addField("outer", 0).get(0).getIndexAndOffset(); auto outerLoc = nestedClass->addField("inner", 0).get(0).getIndexAndOffset(); auto innerLoc = nestedClass->addField("held", 0).get(0).getIndexAndOffset(); + rootClass->flush(); + nestedClass->flush(); { Object referenced(referencedClass); @@ -783,6 +854,7 @@ namespace tests } root.destroy(); + rootClass->flush(); ASSERT_EQ(referenced.getRefCounts().second, 1u); } From 3b6c79b8d5bd28125990ccbe935ff49cbd31f66a Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 20 May 2026 21:41:12 +0200 Subject: [PATCH 8/8] compile fix (3.9, 3.10) --- src/dbzero/bindings/python/PySafeAPI.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/dbzero/bindings/python/PySafeAPI.hpp b/src/dbzero/bindings/python/PySafeAPI.hpp index b83259f5..d1aedb2b 100644 --- a/src/dbzero/bindings/python/PySafeAPI.hpp +++ b/src/dbzero/bindings/python/PySafeAPI.hpp @@ -6,6 +6,12 @@ #include #include "shared_py_object.hpp" +// Python 3.11 introduced Py_TPFLAGS_MANAGED_DICT. Older supported versions do +// not define it, so treat it as an absent flag when normalizing heap type flags. +#ifndef Py_TPFLAGS_MANAGED_DICT +#define Py_TPFLAGS_MANAGED_DICT 0 +#endif + namespace db0::python {