From 4b93525adb3f2390e02381904d7e01c61e582f78 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 21 May 2026 10:59:51 +0200 Subject: [PATCH 1/7] embedded tuples --- python_tests/test_memo_immutable.py | 47 ++ src/dbzero/bindings/python/EmbeddedObject.cpp | 503 ------------------ src/dbzero/bindings/python/EmbeddedObject.hpp | 65 --- src/dbzero/bindings/python/Memo.cpp | 2 +- src/dbzero/bindings/python/PyInternalAPI.cpp | 32 +- src/dbzero/bindings/python/PyToolkit.cpp | 144 ++++- src/dbzero/bindings/python/PyToolkit.hpp | 10 +- src/dbzero/bindings/python/dbzero.cpp | 4 +- .../object/ObjectImmutableImpl.cpp | 52 +- .../object/ObjectImmutableImpl.hpp | 1 - src/dbzero/object_model/tuple/o_py_tuple.cpp | 38 ++ tests/unit_tests/EmbeddedTupleTest.cpp | 76 +++ tests/unit_tests/VInstanceMapTest.cpp | 34 +- 13 files changed, 348 insertions(+), 660 deletions(-) delete mode 100644 src/dbzero/bindings/python/EmbeddedObject.cpp delete mode 100644 src/dbzero/bindings/python/EmbeddedObject.hpp diff --git a/python_tests/test_memo_immutable.py b/python_tests/test_memo_immutable.py index 5a0b4b6d..7f853cd2 100644 --- a/python_tests/test_memo_immutable.py +++ b/python_tests/test_memo_immutable.py @@ -50,6 +50,12 @@ def __init__(self, nested, label): self.label = label +@db0.memo(immutable=True, no_default_tags=True) +class MemoImmutableTupleHolder: + def __init__(self, payload): + self.payload = payload + + @db0.memo(immutable=True, no_default_tags=True) class MemoImmutableReadInConstructor: def __init__(self, data, payload): @@ -149,4 +155,45 @@ def test_prebound_immutable_nested_object_embeds_into_owner(db0_fixture): assert db0.is_memo(inner) with pytest.raises(Exception): db0.uuid(inner) + + +def test_read_embedded_tuple_field(db0_fixture): + payload = tuple(f"alpha-{index}" for index in range(12)) + (7, b"bytes", None) + obj = MemoImmutableTupleHolder(payload) + db0.tags(obj).add("keep-embedded-tuple") + + assert type(obj.payload).__name__ == "EmbeddedTuple" + assert len(obj.payload) == len(payload) + assert obj.payload[0] == "alpha-0" + assert obj.payload[12] == 7 + assert obj.payload[-2] == b"bytes" + assert obj.payload.count("alpha-3") == 1 + assert obj.payload.index("alpha-3") == 3 + assert tuple(obj.payload) == payload + assert repr(obj.payload) == repr(payload) + + +def test_embedded_list_field_is_exposed_as_embedded_tuple(db0_fixture): + payload = [f"alpha-{index}" for index in range(12)] + [7] + obj = MemoImmutableTupleHolder(payload) + db0.tags(obj).add("keep-embedded-list") + + assert type(obj.payload).__name__ == "EmbeddedTuple" + assert tuple(obj.payload) == tuple(payload) + + +def test_embedded_tuple_with_prebound_immutable_object_element(db0_fixture): + inner = MemoImmutableNestedPayload(name="tuple child", count=11) + obj = MemoImmutableTupleHolder(("prefix", inner)) + db0.tags(obj).add("keep-embedded-tuple-object") + + assert obj.payload[0] == "prefix" + assert obj.payload[1].name == "tuple child" + assert obj.payload[1].count == 11 + assert inner.name == "tuple child" + assert inner.count == 11 + assert isinstance(inner, MemoImmutableNestedPayload) + assert db0.is_memo(inner) + with pytest.raises(Exception): + db0.uuid(inner) diff --git a/src/dbzero/bindings/python/EmbeddedObject.cpp b/src/dbzero/bindings/python/EmbeddedObject.cpp deleted file mode 100644 index 5ba1d452..00000000 --- a/src/dbzero/bindings/python/EmbeddedObject.cpp +++ /dev/null @@ -1,503 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1-or-later -// Copyright (c) 2025 DBZero Software sp. z o.o. - -#include "EmbeddedObject.hpp" - -#include "MemoObject.hpp" -#include "PyInternalAPI.hpp" -#include "PySafeAPI.hpp" -#include "PyToolkit.hpp" -#include "Utils.hpp" - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -namespace db0::python -{ - using ObjectSharedPtr = PyTypes::ObjectSharedPtr; - using namespace db0::object_model; - - static_assert(EmbeddedObject::sizeOf() > sizeof(PyObject), "EmbeddedObject must allocate storage for its view state"); - static_assert( - sizeof(EmbeddedObjectRef) <= sizeof(MemoImmutableObject::ExtT), - "EmbeddedObjectRef must fit in MemoImmutableObject native storage for in-place morphing" - ); - static_assert( - alignof(EmbeddedObjectRef) <= alignof(MemoImmutableObject::ExtT), - "EmbeddedObjectRef alignment must be compatible with MemoImmutableObject native storage" - ); - - EmbeddedObjectRef::EmbeddedObjectRef( - PyObject *rootObject, const o_embedded_object *embeddedObject, std::shared_ptr type - ) - : m_root_object(rootObject) - , m_embedded_object(embeddedObject) - , m_type(std::move(type)) - { - Py_XINCREF(m_root_object); - } - - EmbeddedObjectRef::~EmbeddedObjectRef() - { - Py_XDECREF(m_root_object); - } - - PyObject *EmbeddedObjectRef::rootObject() const - { - return m_root_object; - } - - const o_embedded_object &EmbeddedObjectRef::embeddedObject() const - { - return *m_embedded_object; - } - - Class &EmbeddedObjectRef::type() const - { - return *m_type; - } - - namespace - { - EmbeddedObjectRef &embeddedMemoRef(MemoImmutableObject *object) - { - return *reinterpret_cast(const_cast(&object->ext())); - } - - db0::swine_ptr getRootFixture(PyObject *rootObject) - { - return reinterpret_cast(rootObject)->ext().getFixture(); - } - - ObjectSharedPtr unloadMember(EmbeddedObjectRef &embeddedRef, const FieldInfo &fieldInfo) - { - auto fixture = getRootFixture(embeddedRef.rootObject()); - return ObjectImmutableImpl::tryGetEmbeddedField( - fixture, embeddedRef.rootObject(), embeddedRef.embeddedObject(), fieldInfo, - reinterpret_cast(embeddedRef.rootObject())->ext().getMemberFlags() - ); - } - - ObjectSharedPtr tryGetMember(EmbeddedObjectRef &embeddedRef, const char *attrName) - { - auto memberLoc = embeddedRef.type().findField(attrName); - if (!memberLoc.first) { - return {}; - } - for (const auto &fieldInfo: memberLoc.first) { - auto result = unloadMember(embeddedRef, fieldInfo); - if (result.get()) { - return result; - } - } - return {}; - } - - std::unordered_set getEmbeddedMemberNames( - const o_embedded_object &embeddedObject, Class &type - ) - { - std::unordered_set result; - auto &types = embeddedObject.pos_vt().types(); - unsigned int index = types.offset(); - for (unsigned int pos = 0; pos < types.size(); ++pos, ++index) { - if (types[pos] == StorageClass::DELETED || types[pos] == StorageClass::UNDEFINED) { - continue; - } - result.insert(type.getMember(FieldID::fromIndex(index)).m_name); - } - - for (const auto &xvalue: embeddedObject.index_vt().xvalues()) { - if (xvalue.m_type == StorageClass::DELETED || xvalue.m_type == StorageClass::UNDEFINED) { - continue; - } - result.insert(type.getMember(FieldID::fromIndex(xvalue.getIndex())).m_name); - } - - for (const auto &entry: embeddedObject.field_map()) { - const auto &value = entry.value(); - if (value.itemKind() == StorageClass::DELETED || value.itemKind() == StorageClass::UNDEFINED) { - continue; - } - std::uint32_t memberIndex = 0; - if (entry.key().itemKind() == StorageClass::PACKED_INT32) { - memberIndex = entry.key().packedIntPayload().value(); - } else if (entry.key().itemKind() == StorageClass::INT64) { - memberIndex = static_cast(entry.key().intPayload().value()); - } else { - continue; - } - result.insert(type.getMember(FieldID::fromIndex(memberIndex)).m_name); - } - return result; - } - - PyObject *tryEmbeddedObjectGetAttr(EmbeddedObject *self, PyObject *attr) - { - const char *attrName = PyUnicode_AsUTF8(attr); - if (!attrName) { - PyErr_SetString(PyExc_AttributeError, "Invalid attribute name"); - return nullptr; - } - - if (!(attrName[0] == '_' && attrName[1] == 'X' && attrName[2] == '_' && attrName[3] == '_')) { - auto fixture = getRootFixture(self->ext().rootObject()); - fixture->refreshIfUpdated(); - auto member = tryGetMember(self->modifyExt(), attrName); - if (member.get()) { - return member.steal(); - } - } - - return PyObject_GenericGetAttr(reinterpret_cast(self), attr); - } - - PyObject *PyAPI_EmbeddedObject_getattro(EmbeddedObject *self, PyObject *attr) - { - PY_API_FUNC - return runSafe(tryEmbeddedObjectGetAttr, self, attr); - } - - PyObject *tryEmbeddedMemoGetAttr(MemoImmutableObject *self, PyObject *attr) - { - const char *attrName = PyUnicode_AsUTF8(attr); - if (!attrName) { - PyErr_SetString(PyExc_AttributeError, "Invalid attribute name"); - return nullptr; - } - - if (!(attrName[0] == '_' && attrName[1] == 'X' && attrName[2] == '_' && attrName[3] == '_')) { - auto &embeddedRef = embeddedMemoRef(self); - auto fixture = getRootFixture(embeddedRef.rootObject()); - fixture->refreshIfUpdated(); - auto member = tryGetMember(embeddedRef, attrName); - if (member.get()) { - return member.steal(); - } - } - - return PyObject_GenericGetAttr(reinterpret_cast(self), attr); - } - - PyObject *PyAPI_EmbeddedMemo_getattro(MemoImmutableObject *self, PyObject *attr) - { - PY_API_FUNC - return runSafe(tryEmbeddedMemoGetAttr, self, attr); - } - - int PyAPI_EmbeddedMemo_setattro(MemoImmutableObject *, PyObject *, PyObject *) - { - PY_API_FUNC - PyErr_SetString(PyExc_AttributeError, "Cannot modify an embedded immutable memo object"); - return -1; - } - - PyObject *tryEmbeddedObjectStr(EmbeddedObject *self) - { - std::stringstream str; - str << ""; - return PyUnicode_FromString(str.str().c_str()); - } - - PyObject *PyAPI_EmbeddedObject_str(EmbeddedObject *self) - { - PY_API_FUNC - return runSafe(tryEmbeddedObjectStr, self); - } - - void PyAPI_EmbeddedObject_del(EmbeddedObject *self) - { - PY_API_FUNC - if (PyObject_GC_IsTracked(self)) { - PyObject_GC_UnTrack(self); - } - self->destroy(); - Py_TYPE(self)->tp_free(reinterpret_cast(self)); - } - - void PyAPI_EmbeddedMemo_del(MemoImmutableObject *self) - { - PY_API_FUNC - if (Py_IsInitialized()) { - if (PyObject_GC_IsTracked(self)) { - PyObject_GC_UnTrack(self); - } - embeddedMemoRef(self).~EmbeddedObjectRef(); - Py_TYPE(self)->tp_free(reinterpret_cast(self)); - } - } - - int EmbeddedObject_traverse(EmbeddedObject *self, visitproc visit, void *arg) - { - Py_VISIT(self->ext().rootObject()); - return 0; - } - - [[maybe_unused]] int EmbeddedMemo_traverse(MemoImmutableObject *self, visitproc visit, void *arg) - { - Py_VISIT(embeddedMemoRef(self).rootObject()); - return 0; - } - - [[maybe_unused]] int EmbeddedMemo_clear(MemoImmutableObject *) - { - return 0; - } - - PyObject *tryEmbeddedMemoStr(MemoImmutableObject *self) - { - std::stringstream str; - str << "<" << Py_TYPE(self)->tp_base->tp_name - << " embedded instance type=" << embeddedMemoRef(self).type().getName() << ">"; - return PyUnicode_FromString(str.str().c_str()); - } - - PyObject *PyAPI_EmbeddedMemo_str(MemoImmutableObject *self) - { - PY_API_FUNC - return runSafe(tryEmbeddedMemoStr, self); - } - - PyObject *PyAPI_EmbeddedMemo_dir(MemoImmutableObject *self, PyObject *) - { - PY_API_FUNC - auto result = Py_OWN(PyObject_CallMethod( - reinterpret_cast(&PyBaseObject_Type), "__dir__", - "O", reinterpret_cast(self) - )); - if (!result) { - return nullptr; - } - - auto &type = embeddedMemoRef(self).type(); - for (const auto &name: getEmbeddedMemberNames(embeddedMemoRef(self).embeddedObject(), type)) { - auto pyName = Py_OWN(PyUnicode_FromString(name.c_str())); - if (!pyName || PySequence_Contains(*result, *pyName) == 1) { - continue; - } - if (PyList_Append(*result, *pyName) < 0) { - return nullptr; - } - } - return result.steal(); - } - - PyObject *PyAPI_EmbeddedMemo_get_dict(MemoImmutableObject *self, void *) - { - PY_API_FUNC - auto result = Py_OWN(PyDict_New()); - if (!result) { - return nullptr; - } - - auto &type = embeddedMemoRef(self).type(); - for (const auto &name: getEmbeddedMemberNames(embeddedMemoRef(self).embeddedObject(), type)) { - auto value = tryGetMember(embeddedMemoRef(self), name.c_str()); - if (!value.get()) { - continue; - } - auto pyName = Py_OWN(PyUnicode_FromString(name.c_str())); - if (!pyName || PyDict_SetItem(*result, *pyName, *value) < 0) { - return nullptr; - } - } - return result.steal(); - } - - Py_hash_t PyAPI_EmbeddedMemo_hash(MemoImmutableObject *) - { - PY_API_FUNC - PyErr_SetString(PyExc_TypeError, "Embedded immutable memo objects do not have durable identity"); - return -1; - } - - static PyMethodDef EmbeddedMemo_methods[] = { - {"__dir__", (PyCFunction)PyAPI_EmbeddedMemo_dir, METH_NOARGS, nullptr}, - {NULL} - }; - - static PyGetSetDef EmbeddedMemo_getsets[] = { - {"__dict__", (getter)PyAPI_EmbeddedMemo_get_dict, nullptr, nullptr, nullptr}, - {nullptr} - }; - - std::string consumePyErrorMessage(); - - PyTypeObject *createEmbeddedMemoType(PyTypeObject *memoType) - { - std::vector slots = { - {Py_tp_dealloc, reinterpret_cast(PyAPI_EmbeddedMemo_del)}, - {Py_tp_getattro, reinterpret_cast(PyAPI_EmbeddedMemo_getattro)}, - {Py_tp_setattro, reinterpret_cast(PyAPI_EmbeddedMemo_setattro)}, - {Py_tp_methods, reinterpret_cast(EmbeddedMemo_methods)}, - {Py_tp_getset, reinterpret_cast(EmbeddedMemo_getsets)}, - {Py_tp_hash, reinterpret_cast(PyAPI_EmbeddedMemo_hash)}, - {Py_tp_repr, reinterpret_cast(PyAPI_EmbeddedMemo_str)}, - {Py_tp_str, reinterpret_cast(PyAPI_EmbeddedMemo_str)}, - {0, 0} - }; - if (memoType->tp_flags & Py_TPFLAGS_HAVE_GC) { - slots.insert(slots.end() - 1, { - {Py_tp_traverse, reinterpret_cast(EmbeddedMemo_traverse)}, - {Py_tp_clear, reinterpret_cast(EmbeddedMemo_clear)} - }); - } - - std::stringstream typeName; - typeName << memoType->tp_name << ".__dbzero_embedded_view__"; - const char *safeName = PyToolkit::getTypeManager().getPooledString(typeName.str()); - std::uint32_t flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE; - if (memoType->tp_flags & Py_TPFLAGS_HAVE_GC) { - flags |= Py_TPFLAGS_HAVE_GC; - } - flags &= ~Py_TPFLAGS_MANAGED_DICT; - - auto spec = PyType_Spec { - .name = safeName, - .basicsize = static_cast(memoType->tp_basicsize), - .itemsize = 0, - .flags = flags, - .slots = slots.data() - }; - auto bases = Py_OWN(PySafeTuple_Pack(Py_BORROW(memoType))); - auto shadowType = reinterpret_cast(PyType_FromSpecWithBases(&spec, *bases)); - if (!shadowType) { - return nullptr; - } - - shadowType->tp_weaklistoffset = memoType->tp_weaklistoffset; - shadowType->tp_dictoffset = memoType->tp_dictoffset; - if (shadowType->tp_basicsize != memoType->tp_basicsize) { - Py_DECREF(shadowType); - PyErr_SetString(PyExc_RuntimeError, "Embedded memo shadow type size mismatch"); - return nullptr; - } - if ((shadowType->tp_flags & Py_TPFLAGS_HAVE_GC) != (memoType->tp_flags & Py_TPFLAGS_HAVE_GC)) { - Py_DECREF(shadowType); - PyErr_SetString(PyExc_RuntimeError, "Embedded memo shadow type GC flag mismatch"); - return nullptr; - } - - return shadowType; - } - - PyTypeObject *getEmbeddedMemoType(PyTypeObject *memoType) - { - auto *embeddedType = PyToolkit::getTypeManager().getEmbeddedMemoType(memoType, createEmbeddedMemoType); - if (!embeddedType) { - THROWF(db0::InternalException) - << "Unable to create embedded memo shadow type: " << consumePyErrorMessage(); - } - return embeddedType; - } - - std::string consumePyErrorMessage() - { - if (!PyErr_Occurred()) { - return "unknown Python error"; - } - PyObject *ptype = nullptr; - PyObject *pvalue = nullptr; - PyObject *ptraceback = nullptr; - PyErr_Fetch(&ptype, &pvalue, &ptraceback); - PyErr_NormalizeException(&ptype, &pvalue, &ptraceback); - auto str = Py_OWN(PyObject_Str(pvalue ? pvalue : Py_None)); - std::string result = str.get() ? PyUnicode_AsUTF8(*str) : "unable to format Python error"; - Py_XDECREF(ptype); - Py_XDECREF(pvalue); - Py_XDECREF(ptraceback); - return result; - } - } - - PyTypeObject EmbeddedObjectType = { - PyVarObject_HEAD_INIT(nullptr, 0) - .tp_name = "dbzero.EmbeddedObject", - .tp_basicsize = static_cast(EmbeddedObject::sizeOf()), - .tp_itemsize = 0, - .tp_dealloc = reinterpret_cast(PyAPI_EmbeddedObject_del), - .tp_vectorcall_offset = 0, - .tp_getattr = nullptr, - .tp_setattr = nullptr, - .tp_as_async = nullptr, - .tp_repr = reinterpret_cast(PyAPI_EmbeddedObject_str), - .tp_as_number = nullptr, - .tp_as_sequence = nullptr, - .tp_as_mapping = nullptr, - .tp_hash = nullptr, - .tp_call = nullptr, - .tp_str = reinterpret_cast(PyAPI_EmbeddedObject_str), - .tp_getattro = reinterpret_cast(PyAPI_EmbeddedObject_getattro), - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, - .tp_doc = "dbzero embedded immutable object view", - .tp_traverse = reinterpret_cast(EmbeddedObject_traverse), - .tp_alloc = PyType_GenericAlloc, - .tp_free = PyObject_GC_Del, - }; - - ObjectSharedPtr makeEmbeddedObject( - PyObject *rootObject, const o_embedded_object &embeddedObject, std::shared_ptr type - ) - { - auto *pyObject = reinterpret_cast(EmbeddedObjectType.tp_alloc(&EmbeddedObjectType, 0)); - if (!pyObject) { - return {}; - } - pyObject->makeNew(rootObject, &embeddedObject, std::move(type)); - return Py_OWN(reinterpret_cast(pyObject)); - } - - ObjectSharedPtr makeEmbeddedMemoObject( - PyObject *rootObject, const o_embedded_object &embeddedObject, std::shared_ptr type, - PyTypeObject *memoType - ) - { - auto *embeddedType = getEmbeddedMemoType(memoType); - auto *pyObject = reinterpret_cast(embeddedType->tp_alloc(embeddedType, 0)); - if (!pyObject) { - return {}; - } - new ((void *)const_cast(&pyObject->ext())) - EmbeddedObjectRef(rootObject, &embeddedObject, std::move(type)); - return Py_OWN(reinterpret_cast(pyObject)); - } - - void transformMemoImmutableObjectToEmbedded( - MemoImmutableObject *object, PyObject *rootObject, const o_embedded_object &embeddedObject, - std::shared_ptr type - ) - { - auto *oldType = Py_TYPE(object); - auto *embeddedType = getEmbeddedMemoType(oldType); - if (PyObject_GC_IsTracked(object)) { - PyObject_GC_UnTrack(object); - } - object->destroy(); - new ((void *)const_cast(&object->ext())) - EmbeddedObjectRef(rootObject, &embeddedObject, std::move(type)); - Py_INCREF(embeddedType); - Py_SET_TYPE(object, embeddedType); - Py_DECREF(oldType); - if (Py_TYPE(object)->tp_flags & Py_TPFLAGS_HAVE_GC) { - PyObject_GC_Track(object); - } - } - - bool PyEmbeddedMemoType_Check(PyTypeObject *type) - { - return PyToolkit::getTypeManager().isEmbeddedMemoType(type); - } - - bool PyEmbeddedMemo_Check(PyObject *object) - { - return object && PyEmbeddedMemoType_Check(Py_TYPE(object)); - } -} diff --git a/src/dbzero/bindings/python/EmbeddedObject.hpp b/src/dbzero/bindings/python/EmbeddedObject.hpp deleted file mode 100644 index 52a8cdf1..00000000 --- a/src/dbzero/bindings/python/EmbeddedObject.hpp +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1-or-later -// Copyright (c) 2025 DBZero Software sp. z o.o. - -#pragma once - -#include - -#include - -#include "MemoObject.hpp" -#include "PyTypes.hpp" -#include "PyWrapper.hpp" - -namespace db0::object_model -{ - class Class; - class o_embedded_object; -} - -namespace db0::python -{ - class EmbeddedObjectRef - { - public: - EmbeddedObjectRef( - PyObject *rootObject, const db0::object_model::o_embedded_object *embeddedObject, - std::shared_ptr type - ); - ~EmbeddedObjectRef(); - - EmbeddedObjectRef(const EmbeddedObjectRef &) = delete; - EmbeddedObjectRef &operator=(const EmbeddedObjectRef &) = delete; - - PyObject *rootObject() const; - const db0::object_model::o_embedded_object &embeddedObject() const; - db0::object_model::Class &type() const; - - private: - PyObject *m_root_object = nullptr; - const db0::object_model::o_embedded_object *m_embedded_object = nullptr; - std::shared_ptr m_type; - }; - - using EmbeddedObject = PyWrapper; - - extern PyTypeObject EmbeddedObjectType; - - PyTypes::ObjectSharedPtr makeEmbeddedObject( - PyObject *rootObject, const db0::object_model::o_embedded_object &embeddedObject, - std::shared_ptr type - ); - - PyTypes::ObjectSharedPtr makeEmbeddedMemoObject( - PyObject *rootObject, const db0::object_model::o_embedded_object &embeddedObject, - std::shared_ptr type, PyTypeObject *memoType - ); - - void transformMemoImmutableObjectToEmbedded( - MemoImmutableObject *object, PyObject *rootObject, const db0::object_model::o_embedded_object &embeddedObject, - std::shared_ptr type - ); - - bool PyEmbeddedMemo_Check(PyObject *object); - bool PyEmbeddedMemoType_Check(PyTypeObject *type); -} diff --git a/src/dbzero/bindings/python/Memo.cpp b/src/dbzero/bindings/python/Memo.cpp index 93e20834..a1356b69 100644 --- a/src/dbzero/bindings/python/Memo.cpp +++ b/src/dbzero/bindings/python/Memo.cpp @@ -2,7 +2,7 @@ // Copyright (c) 2025 DBZero Software sp. z o.o. #include "Memo.hpp" -#include "EmbeddedObject.hpp" +#include #include "PyToolkit.hpp" #include #include diff --git a/src/dbzero/bindings/python/PyInternalAPI.cpp b/src/dbzero/bindings/python/PyInternalAPI.cpp index d8bec903..c4104c26 100644 --- a/src/dbzero/bindings/python/PyInternalAPI.cpp +++ b/src/dbzero/bindings/python/PyInternalAPI.cpp @@ -2,14 +2,12 @@ // Copyright (c) 2025 DBZero Software sp. z o.o. #include "PyInternalAPI.hpp" -#include "EmbeddedObject.hpp" #include "PyToolkit.hpp" #include "Memo.hpp" #include #include #include #include -#include #include #include #include @@ -798,7 +796,7 @@ namespace db0::python << Py_TYPE(py_obj)->tp_name << THROWF_END; } } - + PyObject *materializeMemoObject(MemoObject *memo_obj) { if (memo_obj->ext().hasInstance()) { @@ -826,34 +824,8 @@ namespace db0::python auto fixture = memo_obj->ext().getFixture(); db0::FixtureLock lock(fixture); // materialize by calling postInit - memo_obj->modifyExt().postInit(lock, [&](const auto &initializer) { - auto &classFactory = fixture->get(); - for (const auto &value: initializer.objects()) { - if (value.m_storage_class == db0::object_model::StorageClass::DELETED) { - continue; - } - if (value.m_storage_class != db0::object_model::StorageClass::EMBEDDED_OBJECT) { - continue; - } - assert(value.m_object.get()); - - auto *pyObject = value.m_object.get(); - assert(PyMemo_Check(pyObject)); - - auto *embeddedValue = (memo_obj->ext())->variableValue(value.m_loc.first); - assert(embeddedValue); - assert(embeddedValue->itemKind() == db0::object_model::StorageClass::EMBEDDED_OBJECT); - const auto &embeddedObject = db0::object_model::o_embedded_object::__const_ref( - embeddedValue->embeddedPayload().begin() - ); - auto type = classFactory.getTypeByClassRef(embeddedObject.getClassRef()).m_class; - auto *embeddedMemo = reinterpret_cast(pyObject); - transformMemoImmutableObjectToEmbedded( - embeddedMemo, reinterpret_cast(memo_obj), embeddedObject, std::move(type) - ); - } - }); memo_obj->modifyExt().setLangObject(reinterpret_cast(memo_obj)); + memo_obj->modifyExt().postInit(lock); if (!memo_obj->ext().getType().isNoCache()) { fixture->getLangCache().add(memo_obj->ext().getAddress(), memo_obj); } diff --git a/src/dbzero/bindings/python/PyToolkit.cpp b/src/dbzero/bindings/python/PyToolkit.cpp index cb9d8016..54ea3bb5 100644 --- a/src/dbzero/bindings/python/PyToolkit.cpp +++ b/src/dbzero/bindings/python/PyToolkit.cpp @@ -2,7 +2,8 @@ // Copyright (c) 2025 DBZero Software sp. z o.o. #include "PyToolkit.hpp" -#include "EmbeddedObject.hpp" +#include +#include #include "Memo.hpp" #include "MemoExpiredRef.hpp" #include "PyInternalAPI.hpp" @@ -18,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +34,9 @@ #include #include #include +#include +#include +#include namespace db0::python @@ -64,36 +69,67 @@ namespace db0::python } return reinterpret_cast(pyObject)->ext().hasRefs(); } - } - PyToolkit::ObjectSharedPtr PyToolkit::unloadEmbeddedInstance(const db0::object_model::o_tuple_item &item) - { - switch (item.itemKind()) { - case StorageClass::STRING_REF: - case StorageClass::EMBEDDED_STRING: { - auto str = item.stringPayload().get(); - auto result = Py_OWN(PyUnicode_FromStringAndSize(str.get_raw(), str.size())); - if (!result) { - THROWF(db0::InputException) << "Failed to convert embedded string"; - } - return result; + Py_ssize_t embeddedSequenceSize(PyObject *sequence) + { + if (PyTuple_Check(sequence)) { + return PyTuple_GET_SIZE(sequence); } - case StorageClass::DB0_BYTES: - case StorageClass::EMBEDDED_BYTES: { - const auto &bytes = item.bytesPayload(); - auto result = Py_OWN(PyBytes_FromStringAndSize( - reinterpret_cast(bytes.getBuffer()), bytes.size() - )); - if (!result) { - THROWF(db0::InputException) << "Failed to convert embedded bytes"; + if (PyList_Check(sequence)) { + return PyList_GET_SIZE(sequence); + } + return -1; + } + + PyObject *embeddedSequenceItem(PyObject *sequence, Py_ssize_t index) + { + if (PyTuple_Check(sequence)) { + return PyTuple_GET_ITEM(sequence, index); + } + return PyList_GET_ITEM(sequence, index); + } + + void transformEmbeddedTupleObjects( + db0::swine_ptr &fixture, db0::object_model::ClassFactory &classFactory, + PyObject *rootObject, PyObject *sourceSequence, const db0::object_model::o_py_tuple &embeddedTuple + ) + { + // During immutable materialization, tuple/list fields are copied into the root object's embedded + // storage. Any non-materialized immutable memo object originally present in that Python sequence + // must then be morphed in place into an embedded memo view. The Python object keeps its identity, + // but its native payload now points at the embedded object stored under rootObject. Walk the source + // Python sequence in lockstep with the persisted embedded tuple so nested tuple/list elements can + // be fixed up recursively. + auto sourceSize = embeddedSequenceSize(sourceSequence); + assert(sourceSize >= 0); + assert(static_cast(sourceSize) == embeddedTuple.size()); + + for (Py_ssize_t index = 0; index < sourceSize; ++index) { + auto *sourceItem = embeddedSequenceItem(sourceSequence, index); + const auto &embeddedItem = embeddedTuple.item(static_cast(index)); + + if (PyEmbeddedMemo_Check(sourceItem)) { + continue; + } + + if (PyMemo_Check(sourceItem)) { + assert(embeddedItem.itemKind() == db0::object_model::StorageClass::EMBEDDED_OBJECT); + const auto &embeddedObject = db0::object_model::o_embedded_object::__const_ref( + embeddedItem.embeddedPayload().begin() + ); + PyToolkit::transformEmbeddedObject(fixture, rootObject, sourceItem, embeddedObject); + continue; + } + + if (PyTuple_Check(sourceItem) || PyList_Check(sourceItem)) { + assert(embeddedItem.itemKind() == db0::object_model::StorageClass::EMBEDDED_TUPLE); + const auto &nestedTuple = db0::object_model::o_py_tuple::__const_ref( + embeddedItem.embeddedPayload().begin() + ); + transformEmbeddedTupleObjects(fixture, classFactory, rootObject, sourceItem, nestedTuple); } - return result; } - default: - THROWF(db0::InputException) - << "Unsupported embedded immutable member storage class: " << item.itemKind(); } - return {}; } PyToolkit::ObjectSharedPtr PyToolkit::unloadEmbeddedInstance( @@ -101,6 +137,30 @@ namespace db0::python ) { switch (item.itemKind()) { + case StorageClass::NONE: + return Py_BORROW(Py_None); + case StorageClass::BOOLEAN: + return Py_OWN(PyBool_FromLong(item.boolPayload().value())); + case StorageClass::INT64: + return Py_OWN(PyLong_FromLongLong(item.intPayload().value())); + case StorageClass::PACKED_INT32: + return Py_OWN(PyLong_FromUnsignedLong(item.packedIntPayload().value())); + case StorageClass::FP_NUMERIC64: + return Py_OWN(PyFloat_FromDouble(item.doublePayload().value())); + case StorageClass::PTIME64: + return Py_OWN(PyLong_FromUnsignedLongLong(item.uint64Payload().value())); + case StorageClass::DATE: + return Py_OWN(uint64ToPyDate(item.uint64Payload().value())); + case StorageClass::DATETIME: + return Py_OWN(uint64ToPyDatetime(item.uint64Payload().value())); + case StorageClass::DATETIME_TZ: + return Py_OWN(uint64ToPyDatetimeWithTZ(item.uint64Payload().value())); + case StorageClass::TIME: + return Py_OWN(uint64ToPyTime(item.uint64Payload().value())); + case StorageClass::TIME_TZ: + return Py_OWN(uint64ToPyTimeWithTz(item.uint64Payload().value())); + case StorageClass::DECIMAL: + return Py_OWN(uint64ToPyDecimal(item.uint64Payload().value())); case StorageClass::STRING_REF: case StorageClass::EMBEDDED_STRING: { auto str = item.stringPayload().get(); @@ -121,6 +181,13 @@ namespace db0::python } return result; } + case StorageClass::EMBEDDED_TUPLE: { + if (!rootObject) { + THROWF(db0::InputException) << "Embedded tuple retrieval requires a root memo object"; + } + const auto &tuple = db0::object_model::o_py_tuple::__const_ref(item.embeddedPayload().begin()); + return makeEmbeddedTuple(rootObject, tuple); + } case StorageClass::EMBEDDED_OBJECT: { if (!rootObject) { THROWF(db0::InputException) << "Embedded object retrieval requires a root memo object"; @@ -143,6 +210,31 @@ namespace db0::python return {}; } + void PyToolkit::transformEmbeddedObject( + db0::swine_ptr &fixture, ObjectPtr rootObject, ObjectPtr sourceObject, + const db0::object_model::o_embedded_object &embeddedObject + ) + { + if (PyEmbeddedMemo_Check(sourceObject)) { + return; + } + + assert(PyMemo_Check(sourceObject)); + auto &classFactory = fixture->get(); + auto type = classFactory.getTypeByClassRef(embeddedObject.getClassRef()).m_class; + auto *embeddedMemo = reinterpret_cast(sourceObject); + transformMemoImmutableObjectToEmbedded(embeddedMemo, rootObject, embeddedObject, std::move(type)); + } + + void PyToolkit::transformEmbeddedTuple( + db0::swine_ptr &fixture, ObjectPtr rootObject, ObjectPtr sourceSequence, + const db0::object_model::o_py_tuple &embeddedTuple + ) + { + auto &classFactory = fixture->get(); + transformEmbeddedTupleObjects(fixture, classFactory, rootObject, sourceSequence, embeddedTuple); + } + bool PyToolkit::hasMemoInstance(ObjectPtr pyObject) { if (PyMemo_Check(pyObject)) { diff --git a/src/dbzero/bindings/python/PyToolkit.hpp b/src/dbzero/bindings/python/PyToolkit.hpp index 2a33d633..d30fdb8f 100644 --- a/src/dbzero/bindings/python/PyToolkit.hpp +++ b/src/dbzero/bindings/python/PyToolkit.hpp @@ -31,6 +31,7 @@ namespace db0::object_model class o_tuple_item; class o_embedded_object; + class o_py_tuple; class Object; class Class; class ClassFactory; @@ -134,10 +135,17 @@ namespace db0::python static ObjectSharedPtr unloadWeakSet(db0::swine_ptr, Address, std::uint16_t instance_id = 0, AccessFlags = {}); static ObjectSharedPtr unloadDict(db0::swine_ptr, Address, std::uint16_t instance_id = 0, AccessFlags = {}); static ObjectSharedPtr unloadTuple(db0::swine_ptr, Address, std::uint16_t instance_id = 0, AccessFlags = {}); - static ObjectSharedPtr unloadEmbeddedInstance(const db0::object_model::o_tuple_item &); static ObjectSharedPtr unloadEmbeddedInstance( db0::swine_ptr &, ObjectPtr root_object, const db0::object_model::o_tuple_item & ); + static void transformEmbeddedObject( + db0::swine_ptr &, ObjectPtr root_object, ObjectPtr source_object, + const db0::object_model::o_embedded_object & + ); + static void transformEmbeddedTuple( + db0::swine_ptr &, ObjectPtr root_object, ObjectPtr source_sequence, + const db0::object_model::o_py_tuple & + ); // Unload dbzero block instance static ObjectSharedPtr unloadBlock(db0::swine_ptr, Address, std::uint16_t instance_id = 0, AccessFlags = {}); diff --git a/src/dbzero/bindings/python/dbzero.cpp b/src/dbzero/bindings/python/dbzero.cpp index 3846add9..3fae26ef 100644 --- a/src/dbzero/bindings/python/dbzero.cpp +++ b/src/dbzero/bindings/python/dbzero.cpp @@ -7,7 +7,8 @@ #include "PyInternalAPI.hpp" #include "PyTagsAPI.hpp" #include "PyObjectTagManager.hpp" -#include "EmbeddedObject.hpp" +#include +#include #include "PySnapshot.hpp" #include "PyTagSet.hpp" #include "PyAtomic.hpp" @@ -204,6 +205,7 @@ PyMODINIT_FUNC PyInit_dbzero(void) &py::DictIteratorObjectType, &py::PyObjectTagManagerType, &py::EmbeddedObjectType, + &py::EmbeddedTupleType, &py::PySnapshotObjectType, &py::PyObjectIterableType, &py::PyObjectIteratorType, diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp index c87b93ef..b8b8c0a6 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include @@ -71,16 +73,48 @@ namespace db0::object_model unrefEmbeddedObject(fixture, o_embedded_object::__const_ref(value.embeddedPayload().begin())); } } - } - void ObjectImmutableImpl::postInit(FixtureLock &fixture) - { - postInit(fixture, {}); + void transformEmbeddedObjectValues( + db0::swine_ptr &fixture, ObjectImmutableImpl &object, ObjectImmutableImpl::ObjectPtr rootObject, + const ImmutableObjectInitializer &initializer + ) + { + if (!rootObject) { + return; + } + + for (const auto &value: initializer.objects()) { + if (value.m_storage_class == StorageClass::DELETED) { + continue; + } + assert(value.m_object.get()); + + auto *embeddedValue = object->variableValue(value.m_loc.first); + assert(embeddedValue); + + if (value.m_storage_class == StorageClass::EMBEDDED_OBJECT) { + assert(embeddedValue->itemKind() == StorageClass::EMBEDDED_OBJECT); + const auto &embeddedObject = o_embedded_object::__const_ref( + embeddedValue->embeddedPayload().begin() + ); + LangConfig::LangToolkit::transformEmbeddedObject( + fixture, rootObject, value.m_object.get(), embeddedObject + ); + continue; + } + + if (value.m_storage_class == StorageClass::DB0_TUPLE || value.m_storage_class == StorageClass::DB0_LIST) { + assert(embeddedValue->itemKind() == StorageClass::EMBEDDED_TUPLE); + const auto &embeddedTuple = o_py_tuple::__const_ref(embeddedValue->embeddedPayload().begin()); + LangConfig::LangToolkit::transformEmbeddedTuple( + fixture, rootObject, value.m_object.get(), embeddedTuple + ); + } + } + } } - void ObjectImmutableImpl::postInit( - FixtureLock &fixture, const std::function &preClose - ) + void ObjectImmutableImpl::postInit(FixtureLock &fixture) { if (!this->hasInstance()) { auto &initializer = InitManager::instance.getInitializer(*this); @@ -111,9 +145,7 @@ namespace db0::object_model if (type.isSingleton()) { type.setSingletonAddress(*this); } - if (preClose) { - preClose(*immutableInitializer); - } + transformEmbeddedObjectValues(*fixture, *this, m_lang_object, *immutableInitializer); initializer.close(); } diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.hpp b/src/dbzero/object_model/object/ObjectImmutableImpl.hpp index 0e7737b6..7507e632 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.hpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.hpp @@ -31,7 +31,6 @@ namespace db0::object_model ObjectSharedPtr get(const char *field_name) const; void postInit(FixtureLock &); - void postInit(FixtureLock &, const std::function &); void setLangObject(ObjectPtr) const; void destroy(); diff --git a/src/dbzero/object_model/tuple/o_py_tuple.cpp b/src/dbzero/object_model/tuple/o_py_tuple.cpp index 37a0e750..a67ac29a 100644 --- a/src/dbzero/object_model/tuple/o_py_tuple.cpp +++ b/src/dbzero/object_model/tuple/o_py_tuple.cpp @@ -7,7 +7,10 @@ #include #include +#include #include +#include +#include #include namespace db0::object_model @@ -28,6 +31,36 @@ namespace db0::object_model { o_py_dict::__new(buf, const_cast(static_cast(source))); } + + const ImmutableObjectInitializer &getInitializer(PyObject *pyObject) + { + using MemoImmutableObject = db0::python::PyToolkit::TypeManager::MemoImmutableObject; + + assert(db0::python::PyToolkit::isMemoImmutableObject(pyObject)); + + const auto &object = db0::python::PyToolkit::getTypeManager() + .template extractObject(pyObject); + if (object.hasInstance()) { + THROWF(db0::InputException) + << "Only non-materialized immutable memo objects can be embedded"; + } + + auto *initializer = dynamic_cast( + InitManager::instance.findInitializer(object) + ); + if (!initializer) { + THROWF(db0::InputException) + << "Non-materialized immutable memo object has no active initializer"; + } + return *initializer; + } + + void writeEmbeddedObject(void *buf, const void *source) + { + auto *pyObject = const_cast(static_cast(source)); + const auto &initializer = getInitializer(pyObject); + o_embedded_object::__new(buf, initializer.getClassPtr()->getClassRef(), initializer); + } } o_py_tuple::o_py_tuple(PyObject *sequence) @@ -115,6 +148,11 @@ namespace db0::object_model return Element::embeddedSet(o_py_set::measure(object), writePySet, object); case db0::bindings::TypeId::DICT: return Element::embeddedDict(o_py_dict::measure(object), writePyDict, object); + case db0::bindings::TypeId::MEMO_IMMUTABLE_OBJECT: { + const auto &initializer = getInitializer(object); + auto size = o_embedded_object::measure(initializer.getClassPtr()->getClassRef(), initializer); + return Element::embeddedObject(size, writeEmbeddedObject, object); + } default: break; } diff --git a/tests/unit_tests/EmbeddedTupleTest.cpp b/tests/unit_tests/EmbeddedTupleTest.cpp index 0d508a8e..ebdcf5fc 100644 --- a/tests/unit_tests/EmbeddedTupleTest.cpp +++ b/tests/unit_tests/EmbeddedTupleTest.cpp @@ -4,17 +4,24 @@ #include #include #include +#include +#include #include #include #include #include +#include #include #include #include +#include +#include +#include #include #include #include #include +#include #include #include @@ -78,6 +85,36 @@ namespace tests return size; } + static db0::python::shared_py_object makeMemoType() + { + static std::uint64_t memoTypeIndex = 0; + auto className = std::string("EmbeddedTupleNestedImmutable") + std::to_string(memoTypeIndex); + auto typeId = "tests/" + className; + ++memoTypeIndex; + + if (PyRun_SimpleString(("class " + className + ": pass\n").c_str()) != 0) { + return {}; + } + + auto mainModule = Py_BORROW(PyImport_AddModule("__main__")); + auto pyClass = Py_OWN(PyObject_GetAttrString(mainModule.get(), className.c_str())); + auto args = Py_OWN(PyTuple_Pack(1, pyClass.get())); + auto kwargs = Py_OWN(PyDict_New()); + auto pyTypeId = Py_OWN(PyUnicode_FromString(typeId.c_str())); + auto pyImmutable = Py_OWN(PyBool_FromLong(1)); + if (!mainModule.get() || !pyClass.get() || !args.get() || !kwargs.get() + || !pyTypeId.get() || !pyImmutable.get()) { + return {}; + } + db0::python::PySafeDict_SetItemString(kwargs.get(), "id", std::move(pyTypeId)); + db0::python::PySafeDict_SetItemString(kwargs.get(), "immutable", std::move(pyImmutable)); + + return db0::python::shared_py_object( + reinterpret_cast(db0::python::PyAPI_wrapPyClass(nullptr, args.get(), kwargs.get())), + false + ); + } + TEST_F( EmbeddedTupleTest , testTupleStoresInlineAndVariableLengthElements ) { auto memspace = getMemspace(); @@ -362,6 +399,45 @@ namespace tests ASSERT_EQ(asString(tuple->item(1)), "list item"); } + TEST_F( EmbeddedTupleTest , testPyTupleConstructsFromImmutableMemoElement ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture("embedded-tuple-nested-memo"); + auto nestedClass = getTestClass(fixture); + auto pyMemoType = makeMemoType(); + ASSERT_TRUE(pyMemoType.get()); + + auto pyMemo = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(pyMemoType.get()) + )); + pyMemo->makeNew(nestedClass); + auto *nestedInitializer = dynamic_cast( + InitManager::instance.findInitializer(pyMemo->ext()) + ); + ASSERT_NE(nestedInitializer, nullptr); + nestedInitializer->set({0, 0}, StorageClass::INT64, Value(23)); + + auto pyTuple = Py_OWN(PyTuple_New(1)); + PySafeTuple_SetItem(*pyTuple, 0, Py_OWN(Py_NewRef(reinterpret_cast(pyMemo.get())))); + + auto memspace = getMemspace(); + v_object tuple(memspace, *pyTuple); + + ASSERT_EQ(tuple->size(), 1u); + ASSERT_EQ(tuple->item(0).itemKind(), StorageClass::EMBEDDED_OBJECT); + + const auto &nestedObject = o_embedded_object::__const_ref(tuple->item(0).embeddedPayload().begin()); + ASSERT_EQ(nestedObject.getClassRef(), nestedClass->getClassRef()); + auto fixedValue = nestedObject.fixedValue(0); + ASSERT_TRUE(fixedValue.has_value()); + ASSERT_EQ(fixedValue->m_kind, StorageClass::INT64); + ASSERT_EQ(fixedValue->m_value, 23u); + + workspace.close(); + } + TEST_F( EmbeddedTupleTest , testPyTupleConstructsDeeplyNestedCollections ) { Py_Initialize(); diff --git a/tests/unit_tests/VInstanceMapTest.cpp b/tests/unit_tests/VInstanceMapTest.cpp index 717a845f..68ffbf7c 100644 --- a/tests/unit_tests/VInstanceMapTest.cpp +++ b/tests/unit_tests/VInstanceMapTest.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -230,6 +229,11 @@ DB0_PACKED_END class VInstanceMapTagIndexTest: public FixtureTestBase { + void SetUp() override + { + auto fixture = getFixture(); + db0::object_model::initializer()(fixture, true, false, false); + } }; void initializeDbzeroPythonBindingsOnce() @@ -410,8 +414,7 @@ DB0_PACKED_END { using TagIndex = db0::object_model::TagIndex; - Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); - auto fixture = workspace.getFixture("v-instance-map-composite-tag"); + auto fixture = getFixture(); db0::object_model::ClassFactory class_factory(fixture); db0::object_model::EnumFactory enum_factory(fixture); auto mutation_log = fixture->addMutationHandler(); @@ -452,15 +455,13 @@ DB0_PACKED_END update_child_tag_index.reset(); child_tag_index.reset(); memo_ptr.reset(); - workspace.close(); } TEST_F(VInstanceMapTagIndexTest, testFlushForwardsToCompositeTagIndexes) { using TagIndex = db0::object_model::TagIndex; - Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); - auto fixture = workspace.getFixture("v-instance-map-composite-flush"); + auto fixture = getFixture(); db0::object_model::ClassFactory class_factory(fixture); db0::object_model::EnumFactory enum_factory(fixture); auto mutation_log = fixture->addMutationHandler(); @@ -487,15 +488,13 @@ DB0_PACKED_END assertTagIndexContainsObject(*child_tag_index, composite_tag, memo_ptr->ext().getUniqueAddress()); child_tag_index.reset(); memo_ptr.reset(); - workspace.close(); } TEST_F(VInstanceMapTagIndexTest, testFlushReturnsFalseWhenTagIndexContainsNoElements) { using TagIndex = db0::object_model::TagIndex; - Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); - auto fixture = workspace.getFixture("v-instance-map-composite-flush-empty"); + auto fixture = getFixture(); db0::object_model::ClassFactory class_factory(fixture); db0::object_model::EnumFactory enum_factory(fixture); auto mutation_log = fixture->addMutationHandler(); @@ -528,15 +527,13 @@ DB0_PACKED_END child_tag_index.reset(); memo_ptr.reset(); - workspace.close(); } TEST_F(VInstanceMapTagIndexTest, testRollbackForwardsToCompositeTagIndexes) { using TagIndex = db0::object_model::TagIndex; - Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); - auto fixture = workspace.getFixture("v-instance-map-composite-rollback"); + auto fixture = getFixture(); db0::object_model::ClassFactory class_factory(fixture); db0::object_model::EnumFactory enum_factory(fixture); auto mutation_log = fixture->addMutationHandler(); @@ -563,15 +560,13 @@ DB0_PACKED_END ASSERT_TRUE(child_tag_index->empty()); child_tag_index.reset(); memo_ptr.reset(); - workspace.close(); } TEST_F(VInstanceMapTagIndexTest, testCloseForwardsToCompositeTagIndexes) { using TagIndex = db0::object_model::TagIndex; - Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); - auto fixture = workspace.getFixture("v-instance-map-composite-close"); + auto fixture = getFixture(); db0::object_model::ClassFactory class_factory(fixture); db0::object_model::EnumFactory enum_factory(fixture); auto mutation_log = fixture->addMutationHandler(); @@ -598,15 +593,13 @@ DB0_PACKED_END ASSERT_TRUE(child_tag_index->empty()); child_tag_index.reset(); memo_ptr.reset(); - workspace.close(); } TEST_F(VInstanceMapTagIndexTest, testCommitForwardsToCompositeTagIndexes) { using TagIndex = db0::object_model::TagIndex; - Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); - auto fixture = workspace.getFixture("v-instance-map-composite-commit"); + auto fixture = getFixture(); db0::object_model::ClassFactory class_factory(fixture); db0::object_model::EnumFactory enum_factory(fixture); auto mutation_log = fixture->addMutationHandler(); @@ -642,15 +635,13 @@ DB0_PACKED_END child_tag_index.reset(); reopened_child.reset(); memo_ptr.reset(); - workspace.close(); } TEST_F(VInstanceMapTagIndexTest, testDetachForwardsToCompositeTagIndexes) { using TagIndex = db0::object_model::TagIndex; - Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); - auto fixture = workspace.getFixture("v-instance-map-composite-detach"); + auto fixture = getFixture(); db0::object_model::ClassFactory class_factory(fixture); db0::object_model::EnumFactory enum_factory(fixture); auto mutation_log = fixture->addMutationHandler(); @@ -676,7 +667,6 @@ DB0_PACKED_END assertTagIndexContainsObject(*child_tag_index, composite_tag, memo_ptr->ext().getUniqueAddress()); child_tag_index.reset(); memo_ptr.reset(); - workspace.close(); } } From b3c461c5d04ab41f8a0d3c8ad17782b6a780a397 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 21 May 2026 11:04:34 +0200 Subject: [PATCH 2/7] embedded collection files --- .../python/embedded/EmbeddedObject.cpp | 502 ++++++++++++++++++ .../python/embedded/EmbeddedObject.hpp | 65 +++ .../python/embedded/EmbeddedTuple.cpp | 228 ++++++++ .../python/embedded/EmbeddedTuple.hpp | 42 ++ 4 files changed, 837 insertions(+) create mode 100644 src/dbzero/bindings/python/embedded/EmbeddedObject.cpp create mode 100644 src/dbzero/bindings/python/embedded/EmbeddedObject.hpp create mode 100644 src/dbzero/bindings/python/embedded/EmbeddedTuple.cpp create mode 100644 src/dbzero/bindings/python/embedded/EmbeddedTuple.hpp diff --git a/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp b/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp new file mode 100644 index 00000000..c90112f6 --- /dev/null +++ b/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp @@ -0,0 +1,502 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace db0::python +{ + using ObjectSharedPtr = PyTypes::ObjectSharedPtr; + using namespace db0::object_model; + + static_assert(EmbeddedObject::sizeOf() > sizeof(PyObject), "EmbeddedObject must allocate storage for its view state"); + static_assert( + sizeof(EmbeddedObjectRef) <= sizeof(MemoImmutableObject::ExtT), + "EmbeddedObjectRef must fit in MemoImmutableObject native storage for in-place morphing" + ); + static_assert( + alignof(EmbeddedObjectRef) <= alignof(MemoImmutableObject::ExtT), + "EmbeddedObjectRef alignment must be compatible with MemoImmutableObject native storage" + ); + + EmbeddedObjectRef::EmbeddedObjectRef( + PyObject *rootObject, const o_embedded_object *embeddedObject, std::shared_ptr type + ) + : m_root_object(rootObject) + , m_embedded_object(embeddedObject) + , m_type(std::move(type)) + { + Py_XINCREF(m_root_object); + } + + EmbeddedObjectRef::~EmbeddedObjectRef() + { + Py_XDECREF(m_root_object); + } + + PyObject *EmbeddedObjectRef::rootObject() const + { + return m_root_object; + } + + const o_embedded_object &EmbeddedObjectRef::embeddedObject() const + { + return *m_embedded_object; + } + + Class &EmbeddedObjectRef::type() const + { + return *m_type; + } + + namespace + { + EmbeddedObjectRef &embeddedMemoRef(MemoImmutableObject *object) + { + return *reinterpret_cast(const_cast(&object->ext())); + } + + db0::swine_ptr getRootFixture(PyObject *rootObject) + { + return reinterpret_cast(rootObject)->ext().getFixture(); + } + + ObjectSharedPtr unloadMember(EmbeddedObjectRef &embeddedRef, const FieldInfo &fieldInfo) + { + auto fixture = getRootFixture(embeddedRef.rootObject()); + return ObjectImmutableImpl::tryGetEmbeddedField( + fixture, embeddedRef.rootObject(), embeddedRef.embeddedObject(), fieldInfo, + reinterpret_cast(embeddedRef.rootObject())->ext().getMemberFlags() + ); + } + + ObjectSharedPtr tryGetMember(EmbeddedObjectRef &embeddedRef, const char *attrName) + { + auto memberLoc = embeddedRef.type().findField(attrName); + if (!memberLoc.first) { + return {}; + } + for (const auto &fieldInfo: memberLoc.first) { + auto result = unloadMember(embeddedRef, fieldInfo); + if (result.get()) { + return result; + } + } + return {}; + } + + std::unordered_set getEmbeddedMemberNames( + const o_embedded_object &embeddedObject, Class &type + ) + { + std::unordered_set result; + auto &types = embeddedObject.pos_vt().types(); + unsigned int index = types.offset(); + for (unsigned int pos = 0; pos < types.size(); ++pos, ++index) { + if (types[pos] == StorageClass::DELETED || types[pos] == StorageClass::UNDEFINED) { + continue; + } + result.insert(type.getMember(FieldID::fromIndex(index)).m_name); + } + + for (const auto &xvalue: embeddedObject.index_vt().xvalues()) { + if (xvalue.m_type == StorageClass::DELETED || xvalue.m_type == StorageClass::UNDEFINED) { + continue; + } + result.insert(type.getMember(FieldID::fromIndex(xvalue.getIndex())).m_name); + } + + for (const auto &entry: embeddedObject.field_map()) { + const auto &value = entry.value(); + if (value.itemKind() == StorageClass::DELETED || value.itemKind() == StorageClass::UNDEFINED) { + continue; + } + std::uint32_t memberIndex = 0; + if (entry.key().itemKind() == StorageClass::PACKED_INT32) { + memberIndex = entry.key().packedIntPayload().value(); + } else if (entry.key().itemKind() == StorageClass::INT64) { + memberIndex = static_cast(entry.key().intPayload().value()); + } else { + continue; + } + result.insert(type.getMember(FieldID::fromIndex(memberIndex)).m_name); + } + return result; + } + + PyObject *tryEmbeddedObjectGetAttr(EmbeddedObject *self, PyObject *attr) + { + const char *attrName = PyUnicode_AsUTF8(attr); + if (!attrName) { + PyErr_SetString(PyExc_AttributeError, "Invalid attribute name"); + return nullptr; + } + + if (!(attrName[0] == '_' && attrName[1] == 'X' && attrName[2] == '_' && attrName[3] == '_')) { + auto fixture = getRootFixture(self->ext().rootObject()); + fixture->refreshIfUpdated(); + auto member = tryGetMember(self->modifyExt(), attrName); + if (member.get()) { + return member.steal(); + } + } + + return PyObject_GenericGetAttr(reinterpret_cast(self), attr); + } + + PyObject *PyAPI_EmbeddedObject_getattro(EmbeddedObject *self, PyObject *attr) + { + PY_API_FUNC + return runSafe(tryEmbeddedObjectGetAttr, self, attr); + } + + PyObject *tryEmbeddedMemoGetAttr(MemoImmutableObject *self, PyObject *attr) + { + const char *attrName = PyUnicode_AsUTF8(attr); + if (!attrName) { + PyErr_SetString(PyExc_AttributeError, "Invalid attribute name"); + return nullptr; + } + + if (!(attrName[0] == '_' && attrName[1] == 'X' && attrName[2] == '_' && attrName[3] == '_')) { + auto &embeddedRef = embeddedMemoRef(self); + auto fixture = getRootFixture(embeddedRef.rootObject()); + fixture->refreshIfUpdated(); + auto member = tryGetMember(embeddedRef, attrName); + if (member.get()) { + return member.steal(); + } + } + + return PyObject_GenericGetAttr(reinterpret_cast(self), attr); + } + + PyObject *PyAPI_EmbeddedMemo_getattro(MemoImmutableObject *self, PyObject *attr) + { + PY_API_FUNC + return runSafe(tryEmbeddedMemoGetAttr, self, attr); + } + + int PyAPI_EmbeddedMemo_setattro(MemoImmutableObject *, PyObject *, PyObject *) + { + PY_API_FUNC + PyErr_SetString(PyExc_AttributeError, "Cannot modify an embedded immutable memo object"); + return -1; + } + + PyObject *tryEmbeddedObjectStr(EmbeddedObject *self) + { + std::stringstream str; + str << ""; + return PyUnicode_FromString(str.str().c_str()); + } + + PyObject *PyAPI_EmbeddedObject_str(EmbeddedObject *self) + { + PY_API_FUNC + return runSafe(tryEmbeddedObjectStr, self); + } + + void PyAPI_EmbeddedObject_del(EmbeddedObject *self) + { + PY_API_FUNC + if (PyObject_GC_IsTracked(self)) { + PyObject_GC_UnTrack(self); + } + self->destroy(); + Py_TYPE(self)->tp_free(reinterpret_cast(self)); + } + + void PyAPI_EmbeddedMemo_del(MemoImmutableObject *self) + { + PY_API_FUNC + if (Py_IsInitialized()) { + if (PyObject_GC_IsTracked(self)) { + PyObject_GC_UnTrack(self); + } + embeddedMemoRef(self).~EmbeddedObjectRef(); + Py_TYPE(self)->tp_free(reinterpret_cast(self)); + } + } + + int EmbeddedObject_traverse(EmbeddedObject *self, visitproc visit, void *arg) + { + Py_VISIT(self->ext().rootObject()); + return 0; + } + + [[maybe_unused]] int EmbeddedMemo_traverse(MemoImmutableObject *self, visitproc visit, void *arg) + { + Py_VISIT(embeddedMemoRef(self).rootObject()); + return 0; + } + + [[maybe_unused]] int EmbeddedMemo_clear(MemoImmutableObject *) + { + return 0; + } + + PyObject *tryEmbeddedMemoStr(MemoImmutableObject *self) + { + std::stringstream str; + str << "<" << Py_TYPE(self)->tp_base->tp_name + << " embedded instance type=" << embeddedMemoRef(self).type().getName() << ">"; + return PyUnicode_FromString(str.str().c_str()); + } + + PyObject *PyAPI_EmbeddedMemo_str(MemoImmutableObject *self) + { + PY_API_FUNC + return runSafe(tryEmbeddedMemoStr, self); + } + + PyObject *PyAPI_EmbeddedMemo_dir(MemoImmutableObject *self, PyObject *) + { + PY_API_FUNC + auto result = Py_OWN(PyObject_CallMethod( + reinterpret_cast(&PyBaseObject_Type), "__dir__", + "O", reinterpret_cast(self) + )); + if (!result) { + return nullptr; + } + + auto &type = embeddedMemoRef(self).type(); + for (const auto &name: getEmbeddedMemberNames(embeddedMemoRef(self).embeddedObject(), type)) { + auto pyName = Py_OWN(PyUnicode_FromString(name.c_str())); + if (!pyName || PySequence_Contains(*result, *pyName) == 1) { + continue; + } + if (PyList_Append(*result, *pyName) < 0) { + return nullptr; + } + } + return result.steal(); + } + + PyObject *PyAPI_EmbeddedMemo_get_dict(MemoImmutableObject *self, void *) + { + PY_API_FUNC + auto result = Py_OWN(PyDict_New()); + if (!result) { + return nullptr; + } + + auto &type = embeddedMemoRef(self).type(); + for (const auto &name: getEmbeddedMemberNames(embeddedMemoRef(self).embeddedObject(), type)) { + auto value = tryGetMember(embeddedMemoRef(self), name.c_str()); + if (!value.get()) { + continue; + } + auto pyName = Py_OWN(PyUnicode_FromString(name.c_str())); + if (!pyName || PyDict_SetItem(*result, *pyName, *value) < 0) { + return nullptr; + } + } + return result.steal(); + } + + Py_hash_t PyAPI_EmbeddedMemo_hash(MemoImmutableObject *) + { + PY_API_FUNC + PyErr_SetString(PyExc_TypeError, "Embedded immutable memo objects do not have durable identity"); + return -1; + } + + static PyMethodDef EmbeddedMemo_methods[] = { + {"__dir__", (PyCFunction)PyAPI_EmbeddedMemo_dir, METH_NOARGS, nullptr}, + {NULL} + }; + + static PyGetSetDef EmbeddedMemo_getsets[] = { + {"__dict__", (getter)PyAPI_EmbeddedMemo_get_dict, nullptr, nullptr, nullptr}, + {nullptr} + }; + + std::string consumePyErrorMessage(); + + PyTypeObject *createEmbeddedMemoType(PyTypeObject *memoType) + { + std::vector slots = { + {Py_tp_dealloc, reinterpret_cast(PyAPI_EmbeddedMemo_del)}, + {Py_tp_getattro, reinterpret_cast(PyAPI_EmbeddedMemo_getattro)}, + {Py_tp_setattro, reinterpret_cast(PyAPI_EmbeddedMemo_setattro)}, + {Py_tp_methods, reinterpret_cast(EmbeddedMemo_methods)}, + {Py_tp_getset, reinterpret_cast(EmbeddedMemo_getsets)}, + {Py_tp_hash, reinterpret_cast(PyAPI_EmbeddedMemo_hash)}, + {Py_tp_repr, reinterpret_cast(PyAPI_EmbeddedMemo_str)}, + {Py_tp_str, reinterpret_cast(PyAPI_EmbeddedMemo_str)}, + {0, 0} + }; + if (memoType->tp_flags & Py_TPFLAGS_HAVE_GC) { + slots.insert(slots.end() - 1, { + {Py_tp_traverse, reinterpret_cast(EmbeddedMemo_traverse)}, + {Py_tp_clear, reinterpret_cast(EmbeddedMemo_clear)} + }); + } + + std::stringstream typeName; + typeName << memoType->tp_name << ".__dbzero_embedded_view__"; + const char *safeName = PyToolkit::getTypeManager().getPooledString(typeName.str()); + std::uint32_t flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE; + if (memoType->tp_flags & Py_TPFLAGS_HAVE_GC) { + flags |= Py_TPFLAGS_HAVE_GC; + } + flags &= ~Py_TPFLAGS_MANAGED_DICT; + + auto spec = PyType_Spec { + .name = safeName, + .basicsize = static_cast(memoType->tp_basicsize), + .itemsize = 0, + .flags = flags, + .slots = slots.data() + }; + auto bases = Py_OWN(PySafeTuple_Pack(Py_BORROW(memoType))); + auto shadowType = reinterpret_cast(PyType_FromSpecWithBases(&spec, *bases)); + if (!shadowType) { + return nullptr; + } + + shadowType->tp_weaklistoffset = memoType->tp_weaklistoffset; + shadowType->tp_dictoffset = memoType->tp_dictoffset; + if (shadowType->tp_basicsize != memoType->tp_basicsize) { + Py_DECREF(shadowType); + PyErr_SetString(PyExc_RuntimeError, "Embedded memo shadow type size mismatch"); + return nullptr; + } + if ((shadowType->tp_flags & Py_TPFLAGS_HAVE_GC) != (memoType->tp_flags & Py_TPFLAGS_HAVE_GC)) { + Py_DECREF(shadowType); + PyErr_SetString(PyExc_RuntimeError, "Embedded memo shadow type GC flag mismatch"); + return nullptr; + } + + return shadowType; + } + + PyTypeObject *getEmbeddedMemoType(PyTypeObject *memoType) + { + auto *embeddedType = PyToolkit::getTypeManager().getEmbeddedMemoType(memoType, createEmbeddedMemoType); + if (!embeddedType) { + THROWF(db0::InternalException) + << "Unable to create embedded memo shadow type: " << consumePyErrorMessage(); + } + return embeddedType; + } + + std::string consumePyErrorMessage() + { + if (!PyErr_Occurred()) { + return "unknown Python error"; + } + PyObject *ptype = nullptr; + PyObject *pvalue = nullptr; + PyObject *ptraceback = nullptr; + PyErr_Fetch(&ptype, &pvalue, &ptraceback); + PyErr_NormalizeException(&ptype, &pvalue, &ptraceback); + auto str = Py_OWN(PyObject_Str(pvalue ? pvalue : Py_None)); + std::string result = str.get() ? PyUnicode_AsUTF8(*str) : "unable to format Python error"; + Py_XDECREF(ptype); + Py_XDECREF(pvalue); + Py_XDECREF(ptraceback); + return result; + } + } + + PyTypeObject EmbeddedObjectType = { + PyVarObject_HEAD_INIT(nullptr, 0) + .tp_name = "dbzero.EmbeddedObject", + .tp_basicsize = static_cast(EmbeddedObject::sizeOf()), + .tp_itemsize = 0, + .tp_dealloc = reinterpret_cast(PyAPI_EmbeddedObject_del), + .tp_vectorcall_offset = 0, + .tp_getattr = nullptr, + .tp_setattr = nullptr, + .tp_as_async = nullptr, + .tp_repr = reinterpret_cast(PyAPI_EmbeddedObject_str), + .tp_as_number = nullptr, + .tp_as_sequence = nullptr, + .tp_as_mapping = nullptr, + .tp_hash = nullptr, + .tp_call = nullptr, + .tp_str = reinterpret_cast(PyAPI_EmbeddedObject_str), + .tp_getattro = reinterpret_cast(PyAPI_EmbeddedObject_getattro), + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_doc = "dbzero embedded immutable object view", + .tp_traverse = reinterpret_cast(EmbeddedObject_traverse), + .tp_alloc = PyType_GenericAlloc, + .tp_free = PyObject_GC_Del, + }; + + ObjectSharedPtr makeEmbeddedObject( + PyObject *rootObject, const o_embedded_object &embeddedObject, std::shared_ptr type + ) + { + auto *pyObject = reinterpret_cast(EmbeddedObjectType.tp_alloc(&EmbeddedObjectType, 0)); + if (!pyObject) { + return {}; + } + pyObject->makeNew(rootObject, &embeddedObject, std::move(type)); + return Py_OWN(reinterpret_cast(pyObject)); + } + + ObjectSharedPtr makeEmbeddedMemoObject( + PyObject *rootObject, const o_embedded_object &embeddedObject, std::shared_ptr type, + PyTypeObject *memoType + ) + { + auto *embeddedType = getEmbeddedMemoType(memoType); + auto *pyObject = reinterpret_cast(embeddedType->tp_alloc(embeddedType, 0)); + if (!pyObject) { + return {}; + } + new ((void *)const_cast(&pyObject->ext())) + EmbeddedObjectRef(rootObject, &embeddedObject, std::move(type)); + return Py_OWN(reinterpret_cast(pyObject)); + } + + void transformMemoImmutableObjectToEmbedded( + MemoImmutableObject *object, PyObject *rootObject, const o_embedded_object &embeddedObject, + std::shared_ptr type + ) + { + auto *oldType = Py_TYPE(object); + auto *embeddedType = getEmbeddedMemoType(oldType); + if (PyObject_GC_IsTracked(object)) { + PyObject_GC_UnTrack(object); + } + object->destroy(); + new ((void *)const_cast(&object->ext())) + EmbeddedObjectRef(rootObject, &embeddedObject, std::move(type)); + Py_INCREF(embeddedType); + Py_SET_TYPE(object, embeddedType); + Py_DECREF(oldType); + if (Py_TYPE(object)->tp_flags & Py_TPFLAGS_HAVE_GC) { + PyObject_GC_Track(object); + } + } + + bool PyEmbeddedMemoType_Check(PyTypeObject *type) + { + return PyToolkit::getTypeManager().isEmbeddedMemoType(type); + } + + bool PyEmbeddedMemo_Check(PyObject *object) + { + return object && PyEmbeddedMemoType_Check(Py_TYPE(object)); + } +} diff --git a/src/dbzero/bindings/python/embedded/EmbeddedObject.hpp b/src/dbzero/bindings/python/embedded/EmbeddedObject.hpp new file mode 100644 index 00000000..6370246d --- /dev/null +++ b/src/dbzero/bindings/python/embedded/EmbeddedObject.hpp @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#pragma once + +#include + +#include + +#include +#include +#include + +namespace db0::object_model +{ + class Class; + class o_embedded_object; +} + +namespace db0::python +{ + class EmbeddedObjectRef + { + public: + EmbeddedObjectRef( + PyObject *rootObject, const db0::object_model::o_embedded_object *embeddedObject, + std::shared_ptr type + ); + ~EmbeddedObjectRef(); + + EmbeddedObjectRef(const EmbeddedObjectRef &) = delete; + EmbeddedObjectRef &operator=(const EmbeddedObjectRef &) = delete; + + PyObject *rootObject() const; + const db0::object_model::o_embedded_object &embeddedObject() const; + db0::object_model::Class &type() const; + + private: + PyObject *m_root_object = nullptr; + const db0::object_model::o_embedded_object *m_embedded_object = nullptr; + std::shared_ptr m_type; + }; + + using EmbeddedObject = PyWrapper; + + extern PyTypeObject EmbeddedObjectType; + + PyTypes::ObjectSharedPtr makeEmbeddedObject( + PyObject *rootObject, const db0::object_model::o_embedded_object &embeddedObject, + std::shared_ptr type + ); + + PyTypes::ObjectSharedPtr makeEmbeddedMemoObject( + PyObject *rootObject, const db0::object_model::o_embedded_object &embeddedObject, + std::shared_ptr type, PyTypeObject *memoType + ); + + void transformMemoImmutableObjectToEmbedded( + MemoImmutableObject *object, PyObject *rootObject, const db0::object_model::o_embedded_object &embeddedObject, + std::shared_ptr type + ); + + bool PyEmbeddedMemo_Check(PyObject *object); + bool PyEmbeddedMemoType_Check(PyTypeObject *type); +} diff --git a/src/dbzero/bindings/python/embedded/EmbeddedTuple.cpp b/src/dbzero/bindings/python/embedded/EmbeddedTuple.cpp new file mode 100644 index 00000000..9a10bb95 --- /dev/null +++ b/src/dbzero/bindings/python/embedded/EmbeddedTuple.cpp @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace db0::python +{ + using ObjectSharedPtr = PyTypes::ObjectSharedPtr; + using namespace db0::object_model; + + EmbeddedTupleRef::EmbeddedTupleRef(PyObject *rootObject, const o_py_tuple *tuple) + : m_root_object(rootObject) + , m_tuple(tuple) + { + Py_XINCREF(m_root_object); + } + + EmbeddedTupleRef::~EmbeddedTupleRef() + { + Py_XDECREF(m_root_object); + } + + PyObject *EmbeddedTupleRef::rootObject() const + { + return m_root_object; + } + + const o_py_tuple &EmbeddedTupleRef::tuple() const + { + return *m_tuple; + } + + namespace + { + db0::swine_ptr getRootFixture(PyObject *rootObject) + { + return reinterpret_cast(rootObject)->ext().getFixture(); + } + + PyObject *tryEmbeddedTupleGetItem(EmbeddedTuple *self, Py_ssize_t index) + { + auto fixture = getRootFixture(self->ext().rootObject()); + fixture->refreshIfUpdated(); + + auto size = static_cast(self->ext().tuple().size()); + if (index < 0) { + index += size; + } + if (index < 0 || index >= size) { + PyErr_SetString(PyExc_IndexError, "tuple index out of range"); + return nullptr; + } + return PyToolkit::unloadEmbeddedInstance( + fixture, self->ext().rootObject(), self->ext().tuple().item(static_cast(index)) + ).steal(); + } + + PyObject *PyAPI_EmbeddedTuple_GetItem(EmbeddedTuple *self, Py_ssize_t index) + { + PY_API_FUNC + return runSafe(tryEmbeddedTupleGetItem, self, index); + } + + Py_ssize_t tryEmbeddedTupleLen(EmbeddedTuple *self) + { + return static_cast(self->ext().tuple().size()); + } + + Py_ssize_t PyAPI_EmbeddedTuple_len(EmbeddedTuple *self) + { + PY_API_FUNC + return runSafe(tryEmbeddedTupleLen, self); + } + + PyObject *tryEmbeddedTupleStr(EmbeddedTuple *self) + { + std::stringstream str; + str << "("; + auto size = self->ext().tuple().size(); + for (std::size_t i = 0; i < size; ++i) { + if (i != 0) { + str << ", "; + } + auto item = Py_OWN(tryEmbeddedTupleGetItem(self, static_cast(i))); + if (!item) { + return nullptr; + } + auto repr = Py_OWN(PyObject_Repr(*item)); + if (!repr) { + return nullptr; + } + str << PyUnicode_AsUTF8(*repr); + } + if (size == 1) { + str << ","; + } + str << ")"; + return PyUnicode_FromString(str.str().c_str()); + } + + PyObject *PyAPI_EmbeddedTuple_str(EmbeddedTuple *self) + { + PY_API_FUNC + return runSafe(tryEmbeddedTupleStr, self); + } + + PyObject *tryEmbeddedTupleCount(EmbeddedTuple *self, PyObject *const *args, Py_ssize_t) + { + Py_ssize_t count = 0; + for (std::size_t i = 0; i < self->ext().tuple().size(); ++i) { + auto item = Py_OWN(tryEmbeddedTupleGetItem(self, static_cast(i))); + if (!item) { + return nullptr; + } + int equal = PyObject_RichCompareBool(*item, args[0], Py_EQ); + if (equal < 0) { + return nullptr; + } + count += equal; + } + return PyLong_FromSsize_t(count); + } + + PyObject *PyAPI_EmbeddedTuple_count(EmbeddedTuple *self, PyObject *const *args, Py_ssize_t nargs) + { + PY_API_FUNC + if (nargs != 1) { + PyErr_SetString(PyExc_TypeError, "count() takes one argument."); + return nullptr; + } + return runSafe(tryEmbeddedTupleCount, self, args, nargs); + } + + PyObject *tryEmbeddedTupleIndex(EmbeddedTuple *self, PyObject *const *args, Py_ssize_t) + { + for (std::size_t i = 0; i < self->ext().tuple().size(); ++i) { + auto item = Py_OWN(tryEmbeddedTupleGetItem(self, static_cast(i))); + if (!item) { + return nullptr; + } + int equal = PyObject_RichCompareBool(*item, args[0], Py_EQ); + if (equal < 0) { + return nullptr; + } + if (equal) { + return PyLong_FromSize_t(i); + } + } + PyErr_SetString(PyExc_ValueError, "tuple.index(x): x not in tuple"); + return nullptr; + } + + PyObject *PyAPI_EmbeddedTuple_index(EmbeddedTuple *self, PyObject *const *args, Py_ssize_t nargs) + { + PY_API_FUNC + if (nargs != 1) { + PyErr_SetString(PyExc_TypeError, "index() takes one argument."); + return nullptr; + } + return runSafe(tryEmbeddedTupleIndex, self, args, nargs); + } + + void PyAPI_EmbeddedTuple_del(EmbeddedTuple *self) + { + PY_API_FUNC + if (PyObject_GC_IsTracked(self)) { + PyObject_GC_UnTrack(self); + } + self->destroy(); + Py_TYPE(self)->tp_free(reinterpret_cast(self)); + } + + int EmbeddedTuple_traverse(EmbeddedTuple *self, visitproc visit, void *arg) + { + Py_VISIT(self->ext().rootObject()); + return 0; + } + + static PySequenceMethods EmbeddedTuple_sq = { + .sq_length = reinterpret_cast(PyAPI_EmbeddedTuple_len), + .sq_item = reinterpret_cast(PyAPI_EmbeddedTuple_GetItem), + }; + + static PyMethodDef EmbeddedTuple_methods[] = { + {"count", reinterpret_cast(PyAPI_EmbeddedTuple_count), METH_FASTCALL, nullptr}, + {"index", reinterpret_cast(PyAPI_EmbeddedTuple_index), METH_FASTCALL, nullptr}, + {NULL} + }; + } + + PyTypeObject EmbeddedTupleType = { + PyVarObject_HEAD_INIT(nullptr, 0) + .tp_name = "dbzero.EmbeddedTuple", + .tp_basicsize = static_cast(EmbeddedTuple::sizeOf()), + .tp_itemsize = 0, + .tp_dealloc = reinterpret_cast(PyAPI_EmbeddedTuple_del), + .tp_repr = reinterpret_cast(PyAPI_EmbeddedTuple_str), + .tp_as_sequence = &EmbeddedTuple_sq, + .tp_str = reinterpret_cast(PyAPI_EmbeddedTuple_str), + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_doc = "dbzero embedded immutable tuple view", + .tp_traverse = reinterpret_cast(EmbeddedTuple_traverse), + .tp_methods = EmbeddedTuple_methods, + .tp_alloc = PyType_GenericAlloc, + .tp_free = PyObject_GC_Del, + }; + + ObjectSharedPtr makeEmbeddedTuple(PyObject *rootObject, const o_py_tuple &tuple) + { + auto *pyObject = reinterpret_cast(EmbeddedTupleType.tp_alloc(&EmbeddedTupleType, 0)); + if (!pyObject) { + return {}; + } + pyObject->makeNew(rootObject, &tuple); + return Py_OWN(reinterpret_cast(pyObject)); + } +} diff --git a/src/dbzero/bindings/python/embedded/EmbeddedTuple.hpp b/src/dbzero/bindings/python/embedded/EmbeddedTuple.hpp new file mode 100644 index 00000000..fbd51de1 --- /dev/null +++ b/src/dbzero/bindings/python/embedded/EmbeddedTuple.hpp @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#pragma once + +#include + +#include +#include + +namespace db0::object_model +{ + class o_py_tuple; +} + +namespace db0::python +{ + class EmbeddedTupleRef + { + public: + EmbeddedTupleRef(PyObject *rootObject, const db0::object_model::o_py_tuple *tuple); + ~EmbeddedTupleRef(); + + EmbeddedTupleRef(const EmbeddedTupleRef &) = delete; + EmbeddedTupleRef &operator=(const EmbeddedTupleRef &) = delete; + + PyObject *rootObject() const; + const db0::object_model::o_py_tuple &tuple() const; + + private: + PyObject *m_root_object = nullptr; + const db0::object_model::o_py_tuple *m_tuple = nullptr; + }; + + using EmbeddedTuple = PyWrapper; + + extern PyTypeObject EmbeddedTupleType; + + PyTypes::ObjectSharedPtr makeEmbeddedTuple( + PyObject *rootObject, const db0::object_model::o_py_tuple &tuple + ); +} From 2dc55aac8250d637b6b14ccfaf8a519c68441ff1 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 21 May 2026 13:08:33 +0200 Subject: [PATCH 3/7] embedding objects + test parallelism --- AGENTS.md | 1 + CONTRIBUTING.md | 3 + dbzero/dbzero/dbzero.py | 2 +- python_tests/conftest.py | 14 +- python_tests/test_copy_prefix.py | 36 +-- python_tests/test_issues_14.py | 6 +- python_tests/test_memo_immutable.py | 109 +++++++ python_tests/test_multiprocess.py | 12 +- requirements.3.8.txt | 1 + requirements.3.9.txt | 1 + requirements.txt | 1 + scripts/run_tests.sh | 29 +- src/dbzero/bindings/python/Memo.cpp | 11 + src/dbzero/bindings/python/PyHash.cpp | 14 +- src/dbzero/bindings/python/PyHash.hpp | 6 +- src/dbzero/bindings/python/PyToolkit.cpp | 94 +----- src/dbzero/bindings/python/PyToolkit.hpp | 8 - src/dbzero/bindings/python/dbzero.cpp | 3 + .../python/embedded/EmbeddedObject.cpp | 154 +++++++++- .../python/embedded/EmbeddedObject.hpp | 23 ++ .../bindings/python/embedded/EmbeddedSet.cpp | 268 ++++++++++++++++++ .../bindings/python/embedded/EmbeddedSet.hpp | 63 ++++ .../object/ObjectImmutableImpl.cpp | 55 +++- src/dbzero/object_model/set/o_py_set.cpp | 38 +++ 24 files changed, 815 insertions(+), 137 deletions(-) create mode 100644 src/dbzero/bindings/python/embedded/EmbeddedSet.cpp create mode 100644 src/dbzero/bindings/python/embedded/EmbeddedSet.hpp diff --git a/AGENTS.md b/AGENTS.md index 0d8fbae0..35f78ec1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -23,6 +23,7 @@ All tests must pass before a change is considered complete. ### Running tests - Python tests: `./scripts/run_tests.sh` +- Final Python test checks: `./scripts/run_tests.sh -j 6` - C++ tests after a `-t` build: `./build/release/tests.x` - During development, do not run stress tests by default; they are intentionally slow. Run focused tests specific to the feature or refactor being worked on before finalization. - If any C++ source under the native/core part of the project was modified, also run the C++ test suite (do not rely on the Python tests alone to cover native changes). diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 01c12375..079d439d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -94,6 +94,9 @@ This project adheres to a code of conduct that all contributors are expected to # Run Python tests (excludes stress tests) ./scripts/run_tests.sh + # Run Python tests in parallel + ./scripts/run_tests.sh -j 6 + # Run specific Python test ./scripts/run_tests.sh -k=test_name diff --git a/dbzero/dbzero/dbzero.py b/dbzero/dbzero/dbzero.py index 21899e3d..c9e4f4dc 100644 --- a/dbzero/dbzero/dbzero.py +++ b/dbzero/dbzero/dbzero.py @@ -10,7 +10,7 @@ def load_dynamic(name, path): def __bootstrap__(): global __bootstrap__, __loader__, __file__ - paths = [os.path.join(os.path.split(__file__)[0]), "/src/dev/build/debug", "/usr/local/lib/python3/dist-packages/dbzero/"] + paths = [os.path.join(os.path.split(__file__)[0]), "/src/dev/build/release", "/usr/local/lib/python3/dist-packages/dbzero/"] __file__ = None for path in paths: if os.path.isdir(path): diff --git a/python_tests/conftest.py b/python_tests/conftest.py index ecd0f5c3..85203097 100644 --- a/python_tests/conftest.py +++ b/python_tests/conftest.py @@ -13,7 +13,17 @@ TEST_FILES_DIR_ROOT = os.path.join(os.getcwd(), "python_tests", "files") -DB0_DIR = os.path.join(os.getcwd(), "db0-test-data") +PYTEST_WORKER_ID = os.environ.get("PYTEST_XDIST_WORKER") +WORKER_SUFFIX = f"-{PYTEST_WORKER_ID}" if PYTEST_WORKER_ID else "" +DB0_DIR = os.path.join(os.getcwd(), f"db0-test-data{WORKER_SUFFIX}") + + +def worker_path(path): + if not WORKER_SUFFIX: + return path + directory, filename = os.path.split(path) + name, extension = os.path.splitext(filename) + return os.path.join(directory, f"{name}{WORKER_SUFFIX}{extension}") def __extract_param(request, key, default): @@ -211,4 +221,4 @@ def db0_large_lang_cache_no_autocommit(): yield db0 db0.close() if os.path.exists(DB0_DIR): - shutil.rmtree(DB0_DIR) \ No newline at end of file + shutil.rmtree(DB0_DIR) diff --git a/python_tests/test_copy_prefix.py b/python_tests/test_copy_prefix.py index 73557da7..e8ba25b1 100644 --- a/python_tests/test_copy_prefix.py +++ b/python_tests/test_copy_prefix.py @@ -6,12 +6,12 @@ import os import time from .memo_test_types import MemoTestClass, MemoTestSingleton -from .conftest import DB0_DIR +from .conftest import DB0_DIR, worker_path import multiprocessing def test_copy_current_prefix(db0_fixture): - file_name = "./test-copy.db0" + file_name = worker_path("./test-copy.db0") # remove file if it exists if os.path.exists(file_name): os.remove(file_name) @@ -30,7 +30,7 @@ def test_copy_current_prefix(db0_fixture): def test_recover_prefix_from_copy(db0_fixture): - file_name = "./test-copy.db0" + file_name = worker_path("./test-copy.db0") # remove file if it exists if os.path.exists(file_name): os.remove(file_name) @@ -58,7 +58,7 @@ def test_recover_prefix_from_copy(db0_fixture): def test_copy_prefix_custom_step_size(db0_fixture): - file_name = "./test-copy.db0" + file_name = worker_path("./test-copy.db0") if os.path.exists(file_name): os.remove(file_name) @@ -110,7 +110,7 @@ def writer_process(prefix, obj_count = 50, commit_count = 50, long_run = False): def test_copy_prefix_being_actively_modified(db0_fixture): - file_name = "./test-copy.db0" + file_name = worker_path("./test-copy.db0") if os.path.exists(file_name): os.remove(file_name) @@ -153,7 +153,7 @@ def test_copy_prefix_being_actively_modified(db0_fixture): def test_copy_prefix_fails_if_no_active_prefix(db0_fixture): - file_name = "./test-copy.db0" + file_name = worker_path("./test-copy.db0") # remove file if it exists if os.path.exists(file_name): os.remove(file_name) @@ -172,7 +172,7 @@ def test_copy_prefix_fails_if_no_active_prefix(db0_fixture): def test_copy_prefix_without_opening_it(db0_fixture): - file_name = "./test-copy.db0" + file_name = worker_path("./test-copy.db0") # remove file if it exists if os.path.exists(file_name): os.remove(file_name) @@ -207,7 +207,7 @@ def validate_current_prefix(expected_len = None, expected_min_len = None): return len(root.value) def validate_copy(copy_id, expected_len = None, expected_min_len = None): - file_name = f"./test-copy-{copy_id}.db0" + file_name = worker_path(f"./test-copy-{copy_id}.db0") os.remove(px_path) # restore the copy os.rename(file_name, px_path) @@ -250,7 +250,7 @@ def validate_copy(copy_id, expected_len = None, expected_min_len = None): while True: if not p.is_alive(): break - file_name = f"./test-copy-{copy_id}.db0" + file_name = worker_path(f"./test-copy-{copy_id}.db0") if os.path.exists(file_name): os.remove(file_name) # copy prefix without opening it, use default step size @@ -266,7 +266,7 @@ def validate_copy(copy_id, expected_len = None, expected_min_len = None): total_len += obj_count * commit_count # make final stale copy (i.e. without active modifications) - final_copy = f"./test-copy-final.db0" + final_copy = worker_path("./test-copy-final.db0") if os.path.exists(final_copy): os.remove(final_copy) db0.copy_prefix(final_copy, prefix=px_name) @@ -284,7 +284,7 @@ def validate_copy(copy_id, expected_len = None, expected_min_len = None): def test_modify_copied_prefix(db0_fixture): - file_name = "./test-copy.db0" + file_name = worker_path("./test-copy.db0") # remove file if it exists if os.path.exists(file_name): os.remove(file_name) @@ -326,7 +326,7 @@ def modify_prefix(): @pytest.mark.parametrize("db0_fixture", [{"autocommit": False}], indirect=True) def test_copy_prefix_of_recovered_copy(db0_fixture): - file_name = "./test-copy.db0" + file_name = worker_path("./test-copy.db0") # remove file if it exists if os.path.exists(file_name): os.remove(file_name) @@ -398,7 +398,7 @@ def validate_current_prefix(expected_len = None, expected_min_len = None): return len(root.value) def validate_copy(copy_id, expected_len = None, expected_min_len = None): - file_name = f"./test-copy-{copy_id}.db0" + file_name = worker_path(f"./test-copy-{copy_id}.db0") os.remove(px_path) # restore the copy os.rename(file_name, px_path) @@ -437,7 +437,7 @@ def validate_copy(copy_id, expected_len = None, expected_min_len = None): while True: if not p.is_alive(): break - file_name = f"./test-copy-{copy_id}.db0" + file_name = worker_path(f"./test-copy-{copy_id}.db0") if os.path.exists(file_name): os.remove(file_name) db0.copy_prefix(file_name, prefix=px_name) @@ -468,7 +468,7 @@ def validate_current_prefix(expected_len = None, expected_min_len = None): return len(root.value) def validate_copy(copy_id, expected_len = None, expected_min_len = None): - file_name = f"./test-copy-{copy_id}.db0" + file_name = worker_path(f"./test-copy-{copy_id}.db0") os.remove(px_path) # restore the copy os.rename(file_name, px_path) @@ -513,7 +513,7 @@ def validate_copy(copy_id, expected_len = None, expected_min_len = None): while True: if not p.is_alive(): break - file_name = f"./test-copy-{copy_id}.db0" + file_name = worker_path(f"./test-copy-{copy_id}.db0") if os.path.exists(file_name): os.remove(file_name) # copy prefix without opening it, use default step size @@ -529,7 +529,7 @@ def validate_copy(copy_id, expected_len = None, expected_min_len = None): total_len += obj_count * commit_count # make final stale copy (i.e. without active modifications) - final_copy = f"./test-copy-final.db0" + final_copy = worker_path("./test-copy-final.db0") if os.path.exists(final_copy): os.remove(final_copy) db0.copy_prefix(final_copy, prefix=px_name) @@ -543,4 +543,4 @@ def validate_copy(copy_id, expected_len = None, expected_min_len = None): # this is the restored version total_len = last_len - \ No newline at end of file + diff --git a/python_tests/test_issues_14.py b/python_tests/test_issues_14.py index ad5838c3..52605ae5 100644 --- a/python_tests/test_issues_14.py +++ b/python_tests/test_issues_14.py @@ -3,7 +3,7 @@ import dbzero as db0 import pytest -from .conftest import DB0_DIR +from .conftest import DB0_DIR, worker_path from .memo_test_types import MemoTestSingleton, MemoTestClass import multiprocessing import os @@ -43,7 +43,7 @@ def validate_current_prefix(expected_len = None, expected_min_len = None): return len(root.value) def validate_copy(copy_id, expected_len = None, expected_min_len = None): - file_name = f"./test-copy-{copy_id}.db0" + file_name = worker_path(f"./test-copy-{copy_id}.db0") os.remove(px_path) # restore the copy os.rename(file_name, px_path) @@ -69,7 +69,7 @@ def validate_copy(copy_id, expected_len = None, expected_min_len = None): db0.open(px_name, "r") # make final stale copy (i.e. without active modifications) - final_copy = f"./test-copy-final.db0" + final_copy = worker_path("./test-copy-final.db0") if os.path.exists(final_copy): os.remove(final_copy) db0.copy_prefix(final_copy, prefix=px_name) diff --git a/python_tests/test_memo_immutable.py b/python_tests/test_memo_immutable.py index 7f853cd2..48d0f6c7 100644 --- a/python_tests/test_memo_immutable.py +++ b/python_tests/test_memo_immutable.py @@ -56,6 +56,18 @@ def __init__(self, payload): self.payload = payload +@db0.memo(immutable=True, no_default_tags=True) +class MemoImmutableSetHolder: + def __init__(self, payload): + self.payload = payload + + +@db0.memo(no_default_tags=True) +class MemoSetReferenceHolder: + def __init__(self, payload): + self.payload = payload + + @db0.memo(immutable=True, no_default_tags=True) class MemoImmutableReadInConstructor: def __init__(self, data, payload): @@ -145,6 +157,8 @@ def test_read_embedded_immutable_nested_object_after_reopen(db0_fixture): def test_prebound_immutable_nested_object_embeds_into_owner(db0_fixture): inner = MemoImmutableNestedPayload(name="prebound child", count=8) + assert isinstance(hash(inner), int) + obj = MemoImmutablePreboundNestedHolder(inner, "root") db0.tags(obj).add("keep-prebound-embedded") @@ -184,6 +198,8 @@ def test_embedded_list_field_is_exposed_as_embedded_tuple(db0_fixture): def test_embedded_tuple_with_prebound_immutable_object_element(db0_fixture): inner = MemoImmutableNestedPayload(name="tuple child", count=11) + assert isinstance(hash(inner), int) + obj = MemoImmutableTupleHolder(("prefix", inner)) db0.tags(obj).add("keep-embedded-tuple-object") @@ -196,4 +212,97 @@ def test_embedded_tuple_with_prebound_immutable_object_element(db0_fixture): assert db0.is_memo(inner) with pytest.raises(Exception): db0.uuid(inner) + + +def test_read_embedded_set_field_after_reopen(db0_fixture): + payload = {"alpha", "beta", 7, b"bytes", None} + obj = MemoImmutableSetHolder(payload) + db0.tags(obj).add("keep-embedded-set") + obj_id = db0.uuid(obj) + + assert type(obj.payload).__name__ == "EmbeddedSet" + assert len(obj.payload) == len(payload) + assert "alpha" in obj.payload + assert b"bytes" in obj.payload + assert set(obj.payload) == payload + + del obj + gc.collect() + db0.commit() + db0.close() + db0.init(DB0_DIR) + db0.open("my-test-prefix", "rw") + + reopened = db0.fetch(obj_id) + assert type(reopened.payload).__name__ == "EmbeddedSet" + assert set(reopened.payload) == payload + + +def test_embedded_set_with_prebound_immutable_object_element(db0_fixture): + inner = MemoImmutableNestedPayload(name="set child", count=13) + obj = MemoImmutableSetHolder({inner, "marker"}) + db0.tags(obj).add("keep-embedded-set-object") + + values = list(obj.payload) + embedded_inner = next(item for item in values if isinstance(item, MemoImmutableNestedPayload)) + assert "marker" in obj.payload + assert embedded_inner.name == "set child" + assert embedded_inner.count == 13 + assert inner.name == "set child" + assert inner.count == 13 + assert isinstance(inner, MemoImmutableNestedPayload) + assert db0.is_memo(inner) + with pytest.raises(Exception): + db0.uuid(inner) + + +def test_python_set_lookup_survives_prebound_immutable_object_embedding(db0_fixture): + inner = MemoImmutableNestedPayload(name="python set embedded child", count=31) + values = {inner, "marker"} + obj = MemoImmutableSetHolder(values) + db0.tags(obj).add("keep-python-set-lookup-after-embedding") + + assert inner in values + assert "marker" in values + assert inner.name == "python set embedded child" + with pytest.raises(Exception): + db0.uuid(inner) + + +def test_python_set_accepts_transient_immutable_object(db0_fixture): + inner = MemoImmutableNestedPayload(name="python set child", count=17) + values = {inner, "marker"} + + assert inner in values + assert "marker" in values + + +def test_db0_set_rejects_transient_immutable_object(db0_fixture): + inner = MemoImmutableNestedPayload(name="db0 set child", count=19) + with pytest.raises(Exception): + db0.set([inner]) + + +def test_db0_set_uses_durable_hash_for_materialized_immutable_after_reopen(db0_fixture): + obj = MemoImmutableClass1(data="durable set immutable", value=29) + db0.tags(obj).add("keep-durable-set-immutable") + obj_id = db0.uuid(obj) + + holder = MemoSetReferenceHolder(db0.set([obj])) + db0.tags(holder).add("keep-durable-set-holder") + holder_id = db0.uuid(holder) + + assert obj in holder.payload + + del obj + del holder + gc.collect() + db0.commit() + db0.close() + db0.init(DB0_DIR) + db0.open("my-test-prefix", "rw") + + reopened_obj = db0.fetch(obj_id) + reopened_holder = db0.fetch(holder_id) + assert reopened_obj in reopened_holder.payload diff --git a/python_tests/test_multiprocess.py b/python_tests/test_multiprocess.py index 2f548dc5..6ead6fd5 100644 --- a/python_tests/test_multiprocess.py +++ b/python_tests/test_multiprocess.py @@ -7,7 +7,9 @@ import multiprocessing import dbzero as db0 from python_tests.memo_test_types import MemoTestClass, MemoTestSingleton -from .conftest import DB0_DIR +from .conftest import DB0_DIR, WORKER_SUFFIX + +SUBPROCESS_DB0_DIR = f"db0-test-data-subprocess{WORKER_SUFFIX}/" def test_hash_py_string(db0_fixture): assert db0.hash("abc") == db0.hash("abc") @@ -62,7 +64,7 @@ def get_test_without_remove(script, setup_script=""): import dbzero as db0 import shutil import gc -DB0_DIR = os.path.join(os.getcwd(), "db0-test-data-subprocess/") +DB0_DIR = os.path.join(os.getcwd(), {SUBPROCESS_DB0_DIR!r}) if not os.path.exists(DB0_DIR): # create empty directory os.mkdir(DB0_DIR) @@ -81,7 +83,7 @@ def get_cleanup_script(): import dbzero as db0 import shutil import gc -DB0_DIR = os.path.join(os.getcwd(), "db0-test-data-subprocess/") +DB0_DIR = os.path.join(os.getcwd(), {SUBPROCESS_DB0_DIR!r}) if os.path.exists(DB0_DIR): shutil.rmtree(DB0_DIR) """ @@ -93,7 +95,7 @@ def get_test_for_subprocess(value_to_hash, setup_script=""): import shutil import gc -DB0_DIR = os.path.join(os.getcwd(), "db0-test-data-subprocess/") +DB0_DIR = os.path.join(os.getcwd(), {SUBPROCESS_DB0_DIR!r}) if os.path.exists(DB0_DIR): shutil.rmtree(DB0_DIR) # create empty directory @@ -274,4 +276,4 @@ def test_hash_datetime_with_tz_subprocess(db0_fixture): t1 = datetime(2021, 12, 12, 5, 5, 5, tzinfo=timezone.utc) sr1 = run_hash_in_subprocess(prefix_name, t1) sr2 = run_hash_in_subprocess(prefix_name, t1) - assert sr1 == sr2 \ No newline at end of file + assert sr1 == sr2 diff --git a/requirements.3.8.txt b/requirements.3.8.txt index df8799ee..785b95a1 100644 --- a/requirements.3.8.txt +++ b/requirements.3.8.txt @@ -1,5 +1,6 @@ pytest==8.3.5 pytest-asyncio==0.24.0 +pytest-xdist==3.6.1 build==0.10.0 meson==1.9.1 meson-python==0.18.0 diff --git a/requirements.3.9.txt b/requirements.3.9.txt index c9d02ac1..68334b3f 100644 --- a/requirements.3.9.txt +++ b/requirements.3.9.txt @@ -1,5 +1,6 @@ pytest==8.4.2 pytest-asyncio==1.2.0 +pytest-xdist==3.8.0 build==0.10.0 meson==1.9.1 meson-python==0.18.0 diff --git a/requirements.txt b/requirements.txt index bea262ea..089cd8d0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ pytest==9.0.1 pytest-asyncio==1.3.0 +pytest-xdist==3.8.0 build==0.10.0 meson==1.9.1 meson-python==0.18.0 diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index 894c16b6..18309552 100755 --- a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -1,3 +1,30 @@ #!/bin/bash +set -e + export PYTHONIOENCODING=utf8 -python3 -m pytest -m 'not integration_test' -m 'not stress_test' -c pytest.ini --capture=no "$@" -vv + +pytest_args=() +parallel_args=() + +while [[ $# -gt 0 ]]; do + case "$1" in + -j|--jobs) + if [[ $# -lt 2 ]]; then + echo "error: $1 requires a worker count" >&2 + exit 2 + fi + parallel_args=(-n "$2") + shift 2 + ;; + --jobs=*) + parallel_args=(-n "${1#--jobs=}") + shift + ;; + *) + pytest_args+=("$1") + shift + ;; + esac +done + +python3 -m pytest -m 'not integration_test' -m 'not stress_test' -c pytest.ini --capture=no "${parallel_args[@]}" "${pytest_args[@]}" -vv diff --git a/src/dbzero/bindings/python/Memo.cpp b/src/dbzero/bindings/python/Memo.cpp index a1356b69..6becea76 100644 --- a/src/dbzero/bindings/python/Memo.cpp +++ b/src/dbzero/bindings/python/Memo.cpp @@ -14,6 +14,7 @@ #include "Migration.hpp" #include "PyHash.hpp" #include "DataMasking.hpp" +#include #include #include #include @@ -89,6 +90,16 @@ namespace db0::python Py_hash_t PyAPI_MemoHash(MemoObject *self) { PY_API_FUNC + // Python's tp_hash is only for in-process Python hash tables. Immutable + // wrappers can materialize after being inserted into a Python set, so + // keep their hash tied to the wrapper address for that wrapper lifetime. + // db0 persisted collections must use getPyHash() for durable hashes. + if (PyMemo_Check(reinterpret_cast(self))) { + auto *immutable = reinterpret_cast(self); + auto pyHash = static_cast(reinterpret_cast(immutable)); + return pyHash == -1 ? -2 : pyHash; + } + auto fixture = self->ext().getFixture(); return runSafe(getPyHashImpl, fixture, self); } diff --git a/src/dbzero/bindings/python/PyHash.cpp b/src/dbzero/bindings/python/PyHash.cpp index 7d9a42ca..27e155c5 100644 --- a/src/dbzero/bindings/python/PyHash.cpp +++ b/src/dbzero/bindings/python/PyHash.cpp @@ -67,7 +67,16 @@ namespace db0::python if (!obj.hasInstance()) { THROWF(db0::InputException) << "Memo object is not initialized" << THROWF_END; } - return obj.getAddress().getValue(); + return obj.getUniqueAddress().getValue(); + } + + template <> std::int64_t getPyHashImpl(db0::swine_ptr &, PyObject *key) + { + auto &obj = reinterpret_cast(key)->ext(); + if (!obj.hasInstance()) { + THROWF(db0::InputException) << "Memo immutable object is not initialized" << THROWF_END; + } + return obj.getUniqueAddress().getValue(); } // MEMO_TYPE specialization @@ -112,6 +121,7 @@ namespace db0::python functions[static_cast(TypeId::DB0_ENUM_VALUE)] = getPyHashImpl; functions[static_cast(TypeId::DB0_ENUM_VALUE_REPR)] = getPyHashImpl; functions[static_cast(TypeId::MEMO_OBJECT)] = getPyHashImpl; + functions[static_cast(TypeId::MEMO_IMMUTABLE_OBJECT)] = getPyHashImpl; functions[static_cast(TypeId::MEMO_TYPE)] = getPyHashImpl; functions[static_cast(TypeId::DATETIME)] = getPyHashImpl_for_simple_obj; functions[static_cast(TypeId::DATETIME_TZ)] = getPyHashImpl_for_simple_obj; @@ -160,4 +170,4 @@ namespace db0::python return std::make_pair(getPyHash(fixture, *lang_enum), lang_enum); } -} \ No newline at end of file +} diff --git a/src/dbzero/bindings/python/PyHash.hpp b/src/dbzero/bindings/python/PyHash.hpp index 8ec85086..44b82aee 100644 --- a/src/dbzero/bindings/python/PyHash.hpp +++ b/src/dbzero/bindings/python/PyHash.hpp @@ -20,7 +20,9 @@ namespace db0::python PyObject* getPyHashAsPyObject(db0::swine_ptr &, PyObject *); - // calculate hash or raise an exception (unhashable type) + // Calculate a durable hash for db0 persisted collections/indexes, or raise + // if the object cannot provide one. This intentionally differs from + // Python's tp_hash, which may use runtime-only wrapper identity. std::int64_t getPyHash(db0::swine_ptr &, PyObject *); template std::int64_t getPyHashImpl(db0::swine_ptr &, PyObject *); @@ -31,4 +33,4 @@ namespace db0::python std::optional > getPyHashIfExists( db0::swine_ptr &fixture, PyObject *obj_ptr); -} \ No newline at end of file +} diff --git a/src/dbzero/bindings/python/PyToolkit.cpp b/src/dbzero/bindings/python/PyToolkit.cpp index 54ea3bb5..b255e729 100644 --- a/src/dbzero/bindings/python/PyToolkit.cpp +++ b/src/dbzero/bindings/python/PyToolkit.cpp @@ -3,6 +3,7 @@ #include "PyToolkit.hpp" #include +#include #include #include "Memo.hpp" #include "MemoExpiredRef.hpp" @@ -37,6 +38,7 @@ #include #include #include +#include namespace db0::python @@ -70,66 +72,6 @@ namespace db0::python return reinterpret_cast(pyObject)->ext().hasRefs(); } - Py_ssize_t embeddedSequenceSize(PyObject *sequence) - { - if (PyTuple_Check(sequence)) { - return PyTuple_GET_SIZE(sequence); - } - if (PyList_Check(sequence)) { - return PyList_GET_SIZE(sequence); - } - return -1; - } - - PyObject *embeddedSequenceItem(PyObject *sequence, Py_ssize_t index) - { - if (PyTuple_Check(sequence)) { - return PyTuple_GET_ITEM(sequence, index); - } - return PyList_GET_ITEM(sequence, index); - } - - void transformEmbeddedTupleObjects( - db0::swine_ptr &fixture, db0::object_model::ClassFactory &classFactory, - PyObject *rootObject, PyObject *sourceSequence, const db0::object_model::o_py_tuple &embeddedTuple - ) - { - // During immutable materialization, tuple/list fields are copied into the root object's embedded - // storage. Any non-materialized immutable memo object originally present in that Python sequence - // must then be morphed in place into an embedded memo view. The Python object keeps its identity, - // but its native payload now points at the embedded object stored under rootObject. Walk the source - // Python sequence in lockstep with the persisted embedded tuple so nested tuple/list elements can - // be fixed up recursively. - auto sourceSize = embeddedSequenceSize(sourceSequence); - assert(sourceSize >= 0); - assert(static_cast(sourceSize) == embeddedTuple.size()); - - for (Py_ssize_t index = 0; index < sourceSize; ++index) { - auto *sourceItem = embeddedSequenceItem(sourceSequence, index); - const auto &embeddedItem = embeddedTuple.item(static_cast(index)); - - if (PyEmbeddedMemo_Check(sourceItem)) { - continue; - } - - if (PyMemo_Check(sourceItem)) { - assert(embeddedItem.itemKind() == db0::object_model::StorageClass::EMBEDDED_OBJECT); - const auto &embeddedObject = db0::object_model::o_embedded_object::__const_ref( - embeddedItem.embeddedPayload().begin() - ); - PyToolkit::transformEmbeddedObject(fixture, rootObject, sourceItem, embeddedObject); - continue; - } - - if (PyTuple_Check(sourceItem) || PyList_Check(sourceItem)) { - assert(embeddedItem.itemKind() == db0::object_model::StorageClass::EMBEDDED_TUPLE); - const auto &nestedTuple = db0::object_model::o_py_tuple::__const_ref( - embeddedItem.embeddedPayload().begin() - ); - transformEmbeddedTupleObjects(fixture, classFactory, rootObject, sourceItem, nestedTuple); - } - } - } } PyToolkit::ObjectSharedPtr PyToolkit::unloadEmbeddedInstance( @@ -188,6 +130,13 @@ namespace db0::python const auto &tuple = db0::object_model::o_py_tuple::__const_ref(item.embeddedPayload().begin()); return makeEmbeddedTuple(rootObject, tuple); } + case StorageClass::EMBEDDED_SET: { + if (!rootObject) { + THROWF(db0::InputException) << "Embedded set retrieval requires a root memo object"; + } + const auto &set = db0::object_model::o_py_set::__const_ref(item.embeddedPayload().begin()); + return makeEmbeddedSet(rootObject, set); + } case StorageClass::EMBEDDED_OBJECT: { if (!rootObject) { THROWF(db0::InputException) << "Embedded object retrieval requires a root memo object"; @@ -210,31 +159,6 @@ namespace db0::python return {}; } - void PyToolkit::transformEmbeddedObject( - db0::swine_ptr &fixture, ObjectPtr rootObject, ObjectPtr sourceObject, - const db0::object_model::o_embedded_object &embeddedObject - ) - { - if (PyEmbeddedMemo_Check(sourceObject)) { - return; - } - - assert(PyMemo_Check(sourceObject)); - auto &classFactory = fixture->get(); - auto type = classFactory.getTypeByClassRef(embeddedObject.getClassRef()).m_class; - auto *embeddedMemo = reinterpret_cast(sourceObject); - transformMemoImmutableObjectToEmbedded(embeddedMemo, rootObject, embeddedObject, std::move(type)); - } - - void PyToolkit::transformEmbeddedTuple( - db0::swine_ptr &fixture, ObjectPtr rootObject, ObjectPtr sourceSequence, - const db0::object_model::o_py_tuple &embeddedTuple - ) - { - auto &classFactory = fixture->get(); - transformEmbeddedTupleObjects(fixture, classFactory, rootObject, sourceSequence, embeddedTuple); - } - bool PyToolkit::hasMemoInstance(ObjectPtr pyObject) { if (PyMemo_Check(pyObject)) { diff --git a/src/dbzero/bindings/python/PyToolkit.hpp b/src/dbzero/bindings/python/PyToolkit.hpp index d30fdb8f..056a539d 100644 --- a/src/dbzero/bindings/python/PyToolkit.hpp +++ b/src/dbzero/bindings/python/PyToolkit.hpp @@ -138,14 +138,6 @@ namespace db0::python static ObjectSharedPtr unloadEmbeddedInstance( db0::swine_ptr &, ObjectPtr root_object, const db0::object_model::o_tuple_item & ); - static void transformEmbeddedObject( - db0::swine_ptr &, ObjectPtr root_object, ObjectPtr source_object, - const db0::object_model::o_embedded_object & - ); - static void transformEmbeddedTuple( - db0::swine_ptr &, ObjectPtr root_object, ObjectPtr source_sequence, - const db0::object_model::o_py_tuple & - ); // Unload dbzero block instance static ObjectSharedPtr unloadBlock(db0::swine_ptr, Address, std::uint16_t instance_id = 0, AccessFlags = {}); diff --git a/src/dbzero/bindings/python/dbzero.cpp b/src/dbzero/bindings/python/dbzero.cpp index 3fae26ef..91f651d5 100644 --- a/src/dbzero/bindings/python/dbzero.cpp +++ b/src/dbzero/bindings/python/dbzero.cpp @@ -8,6 +8,7 @@ #include "PyTagsAPI.hpp" #include "PyObjectTagManager.hpp" #include +#include #include #include "PySnapshot.hpp" #include "PyTagSet.hpp" @@ -206,6 +207,8 @@ PyMODINIT_FUNC PyInit_dbzero(void) &py::PyObjectTagManagerType, &py::EmbeddedObjectType, &py::EmbeddedTupleType, + &py::EmbeddedSetType, + &py::EmbeddedSetIteratorType, &py::PySnapshotObjectType, &py::PyObjectIterableType, &py::PyObjectIteratorType, diff --git a/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp b/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp index c90112f6..9c049d4e 100644 --- a/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp +++ b/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp @@ -3,6 +3,7 @@ #include +#include #include #include #include @@ -10,13 +11,18 @@ #include #include +#include #include +#include #include #include +#include +#include #include #include #include +#include #include namespace db0::python @@ -311,11 +317,14 @@ namespace db0::python return result.steal(); } - Py_hash_t PyAPI_EmbeddedMemo_hash(MemoImmutableObject *) + Py_hash_t PyAPI_EmbeddedMemo_hash(MemoImmutableObject *self) { PY_API_FUNC - PyErr_SetString(PyExc_TypeError, "Embedded immutable memo objects do not have durable identity"); - return -1; + // Runtime Python hash only. Embedded memo wrappers may be transformed + // in-place after insertion into a Python set, so keep the hash tied + // to the wrapper address. Durable db0 hashing must use getPyHash(). + auto hash = static_cast(reinterpret_cast(self)); + return hash == -1 ? -2 : hash; } static PyMethodDef EmbeddedMemo_methods[] = { @@ -398,6 +407,111 @@ namespace db0::python return embeddedType; } + Py_ssize_t embeddedSequenceSize(PyObject *sequence) + { + if (PyTuple_Check(sequence)) { + return PyTuple_GET_SIZE(sequence); + } + if (PyList_Check(sequence)) { + return PyList_GET_SIZE(sequence); + } + return -1; + } + + PyObject *embeddedSequenceItem(PyObject *sequence, Py_ssize_t index) + { + if (PyTuple_Check(sequence)) { + return PyTuple_GET_ITEM(sequence, index); + } + return PyList_GET_ITEM(sequence, index); + } + + void transformEmbeddedTupleObjects( + db0::swine_ptr &fixture, ClassFactory &classFactory, PyObject *rootObject, + PyObject *sourceSequence, const o_py_tuple &embeddedTuple + ); + + void transformEmbeddedSetObjects( + db0::swine_ptr &fixture, ClassFactory &classFactory, PyObject *rootObject, + PyObject *sourceSet, const o_py_set &embeddedSet + ); + + void transformEmbeddedItem( + db0::swine_ptr &fixture, ClassFactory &classFactory, PyObject *rootObject, + PyObject *sourceItem, const o_tuple_item &embeddedItem + ) + { + if (PyEmbeddedMemo_Check(sourceItem)) { + return; + } + + if (PyMemo_Check(sourceItem)) { + assert(embeddedItem.itemKind() == StorageClass::EMBEDDED_OBJECT); + const auto &embeddedObject = o_embedded_object::__const_ref( + embeddedItem.embeddedPayload().begin() + ); + transformEmbeddedObject(fixture, rootObject, sourceItem, embeddedObject); + return; + } + + if (PyTuple_Check(sourceItem) || PyList_Check(sourceItem)) { + assert(embeddedItem.itemKind() == StorageClass::EMBEDDED_TUPLE); + const auto &nestedTuple = o_py_tuple::__const_ref(embeddedItem.embeddedPayload().begin()); + transformEmbeddedTupleObjects(fixture, classFactory, rootObject, sourceItem, nestedTuple); + return; + } + + if (PySet_Check(sourceItem)) { + assert(embeddedItem.itemKind() == StorageClass::EMBEDDED_SET); + const auto &nestedSet = o_py_set::__const_ref(embeddedItem.embeddedPayload().begin()); + transformEmbeddedSetObjects(fixture, classFactory, rootObject, sourceItem, nestedSet); + } + } + + void transformEmbeddedTupleObjects( + db0::swine_ptr &fixture, ClassFactory &classFactory, PyObject *rootObject, + PyObject *sourceSequence, const o_py_tuple &embeddedTuple + ) + { + // During immutable materialization, tuple/list fields are copied into the root object's embedded + // storage. Any non-materialized immutable memo object originally present in that Python sequence + // must then be morphed in place into an embedded memo view. The Python object keeps its identity, + // but its native payload now points at the embedded object stored under rootObject. Walk the source + // Python sequence in lockstep with the persisted embedded tuple so nested tuple/list elements can + // be fixed up recursively. + auto sourceSize = embeddedSequenceSize(sourceSequence); + assert(sourceSize >= 0); + assert(static_cast(sourceSize) == embeddedTuple.size()); + + for (Py_ssize_t index = 0; index < sourceSize; ++index) { + auto *sourceItem = embeddedSequenceItem(sourceSequence, index); + const auto &embeddedItem = embeddedTuple.item(static_cast(index)); + transformEmbeddedItem(fixture, classFactory, rootObject, sourceItem, embeddedItem); + } + } + + void transformEmbeddedSetObjects( + db0::swine_ptr &fixture, ClassFactory &classFactory, PyObject *rootObject, + PyObject *sourceSet, const o_py_set &embeddedSet + ) + { + // o_py_set is constructed by iterating the source Python set, so while that set is unchanged + // we can walk both containers in the same order and morph any immutable memo elements in place. + assert(PySet_Check(sourceSet)); + assert(static_cast(PySet_GET_SIZE(sourceSet)) == embeddedSet.size()); + + auto iterator = Py_OWN(PyObject_GetIter(sourceSet)); + assert(iterator.get()); + + auto embeddedItem = embeddedSet.begin(); + Py_FOR(sourceItem, iterator) { + assert(embeddedItem != embeddedSet.end()); + transformEmbeddedItem(fixture, classFactory, rootObject, *sourceItem, *embeddedItem); + ++embeddedItem; + } + assert(embeddedItem == embeddedSet.end()); + } + std::string consumePyErrorMessage() { if (!PyErr_Occurred()) { @@ -490,6 +604,40 @@ namespace db0::python } } + void transformEmbeddedObject( + db0::swine_ptr &fixture, PyTypes::ObjectPtr rootObject, PyTypes::ObjectPtr sourceObject, + const o_embedded_object &embeddedObject + ) + { + if (PyEmbeddedMemo_Check(sourceObject)) { + return; + } + + assert(PyMemo_Check(sourceObject)); + auto &classFactory = fixture->get(); + auto type = classFactory.getTypeByClassRef(embeddedObject.getClassRef()).m_class; + auto *embeddedMemo = reinterpret_cast(sourceObject); + transformMemoImmutableObjectToEmbedded(embeddedMemo, rootObject, embeddedObject, std::move(type)); + } + + void transformEmbeddedTuple( + db0::swine_ptr &fixture, PyTypes::ObjectPtr rootObject, PyTypes::ObjectPtr sourceSequence, + const o_py_tuple &embeddedTuple + ) + { + auto &classFactory = fixture->get(); + transformEmbeddedTupleObjects(fixture, classFactory, rootObject, sourceSequence, embeddedTuple); + } + + void transformEmbeddedSet( + db0::swine_ptr &fixture, PyTypes::ObjectPtr rootObject, PyTypes::ObjectPtr sourceSet, + const o_py_set &embeddedSet + ) + { + auto &classFactory = fixture->get(); + transformEmbeddedSetObjects(fixture, classFactory, rootObject, sourceSet, embeddedSet); + } + bool PyEmbeddedMemoType_Check(PyTypeObject *type) { return PyToolkit::getTypeManager().isEmbeddedMemoType(type); diff --git a/src/dbzero/bindings/python/embedded/EmbeddedObject.hpp b/src/dbzero/bindings/python/embedded/EmbeddedObject.hpp index 6370246d..ac22b818 100644 --- a/src/dbzero/bindings/python/embedded/EmbeddedObject.hpp +++ b/src/dbzero/bindings/python/embedded/EmbeddedObject.hpp @@ -10,11 +10,19 @@ #include #include #include +#include + +namespace db0 +{ + class Fixture; +} namespace db0::object_model { class Class; class o_embedded_object; + class o_py_tuple; + class o_py_set; } namespace db0::python @@ -60,6 +68,21 @@ namespace db0::python std::shared_ptr type ); + void transformEmbeddedObject( + db0::swine_ptr &fixture, PyTypes::ObjectPtr rootObject, PyTypes::ObjectPtr sourceObject, + const db0::object_model::o_embedded_object &embeddedObject + ); + + void transformEmbeddedTuple( + db0::swine_ptr &fixture, PyTypes::ObjectPtr rootObject, PyTypes::ObjectPtr sourceSequence, + const db0::object_model::o_py_tuple &embeddedTuple + ); + + void transformEmbeddedSet( + db0::swine_ptr &fixture, PyTypes::ObjectPtr rootObject, PyTypes::ObjectPtr sourceSet, + const db0::object_model::o_py_set &embeddedSet + ); + bool PyEmbeddedMemo_Check(PyObject *object); bool PyEmbeddedMemoType_Check(PyTypeObject *type); } diff --git a/src/dbzero/bindings/python/embedded/EmbeddedSet.cpp b/src/dbzero/bindings/python/embedded/EmbeddedSet.cpp new file mode 100644 index 00000000..542ba3c4 --- /dev/null +++ b/src/dbzero/bindings/python/embedded/EmbeddedSet.cpp @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace db0::python +{ + using ObjectSharedPtr = PyTypes::ObjectSharedPtr; + using namespace db0::object_model; + + EmbeddedSetRef::EmbeddedSetRef(PyObject *rootObject, const o_py_set *set) + : m_root_object(rootObject) + , m_set(set) + { + Py_XINCREF(m_root_object); + } + + EmbeddedSetRef::~EmbeddedSetRef() + { + Py_XDECREF(m_root_object); + } + + PyObject *EmbeddedSetRef::rootObject() const + { + return m_root_object; + } + + const o_py_set &EmbeddedSetRef::set() const + { + return *m_set; + } + + EmbeddedSetIteratorRef::EmbeddedSetIteratorRef(PyObject *setObject) + : m_set_object(setObject) + , m_iterator(reinterpret_cast(setObject)->ext().set().begin()) + , m_end(reinterpret_cast(setObject)->ext().set().end()) + { + Py_XINCREF(m_set_object); + } + + EmbeddedSetIteratorRef::~EmbeddedSetIteratorRef() + { + Py_XDECREF(m_set_object); + } + + PyObject *EmbeddedSetIteratorRef::setObject() const + { + return m_set_object; + } + + const o_set::Item *EmbeddedSetIteratorRef::next() + { + if (m_iterator == m_end) { + return nullptr; + } + const auto *item = &*m_iterator; + ++m_iterator; + return item; + } + + namespace + { + db0::swine_ptr getRootFixture(PyObject *rootObject) + { + return reinterpret_cast(rootObject)->ext().getFixture(); + } + + PyObject *unloadEmbeddedSetItem(PyObject *rootObject, const o_set::Item &item) + { + auto fixture = getRootFixture(rootObject); + fixture->refreshIfUpdated(); + return PyToolkit::unloadEmbeddedInstance(fixture, rootObject, item).steal(); + } + + PyObject *tryEmbeddedSetIter(EmbeddedSet *self) + { + auto *pyObject = reinterpret_cast( + EmbeddedSetIteratorType.tp_alloc(&EmbeddedSetIteratorType, 0) + ); + if (!pyObject) { + return nullptr; + } + pyObject->makeNew(reinterpret_cast(self)); + return reinterpret_cast(pyObject); + } + + PyObject *PyAPI_EmbeddedSet_iter(EmbeddedSet *self) + { + PY_API_FUNC + return runSafe(tryEmbeddedSetIter, self); + } + + PyObject *tryEmbeddedSetIteratorNext(EmbeddedSetIterator *self) + { + auto *setObject = reinterpret_cast(self->ext().setObject()); + auto *item = self->modifyExt().next(); + if (!item) { + return nullptr; + } + return unloadEmbeddedSetItem(setObject->ext().rootObject(), *item); + } + + PyObject *PyAPI_EmbeddedSetIterator_next(EmbeddedSetIterator *self) + { + PY_API_FUNC + return runSafe(tryEmbeddedSetIteratorNext, self); + } + + Py_ssize_t tryEmbeddedSetLen(EmbeddedSet *self) + { + return static_cast(self->ext().set().size()); + } + + Py_ssize_t PyAPI_EmbeddedSet_len(EmbeddedSet *self) + { + PY_API_FUNC + return runSafe(tryEmbeddedSetLen, self); + } + + int tryEmbeddedSetContains(EmbeddedSet *self, PyObject *needle) + { + for (const auto &item: self->ext().set()) { + auto pyItem = Py_OWN(unloadEmbeddedSetItem(self->ext().rootObject(), item)); + if (!pyItem) { + return -1; + } + int equal = PyObject_RichCompareBool(*pyItem, needle, Py_EQ); + if (equal < 0) { + return -1; + } + if (equal) { + return 1; + } + } + return 0; + } + + int PyAPI_EmbeddedSet_contains(EmbeddedSet *self, PyObject *needle) + { + PY_API_FUNC + return runSafe<-1>(tryEmbeddedSetContains, self, needle); + } + + PyObject *tryEmbeddedSetStr(EmbeddedSet *self) + { + if (self->ext().set().empty()) { + return PyUnicode_FromString("set()"); + } + + std::stringstream str; + str << "{"; + bool first = true; + for (const auto &setItem: self->ext().set()) { + if (!first) { + str << ", "; + } + first = false; + auto item = Py_OWN(unloadEmbeddedSetItem(self->ext().rootObject(), setItem)); + if (!item) { + return nullptr; + } + auto repr = Py_OWN(PyObject_Repr(*item)); + if (!repr) { + return nullptr; + } + str << PyUnicode_AsUTF8(*repr); + } + str << "}"; + return PyUnicode_FromString(str.str().c_str()); + } + + PyObject *PyAPI_EmbeddedSet_str(EmbeddedSet *self) + { + PY_API_FUNC + return runSafe(tryEmbeddedSetStr, self); + } + + void PyAPI_EmbeddedSet_del(EmbeddedSet *self) + { + PY_API_FUNC + if (PyObject_GC_IsTracked(self)) { + PyObject_GC_UnTrack(self); + } + self->destroy(); + Py_TYPE(self)->tp_free(reinterpret_cast(self)); + } + + void PyAPI_EmbeddedSetIterator_del(EmbeddedSetIterator *self) + { + PY_API_FUNC + if (PyObject_GC_IsTracked(self)) { + PyObject_GC_UnTrack(self); + } + self->destroy(); + Py_TYPE(self)->tp_free(reinterpret_cast(self)); + } + + int EmbeddedSet_traverse(EmbeddedSet *self, visitproc visit, void *arg) + { + Py_VISIT(self->ext().rootObject()); + return 0; + } + + int EmbeddedSetIterator_traverse(EmbeddedSetIterator *self, visitproc visit, void *arg) + { + Py_VISIT(self->ext().setObject()); + return 0; + } + + static PySequenceMethods EmbeddedSet_sq = { + .sq_length = reinterpret_cast(PyAPI_EmbeddedSet_len), + .sq_contains = reinterpret_cast(PyAPI_EmbeddedSet_contains), + }; + } + + PyTypeObject EmbeddedSetType = { + PyVarObject_HEAD_INIT(nullptr, 0) + .tp_name = "dbzero.EmbeddedSet", + .tp_basicsize = static_cast(EmbeddedSet::sizeOf()), + .tp_itemsize = 0, + .tp_dealloc = reinterpret_cast(PyAPI_EmbeddedSet_del), + .tp_repr = reinterpret_cast(PyAPI_EmbeddedSet_str), + .tp_as_sequence = &EmbeddedSet_sq, + .tp_str = reinterpret_cast(PyAPI_EmbeddedSet_str), + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_doc = "dbzero embedded immutable set view", + .tp_traverse = reinterpret_cast(EmbeddedSet_traverse), + .tp_iter = reinterpret_cast(PyAPI_EmbeddedSet_iter), + .tp_alloc = PyType_GenericAlloc, + .tp_free = PyObject_GC_Del, + }; + + PyTypeObject EmbeddedSetIteratorType = { + PyVarObject_HEAD_INIT(nullptr, 0) + .tp_name = "dbzero.EmbeddedSetIterator", + .tp_basicsize = static_cast(EmbeddedSetIterator::sizeOf()), + .tp_itemsize = 0, + .tp_dealloc = reinterpret_cast(PyAPI_EmbeddedSetIterator_del), + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_doc = "dbzero embedded immutable set iterator", + .tp_traverse = reinterpret_cast(EmbeddedSetIterator_traverse), + .tp_iter = PyObject_SelfIter, + .tp_iternext = reinterpret_cast(PyAPI_EmbeddedSetIterator_next), + .tp_alloc = PyType_GenericAlloc, + .tp_free = PyObject_GC_Del, + }; + + ObjectSharedPtr makeEmbeddedSet(PyObject *rootObject, const o_py_set &set) + { + auto *pyObject = reinterpret_cast(EmbeddedSetType.tp_alloc(&EmbeddedSetType, 0)); + if (!pyObject) { + return {}; + } + pyObject->makeNew(rootObject, &set); + return Py_OWN(reinterpret_cast(pyObject)); + } +} diff --git a/src/dbzero/bindings/python/embedded/EmbeddedSet.hpp b/src/dbzero/bindings/python/embedded/EmbeddedSet.hpp new file mode 100644 index 00000000..78ff9171 --- /dev/null +++ b/src/dbzero/bindings/python/embedded/EmbeddedSet.hpp @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#pragma once + +#include + +#include +#include +#include + +namespace db0::object_model +{ + class o_py_set; +} + +namespace db0::python +{ + class EmbeddedSetRef + { + public: + EmbeddedSetRef(PyObject *rootObject, const db0::object_model::o_py_set *set); + ~EmbeddedSetRef(); + + EmbeddedSetRef(const EmbeddedSetRef &) = delete; + EmbeddedSetRef &operator=(const EmbeddedSetRef &) = delete; + + PyObject *rootObject() const; + const db0::object_model::o_py_set &set() const; + + private: + PyObject *m_root_object = nullptr; + const db0::object_model::o_py_set *m_set = nullptr; + }; + + class EmbeddedSetIteratorRef + { + public: + explicit EmbeddedSetIteratorRef(PyObject *setObject); + ~EmbeddedSetIteratorRef(); + + EmbeddedSetIteratorRef(const EmbeddedSetIteratorRef &) = delete; + EmbeddedSetIteratorRef &operator=(const EmbeddedSetIteratorRef &) = delete; + + PyObject *setObject() const; + const db0::object_model::o_set::Item *next(); + + private: + PyObject *m_set_object = nullptr; + db0::object_model::o_set::const_iterator m_iterator; + db0::object_model::o_set::const_iterator m_end; + }; + + using EmbeddedSet = PyWrapper; + using EmbeddedSetIterator = PyWrapper; + + extern PyTypeObject EmbeddedSetType; + extern PyTypeObject EmbeddedSetIteratorType; + + PyTypes::ObjectSharedPtr makeEmbeddedSet( + PyObject *rootObject, const db0::object_model::o_py_set &set + ); +} diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp index b8b8c0a6..d6dbb2aa 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp @@ -3,11 +3,13 @@ #include "ObjectImmutableImpl.hpp" +#include #include #include #include #include #include +#include #include #include @@ -35,6 +37,40 @@ namespace db0::object_model } void unrefNestedEmbeddedObjects(db0::swine_ptr &fixture, const o_embedded_object &embeddedObject); + void unrefEmbeddedObject(db0::swine_ptr &fixture, const o_embedded_object &embeddedObject); + + void unrefEmbeddedItem(db0::swine_ptr &fixture, const o_tuple_item &item); + + void unrefEmbeddedTuple(db0::swine_ptr &fixture, const o_py_tuple &tuple) + { + for (const auto &item: tuple) { + unrefEmbeddedItem(fixture, item); + } + } + + void unrefEmbeddedSet(db0::swine_ptr &fixture, const o_py_set &set) + { + for (const auto &item: set) { + unrefEmbeddedItem(fixture, item); + } + } + + void unrefEmbeddedItem(db0::swine_ptr &fixture, const o_tuple_item &item) + { + switch (item.itemKind()) { + case StorageClass::EMBEDDED_OBJECT: + unrefEmbeddedObject(fixture, o_embedded_object::__const_ref(item.embeddedPayload().begin())); + return; + case StorageClass::EMBEDDED_TUPLE: + unrefEmbeddedTuple(fixture, o_py_tuple::__const_ref(item.embeddedPayload().begin())); + return; + case StorageClass::EMBEDDED_SET: + unrefEmbeddedSet(fixture, o_py_set::__const_ref(item.embeddedPayload().begin())); + return; + default: + return; + } + } void unrefEmbeddedObjectTables(db0::swine_ptr &fixture, const o_embedded_object &embeddedObject) { @@ -66,11 +102,7 @@ namespace db0::object_model { for (const auto &entry: embeddedObject.field_map()) { const auto &value = entry.value(); - if (value.itemKind() != StorageClass::EMBEDDED_OBJECT) { - // Embedded collection traversal is intentionally left for a later implementation. - continue; - } - unrefEmbeddedObject(fixture, o_embedded_object::__const_ref(value.embeddedPayload().begin())); + unrefEmbeddedItem(fixture, value); } } @@ -97,7 +129,7 @@ namespace db0::object_model const auto &embeddedObject = o_embedded_object::__const_ref( embeddedValue->embeddedPayload().begin() ); - LangConfig::LangToolkit::transformEmbeddedObject( + db0::python::transformEmbeddedObject( fixture, rootObject, value.m_object.get(), embeddedObject ); continue; @@ -106,9 +138,18 @@ namespace db0::object_model if (value.m_storage_class == StorageClass::DB0_TUPLE || value.m_storage_class == StorageClass::DB0_LIST) { assert(embeddedValue->itemKind() == StorageClass::EMBEDDED_TUPLE); const auto &embeddedTuple = o_py_tuple::__const_ref(embeddedValue->embeddedPayload().begin()); - LangConfig::LangToolkit::transformEmbeddedTuple( + db0::python::transformEmbeddedTuple( fixture, rootObject, value.m_object.get(), embeddedTuple ); + continue; + } + + if (value.m_storage_class == StorageClass::DB0_SET) { + assert(embeddedValue->itemKind() == StorageClass::EMBEDDED_SET); + const auto &embeddedSet = o_py_set::__const_ref(embeddedValue->embeddedPayload().begin()); + db0::python::transformEmbeddedSet( + fixture, rootObject, value.m_object.get(), embeddedSet + ); } } } diff --git a/src/dbzero/object_model/set/o_py_set.cpp b/src/dbzero/object_model/set/o_py_set.cpp index f07dc373..17d2a86f 100644 --- a/src/dbzero/object_model/set/o_py_set.cpp +++ b/src/dbzero/object_model/set/o_py_set.cpp @@ -10,7 +10,10 @@ #include #include #include +#include #include +#include +#include #include namespace db0::object_model @@ -31,6 +34,36 @@ namespace db0::object_model { o_py_dict::__new(buf, const_cast(static_cast(source))); } + + const ImmutableObjectInitializer &getInitializer(PyObject *pyObject) + { + using MemoImmutableObject = db0::python::PyToolkit::TypeManager::MemoImmutableObject; + + assert(db0::python::PyToolkit::isMemoImmutableObject(pyObject)); + + const auto &object = db0::python::PyToolkit::getTypeManager() + .template extractObject(pyObject); + if (object.hasInstance()) { + THROWF(db0::InputException) + << "Only non-materialized immutable memo objects can be embedded"; + } + + auto *initializer = dynamic_cast( + InitManager::instance.findInitializer(object) + ); + if (!initializer) { + THROWF(db0::InputException) + << "Non-materialized immutable memo object has no active initializer"; + } + return *initializer; + } + + void writeEmbeddedObject(void *buf, const void *source) + { + auto *pyObject = const_cast(static_cast(source)); + const auto &initializer = getInitializer(pyObject); + o_embedded_object::__new(buf, initializer.getClassPtr()->getClassRef(), initializer); + } } o_py_set::o_py_set(PyObject *iterable) @@ -136,6 +169,11 @@ namespace db0::object_model return Element::embeddedSet(o_py_set::measure(object), writePySet, object); case db0::bindings::TypeId::DICT: return Element::embeddedDict(o_py_dict::measure(object), writePyDict, object); + case db0::bindings::TypeId::MEMO_IMMUTABLE_OBJECT: { + const auto &initializer = getInitializer(object); + auto size = o_embedded_object::measure(initializer.getClassPtr()->getClassRef(), initializer); + return Element::embeddedObject(size, writeEmbeddedObject, object); + } default: break; } From 5e02553c3270350545d3123bfcdeb9e549d0e100 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 21 May 2026 13:47:53 +0200 Subject: [PATCH 4/7] embedding dicts --- python_tests/test_memo_immutable.py | 112 +++++ src/dbzero/bindings/python/PyToolkit.cpp | 9 + src/dbzero/bindings/python/dbzero.cpp | 3 + .../bindings/python/embedded/EmbeddedDict.cpp | 469 ++++++++++++++++++ .../bindings/python/embedded/EmbeddedDict.hpp | 72 +++ .../python/embedded/EmbeddedObject.cpp | 45 ++ .../python/embedded/EmbeddedObject.hpp | 6 + src/dbzero/object_model/dict/o_py_dict.cpp | 38 ++ src/dbzero/object_model/dict/o_py_dict.hpp | 2 +- .../object/ObjectImmutableImpl.cpp | 21 + tests/unit_tests/EmbeddedDictTest.cpp | 109 ++++ 11 files changed, 885 insertions(+), 1 deletion(-) create mode 100644 src/dbzero/bindings/python/embedded/EmbeddedDict.cpp create mode 100644 src/dbzero/bindings/python/embedded/EmbeddedDict.hpp diff --git a/python_tests/test_memo_immutable.py b/python_tests/test_memo_immutable.py index 48d0f6c7..264e7839 100644 --- a/python_tests/test_memo_immutable.py +++ b/python_tests/test_memo_immutable.py @@ -62,6 +62,12 @@ def __init__(self, payload): self.payload = payload +@db0.memo(immutable=True, no_default_tags=True) +class MemoImmutableDictHolder: + def __init__(self, payload): + self.payload = payload + + @db0.memo(no_default_tags=True) class MemoSetReferenceHolder: def __init__(self, payload): @@ -305,4 +311,110 @@ def test_db0_set_uses_durable_hash_for_materialized_immutable_after_reopen(db0_f reopened_obj = db0.fetch(obj_id) reopened_holder = db0.fetch(holder_id) assert reopened_obj in reopened_holder.payload + + +def test_read_embedded_dict_field_after_reopen(db0_fixture): + payload = {"name": "dbzero", "count": 3, 7: b"bytes", None: True} + obj = MemoImmutableDictHolder(payload) + db0.tags(obj).add("keep-embedded-dict") + obj_id = db0.uuid(obj) + + assert type(obj.payload).__name__ == "EmbeddedDict" + assert len(obj.payload) == len(payload) + assert "name" in obj.payload + assert obj.payload["name"] == "dbzero" + assert obj.payload.get("count") == 3 + assert obj.payload.get("missing", "fallback") == "fallback" + keys = obj.payload.keys() + assert not isinstance(keys, tuple) + assert iter(keys) is keys + assert not isinstance(obj.payload.values(), tuple) + assert not isinstance(obj.payload.items(), tuple) + assert set(obj.payload.keys()) == set(payload.keys()) + assert set(obj.payload.values()) == set(payload.values()) + assert set(obj.payload.items()) == set(payload.items()) + assert dict(obj.payload) == payload + assert repr(obj.payload) == repr(payload) + + del obj + gc.collect() + db0.commit() + db0.close() + db0.init(DB0_DIR) + db0.open("my-test-prefix", "rw") + + reopened = db0.fetch(obj_id) + assert type(reopened.payload).__name__ == "EmbeddedDict" + assert dict(reopened.payload) == payload + + +def test_embedded_dict_numeric_key_lookup_uses_python_equality(db0_fixture): + bool_key = MemoImmutableDictHolder({True: "bool-key"}) + int_key = MemoImmutableDictHolder({1: "int-key"}) + + assert bool_key.payload[1] == "bool-key" + assert bool_key.payload[1.0] == "bool-key" + assert bool_key.payload.get(1) == "bool-key" + assert 1 in bool_key.payload + assert int_key.payload[True] == "int-key" + assert int_key.payload[1.0] == "int-key" + assert int_key.payload.get(True) == "int-key" + assert True in int_key.payload + + +def test_embedded_dict_lookup_rejects_unhashable_key(db0_fixture): + obj = MemoImmutableDictHolder({"name": "dbzero"}) + + with pytest.raises(TypeError): + [] in obj.payload + with pytest.raises(TypeError): + obj.payload[[]] + with pytest.raises(TypeError): + obj.payload.get([]) + + +def test_embedded_dict_with_prebound_immutable_object_value(db0_fixture): + inner = MemoImmutableNestedPayload(name="dict child", count=37) + obj = MemoImmutableDictHolder({"child": inner, "marker": "value"}) + db0.tags(obj).add("keep-embedded-dict-value-object") + + embedded_inner = obj.payload["child"] + assert embedded_inner.name == "dict child" + assert embedded_inner.count == 37 + assert inner.name == "dict child" + assert inner.count == 37 + assert isinstance(inner, MemoImmutableNestedPayload) + assert db0.is_memo(inner) + with pytest.raises(Exception): + db0.uuid(inner) + + +def test_embedded_dict_with_prebound_immutable_object_key(db0_fixture): + inner = MemoImmutableNestedPayload(name="dict key child", count=41) + payload = {inner: "child-value", "marker": "value"} + obj = MemoImmutableDictHolder(payload) + db0.tags(obj).add("keep-embedded-dict-key-object") + + assert inner in payload + assert obj.payload[inner] == "child-value" + assert inner in obj.payload + assert inner.name == "dict key child" + assert inner.count == 41 + with pytest.raises(Exception): + db0.uuid(inner) + + +def test_embedded_dict_recursively_exposes_nested_collections(db0_fixture): + inner = MemoImmutableNestedPayload(name="nested dict child", count=43) + payload = {"nested": {"tuple": ("prefix", inner), "set": {"marker", 5}}} + obj = MemoImmutableDictHolder(payload) + db0.tags(obj).add("keep-embedded-dict-nested") + + nested = obj.payload["nested"] + assert type(nested).__name__ == "EmbeddedDict" + assert tuple(nested["tuple"])[0] == "prefix" + embedded_inner = nested["tuple"][1] + assert embedded_inner.name == "nested dict child" + assert embedded_inner.count == 43 + assert set(nested["set"]) == {"marker", 5} diff --git a/src/dbzero/bindings/python/PyToolkit.cpp b/src/dbzero/bindings/python/PyToolkit.cpp index b255e729..7598dd52 100644 --- a/src/dbzero/bindings/python/PyToolkit.cpp +++ b/src/dbzero/bindings/python/PyToolkit.cpp @@ -3,6 +3,7 @@ #include "PyToolkit.hpp" #include +#include #include #include #include "Memo.hpp" @@ -39,6 +40,7 @@ #include #include #include +#include namespace db0::python @@ -137,6 +139,13 @@ namespace db0::python const auto &set = db0::object_model::o_py_set::__const_ref(item.embeddedPayload().begin()); return makeEmbeddedSet(rootObject, set); } + case StorageClass::EMBEDDED_DICT: { + if (!rootObject) { + THROWF(db0::InputException) << "Embedded dict retrieval requires a root memo object"; + } + const auto &dict = db0::object_model::o_py_dict::__const_ref(item.embeddedPayload().begin()); + return makeEmbeddedDict(rootObject, dict); + } case StorageClass::EMBEDDED_OBJECT: { if (!rootObject) { THROWF(db0::InputException) << "Embedded object retrieval requires a root memo object"; diff --git a/src/dbzero/bindings/python/dbzero.cpp b/src/dbzero/bindings/python/dbzero.cpp index 91f651d5..d712d57d 100644 --- a/src/dbzero/bindings/python/dbzero.cpp +++ b/src/dbzero/bindings/python/dbzero.cpp @@ -8,6 +8,7 @@ #include "PyTagsAPI.hpp" #include "PyObjectTagManager.hpp" #include +#include #include #include #include "PySnapshot.hpp" @@ -209,6 +210,8 @@ PyMODINIT_FUNC PyInit_dbzero(void) &py::EmbeddedTupleType, &py::EmbeddedSetType, &py::EmbeddedSetIteratorType, + &py::EmbeddedDictType, + &py::EmbeddedDictIteratorType, &py::PySnapshotObjectType, &py::PyObjectIterableType, &py::PyObjectIteratorType, diff --git a/src/dbzero/bindings/python/embedded/EmbeddedDict.cpp b/src/dbzero/bindings/python/embedded/EmbeddedDict.cpp new file mode 100644 index 00000000..4176c671 --- /dev/null +++ b/src/dbzero/bindings/python/embedded/EmbeddedDict.cpp @@ -0,0 +1,469 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace db0::python +{ + using ObjectSharedPtr = PyTypes::ObjectSharedPtr; + using namespace db0::object_model; + + EmbeddedDictRef::EmbeddedDictRef(PyObject *rootObject, const o_py_dict *dict) + : m_root_object(rootObject) + , m_dict(dict) + { + Py_XINCREF(m_root_object); + } + + EmbeddedDictRef::~EmbeddedDictRef() + { + Py_XDECREF(m_root_object); + } + + PyObject *EmbeddedDictRef::rootObject() const + { + return m_root_object; + } + + const o_py_dict &EmbeddedDictRef::dict() const + { + return *m_dict; + } + + EmbeddedDictIteratorRef::EmbeddedDictIteratorRef(PyObject *dictObject, ItemKind itemKind) + : m_dict_object(dictObject) + , m_item_kind(itemKind) + , m_iterator(reinterpret_cast(dictObject)->ext().dict().begin()) + , m_end(reinterpret_cast(dictObject)->ext().dict().end()) + { + Py_XINCREF(m_dict_object); + } + + EmbeddedDictIteratorRef::~EmbeddedDictIteratorRef() + { + Py_XDECREF(m_dict_object); + } + + PyObject *EmbeddedDictIteratorRef::dictObject() const + { + return m_dict_object; + } + + EmbeddedDictIteratorRef::ItemKind EmbeddedDictIteratorRef::itemKind() const + { + return m_item_kind; + } + + const o_dict::Pair *EmbeddedDictIteratorRef::next() + { + if (m_iterator == m_end) { + return nullptr; + } + const auto *pair = &*m_iterator; + ++m_iterator; + return pair; + } + + namespace + { + db0::swine_ptr getRootFixture(PyObject *rootObject) + { + return reinterpret_cast(rootObject)->ext().getFixture(); + } + + PyObject *unloadEmbeddedDictItem(PyObject *rootObject, const o_dict::Item &item) + { + auto fixture = getRootFixture(rootObject); + fixture->refreshIfUpdated(); + return PyToolkit::unloadEmbeddedInstance(fixture, rootObject, item).steal(); + } + + PyObject *makeEmbeddedDictIterator(EmbeddedDict *self, EmbeddedDictIteratorRef::ItemKind itemKind) + { + auto *pyObject = reinterpret_cast( + EmbeddedDictIteratorType.tp_alloc(&EmbeddedDictIteratorType, 0) + ); + if (!pyObject) { + return nullptr; + } + pyObject->makeNew(reinterpret_cast(self), itemKind); + return reinterpret_cast(pyObject); + } + + PyObject *tryEmbeddedDictIter(EmbeddedDict *self) + { + return makeEmbeddedDictIterator(self, EmbeddedDictIteratorRef::ItemKind::KEY); + } + + PyObject *PyAPI_EmbeddedDict_iter(EmbeddedDict *self) + { + PY_API_FUNC + return runSafe(tryEmbeddedDictIter, self); + } + + PyObject *tryEmbeddedDictIteratorNext(EmbeddedDictIterator *self) + { + auto *dictObject = reinterpret_cast(self->ext().dictObject()); + auto *pair = self->modifyExt().next(); + if (!pair) { + return nullptr; + } + switch (self->ext().itemKind()) { + case EmbeddedDictIteratorRef::ItemKind::KEY: + return unloadEmbeddedDictItem(dictObject->ext().rootObject(), pair->key()); + case EmbeddedDictIteratorRef::ItemKind::VALUE: + return unloadEmbeddedDictItem(dictObject->ext().rootObject(), pair->value()); + case EmbeddedDictIteratorRef::ItemKind::PAIR: { + auto key = Py_OWN(unloadEmbeddedDictItem(dictObject->ext().rootObject(), pair->key())); + auto value = Py_OWN(unloadEmbeddedDictItem(dictObject->ext().rootObject(), pair->value())); + if (!key || !value) { + return nullptr; + } + auto pairTuple = Py_OWN(PyTuple_New(2)); + if (!pairTuple) { + return nullptr; + } + PySafeTuple_SetItem(*pairTuple, 0, std::move(key)); + PySafeTuple_SetItem(*pairTuple, 1, std::move(value)); + return pairTuple.steal(); + } + } + PyErr_SetString(PyExc_SystemError, "unknown embedded dict iterator mode"); + return nullptr; + } + + PyObject *PyAPI_EmbeddedDictIterator_next(EmbeddedDictIterator *self) + { + PY_API_FUNC + return runSafe(tryEmbeddedDictIteratorNext, self); + } + + Py_ssize_t tryEmbeddedDictLen(EmbeddedDict *self) + { + return static_cast(self->ext().dict().size()); + } + + Py_ssize_t PyAPI_EmbeddedDict_len(EmbeddedDict *self) + { + PY_API_FUNC + return runSafe(tryEmbeddedDictLen, self); + } + + template + void forLookupElementsFromPythonObject(EmbeddedDict *self, PyObject *needle, ActionT action) + { + if (PyEmbeddedMemo_Check(needle)) { + auto *embeddedMemo = reinterpret_cast(needle); + const auto *embeddedRef = reinterpret_cast(&embeddedMemo->ext()); + if (embeddedRef->rootObject() != self->ext().rootObject()) { + return; + } + const auto &embeddedObject = embeddedRef->embeddedObject(); + action(o_dict::Element::embeddedObject(&embeddedObject, embeddedObject.sizeOf())); + return; + } + + if (PyObject_Hash(needle) == -1) { + return; + } + + auto primaryElement = o_py_dict::elementFromPythonObject(needle); + if (action(primaryElement)) { + return; + } + + if (PyBool_Check(needle)) { + auto value = needle == Py_True ? 1 : 0; + if (action(o_dict::Element::integer(value))) { + return; + } + action(o_dict::Element::floating(static_cast(value))); + } else if (PyLong_Check(needle)) { + auto value = PyLong_AsLongLong(needle); + if (PyErr_Occurred()) { + PyErr_Clear(); + } else { + if (value == 0 || value == 1) { + if (action(o_dict::Element::boolean(value != 0))) { + return; + } + } + auto floatValue = PyFloat_AsDouble(needle); + if (!PyErr_Occurred() && std::isfinite(floatValue) + && floatValue >= static_cast(LLONG_MIN) + && floatValue <= static_cast(LLONG_MAX) + && static_cast(floatValue) == value) { + action(o_dict::Element::floating(floatValue)); + } else if (PyErr_Occurred()) { + PyErr_Clear(); + } + } + } else if (PyFloat_Check(needle)) { + auto floatValue = PyFloat_AsDouble(needle); + if (!PyErr_Occurred() && std::isfinite(floatValue) + && std::trunc(floatValue) == floatValue + && floatValue >= static_cast(LLONG_MIN) + && floatValue <= static_cast(LLONG_MAX)) { + auto intValue = static_cast(floatValue); + if (action(o_dict::Element::integer(intValue))) { + return; + } + if (intValue == 0 || intValue == 1) { + action(o_dict::Element::boolean(intValue != 0)); + } + } + if (PyErr_Occurred()) { + PyErr_Clear(); + } + } + } + + const o_dict::Item *findEmbeddedDictValue(EmbeddedDict *self, PyObject *needle) + { + const o_dict::Item *foundValue = nullptr; + forLookupElementsFromPythonObject(self, needle, [&](const o_dict::Element &element) { + auto *value = self->ext().dict().get(element); + if (value) { + foundValue = value; + return true; + } + return false; + }); + return foundValue; + } + + int tryEmbeddedDictContains(EmbeddedDict *self, PyObject *needle) + { + auto *value = findEmbeddedDictValue(self, needle); + if (PyErr_Occurred()) { + return -1; + } + return value ? 1 : 0; + } + + int PyAPI_EmbeddedDict_contains(EmbeddedDict *self, PyObject *needle) + { + PY_API_FUNC + return runSafe<-1>(tryEmbeddedDictContains, self, needle); + } + + PyObject *tryEmbeddedDictSubscript(EmbeddedDict *self, PyObject *key) + { + auto *value = findEmbeddedDictValue(self, key); + if (PyErr_Occurred()) { + return nullptr; + } + if (!value) { + PyErr_SetObject(PyExc_KeyError, key); + return nullptr; + } + return unloadEmbeddedDictItem(self->ext().rootObject(), *value); + } + + PyObject *PyAPI_EmbeddedDict_subscript(EmbeddedDict *self, PyObject *key) + { + PY_API_FUNC + return runSafe(tryEmbeddedDictSubscript, self, key); + } + + PyObject *tryEmbeddedDictKeys(EmbeddedDict *self, PyObject *) + { + return makeEmbeddedDictIterator(self, EmbeddedDictIteratorRef::ItemKind::KEY); + } + + PyObject *PyAPI_EmbeddedDict_keys(EmbeddedDict *self, PyObject *args) + { + PY_API_FUNC + return runSafe(tryEmbeddedDictKeys, self, args); + } + + PyObject *tryEmbeddedDictValues(EmbeddedDict *self, PyObject *) + { + return makeEmbeddedDictIterator(self, EmbeddedDictIteratorRef::ItemKind::VALUE); + } + + PyObject *PyAPI_EmbeddedDict_values(EmbeddedDict *self, PyObject *args) + { + PY_API_FUNC + return runSafe(tryEmbeddedDictValues, self, args); + } + + PyObject *tryEmbeddedDictItems(EmbeddedDict *self, PyObject *) + { + return makeEmbeddedDictIterator(self, EmbeddedDictIteratorRef::ItemKind::PAIR); + } + + PyObject *PyAPI_EmbeddedDict_items(EmbeddedDict *self, PyObject *args) + { + PY_API_FUNC + return runSafe(tryEmbeddedDictItems, self, args); + } + + PyObject *tryEmbeddedDictGet(EmbeddedDict *self, PyObject *const *args, Py_ssize_t nargs) + { + auto *value = findEmbeddedDictValue(self, args[0]); + if (PyErr_Occurred()) { + return nullptr; + } + if (value) { + return unloadEmbeddedDictItem(self->ext().rootObject(), *value); + } + if (nargs == 2) { + Py_INCREF(args[1]); + return args[1]; + } + Py_RETURN_NONE; + } + + PyObject *PyAPI_EmbeddedDict_get(EmbeddedDict *self, PyObject *const *args, Py_ssize_t nargs) + { + PY_API_FUNC + if (nargs < 1 || nargs > 2) { + PyErr_SetString(PyExc_TypeError, "get() takes one or two arguments."); + return nullptr; + } + return runSafe(tryEmbeddedDictGet, self, args, nargs); + } + + PyObject *tryEmbeddedDictStr(EmbeddedDict *self) + { + std::stringstream str; + str << "{"; + bool first = true; + for (const auto &pair: self->ext().dict()) { + if (!first) { + str << ", "; + } + first = false; + + auto key = Py_OWN(unloadEmbeddedDictItem(self->ext().rootObject(), pair.key())); + auto value = Py_OWN(unloadEmbeddedDictItem(self->ext().rootObject(), pair.value())); + if (!key || !value) { + return nullptr; + } + auto keyRepr = Py_OWN(PyObject_Repr(*key)); + auto valueRepr = Py_OWN(PyObject_Repr(*value)); + if (!keyRepr || !valueRepr) { + return nullptr; + } + str << PyUnicode_AsUTF8(*keyRepr) << ": " << PyUnicode_AsUTF8(*valueRepr); + } + str << "}"; + return PyUnicode_FromString(str.str().c_str()); + } + + PyObject *PyAPI_EmbeddedDict_str(EmbeddedDict *self) + { + PY_API_FUNC + return runSafe(tryEmbeddedDictStr, self); + } + + void PyAPI_EmbeddedDict_del(EmbeddedDict *self) + { + PY_API_FUNC + if (PyObject_GC_IsTracked(self)) { + PyObject_GC_UnTrack(self); + } + self->destroy(); + Py_TYPE(self)->tp_free(reinterpret_cast(self)); + } + + void PyAPI_EmbeddedDictIterator_del(EmbeddedDictIterator *self) + { + PY_API_FUNC + if (PyObject_GC_IsTracked(self)) { + PyObject_GC_UnTrack(self); + } + self->destroy(); + Py_TYPE(self)->tp_free(reinterpret_cast(self)); + } + + int EmbeddedDict_traverse(EmbeddedDict *self, visitproc visit, void *arg) + { + Py_VISIT(self->ext().rootObject()); + return 0; + } + + int EmbeddedDictIterator_traverse(EmbeddedDictIterator *self, visitproc visit, void *arg) + { + Py_VISIT(self->ext().dictObject()); + return 0; + } + + static PySequenceMethods EmbeddedDict_sq = { + .sq_length = reinterpret_cast(PyAPI_EmbeddedDict_len), + .sq_contains = reinterpret_cast(PyAPI_EmbeddedDict_contains), + }; + + static PyMappingMethods EmbeddedDict_mp = { + .mp_length = reinterpret_cast(PyAPI_EmbeddedDict_len), + .mp_subscript = reinterpret_cast(PyAPI_EmbeddedDict_subscript), + }; + + static PyMethodDef EmbeddedDict_methods[] = { + {"get", reinterpret_cast(PyAPI_EmbeddedDict_get), METH_FASTCALL, nullptr}, + {"keys", reinterpret_cast(PyAPI_EmbeddedDict_keys), METH_NOARGS, nullptr}, + {"values", reinterpret_cast(PyAPI_EmbeddedDict_values), METH_NOARGS, nullptr}, + {"items", reinterpret_cast(PyAPI_EmbeddedDict_items), METH_NOARGS, nullptr}, + {NULL} + }; + } + + PyTypeObject EmbeddedDictType = { + PyVarObject_HEAD_INIT(nullptr, 0) + .tp_name = "dbzero.EmbeddedDict", + .tp_basicsize = static_cast(EmbeddedDict::sizeOf()), + .tp_itemsize = 0, + .tp_dealloc = reinterpret_cast(PyAPI_EmbeddedDict_del), + .tp_repr = reinterpret_cast(PyAPI_EmbeddedDict_str), + .tp_as_sequence = &EmbeddedDict_sq, + .tp_as_mapping = &EmbeddedDict_mp, + .tp_str = reinterpret_cast(PyAPI_EmbeddedDict_str), + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_doc = "dbzero embedded immutable dict view", + .tp_traverse = reinterpret_cast(EmbeddedDict_traverse), + .tp_iter = reinterpret_cast(PyAPI_EmbeddedDict_iter), + .tp_methods = EmbeddedDict_methods, + .tp_alloc = PyType_GenericAlloc, + .tp_free = PyObject_GC_Del, + }; + + PyTypeObject EmbeddedDictIteratorType = { + PyVarObject_HEAD_INIT(nullptr, 0) + .tp_name = "dbzero.EmbeddedDictIterator", + .tp_basicsize = static_cast(EmbeddedDictIterator::sizeOf()), + .tp_itemsize = 0, + .tp_dealloc = reinterpret_cast(PyAPI_EmbeddedDictIterator_del), + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_doc = "dbzero embedded immutable dict iterator", + .tp_traverse = reinterpret_cast(EmbeddedDictIterator_traverse), + .tp_iter = PyObject_SelfIter, + .tp_iternext = reinterpret_cast(PyAPI_EmbeddedDictIterator_next), + .tp_alloc = PyType_GenericAlloc, + .tp_free = PyObject_GC_Del, + }; + + ObjectSharedPtr makeEmbeddedDict(PyObject *rootObject, const o_py_dict &dict) + { + auto *pyObject = reinterpret_cast(EmbeddedDictType.tp_alloc(&EmbeddedDictType, 0)); + if (!pyObject) { + return {}; + } + pyObject->makeNew(rootObject, &dict); + return Py_OWN(reinterpret_cast(pyObject)); + } +} diff --git a/src/dbzero/bindings/python/embedded/EmbeddedDict.hpp b/src/dbzero/bindings/python/embedded/EmbeddedDict.hpp new file mode 100644 index 00000000..71e4fcc5 --- /dev/null +++ b/src/dbzero/bindings/python/embedded/EmbeddedDict.hpp @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#pragma once + +#include + +#include +#include +#include + +namespace db0::object_model +{ + class o_py_dict; +} + +namespace db0::python +{ + class EmbeddedDictRef + { + public: + EmbeddedDictRef(PyObject *rootObject, const db0::object_model::o_py_dict *dict); + ~EmbeddedDictRef(); + + EmbeddedDictRef(const EmbeddedDictRef &) = delete; + EmbeddedDictRef &operator=(const EmbeddedDictRef &) = delete; + + PyObject *rootObject() const; + const db0::object_model::o_py_dict &dict() const; + + private: + PyObject *m_root_object = nullptr; + const db0::object_model::o_py_dict *m_dict = nullptr; + }; + + class EmbeddedDictIteratorRef + { + public: + enum class ItemKind + { + KEY, + VALUE, + PAIR, + }; + + EmbeddedDictIteratorRef(PyObject *dictObject, ItemKind itemKind); + ~EmbeddedDictIteratorRef(); + + EmbeddedDictIteratorRef(const EmbeddedDictIteratorRef &) = delete; + EmbeddedDictIteratorRef &operator=(const EmbeddedDictIteratorRef &) = delete; + + PyObject *dictObject() const; + ItemKind itemKind() const; + const db0::object_model::o_dict::Pair *next(); + + private: + PyObject *m_dict_object = nullptr; + ItemKind m_item_kind = ItemKind::KEY; + db0::object_model::o_dict::const_iterator m_iterator; + db0::object_model::o_dict::const_iterator m_end; + }; + + using EmbeddedDict = PyWrapper; + using EmbeddedDictIterator = PyWrapper; + + extern PyTypeObject EmbeddedDictType; + extern PyTypeObject EmbeddedDictIteratorType; + + PyTypes::ObjectSharedPtr makeEmbeddedDict( + PyObject *rootObject, const db0::object_model::o_py_dict &dict + ); +} diff --git a/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp b/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp index 9c049d4e..4e81d904 100644 --- a/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp +++ b/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -436,6 +437,11 @@ namespace db0::python PyObject *sourceSet, const o_py_set &embeddedSet ); + void transformEmbeddedDictObjects( + db0::swine_ptr &fixture, ClassFactory &classFactory, PyObject *rootObject, + PyObject *sourceDict, const o_py_dict &embeddedDict + ); + void transformEmbeddedItem( db0::swine_ptr &fixture, ClassFactory &classFactory, PyObject *rootObject, PyObject *sourceItem, const o_tuple_item &embeddedItem @@ -465,6 +471,13 @@ namespace db0::python assert(embeddedItem.itemKind() == StorageClass::EMBEDDED_SET); const auto &nestedSet = o_py_set::__const_ref(embeddedItem.embeddedPayload().begin()); transformEmbeddedSetObjects(fixture, classFactory, rootObject, sourceItem, nestedSet); + return; + } + + if (PyDict_Check(sourceItem)) { + assert(embeddedItem.itemKind() == StorageClass::EMBEDDED_DICT); + const auto &nestedDict = o_py_dict::__const_ref(embeddedItem.embeddedPayload().begin()); + transformEmbeddedDictObjects(fixture, classFactory, rootObject, sourceItem, nestedDict); } } @@ -512,6 +525,29 @@ namespace db0::python assert(embeddedItem == embeddedSet.end()); } + void transformEmbeddedDictObjects( + db0::swine_ptr &fixture, ClassFactory &classFactory, PyObject *rootObject, + PyObject *sourceDict, const o_py_dict &embeddedDict + ) + { + assert(PyDict_Check(sourceDict)); + assert(static_cast(PyDict_Size(sourceDict)) == embeddedDict.size()); + + auto iterator = Py_OWN(PyObject_GetIter(sourceDict)); + assert(iterator.get()); + + auto embeddedPair = embeddedDict.begin(); + Py_FOR(sourceKey, iterator) { + assert(embeddedPair != embeddedDict.end()); + auto *sourceValue = PyDict_GetItemWithError(sourceDict, *sourceKey); + assert(sourceValue); + transformEmbeddedItem(fixture, classFactory, rootObject, *sourceKey, embeddedPair->key()); + transformEmbeddedItem(fixture, classFactory, rootObject, sourceValue, embeddedPair->value()); + ++embeddedPair; + } + assert(embeddedPair == embeddedDict.end()); + } + std::string consumePyErrorMessage() { if (!PyErr_Occurred()) { @@ -638,6 +674,15 @@ namespace db0::python transformEmbeddedSetObjects(fixture, classFactory, rootObject, sourceSet, embeddedSet); } + void transformEmbeddedDict( + db0::swine_ptr &fixture, PyTypes::ObjectPtr rootObject, PyTypes::ObjectPtr sourceDict, + const o_py_dict &embeddedDict + ) + { + auto &classFactory = fixture->get(); + transformEmbeddedDictObjects(fixture, classFactory, rootObject, sourceDict, embeddedDict); + } + bool PyEmbeddedMemoType_Check(PyTypeObject *type) { return PyToolkit::getTypeManager().isEmbeddedMemoType(type); diff --git a/src/dbzero/bindings/python/embedded/EmbeddedObject.hpp b/src/dbzero/bindings/python/embedded/EmbeddedObject.hpp index ac22b818..81ffc6e9 100644 --- a/src/dbzero/bindings/python/embedded/EmbeddedObject.hpp +++ b/src/dbzero/bindings/python/embedded/EmbeddedObject.hpp @@ -23,6 +23,7 @@ namespace db0::object_model class o_embedded_object; class o_py_tuple; class o_py_set; + class o_py_dict; } namespace db0::python @@ -83,6 +84,11 @@ namespace db0::python const db0::object_model::o_py_set &embeddedSet ); + void transformEmbeddedDict( + db0::swine_ptr &fixture, PyTypes::ObjectPtr rootObject, PyTypes::ObjectPtr sourceDict, + const db0::object_model::o_py_dict &embeddedDict + ); + bool PyEmbeddedMemo_Check(PyObject *object); bool PyEmbeddedMemoType_Check(PyTypeObject *type); } diff --git a/src/dbzero/object_model/dict/o_py_dict.cpp b/src/dbzero/object_model/dict/o_py_dict.cpp index 25528865..439fd0a3 100644 --- a/src/dbzero/object_model/dict/o_py_dict.cpp +++ b/src/dbzero/object_model/dict/o_py_dict.cpp @@ -10,6 +10,9 @@ #include #include #include +#include +#include +#include #include #include @@ -31,6 +34,36 @@ namespace db0::object_model { o_py_dict::__new(buf, const_cast(static_cast(source))); } + + const ImmutableObjectInitializer &getInitializer(PyObject *pyObject) + { + using MemoImmutableObject = db0::python::PyToolkit::TypeManager::MemoImmutableObject; + + assert(db0::python::PyToolkit::isMemoImmutableObject(pyObject)); + + const auto &object = db0::python::PyToolkit::getTypeManager() + .template extractObject(pyObject); + if (object.hasInstance()) { + THROWF(db0::InputException) + << "Only non-materialized immutable memo objects can be embedded"; + } + + auto *initializer = dynamic_cast( + InitManager::instance.findInitializer(object) + ); + if (!initializer) { + THROWF(db0::InputException) + << "Non-materialized immutable memo object has no active initializer"; + } + return *initializer; + } + + void writeEmbeddedObject(void *buf, const void *source) + { + auto *pyObject = const_cast(static_cast(source)); + const auto &initializer = getInitializer(pyObject); + o_embedded_object::__new(buf, initializer.getClassPtr()->getClassRef(), initializer); + } } o_py_dict::o_py_dict(PyObject *dict) @@ -136,6 +169,11 @@ namespace db0::object_model return Element::embeddedSet(o_py_set::measure(object), writePySet, object); case db0::bindings::TypeId::DICT: return Element::embeddedDict(o_py_dict::measure(object), writePyDict, object); + case db0::bindings::TypeId::MEMO_IMMUTABLE_OBJECT: { + const auto &initializer = getInitializer(object); + auto size = o_embedded_object::measure(initializer.getClassPtr()->getClassRef(), initializer); + return Element::embeddedObject(size, writeEmbeddedObject, object); + } default: break; } diff --git a/src/dbzero/object_model/dict/o_py_dict.hpp b/src/dbzero/object_model/dict/o_py_dict.hpp index 3d45db16..12769e8e 100644 --- a/src/dbzero/object_model/dict/o_py_dict.hpp +++ b/src/dbzero/object_model/dict/o_py_dict.hpp @@ -22,6 +22,7 @@ DB0_PACKED_BEGIN explicit o_py_dict(PyObject *dict); static std::size_t measure(PyObject *dict); + static Element elementFromPythonObject(PyObject *object); template static std::size_t safeSizeOf(BufT buf) { @@ -39,7 +40,6 @@ DB0_PACKED_BEGIN static db0::Foundation::Type type(); private: - static Element elementFromPythonObject(PyObject *object); static Element valueFromPythonDict(PyObject *dict, PyObject *key); static std::uint32_t dictSize(PyObject *dict); static std::size_t measurePairs(PyObject *dict); diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp index d6dbb2aa..b7389e90 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +56,14 @@ namespace db0::object_model } } + void unrefEmbeddedDict(db0::swine_ptr &fixture, const o_py_dict &dict) + { + for (const auto &pair: dict) { + unrefEmbeddedItem(fixture, pair.key()); + unrefEmbeddedItem(fixture, pair.value()); + } + } + void unrefEmbeddedItem(db0::swine_ptr &fixture, const o_tuple_item &item) { switch (item.itemKind()) { @@ -67,6 +76,9 @@ namespace db0::object_model case StorageClass::EMBEDDED_SET: unrefEmbeddedSet(fixture, o_py_set::__const_ref(item.embeddedPayload().begin())); return; + case StorageClass::EMBEDDED_DICT: + unrefEmbeddedDict(fixture, o_py_dict::__const_ref(item.embeddedPayload().begin())); + return; default: return; } @@ -150,6 +162,15 @@ namespace db0::object_model db0::python::transformEmbeddedSet( fixture, rootObject, value.m_object.get(), embeddedSet ); + continue; + } + + if (value.m_storage_class == StorageClass::DB0_DICT) { + assert(embeddedValue->itemKind() == StorageClass::EMBEDDED_DICT); + const auto &embeddedDict = o_py_dict::__const_ref(embeddedValue->embeddedPayload().begin()); + db0::python::transformEmbeddedDict( + fixture, rootObject, value.m_object.get(), embeddedDict + ); } } } diff --git a/tests/unit_tests/EmbeddedDictTest.cpp b/tests/unit_tests/EmbeddedDictTest.cpp index b2a54387..56e27c0d 100644 --- a/tests/unit_tests/EmbeddedDictTest.cpp +++ b/tests/unit_tests/EmbeddedDictTest.cpp @@ -5,14 +5,21 @@ #include #include #include +#include +#include #include #include #include +#include #include #include #include +#include #include #include +#include +#include +#include #include #include @@ -161,6 +168,36 @@ namespace tests ASSERT_EQ(itemKey(item), elementKey(element)); } + static db0::python::shared_py_object makeMemoType() + { + static std::uint64_t memoTypeIndex = 0; + auto className = std::string("EmbeddedDictNestedImmutable") + std::to_string(memoTypeIndex); + auto typeId = "tests/" + className; + ++memoTypeIndex; + + if (PyRun_SimpleString(("class " + className + ": pass\n").c_str()) != 0) { + return {}; + } + + auto mainModule = Py_BORROW(PyImport_AddModule("__main__")); + auto pyClass = Py_OWN(PyObject_GetAttrString(mainModule.get(), className.c_str())); + auto args = Py_OWN(PyTuple_Pack(1, pyClass.get())); + auto kwargs = Py_OWN(PyDict_New()); + auto pyTypeId = Py_OWN(PyUnicode_FromString(typeId.c_str())); + auto pyImmutable = Py_OWN(PyBool_FromLong(1)); + if (!mainModule.get() || !pyClass.get() || !args.get() || !kwargs.get() + || !pyTypeId.get() || !pyImmutable.get()) { + return {}; + } + db0::python::PySafeDict_SetItemString(kwargs.get(), "id", std::move(pyTypeId)); + db0::python::PySafeDict_SetItemString(kwargs.get(), "immutable", std::move(pyImmutable)); + + return db0::python::shared_py_object( + reinterpret_cast(db0::python::PyAPI_wrapPyClass(nullptr, args.get(), kwargs.get())), + false + ); + } + static std::size_t testHashIndexCapacity(std::size_t count) { if (count == 0) { @@ -465,4 +502,76 @@ namespace tests ASSERT_EQ(o_py_dict::measure(*pyDict), dict->sizeOf()); } + TEST_F( EmbeddedDictTest , testPyDictConstructsFromImmutableMemoKeyAndValue ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture("embedded-dict-nested-memo"); + auto nestedClass = getTestClass(fixture); + auto pyMemoType = makeMemoType(); + ASSERT_TRUE(pyMemoType.get()); + + auto pyMemoKey = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(pyMemoType.get()) + )); + pyMemoKey->makeNew(nestedClass); + auto *keyInitializer = dynamic_cast( + InitManager::instance.findInitializer(pyMemoKey->ext()) + ); + ASSERT_NE(keyInitializer, nullptr); + keyInitializer->set({0, 0}, StorageClass::INT64, Value(47)); + + auto pyMemoValue = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(pyMemoType.get()) + )); + pyMemoValue->makeNew(nestedClass); + auto *valueInitializer = dynamic_cast( + InitManager::instance.findInitializer(pyMemoValue->ext()) + ); + ASSERT_NE(valueInitializer, nullptr); + valueInitializer->set({0, 0}, StorageClass::INT64, Value(53)); + + auto pyDict = Py_OWN(PyDict_New()); + ASSERT_EQ(PySafeDict_SetItem( + *pyDict, Py_OWN(Py_NewRef(reinterpret_cast(pyMemoKey.get()))), + Py_OWN(PyUnicode_FromString("key-object")) + ), 0); + ASSERT_EQ(PySafeDict_SetItem( + *pyDict, Py_OWN(PyUnicode_FromString("value-object")), + Py_OWN(Py_NewRef(reinterpret_cast(pyMemoValue.get()))) + ), 0); + + auto memspace = getMemspace(); + v_object dict(memspace, *pyDict); + + ASSERT_EQ(dict->size(), 2u); + bool sawEmbeddedKey = false; + bool sawEmbeddedValue = false; + for (auto it = dict->begin(); it != dict->end(); ++it) { + if (it->key().itemKind() == StorageClass::EMBEDDED_OBJECT) { + sawEmbeddedKey = true; + const auto &embeddedObject = o_embedded_object::__const_ref(it->key().embeddedPayload().begin()); + ASSERT_EQ(embeddedObject.getClassRef(), nestedClass->getClassRef()); + auto fixedValue = embeddedObject.fixedValue(0); + ASSERT_TRUE(fixedValue.has_value()); + ASSERT_EQ(fixedValue->m_value, 47u); + ASSERT_EQ(asString(it->value()), "key-object"); + } + if (it->value().itemKind() == StorageClass::EMBEDDED_OBJECT) { + sawEmbeddedValue = true; + const auto &embeddedObject = o_embedded_object::__const_ref(it->value().embeddedPayload().begin()); + ASSERT_EQ(embeddedObject.getClassRef(), nestedClass->getClassRef()); + auto fixedValue = embeddedObject.fixedValue(0); + ASSERT_TRUE(fixedValue.has_value()); + ASSERT_EQ(fixedValue->m_value, 53u); + ASSERT_EQ(asString(it->key()), "value-object"); + } + } + ASSERT_TRUE(sawEmbeddedKey); + ASSERT_TRUE(sawEmbeddedValue); + + workspace.close(); + } + } From 3da420eb700a7ea9091bdf1cd2a97543058c72b0 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 21 May 2026 14:09:45 +0200 Subject: [PATCH 5/7] deep unreferencing tests --- tests/unit_tests/ObjectInitializerTest.cpp | 262 +++++++++++++++++++++ 1 file changed, 262 insertions(+) diff --git a/tests/unit_tests/ObjectInitializerTest.cpp b/tests/unit_tests/ObjectInitializerTest.cpp index 217586ee..01b270ac 100644 --- a/tests/unit_tests/ObjectInitializerTest.cpp +++ b/tests/unit_tests/ObjectInitializerTest.cpp @@ -23,6 +23,7 @@ #include #include +#include using namespace std; using namespace db0; @@ -78,6 +79,88 @@ namespace tests ); } + static db0::python::shared_py_object makeImmutableMemoHoldingReference( + PyTypeObject *pyMemoType, const std::shared_ptr &nestedClass, + std::pair nestedLoc, Address referencedAddress + ) + { + auto pyMemo = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(pyMemoType) + )); + pyMemo->makeNew(nestedClass); + auto *nestedInitializer = dynamic_cast( + InitManager::instance.findInitializer(pyMemo->ext()) + ); + EXPECT_NE(nestedInitializer, nullptr); + nestedInitializer->set(nestedLoc, StorageClass::OBJECT_REF, Value(referencedAddress)); + return pyMemo; + } + + static void assertDestroyImmutableRootUnrefsEmbeddedCollectionReference( + const char *fieldName, StorageClass fieldStorageClass, + const std::function( + db0::python::MemoImmutableObject * + )> &makePayload + ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(ObjectInitializerTest::prefix_name); + auto rootClass = getTestClass(fixture); + auto referencedClass = getTestClass(fixture); + auto pyMemoType = makeImmutableMemoType(); + ASSERT_TRUE(pyMemoType.get()); + auto nestedClass = fixture->get().getOrCreateType(pyMemoType.get()); + auto rootLoc = rootClass->addField(fieldName, 0).get(0).getIndexAndOffset(); + auto nestedLoc = nestedClass->addField("held", 0).get(0).getIndexAndOffset(); + rootClass->flush(); + nestedClass->flush(); + + { + Object referenced(referencedClass); + { + db0::FixtureLock lock(fixture); + referenced.postInit(lock); + } + referenced.incRef(false); + referenced.incRef(false); + ASSERT_EQ(referenced.getRefCounts().second, 2u); + + auto pyMemo = makeImmutableMemoHoldingReference( + pyMemoType.get(), nestedClass, nestedLoc, referenced.getAddress() + ); + ASSERT_TRUE(pyMemo.get()); + + auto payload = makePayload(pyMemo.get()); + ASSERT_TRUE(payload.get()); + + ObjectImmutableImpl root(rootClass); + auto *rootInitializer = dynamic_cast( + InitManager::instance.findInitializer(root) + ); + ASSERT_NE(rootInitializer, nullptr); + rootInitializer->setObject( + rootLoc, fieldStorageClass, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(payload.get()) + ); + + { + db0::FixtureLock lock(fixture); + root.postInit(lock); + } + + root.destroy(); + rootClass->flush(); + ASSERT_EQ(referenced.getRefCounts().second, 1u); + } + + rootClass.reset(); + referencedClass.reset(); + nestedClass.reset(); + workspace.close(); + } + TEST_F( ObjectInitializerTest, testIncompletePosVT ) { Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); @@ -864,6 +947,185 @@ namespace tests workspace.close(); } + TEST_F( ObjectInitializerTest, testDestroyImmutableRootUnrefsEmbeddedMemoInsideTupleField ) + { + assertDestroyImmutableRootUnrefsEmbeddedCollectionReference( + "items", StorageClass::DB0_TUPLE, + [](db0::python::MemoImmutableObject *pyMemo) { + auto tuple = Py_OWN(PyTuple_New(1)); + Py_INCREF(pyMemo); + db0::python::PySafeTuple_SetItem( + tuple.get(), 0, Py_OWN(reinterpret_cast(pyMemo)) + ); + return tuple; + } + ); + } + + TEST_F( ObjectInitializerTest, testDestroyImmutableRootUnrefsEmbeddedMemoInsideListField ) + { + assertDestroyImmutableRootUnrefsEmbeddedCollectionReference( + "items", StorageClass::DB0_LIST, + [](db0::python::MemoImmutableObject *pyMemo) { + auto list = Py_OWN(PyList_New(1)); + Py_INCREF(pyMemo); + db0::python::PySafeList_SetItem( + list.get(), 0, Py_OWN(reinterpret_cast(pyMemo)) + ); + return list; + } + ); + } + + TEST_F( ObjectInitializerTest, testDestroyImmutableRootUnrefsEmbeddedMemoInsideSetField ) + { + assertDestroyImmutableRootUnrefsEmbeddedCollectionReference( + "items", StorageClass::DB0_SET, + [](db0::python::MemoImmutableObject *pyMemo) { + auto set = Py_OWN(PySet_New(nullptr)); + Py_INCREF(pyMemo); + db0::python::PySafeSet_Add(set.get(), Py_OWN(reinterpret_cast(pyMemo))); + return set; + } + ); + } + + TEST_F( ObjectInitializerTest, testDestroyImmutableRootUnrefsEmbeddedMemoDictValue ) + { + assertDestroyImmutableRootUnrefsEmbeddedCollectionReference( + "items", StorageClass::DB0_DICT, + [](db0::python::MemoImmutableObject *pyMemo) { + auto dict = Py_OWN(PyDict_New()); + Py_INCREF(pyMemo); + db0::python::PySafeDict_SetItem( + dict.get(), Py_OWN(PyUnicode_FromString("child")), + Py_OWN(reinterpret_cast(pyMemo)) + ); + return dict; + } + ); + } + + TEST_F( ObjectInitializerTest, testDestroyImmutableRootUnrefsEmbeddedMemoDictKey ) + { + assertDestroyImmutableRootUnrefsEmbeddedCollectionReference( + "items", StorageClass::DB0_DICT, + [](db0::python::MemoImmutableObject *pyMemo) { + auto dict = Py_OWN(PyDict_New()); + Py_INCREF(pyMemo); + db0::python::PySafeDict_SetItem( + dict.get(), Py_OWN(reinterpret_cast(pyMemo)), + Py_OWN(PyUnicode_FromString("child")) + ); + return dict; + } + ); + } + + TEST_F( ObjectInitializerTest, testDestroyImmutableRootUnrefsEmbeddedMemoInsideDeepNestedCollection ) + { + assertDestroyImmutableRootUnrefsEmbeddedCollectionReference( + "items", StorageClass::DB0_DICT, + [](db0::python::MemoImmutableObject *pyMemo) { + auto set = Py_OWN(PySet_New(nullptr)); + Py_INCREF(pyMemo); + db0::python::PySafeSet_Add(set.get(), Py_OWN(reinterpret_cast(pyMemo))); + + auto tuple = Py_OWN(PyTuple_New(1)); + db0::python::PySafeTuple_SetItem(tuple.get(), 0, std::move(set)); + + auto dict = Py_OWN(PyDict_New()); + db0::python::PySafeDict_SetItem( + dict.get(), Py_OWN(PyUnicode_FromString("outer")), std::move(tuple) + ); + return dict; + } + ); + } + + TEST_F( ObjectInitializerTest, testDestroyImmutableRootUnrefsEmbeddedMemoInsideCollectionInsideEmbeddedMemo ) + { + Py_Initialize(); + + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture(prefix_name); + auto rootClass = getTestClass(fixture); + auto referencedClass = getTestClass(fixture); + auto pyMemoType = makeImmutableMemoType(); + ASSERT_TRUE(pyMemoType.get()); + auto nestedClass = fixture->get().getOrCreateType(pyMemoType.get()); + auto rootLoc = rootClass->addField("outer", 0).get(0).getIndexAndOffset(); + auto outerCollectionLoc = nestedClass->addField("items", 0).get(0).getIndexAndOffset(); + auto innerHeldLoc = nestedClass->addField("held", 0).get(0).getIndexAndOffset(); + rootClass->flush(); + nestedClass->flush(); + + { + Object referenced(referencedClass); + { + db0::FixtureLock lock(fixture); + referenced.postInit(lock); + } + referenced.incRef(false); + referenced.incRef(false); + ASSERT_EQ(referenced.getRefCounts().second, 2u); + + auto pyInnerMemo = makeImmutableMemoHoldingReference( + pyMemoType.get(), nestedClass, innerHeldLoc, referenced.getAddress() + ); + ASSERT_TRUE(pyInnerMemo.get()); + + auto pyOuterMemo = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(pyMemoType.get()) + )); + pyOuterMemo->makeNew(nestedClass); + auto *outerInitializer = dynamic_cast( + InitManager::instance.findInitializer(pyOuterMemo->ext()) + ); + ASSERT_NE(outerInitializer, nullptr); + + auto nestedTuple = Py_OWN(PyTuple_New(1)); + Py_INCREF(pyInnerMemo.get()); + db0::python::PySafeTuple_SetItem( + nestedTuple.get(), 0, Py_OWN(reinterpret_cast(pyInnerMemo.get())) + ); + outerInitializer->setObject( + outerCollectionLoc, StorageClass::DB0_TUPLE, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(nestedTuple.get()) + ); + + auto rootTuple = Py_OWN(PyTuple_New(1)); + Py_INCREF(pyOuterMemo.get()); + db0::python::PySafeTuple_SetItem( + rootTuple.get(), 0, Py_OWN(reinterpret_cast(pyOuterMemo.get())) + ); + + ObjectImmutableImpl root(rootClass); + auto *rootInitializer = dynamic_cast( + InitManager::instance.findInitializer(root) + ); + ASSERT_NE(rootInitializer, nullptr); + rootInitializer->setObject( + rootLoc, StorageClass::DB0_TUPLE, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(rootTuple.get()) + ); + + { + db0::FixtureLock lock(fixture); + root.postInit(lock); + } + + root.destroy(); + rootClass->flush(); + ASSERT_EQ(referenced.getRefCounts().second, 1u); + } + + rootClass.reset(); + referencedClass.reset(); + nestedClass.reset(); + workspace.close(); + } + TEST_F( ObjectInitializerTest, testImmutablePreInitChangingRegularValueToLoFiClearsEmbeddedObject ) { Py_Initialize(); From 8598af63a93e849fe58bd7ecf26ff1baccb20d96 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 21 May 2026 15:33:17 +0200 Subject: [PATCH 6/7] defensive thread shutdown --- python_tests/test_volume_stress.py | 12 ++++ src/dbzero/workspace/FixtureThreads.cpp | 94 ++++++++++++++++++------- src/dbzero/workspace/FixtureThreads.hpp | 9 ++- src/dbzero/workspace/Workspace.cpp | 25 ++++++- 4 files changed, 110 insertions(+), 30 deletions(-) diff --git a/python_tests/test_volume_stress.py b/python_tests/test_volume_stress.py index 8edafbab..2ef752d5 100644 --- a/python_tests/test_volume_stress.py +++ b/python_tests/test_volume_stress.py @@ -111,3 +111,15 @@ def test_create_large_objects_low_cache(db0_slab_size): print(f"Memory usage: {get_memory_usage() - init_mem_usage}") print(f"Base lock usage: {db0.get_base_lock_usage() if 'D' in db0.build_flags() else 'unavailable'}") report_bytes += report_bytes_step + + +@pytest.mark.stress_test +@pytest.mark.parametrize("db0_slab_size", [{"slab_size": 1024 * 1024 * 1024}], indirect=True) +def test_low_cache_close_after_volume_churn(db0_slab_size): + init_mem_usage = get_memory_usage() + db0.set_cache_size(4 * 1024 * 1024) + buf = db0.list() + for _ in range(10000): + obj = MemoTestClass(rand_array(8 * 1024 + 8192)) + buf.append(obj) + print(f"Memory usage: {get_memory_usage() - init_mem_usage}") diff --git a/src/dbzero/workspace/FixtureThreads.cpp b/src/dbzero/workspace/FixtureThreads.cpp index 1b3bbc7b..86a77e05 100644 --- a/src/dbzero/workspace/FixtureThreads.cpp +++ b/src/dbzero/workspace/FixtureThreads.cpp @@ -60,38 +60,70 @@ namespace db0 } m_cv.notify_all(); } + + void FixtureThread::rethrowIfFailed() const + { + std::exception_ptr failure; + { + std::lock_guard lock(m_mutex); + failure = m_failure; + } + if (failure) { + std::rethrow_exception(failure); + } + } void FixtureThread::run() { while (true) { - std::unique_lock lock(m_mutex); - m_cv.wait_for(lock, std::chrono::milliseconds(m_interval_ms)); - if (m_stopped) { - break; - } - // prepare commit context if configured - lock.unlock(); - prepareContext(); - // collect fixtures first - std::vector > fixtures; - lock.lock(); - fixtures.reserve(m_fixtures.size()); - for (auto it = m_fixtures.begin(); it != m_fixtures.end();) { - auto fixture_ptr = it->lock(); - if (!fixture_ptr) { - it = m_fixtures.erase(it); - continue; + bool contextPrepared = false; + try { + std::unique_lock lock(m_mutex); + m_cv.wait_for(lock, std::chrono::milliseconds(m_interval_ms)); + if (m_stopped) { + break; } - fixtures.push_back(fixture_ptr); - ++it; - } - // then process as unlocked - lock.unlock(); - for (auto &fixture_ptr : fixtures) { - onUpdate(*fixture_ptr); + // prepare commit context if configured + lock.unlock(); + prepareContext(); + contextPrepared = true; + // collect fixtures first + std::vector > fixtures; + lock.lock(); + fixtures.reserve(m_fixtures.size()); + for (auto it = m_fixtures.begin(); it != m_fixtures.end();) { + auto fixture_ptr = it->lock(); + if (!fixture_ptr) { + it = m_fixtures.erase(it); + continue; + } + fixtures.push_back(fixture_ptr); + ++it; + } + // then process as unlocked + lock.unlock(); + for (auto &fixture_ptr : fixtures) { + onUpdate(*fixture_ptr); + } + + closeContext(); + contextPrepared = false; + } catch (...) { + // Preserve background-thread failures so explicit workspace close can report them. + auto failure = std::current_exception(); + if (contextPrepared) { + abortContext(); + } + { + std::lock_guard lock(m_mutex); + if (!m_failure) { + m_failure = failure; + } + m_stopped = true; + } + m_cv.notify_all(); + break; } - - closeContext(); } } @@ -123,6 +155,11 @@ namespace db0 m_context->finalize(); m_context = nullptr; } + + void RefreshThread::abortContext() noexcept + { + m_context = nullptr; + } void RefreshThread::onUpdate(Fixture &fixture) { @@ -241,6 +278,11 @@ namespace db0 m_context = nullptr; } + void AutoCommitThread::abortContext() noexcept + { + m_context = nullptr; + } + std::unique_lock AutoCommitThread::preventAutoCommit() { return std::unique_lock(m_commit_mutex); } diff --git a/src/dbzero/workspace/FixtureThreads.hpp b/src/dbzero/workspace/FixtureThreads.hpp index c46783ca..bb63f125 100644 --- a/src/dbzero/workspace/FixtureThreads.hpp +++ b/src/dbzero/workspace/FixtureThreads.hpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace db0 @@ -37,6 +38,8 @@ namespace db0 void run(); void stop(); + + void rethrowIfFailed() const; void setInterval(std::uint64_t interval_ms); @@ -47,13 +50,15 @@ namespace db0 protected: std::atomic m_interval_ms; std::condition_variable m_cv; - std::mutex m_mutex; + mutable std::mutex m_mutex; bool m_stopped = false; + std::exception_ptr m_failure; std::vector> m_fixtures; virtual void prepareContext() = 0; virtual void closeContext() = 0; + virtual void abortContext() noexcept = 0; }; /** @@ -85,6 +90,7 @@ namespace db0 void prepareContext() override; void closeContext() override; + void abortContext() noexcept override; }; /** @@ -108,6 +114,7 @@ namespace db0 void prepareContext() override; void closeContext() override; + void abortContext() noexcept override; }; } diff --git a/src/dbzero/workspace/Workspace.cpp b/src/dbzero/workspace/Workspace.cpp index e76253e4..850ef1c0 100644 --- a/src/dbzero/workspace/Workspace.cpp +++ b/src/dbzero/workspace/Workspace.cpp @@ -204,11 +204,27 @@ namespace db0 ~WorkspaceThreads() { - // stop refresh/autocommit threads + stop(false); + } + + void stopAndRethrow() + { + // Used by explicit close: join threads first, then surface any worker failure. + stop(true); + } + + void stop(bool rethrow) + { m_auto_commit_thread.stop(); m_refresh_thread.stop(); for (auto &m_thread : m_threads) { - m_thread.join(); + if (m_thread.joinable()) { + m_thread.join(); + } + } + if (rethrow) { + m_refresh_thread.rethrowIfFailed(); + m_auto_commit_thread.rethrowIfFailed(); } } @@ -294,7 +310,10 @@ namespace db0 } void Workspace::stopThreads() { - m_workspace_threads = nullptr; + if (m_workspace_threads) { + auto threads = std::move(m_workspace_threads); + threads->stopAndRethrow(); + } } void Workspace::close(bool as_defunct, ProcessTimer *timer_ptr) From 3465ecce1b0bc5e950a76a39b2c608c86f48d7d0 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 21 May 2026 16:15:35 +0200 Subject: [PATCH 7/7] refresh fix (edge case for initial prefix) --- src/dbzero/core/storage/DiffIndex.cpp | 10 +++++- src/dbzero/core/storage/DiffIndex.hpp | 4 +++ src/dbzero/core/storage/SparseIndexBase.hpp | 38 +++++++++++++++++++-- src/dbzero/core/storage/SparsePair.cpp | 24 +++++++++++-- tests/unit_tests/BDevStorageTest.cpp | 24 +++++++++++++ 5 files changed, 95 insertions(+), 5 deletions(-) diff --git a/src/dbzero/core/storage/DiffIndex.cpp b/src/dbzero/core/storage/DiffIndex.cpp index 16dbece0..80b09be1 100644 --- a/src/dbzero/core/storage/DiffIndex.cpp +++ b/src/dbzero/core/storage/DiffIndex.cpp @@ -206,6 +206,14 @@ namespace db0 void DiffIndex::refresh() { super_t::refresh(); } + + void DiffIndex::reopen(Address address) { + super_t::reopen(address); + } + + bool DiffIndex::isOpen() const { + return super_t::isOpen(); + } void DiffIndex::commit() { super_t::commit(); @@ -220,4 +228,4 @@ namespace db0 return item.findLower(state_num); } -} \ No newline at end of file +} diff --git a/src/dbzero/core/storage/DiffIndex.hpp b/src/dbzero/core/storage/DiffIndex.hpp index d9918842..bb42d22b 100644 --- a/src/dbzero/core/storage/DiffIndex.hpp +++ b/src/dbzero/core/storage/DiffIndex.hpp @@ -150,6 +150,10 @@ DB0_PACKED_END void commit(); void refresh(); + + void reopen(Address); + + bool isOpen() const; }; } diff --git a/src/dbzero/core/storage/SparseIndexBase.hpp b/src/dbzero/core/storage/SparseIndexBase.hpp index 22f189bd..6c3a8ddb 100644 --- a/src/dbzero/core/storage/SparseIndexBase.hpp +++ b/src/dbzero/core/storage/SparseIndexBase.hpp @@ -20,6 +20,7 @@ namespace db0 #include #include #include +#include namespace db0 @@ -174,6 +175,8 @@ DB0_PACKED_END void update(std::uint64_t max_storage_page_num); void update(PageNumT page_num, StateNumT state_num, std::uint64_t max_storage_page_num); + void reopen(Address address = {}); + bool isOpen() const; private: std::shared_ptr m_dram_prefix; @@ -380,11 +383,42 @@ DB0_PACKED_END template void SparseIndexBase::refresh() - { + { + if (!m_index) { + this->reopen(); + return; + } + m_index.detach(); m_next_page_num = m_index.treeHeader().m_next_page_num; m_max_state_num = m_index.treeHeader().m_max_state_num; } + + template + void SparseIndexBase::reopen(Address address) + { + if (m_dram_prefix->empty()) { + return; + } + + if (!address.isValid()) { + address = m_dram_allocator->firstAlloc(); + } + if (!address.isValid()) { + return; + } + + m_index.~IndexT(); + new (&m_index) IndexT(m_dram_space.myPtr(address), m_dram_prefix->getPageSize(), m_access_type); + m_next_page_num = m_index.treeHeader().m_next_page_num; + m_max_state_num = m_index.treeHeader().m_max_state_num; + } + + template + bool SparseIndexBase::isOpen() const + { + return !!m_index; + } template std::string SparseIndexBase::BlockHeader::toString(const CompressedItemT &item) const { @@ -457,4 +491,4 @@ DB0_PACKED_END return !m_index; } -} \ No newline at end of file +} diff --git a/src/dbzero/core/storage/SparsePair.cpp b/src/dbzero/core/storage/SparsePair.cpp index c9a3ce92..1e09b3e5 100644 --- a/src/dbzero/core/storage/SparsePair.cpp +++ b/src/dbzero/core/storage/SparsePair.cpp @@ -43,7 +43,27 @@ namespace db0 void SparsePair::refresh() { m_sparse_index.refresh(); - m_diff_index.refresh(); + // A read-only storage may be opened before the writer's DRAM changelog + // update is visible, leaving SparsePair with unopened indexes. Refreshing + // later can apply the DRAM pages that contain the sparse index, but the + // diff index address is only available from the freshly opened sparse + // index header. Without reopening the diff index from that address, + // BDevStorage::completeRefresh() can see a DRAM changelog state ahead of + // getMaxStateNum() and report a false inconsistency. + // + // Reproduced by BDevStorageTest.testNoLoadReaderCanRefreshAfterWriterCommit + // and observed as intermittent Python failures in + // test_refreshing_group_by_results on concurrent read-only open. + if (!!m_sparse_index.m_index) { + auto diffIndexAddress = Address::fromOffset(m_sparse_index.getExtraData()); + if (!m_diff_index.isOpen() || m_diff_index.getIndexAddress() != diffIndexAddress) { + m_diff_index.reopen(diffIndexAddress); + } else { + m_diff_index.refresh(); + } + } else { + m_diff_index.refresh(); + } } std::size_t SparsePair::size() const { @@ -92,4 +112,4 @@ namespace db0 return {}; } -} \ No newline at end of file +} diff --git a/tests/unit_tests/BDevStorageTest.cpp b/tests/unit_tests/BDevStorageTest.cpp index b6a46d9d..70f853e9 100644 --- a/tests/unit_tests/BDevStorageTest.cpp +++ b/tests/unit_tests/BDevStorageTest.cpp @@ -431,6 +431,30 @@ namespace tests cut.close(); reader.join(); } + + TEST_F( BDevStorageTest , testNoLoadReaderCanRefreshAfterWriterCommit ) + { + std::size_t page_size = 4096; + BDevStorage::create(file_name, page_size); + + BDevStorage reader(file_name, AccessType::READ_ONLY, {}, {}, { StorageOptions::NO_LOAD }); + + std::vector data(page_size, 'r'); + { + BDevStorage writer(file_name, AccessType::READ_WRITE); + writer.write(0, 1, data.size(), data.data()); + writer.flush(); + writer.close(); + } + + ASSERT_NO_THROW(reader.refresh()); + ASSERT_EQ(reader.getMaxStateNum(), 1u); + + std::vector buffer(page_size); + reader.read(0, 1, buffer.size(), buffer.data(), { AccessOptions::read }); + ASSERT_TRUE(equal(data, buffer)); + reader.close(); + } TEST_F( BDevStorageTest , testSparseIndexDurability ) {