From 5697e74fa5de51c14a75e23fd818ca9935ffc457 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Mon, 15 Jun 2026 13:00:25 +0200 Subject: [PATCH 1/4] immutable issue repro --- python_tests/test_issues_18.py | 151 ++++++++++++++++++++++++++++----- 1 file changed, 131 insertions(+), 20 deletions(-) diff --git a/python_tests/test_issues_18.py b/python_tests/test_issues_18.py index 15f45113..cb029045 100644 --- a/python_tests/test_issues_18.py +++ b/python_tests/test_issues_18.py @@ -1,45 +1,156 @@ # SPDX-License-Identifier: LGPL-2.1-or-later -# Copyright (c) 2026 DBZero Software sp. z o.o. - -"""Regression coverage for set difference against memo-backed set fields.""" - -from __future__ import annotations +# Copyright (c) 2025 DBZero Software sp. z o.o. +import os import subprocess import sys import textwrap -def test_python_set_difference_with_memo_set_field_does_not_segfault(tmp_path): + +def test_unhandled_exception_with_nested_durable_list_does_not_segfault(tmp_path): + """Regression for SIGSEGV during interpreter shutdown after an exception.""" + model_path = tmp_path / "repro_model.py" + model_path.write_text( + textwrap.dedent( + """ + from __future__ import annotations + + from dataclasses import dataclass, field + + import dbzero as db0 + + DATA_PREFIX = "/issue-18/data" + + + @db0.memo(prefix=DATA_PREFIX, immutable=True) + @dataclass + class Metadata: + title: str + subtitle: str + parent: LegalExcerptRecord | int + url: str + source: str + + + @db0.memo(prefix=DATA_PREFIX, immutable=True) + @dataclass + class LegalExcerptRecord: + id: int + content: str + metadata: Metadata + + @classmethod + def from_schema_data(cls, data, parent=None): + metadata = Metadata.from_schema_data(data["metadata"], parent=parent) + return cls(int(data["id"]), data["content"], metadata) + + + @classmethod + def metadata_from_schema_data(cls, data, parent=None): + parent_id = int(data["parentId"]) + return cls( + data["title"], + data["subtitle"], + parent_id if parent is None else parent, + data["url"], + data["source"], + ) + + + Metadata.from_schema_data = metadata_from_schema_data + + + @db0.memo(prefix=DATA_PREFIX, singleton=True) + @dataclass + class Root: + records: list[LegalExcerptRecord] = field(default_factory=list) + records_by_id: dict[str, LegalExcerptRecord] = field(default_factory=dict) + record_positions_by_id: dict[str, int] = field(default_factory=dict) + unresolved_child_ids_by_parent_id: dict[str, list[str]] = field(default_factory=dict) + + def add_record_from_schema_data(self, data): + parent_id = int(data["metadata"]["parentId"]) + existing_parent = self.get_record(parent_id) + parent = existing_parent if existing_parent is not None else parent_id + record = LegalExcerptRecord.from_schema_data(data, parent=parent) + self.store_record(record) + return record + + def store_record(self, record): + record_key = str(record.id) + self._remove_unresolved_child(record_key) + if isinstance(record.metadata.parent, int): + self._add_unresolved_child(record_key, record.metadata.parent) + + position = self.record_positions_by_id.get(record_key) + if position is None: + self.record_positions_by_id[record_key] = len(self.records) + self.records.append(record) + else: + self.records[position] = record + self.records_by_id[record_key] = record + + def get_record(self, record_id): + return self.records_by_id.get(str(record_id)) + + def _add_unresolved_child(self, child_key, parent_id): + parent_key = str(parent_id) + child_ids = self.unresolved_child_ids_by_parent_id.setdefault(parent_key, []) + if child_key not in child_ids: + child_ids.append(child_key) + + def _remove_unresolved_child(self, child_key): + for _parent_key, child_ids in self.unresolved_child_ids_by_parent_id.items(): + child_ids.discard(child_key) + """ + ), + encoding="utf-8", + ) + script = textwrap.dedent( f""" - from dataclasses import dataclass, field + import sys + import tempfile import dbzero as db0 + sys.path.insert(0, {str(tmp_path)!r}) - @db0.memo(prefix="/issue-18") - @dataclass(eq=False) - class Contact: - tags: set[str] = field(default_factory=set) + from repro_model import DATA_PREFIX, Root + def record_data(record_id): + return {{ + "id": str(record_id), + "content": "Legal text excerpt body.", + "metadata": {{ + "title": "Legal act title", + "subtitle": "Legal act subtitle", + "parentId": "14", + "url": "data/html/DU_1919_364.html", + "source": "Art. 1.", + }}, + }} - db0.init({str(tmp_path)!r}, prefix="/issue-18", autocommit=True) - contact = Contact({{"lead", "technical"}}) - removed = {{"lead", "technical"}} - contact.tags - assert removed == set() - db0.close() + db0.init(tempfile.mkdtemp() + "/dbzero", prefix=DATA_PREFIX, autocommit=True) + root = Root() + root.add_record_from_schema_data(record_data(19458)) + root.add_record_from_schema_data(record_data(19459)) """ ) + env = os.environ.copy() + env["PYTHONDONTWRITEBYTECODE"] = "1" result = subprocess.run( [sys.executable, "-c", script], - capture_output=True, + check=False, + env=env, text=True, - timeout=10, + capture_output=True, ) - assert result.returncode == 0, ( - f"set difference repro exited with {result.returncode}\n" + assert result.returncode == 1, ( + f"subprocess exited with {result.returncode}; expected ordinary AttributeError exit\n" f"stdout:\n{result.stdout}\n" f"stderr:\n{result.stderr}" ) + assert "AttributeError" in result.stderr From 57dff089ba46013c4cfa58221f8682d99e9d4b32 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Mon, 15 Jun 2026 15:31:17 +0200 Subject: [PATCH 2/4] dealloc guard introduced --- src/dbzero/bindings/python/Memo.cpp | 12 ++++------- src/dbzero/bindings/python/PyAtomic.cpp | 1 + src/dbzero/bindings/python/PyLocked.cpp | 1 + src/dbzero/bindings/python/PyLocks.hpp | 20 +++++++++++++++++++ .../bindings/python/PyObjectTagManager.cpp | 1 + src/dbzero/bindings/python/PyReadOnly.cpp | 1 + src/dbzero/bindings/python/PySnapshot.cpp | 3 ++- src/dbzero/bindings/python/PyTagSet.cpp | 3 ++- src/dbzero/bindings/python/PyToolkit.cpp | 4 ++-- src/dbzero/bindings/python/PyWeakProxy.cpp | 1 + .../python/collections/PyByteArray.cpp | 1 + .../bindings/python/collections/PyDict.cpp | 1 + .../python/collections/PyDictView.cpp | 1 + .../bindings/python/collections/PyIndex.cpp | 1 + .../python/collections/PyIterator.hpp | 1 + .../bindings/python/collections/PyList.cpp | 1 + .../bindings/python/collections/PySet.cpp | 1 + .../bindings/python/collections/PyTuple.cpp | 1 + .../bindings/python/collections/PyWeakSet.cpp | 1 + .../bindings/python/embedded/EmbeddedDict.cpp | 2 ++ .../python/embedded/EmbeddedObject.cpp | 12 +++++------ .../bindings/python/embedded/EmbeddedSet.cpp | 2 ++ .../python/embedded/EmbeddedTuple.cpp | 1 + .../bindings/python/iter/PyJoinIterable.cpp | 3 ++- .../bindings/python/iter/PyJoinIterator.cpp | 3 ++- .../bindings/python/iter/PyObjectIterable.cpp | 1 + .../bindings/python/iter/PyObjectIterator.cpp | 1 + src/dbzero/bindings/python/types/PyClass.cpp | 1 + .../bindings/python/types/PyClassFields.cpp | 4 +++- .../bindings/python/types/PyCompositeTag.cpp | 1 + src/dbzero/bindings/python/types/PyEnum.cpp | 3 +++ src/dbzero/bindings/python/types/PyTag.cpp | 1 + 32 files changed, 70 insertions(+), 21 deletions(-) diff --git a/src/dbzero/bindings/python/Memo.cpp b/src/dbzero/bindings/python/Memo.cpp index d2156b55..3894af52 100644 --- a/src/dbzero/bindings/python/Memo.cpp +++ b/src/dbzero/bindings/python/Memo.cpp @@ -226,15 +226,11 @@ namespace db0::python template void PyAPI_MemoObject_del(MemoImplT *memo_obj) { + PY_DEALLOC_GUARD(); PY_API_FUNC - if (Py_IsInitialized()) - { - // destroy associated db0 Object instance - memo_obj->destroy(); - // Skip deallocation during/after Python finalization - // Python Garbage Collector might be finalized (i.e. destroyed) at this point - Py_TYPE(memo_obj)->tp_free((PyObject*)memo_obj); - } + // destroy associated db0 Object instance + memo_obj->destroy(); + Py_TYPE(memo_obj)->tp_free((PyObject*)memo_obj); } template diff --git a/src/dbzero/bindings/python/PyAtomic.cpp b/src/dbzero/bindings/python/PyAtomic.cpp index 0fd18299..8dba5486 100644 --- a/src/dbzero/bindings/python/PyAtomic.cpp +++ b/src/dbzero/bindings/python/PyAtomic.cpp @@ -26,6 +26,7 @@ namespace db0::python void PyAPI_PyAtomic_del(PyAtomic* self) { + PY_DEALLOC_GUARD(); PY_API_FUNC self->destroy(); Py_TYPE(self)->tp_free((PyObject*)self); diff --git a/src/dbzero/bindings/python/PyLocked.cpp b/src/dbzero/bindings/python/PyLocked.cpp index f9292177..6d8c365d 100644 --- a/src/dbzero/bindings/python/PyLocked.cpp +++ b/src/dbzero/bindings/python/PyLocked.cpp @@ -29,6 +29,7 @@ namespace db0::python void PyAPI_PyLocked_del(PyLocked* self) { + PY_DEALLOC_GUARD(); PY_API_FUNC self->destroy(); Py_TYPE(self)->tp_free((PyObject*)self); diff --git a/src/dbzero/bindings/python/PyLocks.hpp b/src/dbzero/bindings/python/PyLocks.hpp index b6df2393..1f5fad4c 100644 --- a/src/dbzero/bindings/python/PyLocks.hpp +++ b/src/dbzero/bindings/python/PyLocks.hpp @@ -24,6 +24,26 @@ namespace db0::python { + inline bool isPythonFinalizing() + { +#if PY_VERSION_HEX >= 0x030D0000 + return Py_IsFinalizing(); +#else + return _Py_IsFinalizing(); +#endif + } + +// Avoid running dbzero/Python cleanup from tp_dealloc while the interpreter is +// finalizing. This surfaced as a shutdown SIGSEGV after an unhandled Python +// exception left nested durable objects alive; deallocators tried to enter the +// dbzero API lock / Python C API after finalization had started. +#define PY_DEALLOC_GUARD() \ + do { \ + if (!Py_IsInitialized() || db0::python::isPythonFinalizing()) { \ + return; \ + } \ + } while (false) + struct GIL_Lock { PyGILState_STATE m_state; diff --git a/src/dbzero/bindings/python/PyObjectTagManager.cpp b/src/dbzero/bindings/python/PyObjectTagManager.cpp index 038530fa..7e6ba454 100644 --- a/src/dbzero/bindings/python/PyObjectTagManager.cpp +++ b/src/dbzero/bindings/python/PyObjectTagManager.cpp @@ -32,6 +32,7 @@ namespace db0::python void PyAPI_PyObjectTagManager_del(PyObjectTagManager* tags_obj) { + PY_DEALLOC_GUARD(); PY_API_FUNC // destroy associated DB0 instance tags_obj->destroy(); diff --git a/src/dbzero/bindings/python/PyReadOnly.cpp b/src/dbzero/bindings/python/PyReadOnly.cpp index e9a9fe4c..fe1a4c85 100644 --- a/src/dbzero/bindings/python/PyReadOnly.cpp +++ b/src/dbzero/bindings/python/PyReadOnly.cpp @@ -277,6 +277,7 @@ namespace db0::python void PyAPI_PyReadOnly_del(PyReadOnly* self) { + PY_DEALLOC_GUARD(); PY_API_FUNC self->destroy(); Py_TYPE(self)->tp_free((PyObject*)self); diff --git a/src/dbzero/bindings/python/PySnapshot.cpp b/src/dbzero/bindings/python/PySnapshot.cpp index 2770ae48..3c214c7a 100644 --- a/src/dbzero/bindings/python/PySnapshot.cpp +++ b/src/dbzero/bindings/python/PySnapshot.cpp @@ -18,7 +18,8 @@ namespace db0::python } void PyAPI_PySnapshot_del(PySnapshotObject* snapshot_obj) - { + { + PY_DEALLOC_GUARD(); // NOTE: it's safe to destroy without API lock (not a v_object) // also API lock here would result in a deadlock snapshot_obj->destroy(); diff --git a/src/dbzero/bindings/python/PyTagSet.cpp b/src/dbzero/bindings/python/PyTagSet.cpp index 1c1bde05..4dd05f79 100644 --- a/src/dbzero/bindings/python/PyTagSet.cpp +++ b/src/dbzero/bindings/python/PyTagSet.cpp @@ -21,7 +21,8 @@ namespace db0::python }; void PyAPI_PyTagSet_del(PyTagSet *py_tag_set) - { + { + PY_DEALLOC_GUARD(); py_tag_set->m_tag_set.~TagSet(); PyObject_Del(py_tag_set); } diff --git a/src/dbzero/bindings/python/PyToolkit.cpp b/src/dbzero/bindings/python/PyToolkit.cpp index f3b7553d..1d49781f 100644 --- a/src/dbzero/bindings/python/PyToolkit.cpp +++ b/src/dbzero/bindings/python/PyToolkit.cpp @@ -1380,8 +1380,8 @@ namespace db0::python return {}; } - if (!Py_IsInitialized()) { - // Simply return the lock after python instance was finalized + if (!Py_IsInitialized() || isPythonFinalizing()) { + // Simply return the lock after Python is finalizing/finalized. // This is safe because fixture threads should be stopped at this point return SafeRLock(m_api_mutex); } diff --git a/src/dbzero/bindings/python/PyWeakProxy.cpp b/src/dbzero/bindings/python/PyWeakProxy.cpp index 993de335..02f47543 100644 --- a/src/dbzero/bindings/python/PyWeakProxy.cpp +++ b/src/dbzero/bindings/python/PyWeakProxy.cpp @@ -14,6 +14,7 @@ namespace db0::python void PyAPI_PyWeakProxy_del(PyWeakProxy *py_weak_proxy) { + PY_DEALLOC_GUARD(); PY_API_FUNC if (py_weak_proxy->m_py_object) { Py_DECREF(py_weak_proxy->m_py_object); diff --git a/src/dbzero/bindings/python/collections/PyByteArray.cpp b/src/dbzero/bindings/python/collections/PyByteArray.cpp index 99f96de7..3b2594e4 100644 --- a/src/dbzero/bindings/python/collections/PyByteArray.cpp +++ b/src/dbzero/bindings/python/collections/PyByteArray.cpp @@ -279,6 +279,7 @@ namespace db0::python void PyAPI_ByteArrayObject_del(ByteArrayObject* bytearray_obj) { + PY_DEALLOC_GUARD(); PY_API_FUNC // destroy associated DB0 ByteArray instance bytearray_obj->destroy(); diff --git a/src/dbzero/bindings/python/collections/PyDict.cpp b/src/dbzero/bindings/python/collections/PyDict.cpp index ded6842f..0a656bfd 100644 --- a/src/dbzero/bindings/python/collections/PyDict.cpp +++ b/src/dbzero/bindings/python/collections/PyDict.cpp @@ -126,6 +126,7 @@ namespace db0::python void PyAPI_DictObject_del(DictObject* dict_obj) { + PY_DEALLOC_GUARD(); PY_API_FUNC // destroy associated DB0 Dict instance dict_obj->destroy(); diff --git a/src/dbzero/bindings/python/collections/PyDictView.cpp b/src/dbzero/bindings/python/collections/PyDictView.cpp index 349c9c70..18ff758a 100644 --- a/src/dbzero/bindings/python/collections/PyDictView.cpp +++ b/src/dbzero/bindings/python/collections/PyDictView.cpp @@ -77,6 +77,7 @@ namespace db0::python void PyAPI_DictViewObject_del(DictViewObject* dict_obj) { + PY_DEALLOC_GUARD(); PY_API_FUNC // destroy associated DB0 Dict instance dict_obj->ext().~DictView(); diff --git a/src/dbzero/bindings/python/collections/PyIndex.cpp b/src/dbzero/bindings/python/collections/PyIndex.cpp index 99c21597..7343e52e 100644 --- a/src/dbzero/bindings/python/collections/PyIndex.cpp +++ b/src/dbzero/bindings/python/collections/PyIndex.cpp @@ -51,6 +51,7 @@ namespace db0::python void PyAPI_IndexObject_del(IndexObject* index_obj) { + PY_DEALLOC_GUARD(); PY_API_FUNC // destroy associated DB0 Index instance index_obj->destroy(); diff --git a/src/dbzero/bindings/python/collections/PyIterator.hpp b/src/dbzero/bindings/python/collections/PyIterator.hpp index 16167231..2bdd03a7 100644 --- a/src/dbzero/bindings/python/collections/PyIterator.hpp +++ b/src/dbzero/bindings/python/collections/PyIterator.hpp @@ -19,6 +19,7 @@ namespace db0::python template void IteratorObject_del(IteratorObjectT* self) { + PY_DEALLOC_GUARD(); PY_API_FUNC // destroy associated DB0 instance // calls destructor of ext object diff --git a/src/dbzero/bindings/python/collections/PyList.cpp b/src/dbzero/bindings/python/collections/PyList.cpp index 0e17bc30..9266ec7e 100644 --- a/src/dbzero/bindings/python/collections/PyList.cpp +++ b/src/dbzero/bindings/python/collections/PyList.cpp @@ -385,6 +385,7 @@ namespace db0::python void PyAPI_ListObject_del(ListObject* list_obj) { + PY_DEALLOC_GUARD(); PY_API_FUNC // destroy associated DB0 List instance list_obj->destroy(); diff --git a/src/dbzero/bindings/python/collections/PySet.cpp b/src/dbzero/bindings/python/collections/PySet.cpp index 3103f55e..63dfebf2 100644 --- a/src/dbzero/bindings/python/collections/PySet.cpp +++ b/src/dbzero/bindings/python/collections/PySet.cpp @@ -300,6 +300,7 @@ namespace db0::python void SetObject_del(SetObject* set_obj) { + PY_DEALLOC_GUARD(); PY_API_FUNC // destroy associated DB0 Set instance set_obj->destroy(); diff --git a/src/dbzero/bindings/python/collections/PyTuple.cpp b/src/dbzero/bindings/python/collections/PyTuple.cpp index c0c6f9b8..e71b9f1c 100644 --- a/src/dbzero/bindings/python/collections/PyTuple.cpp +++ b/src/dbzero/bindings/python/collections/PyTuple.cpp @@ -209,6 +209,7 @@ namespace db0::python void PyAPI_TupleObject_del(TupleObject* tuple_obj) { + PY_DEALLOC_GUARD(); PY_API_FUNC // destroy associated DB0 Tuple instance tuple_obj->destroy(); diff --git a/src/dbzero/bindings/python/collections/PyWeakSet.cpp b/src/dbzero/bindings/python/collections/PyWeakSet.cpp index a9bf3adc..6b1a5c5d 100644 --- a/src/dbzero/bindings/python/collections/PyWeakSet.cpp +++ b/src/dbzero/bindings/python/collections/PyWeakSet.cpp @@ -222,6 +222,7 @@ namespace db0::python void WeakSetObject_del(WeakSetObject *self) { + PY_DEALLOC_GUARD(); PY_API_FUNC self->destroy(); Py_TYPE(self)->tp_free((PyObject*)self); diff --git a/src/dbzero/bindings/python/embedded/EmbeddedDict.cpp b/src/dbzero/bindings/python/embedded/EmbeddedDict.cpp index 82341ef3..bd3c61e1 100644 --- a/src/dbzero/bindings/python/embedded/EmbeddedDict.cpp +++ b/src/dbzero/bindings/python/embedded/EmbeddedDict.cpp @@ -375,6 +375,7 @@ namespace db0::python void PyAPI_EmbeddedDict_del(EmbeddedDict *self) { + PY_DEALLOC_GUARD(); PY_API_FUNC if (PyObject_GC_IsTracked(self)) { PyObject_GC_UnTrack(self); @@ -385,6 +386,7 @@ namespace db0::python void PyAPI_EmbeddedDictIterator_del(EmbeddedDictIterator *self) { + PY_DEALLOC_GUARD(); PY_API_FUNC if (PyObject_GC_IsTracked(self)) { PyObject_GC_UnTrack(self); diff --git a/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp b/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp index b561c4e6..399323c1 100644 --- a/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp +++ b/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp @@ -244,6 +244,7 @@ namespace db0::python void PyAPI_EmbeddedObject_del(EmbeddedObject *self) { + PY_DEALLOC_GUARD(); PY_API_FUNC if (PyObject_GC_IsTracked(self)) { PyObject_GC_UnTrack(self); @@ -254,14 +255,13 @@ namespace db0::python void PyAPI_EmbeddedMemo_del(MemoImmutableObject *self) { + PY_DEALLOC_GUARD(); PY_API_FUNC - if (Py_IsInitialized()) { - if (PyObject_GC_IsTracked(self)) { - PyObject_GC_UnTrack(self); - } - embeddedMemoRef(self).~EmbeddedObjectRef(); - Py_TYPE(self)->tp_free(reinterpret_cast(self)); + if (PyObject_GC_IsTracked(self)) { + PyObject_GC_UnTrack(self); } + embeddedMemoRef(self).~EmbeddedObjectRef(); + Py_TYPE(self)->tp_free(reinterpret_cast(self)); } int EmbeddedObject_traverse(EmbeddedObject *self, visitproc visit, void *arg) diff --git a/src/dbzero/bindings/python/embedded/EmbeddedSet.cpp b/src/dbzero/bindings/python/embedded/EmbeddedSet.cpp index f21949f0..f53d938b 100644 --- a/src/dbzero/bindings/python/embedded/EmbeddedSet.cpp +++ b/src/dbzero/bindings/python/embedded/EmbeddedSet.cpp @@ -188,6 +188,7 @@ namespace db0::python void PyAPI_EmbeddedSet_del(EmbeddedSet *self) { + PY_DEALLOC_GUARD(); PY_API_FUNC if (PyObject_GC_IsTracked(self)) { PyObject_GC_UnTrack(self); @@ -198,6 +199,7 @@ namespace db0::python void PyAPI_EmbeddedSetIterator_del(EmbeddedSetIterator *self) { + PY_DEALLOC_GUARD(); PY_API_FUNC if (PyObject_GC_IsTracked(self)) { PyObject_GC_UnTrack(self); diff --git a/src/dbzero/bindings/python/embedded/EmbeddedTuple.cpp b/src/dbzero/bindings/python/embedded/EmbeddedTuple.cpp index 219228e4..0845ec2d 100644 --- a/src/dbzero/bindings/python/embedded/EmbeddedTuple.cpp +++ b/src/dbzero/bindings/python/embedded/EmbeddedTuple.cpp @@ -173,6 +173,7 @@ namespace db0::python void PyAPI_EmbeddedTuple_del(EmbeddedTuple *self) { + PY_DEALLOC_GUARD(); PY_API_FUNC if (PyObject_GC_IsTracked(self)) { PyObject_GC_UnTrack(self); diff --git a/src/dbzero/bindings/python/iter/PyJoinIterable.cpp b/src/dbzero/bindings/python/iter/PyJoinIterable.cpp index d9319226..d15524f8 100644 --- a/src/dbzero/bindings/python/iter/PyJoinIterable.cpp +++ b/src/dbzero/bindings/python/iter/PyJoinIterable.cpp @@ -18,6 +18,7 @@ namespace db0::python void PyJoinIterable_del(PyJoinIterable* self) { + PY_DEALLOC_GUARD(); // destroy associated db0 instance self->destroy(); Py_TYPE(self)->tp_free((PyObject*)self); @@ -128,4 +129,4 @@ namespace db0::python num_args, on_arg, nullptr, prefix_name); } -} \ No newline at end of file +} diff --git a/src/dbzero/bindings/python/iter/PyJoinIterator.cpp b/src/dbzero/bindings/python/iter/PyJoinIterator.cpp index 38406350..2b5c3226 100644 --- a/src/dbzero/bindings/python/iter/PyJoinIterator.cpp +++ b/src/dbzero/bindings/python/iter/PyJoinIterator.cpp @@ -19,6 +19,7 @@ namespace db0::python void PyJoinIterator_del(PyJoinIterator* self) { + PY_DEALLOC_GUARD(); // destroy associated db0 instance self->destroy(); Py_TYPE(self)->tp_free((PyObject*)self); @@ -78,4 +79,4 @@ namespace db0::python return Py_TYPE(py_object) == &PyJoinIteratorType; } -} \ No newline at end of file +} diff --git a/src/dbzero/bindings/python/iter/PyObjectIterable.cpp b/src/dbzero/bindings/python/iter/PyObjectIterable.cpp index 5d621e6f..86f843b5 100644 --- a/src/dbzero/bindings/python/iter/PyObjectIterable.cpp +++ b/src/dbzero/bindings/python/iter/PyObjectIterable.cpp @@ -38,6 +38,7 @@ namespace db0::python void PyObjectIterable_del(PyObjectIterable* self) { + PY_DEALLOC_GUARD(); PY_API_FUNC // destroy associated db0 instance self->destroy(); diff --git a/src/dbzero/bindings/python/iter/PyObjectIterator.cpp b/src/dbzero/bindings/python/iter/PyObjectIterator.cpp index 15db3b76..7da19ffa 100644 --- a/src/dbzero/bindings/python/iter/PyObjectIterator.cpp +++ b/src/dbzero/bindings/python/iter/PyObjectIterator.cpp @@ -21,6 +21,7 @@ namespace db0::python void PyObjectIterator_del(PyObjectIterator* self) { + PY_DEALLOC_GUARD(); PY_API_FUNC // destroy associated instance self->destroy(); diff --git a/src/dbzero/bindings/python/types/PyClass.cpp b/src/dbzero/bindings/python/types/PyClass.cpp index de52f2d1..fff63422 100644 --- a/src/dbzero/bindings/python/types/PyClass.cpp +++ b/src/dbzero/bindings/python/types/PyClass.cpp @@ -63,6 +63,7 @@ namespace db0::python void PyAPI_ClassObject_del(ClassObject* class_obj) { + PY_DEALLOC_GUARD(); PY_API_FUNC // release associated shared_ptr class_obj->destroy(); diff --git a/src/dbzero/bindings/python/types/PyClassFields.cpp b/src/dbzero/bindings/python/types/PyClassFields.cpp index 8bcaca73..7a290595 100644 --- a/src/dbzero/bindings/python/types/PyClassFields.cpp +++ b/src/dbzero/bindings/python/types/PyClassFields.cpp @@ -32,7 +32,8 @@ namespace db0::python } void PyClassFields_del(PyClassFields* self) - { + { + PY_DEALLOC_GUARD(); // destroy associated DB0 instance self->destroy(); Py_TYPE(self)->tp_free((PyObject*)self); @@ -40,6 +41,7 @@ namespace db0::python void PyFieldDef_del(PyFieldDef *self) { + PY_DEALLOC_GUARD(); // destroy associated DB0 instance self->destroy(); Py_TYPE(self)->tp_free((PyObject*)self); diff --git a/src/dbzero/bindings/python/types/PyCompositeTag.cpp b/src/dbzero/bindings/python/types/PyCompositeTag.cpp index f1e38714..ba6ebacb 100644 --- a/src/dbzero/bindings/python/types/PyCompositeTag.cpp +++ b/src/dbzero/bindings/python/types/PyCompositeTag.cpp @@ -21,6 +21,7 @@ namespace db0::python void PyCompositeTag_del(PyCompositeTag *self) { + PY_DEALLOC_GUARD(); PY_API_FUNC self->destroy(); Py_TYPE(self)->tp_free((PyObject*)self); diff --git a/src/dbzero/bindings/python/types/PyEnum.cpp b/src/dbzero/bindings/python/types/PyEnum.cpp index 2d0275a8..21f1af5f 100644 --- a/src/dbzero/bindings/python/types/PyEnum.cpp +++ b/src/dbzero/bindings/python/types/PyEnum.cpp @@ -38,6 +38,7 @@ namespace db0::python void PyEnum_del(PyEnum* self) { + PY_DEALLOC_GUARD(); PY_API_FUNC // destroy associated DB0 instance self->destroy(); @@ -85,6 +86,7 @@ namespace db0::python void PyEnumValue_del(PyEnumValue* self) { + PY_DEALLOC_GUARD(); // destroy associated DB0 instance self->destroy(); Py_TYPE(self)->tp_free((PyObject*)self); @@ -92,6 +94,7 @@ namespace db0::python void PyEnumValueRepr_del(PyEnumValueRepr* self) { + PY_DEALLOC_GUARD(); // destroy associated DB0 instance self->destroy(); Py_TYPE(self)->tp_free((PyObject*)self); diff --git a/src/dbzero/bindings/python/types/PyTag.cpp b/src/dbzero/bindings/python/types/PyTag.cpp index a77b30eb..a4988da4 100644 --- a/src/dbzero/bindings/python/types/PyTag.cpp +++ b/src/dbzero/bindings/python/types/PyTag.cpp @@ -23,6 +23,7 @@ namespace db0::python void PyTag_del(PyTag* self) { + PY_DEALLOC_GUARD(); // destroy associated instance self->destroy(); Py_TYPE(self)->tp_free((PyObject*)self); From 799af761113b45c16e0fb156b8698ae12633e989 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Mon, 15 Jun 2026 16:35:57 +0200 Subject: [PATCH 3/4] intern type finalization fix + test --- python_tests/test_memo_intern.py | 228 ++++++++++++++++++ .../object/ObjectImmutableImpl.cpp | 4 +- 2 files changed, 230 insertions(+), 2 deletions(-) diff --git a/python_tests/test_memo_intern.py b/python_tests/test_memo_intern.py index 16852861..fe5cacf4 100644 --- a/python_tests/test_memo_intern.py +++ b/python_tests/test_memo_intern.py @@ -2,7 +2,11 @@ # Copyright (c) 2025 DBZero Software sp. z o.o. import gc +import os import random +import subprocess +import sys +import textwrap import time from dataclasses import dataclass from dataclasses import field @@ -15,6 +19,26 @@ from .conftest import DB0_DIR +def run_intern_script(script): + env = os.environ.copy() + env["PYTHONDONTWRITEBYTECODE"] = "1" + return subprocess.run( + [sys.executable, "-c", textwrap.dedent(script)], + check=False, + env=env, + text=True, + capture_output=True, + ) + + +def assert_intern_script_exits_cleanly(result): + assert result.returncode == 0, ( + f"subprocess exited with {result.returncode}; expected clean shutdown\n" + f"stdout:\n{result.stdout}\n" + f"stderr:\n{result.stderr}" + ) + + def get_memo_class_object(obj): return db0.get_memo_class(obj).get_class() @@ -31,6 +55,13 @@ class MemoInternLeafSibling: name: str +@db0.memo(immutable=True, intern=True) +@dataclass +class MemoInternSourceNode: + parent: Optional["MemoInternSourceNode"] + contents: str + + @db0.memo(immutable=True) @dataclass class MemoNonInternImmutableLeaf: @@ -346,6 +377,203 @@ def test_embedded_interned_object_inside_container_reuses_embedded_instance( assert second.name == "container embedded" +def test_hierarchical_interned_immutable_sources_dedupe_and_preserve_parents(db0_fixture): + def make_source(parts): + source = None + for part in parts: + source = db0.materialized(MemoInternSourceNode(source, part)) + return source + + def source_parts(source): + parts = [] + while source is not None: + parts.append(source.contents) + source = source.parent + return tuple(reversed(parts)) + + paths = [] + for index in range(120): + depth = index % 4 + 1 + paths.append(( + f"title-{index % 6}", + f"section-{index % 5}", + f"chapter-{index % 4}", + f"article-{index % 3}", + )[:depth]) + assert len(paths) >= 100 + + expected_prefixes = { + path[:prefix_len] + for path in paths + for prefix_len in range(1, len(path) + 1) + } + expected_leaf_paths = set(paths) + + objects = [db0.materialized(make_source(path)) for path in paths] + uuids_by_path = {} + for path, source in zip(paths, objects): + source_uuid = db0.uuid(source) + uuids_by_path.setdefault(path, source_uuid) + assert source_uuid == uuids_by_path[path] + assert source_parts(source) == path + + db0.clear_cache() + duplicates = [db0.materialized(make_source(path)) for path in paths] + for path, source in zip(paths, duplicates): + assert db0.uuid(source) == uuids_by_path[path] + assert source_parts(source) == path + + assert len(uuids_by_path) == len(expected_leaf_paths) + assert len({db0.uuid(source) for source in objects + duplicates}) == len(expected_leaf_paths) + assert db0.get_type_stats(MemoInternSourceNode)["content_index"]["size"] == len(expected_prefixes) + + +def test_nested_interned_immutable_references_in_singleton_list_exit_cleanly(): + result = run_intern_script( + """ + from __future__ import annotations + + from dataclasses import dataclass, field + from pathlib import Path + import tempfile + + import dbzero as db0 + + DATA_PREFIX = "/tests/intern/nested-singleton-list" + + + @db0.memo(prefix=DATA_PREFIX, immutable=True, intern=True) + @dataclass + class Source: + parent: Source | None + contents: str + + + @db0.memo(prefix=DATA_PREFIX, immutable=True, intern=True) + @dataclass + class Metadata: + title: Source + source: Source + + + @db0.memo(prefix=DATA_PREFIX, immutable=True, intern=True) + @dataclass + class Record: + metadata: Metadata + + + @db0.memo(prefix=DATA_PREFIX, singleton=True) + @dataclass + class Root: + records: list[Record | None] = field(default_factory=list) + + + db0.init(str(Path(tempfile.mkdtemp()) / "dbzero"), prefix=DATA_PREFIX, autocommit=True) + + root = Root() + title = Source(None, "Legal act title") + section = Source(title, "Dzial dziewiaty") + chapter = Source(section, "Rozdzial I") + article = Source(chapter, "Art. 1.") + record = Record(Metadata(title=title, source=article)) + + root.records.extend([None, record]) + print("stored", flush=True) + db0.close() + print("closed", flush=True) + """ + ) + + assert_intern_script_exits_cleanly(result) + assert "stored" in result.stdout + assert "closed" in result.stdout + + +def test_nested_interned_immutable_keyword_factory_record_gets_uuid(): + result = run_intern_script( + """ + from __future__ import annotations + + from dataclasses import dataclass + from pathlib import Path + import tempfile + + import dbzero as db0 + + DATA_PREFIX = "/tests/intern/keyword-factory-record" + + + @db0.memo(prefix=DATA_PREFIX, immutable=True, intern=True) + @dataclass + class Source: + parent: Source | None + contents: str + + @classmethod + def root(cls, contents: str) -> Source: + return cls(parent=None, contents=contents) + + @classmethod + def from_path(cls, root: Source, path: str) -> Source: + source = root + for part in path.split("/"): + source = cls(parent=source, contents=part) + return source + + + @db0.memo(prefix=DATA_PREFIX, immutable=True, intern=True) + @dataclass + class Metadata: + title: Source + subtitle: str + source: Source + + + @db0.memo(prefix=DATA_PREFIX, immutable=True, intern=True) + @dataclass + class Record: + id: int + content: str + metadata: Metadata + + @classmethod + def from_schema_data(cls, data): + title = Source.root(data["title"]) + source = Source.from_path(title, data["source"]) + return cls( + id=int(data["id"]), + content=data["content"], + metadata=Metadata( + title=title, + subtitle=data["subtitle"], + source=source, + ), + ) + + + db0.init(str(Path(tempfile.mkdtemp()) / "dbzero"), prefix=DATA_PREFIX, autocommit=True) + + record = Record.from_schema_data( + { + "id": "2", + "content": "Legal text excerpt body.", + "title": "Legal act title", + "subtitle": "Legal act subtitle", + "source": "Dzial dziewiaty/Rozdzial I/Art. 1.", + } + ) + print("uuid-start", flush=True) + print(db0.uuid(record), flush=True) + db0.close() + print("closed", flush=True) + """ + ) + + assert_intern_script_exits_cleanly(result) + assert "uuid-start" in result.stdout + assert "closed" in result.stdout + + def test_standalone_interned_object_reuses_existing_instance(db0_fixture): first = db0.materialized(MemoInternLeaf("dedupe")) db0.clear_cache() diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp index 6b532e74..1097dff0 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp @@ -295,7 +295,7 @@ namespace db0::object_model if (type.isIntern()) { auto candidate = type.getContentIndex().lookup(*immutableInitializer); if (candidate) { - initializer.close(); + InitManager::instance.tryCloseInitializer(*this); return candidate; } } @@ -329,7 +329,7 @@ namespace db0::object_model if (type.isIntern()) { type.getContentIndex().insert((*this)->getObject(), this->getUniqueAddress()); } - initializer.close(); + InitManager::instance.tryCloseInitializer(*this); } assert(this->hasInstance()); From 17e6ad03c3fe2a26635531648add503d0fe39393 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Mon, 15 Jun 2026 16:37:41 +0200 Subject: [PATCH 4/4] version update --- dbzero/setup.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbzero/setup.py b/dbzero/setup.py index 2b2e451d..969ef6d9 100644 --- a/dbzero/setup.py +++ b/dbzero/setup.py @@ -10,7 +10,7 @@ setup( name='dbzero', - version='0.3.5', + version='0.3.6', description='DBZero community edition', packages=['dbzero'], python_requires='>=3.9', diff --git a/pyproject.toml b/pyproject.toml index 01cac082..3f472ce9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ requires = ['meson-python'] [project] name = 'dbzero' -version = '0.3.5' +version = '0.3.6' description = 'A state management system for Python 3.x that unifies your applications business logic, data persistence, and caching into a single, efficient layer.' readme = 'README.md' requires-python = '>=3.9'