diff --git a/dbzero/dbzero/dbzero.py b/dbzero/dbzero/dbzero.py index 21899e3d..c9e4f4dc 100644 --- a/dbzero/dbzero/dbzero.py +++ b/dbzero/dbzero/dbzero.py @@ -10,7 +10,7 @@ def load_dynamic(name, path): def __bootstrap__(): global __bootstrap__, __loader__, __file__ - paths = [os.path.join(os.path.split(__file__)[0]), "/src/dev/build/debug", "/usr/local/lib/python3/dist-packages/dbzero/"] + paths = [os.path.join(os.path.split(__file__)[0]), "/src/dev/build/release", "/usr/local/lib/python3/dist-packages/dbzero/"] __file__ = None for path in paths: if os.path.isdir(path): diff --git a/docker/Dockerfile-dev b/docker/Dockerfile-dev index e9306a6d..faef88bc 100644 --- a/docker/Dockerfile-dev +++ b/docker/Dockerfile-dev @@ -3,6 +3,7 @@ FROM python:3.11-bullseye ARG USERNAME=dbzero ARG USER_UID=1000 ARG USER_GID=1000 +ARG CODEX_VERSION=latest RUN apt-get update && apt-get install -y \ cmake \ @@ -26,7 +27,7 @@ RUN mkdir -p /etc/apt/keyrings \ && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_22.x nodistro main" > /etc/apt/sources.list.d/nodesource.list \ && apt-get update \ && apt-get install nodejs -y \ - && npm install -g @openai/codex + && npm install -g "@openai/codex@${CODEX_VERSION}" # for demo purposes only (bokeh is a charts package) RUN pip3 install jupyter diff --git a/python_tests/test_issues_18.py b/python_tests/test_issues_18.py index aa9bc010..cb029045 100644 --- a/python_tests/test_issues_18.py +++ b/python_tests/test_issues_18.py @@ -5,17 +5,8 @@ import subprocess import sys import textwrap -import pytest -SHUTDOWN_LIFETIME_SKIP_REASON = ( - "Known shutdown-lifetime crash in subprocesses that leave singleton-backed durable " - "collections with nested immutable memo references alive at interpreter teardown; " - "currently observed on Python 3.12/3.13 and macOS builds only." -) - - -@pytest.mark.skip(reason=SHUTDOWN_LIFETIME_SKIP_REASON) def test_unhandled_exception_with_nested_durable_list_does_not_segfault(tmp_path): """Regression for SIGSEGV during interpreter shutdown after an exception.""" model_path = tmp_path / "repro_model.py" diff --git a/python_tests/test_memo_intern.py b/python_tests/test_memo_intern.py index a193e0d7..82be4575 100644 --- a/python_tests/test_memo_intern.py +++ b/python_tests/test_memo_intern.py @@ -19,13 +19,6 @@ from .conftest import DB0_DIR -SHUTDOWN_LIFETIME_SKIP_REASON = ( - "Known shutdown-lifetime crash in subprocesses that leave singleton-backed durable " - "collections with nested immutable memo references alive at interpreter teardown; " - "currently observed on Python 3.12/3.13 and macOS builds only." -) - - def run_intern_script(script): env = os.environ.copy() env["PYTHONDONTWRITEBYTECODE"] = "1" @@ -62,6 +55,18 @@ class MemoInternLeafSibling: name: str +@db0.memo(immutable=True, intern=True) +@dataclass +class MemoInternKeyword: + name: str + + +@db0.memo(no_default_tags=True, singleton=True) +@dataclass +class MemoInternKeywordArrayRoot: + keyword_arrays: list[list[MemoInternKeyword]] = field(default_factory=list) + + @db0.memo(immutable=True, intern=True) @dataclass class MemoInternSourceNode: @@ -93,6 +98,18 @@ def __init__(self): self.value = None +@db0.memo(no_default_tags=True) +@dataclass +class MemoInternReferenceRecord: + values: list[MemoInternLeaf] = field(default_factory=list) + + +@db0.memo(no_default_tags=True, singleton=True) +@dataclass +class MemoInternReferenceRecordRoot: + records: list[MemoInternReferenceRecord] = field(default_factory=list) + + @db0.memo(immutable=True, intern=True) class MemoInternHolder: def __init__(self, value): @@ -435,7 +452,6 @@ def source_parts(source): assert db0.get_type_stats(MemoInternSourceNode)["content_index"]["size"] == len(expected_prefixes) -@pytest.mark.skip(reason=SHUTDOWN_LIFETIME_SKIP_REASON) def test_nested_interned_immutable_references_in_singleton_list_exit_cleanly(): result = run_intern_script( """ @@ -497,7 +513,6 @@ class Root: assert "closed" in result.stdout -@pytest.mark.skip(reason=SHUTDOWN_LIFETIME_SKIP_REASON) def test_nested_interned_immutable_keyword_factory_record_gets_uuid(): result = run_intern_script( """ @@ -645,6 +660,56 @@ def test_standalone_interned_object_reuses_after_close_and_reopen(db0_fixture): assert second.name == "reopened" +def test_embedded_interned_values_do_not_break_later_explicit_materialization(db0_fixture): + """Focused repro for materializing an interned value already embedded many times. + + This mirrors application reindexing failures where records had durable lists + of interned keyword-like labels, and later `db0.materialized(Label(name))` + raised "critical internal error - object version invalid". + """ + root = MemoInternReferenceRecordRoot() + for index in range(329): + record = MemoInternReferenceRecord() + record.values = [ + MemoInternLeaf(f"keyword-{index % 100}"), + MemoInternLeaf(f"keyword-{(index + 1) % 100}"), + ] + root.records.append(record) + + duplicate = db0.materialized(MemoInternLeaf("keyword-0")) + + assert duplicate.name == "keyword-0" + + +@pytest.mark.stress_test +def test_interned_keywords_can_fill_random_durable_arrays_without_explicit_materialization(db0_fixture): + random_generator = random.Random(19791206) + array_count = 97 + instance_count = 3000 + keyword_count = 613 + root = MemoInternKeywordArrayRoot([[] for _ in range(array_count)]) + expected_names = [[] for _ in range(array_count)] + + for instance_index in range(instance_count): + array_index = random_generator.randrange(array_count) + name = f"keyword-{random_generator.randrange(keyword_count)}" + + root.keyword_arrays[array_index].append(MemoInternKeyword(name)) + expected_names[array_index].append(name) + + if instance_index % 31 == 0: + nonempty_array_indexes = [ + index for index, expected_keywords in enumerate(expected_names) if expected_keywords + ] + read_array_index = random_generator.choice(nonempty_array_indexes) + read_keyword_index = random_generator.randrange(len(expected_names[read_array_index])) + assert root.keyword_arrays[read_array_index][read_keyword_index].name == expected_names[read_array_index][ + read_keyword_index + ] + + assert [[keyword.name for keyword in keywords] for keywords in root.keyword_arrays] == expected_names + + def test_composite_interned_object_reuses_equivalent_content(db0_fixture): first = db0.materialized(MemoInternComposite( "composite", 7, {"items": ("alpha", 1), "flags": {"x", "y"}} diff --git a/scripts/build.sh b/scripts/build.sh index 3797edfd..53e448eb 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -75,6 +75,12 @@ else meson_buildtype="release" fi +python_include_dir="$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["include"])')" +if [ -f "$build_dir/compile_commands.json" ] && ! grep -q -- "-I$python_include_dir" "$build_dir/compile_commands.json"; then + echo "Python include path changed; recreating $build_dir" + rm -rf "$build_dir" +fi + if [ -d "$build_dir/meson-info" ]; then meson configure "$build_dir" -Dbuildtype="$meson_buildtype" $options else diff --git a/src/dbzero/bindings/python/PyWorkspace.cpp b/src/dbzero/bindings/python/PyWorkspace.cpp index 9f408465..011a4617 100644 --- a/src/dbzero/bindings/python/PyWorkspace.cpp +++ b/src/dbzero/bindings/python/PyWorkspace.cpp @@ -125,6 +125,7 @@ namespace db0::python // NOTE: must unlock API because workspace destroy may trigger db0 object deletions m_workspace = nullptr; } + db0::object_model::InitManager::instance.close(); PyToolkit::getTypeManager().close(timer.get()); m_config = nullptr; m_workspace = nullptr; diff --git a/src/dbzero/bindings/python/dbzero.cpp b/src/dbzero/bindings/python/dbzero.cpp index cdb40359..9f9f9f15 100644 --- a/src/dbzero/bindings/python/dbzero.cpp +++ b/src/dbzero/bindings/python/dbzero.cpp @@ -146,12 +146,25 @@ static PyMethodDef dbzero_methods[] = {NULL} // Sentinel }; +static void dbzero_module_free(void *) +{ + try { + py::PyToolkit::getPyWorkspace().stopThreads(); + py::PyToolkit::getPyWorkspace().close(); + } catch (...) { + } +} + static struct PyModuleDef dbzero_module = { PyModuleDef_HEAD_INIT, "dbzero", NULL, -1, - dbzero_methods + dbzero_methods, + NULL, + NULL, + NULL, + dbzero_module_free }; static struct PyModuleDef dbzero_types_module = { diff --git a/src/dbzero/object_model/object/ContentIndex.cpp b/src/dbzero/object_model/object/ContentIndex.cpp index af8acfc3..617eeaa8 100644 --- a/src/dbzero/object_model/object/ContentIndex.cpp +++ b/src/dbzero/object_model/object/ContentIndex.cpp @@ -4,30 +4,18 @@ #include "ContentIndex.hpp" #include +#include #include #include #include namespace db0::object_model { - namespace - { - ContentIndex::TypeObjectSharedPtr tryResolveLangType( - const db0::swine_ptr &fixture, const std::shared_ptr &type) - { - auto &classFactory = fixture->get(); - if (classFactory.hasLangType(*type)) { - return classFactory.getLangType(*type); - } - return {}; - } - } - ContentIndex::ContentIndex(db0::swine_ptr &fixture, std::shared_ptr type) : super_t(*fixture) , m_fixture(fixture) + , m_class_factory(fixture->get()) , m_class(std::move(type)) - , m_lang_type(tryResolveLangType(fixture, m_class)) , m_base_index(*fixture) { modify().m_base_index_ptr = m_base_index.getAddress(); @@ -36,8 +24,8 @@ namespace db0::object_model ContentIndex::ContentIndex(mptr ptr, db0::swine_ptr &fixture, std::shared_ptr type) : super_t(ptr) , m_fixture(fixture) + , m_class_factory(fixture->get()) , m_class(std::move(type)) - , m_lang_type(tryResolveLangType(fixture, m_class)) , m_base_index(myPtr((*this)->m_base_index_ptr)) { } @@ -151,22 +139,15 @@ namespace db0::object_model return contains(intern_hash(fixture, initializer), address); } - ContentIndex::LangToolkit::TypeObjectPtr ContentIndex::getLangType() const - { - if (!!m_lang_type) { - return m_lang_type.get(); - } - m_lang_type = tryResolveLangType(m_fixture, m_class); - return m_lang_type.get(); - } - bool ContentIndex::candidateMatches(const ImmutableObjectInitializer &initializer, UniqueAddress candidate) const { auto fixture = m_fixture; - auto candidateObject = LangToolkit::unloadAnyObject( - fixture, candidate.getAddress(), m_class, getLangType(), candidate.getInstanceId(), AccessFlags {} + auto candidateObject = LangConfig::LangToolkit::unloadAnyObject( + fixture, candidate.getAddress(), m_class_factory, nullptr, candidate.getInstanceId(), AccessFlags {} ); - return intern_compare(fixture, initializer, LangToolkit::getMemoImmutableObject(candidateObject.get())) == 0; + return intern_compare( + fixture, initializer, LangConfig::LangToolkit::getMemoImmutableObject(candidateObject.get()) + ) == 0; } std::optional ContentIndex::lookup(const ImmutableObjectInitializer &initializer) const diff --git a/src/dbzero/object_model/object/ContentIndex.hpp b/src/dbzero/object_model/object/ContentIndex.hpp index c4609b15..6a3c862e 100644 --- a/src/dbzero/object_model/object/ContentIndex.hpp +++ b/src/dbzero/object_model/object/ContentIndex.hpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -28,6 +27,7 @@ namespace db0::object_model { class Class; + class ClassFactory; DB0_PACKED_BEGIN struct DB0_PACKED_ATTR o_content_index: public db0::o_fixed_versioned @@ -69,9 +69,6 @@ DB0_PACKED_END using BucketIndexT = ContentBucketIndex; using BucketItemT = db0::key_value; using BaseIndexT = db0::v_bindex; - using LangToolkit = LangConfig::LangToolkit; - using TypeObjectSharedPtr = typename LangToolkit::TypeObjectSharedPtr; - ContentIndex(db0::swine_ptr &, std::shared_ptr); ContentIndex(mptr, db0::swine_ptr &, std::shared_ptr); ~ContentIndex(); @@ -92,8 +89,8 @@ DB0_PACKED_END private: db0::swine_ptr m_fixture; + ClassFactory &m_class_factory; std::shared_ptr m_class; - mutable TypeObjectSharedPtr m_lang_type; mutable BaseIndexT m_base_index; struct PendingUpdate { @@ -109,7 +106,6 @@ DB0_PACKED_END void decrementSize() const; bool contains(HashT, UniqueAddress) const; void resyncBucket(typename BaseIndexT::iterator &, const BucketIndexT &) const; - typename LangToolkit::TypeObjectPtr getLangType() const; bool candidateMatches(const ImmutableObjectInitializer &, UniqueAddress) const; }; diff --git a/src/dbzero/object_model/object/ObjectInitializer.cpp b/src/dbzero/object_model/object/ObjectInitializer.cpp index 778ef4a2..e7e5cb8c 100644 --- a/src/dbzero/object_model/object/ObjectInitializer.cpp +++ b/src/dbzero/object_model/object/ObjectInitializer.cpp @@ -352,5 +352,21 @@ namespace db0::object_model *(m_initializers[m_active_count - 1]) = m_active_count - 1; --m_active_count; } + + void ObjectInitializerManager::close() + { + for (auto it = m_initializers.begin(); it != m_initializers.begin() + m_total_count; ++it) { + auto &initializer = *it; + if (auto *immutable_initializer = dynamic_cast(initializer.get())) { + immutable_initializer->resetObjects(); + } + if (initializer) { + initializer->reset(); + } + } + m_initializers.clear(); + m_active_count = 0; + m_total_count = 0; + } } diff --git a/src/dbzero/object_model/object/ObjectInitializer.hpp b/src/dbzero/object_model/object/ObjectInitializer.hpp index bcc72f8b..25f52814 100644 --- a/src/dbzero/object_model/object/ObjectInitializer.hpp +++ b/src/dbzero/object_model/object/ObjectInitializer.hpp @@ -78,6 +78,8 @@ namespace db0::object_model template ObjectInitializer *findInitializer(const T &object) const; + void close(); + protected: friend class ObjectInitializer; void closeAt(std::uint32_t loc);