From 104d8101c0aa80f4d229cc783bfdd4a6f0afaaea Mon Sep 17 00:00:00 2001 From: Wojtek Date: Mon, 22 Jun 2026 11:08:15 +0200 Subject: [PATCH 1/6] failing repro --- dbzero/dbzero/dbzero.py | 2 +- docker/Dockerfile-dev | 3 ++- python_tests/test_memo_intern.py | 41 ++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/dbzero/dbzero/dbzero.py b/dbzero/dbzero/dbzero.py index 21899e3d4..c9e4f4dcf 100644 --- a/dbzero/dbzero/dbzero.py +++ b/dbzero/dbzero/dbzero.py @@ -10,7 +10,7 @@ def load_dynamic(name, path): def __bootstrap__(): global __bootstrap__, __loader__, __file__ - paths = [os.path.join(os.path.split(__file__)[0]), "/src/dev/build/debug", "/usr/local/lib/python3/dist-packages/dbzero/"] + paths = [os.path.join(os.path.split(__file__)[0]), "/src/dev/build/release", "/usr/local/lib/python3/dist-packages/dbzero/"] __file__ = None for path in paths: if os.path.isdir(path): diff --git a/docker/Dockerfile-dev b/docker/Dockerfile-dev index e9306a6d2..faef88bc8 100644 --- a/docker/Dockerfile-dev +++ b/docker/Dockerfile-dev @@ -3,6 +3,7 @@ FROM python:3.11-bullseye ARG USERNAME=dbzero ARG USER_UID=1000 ARG USER_GID=1000 +ARG CODEX_VERSION=latest RUN apt-get update && apt-get install -y \ cmake \ @@ -26,7 +27,7 @@ RUN mkdir -p /etc/apt/keyrings \ && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_22.x nodistro main" > /etc/apt/sources.list.d/nodesource.list \ && apt-get update \ && apt-get install nodejs -y \ - && npm install -g @openai/codex + && npm install -g "@openai/codex@${CODEX_VERSION}" # for demo purposes only (bokeh is a charts package) RUN pip3 install jupyter diff --git a/python_tests/test_memo_intern.py b/python_tests/test_memo_intern.py index a193e0d74..a855c05fd 100644 --- a/python_tests/test_memo_intern.py +++ b/python_tests/test_memo_intern.py @@ -93,6 +93,18 @@ def __init__(self): self.value = None +@db0.memo(no_default_tags=True) +@dataclass +class MemoInternReferenceRecord: + values: list[MemoInternLeaf] = field(default_factory=list) + + +@db0.memo(no_default_tags=True, singleton=True) +@dataclass +class MemoInternReferenceRecordRoot: + records: list[MemoInternReferenceRecord] = field(default_factory=list) + + @db0.memo(immutable=True, intern=True) class MemoInternHolder: def __init__(self, value): @@ -645,6 +657,35 @@ def test_standalone_interned_object_reuses_after_close_and_reopen(db0_fixture): assert second.name == "reopened" +@pytest.mark.xfail( + raises=RuntimeError, + reason="Intern content lookup can hit stale embedded candidates with an invalid object version", +) +def test_embedded_interned_values_do_not_break_later_explicit_materialization(db0_fixture): + """Focused repro for materializing an interned value already embedded many times. + + This mirrors application reindexing failures where records had durable lists + of interned keyword-like labels, and later `db0.materialized(Label(name))` + raised "critical internal error - object version invalid". + """ + root = MemoInternReferenceRecordRoot() + for index in range(329): + record = MemoInternReferenceRecord() + record.values = [ + MemoInternLeaf(f"keyword-{index % 100}"), + MemoInternLeaf(f"keyword-{(index + 1) % 100}"), + ] + root.records.append(record) + + try: + duplicate = db0.materialized(MemoInternLeaf("keyword-0")) + except RuntimeError as exc: + assert "object version invalid" in str(exc) + raise + + assert duplicate.name == "keyword-0" + + def test_composite_interned_object_reuses_equivalent_content(db0_fixture): first = db0.materialized(MemoInternComposite( "composite", 7, {"items": ("alpha", 1), "flags": {"x", "y"}} From ad04638ab6243f9153953b85e7adeb61eaf26dc3 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Mon, 22 Jun 2026 11:55:04 +0200 Subject: [PATCH 2/6] content index lookup bugfix + test unblocked --- python_tests/test_memo_intern.py | 10 +----- .../object_model/object/ContentIndex.cpp | 35 +++++-------------- .../object_model/object/ContentIndex.hpp | 8 ++--- 3 files changed, 11 insertions(+), 42 deletions(-) diff --git a/python_tests/test_memo_intern.py b/python_tests/test_memo_intern.py index a855c05fd..f6782242b 100644 --- a/python_tests/test_memo_intern.py +++ b/python_tests/test_memo_intern.py @@ -657,10 +657,6 @@ def test_standalone_interned_object_reuses_after_close_and_reopen(db0_fixture): assert second.name == "reopened" -@pytest.mark.xfail( - raises=RuntimeError, - reason="Intern content lookup can hit stale embedded candidates with an invalid object version", -) def test_embedded_interned_values_do_not_break_later_explicit_materialization(db0_fixture): """Focused repro for materializing an interned value already embedded many times. @@ -677,11 +673,7 @@ def test_embedded_interned_values_do_not_break_later_explicit_materialization(db ] root.records.append(record) - try: - duplicate = db0.materialized(MemoInternLeaf("keyword-0")) - except RuntimeError as exc: - assert "object version invalid" in str(exc) - raise + duplicate = db0.materialized(MemoInternLeaf("keyword-0")) assert duplicate.name == "keyword-0" diff --git a/src/dbzero/object_model/object/ContentIndex.cpp b/src/dbzero/object_model/object/ContentIndex.cpp index af8acfc3b..617eeaa81 100644 --- a/src/dbzero/object_model/object/ContentIndex.cpp +++ b/src/dbzero/object_model/object/ContentIndex.cpp @@ -4,30 +4,18 @@ #include "ContentIndex.hpp" #include +#include #include #include #include namespace db0::object_model { - namespace - { - ContentIndex::TypeObjectSharedPtr tryResolveLangType( - const db0::swine_ptr &fixture, const std::shared_ptr &type) - { - auto &classFactory = fixture->get(); - if (classFactory.hasLangType(*type)) { - return classFactory.getLangType(*type); - } - return {}; - } - } - ContentIndex::ContentIndex(db0::swine_ptr &fixture, std::shared_ptr type) : super_t(*fixture) , m_fixture(fixture) + , m_class_factory(fixture->get()) , m_class(std::move(type)) - , m_lang_type(tryResolveLangType(fixture, m_class)) , m_base_index(*fixture) { modify().m_base_index_ptr = m_base_index.getAddress(); @@ -36,8 +24,8 @@ namespace db0::object_model ContentIndex::ContentIndex(mptr ptr, db0::swine_ptr &fixture, std::shared_ptr type) : super_t(ptr) , m_fixture(fixture) + , m_class_factory(fixture->get()) , m_class(std::move(type)) - , m_lang_type(tryResolveLangType(fixture, m_class)) , m_base_index(myPtr((*this)->m_base_index_ptr)) { } @@ -151,22 +139,15 @@ namespace db0::object_model return contains(intern_hash(fixture, initializer), address); } - ContentIndex::LangToolkit::TypeObjectPtr ContentIndex::getLangType() const - { - if (!!m_lang_type) { - return m_lang_type.get(); - } - m_lang_type = tryResolveLangType(m_fixture, m_class); - return m_lang_type.get(); - } - bool ContentIndex::candidateMatches(const ImmutableObjectInitializer &initializer, UniqueAddress candidate) const { auto fixture = m_fixture; - auto candidateObject = LangToolkit::unloadAnyObject( - fixture, candidate.getAddress(), m_class, getLangType(), candidate.getInstanceId(), AccessFlags {} + auto candidateObject = LangConfig::LangToolkit::unloadAnyObject( + fixture, candidate.getAddress(), m_class_factory, nullptr, candidate.getInstanceId(), AccessFlags {} ); - return intern_compare(fixture, initializer, LangToolkit::getMemoImmutableObject(candidateObject.get())) == 0; + return intern_compare( + fixture, initializer, LangConfig::LangToolkit::getMemoImmutableObject(candidateObject.get()) + ) == 0; } std::optional ContentIndex::lookup(const ImmutableObjectInitializer &initializer) const diff --git a/src/dbzero/object_model/object/ContentIndex.hpp b/src/dbzero/object_model/object/ContentIndex.hpp index c4609b15a..6a3c862e2 100644 --- a/src/dbzero/object_model/object/ContentIndex.hpp +++ b/src/dbzero/object_model/object/ContentIndex.hpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -28,6 +27,7 @@ namespace db0::object_model { class Class; + class ClassFactory; DB0_PACKED_BEGIN struct DB0_PACKED_ATTR o_content_index: public db0::o_fixed_versioned @@ -69,9 +69,6 @@ DB0_PACKED_END using BucketIndexT = ContentBucketIndex; using BucketItemT = db0::key_value; using BaseIndexT = db0::v_bindex; - using LangToolkit = LangConfig::LangToolkit; - using TypeObjectSharedPtr = typename LangToolkit::TypeObjectSharedPtr; - ContentIndex(db0::swine_ptr &, std::shared_ptr); ContentIndex(mptr, db0::swine_ptr &, std::shared_ptr); ~ContentIndex(); @@ -92,8 +89,8 @@ DB0_PACKED_END private: db0::swine_ptr m_fixture; + ClassFactory &m_class_factory; std::shared_ptr m_class; - mutable TypeObjectSharedPtr m_lang_type; mutable BaseIndexT m_base_index; struct PendingUpdate { @@ -109,7 +106,6 @@ DB0_PACKED_END void decrementSize() const; bool contains(HashT, UniqueAddress) const; void resyncBucket(typename BaseIndexT::iterator &, const BucketIndexT &) const; - typename LangToolkit::TypeObjectPtr getLangType() const; bool candidateMatches(const ImmutableObjectInitializer &, UniqueAddress) const; }; From cdc19a88c9d1a315a39d0134055c5343ebb1c0d7 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Mon, 22 Jun 2026 12:02:43 +0200 Subject: [PATCH 3/6] test unskipped --- python_tests/test_issues_18.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/python_tests/test_issues_18.py b/python_tests/test_issues_18.py index aa9bc0108..cb029045d 100644 --- a/python_tests/test_issues_18.py +++ b/python_tests/test_issues_18.py @@ -5,17 +5,8 @@ import subprocess import sys import textwrap -import pytest -SHUTDOWN_LIFETIME_SKIP_REASON = ( - "Known shutdown-lifetime crash in subprocesses that leave singleton-backed durable " - "collections with nested immutable memo references alive at interpreter teardown; " - "currently observed on Python 3.12/3.13 and macOS builds only." -) - - -@pytest.mark.skip(reason=SHUTDOWN_LIFETIME_SKIP_REASON) def test_unhandled_exception_with_nested_durable_list_does_not_segfault(tmp_path): """Regression for SIGSEGV during interpreter shutdown after an exception.""" model_path = tmp_path / "repro_model.py" From 4d39ceb029543c08e0b3bea1e99e34294b75ba55 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Mon, 22 Jun 2026 12:13:46 +0200 Subject: [PATCH 4/6] passing stres-test --- python_tests/test_memo_intern.py | 41 ++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/python_tests/test_memo_intern.py b/python_tests/test_memo_intern.py index f6782242b..f157c41fb 100644 --- a/python_tests/test_memo_intern.py +++ b/python_tests/test_memo_intern.py @@ -62,6 +62,18 @@ class MemoInternLeafSibling: name: str +@db0.memo(immutable=True, intern=True) +@dataclass +class MemoInternKeyword: + name: str + + +@db0.memo(no_default_tags=True, singleton=True) +@dataclass +class MemoInternKeywordArrayRoot: + keyword_arrays: list[list[MemoInternKeyword]] = field(default_factory=list) + + @db0.memo(immutable=True, intern=True) @dataclass class MemoInternSourceNode: @@ -678,6 +690,35 @@ def test_embedded_interned_values_do_not_break_later_explicit_materialization(db assert duplicate.name == "keyword-0" +@pytest.mark.stress_test +def test_interned_keywords_can_fill_random_durable_arrays_without_explicit_materialization(db0_fixture): + random_generator = random.Random(19791206) + array_count = 97 + instance_count = 3000 + keyword_count = 613 + root = MemoInternKeywordArrayRoot([[] for _ in range(array_count)]) + expected_names = [[] for _ in range(array_count)] + + for instance_index in range(instance_count): + array_index = random_generator.randrange(array_count) + name = f"keyword-{random_generator.randrange(keyword_count)}" + + root.keyword_arrays[array_index].append(MemoInternKeyword(name)) + expected_names[array_index].append(name) + + if instance_index % 31 == 0: + nonempty_array_indexes = [ + index for index, expected_keywords in enumerate(expected_names) if expected_keywords + ] + read_array_index = random_generator.choice(nonempty_array_indexes) + read_keyword_index = random_generator.randrange(len(expected_names[read_array_index])) + assert root.keyword_arrays[read_array_index][read_keyword_index].name == expected_names[read_array_index][ + read_keyword_index + ] + + assert [[keyword.name for keyword in keywords] for keywords in root.keyword_arrays] == expected_names + + def test_composite_interned_object_reuses_equivalent_content(db0_fixture): first = db0.materialized(MemoInternComposite( "composite", 7, {"items": ("alpha", 1), "flags": {"x", "y"}} From 69e6e411cb660c0e3d633c02431bae40ac3b9935 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Mon, 22 Jun 2026 13:44:36 +0200 Subject: [PATCH 5/6] 3.12 / 3.13 specific fixes --- python_tests/test_memo_intern.py | 9 --------- scripts/build.sh | 6 ++++++ src/dbzero/bindings/python/PyWorkspace.cpp | 1 + src/dbzero/bindings/python/dbzero.cpp | 15 ++++++++++++++- .../object_model/object/ObjectInitializer.cpp | 16 ++++++++++++++++ .../object_model/object/ObjectInitializer.hpp | 2 ++ 6 files changed, 39 insertions(+), 10 deletions(-) diff --git a/python_tests/test_memo_intern.py b/python_tests/test_memo_intern.py index f157c41fb..82be45750 100644 --- a/python_tests/test_memo_intern.py +++ b/python_tests/test_memo_intern.py @@ -19,13 +19,6 @@ from .conftest import DB0_DIR -SHUTDOWN_LIFETIME_SKIP_REASON = ( - "Known shutdown-lifetime crash in subprocesses that leave singleton-backed durable " - "collections with nested immutable memo references alive at interpreter teardown; " - "currently observed on Python 3.12/3.13 and macOS builds only." -) - - def run_intern_script(script): env = os.environ.copy() env["PYTHONDONTWRITEBYTECODE"] = "1" @@ -459,7 +452,6 @@ def source_parts(source): assert db0.get_type_stats(MemoInternSourceNode)["content_index"]["size"] == len(expected_prefixes) -@pytest.mark.skip(reason=SHUTDOWN_LIFETIME_SKIP_REASON) def test_nested_interned_immutable_references_in_singleton_list_exit_cleanly(): result = run_intern_script( """ @@ -521,7 +513,6 @@ class Root: assert "closed" in result.stdout -@pytest.mark.skip(reason=SHUTDOWN_LIFETIME_SKIP_REASON) def test_nested_interned_immutable_keyword_factory_record_gets_uuid(): result = run_intern_script( """ diff --git a/scripts/build.sh b/scripts/build.sh index 3797edfdb..53e448eb7 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -75,6 +75,12 @@ else meson_buildtype="release" fi +python_include_dir="$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["include"])')" +if [ -f "$build_dir/compile_commands.json" ] && ! grep -q -- "-I$python_include_dir" "$build_dir/compile_commands.json"; then + echo "Python include path changed; recreating $build_dir" + rm -rf "$build_dir" +fi + if [ -d "$build_dir/meson-info" ]; then meson configure "$build_dir" -Dbuildtype="$meson_buildtype" $options else diff --git a/src/dbzero/bindings/python/PyWorkspace.cpp b/src/dbzero/bindings/python/PyWorkspace.cpp index 9f408465a..011a4617a 100644 --- a/src/dbzero/bindings/python/PyWorkspace.cpp +++ b/src/dbzero/bindings/python/PyWorkspace.cpp @@ -125,6 +125,7 @@ namespace db0::python // NOTE: must unlock API because workspace destroy may trigger db0 object deletions m_workspace = nullptr; } + db0::object_model::InitManager::instance.close(); PyToolkit::getTypeManager().close(timer.get()); m_config = nullptr; m_workspace = nullptr; diff --git a/src/dbzero/bindings/python/dbzero.cpp b/src/dbzero/bindings/python/dbzero.cpp index cdb403592..9f9f9f158 100644 --- a/src/dbzero/bindings/python/dbzero.cpp +++ b/src/dbzero/bindings/python/dbzero.cpp @@ -146,12 +146,25 @@ static PyMethodDef dbzero_methods[] = {NULL} // Sentinel }; +static void dbzero_module_free(void *) +{ + try { + py::PyToolkit::getPyWorkspace().stopThreads(); + py::PyToolkit::getPyWorkspace().close(); + } catch (...) { + } +} + static struct PyModuleDef dbzero_module = { PyModuleDef_HEAD_INIT, "dbzero", NULL, -1, - dbzero_methods + dbzero_methods, + NULL, + NULL, + NULL, + dbzero_module_free }; static struct PyModuleDef dbzero_types_module = { diff --git a/src/dbzero/object_model/object/ObjectInitializer.cpp b/src/dbzero/object_model/object/ObjectInitializer.cpp index 778ef4a26..e7e5cb8c8 100644 --- a/src/dbzero/object_model/object/ObjectInitializer.cpp +++ b/src/dbzero/object_model/object/ObjectInitializer.cpp @@ -352,5 +352,21 @@ namespace db0::object_model *(m_initializers[m_active_count - 1]) = m_active_count - 1; --m_active_count; } + + void ObjectInitializerManager::close() + { + for (auto it = m_initializers.begin(); it != m_initializers.begin() + m_total_count; ++it) { + auto &initializer = *it; + if (auto *immutable_initializer = dynamic_cast(initializer.get())) { + immutable_initializer->resetObjects(); + } + if (initializer) { + initializer->reset(); + } + } + m_initializers.clear(); + m_active_count = 0; + m_total_count = 0; + } } diff --git a/src/dbzero/object_model/object/ObjectInitializer.hpp b/src/dbzero/object_model/object/ObjectInitializer.hpp index bcc72f8b9..25f528149 100644 --- a/src/dbzero/object_model/object/ObjectInitializer.hpp +++ b/src/dbzero/object_model/object/ObjectInitializer.hpp @@ -78,6 +78,8 @@ namespace db0::object_model template ObjectInitializer *findInitializer(const T &object) const; + void close(); + protected: friend class ObjectInitializer; void closeAt(std::uint32_t loc); From bb03b3314eb1adbbd9c995908b1af037e9b92d17 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Mon, 22 Jun 2026 16:45:43 +0200 Subject: [PATCH 6/6] fix: handling nested intern instances --- docker/Dockerfile-dev-12 | 57 +++++++++++++++++++ docker/Dockerfile-dev-13 | 57 +++++++++++++++++++ docker/Dockerfile-dev-14 | 57 +++++++++++++++++++ docker/Dockerfile-dev-15 | 57 +++++++++++++++++++ python_tests/test_memo_intern.py | 44 ++++++++++++++ src/dbzero/bindings/python/PyToolkit.cpp | 56 +++++++++++++++++- src/dbzero/bindings/python/PyToolkit.hpp | 3 + .../object_model/object/ContentIndex.cpp | 51 ++++++++++++++--- .../object_model/object/ContentIndex.hpp | 9 ++- .../object/ObjectImmutableImpl.cpp | 32 ++++++++++- .../object/ObjectImmutableImpl.hpp | 1 + tests/unit_tests/ContentIndexTest.cpp | 17 +++++- 12 files changed, 426 insertions(+), 15 deletions(-) create mode 100644 docker/Dockerfile-dev-12 create mode 100644 docker/Dockerfile-dev-13 create mode 100644 docker/Dockerfile-dev-14 create mode 100644 docker/Dockerfile-dev-15 diff --git a/docker/Dockerfile-dev-12 b/docker/Dockerfile-dev-12 new file mode 100644 index 000000000..f0cdf6b4c --- /dev/null +++ b/docker/Dockerfile-dev-12 @@ -0,0 +1,57 @@ +FROM python:3.12-bullseye + +ARG USERNAME=dbzero +ARG USER_UID=1000 +ARG USER_GID=1000 +ARG CODEX_VERSION=latest + +RUN apt-get update && apt-get install -y \ + cmake \ + && rm -rf /var/lib/apt/lists/* + +RUN apt-get update +RUN apt-get install psmisc +RUN apt-get install python3-pip -y +RUN apt-get install gdb -y +RUN apt-get install screen -y +RUN apt-get install rsync -y +RUN apt-get install ripgrep -y +RUN apt-get install meson ninja-build -y +RUN apt-get install python3-venv -y +RUN apt-get install gettext-base -y +RUN apt-get install valgrind -y +RUN apt-get install curl ca-certificates gnupg -y + +RUN mkdir -p /etc/apt/keyrings \ + && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \ + && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_22.x nodistro main" > /etc/apt/sources.list.d/nodesource.list \ + && apt-get update \ + && apt-get install nodejs -y \ + && npm install -g "@openai/codex@${CODEX_VERSION}" + +# for demo purposes only (bokeh is a charts package) +RUN pip3 install jupyter +RUN pip3 install bokeh +RUN pip3 install psycopg2-binary +RUN pip3 install sqlalchemy +RUN pip3 install fastapi uvicorn + +RUN pip3 install build +ADD requirements.txt requirements.txt +RUN pip3 install -r requirements.txt --upgrade + +RUN ulimit -c unlimited +RUN mkdir -p "$(cat /proc/sys/kernel/core_pattern | sed 's/%.*//')" +RUN chmod 777 "$(cat /proc/sys/kernel/core_pattern | sed 's/%.*//')" + +RUN groupadd --gid "$USER_GID" "$USERNAME" \ + && useradd --uid "$USER_UID" --gid "$USER_GID" --create-home --shell /bin/bash "$USERNAME" + +RUN chown -R "$USERNAME:$USERNAME" /usr/local/lib/python3.12/site-packages /usr/local/bin + +WORKDIR /usr/src/dbzero +USER $USERNAME +# RUN ./build.sh -r +# WORKDIR /usr/src/dbzero/build/release/ +# RUN meson install +# WORKDIR /usr/src/dbzero diff --git a/docker/Dockerfile-dev-13 b/docker/Dockerfile-dev-13 new file mode 100644 index 000000000..5fae4eba2 --- /dev/null +++ b/docker/Dockerfile-dev-13 @@ -0,0 +1,57 @@ +FROM python:3.13-bullseye + +ARG USERNAME=dbzero +ARG USER_UID=1000 +ARG USER_GID=1000 +ARG CODEX_VERSION=latest + +RUN apt-get update && apt-get install -y \ + cmake \ + && rm -rf /var/lib/apt/lists/* + +RUN apt-get update +RUN apt-get install psmisc +RUN apt-get install python3-pip -y +RUN apt-get install gdb -y +RUN apt-get install screen -y +RUN apt-get install rsync -y +RUN apt-get install ripgrep -y +RUN apt-get install meson ninja-build -y +RUN apt-get install python3-venv -y +RUN apt-get install gettext-base -y +RUN apt-get install valgrind -y +RUN apt-get install curl ca-certificates gnupg -y + +RUN mkdir -p /etc/apt/keyrings \ + && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \ + && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_22.x nodistro main" > /etc/apt/sources.list.d/nodesource.list \ + && apt-get update \ + && apt-get install nodejs -y \ + && npm install -g "@openai/codex@${CODEX_VERSION}" + +# for demo purposes only (bokeh is a charts package) +RUN pip3 install jupyter +RUN pip3 install bokeh +RUN pip3 install psycopg2-binary +RUN pip3 install sqlalchemy +RUN pip3 install fastapi uvicorn + +RUN pip3 install build +ADD requirements.txt requirements.txt +RUN pip3 install -r requirements.txt --upgrade + +RUN ulimit -c unlimited +RUN mkdir -p "$(cat /proc/sys/kernel/core_pattern | sed 's/%.*//')" +RUN chmod 777 "$(cat /proc/sys/kernel/core_pattern | sed 's/%.*//')" + +RUN groupadd --gid "$USER_GID" "$USERNAME" \ + && useradd --uid "$USER_UID" --gid "$USER_GID" --create-home --shell /bin/bash "$USERNAME" + +RUN chown -R "$USERNAME:$USERNAME" /usr/local/lib/python3.13/site-packages /usr/local/bin + +WORKDIR /usr/src/dbzero +USER $USERNAME +# RUN ./build.sh -r +# WORKDIR /usr/src/dbzero/build/release/ +# RUN meson install +# WORKDIR /usr/src/dbzero diff --git a/docker/Dockerfile-dev-14 b/docker/Dockerfile-dev-14 new file mode 100644 index 000000000..d308865ea --- /dev/null +++ b/docker/Dockerfile-dev-14 @@ -0,0 +1,57 @@ +FROM python:3.14-bullseye + +ARG USERNAME=dbzero +ARG USER_UID=1000 +ARG USER_GID=1000 +ARG CODEX_VERSION=latest + +RUN apt-get update && apt-get install -y \ + cmake \ + && rm -rf /var/lib/apt/lists/* + +RUN apt-get update +RUN apt-get install psmisc +RUN apt-get install python3-pip -y +RUN apt-get install gdb -y +RUN apt-get install screen -y +RUN apt-get install rsync -y +RUN apt-get install ripgrep -y +RUN apt-get install meson ninja-build -y +RUN apt-get install python3-venv -y +RUN apt-get install gettext-base -y +RUN apt-get install valgrind -y +RUN apt-get install curl ca-certificates gnupg -y + +RUN mkdir -p /etc/apt/keyrings \ + && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \ + && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_22.x nodistro main" > /etc/apt/sources.list.d/nodesource.list \ + && apt-get update \ + && apt-get install nodejs -y \ + && npm install -g "@openai/codex@${CODEX_VERSION}" + +# for demo purposes only (bokeh is a charts package) +RUN pip3 install jupyter +RUN pip3 install bokeh +RUN pip3 install psycopg2-binary +RUN pip3 install sqlalchemy +RUN pip3 install fastapi uvicorn + +RUN pip3 install build +ADD requirements.txt requirements.txt +RUN pip3 install -r requirements.txt --upgrade + +RUN ulimit -c unlimited +RUN mkdir -p "$(cat /proc/sys/kernel/core_pattern | sed 's/%.*//')" +RUN chmod 777 "$(cat /proc/sys/kernel/core_pattern | sed 's/%.*//')" + +RUN groupadd --gid "$USER_GID" "$USERNAME" \ + && useradd --uid "$USER_UID" --gid "$USER_GID" --create-home --shell /bin/bash "$USERNAME" + +RUN chown -R "$USERNAME:$USERNAME" /usr/local/lib/python3.14/site-packages /usr/local/bin + +WORKDIR /usr/src/dbzero +USER $USERNAME +# RUN ./build.sh -r +# WORKDIR /usr/src/dbzero/build/release/ +# RUN meson install +# WORKDIR /usr/src/dbzero diff --git a/docker/Dockerfile-dev-15 b/docker/Dockerfile-dev-15 new file mode 100644 index 000000000..f176431f4 --- /dev/null +++ b/docker/Dockerfile-dev-15 @@ -0,0 +1,57 @@ +FROM python:3.15-bullseye + +ARG USERNAME=dbzero +ARG USER_UID=1000 +ARG USER_GID=1000 +ARG CODEX_VERSION=latest + +RUN apt-get update && apt-get install -y \ + cmake \ + && rm -rf /var/lib/apt/lists/* + +RUN apt-get update +RUN apt-get install psmisc +RUN apt-get install python3-pip -y +RUN apt-get install gdb -y +RUN apt-get install screen -y +RUN apt-get install rsync -y +RUN apt-get install ripgrep -y +RUN apt-get install meson ninja-build -y +RUN apt-get install python3-venv -y +RUN apt-get install gettext-base -y +RUN apt-get install valgrind -y +RUN apt-get install curl ca-certificates gnupg -y + +RUN mkdir -p /etc/apt/keyrings \ + && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \ + && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_22.x nodistro main" > /etc/apt/sources.list.d/nodesource.list \ + && apt-get update \ + && apt-get install nodejs -y \ + && npm install -g "@openai/codex@${CODEX_VERSION}" + +# for demo purposes only (bokeh is a charts package) +RUN pip3 install jupyter +RUN pip3 install bokeh +RUN pip3 install psycopg2-binary +RUN pip3 install sqlalchemy +RUN pip3 install fastapi uvicorn + +RUN pip3 install build +ADD requirements.txt requirements.txt +RUN pip3 install -r requirements.txt --upgrade + +RUN ulimit -c unlimited +RUN mkdir -p "$(cat /proc/sys/kernel/core_pattern | sed 's/%.*//')" +RUN chmod 777 "$(cat /proc/sys/kernel/core_pattern | sed 's/%.*//')" + +RUN groupadd --gid "$USER_GID" "$USERNAME" \ + && useradd --uid "$USER_UID" --gid "$USER_GID" --create-home --shell /bin/bash "$USERNAME" + +RUN chown -R "$USERNAME:$USERNAME" /usr/local/lib/python3.15/site-packages /usr/local/bin + +WORKDIR /usr/src/dbzero +USER $USERNAME +# RUN ./build.sh -r +# WORKDIR /usr/src/dbzero/build/release/ +# RUN meson install +# WORKDIR /usr/src/dbzero diff --git a/python_tests/test_memo_intern.py b/python_tests/test_memo_intern.py index 82be45750..80d8665a8 100644 --- a/python_tests/test_memo_intern.py +++ b/python_tests/test_memo_intern.py @@ -74,6 +74,20 @@ class MemoInternSourceNode: contents: str +@db0.memo(immutable=True, intern=True) +@dataclass +class MemoInternNestedInner: + leaf: MemoInternLeaf + label: str + + +@db0.memo(immutable=True, intern=True) +@dataclass +class MemoInternNestedOuter: + inner: MemoInternNestedInner + sibling: MemoInternLeaf + + @db0.memo(immutable=True) @dataclass class MemoNonInternImmutableLeaf: @@ -274,6 +288,36 @@ def test_interned_object_can_reference_interned_immutable_instance(db0_fixture): assert holder.value.name == "nested" +def test_interned_object_inside_interned_object_reuses_without_explicit_materialize(db0_fixture): + first = MemoInternNestedOuter( + inner=MemoInternNestedInner( + leaf=MemoInternLeaf("nested leaf"), + label="inner", + ), + sibling=MemoInternLeaf("nested sibling"), + ) + duplicate = MemoInternNestedOuter( + inner=MemoInternNestedInner( + leaf=MemoInternLeaf("nested leaf"), + label="inner", + ), + sibling=MemoInternLeaf("nested sibling"), + ) + + first_uuid = db0.uuid(first) + duplicate_uuid = db0.uuid(duplicate) + + assert duplicate_uuid == first_uuid + assert db0.uuid(duplicate.inner) == db0.uuid(first.inner) + assert db0.uuid(duplicate.inner.leaf) == db0.uuid(first.inner.leaf) + assert db0.uuid(duplicate.sibling) == db0.uuid(first.sibling) + assert duplicate.inner.leaf.name == "nested leaf" + assert duplicate.inner.label == "inner" + assert duplicate.sibling.name == "nested sibling" + assert db0.get_type_stats(MemoInternNestedOuter)["content_index"]["size"] == 1 + assert db0.get_type_stats(MemoInternNestedInner)["content_index"]["size"] == 1 + + def test_assigning_non_materialized_intern_to_existing_regular_memo_materializes_reference(db0_fixture): holder = MemoRegularInternReferenceHolder() db0.tags(holder).add("keep-regular-intern-reference-holder") diff --git a/src/dbzero/bindings/python/PyToolkit.cpp b/src/dbzero/bindings/python/PyToolkit.cpp index afcdb828a..79f4d6631 100644 --- a/src/dbzero/bindings/python/PyToolkit.cpp +++ b/src/dbzero/bindings/python/PyToolkit.cpp @@ -289,7 +289,7 @@ namespace db0::python } return reinterpret_cast(pyObject)->ext()->getObject(); } - + void PyToolkit::throwErrorWithPyErrorCheck(const std::string& message, const std::string& error_detail) { if (PyErr_Occurred()) { PyObject *ptype, *pvalue, *ptraceback; @@ -610,6 +610,60 @@ namespace db0::python return obj_ptr; } + const object_model::o_embedded_object &PyToolkit::getMemoImmutableObjectView( + db0::swine_ptr &fixture, const ClassFactory &class_factory, UniqueAddress address, + AccessFlags access_mode) + { + auto &lang_cache = fixture->getLangCache(); + auto cached_object = lang_cache.get(address.getAddress()); + if (!!cached_object) { + if (auto cached_address = tryGetEmbeddedUniqueAddress(cached_object.get())) { + if (cached_address->getInstanceId() != address.getInstanceId()) { + THROWF(db0::InputException) << "Invalid UUID or object has been deleted"; + } + } else if ( + PyToolkit::isAnyMemoObject(cached_object.get()) + && getMemoInstanceId(cached_object.get()) != address.getInstanceId() + ) { + THROWF(db0::InputException) << "Invalid UUID or object has been deleted"; + } + return getMemoImmutableObject(cached_object.get()); + } + + auto allocation = fixture->findAllocation( + address.getAddress(), db0::object_model::ObjectImmutableImpl::REALM_ID + ); + auto embedded_offset = address.getAddress().getOffset() - allocation.address.getOffset(); + auto stem = db0::object_model::ObjectImmutableImpl::unloadStem( + fixture, allocation.address, address.getInstanceId(), access_mode + ); + auto type = class_factory.getTypeByClassRef(stem->getClassRef()).m_class; + + if (class_factory.hasLangType(*type)) { + auto lang_type = class_factory.getLangType(*type); + auto root_object = tryUnloadObjectResolved( + fixture, allocation.address, class_factory, lang_type.get(), address.getInstanceId(), access_mode, + &allocation, false + ); + if (!!root_object) { + auto *root_memo = reinterpret_cast(root_object.get()); + if (embedded_offset == 0) { + return root_memo->ext()->getObject(); + } + return root_memo->ext().getEmbeddedObjectAtOffset(embedded_offset); + } + } + + if (embedded_offset == 0) { + return stem->getObject(); + } + + db0::object_model::ObjectImmutableImpl root( + db0::object_model::ObjectImmutableImpl::tag_no_gc(), fixture, std::move(stem), std::move(type) + ); + return root.getEmbeddedObjectAtOffset(embedded_offset); + } + PyToolkit::ObjectSharedPtr PyToolkit::tryUnloadObject( db0::swine_ptr &fixture, Address address, const ClassFactory &class_factory, TypeObjectPtr lang_type_ptr, std::uint16_t instance_id, AccessFlags access_mode, diff --git a/src/dbzero/bindings/python/PyToolkit.hpp b/src/dbzero/bindings/python/PyToolkit.hpp index 8e4d753ff..3cd1820f6 100644 --- a/src/dbzero/bindings/python/PyToolkit.hpp +++ b/src/dbzero/bindings/python/PyToolkit.hpp @@ -240,6 +240,9 @@ namespace db0::python static bool hasMemoAnyRefs(ObjectPtr); static const object_model::Class &getMemoType(ObjectPtr); static const object_model::o_embedded_object &getMemoImmutableObject(ObjectPtr); + static const object_model::o_embedded_object &getMemoImmutableObjectView( + db0::swine_ptr &, const ClassFactory &, UniqueAddress, AccessFlags = {} + ); inline static void incRef(ObjectPtr py_object) { Py_INCREF(py_object); diff --git a/src/dbzero/object_model/object/ContentIndex.cpp b/src/dbzero/object_model/object/ContentIndex.cpp index 617eeaa81..f84a94e70 100644 --- a/src/dbzero/object_model/object/ContentIndex.cpp +++ b/src/dbzero/object_model/object/ContentIndex.cpp @@ -6,11 +6,14 @@ #include #include #include +#include #include #include namespace db0::object_model { + using LangToolkit = LangConfig::LangToolkit; + ContentIndex::ContentIndex(db0::swine_ptr &fixture, std::shared_ptr type) : super_t(*fixture) , m_fixture(fixture) @@ -139,18 +142,52 @@ namespace db0::object_model return contains(intern_hash(fixture, initializer), address); } - bool ContentIndex::candidateMatches(const ImmutableObjectInitializer &initializer, UniqueAddress candidate) const + ContentIndex::ObjectSharedPtr ContentIndex::lookupCandidate( + const ImmutableObjectInitializer &initializer, UniqueAddress candidate + ) const { auto fixture = m_fixture; - auto candidateObject = LangConfig::LangToolkit::unloadAnyObject( + auto candidateObject = LangToolkit::unloadAnyObject( fixture, candidate.getAddress(), m_class_factory, nullptr, candidate.getInstanceId(), AccessFlags {} ); - return intern_compare( - fixture, initializer, LangConfig::LangToolkit::getMemoImmutableObject(candidateObject.get()) - ) == 0; + if (intern_compare( + fixture, initializer, LangToolkit::getMemoImmutableObject(candidateObject.get()) + ) == 0) { + return candidateObject; + } + return {}; + } + + const o_embedded_object &ContentIndex::candidateObjectView(UniqueAddress candidate) const + { + auto fixture = m_fixture; + return LangToolkit::getMemoImmutableObjectView(fixture, m_class_factory, candidate, AccessFlags {}); + } + + ContentIndex::ObjectSharedPtr ContentIndex::lookup(const ImmutableObjectInitializer &initializer) const + { + flush(); + + auto fixture = m_fixture; + auto iterator = m_base_index.find(intern_hash(fixture, initializer)); + if (iterator == m_base_index.end()) { + return {}; + } + + auto bucket = (*iterator).value.getIndex(getMemspace()); + auto bucketIterator = bucket.beginJoin(1); + while (!bucketIterator.is_end()) { + auto candidateAddress = *bucketIterator; + auto candidate = lookupCandidate(initializer, candidateAddress); + if (!!candidate.get()) { + return candidate; + } + ++bucketIterator; + } + return {}; } - std::optional ContentIndex::lookup(const ImmutableObjectInitializer &initializer) const + std::optional ContentIndex::lookupAddress(const ImmutableObjectInitializer &initializer) const { flush(); @@ -164,7 +201,7 @@ namespace db0::object_model auto bucketIterator = bucket.beginJoin(1); while (!bucketIterator.is_end()) { auto candidateAddress = *bucketIterator; - if (candidateMatches(initializer, candidateAddress)) { + if (intern_compare(fixture, initializer, candidateObjectView(candidateAddress)) == 0) { return candidateAddress; } ++bucketIterator; diff --git a/src/dbzero/object_model/object/ContentIndex.hpp b/src/dbzero/object_model/object/ContentIndex.hpp index 6a3c862e2..9a3caa85c 100644 --- a/src/dbzero/object_model/object/ContentIndex.hpp +++ b/src/dbzero/object_model/object/ContentIndex.hpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -28,6 +29,7 @@ namespace db0::object_model class Class; class ClassFactory; + class o_embedded_object; DB0_PACKED_BEGIN struct DB0_PACKED_ATTR o_content_index: public db0::o_fixed_versioned @@ -69,6 +71,7 @@ DB0_PACKED_END using BucketIndexT = ContentBucketIndex; using BucketItemT = db0::key_value; using BaseIndexT = db0::v_bindex; + using ObjectSharedPtr = LangConfig::ObjectSharedPtr; ContentIndex(db0::swine_ptr &, std::shared_ptr); ContentIndex(mptr, db0::swine_ptr &, std::shared_ptr); ~ContentIndex(); @@ -77,7 +80,8 @@ DB0_PACKED_END void remove(const o_embedded_object &, UniqueAddress) const; bool contains(const o_embedded_object &, UniqueAddress) const; bool contains(const ImmutableObjectInitializer &, UniqueAddress) const; - std::optional lookup(const ImmutableObjectInitializer &) const; + ObjectSharedPtr lookup(const ImmutableObjectInitializer &) const; + std::optional lookupAddress(const ImmutableObjectInitializer &) const; void rollback(); void flush() const; @@ -106,7 +110,8 @@ DB0_PACKED_END void decrementSize() const; bool contains(HashT, UniqueAddress) const; void resyncBucket(typename BaseIndexT::iterator &, const BucketIndexT &) const; - bool candidateMatches(const ImmutableObjectInitializer &, UniqueAddress) const; + ObjectSharedPtr lookupCandidate(const ImmutableObjectInitializer &, UniqueAddress) const; + const o_embedded_object &candidateObjectView(UniqueAddress) const; }; } diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp index 1097dff09..48b5e70df 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp @@ -294,9 +294,8 @@ namespace db0::object_model auto &type = *this->m_type; if (type.isIntern()) { auto candidate = type.getContentIndex().lookup(*immutableInitializer); - if (candidate) { - InitManager::instance.tryCloseInitializer(*this); - return candidate; + if (!!candidate.get()) { + return bindToExistingInternRoot(fixture, std::move(candidate)); } } @@ -336,6 +335,33 @@ namespace db0::object_model return std::nullopt; } + UniqueAddress ObjectImmutableImpl::bindToExistingInternRoot(FixtureLock &fixture, ObjectSharedPtr candidate) + { + if (LangConfig::LangToolkit::isEmbeddedMemoObject(candidate.get())) { + auto address = LangConfig::LangToolkit::getMemoUniqueAddress(candidate.get()); + InitManager::instance.tryCloseInitializer(*this); + return address; + } + + using MemoImmutableObject = LangConfig::LangToolkit::TypeManager::MemoImmutableObject; + auto &candidateObject = reinterpret_cast(candidate.get())->ext(); + auto address = candidateObject.getUniqueAddress(); + ObjectStem stem(static_cast(candidateObject)); + auto type = this->m_type; + auto langObject = m_lang_object; + + // Intern lookup found an existing durable root for this value. The Python + // wrapper being initialized may still be used by caller code, so turn its + // native extension into a non-owning view of the canonical root instead of + // leaving it as a closed non-materialized initializer. + this->~ObjectImmutableImpl(); + new ((void *)this) ObjectImmutableImpl( + tag_no_gc(), *fixture, std::move(stem), std::move(type) + ); + m_lang_object = langObject; + return address; + } + void ObjectImmutableImpl::setLangObject(ObjectPtr object) const { m_lang_object = object; diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.hpp b/src/dbzero/object_model/object/ObjectImmutableImpl.hpp index 861832c97..a22e8d14a 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.hpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.hpp @@ -56,6 +56,7 @@ namespace db0::object_model void dropMembers(db0::swine_ptr &, Class &) const; private: + UniqueAddress bindToExistingInternRoot(FixtureLock &, ObjectSharedPtr); ObjectPtr getLangObject() const; mutable ObjectPtr m_lang_object = nullptr; diff --git a/tests/unit_tests/ContentIndexTest.cpp b/tests/unit_tests/ContentIndexTest.cpp index df1e59063..4b9c5f16c 100644 --- a/tests/unit_tests/ContentIndexTest.cpp +++ b/tests/unit_tests/ContentIndexTest.cpp @@ -105,7 +105,7 @@ namespace tests ObjectImmutableImpl probe(type); auto &initializer = setInitializerValue(probe, type, 43); - ASSERT_FALSE(index.lookup(initializer).has_value()); + ASSERT_FALSE(index.lookupAddress(initializer).has_value()); } TEST_F(ContentIndexTest, testLookupMissesSameFieldsFromDifferentClass) @@ -119,7 +119,20 @@ namespace tests ObjectImmutableImpl probe(lookupType); auto &initializer = setInitializerValue(probe, lookupType, 42); - ASSERT_FALSE(index.lookup(initializer).has_value()); + ASSERT_FALSE(index.lookupAddress(initializer).has_value()); + } + + TEST_F(ContentIndexTest, testLookupAddressReturnsMatchingAddress) + { + auto type = makeClass("ContentIndexLookupObject"); + auto object = makeObject(type, 42); + auto &index = type->getContentIndex(); + index.insert((*object)->getObject(), object->getUniqueAddress()); + + ObjectImmutableImpl probe(type); + auto &initializer = setInitializerValue(probe, type, 42); + + ASSERT_EQ(index.lookupAddress(initializer), object->getUniqueAddress()); } TEST_F(ContentIndexTest, testRollbackDiscardsPendingInsert)