From 817193ba453c9fc0385312c321960572e2910939 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 14 May 2026 13:15:03 +0200 Subject: [PATCH 1/2] init data masking --- dbzero/dbzero/__init__.py | 3 +- dbzero/dbzero/dbzero.pyi | 9 + python_tests/test_data_masking.py | 133 +++++++++++++++ src/dbzero/bindings/python/PyAPI.cpp | 223 ++++++++++++++++++++++++- src/dbzero/bindings/python/PyAPI.hpp | 2 + src/dbzero/bindings/python/dbzero.cpp | 1 + src/dbzero/workspace/Fixture.cpp | 19 ++- src/dbzero/workspace/Fixture.hpp | 7 +- src/dbzero/workspace/Snapshot.cpp | 10 +- src/dbzero/workspace/Snapshot.hpp | 6 +- src/dbzero/workspace/Workspace.cpp | 49 +++++- src/dbzero/workspace/Workspace.hpp | 10 +- src/dbzero/workspace/WorkspaceView.cpp | 10 +- src/dbzero/workspace/WorkspaceView.hpp | 5 +- 14 files changed, 476 insertions(+), 11 deletions(-) create mode 100644 python_tests/test_data_masking.py diff --git a/dbzero/dbzero/__init__.py b/dbzero/dbzero/__init__.py index a0b4e9e8..ab72e42a 100644 --- a/dbzero/dbzero/__init__.py +++ b/dbzero/dbzero/__init__.py @@ -2,6 +2,7 @@ # Copyright (c) 2025 DBZero Software sp. z o.o. from .dbzero import * +from .dbzero import _init_data_masking from .memo import * from .enum import * from .fast_query import * @@ -13,4 +14,4 @@ from .decorators import * from .select import * from .compare import * -from .initialization import init \ No newline at end of file +from .initialization import init diff --git a/dbzero/dbzero/dbzero.pyi b/dbzero/dbzero/dbzero.pyi index da68fd37..f6aaf0c0 100644 --- a/dbzero/dbzero/dbzero.pyi +++ b/dbzero/dbzero/dbzero.pyi @@ -619,6 +619,15 @@ def get_field_access(class_obj: type, account_id: int) -> Iterable[Tuple[str, Tu """Return protected-field access flags for a memo class and account.""" ... +def _init_data_masking( + context_var: Any, + prefix: Union[str, Any, Sequence[Any], None] = None, + missing_value_placeholder: Optional[Any] = None, + mode: str = "RELEASE", +) -> None: + """Initialize prefix-scoped data masking for the current process.""" + ... + # Cache management def clear_cache() -> None: diff --git a/python_tests/test_data_masking.py b/python_tests/test_data_masking.py new file mode 100644 index 00000000..7eabe201 --- /dev/null +++ b/python_tests/test_data_masking.py @@ -0,0 +1,133 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# Copyright (c) 2025 DBZero Software sp. z o.o. + +from contextvars import ContextVar + +import pytest + +import dbzero as db0 + + +account_id = ContextVar("account_id") +missing_value = object() + + +def test_init_data_masking_prefix_scoped_lifecycle(db0_fixture): + current_prefix = db0.get_current_prefix() + + db0._init_data_masking( + account_id, + prefix=current_prefix, + missing_value_placeholder=missing_value, + mode="DEBUG", + ) + + db0._init_data_masking( + account_id, + prefix=current_prefix.name, + missing_value_placeholder=missing_value, + mode="DEBUG", + ) + + db0.open("data-masking-extra-prefix") + db0._init_data_masking( + account_id, + prefix=["data-masking-extra-prefix"], + missing_value_placeholder=missing_value, + mode="DEBUG", + ) + + +def test_init_data_masking_rejects_general_scope_until_implemented(db0_fixture): + with pytest.raises(NotImplementedError, match="prefix"): + db0._init_data_masking(account_id) + + +def test_init_data_masking_requires_open_mutable_prefix(db0_fixture): + with pytest.raises(ValueError, match="open.*read-write"): + db0._init_data_masking(account_id, prefix="not-opened") + + db0.open("readonly-data-masking-prefix") + db0.close("readonly-data-masking-prefix") + db0.open("readonly-data-masking-prefix", "r") + with pytest.raises(ValueError, match="open.*read-write"): + db0._init_data_masking(account_id, prefix="readonly-data-masking-prefix") + + +def test_init_data_masking_rejects_parameter_changes(db0_fixture): + db0._init_data_masking( + account_id, + prefix=db0.get_current_prefix(), + missing_value_placeholder=missing_value, + mode="DEBUG", + ) + + other_account_id = ContextVar("other_account_id") + with pytest.raises(RuntimeError, match="binding"): + db0._init_data_masking( + other_account_id, + prefix=db0.get_current_prefix(), + missing_value_placeholder=missing_value, + mode="DEBUG", + ) + + with pytest.raises(RuntimeError, match="binding"): + db0._init_data_masking( + account_id, + prefix=db0.get_current_prefix(), + missing_value_placeholder=missing_value, + mode="RELEASE", + ) + + with pytest.raises(RuntimeError, match="binding"): + db0._init_data_masking( + account_id, + prefix=db0.get_current_prefix(), + missing_value_placeholder=object(), + mode="DEBUG", + ) + + +def test_init_data_masking_defaults_mode_to_release(db0_fixture): + db0._init_data_masking(account_id, prefix=db0.get_current_prefix()) + + db0._init_data_masking( + account_id, + prefix=db0.get_current_prefix(), + mode="RELEASE", + ) + + with pytest.raises(RuntimeError, match="binding"): + db0._init_data_masking( + account_id, + prefix=db0.get_current_prefix(), + mode="DEBUG", + ) + + +def test_init_data_masking_binding_survives_prefix_reopen(db0_fixture): + prefix_name = db0.get_current_prefix().name + + db0._init_data_masking(account_id, prefix=prefix_name) + db0.close(prefix_name) + db0.open(prefix_name) + + with pytest.raises(RuntimeError, match="binding"): + db0._init_data_masking( + ContextVar("reopened_prefix_account_id"), + prefix=prefix_name, + ) + + +def test_init_data_masking_allows_different_bindings_for_different_prefixes(db0_fixture): + db0.open("first-data-masking-binding") + db0._init_data_masking(account_id, prefix="first-data-masking-binding", mode="DEBUG") + + other_account_id = ContextVar("different_prefix_account_id") + db0.open("different-data-masking-binding") + db0._init_data_masking( + other_account_id, + prefix="different-data-masking-binding", + missing_value_placeholder=object(), + mode="RELEASE", + ) diff --git a/src/dbzero/bindings/python/PyAPI.cpp b/src/dbzero/bindings/python/PyAPI.cpp index 9d2f420e..9cdded5d 100644 --- a/src/dbzero/bindings/python/PyAPI.cpp +++ b/src/dbzero/bindings/python/PyAPI.cpp @@ -39,6 +39,51 @@ #include #include #include +#include +#include +#include + +namespace db0 + +{ + + enum class DataMaskingMode + { + RELEASE, + DEBUG + }; + + struct DataMaskingState + { + PyObject *contextVar = nullptr; + PyObject *missingValuePlaceholder = nullptr; + bool hasMissingValuePlaceholder = false; + DataMaskingMode mode = DataMaskingMode::RELEASE; + + DataMaskingState(PyObject *contextVar, PyObject *missingValuePlaceholder, + bool hasMissingValuePlaceholder, DataMaskingMode mode) + : contextVar(contextVar) + , missingValuePlaceholder(missingValuePlaceholder) + , hasMissingValuePlaceholder(hasMissingValuePlaceholder) + , mode(mode) + { + Py_INCREF(contextVar); + if (missingValuePlaceholder) { + Py_INCREF(missingValuePlaceholder); + } + } + + bool matches(PyObject *otherContextVar, PyObject *otherMissingValuePlaceholder, + bool otherHasMissingValuePlaceholder, DataMaskingMode otherMode) const + { + return contextVar == otherContextVar + && missingValuePlaceholder == otherMissingValuePlaceholder + && hasMissingValuePlaceholder == otherHasMissingValuePlaceholder + && mode == otherMode; + } + }; + +} namespace db0::python @@ -46,6 +91,98 @@ namespace db0::python using ObjectSharedPtr = PyTypes::ObjectSharedPtr; + namespace + { + DataMaskingMode parseDataMaskingMode(PyObject *pyMode) + { + if (!pyMode || pyMode == Py_None) { + return DataMaskingMode::RELEASE; + } + if (!PyUnicode_Check(pyMode)) { + PyErr_SetString(PyExc_TypeError, "mode must be either 'DEBUG' or 'RELEASE'"); + return DataMaskingMode::RELEASE; + } + + auto mode = PyUnicode_AsUTF8(pyMode); + if (!mode) { + return DataMaskingMode::RELEASE; + } + if (strcmp(mode, "DEBUG") == 0) { + return DataMaskingMode::DEBUG; + } + if (strcmp(mode, "RELEASE") == 0) { + return DataMaskingMode::RELEASE; + } + + PyErr_SetString(PyExc_ValueError, "mode must be either 'DEBUG' or 'RELEASE'"); + return DataMaskingMode::RELEASE; + } + + std::optional tryExtractPrefixName(PyObject *pyPrefix) + { + if (PyUnicode_Check(pyPrefix)) { + auto prefix = PyUnicode_AsUTF8(pyPrefix); + if (!prefix) { + return {}; + } + return std::string(prefix); + } + + if (PyTuple_Check(pyPrefix) && PyTuple_Size(pyPrefix) == 2) { + auto pyName = PyTuple_GetItem(pyPrefix, 0); + auto pyUuid = PyTuple_GetItem(pyPrefix, 1); + if (PyUnicode_Check(pyName) && PyLong_Check(pyUuid)) { + auto prefix = PyUnicode_AsUTF8(pyName); + if (!prefix) { + return {}; + } + return std::string(prefix); + } + } + + return {}; + } + + bool appendPrefixSpec(PyObject *pyPrefix, std::vector &prefixes) + { + if (auto prefixName = tryExtractPrefixName(pyPrefix)) { + prefixes.push_back(*prefixName); + return true; + } + + auto iterator = Py_OWN(PyObject_GetIter(pyPrefix)); + if (!iterator) { + PyErr_SetString(PyExc_TypeError, "prefix must be a string, PrefixMetaData, or a sequence of those values"); + return false; + } + + Py_FOR(item, iterator) { + if (auto prefixName = tryExtractPrefixName(*item)) { + prefixes.push_back(*prefixName); + continue; + } + PyErr_SetString(PyExc_TypeError, "prefix sequence items must be strings or PrefixMetaData values"); + return false; + } + + if (PyErr_Occurred()) { + return false; + } + return true; + } + + bool isOpenReadWriteFixture(const std::string &prefixName) + { + auto &workspace = PyToolkit::getPyWorkspace().getWorkspace(); + auto fixture = workspace.tryFindFixture(PrefixName(prefixName)); + if (!fixture || fixture->getAccessType() != AccessType::READ_WRITE) { + PyErr_SetString(PyExc_ValueError, "data masking prefix must be open in read-write mode"); + return false; + } + return true; + } + } + PyObject *tryGetCacheStats() { auto &workspace = PyToolkit::getPyWorkspace().getWorkspace(); @@ -609,12 +746,94 @@ namespace db0::python Py_RETURN_NONE; } - PyObject *renameField(PyObject *, PyObject *args, PyObject *kwargs) + PyObject *renameField(PyObject *, PyObject *args, PyObject *kwargs) { - PY_API_FUNC + PY_API_FUNC return runSafe(tryRenameField, args, kwargs); } + PyObject *tryInitDataMasking(PyObject *args, PyObject *kwargs) + { + PyObject *pyContextVar = nullptr; + PyObject *pyPrefix = nullptr; + PyObject *pyMissingValuePlaceholder = nullptr; + PyObject *pyMode = nullptr; + static const char *kwlist[] = { + "context_var", "prefix", "missing_value_placeholder", "mode", NULL + }; + if (!PyArg_ParseTupleAndKeywords( + args, + kwargs, + "O|OOO:_init_data_masking", + const_cast(kwlist), + &pyContextVar, + &pyPrefix, + &pyMissingValuePlaceholder, + &pyMode)) { + return nullptr; + } + + if (!pyPrefix || pyPrefix == Py_None) { + PyErr_SetString(PyExc_NotImplementedError, "prefix-scoped data masking requires an explicit prefix"); + return nullptr; + } + + PyObject *contextValue = nullptr; + if (PyContextVar_Get(pyContextVar, NULL, &contextValue) < 0) { + PyErr_SetString(PyExc_TypeError, "context_var must be a contextvars.ContextVar"); + return nullptr; + } + Py_XDECREF(contextValue); + + auto mode = parseDataMaskingMode(pyMode); + if (PyErr_Occurred()) { + return nullptr; + } + + std::vector prefixes; + if (!appendPrefixSpec(pyPrefix, prefixes)) { + return nullptr; + } + if (prefixes.empty()) { + PyErr_SetString(PyExc_ValueError, "prefix must include at least one prefix"); + return nullptr; + } + + auto &workspace = PyToolkit::getPyWorkspace().getWorkspace(); + for (const auto &prefixName: prefixes) { + if (!isOpenReadWriteFixture(prefixName)) { + return nullptr; + } + } + + bool hasMissingValuePlaceholder = pyMissingValuePlaceholder && pyMissingValuePlaceholder != Py_None; + auto *missingValuePlaceholder = hasMissingValuePlaceholder ? pyMissingValuePlaceholder : nullptr; + auto binding = std::make_shared( + pyContextVar, missingValuePlaceholder, hasMissingValuePlaceholder, mode); + + for (const auto &prefixName: prefixes) { + auto prefix = PrefixName(prefixName); + auto existingState = workspace.getDataMaskingState(prefix); + if (existingState) { + if (!existingState->matches( + pyContextVar, missingValuePlaceholder, hasMissingValuePlaceholder, mode)) { + PyErr_SetString(PyExc_RuntimeError, "data masking binding for fixture cannot be changed"); + return nullptr; + } + continue; + } + workspace.initDataMasking(prefix, binding); + } + + Py_RETURN_NONE; + } + + PyObject *initDataMasking(PyObject *, PyObject *args, PyObject *kwargs) + { + PY_API_FUNC + return runSafe(tryInitDataMasking, args, kwargs); + } + namespace { std::vector extractAccountIDs(PyObject *py_account_id) diff --git a/src/dbzero/bindings/python/PyAPI.hpp b/src/dbzero/bindings/python/PyAPI.hpp index 78fc50a4..13261c9a 100644 --- a/src/dbzero/bindings/python/PyAPI.hpp +++ b/src/dbzero/bindings/python/PyAPI.hpp @@ -106,6 +106,8 @@ namespace db0::python PyObject *renameField(PyObject *self, PyObject *args, PyObject *kwargs); + PyObject *initDataMasking(PyObject *self, PyObject *args, PyObject *kwargs); + PyObject *setFieldAccess(PyObject *self, PyObject *args); PyObject *getFieldAccess(PyObject *self, PyObject *args); diff --git a/src/dbzero/bindings/python/dbzero.cpp b/src/dbzero/bindings/python/dbzero.cpp index 4476ce6b..4a0d2053 100644 --- a/src/dbzero/bindings/python/dbzero.cpp +++ b/src/dbzero/bindings/python/dbzero.cpp @@ -66,6 +66,7 @@ static PyMethodDef dbzero_methods[] = {"begin_locked", (PyCFunction)&py::PyAPI_beginLocked, METH_FASTCALL, "Enter a new locked section"}, {"describe", &py::describeObject, METH_VARARGS, "Get dbzero object's description"}, {"rename_field", (PyCFunction)&py::renameField, METH_VARARGS | METH_KEYWORDS, "Get snapshot of dbzero state"}, + {"_init_data_masking", (PyCFunction)&py::initDataMasking, METH_VARARGS | METH_KEYWORDS, "Initialize data masking for specific prefixes"}, {"set_field_access", (PyCFunction)&py::setFieldAccess, METH_VARARGS, "Set protected field access masks for a memo class"}, {"get_field_access", (PyCFunction)&py::getFieldAccess, METH_VARARGS, "Get protected field access masks for a memo class and account"}, {"is_singleton", &py::PyAPI_isSingleton, METH_VARARGS, "Check if a specific instance is a dbzero singleton"}, diff --git a/src/dbzero/workspace/Fixture.cpp b/src/dbzero/workspace/Fixture.cpp index 0f41ff35..ca36d4f3 100644 --- a/src/dbzero/workspace/Fixture.cpp +++ b/src/dbzero/workspace/Fixture.cpp @@ -295,9 +295,26 @@ namespace db0 { auto px_snapshot = m_prefix->getSnapshot(state_num); auto allocator_snapshot = std::make_shared(px_snapshot, m_meta_allocator.getSlabRecyclerPtr()); - return db0::make_swine( + auto result = db0::make_swine( workspace_view, m_v_object_cache.getSharedObjectList(), px_snapshot, allocator_snapshot ); + result->initMaskingState(workspace_view.getDataMaskingState(PrefixName(px_snapshot->getName()))); + return result; + } + + void Fixture::initMaskingState(std::shared_ptr state) + { + if (m_masking_state && state && m_masking_state != state) { + THROWF(db0::InternalException) << "Data masking state is already initialized for fixture"; + } + if (state) { + m_masking_state = std::move(state); + } + } + + std::shared_ptr Fixture::getMaskingState() const + { + return m_masking_state; } bool Fixture::commit() diff --git a/src/dbzero/workspace/Fixture.hpp b/src/dbzero/workspace/Fixture.hpp index ec67ef5f..ced527d8 100644 --- a/src/dbzero/workspace/Fixture.hpp +++ b/src/dbzero/workspace/Fixture.hpp @@ -34,6 +34,7 @@ namespace db0 DB0_PACKED_BEGIN class GC0; + struct DataMaskingState; class MetaAllocator; class Snapshot; class Workspace; @@ -280,6 +281,9 @@ DB0_PACKED_BEGIN void registerPrefixStateReachedCallback(StateNumType state_num, std::unique_ptr &&callback); PrefixName tryGetPrefixName() const; + + void initMaskingState(std::shared_ptr); + std::shared_ptr getMaskingState() const; private: const AccessType m_access_type; @@ -319,6 +323,7 @@ DB0_PACKED_BEGIN // flush handlers, to release some memory on resource exhaustion std::vector > m_flush_handlers; std::list > m_mutation_handlers; + std::shared_ptr m_masking_state; std::uint64_t getUUID(MetaAllocator &); @@ -419,4 +424,4 @@ DB0_PACKED_BEGIN DB0_PACKED_END -} \ No newline at end of file +} diff --git a/src/dbzero/workspace/Snapshot.cpp b/src/dbzero/workspace/Snapshot.cpp index 7ecb56b7..1109a166 100644 --- a/src/dbzero/workspace/Snapshot.cpp +++ b/src/dbzero/workspace/Snapshot.cpp @@ -64,5 +64,13 @@ namespace db0 std::optional Snapshot::tryGetAccessType() const { return std::nullopt; } + + std::shared_ptr Snapshot::getDataMaskingState() const { + return {}; + } + + std::shared_ptr Snapshot::getDataMaskingState(const PrefixName &) const { + return {}; + } -} \ No newline at end of file +} diff --git a/src/dbzero/workspace/Snapshot.hpp b/src/dbzero/workspace/Snapshot.hpp index ffa4a5b5..2eb7aa69 100644 --- a/src/dbzero/workspace/Snapshot.hpp +++ b/src/dbzero/workspace/Snapshot.hpp @@ -15,6 +15,7 @@ namespace db0 { class Fixture; + struct DataMaskingState; class LangCache; class PrefixName; class ProcessTimer; @@ -53,6 +54,9 @@ namespace db0 virtual std::shared_ptr getLangCache() const = 0; virtual bool isMutable() const = 0; + + virtual std::shared_ptr getDataMaskingState() const; + virtual std::shared_ptr getDataMaskingState(const PrefixName &) const; db0::swine_ptr findFixture(const PrefixName &) const; @@ -78,4 +82,4 @@ namespace db0 // throws if the requested access type is not allowed void assureAccessType(const Fixture &fixture, std::optional requested); -} \ No newline at end of file +} diff --git a/src/dbzero/workspace/Workspace.cpp b/src/dbzero/workspace/Workspace.cpp index 7eddfd36..65541132 100644 --- a/src/dbzero/workspace/Workspace.cpp +++ b/src/dbzero/workspace/Workspace.cpp @@ -359,6 +359,7 @@ namespace db0 Fixture::formatFixture(Memspace(prefix, allocator), *allocator); } auto fixture = db0::make_swine(*this, prefix, allocator, m_next_locked_section_id); + fixture->initMaskingState(getDataMaskingState(prefix_name)); if (m_fixture_initializer) { // initialize fixture with a model-specific initializer m_fixture_initializer(fixture, file_created, read_only, false); @@ -551,6 +552,52 @@ namespace db0 } return result; } + + void Workspace::initDataMasking(std::shared_ptr state) + { + if (!m_prefix_data_masking_states.empty()) { + THROWF(db0::InputException) << "Data masking is already configured per prefix"; + } + if (m_data_masking_state && m_data_masking_state != state) { + THROWF(db0::InputException) << "Data masking is already configured for the workspace"; + } + m_data_masking_state = std::move(state); + for (auto &[uuid, fixture]: m_fixtures) { + fixture->initMaskingState(m_data_masking_state); + } + } + + void Workspace::initDataMasking(const PrefixName &prefix_name, std::shared_ptr state) + { + if (m_data_masking_state) { + THROWF(db0::InputException) << "Data masking is already configured for the workspace"; + } + auto [it, inserted] = m_prefix_data_masking_states.emplace(prefix_name.get(), state); + if (!inserted && it->second != state) { + THROWF(db0::InputException) << "Data masking is already configured for prefix: " << prefix_name; + } + auto fixture = tryFindFixture(prefix_name); + if (fixture) { + fixture->initMaskingState(it->second); + } + } + + std::shared_ptr Workspace::getDataMaskingState() const + { + return m_data_masking_state; + } + + std::shared_ptr Workspace::getDataMaskingState(const PrefixName &prefix_name) const + { + if (m_data_masking_state) { + return m_data_masking_state; + } + auto it = m_prefix_data_masking_states.find(prefix_name.get()); + if (it == m_prefix_data_masking_states.end()) { + return {}; + } + return it->second; + } db0::swine_ptr Workspace::getCurrentFixture() { @@ -817,4 +864,4 @@ namespace db0 return std::nullopt; } -} \ No newline at end of file +} diff --git a/src/dbzero/workspace/Workspace.hpp b/src/dbzero/workspace/Workspace.hpp index 16fc1524..2d419394 100644 --- a/src/dbzero/workspace/Workspace.hpp +++ b/src/dbzero/workspace/Workspace.hpp @@ -34,6 +34,7 @@ namespace db0 class RefreshThread; class AutoCommitThread; class AtomicContext; + struct DataMaskingState; class LangCache; class Config; class WorkspaceView; @@ -283,6 +284,11 @@ namespace db0 void clearCache() const; const FixtureCatalog &getFixtureCatalog() const; + + void initDataMasking(std::shared_ptr); + void initDataMasking(const PrefixName &, std::shared_ptr); + std::shared_ptr getDataMaskingState() const override; + std::shared_ptr getDataMaskingState(const PrefixName &) const override; std::shared_ptr getWorkspaceView( std::optional state_num = {}, @@ -331,6 +337,8 @@ namespace db0 std::unordered_set m_locked_section_ids; // log of prefixes closed inside locked sections std::unordered_map > > m_locked_section_log; + std::shared_ptr m_data_masking_state; + std::unordered_map > m_prefix_data_masking_states; // this is to prevent recursive cleanups (which might result in a deadlock) mutable std::atomic m_cleanup_pending = false; @@ -345,4 +353,4 @@ namespace db0 std::shared_ptr getWorkspaceHeadView() const; }; -} \ No newline at end of file +} diff --git a/src/dbzero/workspace/WorkspaceView.cpp b/src/dbzero/workspace/WorkspaceView.cpp index 02d1880c..7fa83474 100644 --- a/src/dbzero/workspace/WorkspaceView.cpp +++ b/src/dbzero/workspace/WorkspaceView.cpp @@ -265,6 +265,14 @@ namespace db0 bool WorkspaceView::isMutable() const { return false; } + + std::shared_ptr WorkspaceView::getDataMaskingState() const { + return m_workspace_ptr->getDataMaskingState(); + } + + std::shared_ptr WorkspaceView::getDataMaskingState(const PrefixName &prefix_name) const { + return m_workspace_ptr->getDataMaskingState(prefix_name); + } db0::swine_ptr WorkspaceView::tryFindFixture(const PrefixName &prefix_name) const { @@ -296,4 +304,4 @@ namespace db0 return m_state_nums.size(); } -} \ No newline at end of file +} diff --git a/src/dbzero/workspace/WorkspaceView.hpp b/src/dbzero/workspace/WorkspaceView.hpp index eb20dce6..4ac396f3 100644 --- a/src/dbzero/workspace/WorkspaceView.hpp +++ b/src/dbzero/workspace/WorkspaceView.hpp @@ -42,6 +42,9 @@ namespace db0 bool isMutable() const override; + std::shared_ptr getDataMaskingState() const override; + std::shared_ptr getDataMaskingState(const PrefixName &) const override; + Snapshot &getHeadWorkspace() const override; std::optional tryGetAccessType() const override; @@ -85,4 +88,4 @@ namespace db0 std::optional tryGetFixtureUUID(const PrefixName &) const; }; -} \ No newline at end of file +} From f2b720fae8eb187fccbacd3ca893375085933c86 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 14 May 2026 13:24:31 +0200 Subject: [PATCH 2/2] post-review cleanups and fixes --- dbzero/dbzero/dbzero.pyi | 2 +- python_tests/test_data_masking.py | 24 ++++++++++++----- src/dbzero/bindings/python/PyAPI.cpp | 39 +++++++++++++++++----------- tests/unit_tests/WorkspaceTest.cpp | 34 ++++++++++++++++++++++++ 4 files changed, 76 insertions(+), 23 deletions(-) diff --git a/dbzero/dbzero/dbzero.pyi b/dbzero/dbzero/dbzero.pyi index f6aaf0c0..999324f7 100644 --- a/dbzero/dbzero/dbzero.pyi +++ b/dbzero/dbzero/dbzero.pyi @@ -625,7 +625,7 @@ def _init_data_masking( missing_value_placeholder: Optional[Any] = None, mode: str = "RELEASE", ) -> None: - """Initialize prefix-scoped data masking for the current process.""" + """Initialize workspace-wide or prefix-scoped data masking for the current process.""" ... # Cache management diff --git a/python_tests/test_data_masking.py b/python_tests/test_data_masking.py index 7eabe201..67ed5db2 100644 --- a/python_tests/test_data_masking.py +++ b/python_tests/test_data_masking.py @@ -38,20 +38,30 @@ def test_init_data_masking_prefix_scoped_lifecycle(db0_fixture): ) -def test_init_data_masking_rejects_general_scope_until_implemented(db0_fixture): - with pytest.raises(NotImplementedError, match="prefix"): - db0._init_data_masking(account_id) +def test_init_data_masking_general_scope_lifecycle(db0_fixture): + db0._init_data_masking(account_id) + db0._init_data_masking(account_id, mode="RELEASE") -def test_init_data_masking_requires_open_mutable_prefix(db0_fixture): - with pytest.raises(ValueError, match="open.*read-write"): + with pytest.raises(RuntimeError, match="binding"): + db0._init_data_masking(ContextVar("other_general_account_id")) + + db0.open("data-masking-general-prefix") + with pytest.raises(RuntimeError, match="binding"): + db0._init_data_masking( + ContextVar("other_general_prefix_account_id"), + prefix="data-masking-general-prefix", + ) + + +def test_init_data_masking_requires_open_prefix(db0_fixture): + with pytest.raises(ValueError, match="open"): db0._init_data_masking(account_id, prefix="not-opened") db0.open("readonly-data-masking-prefix") db0.close("readonly-data-masking-prefix") db0.open("readonly-data-masking-prefix", "r") - with pytest.raises(ValueError, match="open.*read-write"): - db0._init_data_masking(account_id, prefix="readonly-data-masking-prefix") + db0._init_data_masking(account_id, prefix="readonly-data-masking-prefix") def test_init_data_masking_rejects_parameter_changes(db0_fixture): diff --git a/src/dbzero/bindings/python/PyAPI.cpp b/src/dbzero/bindings/python/PyAPI.cpp index 9cdded5d..3ff06ba2 100644 --- a/src/dbzero/bindings/python/PyAPI.cpp +++ b/src/dbzero/bindings/python/PyAPI.cpp @@ -171,12 +171,12 @@ namespace db0::python return true; } - bool isOpenReadWriteFixture(const std::string &prefixName) + bool isOpenFixture(const std::string &prefixName) { auto &workspace = PyToolkit::getPyWorkspace().getWorkspace(); auto fixture = workspace.tryFindFixture(PrefixName(prefixName)); - if (!fixture || fixture->getAccessType() != AccessType::READ_WRITE) { - PyErr_SetString(PyExc_ValueError, "data masking prefix must be open in read-write mode"); + if (!fixture) { + PyErr_SetString(PyExc_ValueError, "data masking prefix must be open"); return false; } return true; @@ -773,11 +773,6 @@ namespace db0::python return nullptr; } - if (!pyPrefix || pyPrefix == Py_None) { - PyErr_SetString(PyExc_NotImplementedError, "prefix-scoped data masking requires an explicit prefix"); - return nullptr; - } - PyObject *contextValue = nullptr; if (PyContextVar_Get(pyContextVar, NULL, &contextValue) < 0) { PyErr_SetString(PyExc_TypeError, "context_var must be a contextvars.ContextVar"); @@ -790,6 +785,26 @@ namespace db0::python return nullptr; } + auto &workspace = PyToolkit::getPyWorkspace().getWorkspace(); + bool hasMissingValuePlaceholder = pyMissingValuePlaceholder && pyMissingValuePlaceholder != Py_None; + auto *missingValuePlaceholder = hasMissingValuePlaceholder ? pyMissingValuePlaceholder : nullptr; + auto binding = std::make_shared( + pyContextVar, missingValuePlaceholder, hasMissingValuePlaceholder, mode); + + if (!pyPrefix || pyPrefix == Py_None) { + auto existingState = workspace.getDataMaskingState(); + if (existingState) { + if (!existingState->matches( + pyContextVar, missingValuePlaceholder, hasMissingValuePlaceholder, mode)) { + PyErr_SetString(PyExc_RuntimeError, "data masking binding for workspace cannot be changed"); + return nullptr; + } + Py_RETURN_NONE; + } + workspace.initDataMasking(binding); + Py_RETURN_NONE; + } + std::vector prefixes; if (!appendPrefixSpec(pyPrefix, prefixes)) { return nullptr; @@ -799,18 +814,12 @@ namespace db0::python return nullptr; } - auto &workspace = PyToolkit::getPyWorkspace().getWorkspace(); for (const auto &prefixName: prefixes) { - if (!isOpenReadWriteFixture(prefixName)) { + if (!isOpenFixture(prefixName)) { return nullptr; } } - bool hasMissingValuePlaceholder = pyMissingValuePlaceholder && pyMissingValuePlaceholder != Py_None; - auto *missingValuePlaceholder = hasMissingValuePlaceholder ? pyMissingValuePlaceholder : nullptr; - auto binding = std::make_shared( - pyContextVar, missingValuePlaceholder, hasMissingValuePlaceholder, mode); - for (const auto &prefixName: prefixes) { auto prefix = PrefixName(prefixName); auto existingState = workspace.getDataMaskingState(prefix); diff --git a/tests/unit_tests/WorkspaceTest.cpp b/tests/unit_tests/WorkspaceTest.cpp index 20ccf295..19722ca4 100644 --- a/tests/unit_tests/WorkspaceTest.cpp +++ b/tests/unit_tests/WorkspaceTest.cpp @@ -19,6 +19,13 @@ using namespace db0::tests; namespace tests { + + std::shared_ptr makeTestMaskingState(std::uintptr_t value) + { + return std::shared_ptr( + reinterpret_cast(value), + [](DataMaskingState *) {}); + } class WorkspaceTest: public testing::Test { @@ -112,6 +119,33 @@ namespace tests v_object obj(snap->myPtr(address)); ASSERT_EQ(obj->a, 7); } + + TEST_F( WorkspaceTest , testWorkspaceViewFixtureByNameKeepsWorkspaceMaskingState ) + { + auto masking_state = makeTestMaskingState(1); + m_workspace.initDataMasking(masking_state); + + auto fixture = m_workspace.getFixture(getPrefixName()); + fixture->commit(); + + auto workspace_view = m_workspace.getWorkspaceView(fixture->getStateNum()); + auto snapshot_fixture = workspace_view->getFixture(getPrefixName(), AccessType::READ_ONLY); + + ASSERT_EQ(snapshot_fixture->getMaskingState(), masking_state); + } + + TEST_F( WorkspaceTest , testWorkspaceViewFixtureByUuidKeepsPrefixMaskingState ) + { + auto fixture = m_workspace.getFixture(getPrefixName()); + auto masking_state = makeTestMaskingState(2); + m_workspace.initDataMasking(getPrefixName(), masking_state); + fixture->commit(); + + auto workspace_view = m_workspace.getWorkspaceView(fixture->getStateNum()); + auto snapshot_fixture = workspace_view->getFixture(fixture->getUUID(), AccessType::READ_ONLY); + + ASSERT_EQ(snapshot_fixture->getMaskingState(), masking_state); + } TEST_F( WorkspaceTest , testFreeCanBePerformedBetweenTransactions ) {