From db67b70d0f1572c5bf8eb4fa970f0013a6dc584d Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 27 May 2026 14:45:24 +0200 Subject: [PATCH 01/11] data filters functionality --- dbzero/dbzero/__init__.py | 2 +- dbzero/dbzero/dbzero.pyi | 8 + dbzero/dbzero/initialization.py | 9 +- design/DATA_FILTERS_DESIGN.md | 301 +++++++++++++++++++++ python_tests/test_data_filter.py | 172 ++++++++++++ src/dbzero/bindings/python/DataMasking.hpp | 19 ++ src/dbzero/bindings/python/PyAPI.cpp | 93 ++++++- src/dbzero/bindings/python/PyAPI.hpp | 2 + src/dbzero/bindings/python/dbzero.cpp | 1 + src/dbzero/core/memory/config.cpp | 2 + src/dbzero/core/memory/config.hpp | 4 + src/dbzero/workspace/Fixture.cpp | 16 ++ src/dbzero/workspace/Fixture.hpp | 4 + src/dbzero/workspace/Snapshot.cpp | 8 + src/dbzero/workspace/Snapshot.hpp | 3 + src/dbzero/workspace/Workspace.cpp | 62 +++++ src/dbzero/workspace/Workspace.hpp | 8 + src/dbzero/workspace/WorkspaceView.cpp | 8 + src/dbzero/workspace/WorkspaceView.hpp | 2 + tests/unit_tests/WorkspaceTest.cpp | 60 ++++ 20 files changed, 779 insertions(+), 5 deletions(-) create mode 100644 design/DATA_FILTERS_DESIGN.md create mode 100644 python_tests/test_data_filter.py diff --git a/dbzero/dbzero/__init__.py b/dbzero/dbzero/__init__.py index fb95affd..97406d45 100644 --- a/dbzero/dbzero/__init__.py +++ b/dbzero/dbzero/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) 2025 DBZero Software sp. z o.o. from .dbzero import * -from .dbzero import _check_interned, _init_data_masking +from .dbzero import _check_interned, _init_data_filter, _init_data_masking from .memo import * from .enum import * from .fast_query import * diff --git a/dbzero/dbzero/dbzero.pyi b/dbzero/dbzero/dbzero.pyi index a353b3ef..df8736c1 100644 --- a/dbzero/dbzero/dbzero.pyi +++ b/dbzero/dbzero/dbzero.pyi @@ -646,6 +646,14 @@ def _init_data_masking( """Initialize workspace-wide or prefix-scoped data masking for the current process.""" ... 
+def _init_data_filter( + context_var: Any, + prefix: Union[str, Any, Sequence[Any], None] = None, + mode: str = "RELEASE", +) -> None: + """Initialize workspace-wide or prefix-scoped data filtering for the current process.""" + ... + # Cache management def clear_cache() -> None: diff --git a/dbzero/dbzero/initialization.py b/dbzero/dbzero/initialization.py index 4ad908cf..7509cdf5 100644 --- a/dbzero/dbzero/initialization.py +++ b/dbzero/dbzero/initialization.py @@ -4,7 +4,7 @@ """dbzero initialization functions""" from collections.abc import Mapping from typing import Any -from .dbzero import _init, _init_data_masking, open as dbzero_open +from .dbzero import _init, _init_data_filter, _init_data_masking, open as dbzero_open def init(dbzero_root: str, **kwargs: Any) -> None: """Initialize the dbzero environment in a specified directory and apply global configurations. @@ -31,6 +31,7 @@ def init(dbzero_root: str, **kwargs: Any) -> None: * lang_cache_size (int, default 1024) for language model data cache size * lock_flags (dict) to configure locking behavior when opening the prefix in read-write mode * data_masking (dict) to initialize data masking via _init_data_masking + * data_filter (dict) to initialize data filtering via _init_data_filter Lock flags (dict): * blocking (bool, default False) wait when trying to acquire the lock @@ -63,3 +64,9 @@ def init(dbzero_root: str, **kwargs: Any) -> None: if not isinstance(data_masking, Mapping): raise TypeError("data_masking must be a mapping") _init_data_masking(**data_masking) + + if "data_filter" in kwargs: + data_filter = kwargs["data_filter"] + if not isinstance(data_filter, Mapping): + raise TypeError("data_filter must be a mapping") + _init_data_filter(**data_filter) diff --git a/design/DATA_FILTERS_DESIGN.md b/design/DATA_FILTERS_DESIGN.md new file mode 100644 index 00000000..4c56e875 --- /dev/null +++ b/design/DATA_FILTERS_DESIGN.md @@ -0,0 +1,301 @@ +# Data Filters Design + +This is a design document for dbzero 
data filters: a security mechanism that prevents unauthorized access to durable objects through `find`, deserialized queries, `fetch`, and object references. + +## Goal + +Data filters let application developers declare access-controlled memo types and provide an execution-context predicate that dbzero applies automatically at every application-visible object access boundary. The application should not need to duplicate fragile authorization checks around each query, fetch, or field dereference. + +The mechanism is intentionally conservative. Once a type is marked as access controlled, dbzero must assume that every access to instances of that type needs filtering unless explicitly running in debug mode with a null predicate. + +## Python API + +Data filters are initialized process-locally: + +```python +from contextvars import ContextVar + +predicate: ContextVar[db0.ObjectIterable | None] = ContextVar("predicate") + +db0._init_data_filter( + predicate, + prefix=get_current_prefix(), + mode="DEBUG", +) +``` + +Arguments: + +- `context_var`: a `ContextVar` that yields the current filtering predicate (the `predicate` variable in the example above). The value is an `ObjectIterable` query expression or `None`. +- `prefix`: an optional prefix name or collection of prefix names where filtering is enabled. `None` means all current prefixes and prefixes opened later. +- `mode`: optional mode string. The default is `RELEASE`, and `None` is treated the same as `RELEASE`. `DEBUG` must be specified explicitly. `DEBUG` allows a null predicate and treats it as filtering disabled. `RELEASE` requires a non-null predicate whenever an access-controlled object is read. + +Access-controlled memo types are declared with the memo decorator: + +```python +@db0.memo(access_control=True) +@dataclass +class RestrictedData: + value: str +``` + +A predicate is not limited to a plain list of tags. 
It can be any dbzero `ObjectIterable` query expression, including a complex tag-based statement composed from tags, object references, nested `find` queries, alternatives, negation, and other query operators. + +A simple predicate can grant access through an explicit tag relation: + +```python +pred = db0.find(db0.as_tag("GRANT-ACCESS", account)) +predicate.set(pred) +``` + +A more selective predicate can combine multiple query clauses: + +```python +pred = db0.find( + [ + db0.as_tag("GRANT-ACCESS", account), + db0.as_tag("GRANT-ACCESS", "PUBLIC"), + ], + db0.no(db0.as_tag("DENY-ACCESS", account)), +) +predicate.set(pred) +``` + +## Activation Model + +Initialization only enables data filtering for selected prefixes. It does not make every type restricted. + +Filtering is active for an object access when: + +- The target prefix is configured for data filtering. +- The access target is restricted. +- The operation is not in `DEBUG` mode with a null predicate. + +An access target is restricted when any of these are true: + +- The target type is directly decorated with `access_control=True`. +- The target type inherits from a base type decorated with `access_control=True`. +- The queried type has any descendant decorated with `access_control=True` that may appear in that query's result set. + +The descendant-to-base rule is necessary for typed base queries. If any `MemoBase` descendant defines access restriction, the whole base query must be evaluated with the current predicate. This effectively filters inaccessible objects out of the complete result set, rather than trying to filter only restricted descendants after results have already been produced. Restricted derived classes therefore make their queryable bases require the same protections. + +## Security Invariants + +The implementation must preserve these invariants: + +- Typeless `find` is not allowed for filtered prefixes. 
+- An access-controlled type cannot be queried unless data filtering has been initialized for the relevant prefix. +- An access-controlled object cannot be fetched unless the current predicate includes it. +- An access-controlled object cannot be exposed through an application-visible durable reference unless the current predicate includes it. +- Deserialized queries must be checked as if they had been constructed in-process. +- Null predicates are allowed only in `DEBUG` mode. +- Missing initialization, missing predicates, and rejected objects raise `PermissionError`, except when a public UUID fetch is excluded by the predicate; that case is reported with the same error as a missing object. +- UUID fetch must not distinguish between a missing object and an unauthorized access-controlled object. + +These checks are part of dbzero's access path, not a convenience wrapper around public APIs. + +## `find` Behavior + +When `db0.find(...)` is called, dbzero first determines the requested prefix and whether data filtering is enabled for that prefix. + +If prefix-level filtering is enabled: + +- A query without an explicit type raises `PermissionError`. +- This applies to direct calls and deserialized queries. + +If a query has an explicit type, dbzero checks whether that type requires access control. This type check happens even when prefix-level data filtering is disabled. If the type is access controlled but data filters are not initialized for the prefix, dbzero raises `PermissionError` explaining that data filtering must be initialized before the query can run. + +If the type is access controlled and filtering is initialized: + +- Resolve the predicate from the configured `ContextVar`. +- If the predicate is `None` and mode is not `DEBUG`, raise `PermissionError`. +- If the predicate is `None` and mode is `DEBUG`, run the original typed query without adding a filter. +- If the predicate is non-null, attach it to the query before sorting or range/index ordering is applied. 
+ +Conceptually: + +```python +query = db0.find(RestrictedData, requested_tags) +secured = db0.find(query, predicate.get()) +result = index.sort(secured) +``` + +The predicate is an additional intersection constraint. It must not widen the result set. + +## `fetch` Behavior + +`fetch` can begin without knowing the target type, especially for UUID-based fetches. dbzero should unload only enough object metadata to determine the object's type. + +Once the type is known: + +- If the type is not access controlled, continue with normal fetch behavior. +- If the type is access controlled and filtering is not initialized for the object's prefix, raise `PermissionError`. +- Resolve the predicate from the configured `ContextVar`. +- If the predicate is `None` and mode is not `DEBUG`, raise `PermissionError`. +- If the predicate is `None` and mode is `DEBUG`, continue with normal fetch behavior. +- If the predicate is non-null, test the single object against the predicate. + +The authorization check can be represented as: + +```python +allowed = db0.find(obj, predicate) +if not allowed: + raise PermissionError # or the missing-object error for public UUID fetch +``` + +The object is returned only if the single-instance filtered query contains that object. + +For UUID-based fetch, dbzero must use a non-distinguishing error policy: callers must not be able to tell whether the UUID is missing or whether it names an access-controlled object excluded by the current predicate. The public error should match the normal missing-object fetch behavior for both cases. Internal diagnostics may preserve the distinction, but it must not be observable through the public API. + +## Dereference Behavior + +Dereference uses the same authorization rule as `fetch`, but the enforcement point is member unload. 
+ +In this design, dereference means exposing an object to application code by reading a durable reference stored in another object, such as accessing a memo field or an item inside a dbzero list, dict, or set. It does not mean every internal read of a durable address. + +When `unloadMember` resolves a durable object reference for application-visible field or collection access: + +- Determine the referenced object's type. +- If the type is not access controlled, return the reference normally. +- If the type is access controlled, resolve and apply the current predicate exactly as in `fetch`. +- Raise `PermissionError` if the predicate is required but missing, or if the predicate does not include the referenced object. + +This makes member access, list/dict/set traversal, embedded object references, weak proxies, and other application-visible reference-based access paths follow the same policy as explicit `fetch`. + +Internal maintenance operations must not be blocked by data filters merely because they read object addresses or object metadata. This includes reference counting, garbage collection, tag maintenance, deletion checks, index maintenance, serialization/deserialization internals, flush/reopen internals, consistency repair, and other storage-engine bookkeeping that does not expose the protected object to application code. These paths may still need to read restricted objects in order to preserve storage correctness. + +## Prefix Semantics + +`prefix` controls where data filtering is enabled: + +- A single prefix enables filtering for that prefix. +- Multiple prefixes enable filtering for each listed prefix. +- `None` enables filtering globally, including prefixes opened after initialization. + +The implementation should store both an explicit enabled-prefix set and a global-enabled flag. Access checks must use the prefix of the object or query, not only the current default prefix. 
+ +## Predicate Resolution + +Predicate resolution should be centralized so `find`, `fetch`, and dereference share one error policy. + +The resolver should return one of three states: + +- Filtering disabled by debug null predicate. +- A concrete `ObjectIterable` predicate. +- A `PermissionError` with an operation-specific message. + +Errors should distinguish: + +- Data filters were never initialized for an access-controlled type. +- The current predicate is missing outside debug mode. +- The current predicate does not include the requested object. +- A typeless query was attempted while prefix filtering is enabled. + +For public UUID fetch errors, predicate exclusion must be converted to the same observable error used for missing objects. Other operations that already expose a candidate object or typed query context may still raise `PermissionError` for authorization failures. + +## Predicate Lifetime + +The value stored in the predicate `ContextVar` may be a lazy query object. Data filters must not evaluate that query in whatever ambient prefix, head, or snapshot happens to be active later. + +When an access-controlled operation needs a predicate, dbzero should: + +1. Read the current `ContextVar` value. +2. If the value is non-null, serialize the predicate query expression at retrieval time. +3. Deserialize the predicate inside the operation context that will perform the access check. This context is either the current head or the specific snapshot being queried. +4. Evaluate the access check using that context-local predicate. + +The deserialized predicate may be cached for as long as the operation context is preserved. A cached predicate must not be reused across a different head, snapshot, prefix context, or transaction context where query resolution could produce different results. 
+ +This gives predicates snapshot-consistent behavior: a filtered snapshot query uses the predicate as interpreted inside that snapshot, while a head query uses the predicate as interpreted against the current head. + +## Query Composition + +Data predicates are authorization filters and must be composed as intersections. A predicate may already be a complex query expression; dbzero must treat that expression as a single authorization constraint and intersect it with the requested access query. It should be attached before sorting so ordering cannot influence authorization. + +This matters for: + +- Tag queries. +- Query objects passed into `find`. +- Deserialized queries. +- `index.sort(...)`. +- `index.range(...)` and other index operations that can unload object groups without directly relying on tags. + +Initial implementation can use the existing query-composition path. Later speedups may push predicate filtering into specific index implementations, but those optimizations must preserve the same visible behavior and error policy. + +## Type Metadata + +Memo type decoration needs persistent metadata for `access_control=True`. + +Requirements: + +- The decorator accepts `access_control`. +- Class metadata records whether the type is directly access controlled. +- Query planning can determine whether a requested type requires filtering because of direct decoration or restricted descendants. +- Reopened type metadata preserves the flag. +- Redeclaring a type with an incompatible access-control flag after durable instances exist should be rejected or handled consistently with existing type-contract validation. + +The base-type propagation rule should be computed through the type hierarchy and cached where practical. Cache invalidation must account for registering new derived types. + +## Deserialized Queries + +Serialized query payloads must not bypass type and predicate checks. 
Query deserialization should preserve explicit type information and reject or mark typeless queries so the normal `find` authorization path can raise `PermissionError` under filtered prefixes. + +Do not rely on the Python caller to re-wrap a deserialized query with a type or predicate. + +## Debug Mode + +`mode="DEBUG"` exists to allow incremental adoption and tests that need to initialize the mechanism before a predicate is available. It must be specified explicitly; omitting `mode` or passing `None` selects `RELEASE`. + +In debug mode: + +- A null predicate disables predicate filtering for access-controlled operations. +- Typeless `find` is still rejected when prefix filtering is enabled. +- Access-controlled typed queries still require data-filter initialization for that prefix. + +In release mode: + +- A null predicate always raises `PermissionError` for access-controlled operations. +- `RELEASE` is the default when `mode` is omitted. +- Passing `mode=None` is equivalent to `mode="RELEASE"`. + +Debug mode should be explicit. Release mode is the secure default. + +## Development Guidance + +Follow TDD for this feature. Start with Python behavior tests for public access paths, then add native tests for type metadata and query enforcement. + +Recommended implementation slices: + +1. Add decorator parsing and persistent type metadata for `access_control=True`. +2. Add data-filter initialization state, prefix matching, and predicate resolution. +3. Reject typeless `find` under filtered prefixes. +4. Enforce typed `find` checks and attach predicates before sorting. +5. Enforce `fetch` checks after minimal type discovery. +6. Enforce application-visible dereference checks from `unloadMember`. +7. Cover deserialized query behavior. +8. Add predicate serialization/deserialization for head and snapshot contexts. +9. Add index/range-focused tests and optimize only after behavior is stable. 
+ +Tests should cover: + +- `_init_data_filter` accepts one prefix, multiple prefixes, and `None`. +- `_init_data_filter` defaults to `RELEASE` when `mode` is omitted or `None`. +- `_init_data_filter` enables null predicates only when `mode="DEBUG"` is explicitly specified. +- Typeless `find` raises `PermissionError` under a filtered prefix. +- Typeless `find` works normally for unfiltered prefixes. +- Typed `find` on an unrestricted type is unchanged. +- Typed `find` on an access-controlled type raises when filters are not initialized. +- Typed `find` on an access-controlled type intersects with the current predicate. +- Predicate queries are serialized on retrieval and deserialized in the current head or snapshot context. +- Deserialized predicates are cached only while the same operation context is preserved. +- A null predicate raises outside `DEBUG`. +- A null predicate is allowed in `DEBUG`. +- Base-type queries are restricted when any derived type is access controlled. +- `fetch(uuid)` authorizes after discovering the object type. +- `fetch(uuid)` uses the same public error for missing objects and unauthorized access-controlled objects. +- `fetch(Type, uuid)` follows the same authorization outcome as `fetch(uuid)`. +- Application-visible member dereference raises when the referenced object is not in the predicate. +- Internal maintenance paths such as reference counting, tag maintenance, index maintenance, and flush/reopen are not blocked by data filters. +- Deserialized typeless queries cannot bypass the typeless-query rule. +- Deserialized typed queries receive the same predicate filtering as direct typed queries. +- Filtering uses the target object's prefix rather than only the current default prefix. 
diff --git a/python_tests/test_data_filter.py b/python_tests/test_data_filter.py new file mode 100644 index 00000000..7dfe0095 --- /dev/null +++ b/python_tests/test_data_filter.py @@ -0,0 +1,172 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# Copyright (c) 2026 DBZero Software sp. z o.o. + +from contextvars import ContextVar +from dataclasses import dataclass + +import pytest + +import dbzero as db0 +from .conftest import DB0_DIR + + +predicate = ContextVar("predicate") + + +@db0.memo +@dataclass +class InitDataFilterClass: + value: str + + +def test_init_data_filter_prefix_scoped_lifecycle(db0_fixture): + current_prefix = db0.get_current_prefix() + + db0._init_data_filter(predicate, prefix=current_prefix, mode="DEBUG") + + db0._init_data_filter(predicate, prefix=current_prefix.name, mode="DEBUG") + + db0.open("data-filter-extra-prefix") + db0._init_data_filter( + predicate, + prefix=["data-filter-extra-prefix"], + mode="DEBUG", + ) + + +def test_init_data_filter_general_scope_lifecycle(db0_fixture): + db0._init_data_filter(predicate) + + db0._init_data_filter(predicate, mode="RELEASE") + + with pytest.raises(RuntimeError, match="binding"): + db0._init_data_filter(ContextVar("other_general_predicate")) + + db0.open("data-filter-general-prefix") + with pytest.raises(RuntimeError, match="binding"): + db0._init_data_filter( + ContextVar("other_general_prefix_predicate"), + prefix="data-filter-general-prefix", + ) + + +def test_init_data_filter_requires_open_prefix(db0_fixture): + with pytest.raises(ValueError, match="open"): + db0._init_data_filter(predicate, prefix="not-opened") + + db0.open("readonly-data-filter-prefix") + db0.close("readonly-data-filter-prefix") + db0.open("readonly-data-filter-prefix", "r") + db0._init_data_filter(predicate, prefix="readonly-data-filter-prefix") + + +def test_init_data_filter_rejects_parameter_changes(db0_fixture): + db0._init_data_filter( + predicate, + prefix=db0.get_current_prefix(), + mode="DEBUG", + ) + + other_predicate 
= ContextVar("other_predicate") + with pytest.raises(RuntimeError, match="binding"): + db0._init_data_filter( + other_predicate, + prefix=db0.get_current_prefix(), + mode="DEBUG", + ) + + with pytest.raises(RuntimeError, match="binding"): + db0._init_data_filter( + predicate, + prefix=db0.get_current_prefix(), + mode="RELEASE", + ) + + +def test_init_data_filter_defaults_mode_to_release(db0_fixture): + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + db0._init_data_filter( + predicate, + prefix=db0.get_current_prefix(), + mode="RELEASE", + ) + + db0._init_data_filter( + predicate, + prefix=db0.get_current_prefix(), + mode=None, + ) + + with pytest.raises(RuntimeError, match="binding"): + db0._init_data_filter( + predicate, + prefix=db0.get_current_prefix(), + mode="DEBUG", + ) + + +def test_init_data_filter_binding_survives_prefix_reopen(db0_fixture): + prefix_name = db0.get_current_prefix().name + + db0._init_data_filter(predicate, prefix=prefix_name) + db0.close(prefix_name) + db0.open(prefix_name) + + with pytest.raises(RuntimeError, match="binding"): + db0._init_data_filter( + ContextVar("reopened_prefix_predicate"), + prefix=prefix_name, + ) + + +def test_init_data_filter_allows_different_bindings_for_different_prefixes(db0_fixture): + db0.open("first-data-filter-binding") + db0._init_data_filter(predicate, prefix="first-data-filter-binding", mode="DEBUG") + + other_predicate = ContextVar("different_prefix_predicate") + db0.open("different-data-filter-binding") + db0._init_data_filter( + other_predicate, + prefix="different-data-filter-binding", + mode="RELEASE", + ) + + +def test_init_can_initialize_workspace_data_filter(db0_fixture): + db0.close() + init_predicate = ContextVar("init_workspace_data_filter_predicate") + + db0.init( + DB0_DIR, + data_filter={ + "context_var": init_predicate, + "mode": "DEBUG", + }, + ) + db0.open("init-workspace-data-filter") + init_predicate.set(None) + + obj = InitDataFilterClass("visible") + + assert 
obj.value == "visible" + + +def test_init_can_initialize_prefix_data_filter_after_opening_prefix(db0_fixture): + db0.close() + init_predicate = ContextVar("init_prefix_data_filter_predicate") + + db0.init( + DB0_DIR, + prefix="init-prefix-data-filter", + data_filter={ + "context_var": init_predicate, + "prefix": "init-prefix-data-filter", + "mode": "DEBUG", + }, + ) + init_predicate.set(None) + + obj = InitDataFilterClass("visible") + + assert obj.value == "visible" diff --git a/src/dbzero/bindings/python/DataMasking.hpp b/src/dbzero/bindings/python/DataMasking.hpp index 486c82f8..067ca1c9 100644 --- a/src/dbzero/bindings/python/DataMasking.hpp +++ b/src/dbzero/bindings/python/DataMasking.hpp @@ -45,4 +45,23 @@ namespace db0 } }; + struct DataFilterState + { + PyObject *contextVar = nullptr; + DataMaskingMode mode = DataMaskingMode::RELEASE; + + DataFilterState(PyObject *contextVar, DataMaskingMode mode) + : contextVar(contextVar) + , mode(mode) + { + Py_INCREF(contextVar); + } + + bool matches(PyObject *otherContextVar, DataMaskingMode otherMode) const + { + return contextVar == otherContextVar + && mode == otherMode; + } + }; + } diff --git a/src/dbzero/bindings/python/PyAPI.cpp b/src/dbzero/bindings/python/PyAPI.cpp index e52df80f..d9370988 100644 --- a/src/dbzero/bindings/python/PyAPI.cpp +++ b/src/dbzero/bindings/python/PyAPI.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include namespace db0::python @@ -133,12 +134,14 @@ namespace db0::python return true; } - bool isOpenFixture(const std::string &prefixName) + bool isOpenFixture(const std::string &prefixName, const char *featureName) { auto &workspace = PyToolkit::getPyWorkspace().getWorkspace(); auto fixture = workspace.tryFindFixture(PrefixName(prefixName)); if (!fixture) { - PyErr_SetString(PyExc_ValueError, "data masking prefix must be open"); + std::ostringstream message; + message << featureName << " prefix must be open"; + PyErr_SetString(PyExc_ValueError, message.str().c_str()); return 
false; } return true; @@ -971,7 +974,7 @@ namespace db0::python } for (const auto &prefixName: prefixes) { - if (!isOpenFixture(prefixName)) { + if (!isOpenFixture(prefixName, "data masking")) { return nullptr; } } @@ -999,6 +1002,90 @@ namespace db0::python return runSafe(tryInitDataMasking, args, kwargs); } + PyObject *tryInitDataFilter(PyObject *args, PyObject *kwargs) + { + PyObject *pyContextVar = nullptr; + PyObject *pyPrefix = nullptr; + PyObject *pyMode = nullptr; + static const char *kwlist[] = { + "context_var", "prefix", "mode", NULL + }; + if (!PyArg_ParseTupleAndKeywords( + args, + kwargs, + "O|OO:_init_data_filter", + const_cast(kwlist), + &pyContextVar, + &pyPrefix, + &pyMode)) { + return nullptr; + } + + PyObject *contextValue = nullptr; + if (PyContextVar_Get(pyContextVar, NULL, &contextValue) < 0) { + PyErr_SetString(PyExc_TypeError, "context_var must be a contextvars.ContextVar"); + return nullptr; + } + Py_XDECREF(contextValue); + + auto mode = parseDataMaskingMode(pyMode); + if (PyErr_Occurred()) { + return nullptr; + } + + auto &workspace = PyToolkit::getPyWorkspace().getWorkspace(); + auto binding = std::make_shared(pyContextVar, mode); + + if (!pyPrefix || pyPrefix == Py_None) { + auto existingState = workspace.getDataFilterState(); + if (existingState) { + if (!existingState->matches(pyContextVar, mode)) { + PyErr_SetString(PyExc_RuntimeError, "data filter binding for workspace cannot be changed"); + return nullptr; + } + Py_RETURN_NONE; + } + workspace.initDataFilter(binding); + Py_RETURN_NONE; + } + + std::vector prefixes; + if (!appendPrefixSpec(pyPrefix, prefixes)) { + return nullptr; + } + if (prefixes.empty()) { + PyErr_SetString(PyExc_ValueError, "prefix must include at least one prefix"); + return nullptr; + } + + for (const auto &prefixName: prefixes) { + if (!isOpenFixture(prefixName, "data filter")) { + return nullptr; + } + } + + for (const auto &prefixName: prefixes) { + auto prefix = PrefixName(prefixName); + auto 
existingState = workspace.getDataFilterState(prefix); + if (existingState) { + if (!existingState->matches(pyContextVar, mode)) { + PyErr_SetString(PyExc_RuntimeError, "data filter binding for fixture cannot be changed"); + return nullptr; + } + continue; + } + workspace.initDataFilter(prefix, binding); + } + + Py_RETURN_NONE; + } + + PyObject *initDataFilter(PyObject *, PyObject *args, PyObject *kwargs) + { + PY_API_FUNC + return runSafe(tryInitDataFilter, args, kwargs); + } + namespace { std::vector extractAccountIDs(PyObject *py_account_id) diff --git a/src/dbzero/bindings/python/PyAPI.hpp b/src/dbzero/bindings/python/PyAPI.hpp index 609e4c6c..7b3fc947 100644 --- a/src/dbzero/bindings/python/PyAPI.hpp +++ b/src/dbzero/bindings/python/PyAPI.hpp @@ -112,6 +112,8 @@ namespace db0::python PyObject *initDataMasking(PyObject *self, PyObject *args, PyObject *kwargs); + PyObject *initDataFilter(PyObject *self, PyObject *args, PyObject *kwargs); + PyObject *setFieldAccess(PyObject *self, PyObject *args); PyObject *getFieldAccess(PyObject *self, PyObject *args); diff --git a/src/dbzero/bindings/python/dbzero.cpp b/src/dbzero/bindings/python/dbzero.cpp index 8be88d0e..e3bfef28 100644 --- a/src/dbzero/bindings/python/dbzero.cpp +++ b/src/dbzero/bindings/python/dbzero.cpp @@ -73,6 +73,7 @@ static PyMethodDef dbzero_methods[] = {"describe", &py::describeObject, METH_VARARGS, "Get dbzero object's description"}, {"rename_field", (PyCFunction)&py::renameField, METH_VARARGS | METH_KEYWORDS, "Get snapshot of dbzero state"}, {"_init_data_masking", (PyCFunction)&py::initDataMasking, METH_VARARGS | METH_KEYWORDS, "Initialize data masking for specific prefixes"}, + {"_init_data_filter", (PyCFunction)&py::initDataFilter, METH_VARARGS | METH_KEYWORDS, "Initialize data filtering for specific prefixes"}, {"set_field_access", (PyCFunction)&py::setFieldAccess, METH_VARARGS, "Set protected field access masks for a memo class"}, {"get_field_access", (PyCFunction)&py::getFieldAccess, 
METH_VARARGS, "Get protected field access masks for a memo class and account"}, {"reset_protect_fields", (PyCFunction)&py::resetProtectFields, METH_VARARGS, "Clear the persisted protected-fields flag for a memo class"}, diff --git a/src/dbzero/core/memory/config.cpp b/src/dbzero/core/memory/config.cpp index 29d50c54..157c7c4d 100644 --- a/src/dbzero/core/memory/config.cpp +++ b/src/dbzero/core/memory/config.cpp @@ -21,6 +21,7 @@ namespace db0 }; bool Settings::m_data_masking_enabled = false; + bool Settings::m_data_filter_enabled = false; void Settings::reset() { @@ -32,6 +33,7 @@ namespace db0 __dram_io_flush_poison = 0; #endif m_data_masking_enabled = false; + m_data_filter_enabled = false; } } diff --git a/src/dbzero/core/memory/config.hpp b/src/dbzero/core/memory/config.hpp index 1a570837..9e3e55c3 100644 --- a/src/dbzero/core/memory/config.hpp +++ b/src/dbzero/core/memory/config.hpp @@ -41,6 +41,10 @@ namespace db0 // Callers can use this to skip checking data masking rules when no open fixture can use them. static bool m_data_masking_enabled; + // Shortcut flag: true when data filtering is enabled for at least one open prefix. + // Callers can use this to skip checking data filtering rules when no open fixture can use them. 
+ static bool m_data_filter_enabled; + // reset all settings to default values static void reset(); }; diff --git a/src/dbzero/workspace/Fixture.cpp b/src/dbzero/workspace/Fixture.cpp index deb5837a..f3d1b817 100644 --- a/src/dbzero/workspace/Fixture.cpp +++ b/src/dbzero/workspace/Fixture.cpp @@ -299,6 +299,7 @@ namespace db0 workspace_view, m_v_object_cache.getSharedObjectList(), px_snapshot, allocator_snapshot ); result->initMaskingState(workspace_view.getDataMaskingState(PrefixName(px_snapshot->getName()))); + result->initFilterState(workspace_view.getDataFilterState(PrefixName(px_snapshot->getName()))); return result; } @@ -316,6 +317,21 @@ namespace db0 { return m_masking_state; } + + void Fixture::initFilterState(std::shared_ptr state) + { + if (m_filter_state && state && m_filter_state != state) { + THROWF(db0::InternalException) << "Data filter state is already initialized for fixture"; + } + if (state) { + m_filter_state = std::move(state); + } + } + + std::shared_ptr Fixture::getFilterState() const + { + return m_filter_state; + } bool Fixture::commit() { diff --git a/src/dbzero/workspace/Fixture.hpp b/src/dbzero/workspace/Fixture.hpp index aee1a6c1..3ecdc4ed 100644 --- a/src/dbzero/workspace/Fixture.hpp +++ b/src/dbzero/workspace/Fixture.hpp @@ -36,6 +36,7 @@ DB0_PACKED_BEGIN class GC0; struct DataMaskingState; + struct DataFilterState; class MetaAllocator; class Snapshot; class Workspace; @@ -285,6 +286,8 @@ DB0_PACKED_BEGIN void initMaskingState(std::shared_ptr); std::shared_ptr getMaskingState() const; + void initFilterState(std::shared_ptr); + std::shared_ptr getFilterState() const; private: const AccessType m_access_type; @@ -327,6 +330,7 @@ DB0_PACKED_BEGIN std::vector > m_flush_handlers; std::list > m_mutation_handlers; std::shared_ptr m_masking_state; + std::shared_ptr m_filter_state; std::uint64_t getUUID(MetaAllocator &); diff --git a/src/dbzero/workspace/Snapshot.cpp b/src/dbzero/workspace/Snapshot.cpp index 1109a166..633a0bdf 100644 --- 
a/src/dbzero/workspace/Snapshot.cpp +++ b/src/dbzero/workspace/Snapshot.cpp @@ -72,5 +72,13 @@ namespace db0 std::shared_ptr Snapshot::getDataMaskingState(const PrefixName &) const { return {}; } + + std::shared_ptr Snapshot::getDataFilterState() const { + return {}; + } + + std::shared_ptr Snapshot::getDataFilterState(const PrefixName &) const { + return {}; + } } diff --git a/src/dbzero/workspace/Snapshot.hpp b/src/dbzero/workspace/Snapshot.hpp index 2eb7aa69..b698c69e 100644 --- a/src/dbzero/workspace/Snapshot.hpp +++ b/src/dbzero/workspace/Snapshot.hpp @@ -16,6 +16,7 @@ namespace db0 class Fixture; struct DataMaskingState; + struct DataFilterState; class LangCache; class PrefixName; class ProcessTimer; @@ -57,6 +58,8 @@ namespace db0 virtual std::shared_ptr getDataMaskingState() const; virtual std::shared_ptr getDataMaskingState(const PrefixName &) const; + virtual std::shared_ptr getDataFilterState() const; + virtual std::shared_ptr getDataFilterState(const PrefixName &) const; db0::swine_ptr findFixture(const PrefixName &) const; diff --git a/src/dbzero/workspace/Workspace.cpp b/src/dbzero/workspace/Workspace.cpp index 850ef1c0..2b0605ec 100644 --- a/src/dbzero/workspace/Workspace.cpp +++ b/src/dbzero/workspace/Workspace.cpp @@ -291,6 +291,7 @@ namespace db0 it->second->close(false); m_fixtures.erase(it); updateDataMaskingSettingsFlag(); + updateDataFilterSettingsFlag(); if (is_default) { m_default_fixture = {}; @@ -338,6 +339,7 @@ namespace db0 it = m_fixtures.erase(it); } updateDataMaskingSettingsFlag(); + updateDataFilterSettingsFlag(); if (as_defunct) { m_lang_cache->clearDefunct(); @@ -383,6 +385,7 @@ namespace db0 } auto fixture = db0::make_swine(*this, prefix, allocator, m_next_locked_section_id); fixture->initMaskingState(getDataMaskingState(prefix_name)); + fixture->initFilterState(getDataFilterState(prefix_name)); if (m_fixture_initializer) { // initialize fixture with a model-specific initializer m_fixture_initializer(fixture, file_created, 
read_only, false); @@ -402,6 +405,7 @@ namespace db0 it = m_fixtures.emplace(fixture->getUUID(), fixture).first; updateDataMaskingSettingsFlag(); + updateDataFilterSettingsFlag(); m_fixture_catalog.add(prefix_name, *fixture); if (*access_type == AccessType::READ_ONLY) { // add read-only fixture to be monitored by the refresh thread (will be removed automatically when closed) @@ -626,6 +630,54 @@ namespace db0 } return it->second; } + + void Workspace::initDataFilter(std::shared_ptr state) + { + if (!m_prefix_data_filter_states.empty()) { + THROWF(db0::InputException) << "Data filter is already configured per prefix"; + } + if (m_data_filter_state && m_data_filter_state != state) { + THROWF(db0::InputException) << "Data filter is already configured for the workspace"; + } + m_data_filter_state = std::move(state); + for (auto &[uuid, fixture]: m_fixtures) { + fixture->initFilterState(m_data_filter_state); + } + updateDataFilterSettingsFlag(); + } + + void Workspace::initDataFilter(const PrefixName &prefix_name, std::shared_ptr state) + { + if (m_data_filter_state) { + THROWF(db0::InputException) << "Data filter is already configured for the workspace"; + } + auto [it, inserted] = m_prefix_data_filter_states.emplace(prefix_name.get(), state); + if (!inserted && it->second != state) { + THROWF(db0::InputException) << "Data filter is already configured for prefix: " << prefix_name; + } + auto fixture = tryFindFixture(prefix_name); + if (fixture) { + fixture->initFilterState(it->second); + } + updateDataFilterSettingsFlag(); + } + + std::shared_ptr Workspace::getDataFilterState() const + { + return m_data_filter_state; + } + + std::shared_ptr Workspace::getDataFilterState(const PrefixName &prefix_name) const + { + if (m_data_filter_state) { + return m_data_filter_state; + } + auto it = m_prefix_data_filter_states.find(prefix_name.get()); + if (it == m_prefix_data_filter_states.end()) { + return {}; + } + return it->second; + } db0::swine_ptr 
Workspace::getCurrentFixture() { @@ -900,6 +952,16 @@ namespace db0 return static_cast(item.second->getMaskingState()); }); } + + void Workspace::updateDataFilterSettingsFlag() const + { + Settings::m_data_filter_enabled = std::any_of( + m_fixtures.begin(), + m_fixtures.end(), + [](const auto &item) { + return static_cast(item.second->getFilterState()); + }); + } std::optional Workspace::getLangCacheSize() const { diff --git a/src/dbzero/workspace/Workspace.hpp b/src/dbzero/workspace/Workspace.hpp index 2fd16b5d..9a613864 100644 --- a/src/dbzero/workspace/Workspace.hpp +++ b/src/dbzero/workspace/Workspace.hpp @@ -35,6 +35,7 @@ namespace db0 class AutoCommitThread; class AtomicContext; struct DataMaskingState; + struct DataFilterState; class LangCache; class Config; class WorkspaceView; @@ -290,6 +291,10 @@ namespace db0 void initDataMasking(const PrefixName &, std::shared_ptr); std::shared_ptr getDataMaskingState() const override; std::shared_ptr getDataMaskingState(const PrefixName &) const override; + void initDataFilter(std::shared_ptr); + void initDataFilter(const PrefixName &, std::shared_ptr); + std::shared_ptr getDataFilterState() const override; + std::shared_ptr getDataFilterState(const PrefixName &) const override; std::shared_ptr getWorkspaceView( std::optional state_num = {}, @@ -342,6 +347,8 @@ namespace db0 std::unordered_map > > m_locked_section_log; std::shared_ptr m_data_masking_state; std::unordered_map > m_prefix_data_masking_states; + std::shared_ptr m_data_filter_state; + std::unordered_map > m_prefix_data_filter_states; // this is to prevent recursive cleanups (which might result in a deadlock) mutable std::atomic m_cleanup_pending = false; @@ -355,6 +362,7 @@ namespace db0 std::optional getLangCacheSize() const; std::shared_ptr getWorkspaceHeadView() const; void updateDataMaskingSettingsFlag() const; + void updateDataFilterSettingsFlag() const; }; } diff --git a/src/dbzero/workspace/WorkspaceView.cpp b/src/dbzero/workspace/WorkspaceView.cpp 
index 7fa83474..ce6e9ed3 100644 --- a/src/dbzero/workspace/WorkspaceView.cpp +++ b/src/dbzero/workspace/WorkspaceView.cpp @@ -273,6 +273,14 @@ namespace db0 std::shared_ptr WorkspaceView::getDataMaskingState(const PrefixName &prefix_name) const { return m_workspace_ptr->getDataMaskingState(prefix_name); } + + std::shared_ptr WorkspaceView::getDataFilterState() const { + return m_workspace_ptr->getDataFilterState(); + } + + std::shared_ptr WorkspaceView::getDataFilterState(const PrefixName &prefix_name) const { + return m_workspace_ptr->getDataFilterState(prefix_name); + } db0::swine_ptr WorkspaceView::tryFindFixture(const PrefixName &prefix_name) const { diff --git a/src/dbzero/workspace/WorkspaceView.hpp b/src/dbzero/workspace/WorkspaceView.hpp index 4ac396f3..a86617fd 100644 --- a/src/dbzero/workspace/WorkspaceView.hpp +++ b/src/dbzero/workspace/WorkspaceView.hpp @@ -44,6 +44,8 @@ namespace db0 std::shared_ptr getDataMaskingState() const override; std::shared_ptr getDataMaskingState(const PrefixName &) const override; + std::shared_ptr getDataFilterState() const override; + std::shared_ptr getDataFilterState(const PrefixName &) const override; Snapshot &getHeadWorkspace() const override; diff --git a/tests/unit_tests/WorkspaceTest.cpp b/tests/unit_tests/WorkspaceTest.cpp index db1d1abd..a3e1a59f 100644 --- a/tests/unit_tests/WorkspaceTest.cpp +++ b/tests/unit_tests/WorkspaceTest.cpp @@ -27,6 +27,13 @@ namespace tests reinterpret_cast(value), [](DataMaskingState *) {}); } + + std::shared_ptr makeTestFilterState(std::uintptr_t value) + { + return std::shared_ptr( + reinterpret_cast(value), + [](DataFilterState *) {}); + } class WorkspaceTest: public testing::Test { @@ -175,6 +182,59 @@ namespace tests m_workspace.close(fixture->getPrefix().getName()); ASSERT_FALSE(Settings::m_data_masking_enabled); } + + TEST_F( WorkspaceTest , testWorkspaceViewFixtureByNameKeepsWorkspaceFilterState ) + { + auto filter_state = makeTestFilterState(1); + 
m_workspace.initDataFilter(filter_state); + + auto fixture = m_workspace.getFixture(getPrefixName()); + fixture->commit(); + + auto workspace_view = m_workspace.getWorkspaceView(fixture->getStateNum()); + auto snapshot_fixture = workspace_view->getFixture(getPrefixName(), AccessType::READ_ONLY); + + ASSERT_EQ(snapshot_fixture->getFilterState(), filter_state); + } + + TEST_F( WorkspaceTest , testWorkspaceViewFixtureByUuidKeepsPrefixFilterState ) + { + auto fixture = m_workspace.getFixture(getPrefixName()); + auto filter_state = makeTestFilterState(2); + m_workspace.initDataFilter(getPrefixName(), filter_state); + fixture->commit(); + + auto workspace_view = m_workspace.getWorkspaceView(fixture->getStateNum()); + auto snapshot_fixture = workspace_view->getFixture(fixture->getUUID(), AccessType::READ_ONLY); + + ASSERT_EQ(snapshot_fixture->getFilterState(), filter_state); + } + + TEST_F( WorkspaceTest , testSettingsDataFilterEnabledTracksWorkspaceScopeOpenFixtures ) + { + auto filter_state = makeTestFilterState(3); + m_workspace.initDataFilter(filter_state); + ASSERT_FALSE(Settings::m_data_filter_enabled); + + auto fixture = m_workspace.getFixture(getPrefixName()); + ASSERT_TRUE(Settings::m_data_filter_enabled); + + m_workspace.close(fixture->getPrefix().getName()); + ASSERT_FALSE(Settings::m_data_filter_enabled); + } + + TEST_F( WorkspaceTest , testSettingsDataFilterEnabledTracksPrefixScopeOpenFixtures ) + { + auto filter_state = makeTestFilterState(4); + m_workspace.initDataFilter(getPrefixName(), filter_state); + ASSERT_FALSE(Settings::m_data_filter_enabled); + + auto fixture = m_workspace.getFixture(getPrefixName()); + ASSERT_TRUE(Settings::m_data_filter_enabled); + + m_workspace.close(fixture->getPrefix().getName()); + ASSERT_FALSE(Settings::m_data_filter_enabled); + } TEST_F( WorkspaceTest , testFreeCanBePerformedBetweenTransactions ) { From 505863285b6ae9fa7dd952b3177718e34052db8c Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 27 May 2026 15:49:30 +0200 
Subject: [PATCH 02/11] data filter API fixes + test --- dbzero/dbzero/dbzero.pyi | 8 ++++++ python_tests/test_data_filter.py | 10 +++++--- src/dbzero/bindings/python/PyAPI.cpp | 6 ----- src/dbzero/bindings/python/PyInternalAPI.cpp | 20 +++++++++++++++ src/dbzero/workspace/Snapshot.cpp | 8 ------ src/dbzero/workspace/Snapshot.hpp | 12 +++++---- src/dbzero/workspace/Workspace.cpp | 14 ++--------- src/dbzero/workspace/Workspace.hpp | 10 +++++--- src/dbzero/workspace/WorkspaceView.cpp | 8 ------ src/dbzero/workspace/WorkspaceView.hpp | 10 +++++--- tests/unit_tests/WorkspaceTest.cpp | 26 ++++++++++++++++++++ 11 files changed, 82 insertions(+), 50 deletions(-) diff --git a/dbzero/dbzero/dbzero.pyi b/dbzero/dbzero/dbzero.pyi index df8736c1..4f8b28bc 100644 --- a/dbzero/dbzero/dbzero.pyi +++ b/dbzero/dbzero/dbzero.pyi @@ -120,6 +120,14 @@ def get_type_stats(type: type, prefix: Optional[str] = None) -> Dict[str, Any]: """ ... +def get_prefix_stats(prefix: Optional[str] = None) -> Dict[str, Any]: + """Retrieve statistics for a prefix. + + The result includes ``data_masking["enabled"]`` and ``data_filter["enabled"]`` + to report whether the opened prefix has those runtime states attached. + """ + ... 
+ # Object retrieval and management def fetch(identifier: Union[str, type], expected_type: Optional[type] = None, prefix: Optional[str] = None) -> Memo: diff --git a/python_tests/test_data_filter.py b/python_tests/test_data_filter.py index 7dfe0095..3dcce1f4 100644 --- a/python_tests/test_data_filter.py +++ b/python_tests/test_data_filter.py @@ -50,9 +50,13 @@ def test_init_data_filter_general_scope_lifecycle(db0_fixture): ) -def test_init_data_filter_requires_open_prefix(db0_fixture): - with pytest.raises(ValueError, match="open"): - db0._init_data_filter(predicate, prefix="not-opened") +def test_init_data_filter_allows_prefix_before_open(db0_fixture): + prefix_name = "not-yet-opened-data-filter-prefix" + db0._init_data_filter(predicate, prefix=prefix_name, mode="DEBUG") + db0.open(prefix_name) + + stats = db0.get_prefix_stats(prefix=prefix_name) + assert stats["data_filter"]["enabled"] is True db0.open("readonly-data-filter-prefix") db0.close("readonly-data-filter-prefix") diff --git a/src/dbzero/bindings/python/PyAPI.cpp b/src/dbzero/bindings/python/PyAPI.cpp index d9370988..e7debc60 100644 --- a/src/dbzero/bindings/python/PyAPI.cpp +++ b/src/dbzero/bindings/python/PyAPI.cpp @@ -1058,12 +1058,6 @@ namespace db0::python return nullptr; } - for (const auto &prefixName: prefixes) { - if (!isOpenFixture(prefixName, "data filter")) { - return nullptr; - } - } - for (const auto &prefixName: prefixes) { auto prefix = PrefixName(prefixName); auto existingState = workspace.getDataFilterState(prefix); diff --git a/src/dbzero/bindings/python/PyInternalAPI.cpp b/src/dbzero/bindings/python/PyInternalAPI.cpp index e873bbdf..7f38061d 100644 --- a/src/dbzero/bindings/python/PyInternalAPI.cpp +++ b/src/dbzero/bindings/python/PyInternalAPI.cpp @@ -672,6 +672,26 @@ namespace db0::python PySafeDict_SetItemString(*sp_dict, "size", Py_OWN(PyLong_FromLong(fixture->getLimitedStringPool().size()))); PySafeDict_SetItemString(*stats_dict, "string_pool", sp_dict); + auto data_masking_dict 
= Py_OWN(PyDict_New()); + if (!data_masking_dict) { + return nullptr; + } + PySafeDict_SetItemString( + *data_masking_dict, + "enabled", + Py_OWN(PyBool_fromBool(static_cast(fixture->getMaskingState())))); + PySafeDict_SetItemString(*stats_dict, "data_masking", data_masking_dict); + + auto data_filter_dict = Py_OWN(PyDict_New()); + if (!data_filter_dict) { + return nullptr; + } + PySafeDict_SetItemString( + *data_filter_dict, + "enabled", + Py_OWN(PyBool_fromBool(static_cast(fixture->getFilterState())))); + PySafeDict_SetItemString(*stats_dict, "data_filter", data_filter_dict); + auto cache_dict = Py_OWN(PyDict_New()); if (!cache_dict) { return nullptr; diff --git a/src/dbzero/workspace/Snapshot.cpp b/src/dbzero/workspace/Snapshot.cpp index 633a0bdf..163b5670 100644 --- a/src/dbzero/workspace/Snapshot.cpp +++ b/src/dbzero/workspace/Snapshot.cpp @@ -65,18 +65,10 @@ namespace db0 return std::nullopt; } - std::shared_ptr Snapshot::getDataMaskingState() const { - return {}; - } - std::shared_ptr Snapshot::getDataMaskingState(const PrefixName &) const { return {}; } - std::shared_ptr Snapshot::getDataFilterState() const { - return {}; - } - std::shared_ptr Snapshot::getDataFilterState(const PrefixName &) const { return {}; } diff --git a/src/dbzero/workspace/Snapshot.hpp b/src/dbzero/workspace/Snapshot.hpp index b698c69e..277ec36d 100644 --- a/src/dbzero/workspace/Snapshot.hpp +++ b/src/dbzero/workspace/Snapshot.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -18,7 +19,6 @@ namespace db0 struct DataMaskingState; struct DataFilterState; class LangCache; - class PrefixName; class ProcessTimer; /** @@ -56,10 +56,12 @@ namespace db0 virtual bool isMutable() const = 0; - virtual std::shared_ptr getDataMaskingState() const; - virtual std::shared_ptr getDataMaskingState(const PrefixName &) const; - virtual std::shared_ptr getDataFilterState() const; - virtual std::shared_ptr getDataFilterState(const PrefixName &) const; + // Returns the data 
masking state configured for a prefix. Passing the default invalid PrefixName{} + // queries the workspace-global state, which applies to all prefixes. + virtual std::shared_ptr getDataMaskingState(const PrefixName & = {}) const; + // Returns the data filter state configured for a prefix. Passing the default invalid PrefixName{} + // queries the workspace-global state, which applies to all prefixes. + virtual std::shared_ptr getDataFilterState(const PrefixName & = {}) const; db0::swine_ptr findFixture(const PrefixName &) const; diff --git a/src/dbzero/workspace/Workspace.cpp b/src/dbzero/workspace/Workspace.cpp index 2b0605ec..dbc4799d 100644 --- a/src/dbzero/workspace/Workspace.cpp +++ b/src/dbzero/workspace/Workspace.cpp @@ -614,14 +614,9 @@ namespace db0 updateDataMaskingSettingsFlag(); } - std::shared_ptr Workspace::getDataMaskingState() const - { - return m_data_masking_state; - } - std::shared_ptr Workspace::getDataMaskingState(const PrefixName &prefix_name) const { - if (m_data_masking_state) { + if (!prefix_name || m_data_masking_state) { return m_data_masking_state; } auto it = m_prefix_data_masking_states.find(prefix_name.get()); @@ -662,14 +657,9 @@ namespace db0 updateDataFilterSettingsFlag(); } - std::shared_ptr Workspace::getDataFilterState() const - { - return m_data_filter_state; - } - std::shared_ptr Workspace::getDataFilterState(const PrefixName &prefix_name) const { - if (m_data_filter_state) { + if (!prefix_name || m_data_filter_state) { return m_data_filter_state; } auto it = m_prefix_data_filter_states.find(prefix_name.get()); diff --git a/src/dbzero/workspace/Workspace.hpp b/src/dbzero/workspace/Workspace.hpp index 9a613864..56d8669a 100644 --- a/src/dbzero/workspace/Workspace.hpp +++ b/src/dbzero/workspace/Workspace.hpp @@ -289,12 +289,14 @@ namespace db0 void initDataMasking(std::shared_ptr); void initDataMasking(const PrefixName &, std::shared_ptr); - std::shared_ptr getDataMaskingState() const override; - std::shared_ptr 
getDataMaskingState(const PrefixName &) const override; + // Returns the data masking state configured for a prefix. Passing the default invalid PrefixName{} + // queries the workspace-global state, which applies to all prefixes. + std::shared_ptr getDataMaskingState(const PrefixName & = {}) const override; void initDataFilter(std::shared_ptr); void initDataFilter(const PrefixName &, std::shared_ptr); - std::shared_ptr getDataFilterState() const override; - std::shared_ptr getDataFilterState(const PrefixName &) const override; + // Returns the data filter state configured for a prefix. Passing the default invalid PrefixName{} + // queries the workspace-global state, which applies to all prefixes. + std::shared_ptr getDataFilterState(const PrefixName & = {}) const override; std::shared_ptr getWorkspaceView( std::optional state_num = {}, diff --git a/src/dbzero/workspace/WorkspaceView.cpp b/src/dbzero/workspace/WorkspaceView.cpp index ce6e9ed3..a60e517f 100644 --- a/src/dbzero/workspace/WorkspaceView.cpp +++ b/src/dbzero/workspace/WorkspaceView.cpp @@ -266,18 +266,10 @@ namespace db0 return false; } - std::shared_ptr WorkspaceView::getDataMaskingState() const { - return m_workspace_ptr->getDataMaskingState(); - } - std::shared_ptr WorkspaceView::getDataMaskingState(const PrefixName &prefix_name) const { return m_workspace_ptr->getDataMaskingState(prefix_name); } - std::shared_ptr WorkspaceView::getDataFilterState() const { - return m_workspace_ptr->getDataFilterState(); - } - std::shared_ptr WorkspaceView::getDataFilterState(const PrefixName &prefix_name) const { return m_workspace_ptr->getDataFilterState(prefix_name); } diff --git a/src/dbzero/workspace/WorkspaceView.hpp b/src/dbzero/workspace/WorkspaceView.hpp index a86617fd..66b2f0d3 100644 --- a/src/dbzero/workspace/WorkspaceView.hpp +++ b/src/dbzero/workspace/WorkspaceView.hpp @@ -42,10 +42,12 @@ namespace db0 bool isMutable() const override; - std::shared_ptr getDataMaskingState() const override; - 
std::shared_ptr getDataMaskingState(const PrefixName &) const override; - std::shared_ptr getDataFilterState() const override; - std::shared_ptr getDataFilterState(const PrefixName &) const override; + // Returns the data masking state configured for a prefix. Passing the default invalid PrefixName{} + // queries the workspace-global state, which applies to all prefixes. + std::shared_ptr getDataMaskingState(const PrefixName & = {}) const override; + // Returns the data filter state configured for a prefix. Passing the default invalid PrefixName{} + // queries the workspace-global state, which applies to all prefixes. + std::shared_ptr getDataFilterState(const PrefixName & = {}) const override; Snapshot &getHeadWorkspace() const override; diff --git a/tests/unit_tests/WorkspaceTest.cpp b/tests/unit_tests/WorkspaceTest.cpp index a3e1a59f..65e42db3 100644 --- a/tests/unit_tests/WorkspaceTest.cpp +++ b/tests/unit_tests/WorkspaceTest.cpp @@ -157,6 +157,14 @@ namespace tests ASSERT_EQ(snapshot_fixture->getMaskingState(), masking_state); } + TEST_F( WorkspaceTest , testInvalidPrefixNameRetrievesWorkspaceMaskingState ) + { + auto masking_state = makeTestMaskingState(5); + m_workspace.initDataMasking(masking_state); + + ASSERT_EQ(m_workspace.getDataMaskingState(PrefixName()), masking_state); + } + TEST_F( WorkspaceTest , testSettingsDataMaskingEnabledTracksWorkspaceScopeOpenFixtures ) { auto masking_state = makeTestMaskingState(3); @@ -210,6 +218,24 @@ namespace tests ASSERT_EQ(snapshot_fixture->getFilterState(), filter_state); } + TEST_F( WorkspaceTest , testPrefixFilterStateIsAppliedWhenPrefixOpensLater ) + { + auto filter_state = makeTestFilterState(6); + m_workspace.initDataFilter(getPrefixName(), filter_state); + + auto fixture = m_workspace.getFixture(getPrefixName()); + + ASSERT_EQ(fixture->getFilterState(), filter_state); + } + + TEST_F( WorkspaceTest , testInvalidPrefixNameRetrievesWorkspaceFilterState ) + { + auto filter_state = makeTestFilterState(5); + 
m_workspace.initDataFilter(filter_state); + + ASSERT_EQ(m_workspace.getDataFilterState(PrefixName()), filter_state); + } + TEST_F( WorkspaceTest , testSettingsDataFilterEnabledTracksWorkspaceScopeOpenFixtures ) { auto filter_state = makeTestFilterState(3); From 18049e7c05192c0a8a8947e941983524a13d0a2a Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 27 May 2026 16:39:07 +0200 Subject: [PATCH 03/11] memo extension / access_control --- dbzero/dbzero/memo.py | 4 + python_tests/test_memo_protect_fields.py | 159 ++++++++++++++++++ src/dbzero/bindings/python/Memo.cpp | 16 +- src/dbzero/bindings/python/PyAPI.cpp | 1 + src/dbzero/bindings/python/PyToolkit.cpp | 9 + src/dbzero/bindings/python/PyToolkit.hpp | 1 + src/dbzero/bindings/python/types/PyClass.cpp | 1 + src/dbzero/object_model/class/Class.cpp | 31 +++- src/dbzero/object_model/class/Class.hpp | 10 +- .../object_model/class/ClassFactory.cpp | 46 +++-- src/dbzero/object_model/object/Options.cpp | 2 +- src/dbzero/object_model/object/Options.hpp | 5 +- 12 files changed, 260 insertions(+), 25 deletions(-) diff --git a/dbzero/dbzero/memo.py b/dbzero/dbzero/memo.py index a09900fa..6d635dff 100644 --- a/dbzero/dbzero/memo.py +++ b/dbzero/dbzero/memo.py @@ -173,6 +173,10 @@ def memo(cls: Optional[type] = None, **kwargs) -> type: materialized, removing this argument from the Python definition does not clear the persisted flag; use reset_protect_fields on the dbzero Class object instead. Derived memo classes inherit field protection and cannot disable it. + access_control : bool, default False + If True, the persistent class is marked as access controlled. This metadata is + recorded on the decorated class and may make loaded base classes report effective + access control at runtime; it does not enable query or fetch enforcement by itself. intern : bool, default False If True, the persistent class is marked for interned immutable materialization. 
This option requires immutable=True, and interned instances may only reference diff --git a/python_tests/test_memo_protect_fields.py b/python_tests/test_memo_protect_fields.py index 5eff15ca..5ba244fd 100644 --- a/python_tests/test_memo_protect_fields.py +++ b/python_tests/test_memo_protect_fields.py @@ -2,6 +2,10 @@ # Copyright (c) 2025 DBZero Software sp. z o.o. import asyncio +import json +import subprocess +import sys +import textwrap from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass from contextvars import ContextVar @@ -250,6 +254,161 @@ class ProtectedAfter: assert get_memo_class_object(obj).get_type_flags()["protect_fields"] is True +def test_access_control_defaults_to_false(db0_fixture): + obj = MemoUnprotectedFieldsClass("alpha", 1) + + assert get_memo_class_object(obj).get_type_flags()["access_control"] is False + assert db0.get_type_stats(MemoUnprotectedFieldsClass)["access_control"] is False + + +def test_access_control_is_persisted_on_class(db0_fixture): + @db0.memo(access_control=True) + @dataclass + class AccessControlled: + name: str + + obj = AccessControlled("alpha") + + assert get_memo_class_object(obj).get_type_flags()["access_control"] is True + assert db0.get_type_stats(AccessControlled)["access_control"] is True + + +def test_access_control_survives_redecoration_without_parameter(db0_fixture): + @db0.memo(id="dbzero-software/dbzero/tests/access-control-redecorated", access_control=True) + @dataclass + class AccessControlledBefore: + name: str + + obj = AccessControlledBefore("alpha") + obj_id = db0.uuid(obj) + assert get_memo_class_object(obj).get_type_flags()["access_control"] is True + db0.commit() + + db0.close() + db0.init(DB0_DIR) + db0.open("my-test-prefix") + + @db0.memo(id="dbzero-software/dbzero/tests/access-control-redecorated") + @dataclass + class AccessControlledAfter: + name: str + + obj = db0.fetch(AccessControlledAfter, obj_id) + assert 
get_memo_class_object(obj).get_type_flags()["access_control"] is True + + +def test_explicit_false_does_not_clear_access_control(db0_fixture): + @db0.memo(id="dbzero-software/dbzero/tests/access-control-explicit-false", access_control=True) + @dataclass + class AccessControlledBefore: + name: str + + obj = AccessControlledBefore("alpha") + obj_id = db0.uuid(obj) + assert get_memo_class_object(obj).get_type_flags()["access_control"] is True + db0.commit() + + db0.close() + db0.init(DB0_DIR) + db0.open("my-test-prefix") + + @db0.memo(id="dbzero-software/dbzero/tests/access-control-explicit-false", access_control=False) + @dataclass + class AccessControlledAfter: + name: str + + obj = db0.fetch(AccessControlledAfter, obj_id) + assert get_memo_class_object(obj).get_type_flags()["access_control"] is True + + +def test_access_control_can_be_enabled_after_class_materialization(db0_fixture): + @db0.memo(id="dbzero-software/dbzero/tests/access-control-enabled-later") + @dataclass + class AccessControlledBefore: + name: str + + obj = AccessControlledBefore("alpha") + obj_id = db0.uuid(obj) + assert get_memo_class_object(obj).get_type_flags()["access_control"] is False + db0.commit() + + db0.close() + db0.init(DB0_DIR) + db0.open("my-test-prefix") + + @db0.memo(id="dbzero-software/dbzero/tests/access-control-enabled-later", access_control=True) + @dataclass + class AccessControlledAfter: + name: str + + obj = db0.fetch(AccessControlledAfter, obj_id) + assert get_memo_class_object(obj).get_type_flags()["access_control"] is True + + +def test_access_control_dynamically_propagates_to_loaded_base_class(db0_fixture): + @db0.memo(id="dbzero-software/dbzero/tests/access-control-base") + @dataclass + class AccessControlBase: + name: str + + @db0.memo(id="dbzero-software/dbzero/tests/access-control-derived", access_control=True) + @dataclass + class AccessControlDerived(AccessControlBase): + value: int + + base = AccessControlBase("base") + derived = AccessControlDerived("derived", 1) 
+ + assert get_memo_class_object(derived).get_type_flags()["access_control"] is True + assert get_memo_class_object(base).get_type_flags()["access_control"] is True + assert db0.get_type_stats(AccessControlBase)["access_control"] is True + + +def test_access_control_base_is_not_persisted_by_derived_class(db0_fixture): + @db0.memo(id="dbzero-software/dbzero/tests/access-control-runtime-base") + @dataclass + class AccessControlBaseBefore: + name: str + + @db0.memo(id="dbzero-software/dbzero/tests/access-control-runtime-derived", access_control=True) + @dataclass + class AccessControlDerivedBefore(AccessControlBaseBefore): + value: int + + base = AccessControlBaseBefore("base") + _ = AccessControlDerivedBefore("derived", 1) + base_id = db0.uuid(base) + assert get_memo_class_object(base).get_type_flags()["access_control"] is True + db0.commit() + + db0.close() + script = f""" +import json +from dataclasses import dataclass +import dbzero as db0 + +db0.init({DB0_DIR!r}) +db0.open("my-test-prefix") + +@db0.memo(id="dbzero-software/dbzero/tests/access-control-runtime-base") +@dataclass +class AccessControlBaseAfter: + name: str + +base = db0.fetch(AccessControlBaseAfter, {base_id!r}) +print(json.dumps(db0.get_memo_class(base).get_class().get_type_flags()["access_control"])) +db0.close() +""" + result = subprocess.run( + [sys.executable, "-c", textwrap.dedent(script)], + check=True, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + assert json.loads(result.stdout) is False + + def test_reset_protect_fields_rejects_type_still_decorated_as_protected(db0_fixture): MemoProtectedFieldsClass("alpha", 1) diff --git a/src/dbzero/bindings/python/Memo.cpp b/src/dbzero/bindings/python/Memo.cpp index d5a4812c..274ea40f 100644 --- a/src/dbzero/bindings/python/Memo.cpp +++ b/src/dbzero/bindings/python/Memo.cpp @@ -899,7 +899,7 @@ namespace db0::python PyObject *wrapPyType(PyTypeObject *base_class, bool is_singleton, bool no_default_tags, const char *prefix_name, 
const char *type_id, const char *file_name, std::vector &&init_vars, PyObject *py_dyn_prefix_callable, std::vector &&migrations, bool no_cache, bool immutable, bool intern, - std::optional protect_fields_option) + std::optional protect_fields_option, bool access_control) { auto py_class = Py_BORROW(base_class); auto py_module = Py_OWN(findModule(*Py_OWN(PyObject_GetAttrString((PyObject*)*py_class, "__module__")))); @@ -958,6 +958,9 @@ namespace db0::python if (intern) { type_flags.set(MemoOptions::INTERN); } + if (access_control) { + type_flags.set(MemoOptions::ACCESS_CONTROL); + } auto type_info = MemoTypeDecoration( py_module, prefix_name, @@ -999,12 +1002,13 @@ namespace db0::python PyObject *py_immutable = nullptr; PyObject *py_intern = nullptr; PyObject *py_protect_fields = nullptr; + PyObject *py_access_control = nullptr; static const char *kwlist[] = { "input", "singleton", "no_default_tags", "prefix", "id", "py_file", "py_init_vars", - "py_dyn_prefix", "py_migrations", "no_cache", "immutable", "intern", "protect_fields", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOOOOOOOOOO", const_cast(kwlist), &class_obj, &py_singleton, + "py_dyn_prefix", "py_migrations", "no_cache", "immutable", "intern", "protect_fields", "access_control", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOOOOOOOOOOO", const_cast(kwlist), &class_obj, &py_singleton, &py_no_default_tags, &py_prefix_name, &py_type_id, &py_file_name, &py_init_vars, &py_dyn_prefix, &py_migrations, - &py_no_cache, &py_immutable, &py_intern, &py_protect_fields)) + &py_no_cache, &py_immutable, &py_intern, &py_protect_fields, &py_access_control)) { return NULL; } @@ -1018,6 +1022,7 @@ namespace db0::python if (py_protect_fields) { protect_fields_option = PyObject_IsTrue(py_protect_fields); } + bool access_control = py_access_control && PyObject_IsTrue(py_access_control); const char *prefix_name = (py_prefix_name && py_prefix_name != Py_None) ? 
PyUnicode_AsUTF8(py_prefix_name) : nullptr; const char *type_id = py_type_id ? PyUnicode_AsUTF8(py_type_id) : nullptr; const char *file_name = (py_file_name && py_file_name != Py_None) ? PyUnicode_AsUTF8(py_file_name) : nullptr; @@ -1052,7 +1057,8 @@ namespace db0::python auto migrations = extractMigrations(py_migrations); return wrapPyType(castToType(class_obj), is_singleton, no_default_tags, prefix_name, type_id, file_name, - std::move(init_vars), py_dyn_prefix, std::move(migrations), no_cache, immutable, intern, protect_fields_option + std::move(init_vars), py_dyn_prefix, std::move(migrations), no_cache, immutable, intern, protect_fields_option, + access_control ); } diff --git a/src/dbzero/bindings/python/PyAPI.cpp b/src/dbzero/bindings/python/PyAPI.cpp index e7debc60..0118870d 100644 --- a/src/dbzero/bindings/python/PyAPI.cpp +++ b/src/dbzero/bindings/python/PyAPI.cpp @@ -825,6 +825,7 @@ namespace db0::python PySafeDict_SetItemString(*stats, "singleton", Py_OWN(PyBool_fromBool(type->isSingleton()))); PySafeDict_SetItemString(*stats, "no_default_tags", Py_OWN(PyBool_fromBool(type->isNoDefaultTags()))); PySafeDict_SetItemString(*stats, "no_cache", Py_OWN(PyBool_fromBool(type->isNoCache()))); + PySafeDict_SetItemString(*stats, "access_control", Py_OWN(PyBool_fromBool(type->isAccessControl()))); auto refCountsDict = Py_OWN(PyDict_New()); if (!refCountsDict) { diff --git a/src/dbzero/bindings/python/PyToolkit.cpp b/src/dbzero/bindings/python/PyToolkit.cpp index c1f6aa23..5eebbe37 100644 --- a/src/dbzero/bindings/python/PyToolkit.cpp +++ b/src/dbzero/bindings/python/PyToolkit.cpp @@ -1177,6 +1177,15 @@ namespace db0::python } } + bool PyToolkit::isAccessControl(TypeObjectPtr py_type) + { + if (isAnyMemoType(py_type)) { + return MemoTypeDecoration::get(py_type).getFlags()[MemoOptions::ACCESS_CONTROL]; + } else { + return false; + } + } + FlagSet PyToolkit::getMemoFlags(TypeObjectPtr py_type) { if (isAnyMemoType(py_type)) { diff --git 
a/src/dbzero/bindings/python/PyToolkit.hpp b/src/dbzero/bindings/python/PyToolkit.hpp index 330af5a2..d361a66c 100644 --- a/src/dbzero/bindings/python/PyToolkit.hpp +++ b/src/dbzero/bindings/python/PyToolkit.hpp @@ -225,6 +225,7 @@ namespace db0::python static bool isImmutable(TypeObjectPtr); static bool isIntern(TypeObjectPtr); static bool isProtectFields(TypeObjectPtr); + static bool isAccessControl(TypeObjectPtr); static FlagSet getMemoFlags(TypeObjectPtr); static bool hasMemoInstance(ObjectPtr); static UniqueAddress getMemoUniqueAddress(ObjectPtr); diff --git a/src/dbzero/bindings/python/types/PyClass.cpp b/src/dbzero/bindings/python/types/PyClass.cpp index 98de86c7..c7f5b222 100644 --- a/src/dbzero/bindings/python/types/PyClass.cpp +++ b/src/dbzero/bindings/python/types/PyClass.cpp @@ -107,6 +107,7 @@ namespace db0::python PySafeDict_SetItemString(*py_result, "immutable", Py_OWN(PyBool_fromBool(type.isImmutable()))); PySafeDict_SetItemString(*py_result, "intern", Py_OWN(PyBool_fromBool(type.isIntern()))); PySafeDict_SetItemString(*py_result, "protect_fields", Py_OWN(PyBool_fromBool(type.isProtectFields()))); + PySafeDict_SetItemString(*py_result, "access_control", Py_OWN(PyBool_fromBool(type.isAccessControl()))); return py_result.steal(); } diff --git a/src/dbzero/object_model/class/Class.cpp b/src/dbzero/object_model/class/Class.cpp index cfb0a191..e84d461d 100644 --- a/src/dbzero/object_model/class/Class.cpp +++ b/src/dbzero/object_model/class/Class.cpp @@ -11,7 +11,7 @@ #include #include "Schema.hpp" -DEFINE_ENUM_VALUES(db0::ClassOptions, "SINGLETON", "NO_DEFAULT_TAGS", "IMMUTABLE", "PROTECT_FIELDS", "INTERN") +DEFINE_ENUM_VALUES(db0::ClassOptions, "SINGLETON", "NO_DEFAULT_TAGS", "IMMUTABLE", "PROTECT_FIELDS", "INTERN", "ACCESS_CONTROL") namespace db0::object_model @@ -115,6 +115,9 @@ namespace db0::object_model if (isProtectFields()) { ensureFieldSafe(); } + if (hasOwnAccessControl()) { + setAccessControl(); + } m_schema.postInit(getTotalFunc()); } @@ 
-134,6 +137,9 @@ namespace db0::object_model auto fixture = this->getFixture(); m_base_class_ptr = getClassFactory(*fixture).getTypeByClassRef((*this)->m_base_class_ref).m_class; } + if (hasOwnAccessControl()) { + setAccessControl(); + } } Class::~Class() @@ -311,6 +317,14 @@ namespace db0::object_model return (*this)->m_flags[ClassOptions::INTERN]; } + bool Class::hasOwnAccessControl() const { + return (*this)->m_flags[ClassOptions::ACCESS_CONTROL]; + } + + bool Class::isAccessControl() const { + return m_access_control; + } + bool Class::hasOwnProtectFields() const { return (*this)->m_flags[ClassOptions::PROTECT_FIELDS]; } @@ -391,6 +405,18 @@ namespace db0::object_model resetProtectFieldsCache(); } + void Class::setAccessControl() { + m_access_control = true; + if (m_base_class_ptr) { + m_base_class_ptr->setAccessControl(); + } + } + + void Class::setOwnAccessControl() { + modify().m_flags.set(ClassOptions::ACCESS_CONTROL, true); + setAccessControl(); + } + void Class::resetProtectFields() { if (m_base_class_ptr && m_base_class_ptr->isProtectFields()) { THROWF(db0::InputException) @@ -1164,6 +1190,9 @@ namespace db0::object_model void Class::setRuntimeFlags(FlagSet memo_options) { m_no_cache = memo_options[MemoOptions::NO_CACHE]; + if (memo_options[MemoOptions::ACCESS_CONTROL]) { + setAccessControl(); + } } bool Class::isBaseClass(const Class &other) const diff --git a/src/dbzero/object_model/class/Class.hpp b/src/dbzero/object_model/class/Class.hpp index 7c871e1a..b5fa2aea 100644 --- a/src/dbzero/object_model/class/Class.hpp +++ b/src/dbzero/object_model/class/Class.hpp @@ -39,14 +39,15 @@ namespace db0 NO_DEFAULT_TAGS = 0x0002, IMMUTABLE = 0x0004, PROTECT_FIELDS = 0x0008, - INTERN = 0x0010 + INTERN = 0x0010, + ACCESS_CONTROL = 0x0020 }; using ClassFlags = db0::FlagSet; } -DECLARE_ENUM_VALUES(db0::ClassOptions, 5) +DECLARE_ENUM_VALUES(db0::ClassOptions, 6) namespace db0::object_model @@ -179,9 +180,13 @@ DB0_PACKED_END bool isNoDefaultTags() const; bool 
isImmutable() const; bool isIntern() const; + bool isAccessControl() const; + bool hasOwnAccessControl() const; bool assignDefaultTags() const; bool isProtectFields() const; bool hasOwnProtectFields() const; + void setAccessControl(); + void setOwnAccessControl(); void setProtectFields(); void resetProtectFields(); bool hasFieldSafe() const; @@ -357,6 +362,7 @@ DB0_PACKED_END mutable MemberCacheT m_member_cache; // runtime flags bool m_no_cache = false; + bool m_access_control = false; // A function to retrieve the total number of instances of the schema std::function getTotalFunc() const; diff --git a/src/dbzero/object_model/class/ClassFactory.cpp b/src/dbzero/object_model/class/ClassFactory.cpp index f31ea248..fa4701c2 100644 --- a/src/dbzero/object_model/class/ClassFactory.cpp +++ b/src/dbzero/object_model/class/ClassFactory.cpp @@ -87,6 +87,20 @@ namespace db0::object_model } } + void applyAccessControlFlag(Class &type, ClassFactory::TypeObjectPtr lang_type) + { + // A direct @memo(access_control=True) decoration is durable metadata for this + // class, so persist it when the language type is first attached or loaded. + if (lang_type && ClassFactory::LangToolkit::isAccessControl(lang_type) && !type.hasOwnAccessControl()) { + type.setOwnAccessControl(); + // Classes reopened from storage may already own the durable flag. Re-apply the + // runtime state so access control is dynamically propagated to loaded bases + // without writing that inherited state into the base classes themselves. 
+ } else if (type.hasOwnAccessControl()) { + type.setAccessControl(); + } + } + o_class_factory::o_class_factory(Memspace &memspace) : m_class_map_ptrs { VClassMap(memspace), VClassMap(memspace), VClassMap(memspace), VClassMap(memspace) } { @@ -180,6 +194,7 @@ namespace db0::object_model if (LangToolkit::isProtectFields(lang_type) && !type->hasOwnProtectFields()) { type->setProtectFields(); } + applyAccessControlFlag(*type, lang_type); } else { auto fixture = getFixture(); if (!checkAccessType(*fixture, AccessType::READ_WRITE)) { @@ -195,6 +210,7 @@ namespace db0::object_model flags.set(ClassOptions::NO_DEFAULT_TAGS, LangToolkit::isNoDefaultTags(lang_type)); flags.set(ClassOptions::IMMUTABLE, LangToolkit::isImmutable(lang_type)); flags.set(ClassOptions::INTERN, LangToolkit::isIntern(lang_type)); + flags.set(ClassOptions::ACCESS_CONTROL, LangToolkit::isAccessControl(lang_type)); auto memo_base = LangToolkit::getBaseMemoType(lang_type); std::shared_ptr base_class; if (memo_base) { @@ -225,16 +241,6 @@ namespace db0::object_model it_cached = m_type_cache.insert({lang_type, type}).first; m_pending_types.push_back(lang_type); - } else { - auto memo_base = LangToolkit::getBaseMemoType(lang_type); - if (memo_base) { - getOrCreateType(memo_base); - } - validateImmutableFlag(*it_cached->second, lang_type); - validateInternFlag(*it_cached->second, lang_type); - if (LangToolkit::isProtectFields(lang_type) && !it_cached->second->hasOwnProtectFields()) { - it_cached->second->setProtectFields(); - } } return it_cached->second; } @@ -257,6 +263,7 @@ namespace db0::object_model std::shared_ptr ClassFactory::getType(ClassPtr ptr, std::shared_ptr type, TypeObjectPtr lang_type) const { auto it_cached = m_ptr_cache.find(ptr); + bool apply_lang_metadata = false; if (it_cached == m_ptr_cache.end()) { // try looking-up language specific type with the TypeManager if (!lang_type) { @@ -265,6 +272,7 @@ namespace db0::object_model // add to by-pointer cache it_cached = 
m_ptr_cache.insert({ptr, ClassItem{ type, lang_type }}).first; m_pending_ptrs.push_back(ptr); + apply_lang_metadata = !!lang_type; } if (lang_type && !it_cached->second.m_lang_type) { validateImmutableFlag(*it_cached->second.m_class, lang_type); @@ -272,9 +280,13 @@ namespace db0::object_model it_cached->second.m_lang_type = lang_type; it_cached->second.m_class->setInitVars(LangToolkit::getInitVars(lang_type)); it_cached->second.m_class->setRuntimeFlags(LangToolkit::getMemoFlags(lang_type)); + apply_lang_metadata = true; } - if (lang_type && LangToolkit::isProtectFields(lang_type) && !it_cached->second.m_class->hasOwnProtectFields()) { - it_cached->second.m_class->setProtectFields(); + if (apply_lang_metadata) { + if (LangToolkit::isProtectFields(lang_type) && !it_cached->second.m_class->hasOwnProtectFields()) { + it_cached->second.m_class->setProtectFields(); + } + applyAccessControlFlag(*it_cached->second.m_class, lang_type); } return it_cached->second.m_class; } @@ -324,6 +336,7 @@ namespace db0::object_model ClassFactory::ClassItem ClassFactory::tryGetTypeByPtr(ClassPtr ptr, TypeObjectPtr lang_type) const { auto it_cached = m_ptr_cache.find(ptr); + bool apply_lang_metadata = false; if (it_cached == m_ptr_cache.end()) { // Since ptr points to existing instance, we can simply pull it from backend // note that Class has no associated language specific type object yet @@ -345,6 +358,7 @@ namespace db0::object_model if (LangToolkit::isProtectFields(lang_type) && !type->hasOwnProtectFields()) { type->setProtectFields(); } + applyAccessControlFlag(*type, lang_type); } // register the mapping to language specific type object it_cached = m_ptr_cache.insert({ptr, ClassItem { type, lang_type }}).first; @@ -357,9 +371,13 @@ namespace db0::object_model it_cached->second.m_lang_type = lang_type; it_cached->second.m_class->setInitVars(LangToolkit::getInitVars(lang_type)); it_cached->second.m_class->setRuntimeFlags(LangToolkit::getMemoFlags(lang_type)); + apply_lang_metadata = 
true; } - if (lang_type && LangToolkit::isProtectFields(lang_type) && !it_cached->second.m_class->hasOwnProtectFields()) { - it_cached->second.m_class->setProtectFields(); + if (apply_lang_metadata) { + if (LangToolkit::isProtectFields(lang_type) && !it_cached->second.m_class->hasOwnProtectFields()) { + it_cached->second.m_class->setProtectFields(); + } + applyAccessControlFlag(*it_cached->second.m_class, lang_type); } return it_cached->second; } diff --git a/src/dbzero/object_model/object/Options.cpp b/src/dbzero/object_model/object/Options.cpp index e627cd5e..895b64bb 100644 --- a/src/dbzero/object_model/object/Options.cpp +++ b/src/dbzero/object_model/object/Options.cpp @@ -3,4 +3,4 @@ #include "Options.hpp" -DEFINE_ENUM_VALUES(db0::object_model::MemoOptions, "NO_DEFAULT_TAGS", "NO_CACHE", "IMMUTABLE", "PROTECT_FIELDS", "INTERN") +DEFINE_ENUM_VALUES(db0::object_model::MemoOptions, "NO_DEFAULT_TAGS", "NO_CACHE", "IMMUTABLE", "PROTECT_FIELDS", "INTERN", "ACCESS_CONTROL") diff --git a/src/dbzero/object_model/object/Options.hpp b/src/dbzero/object_model/object/Options.hpp index 7f43ba8b..c989966b 100644 --- a/src/dbzero/object_model/object/Options.hpp +++ b/src/dbzero/object_model/object/Options.hpp @@ -18,11 +18,12 @@ namespace db0::object_model NO_CACHE = 0x0002, IMMUTABLE = 0x0004, PROTECT_FIELDS = 0x0008, - INTERN = 0x0010 + INTERN = 0x0010, + ACCESS_CONTROL = 0x0020 }; using MemoFlags = db0::FlagSet; } -DECLARE_ENUM_VALUES(db0::object_model::MemoOptions, 5) +DECLARE_ENUM_VALUES(db0::object_model::MemoOptions, 6) From f1ca3cc5dbc39d9369e2dacc4e3db14ac13299a2 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 27 May 2026 21:29:38 +0200 Subject: [PATCH 04/11] data filter integration + PredicateFactory --- dbzero/dbzero/dbzero.pyi | 9 + design/DATA_FILTERS_DESIGN.md | 9 +- python_tests/test_data_filter.py | 189 ++++++++++++++++++ src/dbzero/bindings/python/PyTagsAPI.cpp | 74 ++++++- src/dbzero/bindings/python/PyTagsAPI.hpp | 4 +- 
src/dbzero/bindings/python/PyToolkit.cpp | 12 ++ src/dbzero/bindings/python/PyToolkit.hpp | 2 + src/dbzero/bindings/python/dbzero.cpp | 1 + .../bindings/python/iter/PyObjectIterable.cpp | 46 ++++- .../bindings/python/iter/PyObjectIterable.hpp | 3 +- src/dbzero/object_model/ObjectModel.cpp | 4 + .../object_model/tags/ObjectIterable.cpp | 29 ++- .../object_model/tags/ObjectIterable.hpp | 11 +- .../object_model/tags/PredicateFactory.cpp | 148 ++++++++++++++ .../object_model/tags/PredicateFactory.hpp | 63 ++++++ src/dbzero/object_model/tags/TagIndex.cpp | 27 ++- src/dbzero/object_model/tags/TagIndex.hpp | 8 +- 17 files changed, 608 insertions(+), 31 deletions(-) create mode 100644 src/dbzero/object_model/tags/PredicateFactory.cpp create mode 100644 src/dbzero/object_model/tags/PredicateFactory.hpp diff --git a/dbzero/dbzero/dbzero.pyi b/dbzero/dbzero/dbzero.pyi index 4f8b28bc..f7f570d6 100644 --- a/dbzero/dbzero/dbzero.pyi +++ b/dbzero/dbzero/dbzero.pyi @@ -1067,6 +1067,15 @@ def find(*query_criteria: Union[Tag, List[Tag], Tuple[Tag], QueryObject, TagSet] """ ... +def predicate(*query_criteria: Union[Tag, List[Tag], Tuple[Tag], QueryObject, TagSet], prefix: Optional[str] = None) -> QueryObject: + """Build a predicate-only query for composing filters. + + Predicate queries use the same criteria grammar as ``find`` and can be used + as criteria in other queries or as data-filter predicates. They do not allow + direct iteration, counting, truth testing, indexing, or slicing. + """ + ... + def no(predicate: Union[str, QueryObject], /) -> TagSet: """Create a negative predicate (NOT condition) for find queries. diff --git a/design/DATA_FILTERS_DESIGN.md b/design/DATA_FILTERS_DESIGN.md index 4c56e875..584be725 100644 --- a/design/DATA_FILTERS_DESIGN.md +++ b/design/DATA_FILTERS_DESIGN.md @@ -41,17 +41,19 @@ class RestrictedData: A predicate is not limited to a plain list of tags. 
It can be any dbzero `ObjectIterable` query expression, including a complex tag-based statement composed from tags, object references, nested `find` queries, alternatives, negation, and other query operators. +Predicates should be built with `db0.predicate(...)`, which uses the same query grammar as `db0.find(...)` but bypasses data-filter authorization during construction. Predicate queries are useful outside data filtering as composable query constraints, so they work even when data filtering is not enabled for a prefix. To avoid leaking protected data, predicate queries are not directly iterable and do not expose result cardinality: iteration, `len()`, truth testing, indexing, and slicing raise `PermissionError`. They may still be passed into `find`, serialized, and used as the value of a data-filter context variable. + A simple predicate can grant access through an explicit tag relation: ```python -pred = db0.find(db0.as_tag("GRANT-ACCESS", account)) +pred = db0.predicate(db0.as_tag("GRANT-ACCESS", account)) predicate.set(pred) ``` A more selective predicate can combine multiple query clauses: ```python -pred = db0.find( +pred = db0.predicate( [ db0.as_tag("GRANT-ACCESS", account), db0.as_tag("GRANT-ACCESS", "PUBLIC"), @@ -102,6 +104,7 @@ If prefix-level filtering is enabled: - A query without an explicit type raises `PermissionError`. - This applies to direct calls and deserialized queries. +- Use `db0.predicate(...)`, not typeless `db0.find(...)`, to construct reusable predicate query expressions. If a query has an explicit type, dbzero checks whether that type requires access control. This type check happens even when prefix-level data filtering is disabled. If the type is access controlled but data filters are not initialized for the prefix, dbzero raises `PermissionError` explaining that data filtering must be initialized before the query can run. 
@@ -110,7 +113,7 @@ If the type is access controlled and filtering is initialized: - Resolve the predicate from the configured `ContextVar`. - If the predicate is `None` and mode is not `DEBUG`, raise `PermissionError`. - If the predicate is `None` and mode is `DEBUG`, run the original typed query without adding a filter. -- If the predicate is non-null, attach it to the query before sorting or range/index ordering is applied. +- If the predicate is non-null, require it to be a predicate query created by `db0.predicate(...)`, then attach it to the query before sorting or range/index ordering is applied. Conceptually: diff --git a/python_tests/test_data_filter.py b/python_tests/test_data_filter.py index 3dcce1f4..f77425b8 100644 --- a/python_tests/test_data_filter.py +++ b/python_tests/test_data_filter.py @@ -19,6 +19,37 @@ class InitDataFilterClass: value: str +@db0.memo(access_control=True) +@dataclass +class FilteredFindClass: + value: str + + +@db0.memo(access_control=True) +class DynamicPrefixFilteredFindClass: + def __init__(self, value: str, prefix=None): + db0.set_prefix(self, prefix) + self.value = value + + +@db0.memo +@dataclass +class FilteredFindPublicClass: + value: str + + +@db0.memo +@dataclass +class FilteredFindBaseClass: + value: str + + +@db0.memo(access_control=True) +@dataclass +class FilteredFindDerivedClass(FilteredFindBaseClass): + extra: str + + def test_init_data_filter_prefix_scoped_lifecycle(db0_fixture): current_prefix = db0.get_current_prefix() @@ -156,6 +187,164 @@ def test_init_can_initialize_workspace_data_filter(db0_fixture): assert obj.value == "visible" +def test_predicate_can_be_built_without_data_filter_enabled(db0_fixture): + obj = FilteredFindPublicClass("allowed") + db0.tags(obj).add("grant") + + pred = db0.predicate("grant") + + assert [item.value for item in db0.find(FilteredFindPublicClass, pred)] == ["allowed"] + + +def test_predicate_survives_serialization(db0_fixture): + obj = FilteredFindPublicClass("allowed") + 
db0.tags(obj).add("grant") + + pred = db0.deserialize(db0.serialize(db0.predicate("grant"))) + + with pytest.raises(PermissionError): + list(pred) + assert [item.value for item in db0.find(FilteredFindPublicClass, pred)] == ["allowed"] + + +def test_predicate_can_be_built_after_data_filter_enabled(db0_fixture): + db0._init_data_filter(predicate, prefix=db0.get_current_prefix(), mode="DEBUG") + + pred = db0.predicate("grant") + + with pytest.raises(PermissionError): + list(pred) + + +def test_predicate_blocks_direct_data_access(db0_fixture): + pred = db0.predicate("grant") + + with pytest.raises(PermissionError): + iter(pred) + with pytest.raises(PermissionError): + len(pred) + with pytest.raises(PermissionError): + bool(pred) + with pytest.raises(PermissionError): + pred[0] + with pytest.raises(PermissionError): + pred[:1] + + +def test_data_filter_rejects_typeless_find(db0_fixture): + db0._init_data_filter(predicate, prefix=db0.get_current_prefix(), mode="DEBUG") + + with pytest.raises(PermissionError): + db0.find("grant") + + +def test_access_controlled_find_requires_data_filter(db0_fixture): + FilteredFindClass("visible") + + with pytest.raises(PermissionError): + db0.find(FilteredFindClass) + + +def test_data_filter_release_mode_requires_predicate(db0_fixture): + FilteredFindClass("visible") + predicate.set(None) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + with pytest.raises(PermissionError): + db0.find(FilteredFindClass) + + +def test_data_filter_debug_mode_allows_null_predicate(db0_fixture): + obj = FilteredFindClass("visible") + predicate.set(None) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix(), mode="DEBUG") + + assert list(db0.find(FilteredFindClass)) == [obj] + + +def test_data_filter_predicate_filters_access_controlled_find(db0_fixture): + allowed = FilteredFindClass("allowed") + denied = FilteredFindClass("denied") + db0.tags(allowed).add(["visible", "grant"]) + db0.tags(denied).add("visible") + + 
predicate.set(db0.predicate("grant")) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + assert list(db0.find(FilteredFindClass, "visible")) == [allowed] + + +def test_data_filter_predicate_refreshes_after_matching_objects_are_committed(db0_fixture): + initial = FilteredFindClass("initial") + db0.tags(initial).add(["visible", "grant"]) + db0.commit() + + predicate.set(db0.predicate("grant")) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + denied = FilteredFindClass("denied") + db0.tags(denied).add("visible") + assert list(db0.find(FilteredFindClass, "visible")) == [initial] + + later = FilteredFindClass("later") + db0.tags(later).add(["visible", "grant"]) + db0.commit() + + assert list(db0.find(FilteredFindClass, "visible")) == [later, initial] + + +@pytest.mark.skip(reason="TODO: preserve predicate expressions that initially resolve to no results") +def test_data_filter_predicate_created_before_tag_exists_refreshes_after_commit(db0_fixture): + predicate.set(db0.predicate("grant")) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + allowed = FilteredFindClass("allowed") + denied = FilteredFindClass("denied") + db0.tags(allowed).add(["visible", "grant"]) + db0.tags(denied).add("visible") + db0.commit() + + assert list(db0.find(FilteredFindClass, "visible")) == [allowed] + + +def test_data_filter_requires_predicate_object(db0_fixture): + FilteredFindClass("visible") + predicate.set(db0.find(FilteredFindPublicClass)) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + with pytest.raises(PermissionError): + db0.find(FilteredFindClass) + + +def test_data_filter_filters_access_controlled_derived_from_base_query(db0_fixture): + allowed = FilteredFindDerivedClass("allowed", "x") + denied = FilteredFindDerivedClass("denied", "y") + db0.tags(allowed).add(["visible", "grant"]) + db0.tags(denied).add("visible") + + predicate.set(db0.predicate("grant")) + db0._init_data_filter(predicate, 
prefix=db0.get_current_prefix()) + + assert list(db0.find(FilteredFindBaseClass, "visible")) == [allowed] + + +def test_data_filter_prefix_scope_only_filters_configured_prefix(db0_fixture): + px_name = db0.get_current_prefix().name + other_prefix = "unfiltered-data-filter-find-prefix" + + filtered = DynamicPrefixFilteredFindClass("filtered", prefix=px_name) + db0.tags(filtered).add("visible") + db0.open(other_prefix) + unfiltered = DynamicPrefixFilteredFindClass("unfiltered", prefix=other_prefix) + db0.tags(unfiltered).add("visible") + + predicate.set(db0.predicate("grant", prefix=px_name)) + db0._init_data_filter(predicate, prefix=px_name) + + assert list(db0.find(DynamicPrefixFilteredFindClass, "visible", prefix=px_name)) == [] + assert list(db0.find(DynamicPrefixFilteredFindClass, "visible", prefix=other_prefix)) == [unfiltered] + + def test_init_can_initialize_prefix_data_filter_after_opening_prefix(db0_fixture): db0.close() init_predicate = ContextVar("init_prefix_data_filter_predicate") diff --git a/src/dbzero/bindings/python/PyTagsAPI.cpp b/src/dbzero/bindings/python/PyTagsAPI.cpp index 273fc89e..1c9afddd 100644 --- a/src/dbzero/bindings/python/PyTagsAPI.cpp +++ b/src/dbzero/bindings/python/PyTagsAPI.cpp @@ -6,12 +6,16 @@ #include "PySnapshot.hpp" #include #include +#include #include #include #include #include #include +#include #include +#include +#include #include #include @@ -19,8 +23,58 @@ namespace db0::python { + bool setFindPermissionError(const char *message) + { + PyErr_SetString(PyExc_PermissionError, message); + return false; + } + + // Enforces data-filter rules for protected find() calls and, when a filter + // is active, appends its refreshed predicate query to native_predicates so + // TagIndex::find() intersects the user query with the access-control query. + // + // PyContextVar_Get() returns a new reference. 
find_args only stores borrowed + // raw PyObject* values, so owned_predicates keeps that reference alive until + // TagIndex::find() has consumed find_args. + bool appendDataFilterPredicate(db0::swine_ptr fixture, std::shared_ptr type, + std::vector > &native_predicates, + std::vector > &owned_predicates) + { + auto filter_state = fixture->getFilterState(); + if (filter_state && !type) { + return setFindPermissionError("typeless find is not allowed when data filtering is enabled for the prefix"); + } + if (!type || !type->isAccessControl()) { + return true; + } + if (!filter_state && !db0::Settings::m_data_filter_enabled) { + return setFindPermissionError("data filter must be initialized before querying an access-controlled type"); + } + if (!filter_state) { + return true; + } + + PyObject *py_predicate = nullptr; + if (PyContextVar_Get(filter_state->contextVar, NULL, &py_predicate) < 0) { + return false; + } + owned_predicates.emplace_back(Py_OWN(py_predicate)); + if (!py_predicate || py_predicate == Py_None) { + if (filter_state->mode == db0::DataMaskingMode::DEBUG) { + return true; + } + return setFindPermissionError("data filter predicate is not set"); + } + if (!PyObjectIterable_Check(py_predicate) + || !reinterpret_cast(py_predicate)->ext().isPredicateOnly()) { + return setFindPermissionError("data filter predicate must be created with db0.predicate"); + } + native_predicates.push_back(fixture->get().get(py_predicate)); + return true; + } + PyObject *findIn(db0::Snapshot &snapshot, PyObject* const *args, Py_ssize_t nargs, - PyObject *context, const char *prefix_name) + PyObject *context, const char *prefix_name, bool bypass_data_filters, bool predicate_only) { using ObjectIterable = db0::object_model::ObjectIterable; using TagIndex = db0::object_model::TagIndex; @@ -34,11 +88,23 @@ namespace db0::python snapshot, args, nargs, find_args, type, lang_type, no_result, prefix_name ); fixture->refreshIfUpdated(); + std::vector > owned_predicates; + std::vector > 
native_predicates; + if (!bypass_data_filters && !appendDataFilterPredicate(fixture, type, native_predicates, owned_predicates)) { + return nullptr; + } auto &tag_index = fixture->get(); std::vector > query_observers; - auto query_iterator = tag_index.find(find_args.data(), find_args.size(), type, query_observers, no_result); + std::vector native_predicate_ptrs; + native_predicate_ptrs.reserve(native_predicates.size()); + for (const auto &native_predicate: native_predicates) { + native_predicate_ptrs.push_back(native_predicate.get()); + } + auto query_iterator = tag_index.find(find_args.data(), find_args.size(), type, query_observers, no_result, + native_predicate_ptrs); auto iter_obj = PyObjectIterableDefault_new(); - iter_obj->makeNew(fixture, std::move(query_iterator), type, lang_type, std::move(query_observers)); + iter_obj->makeNew(fixture, std::move(query_iterator), type, lang_type, std::move(query_observers), + std::vector{}, predicate_only); if (context) { (iter_obj.get())->ext().attachContext(context); } @@ -321,4 +387,4 @@ namespace db0::python return iter_obj.steal(); } -} \ No newline at end of file +} diff --git a/src/dbzero/bindings/python/PyTagsAPI.hpp b/src/dbzero/bindings/python/PyTagsAPI.hpp index 25d3fe04..6a1243b6 100644 --- a/src/dbzero/bindings/python/PyTagsAPI.hpp +++ b/src/dbzero/bindings/python/PyTagsAPI.hpp @@ -43,7 +43,7 @@ namespace db0::python * @return PyObjectIterable */ PyObject *findIn(db0::Snapshot &, PyObject* const *args, Py_ssize_t nargs, PyObject *context = nullptr, - const char *prefix_name = nullptr); + const char *prefix_name = nullptr, bool bypass_data_filters = false, bool predicate_only = false); PyObject *PyAPI_splitBy(PyObject *, PyObject *args, PyObject *kwargs); @@ -60,4 +60,4 @@ namespace db0::python PyObject *joinIn(db0::Snapshot &, PyObject* const *args, Py_ssize_t nargs, PyObject *join_on_arg, PyObject *context = nullptr, const char *prefix_name = nullptr); -} \ No newline at end of file +} diff --git 
a/src/dbzero/bindings/python/PyToolkit.cpp b/src/dbzero/bindings/python/PyToolkit.cpp index 5eebbe37..d2292749 100644 --- a/src/dbzero/bindings/python/PyToolkit.cpp +++ b/src/dbzero/bindings/python/PyToolkit.cpp @@ -949,6 +949,18 @@ namespace db0::python py_iter->makeNew(std::move(*obj_iter)); return shared_py_cast(std::move(py_iter)); } + + const db0::object_model::ObjectIterable &PyToolkit::getPredicateIterable(ObjectPtr py_object) + { + if (!PyObjectIterable_Check(py_object)) { + THROWF(db0::InputException) << "Predicate object must be an ObjectIterable"; + } + auto &predicate = reinterpret_cast(py_object)->ext(); + if (!predicate.isPredicateOnly()) { + THROWF(db0::InputException) << "Predicate object must be created with db0.predicate"; + } + return predicate; + } PyToolkit::ObjectSharedPtr PyToolkit::deserializeEnumValue(db0::swine_ptr fixture, std::vector::const_iterator &iter, diff --git a/src/dbzero/bindings/python/PyToolkit.hpp b/src/dbzero/bindings/python/PyToolkit.hpp index d361a66c..a4196cb6 100644 --- a/src/dbzero/bindings/python/PyToolkit.hpp +++ b/src/dbzero/bindings/python/PyToolkit.hpp @@ -35,6 +35,7 @@ namespace db0::object_model class o_embedded_object; class o_py_tuple; class Object; + class ObjectIterable; class Class; class ClassFactory; struct EnumValue; @@ -154,6 +155,7 @@ namespace db0::python // Unload from serialized bytes static ObjectSharedPtr deserializeObjectIterable(db0::swine_ptr, std::vector::const_iterator &iter, std::vector::const_iterator end); + static const db0::object_model::ObjectIterable &getPredicateIterable(ObjectPtr); static ObjectSharedPtr deserializeEnumValue(db0::swine_ptr, std::vector::const_iterator &iter, std::vector::const_iterator end); static ObjectSharedPtr deserializeEnumValueRepr(db0::swine_ptr, std::vector::const_iterator &iter, diff --git a/src/dbzero/bindings/python/dbzero.cpp b/src/dbzero/bindings/python/dbzero.cpp index e3bfef28..f71d3b7b 100644 --- a/src/dbzero/bindings/python/dbzero.cpp +++ 
b/src/dbzero/bindings/python/dbzero.cpp @@ -61,6 +61,7 @@ static PyMethodDef dbzero_methods[] = {"bytearray", (PyCFunction)&py::PyAPI_makeByteArray, METH_FASTCALL, "Create a new dbzero bytearray instance"}, {"tags", (PyCFunction)&py::makeObjectTagManager, METH_FASTCALL, ""}, {"find", (PyCFunction)&py::PyAPI_find, METH_VARARGS | METH_KEYWORDS, "Find memo instances by tags with optional filtering"}, + {"predicate", (PyCFunction)&py::PyAPI_predicate, METH_VARARGS | METH_KEYWORDS, "Build a non-iterable predicate query for composing filters"}, {"join", (PyCFunction)&py::PyAPI_join, METH_VARARGS | METH_KEYWORDS, "Join memo collections by common tags with optional filtering"}, {"refresh", (PyCFunction)&py::refresh, METH_VARARGS, ""}, {"get_state_num", (PyCFunction)&py::PyAPI_getStateNum, METH_VARARGS | METH_KEYWORDS, ""}, diff --git a/src/dbzero/bindings/python/iter/PyObjectIterable.cpp b/src/dbzero/bindings/python/iter/PyObjectIterable.cpp index cb8a73fb..69f64aaf 100644 --- a/src/dbzero/bindings/python/iter/PyObjectIterable.cpp +++ b/src/dbzero/bindings/python/iter/PyObjectIterable.cpp @@ -71,6 +71,10 @@ namespace db0::python PyObject *tryPyAPI_PyObjectIterable_iter(PyObjectIterable *py_iterable) { + if (py_iterable->ext().isPredicateOnly()) { + PyErr_SetString(PyExc_PermissionError, "predicate queries cannot be iterated directly"); + return nullptr; + } // getFixture to prevent segfault in case the associated context (e.g. snapshot) has been destroyed auto fixture = py_iterable->ext().getFixture(); auto py_iter = PyObjectIteratorDefault_new(); @@ -86,6 +90,10 @@ namespace db0::python Py_ssize_t tryPyObjectIterable_len(PyObjectIterable *py_iterable) { + if (py_iterable->ext().isPredicateOnly()) { + PyErr_SetString(PyExc_PermissionError, "predicate queries cannot be counted directly"); + return -1; + } // getFixture to prevent segfault in case the associated context (e.g. 
snapshot) has been destroyed auto fixture = py_iterable->ext().getFixture(); return py_iterable->ext().getSize(); @@ -94,7 +102,7 @@ namespace db0::python Py_ssize_t PyAPI_PyObjectIterable_len(PyObjectIterable *py_iterable) { PY_API_FUNC - return runSafe(tryPyObjectIterable_len, py_iterable); + return runSafe<-1>(tryPyObjectIterable_len, py_iterable); } void PySlice_GetUnboundIndices(PyObject *py_slice, std::function &)> size_func, @@ -156,6 +164,11 @@ namespace db0::python using SliceDef = db0::object_model::SliceDef; using ObjectSharedPtr = PyToolkit::ObjectSharedPtr; + if (py_iterable->ext().isPredicateOnly()) { + PyErr_SetString(PyExc_PermissionError, "predicate queries cannot be indexed or sliced directly"); + return nullptr; + } + if (PyTuple_Check(py_key)) { // itemgetter's key (item indexes) auto indices = unpackTuple(py_key); @@ -204,6 +217,10 @@ namespace db0::python int PyAPI_PyObjectIterable_bool(PyObjectIterable *py_iterable) { PY_API_FUNC + if (py_iterable->ext().isPredicateOnly()) { + PyErr_SetString(PyExc_PermissionError, "predicate queries cannot be tested for truth directly"); + return -1; + } // check if the iterable is empty if (py_iterable->ext().empty()) { return 0; // False @@ -270,7 +287,32 @@ namespace db0::python PY_API_FUNC return runSafe(findIn, PyToolkit::getPyWorkspace().getWorkspace(), (PyObject* const*)args_data.data(), - num_args, nullptr, prefix_name); + num_args, nullptr, prefix_name, false, false); + } + + PyObject *PyAPI_predicate(PyObject *, PyObject *args, PyObject *kwargs) + { + Py_ssize_t num_args = PyTuple_Size(args); + std::vector args_data(num_args); + for (Py_ssize_t i = 0; i < num_args; ++i) { + args_data[i] = PyTuple_GetItem(args, i); + } + + const char prefix_arg[] = "prefix"; + const char *prefix_name = nullptr; + if (kwargs) { + PyObject *py_prefix_name = PyDict_GetItemString(kwargs, prefix_arg); + if (py_prefix_name) { + prefix_name = parseStringLikeArgument(py_prefix_name, "predicate", prefix_arg); + if 
(!prefix_name) { + return nullptr; + } + } + } + + PY_API_FUNC + return runSafe(findIn, PyToolkit::getPyWorkspace().getWorkspace(), (PyObject* const*)args_data.data(), + num_args, nullptr, prefix_name, true, true); } } diff --git a/src/dbzero/bindings/python/iter/PyObjectIterable.hpp b/src/dbzero/bindings/python/iter/PyObjectIterable.hpp index fba46ad2..7470b7b1 100644 --- a/src/dbzero/bindings/python/iter/PyObjectIterable.hpp +++ b/src/dbzero/bindings/python/iter/PyObjectIterable.hpp @@ -29,5 +29,6 @@ namespace db0::python * @return PyObjectIterable */ PyObject *PyAPI_find(PyObject *, PyObject *args, PyObject *kwargs); + PyObject *PyAPI_predicate(PyObject *, PyObject *args, PyObject *kwargs); -} \ No newline at end of file +} diff --git a/src/dbzero/object_model/ObjectModel.cpp b/src/dbzero/object_model/ObjectModel.cpp index 2a8532e6..d44377c2 100644 --- a/src/dbzero/object_model/ObjectModel.cpp +++ b/src/dbzero/object_model/ObjectModel.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,7 @@ namespace db0::object_model std::function &, bool is_new, bool read_only, bool is_snapshot)> initializer() { using TagIndex = db0::object_model::TagIndex; + using PredicateFactory = db0::object_model::PredicateFactory; using ClassFactory = db0::object_model::ClassFactory; using EnumFactory = db0::object_model::EnumFactory; using Index = db0::object_model::Index; @@ -59,6 +61,7 @@ namespace db0::object_model fixture->getVObjectCache(), fixture->addMutationHandler() ); + fixture->addResource(); // flush from tag index on fixture commit (or close on close) fixture->addCloseHandler([&](bool commit) { @@ -111,6 +114,7 @@ namespace db0::object_model fixture->getVObjectCache(), fixture->addMutationHandler() ); + fixture->addResource(); // flush from tag index on fixture commit (or close on close) fixture->addCloseHandler([&](bool commit) { diff --git a/src/dbzero/object_model/tags/ObjectIterable.cpp 
b/src/dbzero/object_model/tags/ObjectIterable.cpp index 81c5bce0..6b4aba99 100644 --- a/src/dbzero/object_model/tags/ObjectIterable.cpp +++ b/src/dbzero/object_model/tags/ObjectIterable.cpp @@ -36,7 +36,7 @@ namespace db0::object_model ObjectIterable::ObjectIterable(db0::swine_ptr fixture, std::unique_ptr &&ft_query_iterator, std::shared_ptr type, TypeObjectPtr lang_type, std::vector > &&query_observers, - const std::vector &filters) + const std::vector &filters, bool predicate_only) : m_fixture(fixture) , m_class_factory(getClassFactory(*fixture)) , m_query_iterator(validated(std::move(ft_query_iterator))) @@ -45,12 +45,13 @@ namespace db0::object_model , m_type(type) , m_lang_type(lang_type) , m_access_mode(getAccessMode(type)) + , m_predicate_only(predicate_only) { } ObjectIterable::ObjectIterable(db0::swine_ptr fixture, std::unique_ptr &&sorted_iterator, std::shared_ptr type, TypeObjectPtr lang_type, std::vector > &&query_observers, - const std::vector &filters) + const std::vector &filters, bool predicate_only) : m_fixture(fixture) , m_class_factory(getClassFactory(*fixture)) , m_sorted_iterator(validated(std::move(sorted_iterator))) @@ -59,12 +60,13 @@ namespace db0::object_model , m_type(type) , m_lang_type(lang_type) , m_access_mode(getAccessMode(type)) + , m_predicate_only(predicate_only) { } ObjectIterable::ObjectIterable(db0::swine_ptr fixture, std::shared_ptr factory, std::shared_ptr type, TypeObjectPtr lang_type, std::vector > &&query_observers, - const std::vector &filters) + const std::vector &filters, bool predicate_only) : m_fixture(fixture) , m_class_factory(getClassFactory(*fixture)) , m_factory(factory) @@ -73,6 +75,7 @@ namespace db0::object_model , m_type(type) , m_lang_type(lang_type) , m_access_mode(getAccessMode(type)) + , m_predicate_only(predicate_only) { } @@ -80,7 +83,7 @@ namespace db0::object_model std::unique_ptr &&ft_query_iterator, std::unique_ptr &&sorted_iterator, std::shared_ptr factory, std::vector > &&query_observers, 
std::vector &&filters, std::shared_ptr type, TypeObjectPtr lang_type, - const SliceDef &slice_def, AccessFlags access_mode) + const SliceDef &slice_def, AccessFlags access_mode, bool predicate_only) : m_fixture(fixture) , m_class_factory(class_factory) , m_query_iterator(std::move(ft_query_iterator)) @@ -92,6 +95,7 @@ namespace db0::object_model , m_lang_type(lang_type) , m_slice_def(slice_def) , m_access_mode(access_mode) + , m_predicate_only(predicate_only) { } @@ -104,6 +108,7 @@ namespace db0::object_model , m_lang_type(other.m_lang_type) , m_slice_def(other.m_slice_def) , m_access_mode(other.m_access_mode) + , m_predicate_only(other.m_predicate_only) { m_filters.insert(m_filters.end(), filters.begin(), filters.end()); @@ -126,6 +131,7 @@ namespace db0::object_model , m_lang_type(other.m_lang_type) , m_slice_def(other.m_slice_def.combineWith(slice_def)) , m_access_mode(other.m_access_mode) + , m_predicate_only(other.m_predicate_only) { std::unique_ptr query_iterator; std::unique_ptr sorted_iterator; @@ -150,6 +156,7 @@ namespace db0::object_model , m_lang_type(other.m_lang_type) , m_slice_def(other.m_slice_def) , m_access_mode(other.m_access_mode) + , m_predicate_only(other.m_predicate_only) { m_filters.insert(m_filters.end(), filters.begin(), filters.end()); } @@ -166,6 +173,7 @@ namespace db0::object_model , m_lang_type(other.m_lang_type) , m_slice_def(other.m_slice_def) , m_access_mode(other.m_access_mode) + , m_predicate_only(other.m_predicate_only) { m_filters.insert(m_filters.end(), filters.begin(), filters.end()); } @@ -235,6 +243,7 @@ namespace db0::object_model // FIXTURE uuid db0::serial::write(buf, fixture->getUUID()); db0::serial::write(buf, this->isNull()); + db0::serial::write(buf, m_predicate_only); if (this->isNull()) { return; } @@ -273,9 +282,11 @@ namespace db0::object_model fixture_ = fixture->getWorkspace().getFixture(fixture_uuid); } bool is_null = db0::serial::read(iter, end); + bool predicate_only = db0::serial::read(iter, end); if 
(is_null) { // deserialize as null - return std::make_unique(fixture_, std::unique_ptr()); + return std::make_unique(fixture_, std::unique_ptr(), nullptr, nullptr, + std::vector >{}, std::vector{}, predicate_only); } std::unique_ptr query_iterator; @@ -305,7 +316,8 @@ namespace db0::object_model auto &class_factory = fixture_->get(); return std::unique_ptr(new ObjectIterable(fixture_, class_factory, std::move(query_iterator), - std::move(sorted_iterator), factory, {}, {}, nullptr, nullptr, is_sliced ? SliceDef{start, stop, step} : SliceDef{}, {})); + std::move(sorted_iterator), factory, {}, {}, nullptr, nullptr, is_sliced ? SliceDef{start, stop, step} : SliceDef{}, {}, + predicate_only)); } double ObjectIterable::compareTo(const ObjectIterable &other) const @@ -445,6 +457,11 @@ namespace db0::object_model return true; } + bool ObjectIterable::isPredicateOnly() const + { + return m_predicate_only; + } + AccessFlags ObjectIterable::getAccessMode(std::shared_ptr type) const { if (type) { diff --git a/src/dbzero/object_model/tags/ObjectIterable.hpp b/src/dbzero/object_model/tags/ObjectIterable.hpp index bfa4c171..edb7b509 100644 --- a/src/dbzero/object_model/tags/ObjectIterable.hpp +++ b/src/dbzero/object_model/tags/ObjectIterable.hpp @@ -53,17 +53,17 @@ namespace db0::object_model // Construct from a full-text query iterator ObjectIterable(db0::swine_ptr, std::unique_ptr &&, std::shared_ptr = nullptr, TypeObjectPtr lang_type = nullptr, std::vector > && = {}, - const std::vector & = {}); + const std::vector & = {}, bool predicate_only = false); // Construct from a sorted iterator ObjectIterable(db0::swine_ptr, std::unique_ptr &&, std::shared_ptr = nullptr, TypeObjectPtr lang_type = nullptr, std::vector > && = {}, - const std::vector & = {}); + const std::vector & = {}, bool predicate_only = false); // Construct from IteratorFactory (specialized on first use) ObjectIterable(db0::swine_ptr, std::shared_ptr factory, std::shared_ptr = nullptr, TypeObjectPtr lang_type = 
nullptr, std::vector > && = {}, - const std::vector & = {}); + const std::vector & = {}, bool predicate_only = false); // Construct with additional filters ObjectIterable(const ObjectIterable &, const std::vector &); @@ -133,6 +133,8 @@ namespace db0::object_model void attachContext(ObjectPtr) const; bool empty() const; + + bool isPredicateOnly() const; protected: mutable db0::weak_swine_ptr m_fixture; @@ -148,12 +150,13 @@ namespace db0::object_model mutable ObjectSharedPtr m_lang_context; // object access mode (e.g. no_cache) const AccessFlags m_access_mode; + const bool m_predicate_only = false; // iter constructor ObjectIterable(db0::swine_ptr, const ClassFactory &, std::unique_ptr &&, std::unique_ptr &&, std::shared_ptr, std::vector > &&, std::vector &&filters, std::shared_ptr, TypeObjectPtr lang_type, const SliceDef & = {}, - AccessFlags access_mode = {}); + AccessFlags access_mode = {}, bool predicate_only = false); // get the base iterator, possibly initialized from the factory const BaseIterator &getBaseIterator(std::unique_ptr &) const; diff --git a/src/dbzero/object_model/tags/PredicateFactory.cpp b/src/dbzero/object_model/tags/PredicateFactory.cpp new file mode 100644 index 00000000..38fe0028 --- /dev/null +++ b/src/dbzero/object_model/tags/PredicateFactory.cpp @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2026 DBZero Software sp. z o.o. 
+ +#include "PredicateFactory.hpp" +#include +#include + +namespace db0::object_model + +{ + + PredicateFactory::PredicateFactory(std::optional capacity) + : m_capacity(capacity.value_or(DEFAULT_CAPACITY)) + , m_cache(m_capacity) + , m_evict_hand(m_cache.begin()) + , m_insert_hand(m_cache.begin()) + , m_visited(m_capacity) + { + assert(m_capacity > 0); + } + + bool PredicateFactory::isFull() const + { + return m_size == m_capacity; + } + + std::shared_ptr PredicateFactory::deserialize( + db0::swine_ptr fixture, const std::vector &bytes) const + { + auto iter = bytes.cbegin(); + return ObjectIterable::deserialize(fixture, iter, bytes.cend()); + } + + std::shared_ptr PredicateFactory::get(ObjectPtr key) + { + if (!key) { + THROWF(db0::InputException) << "Invalid predicate key"; + } + + const auto &predicate = ObjectIterable::LangToolkit::getPredicateIterable(key); + auto fixture = predicate.getFixture(); + auto stateNum = fixture->getPrefix().getStateNum(); + std::unique_lock lock(m_mutex); + + auto it = m_key_to_index.find(key); + if (it != m_key_to_index.end()) { + auto slotId = it->second; + auto &item = m_cache[slotId]; + assert(item.m_key_ref.get() == key); + m_visited[slotId] = true; + if (item.m_state_num != stateNum) { + item.m_iterable = deserialize(fixture, item.m_bytes); + item.m_state_num = stateNum; + } + return item.m_iterable; + } + + std::optional slot; + if (isFull()) { + slot = evictOne(); + if (!slot) { + // Sieve gives every visited item a second chance. If all entries + // were marked visited, a second pass must find an evictable slot. 
+ slot = evictOne(); + } + } else { + slot = findEmptySlot(); + } + assert(slot); + + std::vector bytes; + predicate.serialize(bytes); + + auto slotId = *slot; + auto &item = m_cache[slotId]; + assert(!item.m_key_ref.get()); + item.m_key_ref = ObjectSharedPtr(key); + item.m_bytes = std::move(bytes); + item.m_iterable = deserialize(fixture, item.m_bytes); + item.m_state_num = stateNum; + m_visited[slotId] = true; + m_key_to_index[key] = slotId; + ++m_size; + return item.m_iterable; + } + + std::optional PredicateFactory::evictOne() + { + if (m_size == 0) { + return std::nullopt; + } + + assert(m_evict_hand != m_cache.end()); + auto end = m_evict_hand; + ++m_evict_hand; + for (; m_evict_hand != end; ++m_evict_hand) { + if (m_evict_hand == m_cache.end()) { + m_evict_hand = m_cache.begin(); + if (m_evict_hand == end) { + return std::nullopt; + } + } + if (!m_evict_hand->m_key_ref.get()) { + continue; + } + auto slotId = m_evict_hand - m_cache.begin(); + if (m_visited[slotId]) { + m_visited[slotId] = false; + continue; + } + + m_key_to_index.erase(m_evict_hand->m_key_ref.get()); + *m_evict_hand = CacheItem{}; + --m_size; + return slotId; + } + return std::nullopt; + } + + std::optional PredicateFactory::findEmptySlot() + { + auto end = m_insert_hand; + for (;;) { + if (m_insert_hand == m_cache.end()) { + m_insert_hand = m_cache.begin(); + } + if (!m_insert_hand->m_key_ref.get()) { + return m_insert_hand - m_cache.begin(); + } + ++m_insert_hand; + if (m_insert_hand == end) { + return std::nullopt; + } + } + } + + std::size_t PredicateFactory::size() const + { + std::shared_lock lock(m_mutex); + return m_size; + } + + std::size_t PredicateFactory::getCapacity() const + { + return m_capacity; + } + +} diff --git a/src/dbzero/object_model/tags/PredicateFactory.hpp b/src/dbzero/object_model/tags/PredicateFactory.hpp new file mode 100644 index 00000000..e0a4a383 --- /dev/null +++ b/src/dbzero/object_model/tags/PredicateFactory.hpp @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: 
LGPL-2.1-or-later +// Copyright (c) 2026 DBZero Software sp. z o.o. + +#pragma once + +#include "ObjectIterable.hpp" +#include +#include +#include +#include +#include +#include +#include + +namespace db0::object_model + +{ + + class PredicateFactory + { + public: + using ObjectPtr = ObjectIterable::ObjectPtr; + using ObjectSharedPtr = ObjectIterable::ObjectSharedPtr; + + static constexpr std::size_t DEFAULT_CAPACITY = 1024; + + explicit PredicateFactory(std::optional capacity = {}); + + // Return the cached predicate iterable identified by its language object + // pointer. The key must unwrap to a predicate-only ObjectIterable. The + // factory stores the serialized predicate bytes once, then recreates the + // cached native iterable whenever the prefix state number moves. + std::shared_ptr get(ObjectPtr key); + + std::size_t size() const; + std::size_t getCapacity() const; + + private: + struct CacheItem + { + ObjectSharedPtr m_key_ref; + std::shared_ptr m_iterable; + std::vector m_bytes; + StateNumType m_state_num = 0; + }; + + const std::size_t m_capacity; + mutable std::shared_mutex m_mutex; + std::size_t m_size = 0; + std::vector m_cache; + mutable std::vector::iterator m_evict_hand; + mutable std::vector::iterator m_insert_hand; + mutable std::vector m_visited; + std::unordered_map m_key_to_index; + + bool isFull() const; + std::shared_ptr deserialize(db0::swine_ptr fixture, + const std::vector &bytes) const; + std::optional evictOne(); + std::optional findEmptySlot(); + }; + +} diff --git a/src/dbzero/object_model/tags/TagIndex.cpp b/src/dbzero/object_model/tags/TagIndex.cpp index 0cd8eb66..eccabd7c 100644 --- a/src/dbzero/object_model/tags/TagIndex.cpp +++ b/src/dbzero/object_model/tags/TagIndex.cpp @@ -605,12 +605,13 @@ namespace db0::object_model } std::unique_ptr TagIndex::find(ObjectPtr const *args, std::size_t nargs, - std::shared_ptr type, std::vector > &observers, bool no_result) const + std::shared_ptr type, std::vector > &observers, bool no_result, 
+ const std::vector &native_args) const { db0::FT_ANDIteratorFactory factory; // the negated root-level query components std::vector > neg_iterators; - if (nargs > 0 || type) { + if (nargs > 0 || type || !native_args.empty()) { // flush pending updates before querying flush(); // if the 1st argument is a type then resolve as a typed ObjectIterable @@ -625,6 +626,10 @@ namespace db0::object_model result &= addIterator(args[offset], factory, neg_iterators, observers); ++offset; } + for (auto *native_arg: native_args) { + assert(native_arg); + result &= addIterator(*native_arg, factory, neg_iterators, observers); + } if (!result) { // invalidate factory since no matching results exist factory.clear(); @@ -645,6 +650,17 @@ namespace db0::object_model return std::make_unique >(std::move(neg_iterators), -1); } } + + bool TagIndex::addIterator(const ObjectIterable &obj_iter, db0::FT_IteratorFactory &factory, + std::vector > &neg_iterators, std::vector > &query_observers) const + { + auto ft_query = obj_iter.beginFTQuery(query_observers, -1); + if (!ft_query || ft_query->isEnd()) { + return false; + } + factory.add(std::move(ft_query)); + return true; + } bool TagIndex::addIterator(ObjectPtr arg, db0::FT_IteratorFactory &factory, std::vector > &neg_iterators, std::vector > &query_observers) const @@ -731,12 +747,7 @@ namespace db0::object_model if (type_id == TypeId::OBJECT_ITERABLE) { auto &obj_iter = LangToolkit::getTypeManager().extractObjectIterable(arg); // try interpreting the iterator as FT-query - auto ft_query = obj_iter.beginFTQuery(query_observers, -1); - if (!ft_query || ft_query->isEnd()) { - return false; - } - factory.add(std::move(ft_query)); - return true; + return addIterator(obj_iter, factory, neg_iterators, query_observers); } if (type_id == TypeId::DB0_TAG_SET) { diff --git a/src/dbzero/object_model/tags/TagIndex.hpp b/src/dbzero/object_model/tags/TagIndex.hpp index 031eaf47..005eb8c3 100644 --- a/src/dbzero/object_model/tags/TagIndex.hpp +++ 
b/src/dbzero/object_model/tags/TagIndex.hpp @@ -26,6 +26,7 @@ namespace db0::object_model using LongTagT = db0::LongTagT; class EnumFactory; class CompositeTagDef; + class ObjectIterable; DB0_PACKED_BEGIN struct DB0_PACKED_ATTR o_tag_index: public o_fixed_versioned @@ -88,10 +89,12 @@ DB0_PACKED_END * @param type optional type to match by * @param observer buffer to receive query observers (possibly inherited from inner queries) * @param no_result flag indicating if an empty query iterator should be returned + * @param native_args already-resolved native ObjectIterable queries to AND-combine with args + * (used when the caller cannot or should not pass the original language object to query planning) */ std::unique_ptr find(ObjectPtr const *args, std::size_t nargs, std::shared_ptr type, std::vector > &observers, - bool no_result = false) const; + bool no_result = false, const std::vector &native_args = {}) const; /** * Split query by all values from a specific tags_list (can be either short or long tag definitions) @@ -227,6 +230,9 @@ DB0_PACKED_END bool addIterator(ObjectPtr, db0::FT_IteratorFactory &factory, std::vector > &neg_iterators, std::vector > &query_observers) const; + bool addIterator(const ObjectIterable &, db0::FT_IteratorFactory &factory, + std::vector > &neg_iterators, + std::vector > &query_observers) const; bool addCompositeIterator(const CompositeTagDef &, db0::FT_IteratorFactory &factory, std::vector > &query_observers) const; std::optional tryGetCompositeKey(ObjectPtr) const; From bd5f215be15aa811694d3a69c274f2f10c75b0b4 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 27 May 2026 22:03:40 +0200 Subject: [PATCH 05/11] data filters / fetch integration --- python_tests/test_data_filter.py | 78 +++++++++++++++++++- python_tests/test_memo_protect_fields.py | 9 +++ src/dbzero/bindings/python/PyInternalAPI.cpp | 56 +++++++++++++- src/dbzero/bindings/python/PyInternalAPI.hpp | 14 ++++ src/dbzero/bindings/python/PyTagsAPI.cpp | 4 - 
src/dbzero/bindings/python/PyToolkit.cpp | 31 ++++++-- src/dbzero/bindings/python/PyToolkit.hpp | 9 ++- src/dbzero/object_model/tags/TagIndex.cpp | 15 ++++ src/dbzero/object_model/tags/TagIndex.hpp | 2 + 9 files changed, 200 insertions(+), 18 deletions(-) diff --git a/python_tests/test_data_filter.py b/python_tests/test_data_filter.py index f77425b8..f185418c 100644 --- a/python_tests/test_data_filter.py +++ b/python_tests/test_data_filter.py @@ -312,7 +312,7 @@ def test_data_filter_requires_predicate_object(db0_fixture): predicate.set(db0.find(FilteredFindPublicClass)) db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) - with pytest.raises(PermissionError): + with pytest.raises(RuntimeError, match="db0.predicate"): db0.find(FilteredFindClass) @@ -345,6 +345,82 @@ def test_data_filter_prefix_scope_only_filters_configured_prefix(db0_fixture): assert list(db0.find(DynamicPrefixFilteredFindClass, "visible", prefix=other_prefix)) == [unfiltered] +def test_data_filter_fetch_requires_data_filter_for_access_controlled_type(db0_fixture): + obj = FilteredFindClass("visible") + + with pytest.raises(PermissionError): + db0.fetch(db0.uuid(obj)) + + +def test_data_filter_fetch_release_mode_requires_predicate(db0_fixture): + obj = FilteredFindClass("visible") + predicate.set(None) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + with pytest.raises(PermissionError): + db0.fetch(db0.uuid(obj)) + + +def test_data_filter_fetch_debug_mode_allows_null_predicate(db0_fixture): + obj = FilteredFindClass("visible") + predicate.set(None) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix(), mode="DEBUG") + + assert db0.fetch(db0.uuid(obj)) is obj + + +def test_data_filter_predicate_filters_access_controlled_fetch(db0_fixture): + allowed = FilteredFindClass("allowed") + denied = FilteredFindClass("denied") + db0.tags(allowed).add("grant") + + predicate.set(db0.predicate("grant")) + db0._init_data_filter(predicate, 
prefix=db0.get_current_prefix()) + + assert db0.fetch(db0.uuid(allowed)) is allowed + with pytest.raises(RuntimeError, match="Invalid UUID or object has been deleted"): + db0.fetch(db0.uuid(denied)) + + +def test_data_filter_predicate_filters_access_controlled_snapshot_fetch(db0_fixture): + allowed = FilteredFindClass("snapshot-allowed") + denied = FilteredFindClass("snapshot-denied") + db0.tags(allowed).add("grant") + db0.commit() + + predicate.set(db0.predicate("grant")) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + snap = db0.snapshot() + + assert snap.fetch(db0.uuid(allowed)).value == "snapshot-allowed" + with pytest.raises(RuntimeError, match="Invalid UUID or object has been deleted"): + snap.fetch(db0.uuid(denied)) + + +def test_data_filter_fetch_does_not_filter_public_type(db0_fixture): + obj = FilteredFindPublicClass("public") + predicate.set(db0.predicate("grant")) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + assert db0.fetch(db0.uuid(obj)) is obj + + +def test_data_filter_prefix_scope_only_filters_configured_prefix_fetch(db0_fixture): + px_name = db0.get_current_prefix().name + other_prefix = "unfiltered-data-filter-fetch-prefix" + + filtered = DynamicPrefixFilteredFindClass("filtered", prefix=px_name) + db0.open(other_prefix) + unfiltered = DynamicPrefixFilteredFindClass("unfiltered", prefix=other_prefix) + + predicate.set(db0.predicate("grant", prefix=px_name)) + db0._init_data_filter(predicate, prefix=px_name) + + with pytest.raises(RuntimeError, match="Invalid UUID or object has been deleted"): + db0.fetch(db0.uuid(filtered)) + assert db0.fetch(db0.uuid(unfiltered)) is unfiltered + + def test_init_can_initialize_prefix_data_filter_after_opening_prefix(db0_fixture): db0.close() init_predicate = ContextVar("init_prefix_data_filter_predicate") diff --git a/python_tests/test_memo_protect_fields.py b/python_tests/test_memo_protect_fields.py index 5ba244fd..5d1baaa0 100644 --- 
a/python_tests/test_memo_protect_fields.py +++ b/python_tests/test_memo_protect_fields.py @@ -16,6 +16,12 @@ from .conftest import DB0_DIR +def allow_access_controlled_fetches(): + predicate = ContextVar("allow_access_controlled_fetches") + predicate.set(None) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix(), mode="DEBUG") + + @db0.enum(values=["CREATE", "READ", "UPDATE", "DELETE"]) class FieldAccess: pass @@ -293,6 +299,7 @@ class AccessControlledBefore: class AccessControlledAfter: name: str + allow_access_controlled_fetches() obj = db0.fetch(AccessControlledAfter, obj_id) assert get_memo_class_object(obj).get_type_flags()["access_control"] is True @@ -317,6 +324,7 @@ class AccessControlledBefore: class AccessControlledAfter: name: str + allow_access_controlled_fetches() obj = db0.fetch(AccessControlledAfter, obj_id) assert get_memo_class_object(obj).get_type_flags()["access_control"] is True @@ -341,6 +349,7 @@ class AccessControlledBefore: class AccessControlledAfter: name: str + allow_access_controlled_fetches() obj = db0.fetch(AccessControlledAfter, obj_id) assert get_memo_class_object(obj).get_type_flags()["access_control"] is True diff --git a/src/dbzero/bindings/python/PyInternalAPI.cpp b/src/dbzero/bindings/python/PyInternalAPI.cpp index 7f38061d..9306cdfd 100644 --- a/src/dbzero/bindings/python/PyInternalAPI.cpp +++ b/src/dbzero/bindings/python/PyInternalAPI.cpp @@ -25,8 +25,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -36,6 +38,7 @@ #include #include #include +#include namespace db0::python @@ -128,6 +131,51 @@ namespace db0::python } return result.steal(); } + + bool setFetchPermissionError(const char *message) + { + throw PermissionException(message); + } + + void throwMissingObject() + { + THROWF(db0::InputException) << "Invalid UUID or object has been deleted"; + } + + } + + bool authorizeDataFilterFetch(db0::swine_ptr &fixture, const db0::object_model::Class &type, + 
UniqueAddress address) + { + if (!type.isAccessControl()) { + return true; + } + + auto filter_state = fixture->getFilterState(); + if (!filter_state && !db0::Settings::m_data_filter_enabled) { + return setFetchPermissionError("data filter must be initialized before fetching an access-controlled type"); + } + if (!filter_state) { + return true; + } + + PyObject *py_predicate = nullptr; + if (PyContextVar_Get(filter_state->contextVar, NULL, &py_predicate) < 0) { + THROWF(db0::InputException) << "Unable to get data filter predicate: " << PyToolkit::getLastError(); + } + auto owned_predicate = Py_OWN(py_predicate); + if (!py_predicate || py_predicate == Py_None) { + if (filter_state->mode == db0::DataMaskingMode::DEBUG) { + return true; + } + return setFetchPermissionError("data filter predicate is not set"); + } + auto native_predicate = fixture->get().get(py_predicate); + if (fixture->get().contains(address, *native_predicate)) { + return true; + } + throwMissingObject(); + return false; } LoadGuard::LoadGuard(std::unordered_set *load_stack_ptr, const void *arg_ptr) @@ -284,7 +332,7 @@ namespace db0::python auto expected_class = class_factory.getOrCreateType(py_expected_type); // honor class-specific access flags (e.g. 
type-level no_cache) auto result = PyToolkit::unloadAnyObject(fixture, addr.getAddress(), class_factory, nullptr, addr.getInstanceId(), - expected_class->getInstanceFlags() + expected_class->getInstanceFlags(), true ); // NOTE: base types should be accepted if (!PyToolkit::getMemoType(result.get()).isBaseClass(*expected_class)) { @@ -293,7 +341,8 @@ namespace db0::python return result; } else { // unload without type validation - return PyToolkit::unloadAnyObject(fixture, addr.getAddress(), class_factory, py_expected_type, addr.getInstanceId()); + return PyToolkit::unloadAnyObject(fixture, addr.getAddress(), class_factory, py_expected_type, + addr.getInstanceId(), {}, true); } } else if (storage_class == db0::object_model::StorageClass::DB0_CLASS) { auto &class_factory = db0::object_model::getClassFactory(*fixture); @@ -337,6 +386,9 @@ namespace db0::python if (!type->isExistingSingleton()) { THROWF(db0::InputException) << "Singleton instance does not exist"; } + if (!authorizeDataFilterFetch(fixture, *type, type->getSingletonObjectId().m_address)) { + return nullptr; + } MemoObject *memo_obj = reinterpret_cast(py_type->tp_alloc(py_type, 0)); type->unloadSingleton(&memo_obj->modifyExt()); diff --git a/src/dbzero/bindings/python/PyInternalAPI.hpp b/src/dbzero/bindings/python/PyInternalAPI.hpp index ab45d2d6..0f2537ca 100644 --- a/src/dbzero/bindings/python/PyInternalAPI.hpp +++ b/src/dbzero/bindings/python/PyInternalAPI.hpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ namespace db0::object_model { class ObjectIterable; + class Class; } @@ -49,6 +51,12 @@ namespace db0::python using ObjectId = db0::object_model::ObjectId; using ObjectIterable = db0::object_model::ObjectIterable; + + class PermissionException : public std::runtime_error + { + public: + using std::runtime_error::runtime_error; + }; class LoadGuard { @@ -75,6 +83,9 @@ namespace db0::python PyObject *fetchMemoObject(db0::swine_ptr &, ObjectId); 
PyObject *fetchListObject(db0::swine_ptr &, ObjectId); + + bool authorizeDataFilterFetch(db0::swine_ptr &fixture, const db0::object_model::Class &type, + UniqueAddress address); /** * Open dbzero object from a specific fixture @@ -115,6 +126,9 @@ namespace db0::python return returnError(); } return result; + } catch (const PermissionException &e) { + PyErr_SetString(PyExc_PermissionError, e.what()); + return returnError(); } catch (const db0::BadAddressException &e) { PyErr_SetString(PyToolkit::getTypeManager().getReferenceError(), e.what()); return returnError(); diff --git a/src/dbzero/bindings/python/PyTagsAPI.cpp b/src/dbzero/bindings/python/PyTagsAPI.cpp index 1c9afddd..8d14961c 100644 --- a/src/dbzero/bindings/python/PyTagsAPI.cpp +++ b/src/dbzero/bindings/python/PyTagsAPI.cpp @@ -65,10 +65,6 @@ namespace db0::python } return setFindPermissionError("data filter predicate is not set"); } - if (!PyObjectIterable_Check(py_predicate) - || !reinterpret_cast(py_predicate)->ext().isPredicateOnly()) { - return setFindPermissionError("data filter predicate must be created with db0.predicate"); - } native_predicates.push_back(fixture->get().get(py_predicate)); return true; } diff --git a/src/dbzero/bindings/python/PyToolkit.cpp b/src/dbzero/bindings/python/PyToolkit.cpp index d2292749..c7d3dd13 100644 --- a/src/dbzero/bindings/python/PyToolkit.cpp +++ b/src/dbzero/bindings/python/PyToolkit.cpp @@ -521,7 +521,7 @@ namespace db0::python static PyToolkit::ObjectSharedPtr tryUnloadObjectResolved( db0::swine_ptr &fixture, Address address, const PyToolkit::ClassFactory &class_factory, PyToolkit::TypeObjectPtr lang_type_ptr, std::uint16_t instance_id, AccessFlags access_mode, - const Allocator::AllocationInfo *allocationInfo) + const Allocator::AllocationInfo *allocationInfo, bool authorize_data_filter) { // try unloading from cache first auto &lang_cache = fixture->getLangCache(); @@ -538,6 +538,10 @@ namespace db0::python return {}; } } + if (authorize_data_filter) { + 
authorizeDataFilterFetch( + fixture, PyToolkit::getMemoType(obj_ptr.get()), PyToolkit::getMemoUniqueAddress(obj_ptr.get())); + } return obj_ptr; } @@ -565,6 +569,9 @@ namespace db0::python >(std::move(commonStem)); auto typeInfo = class_factory.getTypeByClassRef(stem->getClassRef()); auto type = typeInfo.m_class; + if (authorize_data_filter) { + authorizeDataFilterFetch(fixture, *type, UniqueAddress(address, stem->m_header.getInstanceId())); + } lang_type_ptr = resolveUnloadLangType(class_factory, type, typeInfo.m_lang_type, lang_type_ptr); auto *memo_ptr = reinterpret_cast(lang_type_ptr->tp_alloc(lang_type_ptr, 0)); @@ -584,6 +591,9 @@ namespace db0::python >(std::move(commonStem)); auto typeInfo = class_factory.getTypeByClassRef(stem->getClassRef()); auto type = typeInfo.m_class; + if (authorize_data_filter) { + authorizeDataFilterFetch(fixture, *type, UniqueAddress(address, stem->m_header.getInstanceId())); + } lang_type_ptr = resolveUnloadLangType(class_factory, type, typeInfo.m_lang_type, lang_type_ptr); // construct Python's memo object (placeholder for actual dbzero instance) @@ -601,10 +611,12 @@ namespace db0::python PyToolkit::ObjectSharedPtr PyToolkit::tryUnloadObject( db0::swine_ptr &fixture, Address address, const ClassFactory &class_factory, - TypeObjectPtr lang_type_ptr, std::uint16_t instance_id, AccessFlags access_mode) + TypeObjectPtr lang_type_ptr, std::uint16_t instance_id, AccessFlags access_mode, + bool authorize_data_filter) { return tryUnloadObjectResolved( - fixture, address, class_factory, lang_type_ptr, instance_id, access_mode, nullptr + fixture, address, class_factory, lang_type_ptr, instance_id, access_mode, nullptr, + authorize_data_filter ); } @@ -637,7 +649,7 @@ namespace db0::python if (!rootObject) { rootObject = tryUnloadObjectResolved( fixture, allocationInfo->address, class_factory, lang_type_ptr, instance_id, access_mode, - allocationInfo + allocationInfo, false ); if (!rootObject) { THROWF(db0::InputException) << "Invalid 
UUID or object has been deleted"; @@ -654,11 +666,13 @@ namespace db0::python PyToolkit::ObjectSharedPtr PyToolkit::unloadAnyObject( db0::swine_ptr &fixture, Address address, const ClassFactory &class_factory, - TypeObjectPtr lang_type_ptr, std::uint16_t instance_id, AccessFlags access_mode) + TypeObjectPtr lang_type_ptr, std::uint16_t instance_id, AccessFlags access_mode, + bool authorize_data_filter) { auto allocation = fixture->findAllocation(address, db0::object_model::ObjectImmutableImpl::REALM_ID); auto rootObject = tryUnloadObjectResolved( - fixture, allocation.address, class_factory, lang_type_ptr, instance_id, access_mode, &allocation + fixture, allocation.address, class_factory, lang_type_ptr, instance_id, access_mode, &allocation, + authorize_data_filter ); if (!rootObject) { THROWF(db0::InputException) << "Invalid UUID or object has been deleted"; @@ -701,10 +715,11 @@ namespace db0::python } PyToolkit::ObjectSharedPtr PyToolkit::unloadObject(db0::swine_ptr &fixture, Address address, - const ClassFactory &class_factory, TypeObjectPtr lang_type_ptr, std::uint16_t instance_id, AccessFlags access_mode) + const ClassFactory &class_factory, TypeObjectPtr lang_type_ptr, std::uint16_t instance_id, AccessFlags access_mode, + bool authorize_data_filter) { auto result = tryUnloadObject( - fixture, address, class_factory, lang_type_ptr, instance_id, access_mode + fixture, address, class_factory, lang_type_ptr, instance_id, access_mode, authorize_data_filter ); if (!result) { THROWF(db0::InputException) << "Invalid UUID or object has been deleted"; diff --git a/src/dbzero/bindings/python/PyToolkit.hpp b/src/dbzero/bindings/python/PyToolkit.hpp index a4196cb6..5f3a2a7a 100644 --- a/src/dbzero/bindings/python/PyToolkit.hpp +++ b/src/dbzero/bindings/python/PyToolkit.hpp @@ -107,11 +107,14 @@ namespace db0::python // Unload with type resolution // optionally may use specific lang class (e.g. 
MemoBase) static ObjectSharedPtr unloadObject(db0::swine_ptr &, Address, const ClassFactory &, - TypeObjectPtr lang_class = nullptr, std::uint16_t instance_id = 0, AccessFlags = {}); + TypeObjectPtr lang_class = nullptr, std::uint16_t instance_id = 0, AccessFlags = {}, + bool authorize_data_filter = false); static ObjectSharedPtr tryUnloadObject(db0::swine_ptr &, Address, const ClassFactory &, - TypeObjectPtr lang_class = nullptr, std::uint16_t instance_id = 0, AccessFlags = {}); + TypeObjectPtr lang_class = nullptr, std::uint16_t instance_id = 0, AccessFlags = {}, + bool authorize_data_filter = false); static ObjectSharedPtr unloadAnyObject(db0::swine_ptr &, Address, const ClassFactory &, - TypeObjectPtr lang_class = nullptr, std::uint16_t instance_id = 0, AccessFlags = {}); + TypeObjectPtr lang_class = nullptr, std::uint16_t instance_id = 0, AccessFlags = {}, + bool authorize_data_filter = false); static ObjectSharedPtr unloadAnyObject(db0::swine_ptr &, Address, std::shared_ptr type_hint, TypeObjectPtr lang_class = nullptr, std::uint16_t instance_id = 0, AccessFlags = {}); diff --git a/src/dbzero/object_model/tags/TagIndex.cpp b/src/dbzero/object_model/tags/TagIndex.cpp index eccabd7c..3f5967c2 100644 --- a/src/dbzero/object_model/tags/TagIndex.cpp +++ b/src/dbzero/object_model/tags/TagIndex.cpp @@ -661,6 +661,21 @@ namespace db0::object_model factory.add(std::move(ft_query)); return true; } + + bool TagIndex::contains(UniqueAddress address, const ObjectIterable &native_arg) const + { + std::vector > query_observers; + auto native_query = native_arg.beginFTQuery(query_observers, -1); + if (!native_query || native_query->isEnd()) { + return false; + } + + db0::FT_ANDIteratorFactory factory; + factory.add(std::make_unique >(&address, &address + 1)); + factory.add(std::move(native_query)); + auto query = factory.release(-1); + return query && !query->isEnd(); + } bool TagIndex::addIterator(ObjectPtr arg, db0::FT_IteratorFactory &factory, std::vector > 
&neg_iterators, std::vector > &query_observers) const diff --git a/src/dbzero/object_model/tags/TagIndex.hpp b/src/dbzero/object_model/tags/TagIndex.hpp index 005eb8c3..d122dd91 100644 --- a/src/dbzero/object_model/tags/TagIndex.hpp +++ b/src/dbzero/object_model/tags/TagIndex.hpp @@ -95,6 +95,8 @@ DB0_PACKED_END std::unique_ptr find(ObjectPtr const *args, std::size_t nargs, std::shared_ptr type, std::vector > &observers, bool no_result = false, const std::vector &native_args = {}) const; + + bool contains(UniqueAddress address, const ObjectIterable &native_arg) const; /** * Split query by all values from a specific tags_list (can be either short or long tag definitions) From ed0a0953d6d95368729775130a632be995808f32 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 28 May 2026 09:58:52 +0200 Subject: [PATCH 06/11] protecting inner references traversal --- AGENTS.md | 2 +- python_tests/test_data_filter.py | 68 +++++++++++++++++++- src/dbzero/bindings/python/PyInternalAPI.cpp | 45 ++++++++----- src/dbzero/bindings/python/PyToolkit.cpp | 61 +++++++++++------- src/dbzero/bindings/python/PyToolkit.hpp | 3 +- src/dbzero/object_model/value/Member.cpp | 27 ++++---- 6 files changed, 149 insertions(+), 57 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 176e7afb..df8ca9d4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -53,7 +53,7 @@ Variable-size overlaid types that derive from another overlaid type must use `db ### C++ style -- Use camelCase for local helper variables, lambdas, and method names in C++ code. +- Use snake_case for parameter names and local variable names in C++ code. Parameter names should be concise yet informative. Keep method names consistent with the surrounding code. - Project types often avoid implicit bool conversion because it can hide subtle ownership, state, and null-check bugs. Use explicit double-negation checks such as `if (!!obj)` or `while (!!item)` when a type supports `operator!()`. 
### Python binding wrapper access diff --git a/python_tests/test_data_filter.py b/python_tests/test_data_filter.py index f185418c..24238e8d 100644 --- a/python_tests/test_data_filter.py +++ b/python_tests/test_data_filter.py @@ -50,6 +50,18 @@ class FilteredFindDerivedClass(FilteredFindBaseClass): extra: str +@db0.memo +class FilteredReferenceHolder: + def __init__(self, payload): + self.payload = payload + + +@db0.memo(immutable=True) +class FilteredImmutableReferenceHolder: + def __init__(self, payload): + self.payload = payload + + def test_init_data_filter_prefix_scoped_lifecycle(db0_fixture): current_prefix = db0.get_current_prefix() @@ -348,7 +360,7 @@ def test_data_filter_prefix_scope_only_filters_configured_prefix(db0_fixture): def test_data_filter_fetch_requires_data_filter_for_access_controlled_type(db0_fixture): obj = FilteredFindClass("visible") - with pytest.raises(PermissionError): + with pytest.raises(RuntimeError, match="Invalid UUID or object has been deleted"): db0.fetch(db0.uuid(obj)) @@ -357,7 +369,7 @@ def test_data_filter_fetch_release_mode_requires_predicate(db0_fixture): predicate.set(None) db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) - with pytest.raises(PermissionError): + with pytest.raises(RuntimeError, match="Invalid UUID or object has been deleted"): db0.fetch(db0.uuid(obj)) @@ -421,6 +433,58 @@ def test_data_filter_prefix_scope_only_filters_configured_prefix_fetch(db0_fixtu assert db0.fetch(db0.uuid(unfiltered)) is unfiltered +def test_data_filter_predicate_filters_mutable_memo_reference(db0_fixture): + allowed = FilteredFindClass("allowed") + denied = FilteredFindClass("denied") + db0.tags(allowed).add("grant") + allowed_holder = FilteredReferenceHolder(allowed) + denied_holder = FilteredReferenceHolder(denied) + + predicate.set(db0.predicate("grant")) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + assert allowed_holder.payload is allowed + with pytest.raises(PermissionError): + 
denied_holder.payload + + +def test_data_filter_predicate_filters_reference_from_immutable_memo(db0_fixture): + allowed = FilteredFindClass("immutable-source-allowed") + denied = FilteredFindClass("immutable-source-denied") + db0.tags(allowed).add("grant") + allowed_holder = FilteredImmutableReferenceHolder(allowed) + denied_holder = FilteredImmutableReferenceHolder(denied) + + predicate.set(db0.predicate("grant")) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + assert allowed_holder.payload is allowed + with pytest.raises(PermissionError): + denied_holder.payload + db0.commit() + + +def test_data_filter_predicate_filters_references_from_collections(db0_fixture): + allowed = FilteredFindClass("collection-allowed") + denied = FilteredFindClass("collection-denied") + db0.tags(allowed).add("grant") + list_holder = FilteredReferenceHolder(db0.list([allowed, denied])) + set_holder = FilteredReferenceHolder(db0.set([denied])) + dict_holder = FilteredReferenceHolder(db0.dict({"allowed": allowed, "denied": denied})) + + predicate.set(db0.predicate("grant")) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + assert list_holder.payload[0] is allowed + with pytest.raises(PermissionError): + list_holder.payload[1] + with pytest.raises(PermissionError): + next(iter(set_holder.payload)) + assert dict_holder.payload["allowed"] is allowed + with pytest.raises(PermissionError): + dict_holder.payload["denied"] + + def test_init_can_initialize_prefix_data_filter_after_opening_prefix(db0_fixture): db0.close() init_predicate = ContextVar("init_prefix_data_filter_predicate") diff --git a/src/dbzero/bindings/python/PyInternalAPI.cpp b/src/dbzero/bindings/python/PyInternalAPI.cpp index 9306cdfd..b09dbb7f 100644 --- a/src/dbzero/bindings/python/PyInternalAPI.cpp +++ b/src/dbzero/bindings/python/PyInternalAPI.cpp @@ -174,8 +174,7 @@ namespace db0::python if (fixture->get().contains(address, *native_predicate)) { return true; } - 
throwMissingObject(); - return false; + return setFetchPermissionError("data filter predicate does not include referenced object"); } LoadGuard::LoadGuard(std::unordered_set *load_stack_ptr, const void *arg_ptr) @@ -326,23 +325,29 @@ namespace db0::python auto addr = object_id.m_address; if (storage_class == db0::object_model::StorageClass::OBJECT_REF) { auto &class_factory = db0::object_model::getClassFactory(*fixture); - // validate type if requested (no validation for MemoBase) - if (py_expected_type && !PyToolkit::getTypeManager().isMemoBase(py_expected_type)) { - // in other cases the type must match the actual object type - auto expected_class = class_factory.getOrCreateType(py_expected_type); - // honor class-specific access flags (e.g. type-level no_cache) - auto result = PyToolkit::unloadAnyObject(fixture, addr.getAddress(), class_factory, nullptr, addr.getInstanceId(), - expected_class->getInstanceFlags(), true - ); - // NOTE: base types should be accepted - if (!PyToolkit::getMemoType(result.get()).isBaseClass(*expected_class)) { - THROWF(db0::InputException) << "Object type mismatch"; + try { + // validate type if requested (no validation for MemoBase) + if (py_expected_type && !PyToolkit::getTypeManager().isMemoBase(py_expected_type)) { + // in other cases the type must match the actual object type + auto expected_class = class_factory.getOrCreateType(py_expected_type); + // honor class-specific access flags (e.g. 
type-level no_cache) + auto result = PyToolkit::unloadAnyObject( + fixture, addr.getAddress(), class_factory, nullptr, addr.getInstanceId(), + expected_class->getInstanceFlags(), true + ); + // NOTE: base types should be accepted + if (!PyToolkit::getMemoType(result.get()).isBaseClass(*expected_class)) { + THROWF(db0::InputException) << "Object type mismatch"; + } + return result; } - return result; - } else { // unload without type validation return PyToolkit::unloadAnyObject(fixture, addr.getAddress(), class_factory, py_expected_type, addr.getInstanceId(), {}, true); + } catch (const PermissionException &) { + // Explicit fetch must not reveal whether a denied object exists. + // Preserve the same public error used for missing or deleted UUIDs. + throwMissingObject(); } } else if (storage_class == db0::object_model::StorageClass::DB0_CLASS) { auto &class_factory = db0::object_model::getClassFactory(*fixture); @@ -386,8 +391,14 @@ namespace db0::python if (!type->isExistingSingleton()) { THROWF(db0::InputException) << "Singleton instance does not exist"; } - if (!authorizeDataFilterFetch(fixture, *type, type->getSingletonObjectId().m_address)) { - return nullptr; + try { + if (!authorizeDataFilterFetch(fixture, *type, type->getSingletonObjectId().m_address)) { + return nullptr; + } + } catch (const PermissionException &) { + // Explicit fetch must not reveal whether a denied object exists. + // Preserve the same public error used for missing or deleted UUIDs. 
+ throwMissingObject(); } MemoObject *memo_obj = reinterpret_cast(py_type->tp_alloc(py_type, 0)); diff --git a/src/dbzero/bindings/python/PyToolkit.cpp b/src/dbzero/bindings/python/PyToolkit.cpp index c7d3dd13..7b0ada78 100644 --- a/src/dbzero/bindings/python/PyToolkit.cpp +++ b/src/dbzero/bindings/python/PyToolkit.cpp @@ -623,45 +623,55 @@ namespace db0::python PyToolkit::ObjectSharedPtr PyToolkit::unloadEmbeddedObject( db0::swine_ptr &fixture, Address address, const PyToolkit::ClassFactory &class_factory, PyToolkit::TypeObjectPtr lang_type_ptr, std::uint16_t instance_id, AccessFlags access_mode, - ObjectSharedPtr rootObject, const Allocator::AllocationInfo *allocationInfo) + ObjectSharedPtr root_object, const Allocator::AllocationInfo *alloc_info, bool authorize_data_filter) { - auto &langCache = fixture->getLangCache(); - auto cachedObject = langCache.get(address); - if (!!cachedObject) { + auto &lang_cache = fixture->getLangCache(); + auto cached_object = lang_cache.get(address); + if (!!cached_object) { if (instance_id) { - auto cachedAddress = tryGetEmbeddedUniqueAddress(cachedObject.get()); - if (!cachedAddress || cachedAddress->getInstanceId() != instance_id) { + auto cached_address = tryGetEmbeddedUniqueAddress(cached_object.get()); + if (!cached_address || cached_address->getInstanceId() != instance_id) { THROWF(db0::InputException) << "Invalid UUID or object has been deleted"; } } - return cachedObject; + if (authorize_data_filter) { + authorizeDataFilterFetch( + fixture, PyToolkit::getMemoType(cached_object.get()), + PyToolkit::getMemoUniqueAddress(cached_object.get())); + } + return cached_object; } - Allocator::AllocationInfo alloc_info; - if (!allocationInfo) { - alloc_info = fixture->findAllocation(address, db0::object_model::ObjectImmutableImpl::REALM_ID); - allocationInfo = &alloc_info; + Allocator::AllocationInfo local_alloc_info; + if (!alloc_info) { + local_alloc_info = fixture->findAllocation(address, 
db0::object_model::ObjectImmutableImpl::REALM_ID); + alloc_info = &local_alloc_info; } - assert(allocationInfo); - auto embeddedOffset = address.getOffset() - allocationInfo->address.getOffset(); + assert(alloc_info); + auto embedded_offset = address.getOffset() - alloc_info->address.getOffset(); // Resolve the root object if not provided - if (!rootObject) { - rootObject = tryUnloadObjectResolved( - fixture, allocationInfo->address, class_factory, lang_type_ptr, instance_id, access_mode, - allocationInfo, false + if (!root_object) { + root_object = tryUnloadObjectResolved( + fixture, alloc_info->address, class_factory, lang_type_ptr, instance_id, access_mode, + alloc_info, false ); - if (!rootObject) { + if (!root_object) { THROWF(db0::InputException) << "Invalid UUID or object has been deleted"; } } - assert(!!rootObject); - auto *rootMemo = reinterpret_cast(rootObject.get()); - auto embeddedObject = rootMemo->ext().getEmbeddedInstanceAtOffset(embeddedOffset); - if (shouldCacheEmbeddedObject(embeddedObject.get())) { - langCache.add(address, embeddedObject.get()); + assert(!!root_object); + auto *root_memo = reinterpret_cast(root_object.get()); + auto embedded_object = root_memo->ext().getEmbeddedInstanceAtOffset(embedded_offset); + if (authorize_data_filter) { + authorizeDataFilterFetch( + fixture, PyToolkit::getMemoType(embedded_object.get()), + PyToolkit::getMemoUniqueAddress(embedded_object.get())); + } + if (shouldCacheEmbeddedObject(embedded_object.get())) { + lang_cache.add(address, embedded_object.get()); } - return embeddedObject; + return embedded_object; } PyToolkit::ObjectSharedPtr PyToolkit::unloadAnyObject( @@ -682,7 +692,8 @@ namespace db0::python } return unloadEmbeddedObject( - fixture, address, class_factory, lang_type_ptr, instance_id, access_mode, rootObject, &allocation + fixture, address, class_factory, lang_type_ptr, instance_id, access_mode, rootObject, &allocation, + authorize_data_filter ); } diff --git 
a/src/dbzero/bindings/python/PyToolkit.hpp b/src/dbzero/bindings/python/PyToolkit.hpp index 5f3a2a7a..8e4d753f 100644 --- a/src/dbzero/bindings/python/PyToolkit.hpp +++ b/src/dbzero/bindings/python/PyToolkit.hpp @@ -120,7 +120,8 @@ namespace db0::python std::uint16_t instance_id = 0, AccessFlags = {}); static ObjectSharedPtr unloadEmbeddedObject(db0::swine_ptr &, Address, const ClassFactory &, TypeObjectPtr lang_class = nullptr, std::uint16_t instance_id = 0, AccessFlags = {}, - ObjectSharedPtr root_object = {}, const Allocator::AllocationInfo *allocation_info = nullptr); + ObjectSharedPtr root_object = {}, const Allocator::AllocationInfo *allocation_info = nullptr, + bool authorize_data_filter = false); static ObjectSharedPtr unloadObject(db0::swine_ptr &, Address, TypeObjectPtr lang_class = nullptr, std::uint16_t instance_id = 0, AccessFlags = {}); diff --git a/src/dbzero/object_model/value/Member.cpp b/src/dbzero/object_model/value/Member.cpp index e98d7689..2e5f26d5 100644 --- a/src/dbzero/object_model/value/Member.cpp +++ b/src/dbzero/object_model/value/Member.cpp @@ -24,7 +24,6 @@ namespace db0::object_model { - // DB0_WEAK_PROXY specialization (defined early so resolveForFixture can call it) template <> Value createMember(db0::swine_ptr &fixture, PyObjectPtr obj_ptr, StorageClass storage_class, AccessFlags) @@ -450,17 +449,19 @@ namespace db0::object_model db0::swine_ptr &fixture, Value value, unsigned int, AccessFlags) { auto &class_factory = fixture->template get(); - return PyToolkit::unloadObject(fixture, value.asAddress(), class_factory); + auto address = value.asAddress(); + return PyToolkit::unloadObject(fixture, address, class_factory, nullptr, 0, {}, true); } // EMBEDDED_OBJECT_REF specialization template <> typename PyToolkit::ObjectSharedPtr unloadMember( db0::swine_ptr &fixture, Value value, unsigned int, AccessFlags access_mode) { - auto embeddedAddress = value.asUniqueAddress(); - auto &classFactory = fixture->template get(); + auto 
embedded_address = value.asUniqueAddress(); + auto &class_factory = fixture->template get(); return PyToolkit::unloadEmbeddedObject( - fixture, embeddedAddress.getAddress(), classFactory, nullptr, embeddedAddress.getInstanceId(), access_mode + fixture, embedded_address.getAddress(), class_factory, nullptr, embedded_address.getInstanceId(), access_mode, + {}, nullptr, true ); } @@ -639,9 +640,11 @@ namespace db0::object_model db0::swine_ptr &fixture, Value value, unsigned int, AccessFlags) { auto address = value.asUniqueAddress(); - // NOTE: instance_id not validated since it's a trusted reference - if (PyToolkit::isExistingObject(fixture, address.getAddress())) { - return PyToolkit::unloadObject(fixture, address); + auto &class_factory = fixture->template get(); + auto result = PyToolkit::tryUnloadObject( + fixture, address.getAddress(), class_factory, nullptr, address.getInstanceId(), {}, true); + if (!!result) { + return result; } else { // NOTE: expired objects are unloaded as MemoExpiredRef (placeholders) return PyToolkit::unloadExpiredRef(fixture, address.getAddress(), fixture->getUUID(), address); @@ -655,9 +658,11 @@ namespace db0::object_model LongWeakRef weak_ref(fixture, value.asAddress()); auto other_fixture = fixture->getWorkspace().getFixture(weak_ref->m_fixture_uuid); auto address = weak_ref->m_address; - if (PyToolkit::isExistingObject(other_fixture, address.getAddress())) { - // unload object from a foreign prefix - return PyToolkit::unloadObject(other_fixture, address); + auto &class_factory = other_fixture->template get(); + auto result = PyToolkit::tryUnloadObject( + other_fixture, address.getAddress(), class_factory, nullptr, address.getInstanceId(), {}, true); + if (!!result) { + return result; } else { // NOTE: expired objects are unloaded as MemoExpiredRef (placeholders) return PyToolkit::unloadExpiredRef(fixture, weak_ref); From 9216b1a54b0e87b361a040756fe587ec3e1529e9 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 28 May 2026 10:43:01 
+0200 Subject: [PATCH 07/11] ObjectIterable / refactor to handle predicates in a dedicated manner --- design/DATA_FILTERS_DESIGN.md | 24 ++-- python_tests/test_data_filter.py | 127 +++++++++++++++++- src/dbzero/bindings/python/PyInternalAPI.cpp | 31 +++++ src/dbzero/bindings/python/PyInternalAPI.hpp | 7 + src/dbzero/bindings/python/PyTagsAPI.cpp | 57 +------- .../bindings/python/collections/PyIndex.cpp | 13 +- .../bindings/python/iter/PyObjectIterable.cpp | 29 ++++ .../object_model/tags/ObjectIterable.cpp | 67 +++++++-- .../object_model/tags/ObjectIterable.hpp | 5 + 9 files changed, 282 insertions(+), 78 deletions(-) diff --git a/design/DATA_FILTERS_DESIGN.md b/design/DATA_FILTERS_DESIGN.md index 584be725..3e75dad8 100644 --- a/design/DATA_FILTERS_DESIGN.md +++ b/design/DATA_FILTERS_DESIGN.md @@ -85,7 +85,8 @@ The descendant-to-base rule is necessary for typed base queries. If any `MemoBas The implementation must preserve these invariants: -- Typeless `find` is not allowed for filtered prefixes. +- Typeless queries are allowed for filtered prefixes, but are filtered by the current predicate. +- Predicate filtering is bypassed only when an explicit result type is provided and that type is known not to require access control. - An access-controlled type cannot be queried unless data filtering has been initialized for the relevant prefix. - An access-controlled object cannot be fetched unless the current predicate includes it. - An access-controlled object cannot be exposed through an application-visible durable reference unless the current predicate includes it. @@ -102,18 +103,18 @@ When `db0.find(...)` is called, dbzero first determines the requested prefix and If prefix-level filtering is enabled: -- A query without an explicit type raises `PermissionError`. -- This applies to direct calls and deserialized queries. +- A query without an explicit type is allowed, but dbzero resolves the current predicate and intersects it with the query. 
+- This applies to direct calls, deserialized queries, and index/range query objects. - Use `db0.predicate(...)`, not typeless `db0.find(...)`, to construct reusable predicate query expressions. -If a query has an explicit type, dbzero checks whether that type requires access control. This type check happens even when prefix-level data filtering is disabled. If the type is access controlled but data filters are not initialized for the prefix, dbzero raises `PermissionError` explaining that data filtering must be initialized before the query can run. +If a query has an explicit type, dbzero checks whether that type requires access control. This type check happens even when prefix-level data filtering is disabled. If the type is known not to require access control, dbzero runs the query unchanged. If the type is access controlled but data filters are not initialized for the prefix, dbzero raises `PermissionError` explaining that data filtering must be initialized before the query can run. -If the type is access controlled and filtering is initialized: +If filtering must be applied because the query is typeless or the type is access controlled: - Resolve the predicate from the configured `ContextVar`. - If the predicate is `None` and mode is not `DEBUG`, raise `PermissionError`. - If the predicate is `None` and mode is `DEBUG`, run the original typed query without adding a filter. -- If the predicate is non-null, require it to be a predicate query created by `db0.predicate(...)`, then attach it to the query before sorting or range/index ordering is applied. +- If the predicate is non-null, require it to be a predicate query created by `db0.predicate(...)`, then attach it as a discoverable data-filter branch before sorting or range/index ordering is applied. Conceptually: @@ -215,6 +216,8 @@ This gives predicates snapshot-consistent behavior: a filtered snapshot query us Data predicates are authorization filters and must be composed as intersections. 
A predicate may already be a complex query expression; dbzero must treat that expression as a single authorization constraint and intersect it with the requested access query. It should be attached before sorting so ordering cannot influence authorization. +Data-filter predicate branches may be physically present in executable query trees for performance. They must remain discoverable as authorization branches, and `db0.serialize(...)` must omit them from serialized query bytes. Serialized bytes represent the caller's base query, not the authorization state active when the query was serialized. `db0.predicate(...)` must never attach data filters to the predicate it constructs. + This matters for: - Tag queries. @@ -223,7 +226,7 @@ This matters for: - `index.sort(...)`. - `index.range(...)` and other index operations that can unload object groups without directly relying on tags. -Initial implementation can use the existing query-composition path. Later speedups may push predicate filtering into specific index implementations, but those optimizations must preserve the same visible behavior and error policy. +Initial implementation can use the existing query-composition path, provided data-filter branches remain identifiable and are skipped during serialization. Later speedups may push predicate filtering into specific index implementations, but those optimizations must preserve the same visible behavior and error policy. ## Type Metadata @@ -241,7 +244,7 @@ The base-type propagation rule should be computed through the type hierarchy and ## Deserialized Queries -Serialized query payloads must not bypass type and predicate checks. Query deserialization should preserve explicit type information and reject or mark typeless queries so the normal `find` authorization path can raise `PermissionError` under filtered prefixes. +Serialized query payloads must not bypass type and predicate checks, but they must not include data-filter predicate branches. 
Query deserialization should preserve explicit type information and reject or mark typeless queries so normal evaluation-time authorization can raise `PermissionError` under filtered prefixes. Do not rely on the Python caller to re-wrap a deserialized query with a type or predicate. @@ -284,7 +287,8 @@ Tests should cover: - `_init_data_filter` accepts one prefix, multiple prefixes, and `None`. - `_init_data_filter` defaults to `RELEASE` when `mode` is omitted or `None`. - `_init_data_filter` enables null predicates only when `mode="DEBUG"` is explicitly specified. -- Typeless `find` raises `PermissionError` under a filtered prefix. +- Typeless `find` under a filtered prefix returns only predicate-matching objects. +- Typeless index/range queries under a filtered prefix return only predicate-matching objects. - Typeless `find` works normally for unfiltered prefixes. - Typed `find` on an unrestricted type is unchanged. - Typed `find` on an access-controlled type raises when filters are not initialized. @@ -299,6 +303,6 @@ Tests should cover: - `fetch(Type, uuid)` follows the same authorization outcome as `fetch(uuid)`. - Application-visible member dereference raises when the referenced object is not in the predicate. - Internal maintenance paths such as reference counting, tag maintenance, index maintenance, and flush/reopen are not blocked by data filters. -- Deserialized typeless queries cannot bypass the typeless-query rule. +- Deserialized typeless queries receive predicate filtering when filtering is active. - Deserialized typed queries receive the same predicate filtering as direct typed queries. - Filtering uses the target object's prefix rather than only the current default prefix. 
diff --git a/python_tests/test_data_filter.py b/python_tests/test_data_filter.py index 24238e8d..b3a37481 100644 --- a/python_tests/test_data_filter.py +++ b/python_tests/test_data_filter.py @@ -243,11 +243,36 @@ def test_predicate_blocks_direct_data_access(db0_fixture): pred[:1] -def test_data_filter_rejects_typeless_find(db0_fixture): - db0._init_data_filter(predicate, prefix=db0.get_current_prefix(), mode="DEBUG") +def test_data_filter_predicate_filters_typeless_find(db0_fixture): + allowed = FilteredFindClass("allowed") + denied = FilteredFindClass("denied") + public = FilteredFindPublicClass("public") + db0.tags(allowed).add(["visible", "grant"]) + db0.tags(denied).add("visible") + db0.tags(public).add("visible") + + predicate.set(db0.predicate("grant")) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + assert list(db0.find("visible")) == [allowed] + + +def test_data_filter_release_mode_requires_predicate_for_typeless_find(db0_fixture): + FilteredFindClass("visible") + predicate.set(None) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) with pytest.raises(PermissionError): - db0.find("grant") + db0.find("visible") + + +def test_data_filter_debug_mode_allows_null_predicate_for_typeless_find(db0_fixture): + obj = FilteredFindClass("visible") + db0.tags(obj).add("visible") + predicate.set(None) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix(), mode="DEBUG") + + assert list(db0.find("visible")) == [obj] def test_access_controlled_find_requires_data_filter(db0_fixture): @@ -286,6 +311,16 @@ def test_data_filter_predicate_filters_access_controlled_find(db0_fixture): assert list(db0.find(FilteredFindClass, "visible")) == [allowed] +def test_data_filter_predicate_does_not_filter_typed_public_find(db0_fixture): + public = FilteredFindPublicClass("public") + db0.tags(public).add("visible") + + predicate.set(db0.predicate("grant")) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + assert 
list(db0.find(FilteredFindPublicClass, "visible")) == [public] + + def test_data_filter_predicate_refreshes_after_matching_objects_are_committed(db0_fixture): initial = FilteredFindClass("initial") db0.tags(initial).add(["visible", "grant"]) @@ -433,6 +468,92 @@ def test_data_filter_prefix_scope_only_filters_configured_prefix_fetch(db0_fixtu assert db0.fetch(db0.uuid(unfiltered)) is unfiltered +def test_data_filter_predicate_filters_typeless_index_range(db0_fixture): + index = db0.index() + allowed = FilteredFindClass("range-allowed") + denied = FilteredFindClass("range-denied") + db0.tags(allowed).add("grant") + index.add(1, allowed) + index.add(2, denied) + + predicate.set(db0.predicate("grant")) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + assert list(index.select()) == [allowed] + + +def test_data_filter_release_mode_requires_predicate_for_typeless_index_range(db0_fixture): + index = db0.index() + index.add(1, FilteredFindClass("range-denied")) + predicate.set(None) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + with pytest.raises(PermissionError): + list(index.select()) + + +def test_data_filter_predicate_filters_deserialized_typeless_query(db0_fixture): + allowed = FilteredFindClass("serialized-allowed") + denied = FilteredFindClass("serialized-denied") + db0.tags(allowed).add(["visible", "grant"]) + db0.tags(denied).add("visible") + + query_bytes = db0.serialize(db0.find("visible")) + predicate.set(db0.predicate("grant")) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + assert list(db0.deserialize(query_bytes)) == [allowed] + + +def test_data_filter_predicate_is_not_serialized_with_typeless_query(db0_fixture): + first = FilteredFindClass("serialized-first") + second = FilteredFindClass("serialized-second") + db0.tags(first).add(["visible", "first-grant"]) + db0.tags(second).add(["visible", "second-grant"]) + + predicate.set(db0.predicate("first-grant")) + 
db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + query_bytes = db0.serialize(db0.find("visible")) + + predicate.set(db0.predicate("second-grant")) + + assert list(db0.deserialize(query_bytes)) == [second] + + +def test_data_filter_predicate_is_not_serialized_with_index_range(db0_fixture): + index = db0.index() + first = FilteredFindClass("range-serialized-first") + second = FilteredFindClass("range-serialized-second") + db0.tags(first).add("first-grant") + db0.tags(second).add("second-grant") + index.add(1, first) + index.add(2, second) + + predicate.set(db0.predicate("first-grant")) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + query_bytes = db0.serialize(index.select()) + + predicate.set(db0.predicate("second-grant")) + + assert list(db0.deserialize(query_bytes)) == [second] + + +def test_data_filter_predicate_is_not_attached_to_predicate_query(db0_fixture): + first = FilteredFindPublicClass("predicate-first") + second = FilteredFindPublicClass("predicate-second") + db0.tags(first).add(["query-grant", "first-grant"]) + db0.tags(second).add("query-grant") + + predicate.set(db0.predicate("first-grant")) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix(), mode="DEBUG") + query_predicate = db0.deserialize(db0.serialize(db0.predicate("query-grant"))) + + assert sorted(item.value for item in db0.find(FilteredFindPublicClass, query_predicate)) == [ + "predicate-first", + "predicate-second", + ] + + def test_data_filter_predicate_filters_mutable_memo_reference(db0_fixture): allowed = FilteredFindClass("allowed") denied = FilteredFindClass("denied") diff --git a/src/dbzero/bindings/python/PyInternalAPI.cpp b/src/dbzero/bindings/python/PyInternalAPI.cpp index b09dbb7f..5fd44791 100644 --- a/src/dbzero/bindings/python/PyInternalAPI.cpp +++ b/src/dbzero/bindings/python/PyInternalAPI.cpp @@ -176,6 +176,37 @@ namespace db0::python } return setFetchPermissionError("data filter predicate does not include referenced 
object"); } + + bool appendDataFilterPredicate(db0::swine_ptr fixture, + std::shared_ptr type, + std::vector > &native_predicates, + std::vector > &owned_predicates) + { + auto filter_state = fixture->getFilterState(); + if (type && !type->isAccessControl()) { + return true; + } + if (type && !filter_state && !db0::Settings::m_data_filter_enabled) { + throw PermissionException("data filter must be initialized before querying an access-controlled type"); + } + if (!filter_state) { + return true; + } + + PyObject *py_predicate = nullptr; + if (PyContextVar_Get(filter_state->contextVar, NULL, &py_predicate) < 0) { + return false; + } + owned_predicates.emplace_back(Py_OWN(py_predicate)); + if (!py_predicate || py_predicate == Py_None) { + if (filter_state->mode == db0::DataMaskingMode::DEBUG) { + return true; + } + throw PermissionException("data filter predicate is not set"); + } + native_predicates.push_back(fixture->get().get(py_predicate)); + return true; + } LoadGuard::LoadGuard(std::unordered_set *load_stack_ptr, const void *arg_ptr) : m_load_stack_ptr(load_stack_ptr) diff --git a/src/dbzero/bindings/python/PyInternalAPI.hpp b/src/dbzero/bindings/python/PyInternalAPI.hpp index 0f2537ca..a13f4089 100644 --- a/src/dbzero/bindings/python/PyInternalAPI.hpp +++ b/src/dbzero/bindings/python/PyInternalAPI.hpp @@ -5,8 +5,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -86,6 +88,11 @@ namespace db0::python bool authorizeDataFilterFetch(db0::swine_ptr &fixture, const db0::object_model::Class &type, UniqueAddress address); + + bool appendDataFilterPredicate(db0::swine_ptr fixture, + std::shared_ptr type, + std::vector > &native_predicates, + std::vector > &owned_predicates); /** * Open dbzero object from a specific fixture diff --git a/src/dbzero/bindings/python/PyTagsAPI.cpp b/src/dbzero/bindings/python/PyTagsAPI.cpp index 8d14961c..17327628 100644 --- a/src/dbzero/bindings/python/PyTagsAPI.cpp +++ 
b/src/dbzero/bindings/python/PyTagsAPI.cpp @@ -23,52 +23,6 @@ namespace db0::python { - bool setFindPermissionError(const char *message) - { - PyErr_SetString(PyExc_PermissionError, message); - return false; - } - - // Enforces data-filter rules for protected find() calls and, when a filter - // is active, appends its refreshed predicate query to native_predicates so - // TagIndex::find() intersects the user query with the access-control query. - // - // PyContextVar_Get() returns a new reference. find_args only stores borrowed - // raw PyObject* values, so owned_predicates keeps that reference alive until - // TagIndex::find() has consumed find_args. - bool appendDataFilterPredicate(db0::swine_ptr fixture, std::shared_ptr type, - std::vector > &native_predicates, - std::vector > &owned_predicates) - { - auto filter_state = fixture->getFilterState(); - if (filter_state && !type) { - return setFindPermissionError("typeless find is not allowed when data filtering is enabled for the prefix"); - } - if (!type || !type->isAccessControl()) { - return true; - } - if (!filter_state && !db0::Settings::m_data_filter_enabled) { - return setFindPermissionError("data filter must be initialized before querying an access-controlled type"); - } - if (!filter_state) { - return true; - } - - PyObject *py_predicate = nullptr; - if (PyContextVar_Get(filter_state->contextVar, NULL, &py_predicate) < 0) { - return false; - } - owned_predicates.emplace_back(Py_OWN(py_predicate)); - if (!py_predicate || py_predicate == Py_None) { - if (filter_state->mode == db0::DataMaskingMode::DEBUG) { - return true; - } - return setFindPermissionError("data filter predicate is not set"); - } - native_predicates.push_back(fixture->get().get(py_predicate)); - return true; - } - PyObject *findIn(db0::Snapshot &snapshot, PyObject* const *args, Py_ssize_t nargs, PyObject *context, const char *prefix_name, bool bypass_data_filters, bool predicate_only) { @@ -91,16 +45,13 @@ namespace db0::python } auto 
&tag_index = fixture->get(); std::vector > query_observers; - std::vector native_predicate_ptrs; - native_predicate_ptrs.reserve(native_predicates.size()); - for (const auto &native_predicate: native_predicates) { - native_predicate_ptrs.push_back(native_predicate.get()); - } - auto query_iterator = tag_index.find(find_args.data(), find_args.size(), type, query_observers, no_result, - native_predicate_ptrs); + auto query_iterator = tag_index.find(find_args.data(), find_args.size(), type, query_observers, no_result); auto iter_obj = PyObjectIterableDefault_new(); iter_obj->makeNew(fixture, std::move(query_iterator), type, lang_type, std::move(query_observers), std::vector{}, predicate_only); + if (!native_predicates.empty()) { + iter_obj->modifyExt().addDataFilterPredicates(std::move(native_predicates)); + } if (context) { (iter_obj.get())->ext().attachContext(context); } diff --git a/src/dbzero/bindings/python/collections/PyIndex.cpp b/src/dbzero/bindings/python/collections/PyIndex.cpp index 36aefd84..a3d1e45b 100644 --- a/src/dbzero/bindings/python/collections/PyIndex.cpp +++ b/src/dbzero/bindings/python/collections/PyIndex.cpp @@ -205,12 +205,21 @@ namespace db0::python auto &index = py_index->ext(); // construct range iterator - auto iter_factory = index.range(low, high, null_first); + auto iter_factory = index.range(low, high, null_first); + auto fixture = index.getFixture(); + std::vector > owned_predicates; + std::vector > native_predicates; + if (!appendDataFilterPredicate(fixture, nullptr, native_predicates, owned_predicates)) { + return nullptr; + } auto py_iter_obj = PyObjectIterableDefault_new(); py_iter_obj->makeNew( - index.getFixture(), std::move(iter_factory), nullptr, nullptr, std::vector >{}, + fixture, std::move(iter_factory), nullptr, nullptr, std::vector >{}, std::vector{} ); + if (!native_predicates.empty()) { + py_iter_obj->modifyExt().addDataFilterPredicates(std::move(native_predicates)); + } return py_iter_obj.steal(); } diff --git 
a/src/dbzero/bindings/python/iter/PyObjectIterable.cpp b/src/dbzero/bindings/python/iter/PyObjectIterable.cpp index 69f64aaf..b9a75b76 100644 --- a/src/dbzero/bindings/python/iter/PyObjectIterable.cpp +++ b/src/dbzero/bindings/python/iter/PyObjectIterable.cpp @@ -13,6 +13,23 @@ namespace db0::python { + namespace + { + bool ensureDataFilterPredicates(PyObjectIterable *py_iterable) + { + auto &iterable = py_iterable->modifyExt(); + if (iterable.hasDataFilterPredicates()) { + return true; + } + std::vector > owned_predicates; + std::vector > native_predicates; + if (!appendDataFilterPredicate(iterable.getFixture(), iterable.getType(), native_predicates, owned_predicates)) { + return false; + } + iterable.addDataFilterPredicates(std::move(native_predicates)); + return true; + } + } PyObjectIterable *PyObjectIterable_new(PyTypeObject *type, PyObject *, PyObject *) { return reinterpret_cast(type->tp_alloc(type, 0)); @@ -77,6 +94,9 @@ namespace db0::python } // getFixture to prevent segfault in case the associated context (e.g. snapshot) has been destroyed auto fixture = py_iterable->ext().getFixture(); + if (!ensureDataFilterPredicates(py_iterable)) { + return nullptr; + } auto py_iter = PyObjectIteratorDefault_new(); py_iter->makeNew(py_iterable->ext().iter()); return py_iter.steal(); @@ -96,6 +116,9 @@ namespace db0::python } // getFixture to prevent segfault in case the associated context (e.g. 
snapshot) has been destroyed auto fixture = py_iterable->ext().getFixture(); + if (!ensureDataFilterPredicates(py_iterable)) { + return -1; + } return py_iterable->ext().getSize(); } @@ -168,6 +191,9 @@ namespace db0::python PyErr_SetString(PyExc_PermissionError, "predicate queries cannot be indexed or sliced directly"); return nullptr; } + if (!ensureDataFilterPredicates(py_iterable)) { + return nullptr; + } if (PyTuple_Check(py_key)) { // itemgetter's key (item indexes) @@ -221,6 +247,9 @@ namespace db0::python PyErr_SetString(PyExc_PermissionError, "predicate queries cannot be tested for truth directly"); return -1; } + if (!ensureDataFilterPredicates(py_iterable)) { + return -1; + } // check if the iterable is empty if (py_iterable->ext().empty()) { return 0; // False diff --git a/src/dbzero/object_model/tags/ObjectIterable.cpp b/src/dbzero/object_model/tags/ObjectIterable.cpp index 6b4aba99..33bcfd75 100644 --- a/src/dbzero/object_model/tags/ObjectIterable.cpp +++ b/src/dbzero/object_model/tags/ObjectIterable.cpp @@ -9,7 +9,9 @@ #include #include #include +#include #include +#include namespace db0::object_model @@ -91,6 +93,7 @@ namespace db0::object_model , m_factory(factory) , m_query_observers(std::move(query_observers)) , m_filters(std::move(filters)) + , m_data_filter_predicates() , m_type(type) , m_lang_type(lang_type) , m_slice_def(slice_def) @@ -104,6 +107,7 @@ namespace db0::object_model , m_class_factory(other.m_class_factory) , m_factory(other.m_factory) , m_filters(other.m_filters) + , m_data_filter_predicates(other.m_data_filter_predicates) , m_type(other.m_type) , m_lang_type(other.m_lang_type) , m_slice_def(other.m_slice_def) @@ -127,6 +131,7 @@ namespace db0::object_model , m_class_factory(other.m_class_factory) , m_factory(other.m_factory) , m_filters(other.m_filters) + , m_data_filter_predicates(other.m_data_filter_predicates) , m_type(other.m_type) , m_lang_type(other.m_lang_type) , m_slice_def(other.m_slice_def.combineWith(slice_def)) @@ 
-152,6 +157,7 @@ namespace db0::object_model , m_factory(nullptr) , m_query_observers(std::move(query_observers)) , m_filters(other.m_filters) + , m_data_filter_predicates(other.m_data_filter_predicates) , m_type(other.m_type) , m_lang_type(other.m_lang_type) , m_slice_def(other.m_slice_def) @@ -169,6 +175,7 @@ namespace db0::object_model , m_factory(other.m_factory) , m_query_observers(std::move(query_observers)) , m_filters(other.m_filters) + , m_data_filter_predicates(other.m_data_filter_predicates) , m_type(other.m_type) , m_lang_type(other.m_lang_type) , m_slice_def(other.m_slice_def) @@ -183,7 +190,7 @@ namespace db0::object_model } bool ObjectIterable::isNull() const { - return !m_query_iterator && !m_sorted_iterator && !m_factory; + return !m_query_iterator && !m_sorted_iterator && !m_factory && m_data_filter_predicates.empty(); } bool ObjectIterable::isSliced() const { @@ -197,16 +204,55 @@ namespace db0::object_model return nullptr; } - // pull FT iterator from factory if available std::unique_ptr result; if (m_factory) { - return m_factory->createFTIterator(); - } else { - if (!m_query_iterator) { - THROWF(db0::InputException) << "Invalid object iterator" << THROWF_END; + result = m_factory->createFTIterator(); + } else if (m_sorted_iterator) { + result = m_sorted_iterator->beginFTQuery(); + } else if (m_query_iterator) { + result = m_query_iterator->beginTyped(direction); + } else if (m_data_filter_predicates.empty()) { + return nullptr; + } + + if (m_data_filter_predicates.empty()) { + return result; + } + + db0::FT_ANDIteratorFactory factory; + if (result) { + factory.add(std::move(result)); + } + for (const auto &predicate: m_data_filter_predicates) { + if (!predicate) { + continue; } - return m_query_iterator->beginTyped(direction); + std::vector > query_observers; + auto predicate_query = predicate->beginFTQuery(query_observers, direction); + if (!predicate_query || predicate_query->isEnd()) { + factory.clear(); + return nullptr; + } + 
factory.add(std::move(predicate_query)); + } + return factory.release(direction); + } + + void ObjectIterable::addDataFilterPredicates(std::vector > &&predicates) + { + if (predicates.empty()) { + return; } + m_data_filter_predicates.insert( + m_data_filter_predicates.end(), + std::make_move_iterator(predicates.begin()), + std::make_move_iterator(predicates.end()) + ); + } + + bool ObjectIterable::hasDataFilterPredicates() const + { + return !m_data_filter_predicates.empty(); } std::unique_ptr ObjectIterable::beginFTQuery( @@ -221,7 +267,7 @@ namespace db0::object_model } return result; } - + std::unique_ptr ObjectIterable::beginSorted() const { if (isNull()) { @@ -240,11 +286,12 @@ namespace db0::object_model void ObjectIterable::serialize(std::vector &buf) const { auto fixture = getFixture(); + bool base_is_null = !m_query_iterator && !m_sorted_iterator && !m_factory; // FIXTURE uuid db0::serial::write(buf, fixture->getUUID()); - db0::serial::write(buf, this->isNull()); + db0::serial::write(buf, base_is_null); db0::serial::write(buf, m_predicate_only); - if (this->isNull()) { + if (base_is_null) { return; } if (m_query_iterator) { diff --git a/src/dbzero/object_model/tags/ObjectIterable.hpp b/src/dbzero/object_model/tags/ObjectIterable.hpp index edb7b509..453ea56e 100644 --- a/src/dbzero/object_model/tags/ObjectIterable.hpp +++ b/src/dbzero/object_model/tags/ObjectIterable.hpp @@ -120,6 +120,10 @@ namespace db0::object_model const std::vector &getFilters() const { return m_filters; } + + void addDataFilterPredicates(std::vector > &&); + + bool hasDataFilterPredicates() const; // Get type of the results if it was specified std::shared_ptr getType() const; @@ -144,6 +148,7 @@ namespace db0::object_model std::shared_ptr m_factory; std::vector > m_query_observers; std::vector m_filters; + std::vector > m_data_filter_predicates; std::shared_ptr m_type = nullptr; TypeObjectSharedPtr m_lang_type = nullptr; const SliceDef m_slice_def = {}; From 
1a72e10acb94e31e238c9f1c2092e1e80b0102c1 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 28 May 2026 10:54:29 +0200 Subject: [PATCH 08/11] predicate filter fixes --- python_tests/test_data_filter.py | 28 +++++++++++++++++++ .../object_model/tags/ObjectIterable.cpp | 24 +++------------- 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/python_tests/test_data_filter.py b/python_tests/test_data_filter.py index b3a37481..a1d96eca 100644 --- a/python_tests/test_data_filter.py +++ b/python_tests/test_data_filter.py @@ -482,6 +482,34 @@ def test_data_filter_predicate_filters_typeless_index_range(db0_fixture): assert list(index.select()) == [allowed] +def test_data_filter_predicate_filters_typeless_index_range_len_bool_and_slice(db0_fixture): + index = db0.index() + allowed = FilteredFindClass("range-count-allowed") + denied = FilteredFindClass("range-count-denied") + db0.tags(allowed).add("grant") + index.add(1, allowed) + index.add(2, denied) + + predicate.set(db0.predicate("grant")) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + query = index.select() + + assert len(query) == 1 + assert bool(query) is True + assert list(query[:1]) == [allowed] + + +def test_data_filter_predicate_filters_typeless_index_range_bool_when_denied_only(db0_fixture): + index = db0.index() + denied = FilteredFindClass("range-bool-denied") + index.add(1, denied) + + predicate.set(db0.predicate("grant")) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + + assert bool(index.select()) is False + + def test_data_filter_release_mode_requires_predicate_for_typeless_index_range(db0_fixture): index = db0.index() index.add(1, FilteredFindClass("range-denied")) diff --git a/src/dbzero/object_model/tags/ObjectIterable.cpp b/src/dbzero/object_model/tags/ObjectIterable.cpp index 33bcfd75..d0f142bf 100644 --- a/src/dbzero/object_model/tags/ObjectIterable.cpp +++ b/src/dbzero/object_model/tags/ObjectIterable.cpp @@ -118,8 +118,7 @@ namespace 
db0::object_model std::unique_ptr query_iterator; std::unique_ptr sorted_iterator; - if (other.m_query_iterator || other.m_factory) { - assert(!other.m_sorted_iterator); + if (other.m_query_iterator || other.m_factory || !other.m_data_filter_predicates.empty()) { m_query_iterator = other.beginFTQuery(m_query_observers, -1); } else if (other.m_sorted_iterator) { m_sorted_iterator = other.m_sorted_iterator->beginSorted(); @@ -140,8 +139,7 @@ namespace db0::object_model { std::unique_ptr query_iterator; std::unique_ptr sorted_iterator; - if (other.m_query_iterator || other.m_factory) { - assert(!other.m_sorted_iterator); + if (other.m_query_iterator || other.m_factory || !other.m_data_filter_predicates.empty()) { m_query_iterator = other.beginFTQuery(m_query_observers, -1); } else if (other.m_sorted_iterator) { m_sorted_iterator = other.m_sorted_iterator->beginSorted(); @@ -430,14 +428,7 @@ namespace db0::object_model return result; } - std::unique_ptr iter; - if (m_factory) { - iter = m_factory->createFTIterator(); - } else if (m_query_iterator) { - iter = m_query_iterator->beginTyped(-1); - } else if (m_sorted_iterator) { - iter = m_sorted_iterator->beginFTQuery(); - } + auto iter = beginFTQuery(-1); std::size_t result = 0; if (iter) { @@ -485,14 +476,7 @@ namespace db0::object_model return !obj_iter->next(); } - std::unique_ptr iter; - if (m_factory) { - iter = m_factory->createFTIterator(); - } else if (m_query_iterator) { - iter = m_query_iterator->beginTyped(-1); - } else if (m_sorted_iterator) { - iter = m_sorted_iterator->beginFTQuery(); - } + auto iter = beginFTQuery(-1); if (iter) { Slice slice(iter.get(), m_slice_def); From b5e1384aefb975ccce775666c337bf2b60e7f7ee Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 28 May 2026 14:09:04 +0200 Subject: [PATCH 09/11] serialization of composite tag sequences + focused predicate-based security test --- python_tests/test_data_filter.py | 47 +++++++ python_tests/test_query_serialization.py | 49 ++++++++ 
.../collections/full_text/FT_BaseIndex.cpp | 19 ++- .../collections/full_text/FT_BaseIndex.hpp | 8 ++ .../full_text/FT_IndexIterator.hpp | 42 +++++-- .../full_text/FT_Serialization.hpp | 12 +- src/dbzero/object_model/tags/TagIndex.cpp | 118 ++++++++++++++---- src/dbzero/object_model/tags/TagIndex.hpp | 7 +- tests/unit_tests/QuerySerializationTest.cpp | 44 ++++++- 9 files changed, 305 insertions(+), 41 deletions(-) diff --git a/python_tests/test_data_filter.py b/python_tests/test_data_filter.py index a1d96eca..214ebedb 100644 --- a/python_tests/test_data_filter.py +++ b/python_tests/test_data_filter.py @@ -548,6 +548,53 @@ def test_data_filter_predicate_is_not_serialized_with_typeless_query(db0_fixture assert list(db0.deserialize(query_bytes)) == [second] +def test_data_filter_composite_access_control_switches_by_account_context(db0_fixture): + account_a = FilteredFindPublicClass("account-a") + account_b = FilteredFindPublicClass("account-b") + role_reader = "role-reader" + role_auditor = "role-auditor" + + account_a_private = FilteredFindClass("account-a-private") + account_b_private = FilteredFindClass("account-b-private") + shared_by_role = FilteredFindClass("shared-by-reader-role") + denied_for_account_a = FilteredFindClass("denied-for-account-a") + denied_for_account_b_role = FilteredFindClass("denied-for-account-b-role") + + db0.tags(account_a_private).add("document", db0.as_tag("GRANT", account_a)) + db0.tags(account_b_private).add("document", db0.as_tag("GRANT", account_b)) + db0.tags(shared_by_role).add("document", db0.as_tag("GRANT", role_reader)) + db0.tags(denied_for_account_a).add( + "document", + db0.as_tag("GRANT", account_a), + db0.as_tag("DENY", account_a), + ) + db0.tags(denied_for_account_b_role).add( + "document", + db0.as_tag("GRANT", role_auditor), + db0.as_tag("DENY", role_auditor), + ) + + def access_predicate(account, roles): + grants = [db0.as_tag("GRANT", account)] + [db0.as_tag("GRANT", role) for role in roles] + denials = 
[db0.as_tag("DENY", account)] + [db0.as_tag("DENY", role) for role in roles] + return db0.predicate(grants, db0.no(denials)) + + predicate.set(access_predicate(account_a, [role_reader])) + db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) + query_bytes = db0.serialize(db0.find(FilteredFindClass, "document")) + + assert [item.value for item in db0.deserialize(query_bytes)] == [ + "shared-by-reader-role", + "account-a-private", + ] + + predicate.set(access_predicate(account_b, [role_auditor])) + + assert [item.value for item in db0.deserialize(query_bytes)] == [ + "account-b-private", + ] + + def test_data_filter_predicate_is_not_serialized_with_index_range(db0_fixture): index = db0.index() first = FilteredFindClass("range-serialized-first") diff --git a/python_tests/test_query_serialization.py b/python_tests/test_query_serialization.py index 734414e3..e9e07085 100644 --- a/python_tests/test_query_serialization.py +++ b/python_tests/test_query_serialization.py @@ -4,6 +4,7 @@ import pytest import dbzero as db0 from .memo_test_types import MemoTestClass +from .test_composite_tags import CompositeTagDocument def test_serialized_query_can_be_stored_as_member(db0_fixture): @@ -101,3 +102,51 @@ def test_serialize_sliced_query(db0_fixture, memo_tags): bytes = db0.serialize(db0.find("tag1")[2:]) query = db0.deserialize(bytes) assert len(query) == len(db0.find("tag1")[2:]) + + +def test_deserialize_nested_composite_tag_query_from_bytes(db0_fixture): + document_1 = CompositeTagDocument("doc-1") + document_2 = CompositeTagDocument("doc-2") + document_3 = CompositeTagDocument("doc-3") + document_4 = CompositeTagDocument("doc-4") + + db0.tags(document_1).add(db0.as_tag("GRANT-READ", "tenant-1", "active")) + db0.tags(document_2).add(db0.as_tag("GRANT-READ", "tenant-1", "archived")) + db0.tags(document_3).add(db0.as_tag("GRANT-READ", "tenant-2", "active")) + db0.tags(document_4).add("active") + + bytes = db0.serialize(db0.find(db0.as_tag("GRANT-READ", "tenant-1", 
"active"))) + query = db0.deserialize(bytes) + + assert [doc.title for doc in query] == ["doc-1"] + + +def test_stored_nested_composite_tag_query_can_be_deserialized(db0_fixture): + document_1 = CompositeTagDocument("doc-1") + document_2 = CompositeTagDocument("doc-2") + document_3 = CompositeTagDocument("doc-3") + + db0.tags(document_1).add(db0.as_tag("GRANT-READ", "tenant-1", "active")) + db0.tags(document_2).add(db0.as_tag("GRANT-READ", "tenant-1", "archived")) + db0.tags(document_3).add("active") + + query_object = MemoTestClass(db0.find(db0.as_tag("GRANT-READ", "tenant-1", "active"))) + + assert [doc.title for doc in query_object.value] == ["doc-1"] + + +def test_deserialize_nested_composite_tag_query_with_type_and_simple_tag(db0_fixture): + document_1 = CompositeTagDocument("doc-1") + document_2 = CompositeTagDocument("doc-2") + document_3 = CompositeTagDocument("doc-3") + + db0.tags(document_1).add(db0.as_tag("GRANT-READ", "tenant-1", "active"), "visible") + db0.tags(document_2).add(db0.as_tag("GRANT-READ", "tenant-1", "active"), "hidden") + db0.tags(document_3).add("active", "visible") + + bytes = db0.serialize( + db0.find(CompositeTagDocument, db0.as_tag("GRANT-READ", "tenant-1", "active"), "visible") + ) + query = db0.deserialize(bytes) + + assert [doc.title for doc in query] == ["doc-1"] diff --git a/src/dbzero/core/collections/full_text/FT_BaseIndex.cpp b/src/dbzero/core/collections/full_text/FT_BaseIndex.cpp index 082873d2..e80b939d 100644 --- a/src/dbzero/core/collections/full_text/FT_BaseIndex.cpp +++ b/src/dbzero/core/collections/full_text/FT_BaseIndex.cpp @@ -40,6 +40,14 @@ namespace db0 template std::unique_ptr > FT_BaseIndex::makeIterator(IndexKeyT key, int direction) const + { + return makeIterator(key, direction, std::vector { key }); + } + + template + std::unique_ptr > + FT_BaseIndex::makeIterator(IndexKeyT key, int direction, + std::vector &&index_key_sequence) const { using ListT = typename super_t::ListT; auto inverted_list_ptr = 
this->tryGetExistingInvertedList(key); @@ -47,12 +55,19 @@ namespace db0 return nullptr; } return std::unique_ptr >( - new FT_IndexIterator(*inverted_list_ptr, direction, key) + new FT_IndexIterator(*inverted_list_ptr, direction, key, std::move(index_key_sequence)) ); } template bool FT_BaseIndex::addIterator(FT_IteratorFactory &factory, IndexKeyT key) const + { + return addIterator(factory, key, std::vector { key }); + } + + template + bool FT_BaseIndex::addIterator(FT_IteratorFactory &factory, IndexKeyT key, + std::vector &&index_key_sequence) const { using ListT = typename super_t::ListT; auto inverted_list_ptr = this->tryGetExistingInvertedList(key); @@ -62,7 +77,7 @@ namespace db0 // key inverted index factory.add(std::unique_ptr >( - new FT_IndexIterator(*inverted_list_ptr, -1, key)) + new FT_IndexIterator(*inverted_list_ptr, -1, key, std::move(index_key_sequence))) ); return true; } diff --git a/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp b/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp index 4faa82d8..96f0beb8 100644 --- a/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp +++ b/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp @@ -10,6 +10,7 @@ #include "FT_ORXIterator.hpp" #include #include "LongTag.hpp" +#include namespace db0 @@ -36,11 +37,18 @@ namespace db0 * @return false if no iterator collected (e.g. no such key) */ bool addIterator(FT_IteratorFactory &, IndexKeyT key) const; + // index_key_sequence is optional serialization metadata for nested + // composite-tag queries; it is the root-to-leaf tag key path. + bool addIterator(FT_IteratorFactory &, IndexKeyT key, std::vector &&index_key_sequence) const; /** * @param key either tag or class identifier */ std::unique_ptr > makeIterator(IndexKeyT key, int direction = -1) const; + // See addIterator overload above: index_key_sequence is not used for + // lookup, only to serialize enough context to reopen a nested tag path. 
+ std::unique_ptr > makeIterator(IndexKeyT key, int direction, + std::vector &&index_key_sequence) const; /** * Match all elements from the user provided sequence diff --git a/src/dbzero/core/collections/full_text/FT_IndexIterator.hpp b/src/dbzero/core/collections/full_text/FT_IndexIterator.hpp index e4b0b30a..ee83dd58 100644 --- a/src/dbzero/core/collections/full_text/FT_IndexIterator.hpp +++ b/src/dbzero/core/collections/full_text/FT_IndexIterator.hpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace db0 @@ -26,13 +27,18 @@ namespace db0 using super_t = FT_Iterator; using iterator = typename bindex_t::joinable_const_iterator; - FT_IndexIterator(const bindex_t &data, int direction, std::optional index_key = {}); + // index_key_sequence is serialization-only metadata. For plain tag + // iterators it is empty and index_key is enough. For nested composite + // tag iterators it stores the root-to-leaf tag key path so deserialize + // can traverse from the root TagIndex instead of persisting child index addresses. + FT_IndexIterator(const bindex_t &data, int direction, std::optional index_key = {}, + std::vector &&index_key_sequence = {}); /** * Construct over already initialized simple iterator */ FT_IndexIterator(const bindex_t &data, int direction, const iterator &it, - std::optional index_key = {}); + std::optional index_key = {}, std::vector &&index_key_sequence = {}); virtual ~FT_IndexIterator() = default; @@ -87,9 +93,12 @@ namespace db0 bool m_has_detach_key = false; key_t m_detach_key; const std::optional m_index_key; + // Full root-to-leaf tag key path used only to reconstruct nested composite-tag + // iterators during deserialization. A plain tag iterator leaves this empty. 
+ const std::vector m_index_key_sequence; FT_IndexIterator(std::uint64_t uid, const bindex_t &data, int direction, - std::optional index_key = {}); + std::optional index_key = {}, std::vector &&index_key_sequence = {}); /** * Get valid iterator after detach @@ -110,32 +119,35 @@ namespace db0 template FT_IndexIterator::FT_IndexIterator(const bindex_t &data, int direction, - std::optional index_key) + std::optional index_key, std::vector &&index_key_sequence) : m_data(data) , m_direction(direction) , m_iterator(m_data.beginJoin(direction)) , m_index_key(index_key) + , m_index_key_sequence(std::move(index_key_sequence)) { } template FT_IndexIterator::FT_IndexIterator(const bindex_t &data, int direction, const iterator &it, - std::optional index_key) + std::optional index_key, std::vector &&index_key_sequence) : m_data(data) , m_direction(direction) , m_iterator(it) , m_index_key(index_key) + , m_index_key_sequence(std::move(index_key_sequence)) { } template FT_IndexIterator::FT_IndexIterator(std::uint64_t uid, const bindex_t &data, int direction, - std::optional index_key) + std::optional index_key, std::vector &&index_key_sequence) : FT_Iterator(uid) , m_data(data) , m_direction(direction) , m_iterator(m_data.beginJoin(direction)) , m_index_key(index_key) + , m_index_key_sequence(std::move(index_key_sequence)) { } @@ -196,7 +208,10 @@ namespace db0 template std::unique_ptr > FT_IndexIterator::beginTyped(int direction) const { - return std::unique_ptr >(new FT_IndexIterator(this->m_uid, m_data, direction, this->m_index_key)); + return std::unique_ptr >( + new FT_IndexIterator(this->m_uid, m_data, direction, this->m_index_key, + std::vector(this->m_index_key_sequence)) + ); } template @@ -290,7 +305,18 @@ namespace db0 db0::serial::write(v, db0::serial::typeId()); db0::serial::write(v, m_data.getMemspace().getUUID()); db0::serial::write(v, m_direction); - db0::serial::write(v, *m_index_key); + // For nested composite tags, serialize the whole root-to-leaf key path. 
+ // Deserialization cannot use the nested index address directly because child + // indexes must be resolved from the root TagIndex in the target snapshot. + if (m_index_key_sequence.empty()) { + db0::serial::write(v, 1); + db0::serial::write(v, *m_index_key); + } else { + db0::serial::write(v, m_index_key_sequence.size()); + for (auto const &index_key: m_index_key_sequence) { + db0::serial::write(v, index_key); + } + } } template diff --git a/src/dbzero/core/collections/full_text/FT_Serialization.hpp b/src/dbzero/core/collections/full_text/FT_Serialization.hpp index bb6f62db..7d813d42 100644 --- a/src/dbzero/core/collections/full_text/FT_Serialization.hpp +++ b/src/dbzero/core/collections/full_text/FT_Serialization.hpp @@ -142,12 +142,20 @@ namespace db0 auto fixture = snapshot.getFixture(db0::serial::read(iter, end)); int direction = db0::serial::read(iter, end); if (index_key_type_id == db0::serial::typeId()) { - auto index_key = db0::serial::read(iter, end); + auto index_key_count = db0::serial::read(iter, end); + if (index_key_count == 0) { + THROWF(db0::InternalException) << "Serialized FT index iterator is missing index keys" << THROWF_END; + } + std::vector index_keys; + index_keys.reserve(index_key_count); + for (std::uint64_t i = 0; i < index_key_count; ++i) { + index_keys.push_back(db0::serial::read(iter, end)); + } // use FT_Base index as the factory // NOTE: TagIndex only supports UniqueAddress key type if constexpr (std::is_same_v) { auto &tag_index = fixture->get(); - return tag_index.getBaseIndexShort().makeIterator(index_key, direction); + return tag_index.makeIterator(index_keys, direction); } } THROWF(db0::InternalException) << "Unsupported key type ID: " << key_type_id << THROWF_END; diff --git a/src/dbzero/object_model/tags/TagIndex.cpp b/src/dbzero/object_model/tags/TagIndex.cpp index 3f5967c2..f29e6b79 100644 --- a/src/dbzero/object_model/tags/TagIndex.cpp +++ b/src/dbzero/object_model/tags/TagIndex.cpp @@ -678,14 +678,15 @@ namespace 
db0::object_model } bool TagIndex::addIterator(ObjectPtr arg, db0::FT_IteratorFactory &factory, - std::vector > &neg_iterators, std::vector > &query_observers) const + std::vector > &neg_iterators, + std::vector > &query_observers) const { using TypeId = db0::bindings::TypeId; using IterableSequence = TagMakerSequence; auto type_id = LangToolkit::getTypeManager().getTypeId(arg); if (type_id == TypeId::DB0_COMPOSITE_TAG) { - return addCompositeIterator(LangToolkit::getTypeManager().extractCompositeTag(arg), factory, query_observers); + return addCompositeIterator(LangToolkit::getTypeManager().extractCompositeTag(arg), factory); } // simple tag-convertible type @@ -792,57 +793,86 @@ namespace db0::object_model } bool TagIndex::addCompositeIterator(const CompositeTagDef &tag, - db0::FT_IteratorFactory &factory, - std::vector > &query_observers) const + db0::FT_IteratorFactory &factory) const { if (tag.size() < 2 || !m_short_tag_index_map) { return false; } auto const &items = tag.getItems(); - auto firstKey = tryGetCompositeKey(items[0].get()); - if (!firstKey) { + auto first_key = tryGetCompositeKey(items[0].get()); + if (!first_key) { return false; } - auto currentTagIndexPtr = m_short_tag_index_map->tryGet( - *firstKey, + auto current_tag_index_ptr = m_short_tag_index_map->tryGet( + *first_key, m_class_factory, m_enum_factory, m_string_pool, m_cache, m_mutation_log ); - if (!currentTagIndexPtr) { + if (!current_tag_index_ptr) { return false; } - auto *currentTagIndex = currentTagIndexPtr.get(); + // Serialization needs the full root-to-leaf tag key path; lookup below + // only uses the current nested TagIndex plus the leaf key. 
+ std::vector serialized_tag_sequence; + serialized_tag_sequence.reserve(items.size()); + serialized_tag_sequence.push_back(*first_key); + + auto *current_tag_index = current_tag_index_ptr.get(); for (std::size_t i = 1; i + 1 < items.size(); ++i) { - auto key = currentTagIndex->tryGetCompositeKey(items[i].get()); - if (!key || !currentTagIndex->m_short_tag_index_map) { + auto tag_key = current_tag_index->tryGetCompositeKey(items[i].get()); + if (!tag_key || !current_tag_index->m_short_tag_index_map) { return false; } - currentTagIndexPtr = currentTagIndex->m_short_tag_index_map->tryGet( - *key, - currentTagIndex->m_class_factory, - currentTagIndex->m_enum_factory, - currentTagIndex->m_string_pool, - currentTagIndex->m_cache, - currentTagIndex->m_mutation_log + serialized_tag_sequence.push_back(*tag_key); + current_tag_index_ptr = current_tag_index->m_short_tag_index_map->tryGet( + *tag_key, + current_tag_index->m_class_factory, + current_tag_index->m_enum_factory, + current_tag_index->m_string_pool, + current_tag_index->m_cache, + current_tag_index->m_mutation_log ); - if (!currentTagIndexPtr) { + if (!current_tag_index_ptr) { return false; } - currentTagIndex = currentTagIndexPtr.get(); + current_tag_index = current_tag_index_ptr.get(); + } + + return current_tag_index->addCompositeLeafIterator( + items.back().get(), + factory, + std::move(serialized_tag_sequence) + ); + } + + bool TagIndex::addCompositeLeafIterator(ObjectPtr arg, db0::FT_IteratorFactory &factory, + std::vector &&serialized_tag_sequence) const + { + using TypeId = db0::bindings::TypeId; + + auto type_id = LangToolkit::getTypeManager().getTypeId(arg); + if (type_id == TypeId::DB0_COMPOSITE_TAG) { + THROWF(db0::InputException) << "Nested composite tag leaves are not supported" << THROWF_END; + } + if (isLongTag(type_id, arg)) { + return m_base_index_long.addIterator(factory, getLongTag(type_id, arg)); } - std::vector > negIterators; - auto result = currentTagIndex->addIterator(items.back().get(), 
factory, negIterators, query_observers); - if (!negIterators.empty()) { - THROWF(db0::InputException) << "Negated composite tag leaves are not supported" << THROWF_END; + auto leaf_key = tryGetCompositeKey(arg); + if (!leaf_key) { + return false; } - return result; + // current TagIndex is already the correct nested index for lookup. The + // complete root-to-leaf sequence is attached only so serialized queries can + // reopen the same nested index by traversing from the root during deserialize. + serialized_tag_sequence.push_back(*leaf_key); + return m_base_index_short.addIterator(factory, *leaf_key, std::move(serialized_tag_sequence)); } std::optional TagIndex::tryGetCompositeKey(ObjectPtr arg) const @@ -1229,6 +1259,42 @@ namespace db0::object_model flush(); return m_base_index_short.makeIterator(tag); } + + std::unique_ptr TagIndex::makeIterator(const std::vector &tag_sequence, + int direction) const + { + if (tag_sequence.empty()) { + return nullptr; + } + + flush(); + auto const *current_tag_index = this; + std::vector > keep_alive; + keep_alive.reserve(tag_sequence.size() - 1); + + for (std::size_t i = 0; i + 1 < tag_sequence.size(); ++i) { + if (!current_tag_index->m_short_tag_index_map) { + return nullptr; + } + auto child_tag_index = current_tag_index->m_short_tag_index_map->tryGet( + tag_sequence[i], + current_tag_index->m_class_factory, + current_tag_index->m_enum_factory, + current_tag_index->m_string_pool, + current_tag_index->m_cache, + current_tag_index->m_mutation_log + ); + if (!child_tag_index) { + return nullptr; + } + current_tag_index = child_tag_index.get(); + keep_alive.push_back(std::move(child_tag_index)); + } + + return current_tag_index->m_base_index_short.makeIterator( + tag_sequence.back(), direction, std::vector(tag_sequence) + ); + } std::uint64_t getFindFixtureUUID(TagIndex::ObjectPtr obj_ptr) { diff --git a/src/dbzero/object_model/tags/TagIndex.hpp b/src/dbzero/object_model/tags/TagIndex.hpp index d122dd91..904168b0 100644 --- 
a/src/dbzero/object_model/tags/TagIndex.hpp +++ b/src/dbzero/object_model/tags/TagIndex.hpp @@ -150,6 +150,8 @@ DB0_PACKED_END std::unique_ptr makeIterator(const TagDef &) const; std::unique_ptr makeIterator(const Class &) const; std::unique_ptr makeIterator(ShortTagT) const; + std::unique_ptr makeIterator(const std::vector &tag_sequence, + int direction = -1) const; private: using TypeId = db0::bindings::TypeId; @@ -235,8 +237,9 @@ DB0_PACKED_END bool addIterator(const ObjectIterable &, db0::FT_IteratorFactory &factory, std::vector > &neg_iterators, std::vector > &query_observers) const; - bool addCompositeIterator(const CompositeTagDef &, db0::FT_IteratorFactory &factory, - std::vector > &query_observers) const; + bool addCompositeIterator(const CompositeTagDef &, db0::FT_IteratorFactory &factory) const; + bool addCompositeLeafIterator(ObjectPtr, db0::FT_IteratorFactory &factory, + std::vector &&serialized_tag_sequence) const; std::optional tryGetCompositeKey(ObjectPtr) const; bool isShortTag(ObjectPtr) const; diff --git a/tests/unit_tests/QuerySerializationTest.cpp b/tests/unit_tests/QuerySerializationTest.cpp index c54adece..31c6a2c2 100644 --- a/tests/unit_tests/QuerySerializationTest.cpp +++ b/tests/unit_tests/QuerySerializationTest.cpp @@ -114,4 +114,46 @@ namespace tests runTestCase(test); } -} \ No newline at end of file + TEST_F( QuerySerializationTest , testCompositeTagPathIteratorCanBeDeserializedFromRoot ) + { + auto fixture = getFixture(); + FixedObjectList shared_object_list(100); + db0::object_model::ClassFactory class_factory(fixture); + db0::object_model::EnumFactory enum_factory(fixture); + VObjectCache cache(*fixture, shared_object_list); + + auto &tag_index = fixture->addResource( + *fixture, class_factory, enum_factory, fixture->getLimitedStringPool(), cache, fixture->addMutationHandler() + ); + + auto child = tag_index.addComposite(nullptr, 11); + auto grandchild = child->addComposite(nullptr, 22); + { + auto batch_data = 
grandchild->getBaseIndexShort().beginBatchUpdate(); + batch_data->addTags({ makeUniqueAddr(101, 1), nullptr }, std::vector { 33 }); + batch_data->flush(); + } + { + auto batch_data = tag_index.getBaseIndexShort().beginBatchUpdate(); + batch_data->addTags({ makeUniqueAddr(202, 1), nullptr }, std::vector { 33 }); + batch_data->flush(); + } + + auto ft_query = tag_index.makeIterator(std::vector { 11, 22, 33 }); + ASSERT_TRUE(ft_query); + + std::vector buf; + ft_query->serialize(buf); + + auto iter = buf.cbegin(), end = buf.cend(); + auto cut = deserializeFT_Iterator(m_workspace, iter, end); + ASSERT_TRUE(cut); + ASSERT_FALSE(cut->isEnd()); + + UniqueAddress value; + cut->next(&value); + ASSERT_EQ(value, makeUniqueAddr(101, 1)); + ASSERT_TRUE(cut->isEnd()); + } + +} From 85faa02f639489eb8fecfa26bcdf75124616fe19 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 28 May 2026 15:41:05 +0200 Subject: [PATCH 10/11] FT_MissingIterator implemented + integration for proper query serialization / deser --- python_tests/test_data_filter.py | 1 - python_tests/test_query_serialization.py | 47 ++++++ .../full_text/FT_IndexIterator.hpp | 4 +- .../full_text/FT_MissingIndexIterator.hpp | 142 ++++++++++++++++++ .../full_text/FT_Serialization.hpp | 4 +- src/dbzero/object_model/tags/TagIndex.cpp | 85 +++++++++-- src/dbzero/object_model/tags/TagIndex.hpp | 2 + tests/unit_tests/QuerySerializationTest.cpp | 36 +++++ 8 files changed, 303 insertions(+), 18 deletions(-) create mode 100644 src/dbzero/core/collections/full_text/FT_MissingIndexIterator.hpp diff --git a/python_tests/test_data_filter.py b/python_tests/test_data_filter.py index 214ebedb..ea5bded1 100644 --- a/python_tests/test_data_filter.py +++ b/python_tests/test_data_filter.py @@ -340,7 +340,6 @@ def test_data_filter_predicate_refreshes_after_matching_objects_are_committed(db assert list(db0.find(FilteredFindClass, "visible")) == [later, initial] -@pytest.mark.skip(reason="TODO: preserve predicate expressions that initially resolve 
to no results") def test_data_filter_predicate_created_before_tag_exists_refreshes_after_commit(db0_fixture): predicate.set(db0.predicate("grant")) db0._init_data_filter(predicate, prefix=db0.get_current_prefix()) diff --git a/python_tests/test_query_serialization.py b/python_tests/test_query_serialization.py index e9e07085..6a9304b6 100644 --- a/python_tests/test_query_serialization.py +++ b/python_tests/test_query_serialization.py @@ -85,6 +85,53 @@ def test_deserialize_query_from_bytes(db0_fixture, memo_tags): assert len(list(query)) == 10 +def test_deserialize_missing_tag_query_matches_after_tag_is_created(db0_fixture): + bytes = db0.serialize(db0.find("late-tag")) + + first = MemoTestClass("first") + second = MemoTestClass("second") + db0.tags(first).add("late-tag") + db0.tags(second).add("other-tag") + + assert [item.value for item in db0.deserialize(bytes)] == ["first"] + + +def test_deserialize_missing_composite_tag_query_matches_after_tag_is_created(db0_fixture): + bytes = db0.serialize(db0.find(db0.as_tag("GRANT-READ", "tenant-late", "active"))) + + document_1 = CompositeTagDocument("doc-1") + document_2 = CompositeTagDocument("doc-2") + db0.tags(document_1).add(db0.as_tag("GRANT-READ", "tenant-late", "active")) + db0.tags(document_2).add(db0.as_tag("GRANT-READ", "tenant-late", "archived")) + + assert [doc.title for doc in db0.deserialize(bytes)] == ["doc-1"] + + +def test_deserialize_missing_tag_and_query_matches_after_tag_is_created(db0_fixture): + visible = MemoTestClass("visible") + hidden = MemoTestClass("hidden") + db0.tags(visible).add("visible") + + bytes = db0.serialize(db0.find("visible", "late-tag")) + + db0.tags(visible).add("late-tag") + db0.tags(hidden).add("late-tag") + + assert [item.value for item in db0.deserialize(bytes)] == ["visible"] + + +def test_deserialize_missing_tag_or_query_matches_after_tag_is_created(db0_fixture): + early = MemoTestClass("early") + late = MemoTestClass("late") + db0.tags(early).add("early-tag") + + bytes = 
db0.serialize(db0.find(["early-tag", "late-tag"])) + + db0.tags(late).add("late-tag") + + assert {item.value for item in db0.deserialize(bytes)} == {"early", "late"} + + def test_deserialize_from_bytes_with_snapshot(db0_fixture, memo_tags): db0.commit() snap = db0.snapshot() diff --git a/src/dbzero/core/collections/full_text/FT_IndexIterator.hpp b/src/dbzero/core/collections/full_text/FT_IndexIterator.hpp index ee83dd58..fbd64884 100644 --- a/src/dbzero/core/collections/full_text/FT_IndexIterator.hpp +++ b/src/dbzero/core/collections/full_text/FT_IndexIterator.hpp @@ -309,10 +309,10 @@ namespace db0 // Deserialization cannot use the nested index address directly because child // indexes must be resolved from the root TagIndex in the target snapshot. if (m_index_key_sequence.empty()) { - db0::serial::write(v, 1); + db0::serial::write(v, 1); db0::serial::write(v, *m_index_key); } else { - db0::serial::write(v, m_index_key_sequence.size()); + db0::serial::write(v, m_index_key_sequence.size()); for (auto const &index_key: m_index_key_sequence) { db0::serial::write(v, index_key); } diff --git a/src/dbzero/core/collections/full_text/FT_MissingIndexIterator.hpp b/src/dbzero/core/collections/full_text/FT_MissingIndexIterator.hpp new file mode 100644 index 00000000..38346118 --- /dev/null +++ b/src/dbzero/core/collections/full_text/FT_MissingIndexIterator.hpp @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2026 DBZero Software sp. z o.o. 
+ +#pragma once + +#include "FT_Iterator.hpp" +#include +#include +#include +#include +#include + +namespace db0 + +{ + + template + class FT_MissingIndexIterator: public FT_Iterator + { + public: + using self_t = FT_MissingIndexIterator; + using super_t = FT_Iterator; + + FT_MissingIndexIterator(std::uint64_t fixture_uuid, int direction, + std::vector &&index_key_sequence) + : m_fixture_uuid(fixture_uuid) + , m_direction(direction) + , m_index_key_sequence(std::move(index_key_sequence)) + { + } + + key_t getKey() const override + { + THROWF(db0::InputException) << "Missing index iterator has no key" << THROWF_END; + } + + bool isEnd() const override + { + return true; + } + + const std::type_info &typeId() const override + { + return typeid(self_t); + } + + void operator++() override + { + } + + void operator--() override + { + } + + void next(void * = nullptr) override + { + } + + bool join(key_t, int = -1) override + { + return false; + } + + void joinBound(key_t) override + { + } + + std::pair peek(key_t) const override + { + return { key_t{}, false }; + } + + bool isNextKeyDuplicated() const override + { + return false; + } + + std::unique_ptr > beginTyped(int direction = -1) const override + { + return std::make_unique( + m_fixture_uuid, + direction, + std::vector(m_index_key_sequence) + ); + } + + bool limitBy(key_t) override + { + return false; + } + + std::ostream &dump(std::ostream &os) const override + { + return os << "FTMissingIndex@" << this; + } + + void stop() override + { + } + + FTIteratorType getSerialTypeId() const override + { + return FTIteratorType::Index; + } + + void getSignature(std::vector &v) const override + { + db0::serial::getSignature(*this, v); + } + + protected: + void serializeFTIterator(std::vector &v) const override + { + using TypeIdType = decltype(db0::serial::typeId()); + + if (m_index_key_sequence.empty()) { + THROWF(db0::InternalException) << "Missing index iterator is missing index keys" << THROWF_END; + } + + 
db0::serial::write(v, db0::MorphingBIndex::getSerialTypeId()); + db0::serial::write(v, db0::serial::typeId()); + db0::serial::write(v, db0::serial::typeId()); + db0::serial::write(v, m_fixture_uuid); + db0::serial::write(v, m_direction); + db0::serial::write(v, m_index_key_sequence.size()); + for (auto const &index_key: m_index_key_sequence) { + db0::serial::write(v, index_key); + } + } + + double compareToImpl(const FT_IteratorBase &it) const override + { + return this->typeId() == it.typeId() ? 0.0 : 1.0; + } + + private: + const std::uint64_t m_fixture_uuid; + const int m_direction; + const std::vector m_index_key_sequence; + }; + +} diff --git a/src/dbzero/core/collections/full_text/FT_Serialization.hpp b/src/dbzero/core/collections/full_text/FT_Serialization.hpp index 7d813d42..909fd8f3 100644 --- a/src/dbzero/core/collections/full_text/FT_Serialization.hpp +++ b/src/dbzero/core/collections/full_text/FT_Serialization.hpp @@ -142,13 +142,13 @@ namespace db0 auto fixture = snapshot.getFixture(db0::serial::read(iter, end)); int direction = db0::serial::read(iter, end); if (index_key_type_id == db0::serial::typeId()) { - auto index_key_count = db0::serial::read(iter, end); + auto index_key_count = db0::serial::read(iter, end); if (index_key_count == 0) { THROWF(db0::InternalException) << "Serialized FT index iterator is missing index keys" << THROWF_END; } std::vector index_keys; index_keys.reserve(index_key_count); - for (std::uint64_t i = 0; i < index_key_count; ++i) { + for (std::uint32_t i = 0; i < index_key_count; ++i) { index_keys.push_back(db0::serial::read(iter, end)); } // use FT_Base index as the factory diff --git a/src/dbzero/object_model/tags/TagIndex.cpp b/src/dbzero/object_model/tags/TagIndex.cpp index f29e6b79..dd927295 100644 --- a/src/dbzero/object_model/tags/TagIndex.cpp +++ b/src/dbzero/object_model/tags/TagIndex.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -697,7 +698,20 @@ namespace 
db0::object_model // query as the long-tag return m_base_index_long.addIterator(factory, getLongTag(type_id, arg)); } else { - return m_base_index_short.addIterator(factory, getShortTag(type_id, arg)); + auto short_tag = getShortTag(type_id, arg); + if (m_base_index_short.addIterator(factory, short_tag)) { + return true; + } + bool inc_ref = false; + auto missing_tag = tryAddShortTag(type_id, arg, inc_ref); + if (!missing_tag) { + return false; + } + if (inc_ref) { + m_inc_refed_tags.insert(*missing_tag); + } + factory.add(makeMissingIterator(std::vector { *missing_tag })); + return true; } } @@ -795,7 +809,7 @@ namespace db0::object_model bool TagIndex::addCompositeIterator(const CompositeTagDef &tag, db0::FT_IteratorFactory &factory) const { - if (tag.size() < 2 || !m_short_tag_index_map) { + if (tag.size() < 2) { return false; } @@ -805,6 +819,28 @@ namespace db0::object_model return false; } + // Serialization needs the full root-to-leaf tag key path; lookup below + // only uses the current nested TagIndex plus the leaf key. 
+ std::vector serialized_tag_sequence; + serialized_tag_sequence.reserve(items.size()); + serialized_tag_sequence.push_back(*first_key); + + auto add_missing_composite_iterator = [&]() { + for (std::size_t i = serialized_tag_sequence.size(); i < items.size(); ++i) { + auto tag_key = tryGetCompositeKey(items[i].get()); + if (!tag_key) { + return false; + } + serialized_tag_sequence.push_back(*tag_key); + } + factory.add(makeMissingIterator(std::move(serialized_tag_sequence))); + return true; + }; + + if (!m_short_tag_index_map) { + return add_missing_composite_iterator(); + } + auto current_tag_index_ptr = m_short_tag_index_map->tryGet( *first_key, m_class_factory, @@ -814,22 +850,19 @@ namespace db0::object_model m_mutation_log ); if (!current_tag_index_ptr) { - return false; + return add_missing_composite_iterator(); } - // Serialization needs the full root-to-leaf tag key path; lookup below - // only uses the current nested TagIndex plus the leaf key. - std::vector serialized_tag_sequence; - serialized_tag_sequence.reserve(items.size()); - serialized_tag_sequence.push_back(*first_key); - auto *current_tag_index = current_tag_index_ptr.get(); for (std::size_t i = 1; i + 1 < items.size(); ++i) { auto tag_key = current_tag_index->tryGetCompositeKey(items[i].get()); - if (!tag_key || !current_tag_index->m_short_tag_index_map) { + if (!tag_key) { return false; } serialized_tag_sequence.push_back(*tag_key); + if (!current_tag_index->m_short_tag_index_map) { + return add_missing_composite_iterator(); + } current_tag_index_ptr = current_tag_index->m_short_tag_index_map->tryGet( *tag_key, current_tag_index->m_class_factory, @@ -839,7 +872,7 @@ namespace db0::object_model current_tag_index->m_mutation_log ); if (!current_tag_index_ptr) { - return false; + return add_missing_composite_iterator(); } current_tag_index = current_tag_index_ptr.get(); } @@ -872,7 +905,11 @@ namespace db0::object_model // complete root-to-leaf sequence is attached only so serialized queries can 
// reopen the same nested index by traversing from the root during deserialize. serialized_tag_sequence.push_back(*leaf_key); - return m_base_index_short.addIterator(factory, *leaf_key, std::move(serialized_tag_sequence)); + if (m_base_index_short.addIterator(factory, *leaf_key, std::vector(serialized_tag_sequence))) { + return true; + } + factory.add(makeMissingIterator(std::move(serialized_tag_sequence))); + return true; } std::optional TagIndex::tryGetCompositeKey(ObjectPtr arg) const @@ -886,8 +923,17 @@ namespace db0::object_model if (typeId == TypeId::DB0_TAG) { return tryAddShortTagFromTag(arg); } - if (typeId == TypeId::STRING || typeId == TypeId::DB0_ENUM_VALUE || typeId == TypeId::DB0_ENUM_VALUE_REPR || + if (typeId == TypeId::STRING || typeId == TypeId::DB0_ENUM_VALUE || typeId == TypeId::DB0_FIELD_DEF || typeId == TypeId::DB0_CLASS) + { + bool inc_ref = false; + auto tag_key = tryAddShortTag(typeId, arg, inc_ref); + if (inc_ref && tag_key) { + m_inc_refed_tags.insert(*tag_key); + } + return tag_key; + } + if (typeId == TypeId::DB0_ENUM_VALUE_REPR) { return getShortTag(typeId, arg); } @@ -1295,6 +1341,19 @@ namespace db0::object_model tag_sequence.back(), direction, std::vector(tag_sequence) ); } + + std::unique_ptr TagIndex::makeMissingIterator(std::vector &&tag_sequence, + int direction) const + { + if (tag_sequence.empty()) { + return nullptr; + } + return std::make_unique >( + m_fixture_uuid, + direction, + std::move(tag_sequence) + ); + } std::uint64_t getFindFixtureUUID(TagIndex::ObjectPtr obj_ptr) { diff --git a/src/dbzero/object_model/tags/TagIndex.hpp b/src/dbzero/object_model/tags/TagIndex.hpp index 904168b0..39f33111 100644 --- a/src/dbzero/object_model/tags/TagIndex.hpp +++ b/src/dbzero/object_model/tags/TagIndex.hpp @@ -152,6 +152,8 @@ DB0_PACKED_END std::unique_ptr makeIterator(ShortTagT) const; std::unique_ptr makeIterator(const std::vector &tag_sequence, int direction = -1) const; + std::unique_ptr makeMissingIterator(std::vector 
&&tag_sequence, + int direction = -1) const; private: using TypeId = db0::bindings::TypeId; diff --git a/tests/unit_tests/QuerySerializationTest.cpp b/tests/unit_tests/QuerySerializationTest.cpp index 31c6a2c2..1295b16a 100644 --- a/tests/unit_tests/QuerySerializationTest.cpp +++ b/tests/unit_tests/QuerySerializationTest.cpp @@ -156,4 +156,40 @@ namespace tests ASSERT_TRUE(cut->isEnd()); } + TEST_F( QuerySerializationTest , testMissingTagIteratorCanBeDeserializedAfterTagIsCreated ) + { + auto fixture = getFixture(); + FixedObjectList shared_object_list(100); + db0::object_model::ClassFactory class_factory(fixture); + db0::object_model::EnumFactory enum_factory(fixture); + VObjectCache cache(*fixture, shared_object_list); + + auto &tag_index = fixture->addResource( + *fixture, class_factory, enum_factory, fixture->getLimitedStringPool(), cache, fixture->addMutationHandler() + ); + + auto ft_query = tag_index.makeMissingIterator(std::vector { 44 }); + ASSERT_TRUE(ft_query); + ASSERT_TRUE(ft_query->isEnd()); + + std::vector buf; + ft_query->serialize(buf); + + { + auto batch_data = tag_index.getBaseIndexShort().beginBatchUpdate(); + batch_data->addTags({ makeUniqueAddr(303, 1), nullptr }, std::vector { 44 }); + batch_data->flush(); + } + + auto iter = buf.cbegin(), end = buf.cend(); + auto cut = deserializeFT_Iterator(m_workspace, iter, end); + ASSERT_TRUE(cut); + ASSERT_FALSE(cut->isEnd()); + + UniqueAddress value; + cut->next(&value); + ASSERT_EQ(value, makeUniqueAddr(303, 1)); + ASSERT_TRUE(cut->isEnd()); + } + } From 2724a340aecec4fb6dbe466bcb16edd4acbe11b2 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Thu, 28 May 2026 16:24:51 +0200 Subject: [PATCH 11/11] get_memo_classes extension + debug test fixes --- AGENTS.md | 3 +- dbzero/dbzero/dbzero.py | 2 +- dbzero/dbzero/reflection_api.py | 18 +++- python_tests/test_reflection_api.py | 88 +++++++++++++++++++ scripts/build.sh | 4 +- src/dbzero/bindings/python/types/PyClass.cpp | 35 ++++---- 
.../object_model/object/ObjectInitializer.hpp | 3 + src/dbzero/object_model/tags/TagIndex.cpp | 6 +- src/dbzero/object_model/tags/TagIndex.hpp | 2 +- tests/unit_tests/ObjectInitializerTest.cpp | 2 + 10 files changed, 135 insertions(+), 28 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index df8ca9d4..2bc2fc09 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -16,7 +16,7 @@ All tests must pass before a change is considered complete. ### Building -- Debug build: `./scripts/build.sh` +- Debug build: `./scripts/build.sh -d` (equivalent to `./scripts/build.sh`; debug is the default) - Release build: `./scripts/build.sh -r` - Release build with C++ unit test binary: `./scripts/build.sh -r -t` @@ -25,6 +25,7 @@ All tests must pass before a change is considered complete. - Python tests: `./scripts/run_tests.sh` - Final Python test checks: `./scripts/run_tests.sh -j 6` - C++ tests after a `-t` build: `./build/release/tests.x` +- Before final handoff, also verify the code in debug mode with a debug build (`./scripts/build.sh -d`) and the relevant Python tests against that debug build. Debug assertions are part of the required validation, not optional diagnostics. - During development, do not run stress tests by default; they are intentionally slow. Run focused tests specific to the feature or refactor being worked on before finalization. - If any C++ source under the native/core part of the project was modified, also run the C++ test suite (do not rely on the Python tests alone to cover native changes). 
diff --git a/dbzero/dbzero/dbzero.py b/dbzero/dbzero/dbzero.py index c9e4f4dc..21899e3d 100644 --- a/dbzero/dbzero/dbzero.py +++ b/dbzero/dbzero/dbzero.py @@ -10,7 +10,7 @@ def load_dynamic(name, path): def __bootstrap__(): global __bootstrap__, __loader__, __file__ - paths = [os.path.join(os.path.split(__file__)[0]), "/src/dev/build/release", "/usr/local/lib/python3/dist-packages/dbzero/"] + paths = [os.path.join(os.path.split(__file__)[0]), "/src/dev/build/debug", "/usr/local/lib/python3/dist-packages/dbzero/"] __file__ = None for path in paths: if os.path.isdir(path): diff --git a/dbzero/dbzero/reflection_api.py b/dbzero/dbzero/reflection_api.py index 3578f97a..4326655a 100644 --- a/dbzero/dbzero/reflection_api.py +++ b/dbzero/dbzero/reflection_api.py @@ -77,11 +77,18 @@ def __init__(self, param: inspect.Parameter, method: MethodInfo): class MemoMetaClass: """Memo class metadata info.""" - def __init__(self, name, module, class_uuid, is_singleton=False, instance_uuid=None): + def __init__(self, name, module, class_uuid, type_flags=None, instance_uuid=None): self.__name = name self.__module = module self.__class_uuid = class_uuid - self.__is_singleton = is_singleton + self.__type_flags = type_flags or { + "singleton": False, + "no_default_tags": False, + "immutable": False, + "intern": False, + "protect_fields": False, + "access_control": False, + } self.__instance_uuid = instance_uuid self.__cls = None @@ -117,7 +124,12 @@ def get_type(self)-> type: @property def is_singleton(self): """Is Memo class a singleton.""" - return self.__is_singleton + return self.__type_flags["singleton"] + + @property + def type_flags(self): + """Memo class type flags.""" + return self.__type_flags @property def instance_uuid(self): diff --git a/python_tests/test_reflection_api.py b/python_tests/test_reflection_api.py index 48a14c10..970d1d65 100644 --- a/python_tests/test_reflection_api.py +++ b/python_tests/test_reflection_api.py @@ -93,6 +93,94 @@ def 
test_get_memo_classes_returns_singletons(db0_fixture): # try accessing the singleton by UUID obj = singletons[0].get_instance() assert obj == root + + +def test_get_memo_classes_returns_type_flags(db0_fixture): + @db0.memo + class ReflectionFlagsDefault: + pass + + @db0.memo(singleton=True) + class ReflectionFlagsSingleton: + pass + + @db0.memo(protect_fields=True) + class ReflectionFlagsProtected: + pass + + @db0.memo(immutable=True) + class ReflectionFlagsImmutable: + pass + + @db0.memo(immutable=True, intern=True) + class ReflectionFlagsIntern: + pass + + @db0.memo(access_control=True) + class ReflectionFlagsAccessControlled: + pass + + @db0.memo(no_default_tags=True) + class ReflectionFlagsNoDefaultTags: + pass + + _ = ReflectionFlagsDefault() + singleton = ReflectionFlagsSingleton() + _ = ReflectionFlagsProtected() + _ = db0.materialized(ReflectionFlagsImmutable()) + _ = db0.materialized(ReflectionFlagsIntern()) + _ = ReflectionFlagsAccessControlled() + _ = ReflectionFlagsNoDefaultTags() + + expected_keys = { + "singleton", + "no_default_tags", + "immutable", + "intern", + "protect_fields", + "access_control", + } + memo_classes = list(db0.get_memo_classes()) + + def by_name(name): + return next( + memo_class for memo_class in memo_classes + if memo_class.name == name or memo_class.name.endswith(f".{name}") + ) + + def assert_flags(name, **expected): + memo_class = by_name(name) + flags = memo_class.type_flags + assert set(flags) == expected_keys + assert all(type(value) is bool for value in flags.values()) + assert memo_class.is_singleton is flags["singleton"] + expected_flags = {key: False for key in expected_keys} + expected_flags.update(expected) + for key, value in expected_flags.items(): + assert flags[key] is value + + assert_flags("ReflectionFlagsDefault", singleton=False) + assert_flags("ReflectionFlagsSingleton", singleton=True) + assert by_name("ReflectionFlagsSingleton").instance_uuid == db0.uuid(singleton) + 
assert_flags("ReflectionFlagsProtected", protect_fields=True) + assert_flags("ReflectionFlagsImmutable", immutable=True) + assert_flags("ReflectionFlagsIntern", immutable=True, intern=True) + assert_flags("ReflectionFlagsAccessControlled", access_control=True) + assert_flags("ReflectionFlagsNoDefaultTags", no_default_tags=True) + assert by_name("ReflectionFlagsDefault").instance_uuid is None + + +def test_get_memo_class_returns_type_flags(db0_fixture): + @db0.memo(immutable=True, no_default_tags=True) + class ReflectionSingleClassFlags: + pass + + obj = db0.materialized(ReflectionSingleClassFlags()) + memo_class = db0.get_memo_class(obj) + + assert memo_class.type_flags == memo_class.get_class().get_type_flags() + assert memo_class.type_flags["immutable"] is True + assert memo_class.type_flags["no_default_tags"] is True def test_memo_class_get_attributes(db0_fixture): diff --git a/scripts/build.sh b/scripts/build.sh index bf4d0104..f2a3144e 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -5,6 +5,7 @@ function show_help { echo "Use: build.sh [options]" echo " -h, --help Shows this help screen." echo " -j, --jobs Threads number. Max by default." + echo " -d, --debug Compile as debug. Note: debug build is by default." echo " -r, --release Compile as release. Note: debug build is by default." echo " -s, --sanitize Compile with sanitizers." echo " -i, --install Install build in specified directory" @@ -22,7 +23,7 @@ sanitizer="false" enable_debug_exceptions="true" build_tests="false" -TEMP=`getopt -o hj:rtsie --long help,jobs:,release,tests,sanitize,install,debug_exceptions -n 'build.sh' -- "$@"` +TEMP=`getopt -o hj:drtsie --long help,jobs:,debug,release,tests,sanitize,install,debug_exceptions -n 'build.sh' -- "$@"` if [ ! $? 
-eq 0 ]; then exit fi @@ -32,6 +33,7 @@ while true ; do case "$1" in -h|--help) show_help ; shift ;; -s|--sanitize) sanitizer="true" ; shift ;; + -d|--debug) build_type="debug" ; shift ;; -r|--release) build_type="release" ; shift ;; -t|--tests) build_tests="true" ; shift ;; -e|--disable_debug_exceptions) enable_debug_exceptions="false" ; shift ;; diff --git a/src/dbzero/bindings/python/types/PyClass.cpp b/src/dbzero/bindings/python/types/PyClass.cpp index c7f5b222..de52f2d1 100644 --- a/src/dbzero/bindings/python/types/PyClass.cpp +++ b/src/dbzero/bindings/python/types/PyClass.cpp @@ -98,9 +98,8 @@ namespace db0::python return runSafe(tryGetTypeInfo, reinterpret_cast(self)->ext()); } - PyObject *tryPyClassGetTypeFlags(PyObject *self) + PyObject *getTypeFlags(const db0::object_model::Class &type) { - auto &type = reinterpret_cast(self)->ext(); auto py_result = Py_OWN(PyDict_New()); PySafeDict_SetItemString(*py_result, "singleton", Py_OWN(PyBool_fromBool(type.isSingleton()))); PySafeDict_SetItemString(*py_result, "no_default_tags", Py_OWN(PyBool_fromBool(type.isNoDefaultTags()))); @@ -111,6 +110,11 @@ namespace db0::python return py_result.steal(); } + PyObject *tryPyClassGetTypeFlags(PyObject *self) + { + return getTypeFlags(reinterpret_cast(self)->ext()); + } + PyObject *PyAPI_PyClass_get_type_flags(PyObject *self, PyObject *) { PY_API_FUNC @@ -154,24 +158,15 @@ namespace db0::python } PyObject *tryGetTypeInfo(const db0::object_model::Class &type) - { - if (type.isSingleton()) { - // name, module, memo_uuid, is_singleton, singleton_uuid - return PySafeTuple_Pack( - Py_OWN(PyUnicode_FromString(type.getTypeName().c_str())), - Py_OWN(PyUnicode_FromString(type.getModuleName().c_str())), - Py_OWN(PyUnicode_FromString(type.getClassId().toUUIDString().c_str())), - Py_OWN(PyBool_fromBool(type.isSingleton())), - Py_OWN(getSingletonUUID(type)) - ); - } else { - // name, module, memo_uuid - return PySafeTuple_Pack( - Py_OWN(PyUnicode_FromString(type.getTypeName().c_str())), 
- Py_OWN(PyUnicode_FromString(type.getModuleName().c_str())), - Py_OWN(PyUnicode_FromString(type.getClassId().toUUIDString().c_str())) - ); - } + { + // name, module, memo_uuid, type_flags, singleton_uuid + return PySafeTuple_Pack( + Py_OWN(PyUnicode_FromString(type.getTypeName().c_str())), + Py_OWN(PyUnicode_FromString(type.getModuleName().c_str())), + Py_OWN(PyUnicode_FromString(type.getClassId().toUUIDString().c_str())), + Py_OWN(getTypeFlags(type)), + Py_OWN(getSingletonUUID(type)) + ); } } diff --git a/src/dbzero/object_model/object/ObjectInitializer.hpp b/src/dbzero/object_model/object/ObjectInitializer.hpp index 48c40f2c..bcc72f8b 100644 --- a/src/dbzero/object_model/object/ObjectInitializer.hpp +++ b/src/dbzero/object_model/object/ObjectInitializer.hpp @@ -289,6 +289,9 @@ namespace db0::object_model auto initAt = [&](std::uint32_t loc) { if (m_initializers[loc] && typeid(*m_initializers[loc]) == typeid(InitializerT)) { + if (!m_initializers[loc]->closed()) { + m_initializers[loc]->reset(); + } static_cast(m_initializers[loc].get())->init(object, std::forward(args)...); } else { m_initializers[loc].reset(new InitializerT(*this, loc, object, std::forward(args)...)); diff --git a/src/dbzero/object_model/tags/TagIndex.cpp b/src/dbzero/object_model/tags/TagIndex.cpp index dd927295..84357f67 100644 --- a/src/dbzero/object_model/tags/TagIndex.cpp +++ b/src/dbzero/object_model/tags/TagIndex.cpp @@ -1087,9 +1087,13 @@ namespace db0::object_model << " as a tag" << THROWF_END; } - TagIndex::ShortTagT TagIndex::addShortTagFromString(ObjectPtr py_arg, bool &inc_ref) const + std::optional TagIndex::addShortTagFromString(ObjectPtr py_arg, bool &inc_ref) const { assert(LangToolkit::isString(py_arg)); + if (m_fixture.safe_lock()->getAccessType() == AccessType::READ_ONLY) { + auto tag = getShortTagFromString(py_arg); + return tag ? 
std::optional(tag) : std::nullopt; + } return LangToolkit::addTagFromString(py_arg, m_string_pool, inc_ref); } diff --git a/src/dbzero/object_model/tags/TagIndex.hpp b/src/dbzero/object_model/tags/TagIndex.hpp index 39f33111..79405a31 100644 --- a/src/dbzero/object_model/tags/TagIndex.hpp +++ b/src/dbzero/object_model/tags/TagIndex.hpp @@ -228,7 +228,7 @@ DB0_PACKED_END std::optional tryAddShortTag(TypeId, ObjectPtr, bool &inc_ref) const; std::optional tryAddShortTag(ObjectPtr, bool &inc_ref) const; std::optional tryAddShortTag(ObjectSharedPtr, bool &inc_ref) const; - ShortTagT addShortTagFromString(ObjectPtr, bool &inc_ref) const; + std::optional addShortTagFromString(ObjectPtr, bool &inc_ref) const; // return 0x0 if object is from a different prefix (must be added as long tag) std::optional tryAddShortTagFromTag(ObjectPtr) const; std::optional tryAddShortTagFromMemo(ObjectPtr) const; diff --git a/tests/unit_tests/ObjectInitializerTest.cpp b/tests/unit_tests/ObjectInitializerTest.cpp index 57f81257..23d2824b 100644 --- a/tests/unit_tests/ObjectInitializerTest.cpp +++ b/tests/unit_tests/ObjectInitializerTest.cpp @@ -1384,6 +1384,8 @@ namespace tests ASSERT_EQ(root.getRefCounts().second, 1u); root.destroy(); + holderClass->flush(); + rootClass->flush(); } rootClass.reset();