From 662bab6dbce3a48f89218ce7471a92b50549e1e8 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Tue, 2 Jun 2026 18:20:59 +0200 Subject: [PATCH 1/5] TagAddress implemented + integration --- design/PASSIVE_TAGS_DESIGN.md | 318 ++++++++++++++++++ .../collections/full_text/FT_BaseIndex.cpp | 1 + .../collections/full_text/FT_BaseIndex.hpp | 1 + .../full_text/FT_Serialization.hpp | 27 +- src/dbzero/core/memory/Address.hpp | 116 ++++++- .../core/serialization/Serializable.hpp | 4 +- src/dbzero/object_model/tags/TagIndex.cpp | 52 +-- src/dbzero/object_model/tags/TagIndex.hpp | 6 +- tests/unit_tests/QuerySerializationTest.cpp | 35 +- tests/unit_tests/TagAddressTest.cpp | 95 ++++++ tests/unit_tests/VInstanceMapTest.cpp | 14 +- 11 files changed, 605 insertions(+), 64 deletions(-) create mode 100644 design/PASSIVE_TAGS_DESIGN.md create mode 100644 tests/unit_tests/TagAddressTest.cpp diff --git a/design/PASSIVE_TAGS_DESIGN.md b/design/PASSIVE_TAGS_DESIGN.md new file mode 100644 index 00000000..14294bcf --- /dev/null +++ b/design/PASSIVE_TAGS_DESIGN.md @@ -0,0 +1,318 @@ +# Passive Tags Design + +This is a design document for passive tags: tag index entries that behave like normal tags for query matching and removal, but do not hold the tagged object alive. + +Passive tags are intended for high-churn labels such as security and access predicates. They reduce mutation overhead by skipping object tag reference bookkeeping. The tradeoff is that passive tag entries may outlive the object they point at and may require periodic cleanup. + +## Goals + +Passive tags should: + +- Use the existing `db0.tags(...).add(...)` and `db0.find(...)` tag grammar. +- Avoid object lifetime coupling: assigning a passive tag must not make the target object durable. +- Require at least one additional positive predicate when queried. +- Match regular tag values transparently in multi-predicate queries. +- Remove through the normal tag removal API, without requiring a passive flag. 
+- Work for simple and composite tags. +- Preserve existing regular tag behavior and type-tag durability semantics. + +Passive tags should not expose a separate public tag type, iterator type, or query API. The only public API addition is the keyword-only `passive` argument on `db0.tags(...)`. + +## Python API + +`db0.tags` gains a keyword-only boolean argument: + +```python +db0.tags(obj, passive=True).add("SECURITY-GROUP") +db0.tags(obj).remove("SECURITY-GROUP") +``` + +Rules: + +- `passive` defaults to `False`. +- `passive=True` affects only subsequent `add` calls through that tag manager. +- `passive` has no effect for `remove`. +- `passive` is keyword-only. Positional use must fail with the normal Python argument error. +- `passive=True` is accepted for direct object targets and query targets. + +Example: + +```python +obj = MemoType(123) + +db0.tags(obj, passive=True).add("SOME-NEW-TAG") + +# Re-application does not promote the passive tag to a regular tag. +db0.tags(obj).add("SOME-NEW-TAG") + +# Passive-only lookup is invalid. +db0.find("SOME-NEW-TAG") # raises + +# A passive tag may be queried together with another positive predicate. +db0.find(MemoType, "SOME-NEW-TAG") + +# Removal does not need passive=True. +db0.tags(obj).remove("SOME-NEW-TAG") +``` + +## Behavioral Semantics + +Passive tag assignment creates an index relationship from tag to object address, but does not create a tag reference on the object. If an otherwise unreachable object has only passive tags, it remains eligible for normal drop behavior. + +Regular and passive tag entries share the same logical tag identity. A query for `"A"` should match both the regular `"A"` entry and the passive `"A"` entry when the query is valid. + +If the same object already has a passive entry for a tag, adding the regular version of the same tag is a no-op for the relationship. It must not convert the entry to regular and must not increment the object's tag reference count. 
This is the "passive obscures regular" rule. + +If the same object already has a regular entry for a tag, adding the passive version of the same tag should also be a no-op for the relationship. The existing regular durability should remain regular. This avoids weakening an existing durable tag through an accidental passive add. + +Removing a tag removes both passive and regular forms for that object and logical tag. Removal should release regular tag references only for entries that were actually regular. + +## Query Semantics + +A query that contains only a passive-capable tag predicate is invalid because the index may contain stale object addresses that no longer refer to live objects. The caller must provide at least one additional positive predicate that can validate the object set, such as a memo type or another regular tag. + +Invalid: + +```python +db0.find("SECURITY-GROUP") +db0.find(db0.as_tag("SECURITY-GROUP")) +db0.find(["SECURITY-GROUP", "OTHER-SECURITY-GROUP"]) +``` + +Valid: + +```python +db0.find(MemoType, "SECURITY-GROUP") +db0.find("REGULAR-TAG", "SECURITY-GROUP") +db0.find(db0.as_tag("ACCESS", account), MemoType) +``` + +The implementation cannot know from the public tag value alone whether all matching entries are regular or passive unless it inspects the index. The conservative rule should therefore be syntactic and index-aware: + +- A root query that uses any passive-capable predicate must also contain at least one positive non-passive predicate; the position of that predicate in the argument list does not matter. +- Explicit type predicates count as non-passive positive predicates. +- Direct memo-object predicates count as non-passive positive predicates. +- `ObjectIterable` native predicates count as non-passive positive predicates only if their query planning metadata says they are anchored by a non-passive predicate. +- Negated predicates do not count. 
+ +For user-facing errors, prefer a clear `InputException` message such as: + +```text +Passive tag queries require at least one non-passive positive predicate +``` + +## Storage Representation + +Short tags currently fit into the low bits of a 64-bit `ShortTagT`. Passive tags should use the highest-order bit as a persisted passive flag: + +```cpp +static constexpr ShortTagT PASSIVE_TAG_BIT = 1ull << 63; +static constexpr ShortTagT SHORT_TAG_VALUE_MASK = ~PASSIVE_TAG_BIT; +``` + +All logical comparisons and lookups must ignore `PASSIVE_TAG_BIT`. Storage operations that need to distinguish passive from regular entries must preserve it. This means the stored key may be passive or regular depending on which form was inserted first, while searches and duplicate detection treat both keys as the same logical tag. + +Helpers should centralize this logic in `TagIndex` or a small tag-id helper: + +```cpp +constexpr bool isPassiveTagKey(ShortTagT tag); +constexpr ShortTagT makePassiveTagKey(ShortTagT tag); +constexpr ShortTagT stripPassiveTagKey(ShortTagT tag); +constexpr bool sameLogicalTag(ShortTagT lhs, ShortTagT rhs); +``` + +The existing note says regular short tags use only the low 50 bits. The implementation should still guard this explicitly with a debug assertion or static invariant wherever short tag keys are constructed, because passive tags depend on the high bit being available for metadata. + +Long tags should not need a separate passive encoding for the initial implementation. A foreign tag that cannot be represented as a local short tag can continue through the long-tag path as regular-only. If passive foreign tags are required later, the passive flag should be applied to the local short component inside the `LongTagT` pair only after verifying that full-text index comparison can mask that component consistently. 
+ +## Index Layout + +The preferred implementation is to store passive and regular forms in the existing short-tag `FT_BaseIndex`, using the high bit to distinguish the persisted entry mode: + +- Regular entry: `tag`. +- Passive entry: `tag | PASSIVE_TAG_BIT`. +- Logical query for `tag`: search with `stripPassiveTagKey(tag)` and compare with `sameLogicalTag`. +- Logical duplicate detection: treat regular and passive keys as equal. +- Logical removal for `tag`: remove the matching stored key whether it is regular or passive. + +This preserves the current index structure and avoids a second full-text index. It does require the full-text index comparison points used by `TagIndex` to mask `PASSIVE_TAG_BIT`. If those comparison points are generic templates, prefer a tag-key comparator or traits parameter over changing all `FT_BaseIndex` users. Non-tag indexes must not start masking high bits accidentally. + +Iterator metadata and serialized query tag sequences should store logical, stripped tag values. Reopened queries must resolve through the same masked comparison rules, not depend on whether the original stored key happened to be passive. + +## Object Lifetime And Reference Counting + +Regular tag flushing currently increments the tagged object reference count through `add_tag_callback` and decrements it through `remove_tag_callback`. Passive entries must bypass those object callbacks. + +Implementation approach: + +- Add a separate passive batch operation for short tags, for example `m_batch_op_short_passive`. +- Flush passive entries with callbacks that update tag-token references as needed but do not call `LangToolkit::incRefMemo(true, ...)` or `LangToolkit::decRefMemo(true, ...)`. +- Keep regular `m_batch_op_short` behavior unchanged. +- Type tags must remain regular-only. +- Passive tag assignment must not trigger auto-assignment of default type tags when it is the object's first tag. + +Tag string/token reference counting still matters. 
A passive tag entry should keep the tag token definition alive for as long as the passive index entry exists, otherwise queries by that tag value may become unresolvable. Object lifetime and tag-token lifetime are separate concerns. + +## Duplicate And Obscuring Rules + +The duplicate rule follows from masked comparison: regular and passive forms are the same logical index key, but the persisted key keeps the mode of the first successful insert. + +Required behavior: + +- Adding passive when regular exists: no new passive entry; leave regular state unchanged. +- Adding regular when passive exists: no new regular entry; leave passive state unchanged. +- Adding the same mode twice: no-op, as existing tag add semantics already imply. +- Removing: remove the one stored physical key for the logical tag. + +If `FT_BaseIndex::BatchOperationBuilder` only detects duplicates through exact integer equality, it must be made passive-aware for tag indexes or `TagIndex` must explicitly check the alternate key before enqueueing an add. It should check both persisted index state and pending batch state. Pending checks are important for sequences like: + +```python +db0.tags(obj, passive=True).add("A") +db0.tags(obj).add("A") +``` + +before a flush. + +## Composite Tags + +Passive tags are allowed as composite tags. The passive flag applies to the leaf relationship, not the composite path keys. + +For a composite tag such as `("ACCESS", account)`: + +- The path keys (`"ACCESS"`) identify nested `TagIndex` instances and should stay unflagged. +- The leaf key (`account`) is stored as regular or passive according to the tag manager's add mode. +- Query planning for the composite leaf should use stripped logical keys and masked comparison. +- Removal should remove the stored leaf form whether it is regular or passive. + +This keeps the nested index map stable. 
If passive bits were applied to path keys, the same logical composite prefix could create separate nested indexes and break query equivalence. + +Composite query validation follows the same passive-predicate rule. A query made only of one passive-capable composite tag is invalid; adding a type or another positive non-passive predicate makes it valid. + +## Python Binding Changes + +`makeObjectTagManager` currently accepts only `METH_FASTCALL` positional arguments. To support a keyword-only `passive` argument, change the module method registration and parser: + +- Register `db0.tags` as `METH_FASTCALL | METH_KEYWORDS`. +- Parse positional targets as the existing object/query target list. +- Parse keyword-only `passive` as `bool`, defaulting to `False`. +- Reject unknown keywords. + +Thread the parsed flag through: + +- `PyObjectTagManager`. +- `ObjectTagManager::makeNew`. +- `ObjectTagManager` constructor. +- `ObjectTagManager::add`. +- `ObjectTagManager::ObjectInfo::add`. +- `TagIndex::addTags` and composite add helpers. + +Do not thread `passive` through removal. + +## C++ API Changes + +Add passive-aware overloads rather than changing every call site implicitly: + +```cpp +void TagIndex::addTags(ObjectPtr memo_ptr, ObjectPtr const *lang_args, std::size_t nargs, bool passive); +void TagIndex::addTag(ObjectPtr memo_ptr, ShortTagT tag_addr, bool is_type, bool passive = false); +std::shared_ptr<TagIndex> addComposite(ObjectPtr memo_ptr, ShortTagT key); +``` + +`is_type` and `passive` must never both be true. Assert this in debug builds and reject it if a public path can trigger it. + +Type-tag assignment in `ObjectTagManager::ObjectInfo::add` should run only for regular adds: + +```cpp +if (!passive && !m_has_tags) { + // assign default type tags +} +``` + +`m_has_tags` currently reflects durable tag refs. Passive-only objects should continue to report false for this field so that the first later regular tag still assigns default type tags. 
+ +## Query Planning Changes + +`TagIndex::addIterator` should track whether each query branch is passive-capable and whether the root query has a non-passive positive anchor. + +One practical structure is: + +```cpp +struct QueryPredicateInfo { + bool contributes_results = false; + bool may_read_passive_entries = false; + bool is_non_passive_anchor = false; +}; +``` + +For simple short tags: + +- Add an iterator for the stripped logical tag key. The underlying tag-index comparison must match either stored form. +- Set `may_read_passive_entries = true`. +- Set `is_non_passive_anchor = false` unless the argument is a type or direct memo object. + +For explicit type filters: + +- Query only the regular type-tag key. +- Set `is_non_passive_anchor = true`. + +For direct memo-object predicates: + +- Use the fixed-key iterator. +- Set `is_non_passive_anchor = true`. + +For nested OR/AND/list/tuple queries: + +- Propagate `may_read_passive_entries`. +- Propagate `is_non_passive_anchor` only when the branch semantics guarantee a positive anchor is applied to every returned object. An AND tuple may propagate an anchor from any positive child. An OR list should not count as a root anchor unless every OR branch has a non-passive anchor. + +After root planning, reject if `may_read_passive_entries` is true and no root non-passive anchor exists. + +## Cleanup + +Passive tags are intentionally not cleaned when the underlying object is dropped. This can degrade index size and query performance. + +The initial implementation may defer cleanup, but should keep enough structure to add it later: + +- Passive entries are physically distinguishable through `PASSIVE_TAG_BIT`. +- Query execution should already intersect passive results with another live-object predicate, preventing stale entries from being exposed. +- A future cleanup task can scan passive entries, test whether the object address still names a live object, and remove stale entries in batches. 
+ +Do not perform cleanup opportunistically inside normal `find` iteration in the first implementation. Query paths may run against read-only snapshots, and mutating cleanup there would complicate snapshot consistency. + +## Tests + +Follow TDD and add Python tests before implementation. + +Core Python tests: + +- `db0.tags(obj, passive=True).add("A")` does not keep an otherwise unreferenced object alive across flush/reopen. +- `db0.find("A")` raises after passive assignment. +- `db0.find(MemoType, "A")` returns the object while it is still live. +- Passive then regular add remains passive: dropping all ordinary references still drops the object. +- Regular then passive add remains regular: the object remains durable as it did before. +- `db0.tags(obj).remove("A")` removes a passive tag. +- `db0.tags(obj).remove("A")` removes a regular tag even if called after passive-capable operations. +- `passive` is keyword-only and unknown keywords are rejected. +- `passive=True` on a query target applies tags to every object in the query. + +Composite tests: + +- Passive composite tag can be added and found with an explicit type predicate. +- Passive composite-only query raises. +- Passive then regular composite add does not promote the relationship. +- Composite removal removes passive entries without requiring `passive=True`. + +Native-focused tests, if C++ sources are modified: + +- Short tag helper masking. +- Query matching from logical key to either stored regular or passive physical key. +- Duplicate obscuring checks across pending and persisted entries. +- Passive flush does not call object inc/dec tag ref callbacks. + +Before final handoff for an implementation that changes native code, run a release build with C++ tests and the relevant Python tests, then a debug build with the relevant Python tests, per repository policy. + +## Open Questions + +- Whether passive long/foreign tags should be supported in the first implementation. 
The current design treats them as regular-only unless a safe masked representation is added for `LongTagT`. +- Whether serialized query metadata must record that a query branch may read passive entries. If serialized queries can reopen passive-capable iterators without going through normal planning, the passive anchor validation must be preserved in serialization. +- Whether a public cleanup API is needed immediately or a background/internal maintenance hook is sufficient for the first release. diff --git a/src/dbzero/core/collections/full_text/FT_BaseIndex.cpp b/src/dbzero/core/collections/full_text/FT_BaseIndex.cpp index bb77830e..66f6b9b8 100644 --- a/src/dbzero/core/collections/full_text/FT_BaseIndex.cpp +++ b/src/dbzero/core/collections/full_text/FT_BaseIndex.cpp @@ -491,6 +491,7 @@ namespace db0 } template class FT_BaseIndex; + template class FT_BaseIndex; template class FT_BaseIndex; template class FT_BaseIndex; diff --git a/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp b/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp index 87eb6372..57f8648a 100644 --- a/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp +++ b/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp @@ -306,6 +306,7 @@ namespace db0 }; extern template class FT_BaseIndex; + extern template class FT_BaseIndex; extern template class FT_BaseIndex; extern template class FT_BaseIndex; diff --git a/src/dbzero/core/collections/full_text/FT_Serialization.hpp b/src/dbzero/core/collections/full_text/FT_Serialization.hpp index 909fd8f3..c42a7d92 100644 --- a/src/dbzero/core/collections/full_text/FT_Serialization.hpp +++ b/src/dbzero/core/collections/full_text/FT_Serialization.hpp @@ -4,6 +4,7 @@ #pragma once #include +#include #include "FT_Iterator.hpp" #include "SortedIterator.hpp" #include "FT_IndexIterator.hpp" @@ -50,8 +51,9 @@ namespace db0 auto index_key_type_id = db0::serial::read(_iter, end); if (key_type_id == db0::serial::typeId()) { if constexpr (std::is_same_v) { - if 
(index_key_type_id == db0::serial::typeId()) { - return deserializeFT_IndexIterator, KeyT, std::uint64_t>(workspace, iter, end); + if (index_key_type_id == db0::serial::typeId() || + index_key_type_id == db0::serial::typeId()) { + return deserializeFT_IndexIterator, KeyT, db0::TagAddress>(workspace, iter, end); } else { THROWF(db0::InternalException) << "Unsupported index key type ID: " << index_key_type_id << THROWF_END; @@ -61,8 +63,9 @@ namespace db0 if (key_type_id == db0::serial::typeId()) { if constexpr (std::is_same_v) { - if (index_key_type_id == db0::serial::typeId()) { - return deserializeFT_IndexIterator, KeyT, std::uint64_t>(workspace, iter, end); + if (index_key_type_id == db0::serial::typeId() || + index_key_type_id == db0::serial::typeId()) { + return deserializeFT_IndexIterator, KeyT, db0::TagAddress>(workspace, iter, end); } else { THROWF(db0::InternalException) << "Unsupported index key type ID: " << index_key_type_id << THROWF_END; @@ -133,7 +136,9 @@ namespace db0 } auto index_key_type_id = db0::serial::read(iter, end); - if (index_key_type_id != db0::serial::typeId()) { + const bool compatible_tag_address = + std::is_same_v && index_key_type_id == db0::serial::typeId(); + if (index_key_type_id != db0::serial::typeId() && !compatible_tag_address) { THROWF(db0::InternalException) << "Index key type mismatch: " << index_key_type_id << " != " << db0::serial::typeId() << THROWF_END; } @@ -141,19 +146,23 @@ namespace db0 // get fixture by UUID auto fixture = snapshot.getFixture(db0::serial::read(iter, end)); int direction = db0::serial::read(iter, end); - if (index_key_type_id == db0::serial::typeId()) { + if (index_key_type_id == db0::serial::typeId() || index_key_type_id == db0::serial::typeId()) { auto index_key_count = db0::serial::read(iter, end); if (index_key_count == 0) { THROWF(db0::InternalException) << "Serialized FT index iterator is missing index keys" << THROWF_END; } - std::vector index_keys; + std::vector index_keys; 
index_keys.reserve(index_key_count); for (std::uint32_t i = 0; i < index_key_count; ++i) { - index_keys.push_back(db0::serial::read(iter, end)); + if constexpr (std::is_same_v) { + index_keys.push_back(db0::serial::read(iter, end)); + } else { + index_keys.push_back(db0::serial::read(iter, end)); + } } // use FT_Base index as the factory // NOTE: TagIndex only supports UniqueAddress key type - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v && std::is_same_v) { auto &tag_index = fixture->get(); return tag_index.makeIterator(index_keys, direction); } diff --git a/src/dbzero/core/memory/Address.hpp b/src/dbzero/core/memory/Address.hpp index b80ce09c..954cb612 100644 --- a/src/dbzero/core/memory/Address.hpp +++ b/src/dbzero/core/memory/Address.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include namespace db0 @@ -178,7 +179,7 @@ DB0_PACKED_BEGIN inline Address getAddress() const { return Address::fromOffset(getOffset()); } - + // Address cast inline operator Address() const { return Address::fromOffset(getOffset()); @@ -194,6 +195,111 @@ DB0_PACKED_BEGIN }; DB0_PACKED_END +DB0_PACKED_BEGIN + class DB0_PACKED_ATTR TagAddress + { + public: + static constexpr std::uint64_t PASSIVE_BIT = 1ULL << 63; + static constexpr std::uint64_t ADDRESS_MASK = ~PASSIVE_BIT; + + TagAddress() = default; + + static inline TagAddress fromValue(std::uint64_t value) { + return TagAddress(value); + } + + static inline TagAddress fromOffset(std::uint64_t offset) { + assert((offset & PASSIVE_BIT) == 0); + return TagAddress(offset); + } + + static inline TagAddress fromAddress(Address address) { + return fromOffset(address.getOffset()); + } + + inline bool operator!() const { + return getOffset() == 0; + } + + inline bool isValid() const { + return getOffset() != 0; + } + + inline bool isPassive() const { + return (m_value & PASSIVE_BIT) != 0; + } + + inline TagAddress asPassive() const { + assert(isValid()); + return TagAddress(getOffset() | PASSIVE_BIT); + } + + 
inline TagAddress asRegular() const { + return TagAddress(getOffset()); + } + + inline std::uint64_t getOffset() const { + return m_value & ADDRESS_MASK; + } + + inline std::uint64_t getValue() const { + return m_value; + } + + inline Address getAddress() const { + return Address::fromOffset(getOffset()); + } + + inline operator Address() const { + return getAddress(); + } + + inline operator std::uint64_t() const { + return getOffset(); + } + + inline bool operator==(const TagAddress &other) const { + return getOffset() == other.getOffset(); + } + + inline bool operator!=(const TagAddress &other) const { + return getOffset() != other.getOffset(); + } + + inline bool operator<(const TagAddress &other) const { + return getOffset() < other.getOffset(); + } + + inline bool operator>(const TagAddress &other) const { + return getOffset() > other.getOffset(); + } + + inline bool operator<=(const TagAddress &other) const { + return getOffset() <= other.getOffset(); + } + + inline bool operator>=(const TagAddress &other) const { + return getOffset() >= other.getOffset(); + } + + inline friend std::ostream &operator<<(std::ostream &os, const TagAddress &address) { + os << address.m_value; + return os; + } + + private: + std::uint64_t m_value = 0; + + explicit inline TagAddress(std::uint64_t value) + : m_value(value) + { + } + }; +DB0_PACKED_END + + static_assert(sizeof(TagAddress) == sizeof(std::uint64_t)); + static_assert(std::is_trivially_copyable_v); + UniqueAddress makeUniqueAddr(std::uint64_t offset, std::uint16_t id); } @@ -215,4 +321,10 @@ namespace std } }; -} \ No newline at end of file + template <> struct hash { + std::size_t operator()(const db0::TagAddress &address) const noexcept { + return std::hash()(address.getOffset()); + } + }; + +} diff --git a/src/dbzero/core/serialization/Serializable.hpp b/src/dbzero/core/serialization/Serializable.hpp index 22afa607..017d4087 100644 --- a/src/dbzero/core/serialization/Serializable.hpp +++ 
b/src/dbzero/core/serialization/Serializable.hpp @@ -144,7 +144,7 @@ namespace db0::serial using TypeList = std::tuple< std::int8_t, std::int16_t, std::int32_t, std::int64_t, std::uint8_t, std::uint16_t, std::uint32_t, std::uint64_t, db0::Address, db0::UniqueAddress, - float, double, std::string>; + float, double, std::string, db0::TagAddress>; // compile error: binary expression in operand of fold-expression std::apply([&](auto... type) { std::size_t n { 0 }; @@ -169,4 +169,4 @@ namespace db0::serial } } -} \ No newline at end of file +} diff --git a/src/dbzero/object_model/tags/TagIndex.cpp b/src/dbzero/object_model/tags/TagIndex.cpp index 16b56b62..d77172db 100644 --- a/src/dbzero/object_model/tags/TagIndex.cpp +++ b/src/dbzero/object_model/tags/TagIndex.cpp @@ -30,7 +30,7 @@ namespace db0::object_model public: using LangToolkit = TagIndex::LangToolkit; using ObjectPtr = LangToolkit::ObjectPtr; - using TagMakerFunction = std::function; + using TagMakerFunction = std::function; TagMakerSequence(IteratorT begin, IteratorT end, TagMakerFunction tag_maker) : m_begin(begin) @@ -58,7 +58,7 @@ namespace db0::object_model ++m_value; } - std::uint64_t operator*() const { + TagIndex::ShortTagT operator*() const { return m_tag_maker(*m_value); } }; @@ -262,7 +262,7 @@ namespace db0::object_model } void TagIndex::addTag(ObjectPtr memo_ptr, Address tag_addr, bool is_type) { - addTag(memo_ptr, tag_addr.getOffset(), is_type); + addTag(memo_ptr, ShortTagT::fromAddress(tag_addr), is_type); } void TagIndex::addTag(ObjectPtr memo_ptr, ShortTagT tag, bool is_type) @@ -334,7 +334,7 @@ namespace db0::object_model void TagIndex::removeTypeTag(UniqueAddress obj_addr, Address tag_addr) { auto &batch_operation = getBatchOperation(m_base_index_short, m_batch_op_types); - batch_operation->removeTag({ obj_addr, nullptr }, tag_addr.getOffset()); + batch_operation->removeTag({ obj_addr, nullptr }, ShortTagT::fromAddress(tag_addr)); m_mutation_log->onDirty(); } @@ -437,7 +437,7 @@ namespace 
db0::object_model void TagIndex::tryTagIncRef(ShortTagT tag_addr) const { - if (m_string_pool.isTokenAddr(Address::fromOffset(tag_addr)) && + if (m_string_pool.isTokenAddr(tag_addr) && m_inc_refed_tags.find(tag_addr) == m_inc_refed_tags.end()) { m_string_pool.addRefByAddr(tag_addr); @@ -446,7 +446,7 @@ namespace db0::object_model void TagIndex::tryTagDecRef(ShortTagT tag_addr) const { - if (m_string_pool.isTokenAddr(Address::fromOffset(tag_addr))) { + if (m_string_pool.isTokenAddr(tag_addr)) { m_string_pool.unRefByAddr(tag_addr); } } @@ -543,7 +543,7 @@ namespace db0::object_model // but it will be more efficient to do it here const Class *type_ptr = &LangToolkit::getMemoType(obj_ptr); while (type_ptr) { - batch_op_types->removeTag({ obj_addr, nullptr }, type_ptr->getAddress().getOffset()); + batch_op_types->removeTag({ obj_addr, nullptr }, ShortTagT::fromAddress(type_ptr->getAddress())); type_ptr = type_ptr->getBaseClassPtr(); } } @@ -561,13 +561,13 @@ namespace db0::object_model } std::function add_long_index_callback = [&](LongTagT long_tag_addr) { - tryTagIncRef(long_tag_addr[0]); - tryTagIncRef(long_tag_addr[1]); + tryTagIncRef(ShortTagT::fromValue(long_tag_addr[0])); + tryTagIncRef(ShortTagT::fromValue(long_tag_addr[1])); }; std::function erase_long_index_callback = [&](LongTagT long_tag_addr) { - tryTagDecRef(long_tag_addr[0]); - tryTagDecRef(long_tag_addr[1]); + tryTagDecRef(ShortTagT::fromValue(long_tag_addr[0])); + tryTagDecRef(ShortTagT::fromValue(long_tag_addr[1])); }; // flush all long tags' updates @@ -645,7 +645,7 @@ namespace db0::object_model bool result = !no_result; // apply type filter if provided (unless type is a MemoBase) if (type) { - result &= m_base_index_short.addIterator(factory, type->getAddress().getOffset()); + result &= m_base_index_short.addIterator(factory, ShortTagT::fromAddress(type->getAddress())); } while (result && (offset < nargs)) { @@ -980,7 +980,7 @@ namespace db0::object_model } else if (type_id == TypeId::DB0_CLASS) { 
return getShortTagFromClass(py_arg); } else if (type_id == TypeId::MEMO_OBJECT || type_id == TypeId::MEMO_IMMUTABLE_OBJECT) { - return LangToolkit::getMemoUniqueAddress(py_arg).getAddress().getOffset(); + return ShortTagT::fromAddress(LangToolkit::getMemoUniqueAddress(py_arg).getAddress()); } THROWF(db0::InputException) << "Unable to interpret object of type: " << LangToolkit::getTypeName(py_arg) << " as a tag" << THROWF_END; @@ -1017,19 +1017,19 @@ namespace db0::object_model TagIndex::ShortTagT TagIndex::getShortTagFromString(ObjectPtr py_arg) const { assert(LangToolkit::isString(py_arg)); - return LangToolkit::getTagFromString(py_arg, m_string_pool); + return ShortTagT::fromOffset(LangToolkit::getTagFromString(py_arg, m_string_pool)); } TagIndex::ShortTagT TagIndex::getShortTagFromTag(ObjectPtr py_arg) const { assert(LangToolkit::isTag(py_arg)); // NOTE: we use only the offset part as tag - to distinguish from enum and class tags (high bits) - return LangToolkit::getTypeManager().extractTag(py_arg).getAddress(m_class_factory).getOffset(); + return ShortTagT::fromAddress(LangToolkit::getTypeManager().extractTag(py_arg).getAddress(m_class_factory)); } TagIndex::ShortTagT TagIndex::getShortTagFromTag(const TagDef &tag_def) const { // NOTE: we use only the offset part as tag - to distinguish from enum and class tags (high bits) - return tag_def.getAddress(m_class_factory).getOffset(); + return ShortTagT::fromAddress(tag_def.getAddress(m_class_factory)); } TagIndex::ShortTagT TagIndex::getShortTagFromEnumValue(const EnumValue &enum_value, ObjectSharedPtr *alt_repr) const @@ -1043,17 +1043,17 @@ namespace db0::object_model // tag does not exist return {}; } - return LangToolkit::getTypeManager().extractEnumValue(alt_repr->get()).getUID().asULong(); + return ShortTagT::fromOffset(LangToolkit::getTypeManager().extractEnumValue(alt_repr->get()).getUID().asULong()); } else { auto value = m_enum_factory.tryMigrateEnumValue(enum_value); if (!value) { // tag does not exist 
return {}; } - return (*value).getUID().asULong(); + return ShortTagT::fromOffset((*value).getUID().asULong()); } } - return enum_value.getUID().asULong(); + return ShortTagT::fromOffset(enum_value.getUID().asULong()); } TagIndex::ShortTagT TagIndex::getShortTagFromEnumValue(ObjectPtr py_arg, ObjectSharedPtr *alt_repr) const @@ -1069,14 +1069,14 @@ namespace db0::object_model } TagIndex::ShortTagT TagIndex::getShortTagFromClass(const Class &type) const { - return type.getAddress().getOffset(); + return ShortTagT::fromAddress(type.getAddress()); } TagIndex::ShortTagT TagIndex::getShortTagFromFieldDef(ObjectPtr py_arg) const { auto &field_def = LangToolkit::getTypeManager().extractFieldDef(py_arg); // class UID (32bit) + primary field ID (32 bit) - return (static_cast(field_def.m_class_uid) << 32) | field_def.m_member.getLongIndex(); + return ShortTagT::fromOffset((static_cast(field_def.m_class_uid) << 32) | field_def.m_member.getLongIndex()); } TagIndex::ShortTagT TagIndex::getShortTag(ObjectSharedPtr py_arg, ObjectSharedPtr *alt_repr) const { @@ -1117,9 +1117,9 @@ namespace db0::object_model assert(LangToolkit::isString(py_arg)); if (m_fixture.safe_lock()->getAccessType() == AccessType::READ_ONLY) { auto tag = getShortTagFromString(py_arg); - return tag ? std::optional(tag) : std::nullopt; + return tag.isValid() ? 
std::optional(tag) : std::nullopt; } - return LangToolkit::addTagFromString(py_arg, m_string_pool, inc_ref); + return ShortTagT::fromOffset(LangToolkit::addTagFromString(py_arg, m_string_pool, inc_ref)); } std::optional TagIndex::tryAddShortTagFromMemo(ObjectPtr py_arg) const @@ -1130,7 +1130,7 @@ namespace db0::object_model return std::nullopt; } // NOTE: we use only the offset part as tag - to distinguish from enum and class tags (high bits) - return LangToolkit::getMemoUniqueAddress(py_arg).getAddress().getOffset(); + return ShortTagT::fromAddress(LangToolkit::getMemoUniqueAddress(py_arg).getAddress()); } std::optional TagIndex::tryAddShortTagFromTag(ObjectPtr py_arg) const @@ -1142,7 +1142,7 @@ namespace db0::object_model // must be added as long tag return std::nullopt; } - return addr_pair.second; + return ShortTagT::fromAddress(addr_pair.second); } bool TagIndex::isScopeIdentifier(ObjectPtr ptr) const { @@ -1588,7 +1588,7 @@ namespace db0::object_model if (!fixture) { THROWF(db0::InternalException) << "Fixture closed while iteration"; } - return this->makeIterator(tag_id.getAddress()); + return this->makeIterator(ShortTagT::fromAddress(tag_id.getAddress())); }; return std::make_unique( diff --git a/src/dbzero/object_model/tags/TagIndex.hpp b/src/dbzero/object_model/tags/TagIndex.hpp index 79405a31..abe184fb 100644 --- a/src/dbzero/object_model/tags/TagIndex.hpp +++ b/src/dbzero/object_model/tags/TagIndex.hpp @@ -54,7 +54,7 @@ DB0_PACKED_END using QueryIterator = FT_Iterator; using TP_Iterator = TagProduct; // string tokens and classes are represented as short tags - using ShortTagT = std::uint64_t; + using ShortTagT = db0::TagAddress; using ShortTagIndexMap = db0::VInstanceMap; TagIndex(Memspace &memspace, ClassFactory &, EnumFactory &, RC_LimitedStringPool &, VObjectCache &, @@ -173,7 +173,7 @@ DB0_PACKED_END // batch operation associated with type-tags only (auto-assigned) mutable db0::FT_BaseIndex::BatchOperationBuilder m_batch_op_types; // the set of 
tags to which the ref-count has been increased when they were first created - mutable std::unordered_set m_inc_refed_tags; + mutable std::unordered_set m_inc_refed_tags; // A cache of language objects held until flush/close is called // it's required to prevent unreferenced objects from being collected by GC // and to handle callbacks from the full-text index @@ -340,7 +340,7 @@ DB0_PACKED_END auto first = *it; ++it; assert(it != sequence.end()); - return { first, *it }; + return { first.getValue(), (*it).getValue() }; } // Get type / enum / iterable associated fixture UUID (or 0 if not prefix bound) diff --git a/tests/unit_tests/QuerySerializationTest.cpp b/tests/unit_tests/QuerySerializationTest.cpp index 1295b16a..315e7349 100644 --- a/tests/unit_tests/QuerySerializationTest.cpp +++ b/tests/unit_tests/QuerySerializationTest.cpp @@ -29,8 +29,13 @@ namespace tests public: using RangeTreeT = RangeTree; using ItemT = typename RangeTreeT::ItemT; + using ShortTagT = db0::object_model::TagIndex::ShortTagT; - void runTestCase(std::function, FT_BaseIndex &)> test) + static ShortTagT tag(std::uint64_t value) { + return ShortTagT::fromOffset(value); + } + + void runTestCase(std::function, FT_BaseIndex &)> test) { auto fixture = getFixture(); // create with the limit of 8 items per range @@ -58,9 +63,9 @@ namespace tests auto &ft_index = tag_index.getBaseIndexShort(); { auto batch_data = ft_index.beginBatchUpdate(); - batch_data->addTags({ makeUniqueAddr(4, 1), nullptr }, std::vector { 1, 2, 3 }); - batch_data->addTags({ makeUniqueAddr(3, 1), nullptr }, std::vector { 1, 2 }); - batch_data->addTags({ makeUniqueAddr(8, 1), nullptr }, std::vector { 1, 2 }); + batch_data->addTags({ makeUniqueAddr(4, 1), nullptr }, std::vector { tag(1), tag(2), tag(3) }); + batch_data->addTags({ makeUniqueAddr(3, 1), nullptr }, std::vector { tag(1), tag(2) }); + batch_data->addTags({ makeUniqueAddr(8, 1), nullptr }, std::vector { tag(1), tag(2) }); batch_data->flush(); } test(index, rt, 
ft_index); @@ -69,8 +74,8 @@ namespace tests TEST_F( QuerySerializationTest , testRangeTreeFTSortedIteratorCanBeSerialized ) { - auto test = [](IndexBase &index, std::shared_ptr rt, FT_BaseIndex &ft_index) { - auto ft_query = ft_index.makeIterator(1); + auto test = [](IndexBase &index, std::shared_ptr rt, FT_BaseIndex &ft_index) { + auto ft_query = ft_index.makeIterator(tag(1)); std::vector values; RT_SortIterator cut(index, rt, std::move(ft_query)); std::vector buf; @@ -82,9 +87,9 @@ namespace tests TEST_F( QuerySerializationTest , testRangeTreeFTSortedIteratorCanBeDeserialized ) { - auto test = [&](IndexBase &index, std::shared_ptr rt, FT_BaseIndex &ft_index) { + auto test = [&](IndexBase &index, std::shared_ptr rt, FT_BaseIndex &ft_index) { std::vector buf; - auto ft_query = ft_index.makeIterator(1); + auto ft_query = ft_index.makeIterator(tag(1)); std::vector values; RT_SortIterator cut(index, rt, std::move(ft_query)); @@ -126,20 +131,20 @@ namespace tests *fixture, class_factory, enum_factory, fixture->getLimitedStringPool(), cache, fixture->addMutationHandler() ); - auto child = tag_index.addComposite(nullptr, 11); - auto grandchild = child->addComposite(nullptr, 22); + auto child = tag_index.addComposite(nullptr, tag(11)); + auto grandchild = child->addComposite(nullptr, tag(22)); { auto batch_data = grandchild->getBaseIndexShort().beginBatchUpdate(); - batch_data->addTags({ makeUniqueAddr(101, 1), nullptr }, std::vector { 33 }); + batch_data->addTags({ makeUniqueAddr(101, 1), nullptr }, std::vector { tag(33) }); batch_data->flush(); } { auto batch_data = tag_index.getBaseIndexShort().beginBatchUpdate(); - batch_data->addTags({ makeUniqueAddr(202, 1), nullptr }, std::vector { 33 }); + batch_data->addTags({ makeUniqueAddr(202, 1), nullptr }, std::vector { tag(33) }); batch_data->flush(); } - auto ft_query = tag_index.makeIterator(std::vector { 11, 22, 33 }); + auto ft_query = tag_index.makeIterator(std::vector { tag(11), tag(22), tag(33) }); 
ASSERT_TRUE(ft_query); std::vector buf; @@ -168,7 +173,7 @@ namespace tests *fixture, class_factory, enum_factory, fixture->getLimitedStringPool(), cache, fixture->addMutationHandler() ); - auto ft_query = tag_index.makeMissingIterator(std::vector { 44 }); + auto ft_query = tag_index.makeMissingIterator(std::vector { tag(44) }); ASSERT_TRUE(ft_query); ASSERT_TRUE(ft_query->isEnd()); @@ -177,7 +182,7 @@ namespace tests { auto batch_data = tag_index.getBaseIndexShort().beginBatchUpdate(); - batch_data->addTags({ makeUniqueAddr(303, 1), nullptr }, std::vector { 44 }); + batch_data->addTags({ makeUniqueAddr(303, 1), nullptr }, std::vector { tag(44) }); batch_data->flush(); } diff --git a/tests/unit_tests/TagAddressTest.cpp b/tests/unit_tests/TagAddressTest.cpp new file mode 100644 index 00000000..8b6b0e30 --- /dev/null +++ b/tests/unit_tests/TagAddressTest.cpp @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2026 DBZero Software sp. z o.o. + +#include +#include +#include + +namespace tests +{ + + TEST( TagAddressTest , testDefaultIsInvalid ) + { + db0::TagAddress cut; + + ASSERT_FALSE(cut.isValid()); + ASSERT_FALSE(cut.isPassive()); + ASSERT_EQ(cut.getValue(), 0u); + ASSERT_EQ(cut.getOffset(), 0u); + } + + TEST( TagAddressTest , testConstructFromOffset ) + { + auto cut = db0::TagAddress::fromOffset(12345); + + ASSERT_TRUE(cut.isValid()); + ASSERT_FALSE(cut.isPassive()); + ASSERT_EQ(cut.getValue(), 12345u); + ASSERT_EQ(cut.getOffset(), 12345u); + ASSERT_EQ(cut.getAddress(), db0::Address::fromOffset(12345)); + } + + TEST( TagAddressTest , testPassiveKeepsRawBitAndStripsLogicalOffset ) + { + auto regular = db0::TagAddress::fromOffset(12345); + auto passive = regular.asPassive(); + + ASSERT_TRUE(passive.isValid()); + ASSERT_TRUE(passive.isPassive()); + ASSERT_EQ(passive.getValue(), 12345u | db0::TagAddress::PASSIVE_BIT); + ASSERT_EQ(passive.getOffset(), 12345u); + ASSERT_EQ(passive.getAddress(), db0::Address::fromOffset(12345)); + 
ASSERT_EQ(passive.asRegular().getValue(), 12345u); + } + + TEST( TagAddressTest , testRawValueCanReopenPassiveAddress ) + { + auto cut = db0::TagAddress::fromValue(12345u | db0::TagAddress::PASSIVE_BIT); + + ASSERT_TRUE(cut.isPassive()); + ASSERT_EQ(cut.getValue(), 12345u | db0::TagAddress::PASSIVE_BIT); + ASSERT_EQ(cut.getOffset(), 12345u); + } + + TEST( TagAddressTest , testRegularAndPassiveCompareAsSameLogicalAddress ) + { + auto regular = db0::TagAddress::fromOffset(12345); + auto passive = regular.asPassive(); + + ASSERT_EQ(regular, passive); + ASSERT_FALSE(regular < passive); + ASSERT_FALSE(passive < regular); + ASSERT_LT(regular, db0::TagAddress::fromOffset(12346)); + } + + TEST( TagAddressTest , testCastsClearPassiveBit ) + { + auto passive = db0::TagAddress::fromOffset(12345).asPassive(); + + db0::Address address = passive; + std::uint64_t value = passive; + + ASSERT_EQ(address, db0::Address::fromOffset(12345)); + ASSERT_EQ(value, 12345u); + } + + TEST( TagAddressTest , testHashUsesLogicalAddress ) + { + auto regular = db0::TagAddress::fromOffset(12345); + auto passive = regular.asPassive(); + + std::unordered_set values; + values.insert(regular); + values.insert(passive); + + ASSERT_EQ(values.size(), 1u); + ASSERT_EQ(std::hash()(regular), std::hash()(passive)); + } + + TEST( TagAddressTest , testLayoutMatchesUint64 ) + { + ASSERT_EQ(sizeof(db0::TagAddress), sizeof(std::uint64_t)); + ASSERT_EQ(alignof(db0::TagAddress), alignof(db0::UniqueAddress)); + } + +} diff --git a/tests/unit_tests/VInstanceMapTest.cpp b/tests/unit_tests/VInstanceMapTest.cpp index 68ffbf7c..6c56dd85 100644 --- a/tests/unit_tests/VInstanceMapTest.cpp +++ b/tests/unit_tests/VInstanceMapTest.cpp @@ -430,7 +430,7 @@ DB0_PACKED_END mutation_log ); - constexpr TagIndex::ShortTagT composite_tag = 12345; + const auto composite_tag = TagIndex::ShortTagT::fromOffset(12345); ASSERT_FALSE(tag_index.tryUpdateComposite(reinterpret_cast(memo_ptr.get()), composite_tag)); auto child_tag_index = 
tag_index.addComposite(reinterpret_cast(memo_ptr.get()), composite_tag); @@ -477,7 +477,7 @@ DB0_PACKED_END mutation_log ); - constexpr TagIndex::ShortTagT composite_tag = 12346; + const auto composite_tag = TagIndex::ShortTagT::fromOffset(12346); auto child_tag_index = tag_index.addComposite(reinterpret_cast(memo_ptr.get()), composite_tag); child_tag_index->addTag(reinterpret_cast(memo_ptr.get()), composite_tag, false); ASSERT_FALSE(tag_index.empty()); @@ -512,7 +512,7 @@ DB0_PACKED_END ASSERT_FALSE(tag_index.flush()); - constexpr TagIndex::ShortTagT composite_tag = 12351; + const auto composite_tag = TagIndex::ShortTagT::fromOffset(12351); auto child_tag_index = tag_index.addComposite(reinterpret_cast(memo_ptr.get()), composite_tag); ASSERT_TRUE(child_tag_index->empty()); ASSERT_FALSE(tag_index.flush()); @@ -549,7 +549,7 @@ DB0_PACKED_END mutation_log ); - constexpr TagIndex::ShortTagT composite_tag = 12347; + const auto composite_tag = TagIndex::ShortTagT::fromOffset(12347); auto child_tag_index = tag_index.addComposite(reinterpret_cast(memo_ptr.get()), composite_tag); child_tag_index->addTag(reinterpret_cast(memo_ptr.get()), composite_tag, false); ASSERT_FALSE(tag_index.empty()); @@ -582,7 +582,7 @@ DB0_PACKED_END mutation_log ); - constexpr TagIndex::ShortTagT composite_tag = 12348; + const auto composite_tag = TagIndex::ShortTagT::fromOffset(12348); auto child_tag_index = tag_index.addComposite(reinterpret_cast(memo_ptr.get()), composite_tag); child_tag_index->addTag(reinterpret_cast(memo_ptr.get()), composite_tag, false); ASSERT_FALSE(tag_index.empty()); @@ -615,7 +615,7 @@ DB0_PACKED_END mutation_log ); - constexpr TagIndex::ShortTagT composite_tag = 12349; + const auto composite_tag = TagIndex::ShortTagT::fromOffset(12349); auto child_tag_index = tag_index.addComposite(reinterpret_cast(memo_ptr.get()), composite_tag); child_tag_index->addTag(reinterpret_cast(memo_ptr.get()), composite_tag, false); auto child_address = child_tag_index->getAddress(); @@ 
-657,7 +657,7 @@ DB0_PACKED_END mutation_log ); - constexpr TagIndex::ShortTagT composite_tag = 12350; + const auto composite_tag = TagIndex::ShortTagT::fromOffset(12350); auto child_tag_index = tag_index.addComposite(reinterpret_cast(memo_ptr.get()), composite_tag); child_tag_index->addTag(reinterpret_cast(memo_ptr.get()), composite_tag, false); From 95c7c65be24a61c2b88baf1bfee128d646e006cf Mon Sep 17 00:00:00 2001 From: Wojtek Date: Tue, 2 Jun 2026 20:41:34 +0200 Subject: [PATCH 2/5] passive tags --- dbzero/dbzero/dbzero.pyi | 5 +- python_tests/test_tags.py | 96 +++++++++++++++++++ .../bindings/python/PyObjectTagManager.cpp | 25 ++++- .../bindings/python/PyObjectTagManager.hpp | 4 +- src/dbzero/bindings/python/dbzero.cpp | 2 +- .../collections/full_text/FT_BaseIndex.cpp | 22 ++++- .../collections/full_text/FT_BaseIndex.hpp | 19 ++++ .../object_model/tags/ObjectTagManager.cpp | 24 +++-- .../object_model/tags/ObjectTagManager.hpp | 7 +- src/dbzero/object_model/tags/TagIndex.cpp | 96 +++++++++++++++---- src/dbzero/object_model/tags/TagIndex.hpp | 14 ++- 11 files changed, 265 insertions(+), 49 deletions(-) diff --git a/dbzero/dbzero/dbzero.pyi b/dbzero/dbzero/dbzero.pyi index 600cfd74..57ccd3ce 100644 --- a/dbzero/dbzero/dbzero.pyi +++ b/dbzero/dbzero/dbzero.pyi @@ -978,13 +978,16 @@ def bytearray(source: Union[bytes, Iterable[int]] = b'', /) -> ByteArrayObject: # Tag and query functions -def tags(*objects: Union[Memo, QueryObject]) -> ObjectTagManager: +def tags(*objects: Union[Memo, QueryObject], passive: bool = False) -> ObjectTagManager: """Get a tag manager interface for given Memo objects. Parameters ---------- *objects : Memo or QueryObject One or more Memo objects or query result sets to manage tags for. + passive : bool, optional + Assign simple tags without extending the tagged object's lifetime. Passive tags require + another positive predicate when queried. 
Returns ------- diff --git a/python_tests/test_tags.py b/python_tests/test_tags.py index 3dafd910..ef978635 100644 --- a/python_tests/test_tags.py +++ b/python_tests/test_tags.py @@ -31,6 +31,102 @@ def test_object_gets_incref_by_tags(db0_fixture): assert db0.getrefcount(object_1) == 1 +def test_passive_tag_does_not_incref_object(db0_fixture): + object_1 = MemoNoDefTags(1) + assert db0.getrefcount(object_1) == 0 + db0.tags(object_1, passive=True).add("passive-tag") + db0.commit() + assert db0.getrefcount(object_1) == 0 + + +def test_passive_tag_requires_positive_predicate_for_find(db0_fixture): + object_1 = MemoClassForTags(1) + db0.tags(object_1, passive=True).add("passive-tag") + assert [item.value for item in db0.find(MemoClassForTags, "passive-tag")] == [1] + with pytest.raises(Exception): + list(db0.find("passive-tag")) + + +def test_passive_tag_combines_with_regular_tag_predicate(db0_fixture): + object_1 = MemoClassForTags(1) + object_2 = MemoClassForTags(2) + db0.tags(object_1).add("regular-tag") + db0.tags(object_1, passive=True).add("passive-tag") + db0.tags(object_2).add("regular-tag") + assert [item.value for item in db0.find("regular-tag", "passive-tag")] == [1] + + +def test_passive_tag_only_or_query_is_rejected(db0_fixture): + object_1 = MemoClassForTags(1) + object_2 = MemoClassForTags(2) + db0.tags(object_1, passive=True).add("passive-tag-1") + db0.tags(object_2, passive=True).add("passive-tag-2") + with pytest.raises(Exception): + list(db0.find(["passive-tag-1", "passive-tag-2"])) + + +def test_passive_tag_remove_uses_regular_remove(db0_fixture): + object_1 = MemoClassForTags(1) + db0.tags(object_1, passive=True).add("passive-tag") + assert [item.value for item in db0.find(MemoClassForTags, "passive-tag")] == [1] + db0.tags(object_1).remove("passive-tag") + assert list(db0.find(MemoClassForTags, "passive-tag")) == [] + + +def test_passive_first_then_regular_tag_remains_non_durable(db0_fixture): + object_1 = MemoNoDefTags(1) + db0.tags(object_1, 
passive=True).add("passive-tag") + db0.commit() + db0.tags(object_1).add("passive-tag") + db0.commit() + assert db0.getrefcount(object_1) == 0 + + +def test_passive_first_then_regular_tag_in_same_transaction_remains_non_durable(db0_fixture): + object_1 = MemoNoDefTags(1) + db0.tags(object_1, passive=True).add("passive-tag") + db0.tags(object_1).add("passive-tag") + db0.commit() + assert db0.getrefcount(object_1) == 0 + + +def test_regular_first_then_passive_tag_remains_durable(db0_fixture): + object_1 = MemoNoDefTags(1) + db0.tags(object_1).add("regular-tag") + db0.commit() + db0.tags(object_1, passive=True).add("regular-tag") + db0.commit() + assert db0.getrefcount(object_1) == 1 + + +def test_regular_first_then_passive_tag_in_same_transaction_remains_durable(db0_fixture): + object_1 = MemoNoDefTags(1) + db0.tags(object_1).add("regular-tag") + db0.tags(object_1, passive=True).add("regular-tag") + db0.commit() + assert db0.getrefcount(object_1) == 1 + + +def test_passive_add_then_regular_remove_in_same_transaction_leaves_no_tag(db0_fixture): + object_1 = MemoClassForTags(1) + db0.tags(object_1, passive=True).add("passive-tag") + db0.tags(object_1).remove("passive-tag") + db0.commit() + assert db0.getrefcount(object_1) == 0 + assert list(db0.find(MemoClassForTags, "passive-tag")) == [] + + +def test_passive_tag_does_not_preserve_object_lifetime(db0_fixture): + object_1 = MemoNoDefTags(1) + object_uuid = db0.uuid(object_1) + db0.tags(object_1, passive=True).add("passive-tag") + db0.commit() + del object_1 + db0.commit() + with pytest.raises(Exception): + db0.fetch(object_uuid) + + def test_assigned_tags_can_be_removed(db0_fixture): object_1 = MemoClassForTags(1) db0.tags(object_1).add(["tag1", "tag2"]) diff --git a/src/dbzero/bindings/python/PyObjectTagManager.cpp b/src/dbzero/bindings/python/PyObjectTagManager.cpp index 6cdf5ac8..038530fa 100644 --- a/src/dbzero/bindings/python/PyObjectTagManager.cpp +++ b/src/dbzero/bindings/python/PyObjectTagManager.cpp @@ -103,8 
+103,24 @@ namespace db0::python .tp_free = PyObject_Free, }; - PyObjectTagManager *tryMakeObjectTagManager(PyObject *, PyObject *const *args, Py_ssize_t nargs) + PyObjectTagManager *tryMakeObjectTagManager(PyObject *, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + bool passive = false; + if (kwnames) { + auto nkwargs = PyTuple_GET_SIZE(kwnames); + for (Py_ssize_t i = 0; i < nkwargs; ++i) { + auto *kwname = PyTuple_GET_ITEM(kwnames, i); + if (!PyUnicode_Check(kwname) || PyUnicode_CompareWithASCIIString(kwname, "passive") != 0) { + THROWF(db0::InputException) << "Unknown keyword argument for dbzero.tags" << THROWF_END; + } + auto is_true = PyObject_IsTrue(args[nargs + i]); + if (is_true < 0) { + THROWF(db0::InputException) << "Unable to interpret passive argument as bool" << THROWF_END; + } + passive = is_true != 0; + } + } + std::vector memo_args; std::vector > query_targets; memo_args.reserve(nargs); @@ -138,15 +154,16 @@ namespace db0::python &tags_obj->modifyExt(), memo_args.data(), memo_args.size(), - std::move(query_targets) + std::move(query_targets), + passive ); return tags_obj.steal(); } - PyObjectTagManager *makeObjectTagManager(PyObject *, PyObject *const *args, Py_ssize_t nargs) + PyObjectTagManager *makeObjectTagManager(PyObject *, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PY_API_FUNC - return runSafe(tryMakeObjectTagManager, nullptr, args, nargs); + return runSafe(tryMakeObjectTagManager, nullptr, args, nargs, kwnames); } } diff --git a/src/dbzero/bindings/python/PyObjectTagManager.hpp b/src/dbzero/bindings/python/PyObjectTagManager.hpp index 1e73d536..b3d7548a 100644 --- a/src/dbzero/bindings/python/PyObjectTagManager.hpp +++ b/src/dbzero/bindings/python/PyObjectTagManager.hpp @@ -23,6 +23,6 @@ namespace db0::python extern PyTypeObject PyObjectTagManagerType; - PyObjectTagManager *makeObjectTagManager(PyObject *, PyObject *const *args, Py_ssize_t nargs); + PyObjectTagManager *makeObjectTagManager(PyObject *, 
PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames); -} \ No newline at end of file +} diff --git a/src/dbzero/bindings/python/dbzero.cpp b/src/dbzero/bindings/python/dbzero.cpp index eead8d91..cdb40359 100644 --- a/src/dbzero/bindings/python/dbzero.cpp +++ b/src/dbzero/bindings/python/dbzero.cpp @@ -60,7 +60,7 @@ static PyMethodDef dbzero_methods[] = {"weak_set", (PyCFunction)&py::PyAPI_makeWeakSet, METH_FASTCALL, "Create a new dbzero weak set instance"}, {"dict", (PyCFunction)&py::PyAPI_makeDict, METH_VARARGS | METH_KEYWORDS, "Create a new dbzero dict instance"}, {"bytearray", (PyCFunction)&py::PyAPI_makeByteArray, METH_FASTCALL, "Create a new dbzero bytearray instance"}, - {"tags", (PyCFunction)&py::makeObjectTagManager, METH_FASTCALL, ""}, + {"tags", (PyCFunction)&py::makeObjectTagManager, METH_FASTCALL | METH_KEYWORDS, ""}, {"find", (PyCFunction)&py::PyAPI_find, METH_VARARGS | METH_KEYWORDS, "Find memo instances by tags with optional filtering"}, {"predicate", (PyCFunction)&py::PyAPI_predicate, METH_VARARGS | METH_KEYWORDS, "Build a non-iterable predicate query for composing filters"}, {"join", (PyCFunction)&py::PyAPI_join, METH_VARARGS | METH_KEYWORDS, "Join memo collections by common tags with optional filtering"}, diff --git a/src/dbzero/core/collections/full_text/FT_BaseIndex.cpp b/src/dbzero/core/collections/full_text/FT_BaseIndex.cpp index 66f6b9b8..d31f5043 100644 --- a/src/dbzero/core/collections/full_text/FT_BaseIndex.cpp +++ b/src/dbzero/core/collections/full_text/FT_BaseIndex.cpp @@ -215,10 +215,14 @@ namespace db0 auto old_map_value = addressOfMBIndex(*tag_index_ptr); // NOTICE: only unique items are retained in index // callback notified about unique items (objects) + // Use the actual key being inserted to decide whether value callbacks apply. + auto *range_insert_callback_ptr = FT_IndexKeyPolicy::enableValueCallbacks( + range_first->first + ) ? 
insert_callback_ptr : nullptr; std::pair stats = tag_index_ptr->bulkInsertUnique( ValueIterator(range_first), ValueIterator(range_last), - insert_callback_ptr + range_insert_callback_ptr ); // This check is here because tag_index's location may have been changed by insert @@ -259,15 +263,23 @@ namespace db0 return first_item.first != item.first; }); // instance collection by tag pointer - auto tag_index_ptr = index.tryGetExistingInvertedList(first_item.first); - if (tag_index_ptr) { + typename FT_BaseIndex::MapItemT item(first_item.first); + auto it_list = index.find(item); + if (it_list != index.end()) { + auto stored_key = (*it_list).key; + auto tag_index_ptr = index.getInvertedList(it_list); // we need to remember old type nd pointer because they may be modified by bulkErase operation auto old_addr = tag_index_ptr->getAddress(); auto old_map_value = addressOfMBIndex(*tag_index_ptr); + // Removal requests may use a logically equivalent key without stored flags; + // use the stored key to decide whether value callbacks apply. + auto *range_erase_callback_ptr = FT_IndexKeyPolicy::enableValueCallbacks( + stored_key + ) ? 
erase_callback_ptr : nullptr; std::size_t erased_count = tag_index_ptr->bulkErase( ValueIterator(buf_begin), ValueIterator(range_end), - erase_callback_ptr + range_erase_callback_ptr ); auto new_map_value = addressOfMBIndex(*tag_index_ptr); if (old_map_value != new_map_value) { @@ -278,7 +290,7 @@ namespace db0 index.erase(it); // notify callback on index erased if (index_erase_callback_ptr) { - (*index_erase_callback_ptr)(first_item.first); + (*index_erase_callback_ptr)(stored_key); } } else { it.modifyItem().value = new_map_value; diff --git a/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp b/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp index 57f8648a..f1f10534 100644 --- a/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp +++ b/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp @@ -15,7 +15,26 @@ namespace db0 { + // Flush-time policy for index-key metadata that affects value callbacks. + // FT_BaseIndex stores and mutates values grouped by index key; when a key + // carries behavioral flags, the flush path can decide once per stored key + // range whether object/value insert-erase callbacks should run. 
+ template + struct FT_IndexKeyPolicy + { + static bool enableValueCallbacks(const IndexKeyT &) { + return true; + } + }; + template <> + struct FT_IndexKeyPolicy + { + static bool enableValueCallbacks(db0::TagAddress tag_addr) { + return !tag_addr.isPassive(); + } + }; + // FT_BaseIndex provides common API for managing tag/type inverted lists // @tparam IndexKeyT the tag / element's key type template diff --git a/src/dbzero/object_model/tags/ObjectTagManager.cpp b/src/dbzero/object_model/tags/ObjectTagManager.cpp index a5af0723..78cc3a63 100644 --- a/src/dbzero/object_model/tags/ObjectTagManager.cpp +++ b/src/dbzero/object_model/tags/ObjectTagManager.cpp @@ -51,11 +51,12 @@ namespace db0::object_model } ObjectTagManager::ObjectTagManager(ObjectPtr const *memo_ptr, std::size_t nargs, - std::vector > &&query_targets) + std::vector > &&query_targets, bool passive) : m_empty(nargs == 0 && query_targets.empty()) , m_info_vec_ptr((nargs > 1) ? (new ObjectInfo[nargs - 1]) : nullptr) , m_info_vec_size(nargs > 0 ? 
nargs - 1 : 0) , m_query_targets(std::move(query_targets)) + , m_passive(passive) { if (m_empty) { return; @@ -96,13 +97,13 @@ namespace db0::object_model } ObjectTagManager *ObjectTagManager::makeNew(void *at_ptr, ObjectPtr const *memo_ptr, std::size_t nargs, - std::vector > &&query_targets) + std::vector > &&query_targets, bool passive) { if (nargs == 0 && query_targets.empty()) { // construct as empty return new (at_ptr) ObjectTagManager(); } - return new (at_ptr) ObjectTagManager(memo_ptr, nargs, std::move(query_targets)); + return new (at_ptr) ObjectTagManager(memo_ptr, nargs, std::move(query_targets), passive); } ObjectTagManager::ObjectInfo::ObjectInfo(ObjectPtr memo_ptr) @@ -126,15 +127,18 @@ namespace db0::object_model return false; } - void ObjectTagManager::ObjectInfo::add(ObjectPtr const *args, Py_ssize_t nargs) + void ObjectTagManager::ObjectInfo::add(ObjectPtr const *args, Py_ssize_t nargs, bool passive) { assert(m_tag_index_ptr); auto &tag_index = *m_tag_index_ptr; assert(m_access_mode == AccessType::READ_WRITE); if (!hasCompositeTags(args, nargs)) { - tag_index.addTags(m_lang_ptr.get(), args, nargs); + tag_index.addTags(m_lang_ptr.get(), args, nargs, passive); } else { + if (passive) { + THROWF(db0::InputException) << "Passive composite tags are not supported" << THROWF_END; + } for (Py_ssize_t i = 0; i < nargs; ++i) { if (isCompositeTag(args[i])) { validateCompositeTag(args[i]); @@ -144,12 +148,12 @@ namespace db0::object_model if (isCompositeTag(args[i])) { addComposite(args[i]); } else { - tag_index.addTags(m_lang_ptr.get(), args + i, 1); + tag_index.addTags(m_lang_ptr.get(), args + i, 1, passive); } } } // assign default tags (only when adding the first tag) - if (!m_has_tags) { + if (!passive && !m_has_tags) { auto type = m_type; while (type) { // also add type as tag (once) @@ -245,13 +249,13 @@ namespace db0::object_model } validateQueryTargets(); if (!!m_info.m_lang_ptr) { - m_info.add(args, nargs); + m_info.add(args, nargs, m_passive); } 
for (std::size_t i = 0; i < m_info_vec_size; ++i) { - m_info_vec_ptr[i].add(args, nargs); + m_info_vec_ptr[i].add(args, nargs, m_passive); } forEachQueryTarget([&](ObjectInfo &object_info) { - object_info.add(args, nargs); + object_info.add(args, nargs, m_passive); }); onUpdated(); } diff --git a/src/dbzero/object_model/tags/ObjectTagManager.hpp b/src/dbzero/object_model/tags/ObjectTagManager.hpp index 348dafb5..52a228d5 100644 --- a/src/dbzero/object_model/tags/ObjectTagManager.hpp +++ b/src/dbzero/object_model/tags/ObjectTagManager.hpp @@ -34,7 +34,7 @@ namespace db0::object_model // construct as empty ObjectTagManager(); ObjectTagManager(ObjectPtr const *memo_ptr, std::size_t nargs, - std::vector > &&query_targets = {}); + std::vector > &&query_targets = {}, bool passive = false); ~ObjectTagManager(); /** @@ -45,7 +45,7 @@ namespace db0::object_model void remove(ObjectPtr const *args, Py_ssize_t nargs); static ObjectTagManager *makeNew(void *at_ptr, ObjectPtr const *memo_ptr, std::size_t nargs, - std::vector > &&query_targets = {}); + std::vector > &&query_targets = {}, bool passive = false); private: // Memo object to be assigned tags to (language specific) @@ -63,7 +63,7 @@ namespace db0::object_model ObjectInfo() = default; ObjectInfo(ObjectPtr memo_ptr); - void add(ObjectPtr const *args, Py_ssize_t nargs); + void add(ObjectPtr const *args, Py_ssize_t nargs, bool passive); void remove(ObjectPtr const *args, Py_ssize_t nargs); bool hasCompositeTags(ObjectPtr const *args, Py_ssize_t nargs) const; void addComposite(ObjectPtr); @@ -80,6 +80,7 @@ namespace db0::object_model std::size_t m_info_vec_size = 0; AccessType m_access_mode = AccessType::READ_WRITE; std::vector > m_query_targets; + bool m_passive = false; // fixtures of the tagged objects (to mark as updated) db0::WeakFixtureVector m_fixtures; bool m_on_updated = false; diff --git a/src/dbzero/object_model/tags/TagIndex.cpp b/src/dbzero/object_model/tags/TagIndex.cpp index d77172db..5d639b4f 100644 --- 
a/src/dbzero/object_model/tags/TagIndex.cpp +++ b/src/dbzero/object_model/tags/TagIndex.cpp @@ -180,7 +180,7 @@ namespace db0::object_model "TagIndex::flush() or close() must be called before destruction"); } - void TagIndex::addTags(ObjectPtr memo_ptr, ObjectPtr const *args, std::size_t nargs) + void TagIndex::addTags(ObjectPtr memo_ptr, ObjectPtr const *args, std::size_t nargs, bool passive) { using TypeId = db0::bindings::TypeId; if (nargs == 0) { @@ -197,7 +197,7 @@ namespace db0::object_model ObjectPtr arg = args[i]; // must check for string since it's an iterable as well if (!LangToolkit::isString(arg) && LangToolkit::isIterable(arg)) { - auto tag_sequence = IterableSequence(LangToolkit::getIterator(arg), ForwardIterator::end(), [this](ObjectSharedPtr arg) { + auto tag_sequence = IterableSequence(LangToolkit::getIterator(arg), ForwardIterator::end(), [this, passive](ObjectSharedPtr arg) { bool inc_ref = false; auto result = tryAddShortTag(arg.get(), inc_ref); if (!result) { @@ -207,10 +207,13 @@ namespace db0::object_model if (inc_ref) { m_inc_refed_tags.insert(*result); } - return *result; + return passive ? result->asPassive() : *result; }); // sequence (pair) may represent a single long tag if (isLongTag(arg)) { + if (passive) { + THROWF(db0::InputException) << "Passive long tags are not supported" << THROWF_END; + } if (!batch_op_long_ptr) { batch_op_long_ptr = &getBatchOperationLong(memo_ptr, active_key); } @@ -224,11 +227,14 @@ namespace db0::object_model bool inc_ref = false; auto tag_addr = tryAddShortTag(type_id, arg, inc_ref); if (tag_addr) { - batch_op_short->addTag(active_key, *tag_addr); + batch_op_short->addTag(active_key, passive ? 
tag_addr->asPassive() : *tag_addr); if (inc_ref) { m_inc_refed_tags.insert(*tag_addr); } } else { + if (passive) { + THROWF(db0::InputException) << "Passive long tags are not supported" << THROWF_END; + } // must try adding as a long tag (item from a foreign scope) if (!batch_op_long_ptr) { batch_op_long_ptr = &getBatchOperationLong(memo_ptr, active_key); @@ -344,18 +350,16 @@ namespace db0::object_model return; } - using IterableSequence = TagMakerSequence; ActiveValueT active_key = { UniqueAddress(), nullptr }; auto &batch_operation = getBatchOperationShort(memo_ptr, active_key, false); for (std::size_t i = 0; i < nargs; ++i) { auto type_id = LangToolkit::getTypeManager().getTypeId(args[i]); // must check for string since it's an iterable as well if (type_id != TypeId::STRING && LangToolkit::isIterable(args[i])) { - batch_operation->removeTags(active_key, - IterableSequence(LangToolkit::getIterator(args[i]), ForwardIterator::end(), [&](ObjectSharedPtr arg) { - return getCompositeKey(arg.get()); - }) - ); + ForwardIterator it(LangToolkit::getIterator(args[i])); + for (auto end = ForwardIterator::end(); it != end; ++it) { + batch_operation->removeTag(active_key, getCompositeKey((*it).get())); + } m_mutation_log->onDirty(); } else { batch_operation->removeTag(active_key, getCompositeKey(args[i])); @@ -450,6 +454,16 @@ namespace db0::object_model m_string_pool.unRefByAddr(tag_addr); } } + + std::optional TagIndex::tryGetStoredShortTag(ShortTagT tag_addr) const + { + db0::key_value item(tag_addr); + auto it = m_base_index_short.find(item); + if (it == m_base_index_short.end()) { + return std::nullopt; + } + return (*it).key; + } bool TagIndex::flush() const { @@ -553,7 +567,7 @@ namespace db0::object_model std::function erase_index_callback = [&](ShortTagT tag_addr) { tryTagDecRef(tag_addr); }; - + // flush all short tags' updates if (!m_batch_op_short.assureEmpty()) { m_batch_op_short->flush(&add_tag_callback, &remove_tag_callback, @@ -643,18 +657,27 @@ namespace 
db0::object_model // if the 1st argument is a type then resolve as a typed ObjectIterable std::size_t offset = 0; bool result = !no_result; + bool has_passive_predicate = false; + bool has_positive_anchor = type || !native_args.empty(); // apply type filter if provided (unless type is a MemoBase) if (type) { result &= m_base_index_short.addIterator(factory, ShortTagT::fromAddress(type->getAddress())); } while (result && (offset < nargs)) { - result &= addIterator(args[offset], factory, neg_iterators, observers); + result &= addIterator( + args[offset], factory, neg_iterators, observers, + &has_passive_predicate, &has_positive_anchor + ); ++offset; } for (auto *native_arg: native_args) { assert(native_arg); - result &= addIterator(*native_arg, factory, neg_iterators, observers); + result &= addIterator(*native_arg, factory, neg_iterators, observers, &has_positive_anchor); + } + if (has_passive_predicate && !has_positive_anchor) { + THROWF(db0::InputException) << "Passive tags require at least one non-passive predicate" + << THROWF_END; } if (!result) { // invalidate factory since no matching results exist @@ -678,12 +701,16 @@ namespace db0::object_model } bool TagIndex::addIterator(const ObjectIterable &obj_iter, db0::FT_IteratorFactory &factory, - std::vector > &neg_iterators, std::vector > &query_observers) const + std::vector > &neg_iterators, + std::vector > &query_observers, bool *has_positive_anchor) const { auto ft_query = obj_iter.beginFTQuery(query_observers, -1); if (!ft_query || ft_query->isEnd()) { return false; } + if (has_positive_anchor) { + *has_positive_anchor = true; + } factory.add(std::move(ft_query)); return true; } @@ -705,13 +732,17 @@ namespace db0::object_model bool TagIndex::addIterator(ObjectPtr arg, db0::FT_IteratorFactory &factory, std::vector > &neg_iterators, - std::vector > &query_observers) const + std::vector > &query_observers, + bool *has_passive_predicate, bool *has_positive_anchor) const { using TypeId = db0::bindings::TypeId; 
using IterableSequence = TagMakerSequence; auto type_id = LangToolkit::getTypeManager().getTypeId(arg); if (type_id == TypeId::DB0_COMPOSITE_TAG) { + if (has_positive_anchor) { + *has_positive_anchor = true; + } return addCompositeIterator(LangToolkit::getTypeManager().extractCompositeTag(arg), factory); } @@ -721,10 +752,22 @@ namespace db0::object_model { if (isLongTag(type_id, arg)) { // query as the long-tag + if (has_positive_anchor) { + *has_positive_anchor = true; + } return m_base_index_long.addIterator(factory, getLongTag(type_id, arg)); } else { auto short_tag = getShortTag(type_id, arg); - if (m_base_index_short.addIterator(factory, short_tag)) { + auto stored_tag = tryGetStoredShortTag(short_tag); + auto query_tag = stored_tag.value_or(short_tag); + if (m_base_index_short.addIterator(factory, query_tag)) { + if (stored_tag && stored_tag->isPassive()) { + if (has_passive_predicate) { + *has_passive_predicate = true; + } + } else if (has_positive_anchor) { + *has_positive_anchor = true; + } return true; } bool inc_ref = false; @@ -735,6 +778,9 @@ namespace db0::object_model if (inc_ref) { m_inc_refed_tags.insert(*missing_tag); } + if (has_positive_anchor) { + *has_positive_anchor = true; + } factory.add(makeMissingIterator(std::vector { *missing_tag })); return true; } @@ -743,6 +789,9 @@ namespace db0::object_model // Memo instance is directly fed into the FT_FixedKeyIterator if (type_id == TypeId::MEMO_OBJECT || type_id == TypeId::MEMO_IMMUTABLE_OBJECT) { auto addr = LangToolkit::getMemoUniqueAddress(arg); + if (has_positive_anchor) { + *has_positive_anchor = true; + } factory.add(std::make_unique >(&addr, &addr + 1)); return true; } @@ -762,6 +811,9 @@ namespace db0::object_model } return *result; }); + if (has_positive_anchor) { + *has_positive_anchor = true; + } return m_base_index_long.addIterator(factory, makeLongTagFromSequence(sequence)); } @@ -778,7 +830,10 @@ namespace db0::object_model bool all = true; ForwardIterator 
it(LangToolkit::getIterator(arg)); for (auto end = ForwardIterator::end(); it != end; ++it) { - bool result = addIterator((*it).get(), *inner_factory, inner_neg_iterators, query_observers); + bool result = addIterator( + (*it).get(), *inner_factory, inner_neg_iterators, query_observers, + has_passive_predicate, has_positive_anchor + ); any |= result; all &= result; } @@ -802,7 +857,7 @@ namespace db0::object_model if (type_id == TypeId::OBJECT_ITERABLE) { auto &obj_iter = LangToolkit::getTypeManager().extractObjectIterable(arg); // try interpreting the iterator as FT-query - return addIterator(obj_iter, factory, neg_iterators, query_observers); + return addIterator(obj_iter, factory, neg_iterators, query_observers, has_positive_anchor); } if (type_id == TypeId::DB0_TAG_SET) { @@ -817,7 +872,10 @@ namespace db0::object_model } else { // just add as regular iterators for (auto &arg: tag_set.getArgs()) { - addIterator(arg.get(), factory, inner_neg_iterators, query_observers); + addIterator( + arg.get(), factory, inner_neg_iterators, query_observers, + has_passive_predicate, has_positive_anchor + ); } } if (!inner_neg_iterators.empty()) { diff --git a/src/dbzero/object_model/tags/TagIndex.hpp b/src/dbzero/object_model/tags/TagIndex.hpp index abe184fb..0d8c40c7 100644 --- a/src/dbzero/object_model/tags/TagIndex.hpp +++ b/src/dbzero/object_model/tags/TagIndex.hpp @@ -77,7 +77,7 @@ DB0_PACKED_END // add a tag using long identifier void addTag(ObjectPtr memo_ptr, LongTagT tag_addr); - void addTags(ObjectPtr memo_ptr, ObjectPtr const *lang_args, std::size_t nargs); + void addTags(ObjectPtr memo_ptr, ObjectPtr const *lang_args, std::size_t nargs, bool passive = false); // NOTE: type tags are removed when dropping the object, therefore lang instances are not required void removeTypeTag(UniqueAddress obj_addr, Address tag_addr); @@ -196,7 +196,7 @@ DB0_PACKED_END db0::FT_BaseIndex::BatchOperationBuilder &getBatchOperationShort(ObjectPtr, ActiveValueT &result, bool is_type) 
const; - + db0::FT_BaseIndex::BatchOperationBuilder &getBatchOperationLong(ObjectPtr, ActiveValueT &result) const; @@ -233,12 +233,17 @@ DB0_PACKED_END std::optional tryAddShortTagFromTag(ObjectPtr) const; std::optional tryAddShortTagFromMemo(ObjectPtr) const; + // Passive tag predicates are allowed only when the query is anchored by at least one + // non-passive positive predicate (type, regular tag, fixed object, nested query, etc.). + // These optional flags let root query planning distinguish passive predicates from anchors. bool addIterator(ObjectPtr, db0::FT_IteratorFactory &factory, std::vector > &neg_iterators, - std::vector > &query_observers) const; + std::vector > &query_observers, + bool *has_passive_predicate = nullptr, bool *has_positive_anchor = nullptr) const; bool addIterator(const ObjectIterable &, db0::FT_IteratorFactory &factory, std::vector > &neg_iterators, - std::vector > &query_observers) const; + std::vector > &query_observers, + bool *has_positive_anchor = nullptr) const; bool addCompositeIterator(const CompositeTagDef &, db0::FT_IteratorFactory &factory) const; bool addCompositeLeafIterator(ObjectPtr, db0::FT_IteratorFactory &factory, std::vector &&serialized_tag_sequence) const; @@ -272,6 +277,7 @@ DB0_PACKED_END // unless such reference has already been added when the tag was first created void tryTagIncRef(ShortTagT tag_addr) const; void tryTagDecRef(ShortTagT tag_addr) const; + std::optional tryGetStoredShortTag(ShortTagT tag_addr) const; // revert all pending operations associated with a specific object void revert(ObjectPtr) const; From c7a6e910e509c7985d37ebfc327b49c2d2455659 Mon Sep 17 00:00:00 2001 From: Wojtek Date: Tue, 2 Jun 2026 21:19:35 +0200 Subject: [PATCH 3/5] composite passive tags --- benchmarks/tagging.py | 241 ++++++++++++++++++ python_tests/test_composite_tags.py | 83 ++++++ python_tests/test_tags.py | 72 ++++++ .../object_model/tags/ObjectTagManager.cpp | 72 +++++- .../object_model/tags/ObjectTagManager.hpp | 2 +- 
src/dbzero/object_model/tags/TagIndex.cpp | 42 ++- src/dbzero/object_model/tags/TagIndex.hpp | 6 +- 7 files changed, 499 insertions(+), 19 deletions(-) create mode 100755 benchmarks/tagging.py diff --git a/benchmarks/tagging.py b/benchmarks/tagging.py new file mode 100755 index 00000000..84a42e27 --- /dev/null +++ b/benchmarks/tagging.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +"""Benchmark db0 tag assignment throughput. + +The benchmark creates memo objects in a preparation step, commits them, and +pre-seeds a regular anchor tag before timing starts. The measured loop applies +fresh simple tags to a fixed batch of existing objects and commits after every +batch, so the reported throughput includes flushing pending tag index updates. + +Scenarios: +- fresh-tags: creates a new tag key for every measured batch; this measures tag + key creation, inverted-list creation, assignment, and flush cost together. +- precreated-tags: creates and flushes tag keys before timing starts, then uses + those existing keys once during timing; this focuses on assignment and flush + cost without string-pool/tag-key creation. 
+ +Observed on this workspace: +- CPU: 11th Gen Intel(R) Core(TM) i9-11950H @ 2.60GHz +- Python: 3.11.13 +- Build: release +- Commands: + python3 benchmarks/tagging.py --scenario fresh-tags --target-seconds 30 + python3 benchmarks/tagging.py --scenario fresh-tags --target-seconds 30 --passive + python3 benchmarks/tagging.py --scenario precreated-tags --target-seconds 30 + python3 benchmarks/tagging.py --scenario precreated-tags --target-seconds 30 --passive +- Current result, fresh-tags: + regular: + object_count=10000 + batch_size=1000 + tags_per_batch=1 + batches=1442 + elapsed_seconds=30.012787 + tag_assignments=1442000 + tag_assignments_per_second=48046.187 + passive: + object_count=10000 + batch_size=1000 + tags_per_batch=1 + batches=1546 + elapsed_seconds=30.021849 + tag_assignments=1546000 + tag_assignments_per_second=51495.829 +- Current result, precreated-tags: + regular: + object_count=10000 + batch_size=1000 + tags_per_batch=1 + precreated_tag_count=5000 + batches=1334 + elapsed_seconds=30.006962 + tag_assignments=1334000 + tag_assignments_per_second=44456.349 + passive: + object_count=10000 + batch_size=1000 + tags_per_batch=1 + precreated_tag_count=5000 + batches=1484 + elapsed_seconds=30.018302 + tag_assignments=1484000 + tag_assignments_per_second=49436.507 +""" + +import argparse +import gc +import os +import platform +import tempfile +import time + +import dbzero as db0 + + +@db0.memo +class TagBenchmarkMemo: + def __init__(self, value): + self.value = value + + +def read_cpu_model(): + try: + with open("/proc/cpuinfo", "r", encoding="utf-8") as cpuinfo: + for line in cpuinfo: + if line.startswith("model name"): + return line.split(":", 1)[1].strip() + except OSError: + pass + return platform.processor() or "unknown" + + +def prepare_objects(object_count, batch_size): + objects = [TagBenchmarkMemo(i) for i in range(object_count)] + db0.commit() + target_objects = objects[:batch_size] + db0.tags(*target_objects).add("tagging-benchmark-anchor") + 
db0.commit() + return objects, target_objects + + +def prepare_precreated_tags(tag_count, passive): + tag_owner = TagBenchmarkMemo(-1) + db0.commit() + tags = [ + f"{'passive' if passive else 'regular'}-precreated-tag-{index}" + for index in range(tag_count) + ] + db0.tags(tag_owner, passive=passive).add(tags) + db0.commit() + return tag_owner, tags + + +def apply_tag_batch(target_objects, tags, passive): + db0.tags(*target_objects, passive=passive).add(tags) + db0.commit() + return len(target_objects) * len(tags) + + +def fresh_tag_batch(passive, batch_index, tags_per_batch): + return [ + f"{'passive' if passive else 'regular'}-fresh-tag-{batch_index}-{index}" + for index in range(tags_per_batch) + ] + + +def precreated_tag_batch(tags, batch_index, tags_per_batch): + start = batch_index * tags_per_batch + end = start + tags_per_batch + if end > len(tags): + return None + return tags[start:end] + + +def measure(target_objects, passive, target_seconds, tags_per_batch, tag_batch_factory): + total_assignments = 0 + batches = 0 + exhausted_tags = False + gc_was_enabled = gc.isenabled() + gc.disable() + try: + start = time.perf_counter() + deadline = start + target_seconds + while True: + tags = tag_batch_factory(batches) + if tags is None: + exhausted_tags = True + break + total_assignments += apply_tag_batch(target_objects, tags, passive) + batches += 1 + if time.perf_counter() >= deadline: + break + elapsed = time.perf_counter() - start + finally: + if gc_was_enabled: + gc.enable() + return elapsed, total_assignments, batches, exhausted_tags + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--scenario", + choices=("fresh-tags", "precreated-tags"), + default="fresh-tags", + ) + parser.add_argument("--passive", action="store_true", help="assign passive tags") + parser.add_argument("--target-seconds", type=float, default=30.0) + parser.add_argument("--object-count", type=int, default=10000) + parser.add_argument("--batch-size", type=int, 
default=1000) + parser.add_argument("--tags-per-batch", type=int, default=1) + parser.add_argument( + "--precreated-tag-count", + type=int, + default=5000, + help="number of tag keys to create before timing in the precreated-tags scenario", + ) + args = parser.parse_args() + + if args.object_count < args.batch_size: + parser.error("--object-count must be greater than or equal to --batch-size") + if args.batch_size <= 0: + parser.error("--batch-size must be positive") + if args.tags_per_batch <= 0: + parser.error("--tags-per-batch must be positive") + if args.precreated_tag_count <= 0: + parser.error("--precreated-tag-count must be positive") + + with tempfile.TemporaryDirectory() as root: + db0.init(root) + db0.open("tagging-throughput-benchmark") + objects, target_objects = prepare_objects(args.object_count, args.batch_size) + precreated_tag_owner = None + precreated_tags = None + if args.scenario == "precreated-tags": + if args.precreated_tag_count < args.tags_per_batch: + parser.error("--precreated-tag-count must cover at least one measured batch") + precreated_tag_owner, precreated_tags = prepare_precreated_tags( + args.precreated_tag_count, + args.passive, + ) + + if args.scenario == "fresh-tags": + tag_batch_factory = lambda batch: fresh_tag_batch( + args.passive, + batch, + args.tags_per_batch, + ) + else: + tag_batch_factory = lambda batch: precreated_tag_batch( + precreated_tags, + batch, + args.tags_per_batch, + ) + + elapsed, assignments, batches, exhausted_tags = measure( + target_objects, + args.passive, + args.target_seconds, + args.tags_per_batch, + tag_batch_factory, + ) + + print(f"cpu={read_cpu_model()}") + print(f"python={platform.python_version()}") + print(f"build_flags={db0.build_flags()}") + print(f"pid={os.getpid()}") + print(f"scenario={args.scenario}") + print(f"passive={args.passive}") + print(f"object_count={len(objects)}") + print(f"batch_size={args.batch_size}") + print(f"tags_per_batch={args.tags_per_batch}") + 
print(f"precreated_tag_count={len(precreated_tags) if precreated_tags is not None else 0}") + print(f"precreated_tag_owner_kept={precreated_tag_owner is not None}") + print(f"batches={batches}") + print(f"elapsed_seconds={elapsed:.6f}") + print(f"tag_assignments={assignments}") + print(f"tag_assignments_per_second={assignments / elapsed if elapsed else 0:.3f}") + print(f"seconds_per_batch={elapsed / batches if batches else 0:.6f}") + print(f"exhausted_tags={exhausted_tags}") + + +if __name__ == "__main__": + main() diff --git a/python_tests/test_composite_tags.py b/python_tests/test_composite_tags.py index 1a5f2e75..098be2b5 100644 --- a/python_tests/test_composite_tags.py +++ b/python_tests/test_composite_tags.py @@ -77,6 +77,89 @@ def test_find_by_composite_tag_with_type_filter(db0_fixture): assert [doc.title for doc in db0.find(CompositeTagDocument, db0.as_tag("GRANT-READ", user))] == ["doc-1"] +def test_can_add_passive_composite_tag(db0_fixture): + user = CompositeTagUser("user-1") + document = CompositeTagDocument("doc-1") + + db0.tags(document, passive=True).add(db0.as_tag("GRANT-READ", user)) + + assert [doc.title for doc in db0.find(CompositeTagDocument, db0.as_tag("GRANT-READ", user))] == ["doc-1"] + with pytest.raises(Exception): + list(db0.find(db0.as_tag("GRANT-READ", user))) + + +def test_can_remove_passive_composite_tag(db0_fixture): + user = CompositeTagUser("user-1") + document = CompositeTagDocument("doc-1") + composite_tag = db0.as_tag("GRANT-READ", user) + + db0.tags(document, passive=True).add(composite_tag) + db0.tags(document).remove(composite_tag) + + assert list(db0.find(CompositeTagDocument, composite_tag)) == [] + + +def test_batch_passive_composite_tags(db0_fixture): + users = [CompositeTagUser(f"user-{i}") for i in range(3)] + documents = [CompositeTagDocument(f"doc-{i}") for i in range(4)] + read_tags = [db0.as_tag("GRANT-READ", user) for user in users[:2]] + write_tag = db0.as_tag("GRANT-WRITE", users[2]) + + db0.tags(documents[0], 
documents[1], passive=True).add(read_tags) + db0.tags(db0.find(CompositeTagDocument), passive=True).add(write_tag) + + assert {doc.title for doc in db0.find(CompositeTagDocument, read_tags[0])} == {"doc-0", "doc-1"} + assert {doc.title for doc in db0.find(CompositeTagDocument, read_tags[1])} == {"doc-0", "doc-1"} + assert {doc.title for doc in db0.find(CompositeTagDocument, write_tag)} == {"doc-0", "doc-1", "doc-2", "doc-3"} + + db0.tags(documents[0], documents[1]).remove(read_tags) + db0.tags(db0.find(CompositeTagDocument)).remove(write_tag) + + assert list(db0.find(CompositeTagDocument, read_tags[0])) == [] + assert list(db0.find(CompositeTagDocument, read_tags[1])) == [] + assert list(db0.find(CompositeTagDocument, write_tag)) == [] + + +def test_passive_composite_tags_can_be_added_and_removed_from_find_results(db0_fixture): + user = CompositeTagUser("user-1") + documents = [CompositeTagDocument(f"doc-{i}") for i in range(4)] + composite_tag = db0.as_tag("GRANT-READ", user) + db0.tags(documents[0], documents[1], documents[2]).add("source") + db0.tags(documents[3]).add("other") + + db0.tags(db0.find(CompositeTagDocument, "source"), passive=True).add(composite_tag) + + assert {doc.title for doc in db0.find(CompositeTagDocument, composite_tag)} == {"doc-0", "doc-1", "doc-2"} + with pytest.raises(Exception): + list(db0.find(composite_tag)) + + db0.tags(db0.find(CompositeTagDocument, "source")).remove(composite_tag) + + assert list(db0.find(CompositeTagDocument, composite_tag)) == [] + assert {doc.title for doc in db0.find("source")} == {"doc-0", "doc-1", "doc-2"} + assert {doc.title for doc in db0.find("other")} == {"doc-3"} + + +def test_mixed_passive_simple_and_composite_tags_can_be_added_and_removed_from_find_results(db0_fixture): + user = CompositeTagUser("user-1") + documents = [CompositeTagDocument(f"doc-{i}") for i in range(4)] + composite_tag = db0.as_tag("GRANT-WRITE", user) + passive_tags = ["passive-audit", composite_tag] + db0.tags(documents[0], 
documents[1]).add("source") + db0.tags(documents[2], documents[3]).add("other") + + db0.tags(db0.find(CompositeTagDocument, "source"), passive=True).add(passive_tags) + + assert {doc.title for doc in db0.find(CompositeTagDocument, "passive-audit")} == {"doc-0", "doc-1"} + assert {doc.title for doc in db0.find(CompositeTagDocument, composite_tag)} == {"doc-0", "doc-1"} + + db0.tags(db0.find(CompositeTagDocument, "source")).remove(passive_tags) + + assert list(db0.find(CompositeTagDocument, "passive-audit")) == [] + assert list(db0.find(CompositeTagDocument, composite_tag)) == [] + assert {doc.title for doc in db0.find("other")} == {"doc-2", "doc-3"} + + def test_find_by_composite_tag_combines_with_simple_tags(db0_fixture): user = CompositeTagUser("user-1") document_1 = CompositeTagDocument("doc-1") diff --git a/python_tests/test_tags.py b/python_tests/test_tags.py index ef978635..808ea815 100644 --- a/python_tests/test_tags.py +++ b/python_tests/test_tags.py @@ -73,6 +73,57 @@ def test_passive_tag_remove_uses_regular_remove(db0_fixture): assert list(db0.find(MemoClassForTags, "passive-tag")) == [] +def test_passive_batch_adds_multiple_tags_to_multiple_objects(db0_fixture): + objects = [MemoClassForTags(i) for i in range(3)] + db0.tags(objects[0], objects[1], passive=True).add(["passive-tag-1", "passive-tag-2"]) + + assert {item.value for item in db0.find(MemoClassForTags, "passive-tag-1")} == {0, 1} + assert {item.value for item in db0.find(MemoClassForTags, "passive-tag-2")} == {0, 1} + + +def test_passive_batch_removes_multiple_tags_from_multiple_objects(db0_fixture): + objects = [MemoClassForTags(i) for i in range(3)] + db0.tags(*objects, passive=True).add(["passive-tag-1", "passive-tag-2"]) + db0.tags(objects[0], objects[1]).remove(["passive-tag-1", "passive-tag-2"]) + + assert {item.value for item in db0.find(MemoClassForTags, "passive-tag-1")} == {2} + assert {item.value for item in db0.find(MemoClassForTags, "passive-tag-2")} == {2} + + +def 
test_passive_batch_adds_and_removes_tags_from_query_target(db0_fixture): + objects = [MemoClassForTags(i) for i in range(4)] + db0.tags(objects[0], objects[1]).add("source") + db0.tags(objects[2]).add("other") + + db0.tags(db0.find("source"), passive=True).add(["passive-tag-1", "passive-tag-2"]) + assert {item.value for item in db0.find("source", "passive-tag-1")} == {0, 1} + assert {item.value for item in db0.find("source", "passive-tag-2")} == {0, 1} + + db0.tags(db0.find("source")).remove(["passive-tag-1", "passive-tag-2"]) + assert list(db0.find("source", "passive-tag-1")) == [] + assert list(db0.find("source", "passive-tag-2")) == [] + assert {item.value for item in db0.find("source")} == {0, 1} + assert {item.value for item in db0.find("other")} == {2} + + +def test_passive_tags_can_be_added_and_removed_from_find_results(db0_fixture): + objects = [MemoClassForTags(i) for i in range(5)] + db0.tags(objects[0], objects[1], objects[2]).add("source") + db0.tags(objects[3]).add("other") + + db0.tags(db0.find(MemoClassForTags, "source"), passive=True).add("passive-find-tag") + + assert {item.value for item in db0.find(MemoClassForTags, "passive-find-tag")} == {0, 1, 2} + with pytest.raises(Exception): + list(db0.find("passive-find-tag")) + + db0.tags(db0.find(MemoClassForTags, "source")).remove("passive-find-tag") + + assert list(db0.find(MemoClassForTags, "passive-find-tag")) == [] + assert {item.value for item in db0.find("source")} == {0, 1, 2} + assert {item.value for item in db0.find("other")} == {3} + + def test_passive_first_then_regular_tag_remains_non_durable(db0_fixture): object_1 = MemoNoDefTags(1) db0.tags(object_1, passive=True).add("passive-tag") @@ -127,6 +178,27 @@ def test_passive_tag_does_not_preserve_object_lifetime(db0_fixture): db0.fetch(object_uuid) +def test_passive_tagged_objects_are_not_found_after_regular_tags_removed(db0_fixture): + objects = [MemoNoDefTags(i) for i in range(3)] + object_uuids = [db0.uuid(obj) for obj in objects] + + 
db0.tags(*objects).add("regular-lifetime-tag") + db0.tags(*objects, passive=True).add("passive-stale-tag") + db0.commit() + + assert {item.value for item in db0.find(MemoNoDefTags, "passive-stale-tag")} == {0, 1, 2} + + db0.tags(*objects).remove("regular-lifetime-tag") + del objects + db0.commit() + + for object_uuid in object_uuids: + assert not db0.exists(object_uuid) + with pytest.raises(Exception): + db0.fetch(object_uuid) + assert list(db0.find(MemoNoDefTags, "passive-stale-tag")) == [] + + def test_assigned_tags_can_be_removed(db0_fixture): object_1 = MemoClassForTags(1) db0.tags(object_1).add(["tag1", "tag2"]) diff --git a/src/dbzero/object_model/tags/ObjectTagManager.cpp b/src/dbzero/object_model/tags/ObjectTagManager.cpp index 78cc3a63..0ee3c822 100644 --- a/src/dbzero/object_model/tags/ObjectTagManager.cpp +++ b/src/dbzero/object_model/tags/ObjectTagManager.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include namespace db0::object_model @@ -21,6 +22,13 @@ namespace db0::object_model (PyTuple_Check(arg) && ObjectTagManager::LangToolkit::length(arg) >= 2); } + bool isExpandableTagBatch(ObjectTagManager::ObjectPtr arg) + { + return !ObjectTagManager::LangToolkit::isString(arg) && + !isCompositeTag(arg) && + ObjectTagManager::LangToolkit::isIterable(arg); + } + std::size_t compositeTagSize(ObjectTagManager::ObjectPtr arg) { if (db0::python::PyCompositeTag_Check(arg)) { @@ -48,6 +56,23 @@ namespace db0::object_model } } + bool hasCompositeTagInBatch(ObjectTagManager::ObjectPtr arg) + { + if (isCompositeTag(arg)) { + return true; + } + if (!isExpandableTagBatch(arg)) { + return false; + } + auto iterator = ObjectTagManager::LangToolkit::getIterator(arg); + for (ForwardIterator it(iterator), end = ForwardIterator::end(); it != end; ++it) { + if (isCompositeTag((*it).get())) { + return true; + } + } + return false; + } + } ObjectTagManager::ObjectTagManager(ObjectPtr const *memo_ptr, std::size_t nargs, @@ -120,7 +145,7 @@ namespace db0::object_model 
bool ObjectTagManager::ObjectInfo::hasCompositeTags(ObjectPtr const *args, Py_ssize_t nargs) const { for (Py_ssize_t i = 0; i < nargs; ++i) { - if (isCompositeTag(args[i])) { + if (hasCompositeTagInBatch(args[i])) { return true; } } @@ -136,17 +161,32 @@ namespace db0::object_model if (!hasCompositeTags(args, nargs)) { tag_index.addTags(m_lang_ptr.get(), args, nargs, passive); } else { - if (passive) { - THROWF(db0::InputException) << "Passive composite tags are not supported" << THROWF_END; - } for (Py_ssize_t i = 0; i < nargs; ++i) { if (isCompositeTag(args[i])) { validateCompositeTag(args[i]); + } else if (isExpandableTagBatch(args[i])) { + ForwardIterator it(LangToolkit::getIterator(args[i])); + for (auto end = ForwardIterator::end(); it != end; ++it) { + if (isCompositeTag((*it).get())) { + validateCompositeTag((*it).get()); + } + } } } for (Py_ssize_t i = 0; i < nargs; ++i) { if (isCompositeTag(args[i])) { - addComposite(args[i]); + addComposite(args[i], passive); + } else if (isExpandableTagBatch(args[i])) { + ForwardIterator it(LangToolkit::getIterator(args[i])); + for (auto end = ForwardIterator::end(); it != end; ++it) { + auto item = (*it); + if (isCompositeTag(item.get())) { + addComposite(item.get(), passive); + } else { + ObjectPtr tag = item.get(); + tag_index.addTags(m_lang_ptr.get(), &tag, 1, passive); + } + } } else { tag_index.addTags(m_lang_ptr.get(), args + i, 1, passive); } @@ -176,11 +216,29 @@ namespace db0::object_model for (Py_ssize_t i = 0; i < nargs; ++i) { if (isCompositeTag(args[i])) { validateCompositeTag(args[i]); + } else if (isExpandableTagBatch(args[i])) { + ForwardIterator it(LangToolkit::getIterator(args[i])); + for (auto end = ForwardIterator::end(); it != end; ++it) { + if (isCompositeTag((*it).get())) { + validateCompositeTag((*it).get()); + } + } } } for (Py_ssize_t i = 0; i < nargs; ++i) { if (isCompositeTag(args[i])) { removeComposite(args[i]); + } else if (isExpandableTagBatch(args[i])) { + ForwardIterator 
it(LangToolkit::getIterator(args[i])); + for (auto end = ForwardIterator::end(); it != end; ++it) { + auto item = (*it); + if (isCompositeTag(item.get())) { + removeComposite(item.get()); + } else { + ObjectPtr tag = item.get(); + tag_index.removeTags(m_lang_ptr.get(), &tag, 1); + } + } } else { tag_index.removeTags(m_lang_ptr.get(), args + i, 1); } @@ -188,7 +246,7 @@ namespace db0::object_model } } - void ObjectTagManager::ObjectInfo::addComposite(ObjectPtr arg) + void ObjectTagManager::ObjectInfo::addComposite(ObjectPtr arg, bool passive) { assert(m_tag_index_ptr); assert(isCompositeTag(arg)); @@ -207,7 +265,7 @@ namespace db0::object_model auto tagPtr = getCompositeItem(arg, length - 1); ObjectPtr tag = tagPtr.get(); - currentTagIndex->addTags(m_lang_ptr.get(), &tag, 1); + currentTagIndex->addTags(m_lang_ptr.get(), &tag, 1, passive); } void ObjectTagManager::ObjectInfo::removeComposite(ObjectPtr arg) diff --git a/src/dbzero/object_model/tags/ObjectTagManager.hpp b/src/dbzero/object_model/tags/ObjectTagManager.hpp index 52a228d5..cf7e92ca 100644 --- a/src/dbzero/object_model/tags/ObjectTagManager.hpp +++ b/src/dbzero/object_model/tags/ObjectTagManager.hpp @@ -66,7 +66,7 @@ namespace db0::object_model void add(ObjectPtr const *args, Py_ssize_t nargs, bool passive); void remove(ObjectPtr const *args, Py_ssize_t nargs); bool hasCompositeTags(ObjectPtr const *args, Py_ssize_t nargs) const; - void addComposite(ObjectPtr); + void addComposite(ObjectPtr, bool passive); void removeComposite(ObjectPtr); db0::swine_ptr getFixture() const; diff --git a/src/dbzero/object_model/tags/TagIndex.cpp b/src/dbzero/object_model/tags/TagIndex.cpp index 5d639b4f..a1a10917 100644 --- a/src/dbzero/object_model/tags/TagIndex.cpp +++ b/src/dbzero/object_model/tags/TagIndex.cpp @@ -740,10 +740,12 @@ namespace db0::object_model auto type_id = LangToolkit::getTypeManager().getTypeId(arg); if (type_id == TypeId::DB0_COMPOSITE_TAG) { - if (has_positive_anchor) { - *has_positive_anchor = true; 
- } - return addCompositeIterator(LangToolkit::getTypeManager().extractCompositeTag(arg), factory); + return addCompositeIterator( + LangToolkit::getTypeManager().extractCompositeTag(arg), + factory, + has_passive_predicate, + has_positive_anchor + ); } // simple tag-convertible type @@ -890,7 +892,8 @@ namespace db0::object_model } bool TagIndex::addCompositeIterator(const CompositeTagDef &tag, - db0::FT_IteratorFactory &factory) const + db0::FT_IteratorFactory &factory, + bool *has_passive_predicate, bool *has_positive_anchor) const { if (tag.size() < 2) { return false; @@ -916,6 +919,9 @@ namespace db0::object_model } serialized_tag_sequence.push_back(*tag_key); } + if (has_positive_anchor) { + *has_positive_anchor = true; + } factory.add(makeMissingIterator(std::move(serialized_tag_sequence))); return true; }; @@ -963,12 +969,15 @@ namespace db0::object_model return current_tag_index->addCompositeLeafIterator( items.back().get(), factory, - std::move(serialized_tag_sequence) + std::move(serialized_tag_sequence), + has_passive_predicate, + has_positive_anchor ); } bool TagIndex::addCompositeLeafIterator(ObjectPtr arg, db0::FT_IteratorFactory &factory, - std::vector &&serialized_tag_sequence) const + std::vector &&serialized_tag_sequence, + bool *has_passive_predicate, bool *has_positive_anchor) const { using TypeId = db0::bindings::TypeId; @@ -977,6 +986,9 @@ namespace db0::object_model THROWF(db0::InputException) << "Nested composite tag leaves are not supported" << THROWF_END; } if (isLongTag(type_id, arg)) { + if (has_positive_anchor) { + *has_positive_anchor = true; + } return m_base_index_long.addIterator(factory, getLongTag(type_id, arg)); } @@ -984,13 +996,25 @@ namespace db0::object_model if (!leaf_key) { return false; } + auto stored_leaf_key = tryGetStoredShortTag(*leaf_key); + auto query_leaf_key = stored_leaf_key.value_or(*leaf_key); // current TagIndex is already the correct nested index for lookup. 
The // complete root-to-leaf sequence is attached only so serialized queries can // reopen the same nested index by traversing from the root during deserialize. - serialized_tag_sequence.push_back(*leaf_key); - if (m_base_index_short.addIterator(factory, *leaf_key, std::vector(serialized_tag_sequence))) { + serialized_tag_sequence.push_back(query_leaf_key); + if (m_base_index_short.addIterator(factory, query_leaf_key, std::vector(serialized_tag_sequence))) { + if (stored_leaf_key && stored_leaf_key->isPassive()) { + if (has_passive_predicate) { + *has_passive_predicate = true; + } + } else if (has_positive_anchor) { + *has_positive_anchor = true; + } return true; } + if (has_positive_anchor) { + *has_positive_anchor = true; + } factory.add(makeMissingIterator(std::move(serialized_tag_sequence))); return true; } diff --git a/src/dbzero/object_model/tags/TagIndex.hpp b/src/dbzero/object_model/tags/TagIndex.hpp index 0d8c40c7..b3817fe5 100644 --- a/src/dbzero/object_model/tags/TagIndex.hpp +++ b/src/dbzero/object_model/tags/TagIndex.hpp @@ -244,9 +244,11 @@ DB0_PACKED_END std::vector > &neg_iterators, std::vector > &query_observers, bool *has_positive_anchor = nullptr) const; - bool addCompositeIterator(const CompositeTagDef &, db0::FT_IteratorFactory &factory) const; + bool addCompositeIterator(const CompositeTagDef &, db0::FT_IteratorFactory &factory, + bool *has_passive_predicate = nullptr, bool *has_positive_anchor = nullptr) const; bool addCompositeLeafIterator(ObjectPtr, db0::FT_IteratorFactory &factory, - std::vector &&serialized_tag_sequence) const; + std::vector &&serialized_tag_sequence, + bool *has_passive_predicate = nullptr, bool *has_positive_anchor = nullptr) const; std::optional tryGetCompositeKey(ObjectPtr) const; bool isShortTag(ObjectPtr) const; From b5249c653529804670a7bc6bb85d657104a4c5ef Mon Sep 17 00:00:00 2001 From: Wojtek Date: Tue, 2 Jun 2026 21:47:34 +0200 Subject: [PATCH 4/5] long passive tags + test + fix enum tag collision --- 
python_tests/test_enum.py | 20 ++++ python_tests/test_tags.py | 44 +++++++ .../collections/full_text/FT_BaseIndex.hpp | 8 ++ .../core/collections/full_text/LongTag.hpp | 44 ++++++- src/dbzero/core/memory/Address.hpp | 54 ++++++--- .../object_model/tags/ObjectTagManager.cpp | 7 ++ src/dbzero/object_model/tags/TagIndex.cpp | 111 +++++++++++++++--- src/dbzero/object_model/tags/TagIndex.hpp | 1 + 8 files changed, 255 insertions(+), 34 deletions(-) diff --git a/python_tests/test_enum.py b/python_tests/test_enum.py index a7f310ad..a66da7e8 100644 --- a/python_tests/test_enum.py +++ b/python_tests/test_enum.py @@ -47,6 +47,26 @@ def test_enum_tags_are_distinguished_from_string_values(db0_fixture): assert set([x.value for x in db0.find(Colors.RED)]) == set([1]) +def test_enum_tags_are_not_treated_as_passive_tags(db0_fixture): + Colors = db0.enum("Colors", ["RED", "GREEN", "BLUE"]) + db0.tags(MemoTestClass(1)).add(Colors.RED) + db0.tags(MemoTestClass(2), passive=True).add("passive-tag") + + assert set([x.value for x in db0.find(Colors.RED)]) == {1} + with pytest.raises(Exception): + list(db0.find("passive-tag")) + + +def test_enum_tag_can_anchor_passive_tag_query(db0_fixture): + Colors = db0.enum("Colors", ["RED", "GREEN", "BLUE"]) + obj = MemoTestClass(1) + db0.tags(obj).add(Colors.RED) + db0.tags(obj, passive=True).add("passive-tag") + db0.tags(MemoTestClass(2), passive=True).add("passive-tag") + + assert set([x.value for x in db0.find(Colors.RED, "passive-tag")]) == {1} + + def test_enum_type_defines_values_method(db0_fixture): Colors = db0.enum("Colors", ["RED", "GREEN", "BLUE"]) assert len(Colors.values()) == 3 diff --git a/python_tests/test_tags.py b/python_tests/test_tags.py index 808ea815..3c5e8b3b 100644 --- a/python_tests/test_tags.py +++ b/python_tests/test_tags.py @@ -199,6 +199,50 @@ def test_passive_tagged_objects_are_not_found_after_regular_tags_removed(db0_fix assert list(db0.find(MemoNoDefTags, "passive-stale-tag")) == [] +def 
test_passive_foreign_tag_requires_positive_predicate_for_find(db0_fixture): + foreign_tag_source = MemoScopedClass(2) + foreign_tag = db0.as_tag(foreign_tag_source) + db0.open("passive-long-tag-prefix", "rw") + local_object = MemoClassForTags(1) + + db0.tags(local_object, passive=True).add(foreign_tag) + + assert [item.value for item in db0.find(MemoClassForTags, foreign_tag)] == [1] + with pytest.raises(Exception): + list(db0.find(foreign_tag)) + + +def test_passive_foreign_tag_remove_uses_regular_remove(db0_fixture): + foreign_tag_source = MemoScopedClass(2) + foreign_tag = db0.as_tag(foreign_tag_source) + db0.open("passive-long-tag-prefix", "rw") + local_object = MemoClassForTags(1) + + db0.tags(local_object, passive=True).add(foreign_tag) + db0.tags(local_object).remove(foreign_tag) + + assert list(db0.find(MemoClassForTags, foreign_tag)) == [] + + +def test_passive_foreign_tag_first_then_regular_remains_non_durable(db0_fixture): + foreign_tag_source = MemoScopedClass(2) + foreign_tag = db0.as_tag(foreign_tag_source) + db0.open("passive-long-tag-prefix", "rw") + local_object = MemoNoDefTags(1) + local_uuid = db0.uuid(local_object) + + db0.tags(local_object, passive=True).add(foreign_tag) + db0.commit() + db0.tags(local_object).add(foreign_tag) + db0.commit() + assert db0.getrefcount(local_object) == 0 + + del local_object + db0.commit() + assert not db0.exists(local_uuid) + assert list(db0.find(MemoNoDefTags, foreign_tag)) == [] + + def test_assigned_tags_can_be_removed(db0_fixture): object_1 = MemoClassForTags(1) db0.tags(object_1).add(["tag1", "tag2"]) diff --git a/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp b/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp index f1f10534..73ff68ba 100644 --- a/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp +++ b/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp @@ -34,6 +34,14 @@ namespace db0 return !tag_addr.isPassive(); } }; + + template <> + struct FT_IndexKeyPolicy + { + static bool 
enableValueCallbacks(const db0::LongTagT &tag_addr) { + return !db0::isPassiveLongTag(tag_addr); + } + }; // FT_BaseIndex provides common API for managing tag/type inverted lists // @tparam IndexKeyT the tag / element's key type diff --git a/src/dbzero/core/collections/full_text/LongTag.hpp b/src/dbzero/core/collections/full_text/LongTag.hpp index 7bd2ce4e..e7a9ebec 100644 --- a/src/dbzero/core/collections/full_text/LongTag.hpp +++ b/src/dbzero/core/collections/full_text/LongTag.hpp @@ -3,6 +3,7 @@ #pragma once +#include #include namespace db0 @@ -12,6 +13,46 @@ namespace db0 // field-level tags are represented as long tags using LongTagT = db0::num_pack; + inline std::uint64_t regularLongTagPart(std::uint64_t value) { + return TagAddress::regularValue(value); + } + + inline bool isPassiveLongTag(const LongTagT &tag) { + return TagAddress::isPassiveValue(tag.data[1]); + } + + inline LongTagT asPassiveLongTag(LongTagT tag) { + tag.data[1] = TagAddress::fromValue(tag.data[1]).asPassive().getValue(); + return tag; + } + + inline LongTagT asRegularLongTag(LongTagT tag) { + tag.data[1] = regularLongTagPart(tag.data[1]); + return tag; + } + + template <> inline bool num_pack::operator<(const num_pack &other) const + { + if (data[0] < other.data[0]) { + return true; + } + if (data[0] > other.data[0]) { + return false; + } + return regularLongTagPart(data[1]) < regularLongTagPart(other.data[1]); + } + + template <> inline bool num_pack::operator==(const num_pack &other) const + { + return data[0] == other.data[0] && + regularLongTagPart(data[1]) == regularLongTagPart(other.data[1]); + } + + template <> inline bool num_pack::operator!=(const num_pack &other) const + { + return !(*this == other); + } + } namespace std @@ -22,7 +63,8 @@ namespace std template <> struct hash { std::size_t operator()(const db0::LongTagT &tag) const { - return std::hash()(tag.data[0]) ^ std::hash()(tag.data[1]); + return std::hash()(tag.data[0]) ^ + 
std::hash()(db0::regularLongTagPart(tag.data[1])); } }; diff --git a/src/dbzero/core/memory/Address.hpp b/src/dbzero/core/memory/Address.hpp index 954cb612..8d88ce21 100644 --- a/src/dbzero/core/memory/Address.hpp +++ b/src/dbzero/core/memory/Address.hpp @@ -199,8 +199,15 @@ DB0_PACKED_BEGIN class DB0_PACKED_ATTR TagAddress { public: - static constexpr std::uint64_t PASSIVE_BIT = 1ULL << 63; - static constexpr std::uint64_t ADDRESS_MASK = ~PASSIVE_BIT; + // TagAddress is used by the short-tag index, whose key space contains + // both real memory addresses and packed non-address tags. Address-backed + // tags use only low 50 bits, matching UniqueAddress's address payload. + // EnumValue_UID tags reserve bit 63 to distinguish enum tags from + // address-backed tags, so passive tags must not use or mask that bit. + // PASSIVE_BIT is only recognized when no non-address high bits are set. + static constexpr std::uint64_t ENUM_BIT = 1ULL << 63; + static constexpr std::uint64_t PASSIVE_BIT = 1ULL << 62; + static constexpr std::uint64_t ADDRESS_MASK = (1ULL << 50) - 1; TagAddress() = default; @@ -209,7 +216,6 @@ DB0_PACKED_BEGIN } static inline TagAddress fromOffset(std::uint64_t offset) { - assert((offset & PASSIVE_BIT) == 0); return TagAddress(offset); } @@ -218,28 +224,34 @@ DB0_PACKED_BEGIN } inline bool operator!() const { - return getOffset() == 0; + return regularValue(m_value) == 0; } inline bool isValid() const { - return getOffset() != 0; + return regularValue(m_value) != 0; } inline bool isPassive() const { - return (m_value & PASSIVE_BIT) != 0; + return isPassiveValue(m_value); } inline TagAddress asPassive() const { - assert(isValid()); - return TagAddress(getOffset() | PASSIVE_BIT); + auto regular_value = regularValue(m_value); + // Non-address tag encodings, such as enum and field-def tags, are + // not eligible for passive storage. Returning them unchanged keeps + // their packed identity intact. 
+ if ((regular_value & ~ADDRESS_MASK) != 0) { + return *this; + } + return TagAddress(regular_value | PASSIVE_BIT); } inline TagAddress asRegular() const { - return TagAddress(getOffset()); + return TagAddress(regularValue(m_value)); } inline std::uint64_t getOffset() const { - return m_value & ADDRESS_MASK; + return regularValue(m_value); } inline std::uint64_t getValue() const { @@ -255,31 +267,39 @@ DB0_PACKED_BEGIN } inline operator std::uint64_t() const { - return getOffset(); + return regularValue(m_value); } inline bool operator==(const TagAddress &other) const { - return getOffset() == other.getOffset(); + return regularValue(m_value) == regularValue(other.m_value); } inline bool operator!=(const TagAddress &other) const { - return getOffset() != other.getOffset(); + return regularValue(m_value) != regularValue(other.m_value); } inline bool operator<(const TagAddress &other) const { - return getOffset() < other.getOffset(); + return regularValue(m_value) < regularValue(other.m_value); } inline bool operator>(const TagAddress &other) const { - return getOffset() > other.getOffset(); + return regularValue(m_value) > regularValue(other.m_value); } inline bool operator<=(const TagAddress &other) const { - return getOffset() <= other.getOffset(); + return regularValue(m_value) <= regularValue(other.m_value); } inline bool operator>=(const TagAddress &other) const { - return getOffset() >= other.getOffset(); + return regularValue(m_value) >= regularValue(other.m_value); + } + + static inline bool isPassiveValue(std::uint64_t value) { + return (value & PASSIVE_BIT) != 0 && (value & ~(PASSIVE_BIT | ADDRESS_MASK)) == 0; + } + + static inline std::uint64_t regularValue(std::uint64_t value) { + return isPassiveValue(value) ? 
(value & ADDRESS_MASK) : value; } inline friend std::ostream &operator<<(std::ostream &os, const TagAddress &address) { diff --git a/src/dbzero/object_model/tags/ObjectTagManager.cpp b/src/dbzero/object_model/tags/ObjectTagManager.cpp index 0ee3c822..e56210ba 100644 --- a/src/dbzero/object_model/tags/ObjectTagManager.cpp +++ b/src/dbzero/object_model/tags/ObjectTagManager.cpp @@ -64,6 +64,13 @@ namespace db0::object_model if (!isExpandableTagBatch(arg)) { return false; } + // Avoid pre-scanning one-shot iterables such as generators. The + // scan is only an optimization to route batches containing + // composite tags through the composite path; consuming a generator + // here would leave no tags for the actual add/remove operation. + if (!PySequence_Check(arg)) { + return false; + } auto iterator = ObjectTagManager::LangToolkit::getIterator(arg); for (ForwardIterator it(iterator), end = ForwardIterator::end(); it != end; ++it) { if (isCompositeTag((*it).get())) { diff --git a/src/dbzero/object_model/tags/TagIndex.cpp b/src/dbzero/object_model/tags/TagIndex.cpp index a1a10917..6edc0843 100644 --- a/src/dbzero/object_model/tags/TagIndex.cpp +++ b/src/dbzero/object_model/tags/TagIndex.cpp @@ -211,13 +211,13 @@ namespace db0::object_model }); // sequence (pair) may represent a single long tag if (isLongTag(arg)) { - if (passive) { - THROWF(db0::InputException) << "Passive long tags are not supported" << THROWF_END; - } if (!batch_op_long_ptr) { batch_op_long_ptr = &getBatchOperationLong(memo_ptr, active_key); } auto tag = makeLongTagFromSequence(tag_sequence); + if (passive) { + tag = asPassiveLongTag(tag); + } (*batch_op_long_ptr)->addTags(active_key, TagPtrSequence(&tag, &tag + 1)); } else { batch_op_short->addTags(active_key, tag_sequence); @@ -232,14 +232,14 @@ namespace db0::object_model m_inc_refed_tags.insert(*tag_addr); } } else { - if (passive) { - THROWF(db0::InputException) << "Passive long tags are not supported" << THROWF_END; - } // must try adding as a 
long tag (item from a foreign scope) if (!batch_op_long_ptr) { batch_op_long_ptr = &getBatchOperationLong(memo_ptr, active_key); } auto long_tag = getLongTag(arg); + if (passive) { + long_tag = asPassiveLongTag(long_tag); + } (*batch_op_long_ptr)->addTag(active_key, long_tag); } } @@ -350,19 +350,49 @@ namespace db0::object_model return; } + using IterableSequence = TagMakerSequence; ActiveValueT active_key = { UniqueAddress(), nullptr }; - auto &batch_operation = getBatchOperationShort(memo_ptr, active_key, false); + db0::FT_BaseIndex::BatchOperationBuilder *batch_op_short_ptr = nullptr; + db0::FT_BaseIndex::BatchOperationBuilder *batch_op_long_ptr = nullptr; for (std::size_t i = 0; i < nargs; ++i) { auto type_id = LangToolkit::getTypeManager().getTypeId(args[i]); + if (isLongTag(type_id, args[i])) { + if (!batch_op_long_ptr) { + batch_op_long_ptr = &getBatchOperationLong(memo_ptr, active_key); + } + (*batch_op_long_ptr)->removeTag(active_key, getLongTag(type_id, args[i])); + m_mutation_log->onDirty(); + continue; + } // must check for string since it's an iterable as well if (type_id != TypeId::STRING && LangToolkit::isIterable(args[i])) { - ForwardIterator it(LangToolkit::getIterator(args[i])); - for (auto end = ForwardIterator::end(); it != end; ++it) { - batch_operation->removeTag(active_key, getCompositeKey((*it).get())); + if (isLongTag(LangToolkit::getIterator(args[i]), ForwardIterator::end())) { + if (!batch_op_long_ptr) { + batch_op_long_ptr = &getBatchOperationLong(memo_ptr, active_key); + } + IterableSequence sequence( + LangToolkit::getIterator(args[i]), + ForwardIterator::end(), + [this](ObjectSharedPtr arg) { + return getCompositeKey(arg.get()); + } + ); + (*batch_op_long_ptr)->removeTag(active_key, makeLongTagFromSequence(sequence)); + } else { + if (!batch_op_short_ptr) { + batch_op_short_ptr = &getBatchOperationShort(memo_ptr, active_key, false); + } + ForwardIterator it(LangToolkit::getIterator(args[i])); + for (auto end = ForwardIterator::end(); 
it != end; ++it) { + (*batch_op_short_ptr)->removeTag(active_key, getCompositeKey((*it).get())); + } } m_mutation_log->onDirty(); } else { - batch_operation->removeTag(active_key, getCompositeKey(args[i])); + if (!batch_op_short_ptr) { + batch_op_short_ptr = &getBatchOperationShort(memo_ptr, active_key, false); + } + (*batch_op_short_ptr)->removeTag(active_key, getCompositeKey(args[i])); m_mutation_log->onDirty(); } } @@ -464,6 +494,16 @@ namespace db0::object_model } return (*it).key; } + + std::optional TagIndex::tryGetStoredLongTag(LongTagT tag_addr) const + { + db0::key_value item(tag_addr); + auto it = m_base_index_long.find(item); + if (it == m_base_index_long.end()) { + return std::nullopt; + } + return (*it).key; + } bool TagIndex::flush() const { @@ -576,12 +616,12 @@ namespace db0::object_model std::function add_long_index_callback = [&](LongTagT long_tag_addr) { tryTagIncRef(ShortTagT::fromValue(long_tag_addr[0])); - tryTagIncRef(ShortTagT::fromValue(long_tag_addr[1])); + tryTagIncRef(ShortTagT::fromValue(regularLongTagPart(long_tag_addr[1]))); }; std::function erase_long_index_callback = [&](LongTagT long_tag_addr) { tryTagDecRef(ShortTagT::fromValue(long_tag_addr[0])); - tryTagDecRef(ShortTagT::fromValue(long_tag_addr[1])); + tryTagDecRef(ShortTagT::fromValue(regularLongTagPart(long_tag_addr[1]))); }; // flush all long tags' updates @@ -754,10 +794,23 @@ namespace db0::object_model { if (isLongTag(type_id, arg)) { // query as the long-tag + auto long_tag = getLongTag(type_id, arg); + auto stored_tag = tryGetStoredLongTag(long_tag); + auto query_tag = stored_tag.value_or(long_tag); + if (m_base_index_long.addIterator(factory, query_tag)) { + if (stored_tag && isPassiveLongTag(*stored_tag)) { + if (has_passive_predicate) { + *has_passive_predicate = true; + } + } else if (has_positive_anchor) { + *has_positive_anchor = true; + } + return true; + } if (has_positive_anchor) { *has_positive_anchor = true; } - return m_base_index_long.addIterator(factory, 
getLongTag(type_id, arg)); + return false; } else { auto short_tag = getShortTag(type_id, arg); auto stored_tag = tryGetStoredShortTag(short_tag); @@ -813,10 +866,23 @@ namespace db0::object_model } return *result; }); + auto long_tag = makeLongTagFromSequence(sequence); + auto stored_tag = tryGetStoredLongTag(long_tag); + auto query_tag = stored_tag.value_or(long_tag); + if (m_base_index_long.addIterator(factory, query_tag)) { + if (stored_tag && isPassiveLongTag(*stored_tag)) { + if (has_passive_predicate) { + *has_passive_predicate = true; + } + } else if (has_positive_anchor) { + *has_positive_anchor = true; + } + return true; + } if (has_positive_anchor) { *has_positive_anchor = true; } - return m_base_index_long.addIterator(factory, makeLongTagFromSequence(sequence)); + return false; } bool is_or_clause = (type_id == TypeId::LIST); @@ -986,10 +1052,23 @@ namespace db0::object_model THROWF(db0::InputException) << "Nested composite tag leaves are not supported" << THROWF_END; } if (isLongTag(type_id, arg)) { + auto long_tag = getLongTag(type_id, arg); + auto stored_tag = tryGetStoredLongTag(long_tag); + auto query_tag = stored_tag.value_or(long_tag); + if (m_base_index_long.addIterator(factory, query_tag)) { + if (stored_tag && isPassiveLongTag(*stored_tag)) { + if (has_passive_predicate) { + *has_passive_predicate = true; + } + } else if (has_positive_anchor) { + *has_positive_anchor = true; + } + return true; + } if (has_positive_anchor) { *has_positive_anchor = true; } - return m_base_index_long.addIterator(factory, getLongTag(type_id, arg)); + return false; } auto leaf_key = tryGetCompositeKey(arg); diff --git a/src/dbzero/object_model/tags/TagIndex.hpp b/src/dbzero/object_model/tags/TagIndex.hpp index b3817fe5..5c5e4565 100644 --- a/src/dbzero/object_model/tags/TagIndex.hpp +++ b/src/dbzero/object_model/tags/TagIndex.hpp @@ -280,6 +280,7 @@ DB0_PACKED_END void tryTagIncRef(ShortTagT tag_addr) const; void tryTagDecRef(ShortTagT tag_addr) const; 
std::optional tryGetStoredShortTag(ShortTagT tag_addr) const; + std::optional tryGetStoredLongTag(LongTagT tag_addr) const; // revert all pending operations associated with a specific object void revert(ObjectPtr) const; From d4ffd29bb28488bddb21d0fc07f66c5eacbcf21e Mon Sep 17 00:00:00 2001 From: Wojtek Date: Wed, 3 Jun 2026 08:42:56 +0200 Subject: [PATCH 5/5] version update --- dbzero/setup.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbzero/setup.py b/dbzero/setup.py index 27f3bda1..2b2e451d 100644 --- a/dbzero/setup.py +++ b/dbzero/setup.py @@ -10,7 +10,7 @@ setup( name='dbzero', - version='0.3.4', + version='0.3.5', description='DBZero community edition', packages=['dbzero'], python_requires='>=3.9', diff --git a/pyproject.toml b/pyproject.toml index a9f49371..01cac082 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ requires = ['meson-python'] [project] name = 'dbzero' -version = '0.3.4' +version = '0.3.5' description = 'A state management system for Python 3.x that unifies your applications business logic, data persistence, and caching into a single, efficient layer.' readme = 'README.md' requires-python = '>=3.9'