diff --git a/dbzero/dbzero/__init__.py b/dbzero/dbzero/__init__.py index ab72e42a..fb95affd 100644 --- a/dbzero/dbzero/__init__.py +++ b/dbzero/dbzero/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) 2025 DBZero Software sp. z o.o. from .dbzero import * -from .dbzero import _init_data_masking +from .dbzero import _check_interned, _init_data_masking from .memo import * from .enum import * from .fast_query import * diff --git a/dbzero/dbzero/dbzero.pyi b/dbzero/dbzero/dbzero.pyi index e544d584..a353b3ef 100644 --- a/dbzero/dbzero/dbzero.pyi +++ b/dbzero/dbzero/dbzero.pyi @@ -112,6 +112,14 @@ def commit(prefix_name: Optional[str] = None) -> None: """ ... +def get_type_stats(type: type, prefix: Optional[str] = None) -> Dict[str, Any]: + """Retrieve statistics for a memo type. + + For interned types, the result includes ``content_index["size"]`` with the + number of currently indexed intern candidates. + """ + ... + # Object retrieval and management def fetch(identifier: Union[str, type], expected_type: Optional[type] = None, prefix: Optional[str] = None) -> Memo: diff --git a/dbzero/dbzero/memo.py b/dbzero/dbzero/memo.py index 3c123963..a09900fa 100644 --- a/dbzero/dbzero/memo.py +++ b/dbzero/dbzero/memo.py @@ -173,6 +173,10 @@ def memo(cls: Optional[type] = None, **kwargs) -> type: materialized, removing this argument from the Python definition does not clear the persisted flag; use reset_protect_fields on the dbzero Class object instead. Derived memo classes inherit field protection and cannot disable it. + intern : bool, default False + If True, the persistent class is marked for interned immutable materialization. + This option requires immutable=True, and interned instances may only reference + other interned memo instances. 
Returns ------- diff --git a/design/INTERNED_MEMO_DESIGN.md b/design/INTERNED_MEMO_DESIGN.md new file mode 100644 index 00000000..1ab5e731 --- /dev/null +++ b/design/INTERNED_MEMO_DESIGN.md @@ -0,0 +1,245 @@ +# Content-Addressed Objects Design + +This is a design document for content-addressed immutable memo objects. It is intended to complement `IMMUTABLE_OBJECTS_DESIGN.md` without modifying that original design. + +## Goal + +Content-addressed objects are immutable memo instances whose durable identity is bound to their contents. Creating the same immutable value more than once should not create multiple durable instances. Instead, dbzero resolves later materializations to the already materialized instance with identical content. + +The Python-facing feature is exposed as `intern=True` on immutable memo classes: + +```python +@memo(immutable=True, intern=True) +class HomeAddress: + street: str + city: str + country: str + +addr_1 = HomeAddress("Marszalkowska", "Warszawa", "Polska") +addr_2 = HomeAddress("Marszalkowska", "Warszawa", "Polska") + +assert db0.materialized(addr_1) is db0.materialized(addr_2) +assert db0.materialized(addr_2) is addr_1 +``` + +The term `intern` is used because it describes the user-visible behavior: pooling and de-duplication of equal immutable values. + +## User Semantics + +`intern=True` may only be set on immutable memo classes. A mutable class cannot be content-addressed because its identity would no longer remain bound to stable contents. + +Interning is part of the class materialization contract. It cannot be changed after instances of the class have already been materialized, because existing durable instances would have been created under different identity rules. + +When a non-materialized interned instance is materialized, dbzero first looks for an existing instance of the same interned class with identical content: + +- If a match exists, the new wrapper is resolved in place to the existing instance.
+- If no match exists, the new instance is materialized and submitted to the per-class content index. + +This uniqueness guarantee applies both to standalone root objects and embedded immutable objects. + +## Class Restrictions + +Interned instances may only reference other interned instances. This restriction keeps equality and identity stable across the whole reachable immutable graph. + +The allowed field graph is: + +- Simple immutable values. +- Embedded immutable values. +- References to interned immutable memo instances. +- Immutable containers whose nested values satisfy the same requirements. + +The disallowed field graph is: + +- Mutable memo objects. +- Non-interned memo object references. +- Values whose durable comparison depends on mutable external state. + +## Content Index + +Each interned memo class has an additional per-class `ContentIndex`. The index maps normalized object content to the unique durable address for an existing matching instance. + +Conceptually: + +```text +ContentIndex[class_id] + content_hash -> candidate UniqueAddress list +``` + +The hash is used for lookup, but equality must be confirmed by comparing the normalized binary content of the candidate object. Hash equality alone is never sufficient for uniqueness. + +The index stores `UniqueAddress` values for existing interned instances. It does not own those instances and must not increment their reference counts. + +## Reference Counting And Lifetime + +`ContentIndex` is a discovery structure, not an ownership structure. An interned object with no external references must not be kept alive only because it appears in the content index. + +Lifetime rules: + +- Interned instances are not added to the normal materialized index solely because of interning. +- `ContentIndex` does not increment the reference count of the referenced instance. +- When an interned instance's reference count drops to zero, it is removed from `ContentIndex`. +- Stale index entries must not resolve to deleted content.
Removal can be eager during ref-count transition or lazily validated during lookup, but lookups must never return a dead instance. + +This ensures interning provides de-duplication without turning every interned value into a permanently retained object. + +## Materialization Flow + +Interned objects use the same deferred materialization phases as other immutable objects, with an added content lookup step: + +```text +empty stub + -> fully initialized non-materialized instance + -> content lookup on materialization + -> existing durable instance, new wrapper resolved in place + or + new durable root instance submitted to ContentIndex + or + embedded object view submitted to ContentIndex +``` + +Before a newly initialized interned instance becomes durable or embedded, dbzero computes or scans its normalized content and performs a `ContentIndex` lookup for its class. + +If the lookup finds an equal object, materialization returns the existing object. The new wrapper becomes a defunct/resolved wrapper whose valid use is subsequent materialization-style resolution to the canonical object. + +If the lookup misses, materialization proceeds normally. Once the object has a stable durable address, that address is registered in `ContentIndex`. + +## Embedded Interned Objects + +Interned instances may be embedded inside immutable root objects, including deeply nested immutable structures. Embedded and referenced forms must compare as equal when they represent the same interned value. + +This means equality and index matching are based on normalized object content, not on whether a nested value is physically embedded or referenced. + +Example: + +```python +addr_1 = HomeAddress("Marszalkowska", "Warszawa", "Polska") +company = Company(address=addr_1) + +addr_2 = db0.materialized(HomeAddress("Marszalkowska", "Warszawa", "Polska")) +``` + +If `Company` is immutable and embeds `addr_1`, `addr_2` must resolve to the same logical interned `HomeAddress` value. 
If the canonical instance is embedded, the resolved result may be a view into the containing root allocation. + +## Embedded Versus Referenced Equality + +References are treated on par with embedded contents. A field containing an embedded interned object and a field containing a reference to the same interned object compare as equal for content-addressing purposes. + +The comparison is performed against the binary contents of the `o_embedded_object` inner structure after normalizing reference fields. A referenced interned object contributes the same normalized bytes as the embedded content of that object. + +Implementation requirements: + +- Content comparison must be class-aware; equal bytes for different memo classes are not interchangeable. +- The normalized representation must be stable across process restarts. +- Reference fields to interned objects must be expanded or canonicalized before comparison. +- Embedded-object offsets must not leak into content equality, because offset is placement detail rather than value content. + +## Lazy Index Updates + +`ContentIndex` operations are lazy, following the same buffering model as `TagIndex`. + +Expected behavior: + +- New index insertions are buffered during materialization. +- Removals caused by zero reference count are buffered. +- The persistent index collection is updated on flush. +- Reads must observe pending buffered updates in addition to persisted entries. + +The lookup path must account for both persisted and pending state so uniqueness is preserved before and after flush. + +## Uniqueness Guarantee + +Content-addressing provides a strong uniqueness guarantee. Every materialization of an interned object must go through content lookup before a new durable identity can be exposed. + +The guarantee covers: + +- Explicit `db0.materialized(obj)` calls. +- Implicit materialization by storing an interned object in another durable object. +- Embedded immutable instances. +- Deeply nested embedded instances. 
+- Reopened objects after process restart. + +The main caveat is retrieval cost. If the canonical interned object is first materialized as an embedded object inside a large root object, future lookups may need to pull the containing root allocation to compare or expose the embedded value. + +Users can avoid surprising retrieval costs by explicitly materializing small interned values before embedding them: + +```python +addr_1 = HomeAddress("Marszalkowska", "Warszawa", "Polska") +person = Person(address=db0.materialized(addr_1)) +``` + +## Materialization Scenarios + +### Referenced Before Duplicate Construction + +```python +addr_1 = HomeAddress("Marszalkowska", "Warszawa", "Polska") +user(address=addr_1) + +addr_2 = db0.materialized(HomeAddress("Marszalkowska", "Warszawa", "Polska")) +``` + +Expected behavior: + +- `addr_1` starts as a fully initialized non-materialized instance. +- Passing it to `user` externally references it, so it is materialized. +- Materialization registers it in `ContentIndex`. +- `addr_2` resolves to the same dbzero object by looking up the content index. + +### Embedded Before Duplicate Construction + +```python +addr_1 = HomeAddress("Marszalkowska", "Warszawa", "Polska") +company = Company(address=addr_1) + +addr_2 = db0.materialized(HomeAddress("Marszalkowska", "Warszawa", "Polska")) +``` + +Expected behavior: + +- `addr_1` starts as a fully initialized non-materialized instance. +- If `Company` is immutable and the embedding cost model accepts the field, `addr_1` is embedded into `company`. +- The interned address value is registered in `ContentIndex`. +- `addr_2` resolves to the canonical interned value, which may be represented as an embedded-object view. + +## Development Guidance + +Follow TDD for this feature. Start with Python behavior tests for the user-visible guarantees, then add native tests for index storage, normalization, lifetime, and embedded-object lookup. + +Recommended implementation slices: + +1. 
Validate decorator semantics: `intern=True` requires `immutable=True` and cannot change after materialization. +2. Add a per-class `ContentIndex` abstraction with buffered insert/remove behavior. +3. Add normalized content hashing and equality for immutable root objects with simple fields. +4. Route interned materialization through lookup before durable identity exposure. +5. Add wrapper in-place resolution when a duplicate is found. +6. Add reference-count driven `ContentIndex` removal. +7. Extend normalized comparison to interned references and embedded objects. +8. Add embedded interned object registration and lookup. +9. Add restart/flush tests for persisted index behavior. +10. Add retrieval-cost tests or benchmarks for embedded canonical instances in large root objects. + +Tests should cover: + +- `intern=True` is rejected without `immutable=True`. +- Interning cannot be enabled or disabled after class instances exist. +- Two independently constructed equal interned objects materialize to the same object. +- Unequal interned objects materialize to distinct objects. +- Duplicate materialization resolves the later wrapper to the canonical instance. +- `ContentIndex` does not keep an object alive after all external references are gone. +- A later equal value is re-created after the prior canonical instance is dropped. +- Interned objects cannot reference non-interned memo instances. +- Embedded interned values and referenced interned values compare as equal. +- Deeply nested interned embedded values resolve through `ContentIndex`. +- Buffered index updates are visible before flush. +- Persisted index entries resolve correctly after reopening. +- Stale index entries are ignored or removed and never return dead objects. + +Native implementation must preserve existing project conventions: + +- Use the established `v_object` constructor pattern. +- Use `db0::o_ext` for variable-size overlaid inheritance. +- Use camelCase for C++ locals, lambdas, and method names. 
+- Use explicit double-negation checks such as `if (!!obj)` when a project type supports `operator!()`. +- Use `modifyExt()` for real durable state mutations from Python wrappers. +- Do not use `const_cast` on `ext()` to call mutating methods. +- Use `Py_FOR(item, iterator)` and `PySafe_*` helpers for Python C API iteration and container writes. diff --git a/python_tests/test_load.py b/python_tests/test_load.py index 7c3d3359..90a55ced 100644 --- a/python_tests/test_load.py +++ b/python_tests/test_load.py @@ -34,6 +34,53 @@ class LoadProtectedDerivedClass(LoadProtectedBaseClass): derived_value: str +@db0.memo(immutable=True, no_default_tags=True) +@dataclass +class LoadImmutableLeaf: + name: str + count: int + + +@db0.memo(immutable=True, intern=True, no_default_tags=True) +@dataclass +class LoadInternLeaf: + name: str + count: int + + +@db0.memo(immutable=True, no_default_tags=True) +class LoadImmutableHolder: + def __init__(self, value): + self.value = value + + +@db0.memo(immutable=True, no_default_tags=True) +class LoadImmutableDeepRoot: + def __init__(self): + self.branch = LoadImmutableHolder(LoadImmutableLeaf("deep", 9)) + self.label = "root" + + +@db0.memo(immutable=True, no_default_tags=True) +class LoadImmutableCollections: + def __init__(self): + self.values = ( + LoadImmutableLeaf("tuple", 1), + [LoadImmutableLeaf("list", 2)], + {"key": LoadImmutableLeaf("dict-value", 3)}, + ) + + +@db0.memo(immutable=True, no_default_tags=True) +class LoadImmutableCustom: + def __init__(self, name, count): + self.name = name + self.count = count + + def __load__(self, **kwargs): + return {"custom": self.name, "count": self.count} + + def test_load_py_string(): assert db0.load("abc") == "abc" @@ -75,6 +122,59 @@ def test_load_memo_types(db0_fixture): assert db0.load(memo) == {"value": "string"} +def test_load_immutable_memo_root(db0_fixture): + memo = LoadImmutableLeaf("immutable", 7) + + assert db0.load(memo) == {"name": "immutable", "count": 7} + + +def 
test_load_intern_memo_root(db0_fixture): + memo = LoadInternLeaf("intern", 8) + + assert db0.load(memo) == {"name": "intern", "count": 8} + + +def test_load_embedded_immutable_memo_directly(db0_fixture): + holder = LoadImmutableHolder(LoadImmutableLeaf("embedded", 4)) + + assert db0.load(holder.value) == {"name": "embedded", "count": 4} + + +def test_load_deep_embedded_immutable_memo(db0_fixture): + root = LoadImmutableDeepRoot() + + assert db0.load(root) == { + "branch": {"value": {"name": "deep", "count": 9}}, + "label": "root", + } + + +def test_load_embedded_immutable_memo_inside_collections(db0_fixture): + root = LoadImmutableCollections() + + assert db0.load(root) == { + "values": ( + {"name": "tuple", "count": 1}, + [{"name": "list", "count": 2}], + {"key": {"name": "dict-value", "count": 3}}, + ) + } + + +def test_load_exclude_on_immutable_and_embedded_memo(db0_fixture): + holder = LoadImmutableHolder(LoadImmutableLeaf("embedded", 4)) + + assert db0.load(holder, exclude=["value"]) == {} + assert db0.load(holder.value, exclude=["count"]) == {"name": "embedded"} + + +def test_load_custom_immutable_memo(db0_fixture): + memo = LoadImmutableCustom("custom", 11) + + assert db0.load(memo) == {"custom": "custom", "count": 11} + assert db0.load_all(memo) == {"name": "custom", "count": 11} + + def test_load_protected_memo_only_loads_readable_fields(db0_fixture): account_id = ContextVar("load_protected_account_id") memo = LoadProtectedClass("alpha", 7, "private") diff --git a/python_tests/test_memo_immutable.py b/python_tests/test_memo_immutable.py index 2ab0c8fe..f5664bf7 100644 --- a/python_tests/test_memo_immutable.py +++ b/python_tests/test_memo_immutable.py @@ -305,6 +305,57 @@ def test_uuid_and_fetch_immutable_root_object(db0_fixture): assert reopened.value == 102 +def test_uuid_materializes_non_materialized_immutable_root_object(db0_fixture): + obj = MemoImmutableClass1(data="uuid materializes immutable", value=104) + + obj_uuid = db0.uuid(obj) + 
db0.tags(obj).add("keep-uuid-materialized-immutable") + + assert db0.fetch(obj_uuid) is obj + assert obj.data == "uuid materializes immutable" + assert obj.value == 104 + + +def test_immutable_flag_cannot_change_to_mutable_after_materialization(db0_fixture): + @db0.memo(id="dbzero-software/dbzero/tests/immutable-stable-contract", immutable=True) + @dataclass + class MemoInitiallyImmutable: + name: str + + db0.materialized(MemoInitiallyImmutable("alpha")) + + @db0.memo(id="dbzero-software/dbzero/tests/immutable-stable-contract") + @dataclass + class MemoNoLongerImmutable: + name: str + + with pytest.raises(RuntimeError, match="immutable flag"): + db0.materialized(MemoNoLongerImmutable("beta")) + + +def test_fetch_rejects_immutable_class_redeclared_as_mutable(db0_fixture): + @db0.memo(id="dbzero-software/dbzero/tests/immutable-fetch-stable-contract", immutable=True) + @dataclass + class MemoInitiallyImmutable: + name: str + + obj = db0.materialized(MemoInitiallyImmutable("alpha")) + obj_uuid = db0.uuid(obj) + db0.commit() + + db0.close() + db0.init(DB0_DIR) + db0.open("my-test-prefix", "rw") + + @db0.memo(id="dbzero-software/dbzero/tests/immutable-fetch-stable-contract") + @dataclass + class MemoNoLongerImmutable: + name: str + + with pytest.raises(RuntimeError, match="immutable flag"): + db0.fetch(MemoNoLongerImmutable, obj_uuid) + + def test_uuid_and_fetch_embedded_nested_immutable_object(db0_fixture): root = MemoImmutableNestedHolder(name="embedded uuid", count=103, label="root") db0.tags(root).add("keep-embedded-fetch-uuid") @@ -331,6 +382,24 @@ def test_uuid_and_fetch_embedded_nested_immutable_object(db0_fixture): assert reopened.count == 103 +def test_embedded_immutable_shadow_type_reused_for_public_operations(db0_fixture): + root_a = db0.materialized(MemoImmutableNestedHolder(name="embedded type cache a", count=131, label="root-a")) + root_b = db0.materialized(MemoImmutableNestedHolder(name="embedded type cache b", count=132, label="root-b")) + nested_a = 
root_a.nested + nested_b = root_b.nested + + assert isinstance(nested_a, MemoImmutableNestedPayload) + assert type(nested_a) is type(nested_b) + + nested_uuid = db0.uuid(nested_a) + db0.tags(nested_a).add("embedded-type-cache-tag") + result = list(db0.find(MemoImmutableNestedPayload, "embedded-type-cache-tag")) + + assert len(result) == 1 + assert db0.uuid(result[0]) == nested_uuid + assert result[0].name == "embedded type cache a" + + def test_uuid_and_fetch_deeply_embedded_immutable_objects(db0_fixture): root = MemoImmutableDeepRoot(name="deep embedded uuid", count=104) db0.tags(root).add("keep-deep-embedded-fetch-uuid") diff --git a/python_tests/test_memo_intern.py b/python_tests/test_memo_intern.py new file mode 100644 index 00000000..30fe83d8 --- /dev/null +++ b/python_tests/test_memo_intern.py @@ -0,0 +1,594 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# Copyright (c) 2025 DBZero Software sp. z o.o. + +import gc +import random +import time +from dataclasses import dataclass + +import pytest +import dbzero as db0 + +from .conftest import DB0_DIR + + +def get_memo_class_object(obj): + return db0.get_memo_class(obj).get_class() + + +@db0.memo(immutable=True, intern=True) +@dataclass +class MemoInternLeaf: + name: str + + +@db0.memo(immutable=True, intern=True) +@dataclass +class MemoInternLeafSibling: + name: str + + +@db0.memo(immutable=True) +@dataclass +class MemoNonInternImmutableLeaf: + name: str + + +@db0.memo(immutable=True, no_cache=True) +@dataclass +class MemoNoCacheImmutableLeaf: + name: str + + +@db0.memo +@dataclass +class MemoNonInternMutableLeaf: + name: str + + +@db0.memo(no_default_tags=True) +class MemoRegularInternReferenceHolder: + def __init__(self): + self.value = None + + +@db0.memo(immutable=True, intern=True) +class MemoInternHolder: + def __init__(self, value): + self.value = value + + +@db0.memo(immutable=True, intern=True) +class MemoInternContainerHolder: + def __init__(self, values): + self.values = values + + 
+@db0.memo(immutable=True) +class MemoImmutableHolder: + def __init__(self, value): + self.value = value + + +@db0.memo(immutable=True, intern=True) +class MemoInternComposite: + def __init__(self, name, count, payload): + self.name = name + self.count = count + self.payload = payload + + +@db0.memo(immutable=True, intern=True, no_default_tags=True) +class MemoInternStressObject: + def __init__(self, name, payload): + self.name = name + self.payload = payload + + +@db0.memo(immutable=True, intern=True, no_default_tags=True) +class MemoInternWideObject: + def __init__(self, items): + for name, value in items: + setattr(self, name, value) + + +def make_intern_stress_payload(index, variant): + address_items = [ + ("street", f"{index % 997} Intern Ave"), + ("unit", index % 113), + ("zip", f"{10000 + index % 90000:05d}"), + ] + profile_items = [ + ("bucket", index % 251), + ("rank", index // 251), + ("address", dict(reversed(address_items) if variant % 2 else address_items)), + ] + inner_items = [ + ("profile", dict(reversed(profile_items) if variant % 3 == 0 else profile_items)), + ("flags", (index % 2 == 0, index % 5 == 0, index % 17)), + ("checksum", (index * 2654435761) & 0xFFFFFFFF), + ] + payload_items = [ + ("inner", dict(reversed(inner_items) if variant % 5 == 0 else inner_items)), + ("label", f"group-{index % 4096}"), + ("values", (index, index % 31, index % 127)), + ] + return dict(reversed(payload_items) if variant % 7 == 0 else payload_items) + + +def make_intern_stress_indexes(total_count, unique_count): + rng = random.Random(12648430) + indexes = list(range(unique_count)) + indexes.extend(rng.randrange(unique_count) for _ in range(total_count - unique_count)) + rng.shuffle(indexes) + return indexes + + +def test_intern_flag_is_persisted_on_class(db0_fixture): + obj = db0.materialized(MemoInternLeaf("alpha")) + + flags = get_memo_class_object(obj).get_type_flags() + + assert flags["immutable"] is True + assert flags["intern"] is True + + +def 
test_intern_requires_immutable_decorator_flag(): + with pytest.raises(RuntimeError, match="intern.*immutable"): + + @db0.memo(intern=True) + class MemoInvalidIntern: + pass + + +def test_intern_flag_cannot_change_after_class_materialization(db0_fixture): + @db0.memo(id="dbzero-software/dbzero/tests/intern-stable-contract", immutable=True, intern=True) + class MemoInitiallyIntern: + def __init__(self, name): + self.name = name + + db0.materialized(MemoInitiallyIntern("alpha")) + + with pytest.raises(RuntimeError, match="intern flag"): + + @db0.memo(id="dbzero-software/dbzero/tests/intern-stable-contract", immutable=True) + class MemoNoLongerIntern: + def __init__(self, name): + self.name = name + + db0.materialized(MemoNoLongerIntern("beta")) + + +def test_interned_object_can_reference_interned_immutable_instance(db0_fixture): + leaf = MemoInternLeaf("nested") + + holder = db0.materialized(MemoInternHolder(leaf)) + + assert holder.value.name == "nested" + + +def test_assigning_non_materialized_intern_to_existing_regular_memo_materializes_reference(db0_fixture): + holder = MemoRegularInternReferenceHolder() + db0.tags(holder).add("keep-regular-intern-reference-holder") + leaf = MemoInternLeaf("assigned") + + holder.value = leaf + + leaf_uuid = db0.uuid(leaf) + assert db0._check_interned(leaf_uuid, MemoInternLeaf) + assert db0.uuid(holder.value) == leaf_uuid + assert holder.value.name == "assigned" + + +def test_uuid_materializes_non_materialized_intern_instance(db0_fixture): + leaf = MemoInternLeaf("uuid materialized") + + leaf_uuid = db0.uuid(leaf) + + assert db0._check_interned(leaf_uuid, MemoInternLeaf) + assert db0.fetch(leaf_uuid, MemoInternLeaf).name == "uuid materialized" + + +def test_interned_object_reuses_materialized_reference(db0_fixture): + leaf = db0.materialized(MemoInternLeaf("materialized reference")) + leaf_uuid = db0.uuid(leaf) + + holder = db0.materialized(MemoInternHolder(leaf)) + second = db0.materialized(MemoInternLeaf("materialized 
reference")) + + assert db0.uuid(holder.value) == leaf_uuid + assert db0.uuid(second) == leaf_uuid + assert holder.value.name == "materialized reference" + + +def test_embedded_interned_object_reuses_embedded_instance(db0_fixture): + leaf = MemoInternLeaf("embedded") + holder = db0.materialized(MemoInternHolder(leaf)) + db0.clear_cache() + second = db0.materialized(MemoInternLeaf("embedded")) + + assert db0.uuid(leaf) == db0.uuid(holder.value) + assert db0.uuid(second) == db0.uuid(leaf) + assert leaf.name == "embedded" + assert second.name == "embedded" + + +def test_fetch_embedded_object_reuses_lang_cache_entry(db0_fixture): + leaf = MemoInternLeaf("embedded cache") + holder = db0.materialized(MemoInternHolder(leaf)) + leaf_uuid = db0.uuid(holder.value) + db0.tags(holder).add("keep-embedded-cache") + db0.clear_cache() + + first = db0.fetch(leaf_uuid, MemoInternLeaf) + second = db0.fetch(leaf_uuid, MemoInternLeaf) + + assert second is first + assert second.name == "embedded cache" + + +def test_fetch_no_cache_embedded_object_is_not_added_to_lang_cache(db0_fixture): + holder = db0.materialized(MemoImmutableHolder(MemoNoCacheImmutableLeaf("embedded no-cache"))) + leaf_uuid = db0.uuid(holder.value) + db0.tags(holder).add("keep-embedded-no-cache") + db0.clear_cache() + + first = db0.fetch(leaf_uuid, MemoNoCacheImmutableLeaf) + second = db0.fetch(leaf_uuid, MemoNoCacheImmutableLeaf) + + assert second is not first + assert first.name == "embedded no-cache" + assert second.name == "embedded no-cache" + + +def test_embedded_interned_object_reuses_after_commit_and_fetch(db0_fixture): + leaf = MemoInternLeaf("embedded committed") + holder = db0.materialized(MemoInternHolder(leaf)) + db0.tags(holder).add("keep-embedded-intern") + leaf_uuid = db0.uuid(leaf) + holder_uuid = db0.uuid(holder) + db0.commit() + + fetched_holder = db0.fetch(holder_uuid, MemoInternHolder) + second = db0.materialized(MemoInternLeaf("embedded committed")) + + assert db0.uuid(fetched_holder.value) == 
leaf_uuid + assert db0.uuid(second) == leaf_uuid + assert second.name == "embedded committed" + + +@pytest.mark.parametrize( + ("make_values", "extract_value"), + [ + pytest.param(lambda leaf: ("prefix", leaf), lambda values: values[1], id="tuple"), + pytest.param(lambda leaf: ["prefix", leaf], lambda values: values[1], id="list"), + pytest.param( + lambda leaf: {"marker", leaf}, + lambda values: next(value for value in values if isinstance(value, MemoInternLeaf)), + id="set", + ), + pytest.param(lambda leaf: {"child": leaf}, lambda values: values["child"], id="dict-value"), + pytest.param(lambda leaf: {leaf: "child"}, lambda values: next(iter(values.keys())), id="dict-key"), + ], +) +def test_embedded_interned_object_inside_container_reuses_embedded_instance( + db0_fixture, make_values, extract_value +): + leaf = MemoInternLeaf("container embedded") + holder = db0.materialized(MemoInternContainerHolder(make_values(leaf))) + second = db0.materialized(MemoInternLeaf("container embedded")) + + embedded_leaf = extract_value(holder.values) + assert db0.uuid(embedded_leaf) == db0.uuid(leaf) + assert db0.uuid(second) == db0.uuid(leaf) + assert second.name == "container embedded" + + +def test_standalone_interned_object_reuses_existing_instance(db0_fixture): + first = db0.materialized(MemoInternLeaf("dedupe")) + db0.clear_cache() + second = db0.materialized(MemoInternLeaf("dedupe")) + + assert db0.uuid(second) == db0.uuid(first) + assert second.name == "dedupe" + + +def test_standalone_intern_lookup_uses_bound_type(db0_fixture): + first = db0.materialized(MemoInternLeaf("same-content-bound-type")) + sibling = db0.materialized(MemoInternLeafSibling("same-content-bound-type")) + db0.clear_cache() + second = db0.materialized(MemoInternLeaf("same-content-bound-type")) + second_sibling = db0.materialized(MemoInternLeafSibling("same-content-bound-type")) + + assert db0.uuid(second) == db0.uuid(first) + assert db0.uuid(second_sibling) == db0.uuid(sibling) + assert 
db0.uuid(second) != db0.uuid(second_sibling) + assert db0.get_type_stats(MemoInternLeaf)["content_index"]["size"] == 1 + assert db0.get_type_stats(MemoInternLeafSibling)["content_index"]["size"] == 1 + + +def test_standalone_interned_object_keeps_distinct_content(db0_fixture): + first = db0.materialized(MemoInternLeaf("alpha")) + second = db0.materialized(MemoInternLeaf("beta")) + + assert db0.uuid(second) != db0.uuid(first) + assert first.name == "alpha" + assert second.name == "beta" + + +def test_standalone_interned_object_reuses_after_commit_and_fetch(db0_fixture): + first = db0.materialized(MemoInternLeaf("committed")) + first_uuid = db0.uuid(first) + db0.commit() + + fetched = db0.fetch(first_uuid, MemoInternLeaf) + second = db0.materialized(MemoInternLeaf("committed")) + + assert db0.uuid(fetched) == first_uuid + assert db0.uuid(second) == first_uuid + assert second.name == "committed" + + +def test_standalone_interned_object_reuses_after_close_and_reopen(db0_fixture): + first = db0.materialized(MemoInternLeaf("reopened")) + first_uuid = db0.uuid(first) + db0.tags(first).add("keep-reopened-intern") + db0.commit() + db0.close() + db0.init(DB0_DIR) + db0.open("my-test-prefix", "rw") + + fetched = db0.fetch(first_uuid, MemoInternLeaf) + second = db0.materialized(MemoInternLeaf("reopened")) + + assert db0.uuid(fetched) == first_uuid + assert db0.uuid(second) == first_uuid + assert second.name == "reopened" + + +def test_composite_interned_object_reuses_equivalent_content(db0_fixture): + first = db0.materialized(MemoInternComposite( + "composite", 7, {"items": ("alpha", 1), "flags": {"x", "y"}} + )) + second = db0.materialized(MemoInternComposite( + "composite", 7, {"flags": {"y", "x"}, "items": ("alpha", 1)} + )) + different = db0.materialized(MemoInternComposite( + "composite", 8, {"items": ("alpha", 1), "flags": {"x", "y"}} + )) + + assert db0.uuid(second) == db0.uuid(first) + assert db0.uuid(different) != db0.uuid(first) + assert second.name == "composite" + 
assert second.count == 7 + + +def test_interned_dict_content_ignores_insertion_order(db0_fixture): + first = db0.materialized(MemoInternHolder({ + "alpha": 1, + "nested": {"street": "Intern Ave", "unit": 7}, + "flags": {"hot", "cold"}, + })) + second = db0.materialized(MemoInternHolder({ + "flags": {"cold", "hot"}, + "nested": {"unit": 7, "street": "Intern Ave"}, + "alpha": 1, + })) + + assert db0.uuid(second) == db0.uuid(first) + assert db0.get_type_stats(MemoInternHolder)["content_index"]["size"] == 1 + + +def test_interned_dict_content_keeps_distinct_values(db0_fixture): + first = db0.materialized(MemoInternHolder({"alpha": 1, "nested": {"unit": 7}})) + second = db0.materialized(MemoInternHolder({"nested": {"unit": 8}, "alpha": 1})) + + assert db0.uuid(second) != db0.uuid(first) + assert db0.get_type_stats(MemoInternHolder)["content_index"]["size"] == 2 + + +def test_interned_set_content_uses_hash_lookup(db0_fixture): + first = db0.materialized(MemoInternHolder({"value": {"alpha", "beta", "gamma"}})) + second = db0.materialized(MemoInternHolder({"value": {"gamma", "alpha", "beta"}})) + + assert db0.uuid(second) == db0.uuid(first) + assert db0.get_type_stats(MemoInternHolder)["content_index"]["size"] == 1 + + +def test_interned_wide_object_fields_use_hash_lookup(db0_fixture): + items = [(f"field_{index:04d}", index * 17) for index in range(512)] + changed_items = list(items) + changed_items[257] = (changed_items[257][0], -1) + + first = db0.materialized(MemoInternWideObject(items)) + second = db0.materialized(MemoInternWideObject(list(reversed(items)))) + different = db0.materialized(MemoInternWideObject(changed_items)) + + assert db0.uuid(second) == db0.uuid(first) + assert db0.uuid(different) != db0.uuid(first) + assert db0.get_type_stats(MemoInternWideObject)["content_index"]["size"] == 2 + + +def test_many_interned_materializations_reuse_root_and_embedded_candidates(db0_fixture): + canonical_uuids = {} + canonical_objects = [] + holders = [] + + for index in 
range(128): + name = f"bulk-{index % 16}" + if name not in canonical_uuids and index % 2 == 0: + leaf = MemoInternLeaf(name) + holder = db0.materialized(MemoInternHolder(leaf)) + db0.tags(holder).add(f"keep-bulk-holder-{name}") + holders.append(holder) + canonical_uuids[name] = db0.uuid(leaf) + else: + leaf = db0.materialized(MemoInternLeaf(name)) + canonical_uuids.setdefault(name, db0.uuid(leaf)) + if len(canonical_objects) < len(canonical_uuids): + db0.tags(leaf).add(f"keep-bulk-leaf-{name}") + canonical_objects.append(leaf) + assert db0.uuid(leaf) == canonical_uuids[name] + + assert len(canonical_uuids) == 16 + assert len(set(canonical_uuids.values())) == 16 + assert len(holders) == 8 + + db0.commit() + for index in range(128): + name = f"bulk-{index % 16}" + leaf = db0.materialized(MemoInternLeaf(name)) + + assert db0.uuid(leaf) == canonical_uuids[name] + assert leaf.name == name + + +@pytest.mark.stress_test +def test_interned_memo_random_objects_deduplicate_to_unique_count(db0_fixture): + total_count = 50000 + unique_count = 15000 + indexes = make_intern_stress_indexes(total_count, unique_count) + + objects = [] + start = time.perf_counter() + for offset, index in enumerate(indexes): + obj = db0.materialized( + MemoInternStressObject(f"name-{index % 4096}", make_intern_stress_payload(index, offset)) + ) + objects.append(obj) + + elapsed = time.perf_counter() - start + print( + f"Interned memo stress: {total_count} materializations in {elapsed:.3f}s " + f"({total_count / elapsed:.0f} ops/sec)" + ) + + stats = db0.get_type_stats(MemoInternStressObject) + assert stats["intern"] is True + assert stats["instances"] == unique_count + assert stats["content_index"]["size"] == unique_count + assert len({db0.uuid(obj) for obj in objects}) == unique_count + + +def test_dropped_standalone_interned_object_is_not_reused(db0_fixture): + obj = db0.materialized(MemoInternLeaf("dropped standalone")) + old_uuid = db0.uuid(obj) + assert db0._check_interned(old_uuid, 
MemoInternLeaf) + assert db0.get_type_stats(MemoInternLeaf)["content_index"]["size"] == 1 + del obj + gc.collect() + db0.commit() + + assert not db0.exists(old_uuid) + assert not db0._check_interned(old_uuid, MemoInternLeaf) + assert db0.get_type_stats(MemoInternLeaf)["content_index"]["size"] == 0 + with pytest.raises(Exception): + db0.fetch(old_uuid, MemoInternLeaf) + + replacement = db0.materialized(MemoInternLeaf("dropped standalone")) + + assert replacement.name == "dropped standalone" + assert db0.exists(db0.uuid(replacement)) + assert db0._check_interned(db0.uuid(replacement), MemoInternLeaf) + + +def test_dropped_embedded_interned_object_is_not_reused(db0_fixture): + leaf = MemoInternLeaf("dropped embedded") + holder = db0.materialized(MemoInternHolder(leaf)) + leaf_uuid = db0.uuid(leaf) + holder_uuid = db0.uuid(holder) + assert db0._check_interned(leaf_uuid, MemoInternLeaf) + assert db0.get_type_stats(MemoInternLeaf)["content_index"]["size"] == 1 + del leaf, holder + gc.collect() + db0.commit() + + assert not db0.exists(holder_uuid) + assert not db0.exists(leaf_uuid) + assert not db0._check_interned(leaf_uuid, MemoInternLeaf) + assert db0.get_type_stats(MemoInternLeaf)["content_index"]["size"] == 0 + + replacement = db0.materialized(MemoInternLeaf("dropped embedded")) + + assert replacement.name == "dropped embedded" + assert db0.uuid(replacement) != leaf_uuid + assert db0.exists(db0.uuid(replacement)) + + +@pytest.mark.parametrize( + "make_values", + [ + pytest.param(lambda leaf: ("prefix", leaf), id="tuple"), + pytest.param(lambda leaf: [leaf, "suffix"], id="list"), + pytest.param(lambda leaf: {"child": leaf}, id="dict-value"), + ], +) +def test_dropped_container_embedded_interned_object_is_not_reused(db0_fixture, make_values): + leaf = MemoInternLeaf("dropped container embedded") + holder = db0.materialized(MemoInternContainerHolder(make_values(leaf))) + leaf_uuid = db0.uuid(leaf) + holder_uuid = db0.uuid(holder) + assert db0._check_interned(leaf_uuid, 
MemoInternLeaf) + assert db0.get_type_stats(MemoInternLeaf)["content_index"]["size"] == 1 + del leaf, holder + gc.collect() + db0.commit() + + assert not db0.exists(holder_uuid) + assert not db0.exists(leaf_uuid) + assert not db0._check_interned(leaf_uuid, MemoInternLeaf) + assert db0.get_type_stats(MemoInternLeaf)["content_index"]["size"] == 0 + + replacement = db0.materialized(MemoInternLeaf("dropped container embedded")) + + assert replacement.name == "dropped container embedded" + assert db0.uuid(replacement) != leaf_uuid + assert db0.exists(db0.uuid(replacement)) + + +def test_dropped_duplicate_intern_candidate_does_not_hide_live_candidate(db0_fixture): + live = db0.materialized(MemoInternLeaf("live duplicate")) + live_uuid = db0.uuid(live) + db0.tags(live).add("keep-live-duplicate") + + duplicate_leaf = MemoInternLeaf("live duplicate") + holder = db0.materialized(MemoInternHolder(duplicate_leaf)) + duplicate_uuid = db0.uuid(duplicate_leaf) + holder_uuid = db0.uuid(holder) + assert duplicate_uuid != live_uuid + assert db0._check_interned(live_uuid, MemoInternLeaf) + assert db0._check_interned(duplicate_uuid, MemoInternLeaf) + assert db0.get_type_stats(MemoInternLeaf)["content_index"]["size"] == 2 + + del duplicate_leaf, holder + gc.collect() + db0.commit() + + assert db0.exists(live_uuid) + assert not db0.exists(holder_uuid) + assert not db0.exists(duplicate_uuid) + assert db0._check_interned(live_uuid, MemoInternLeaf) + assert not db0._check_interned(duplicate_uuid, MemoInternLeaf) + assert db0.get_type_stats(MemoInternLeaf)["content_index"]["size"] == 1 + + replacement = db0.materialized(MemoInternLeaf("live duplicate")) + + assert db0.uuid(replacement) == live_uuid + assert replacement.name == "live duplicate" + + +def test_interned_object_rejects_non_intern_immutable_reference(db0_fixture): + with pytest.raises((RuntimeError, AttributeError), match="intern.*reference"): + db0.materialized(MemoInternHolder(MemoNonInternImmutableLeaf("nested"))) + + +def 
test_interned_object_rejects_mutable_reference(db0_fixture): + with pytest.raises((RuntimeError, AttributeError), match="intern.*reference"): + db0.materialized(MemoInternHolder(MemoNonInternMutableLeaf("nested"))) + + +def test_interned_object_rejects_nested_non_intern_reference(db0_fixture): + value = {"items": (MemoNonInternImmutableLeaf("nested"),)} + + with pytest.raises((RuntimeError, AttributeError), match="intern.*reference"): + db0.materialized(MemoInternContainerHolder(value)) diff --git a/src/dbzero/bindings/python/Memo.cpp b/src/dbzero/bindings/python/Memo.cpp index 09de6f32..d5a4812c 100644 --- a/src/dbzero/bindings/python/Memo.cpp +++ b/src/dbzero/bindings/python/Memo.cpp @@ -898,7 +898,8 @@ namespace db0::python PyObject *wrapPyType(PyTypeObject *base_class, bool is_singleton, bool no_default_tags, const char *prefix_name, const char *type_id, const char *file_name, std::vector &&init_vars, PyObject *py_dyn_prefix_callable, - std::vector &&migrations, bool no_cache, bool immutable, std::optional protect_fields_option) + std::vector &&migrations, bool no_cache, bool immutable, bool intern, + std::optional protect_fields_option) { auto py_class = Py_BORROW(base_class); auto py_module = Py_OWN(findModule(*Py_OWN(PyObject_GetAttrString((PyObject*)*py_class, "__module__")))); @@ -931,6 +932,9 @@ namespace db0::python if (!new_type) { return nullptr; } + if (intern && !immutable) { + THROWF(db0::InputException) << "intern=True requires immutable=True"; + } auto base_memo_type = PyToolkit::getBaseMemoType(*new_type); bool inherited_protect_fields = base_memo_type @@ -951,6 +955,9 @@ namespace db0::python if (protect_fields) { type_flags.set(MemoOptions::PROTECT_FIELDS); } + if (intern) { + type_flags.set(MemoOptions::INTERN); + } auto type_info = MemoTypeDecoration( py_module, prefix_name, @@ -990,13 +997,14 @@ namespace db0::python PyObject *py_migrations = nullptr; PyObject *py_no_cache = nullptr; PyObject *py_immutable = nullptr; + PyObject *py_intern = 
nullptr; PyObject *py_protect_fields = nullptr; static const char *kwlist[] = { "input", "singleton", "no_default_tags", "prefix", "id", "py_file", "py_init_vars", - "py_dyn_prefix", "py_migrations", "no_cache", "immutable", "protect_fields", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOOOOOOOOO", const_cast(kwlist), &class_obj, &py_singleton, + "py_dyn_prefix", "py_migrations", "no_cache", "immutable", "intern", "protect_fields", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOOOOOOOOOO", const_cast(kwlist), &class_obj, &py_singleton, &py_no_default_tags, &py_prefix_name, &py_type_id, &py_file_name, &py_init_vars, &py_dyn_prefix, &py_migrations, - &py_no_cache, &py_immutable, &py_protect_fields)) + &py_no_cache, &py_immutable, &py_intern, &py_protect_fields)) { return NULL; } @@ -1005,6 +1013,7 @@ namespace db0::python bool no_default_tags = py_no_default_tags && PyObject_IsTrue(py_no_default_tags); bool no_cache = py_no_cache && PyObject_IsTrue(py_no_cache); bool immutable = py_immutable && PyObject_IsTrue(py_immutable); + bool intern = py_intern && PyObject_IsTrue(py_intern); std::optional protect_fields_option; if (py_protect_fields) { protect_fields_option = PyObject_IsTrue(py_protect_fields); @@ -1043,7 +1052,7 @@ namespace db0::python auto migrations = extractMigrations(py_migrations); return wrapPyType(castToType(class_obj), is_singleton, no_default_tags, prefix_name, type_id, file_name, - std::move(init_vars), py_dyn_prefix, std::move(migrations), no_cache, immutable, protect_fields_option + std::move(init_vars), py_dyn_prefix, std::move(migrations), no_cache, immutable, intern, protect_fields_option ); } @@ -1261,6 +1270,78 @@ namespace db0::python } return tryLoad(*result, kwargs, nullptr, load_stack_ptr); } + + bool shouldSkipLoadMember(PyObject *py_exclude, PyObject *key_obj, bool &has_error) + { + if (py_exclude == nullptr || py_exclude == Py_None) { + return false; + } + + int contains = 
PySequence_Contains(py_exclude, key_obj); + if (contains < 0) { + has_error = true; + return true; + } + return contains == 1; + } + + PyObject *tryLoadDictMembers(PyObject *members, PyObject *kwargs, PyObject *py_exclude, + std::unordered_set *load_stack_ptr) + { + auto py_result = Py_OWN(PyDict_New()); + if (!py_result) { + return nullptr; + } + + PyObject *key = nullptr; + PyObject *value = nullptr; + Py_ssize_t pos = 0; + bool has_error = false; + while (PyDict_Next(members, &pos, &key, &value)) { + if (shouldSkipLoadMember(py_exclude, key, has_error)) { + if (has_error) { + return nullptr; + } + continue; + } + + auto loaded = Py_OWN(tryLoad(value, kwargs, nullptr, load_stack_ptr, false)); + if (!loaded) { + return nullptr; + } + PySafeDict_SetItem(*py_result, Py_BORROW(key), loaded); + } + + return py_result.steal(); + } + + PyObject *tryLoadEmbeddedMemo(MemoImmutableObject *memo_obj, PyObject *kwargs, PyObject *py_exclude, + std::unordered_set *load_stack_ptr, bool load_all) + { + auto *py_object = reinterpret_cast(memo_obj); + if (!load_all) { + auto load_method = Py_OWN(PyObject_GetAttrString(py_object, "__load__")); + if (load_method.get()) { + if (PyCallable_Check(*load_method)) { + return executeLoadFunction(*load_method, kwargs, py_exclude, load_stack_ptr); + } + } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + } else { + return nullptr; + } + } + + auto members = Py_OWN(PyObject_GetAttrString(py_object, "__dict__")); + if (!members) { + return nullptr; + } + if (!PyDict_Check(*members)) { + PyErr_SetString(PyExc_TypeError, "Embedded memo __dict__ did not return a dict"); + return nullptr; + } + return tryLoadDictMembers(*members, kwargs, py_exclude, load_stack_ptr); + } template PyObject *tryLoadMemo(MemoImplT *memo_obj, PyObject *kwargs, PyObject *py_exclude, @@ -1279,25 +1360,22 @@ namespace db0::python PyErr_Clear(); auto py_result = Py_OWN(PyDict_New()); + if (!py_result) { + return nullptr; + } + bool has_error = 
false; - memo_obj->ext().forAll([&py_result, memo_obj, py_exclude, kwargs, &has_error, load_stack_ptr] - (const std::string &key, PyTypes::ObjectSharedPtr) - { + for (const auto &key: memo_obj->ext().getMembers()) { auto key_obj = Py_OWN(PyUnicode_FromString(key.c_str())); if (!key_obj) { - has_error = true; - return false; + return nullptr; } - if (py_exclude != nullptr && py_exclude != Py_None) { - int contains = PySequence_Contains(py_exclude, *key_obj); - if (contains < 0) { - has_error = true; - return false; - } - if (contains == 1) { - return true; + if (shouldSkipLoadMember(py_exclude, *key_obj, has_error)) { + if (has_error) { + return nullptr; } + continue; } auto &memo_type = memo_obj->ext().getType(); @@ -1308,29 +1386,23 @@ namespace db0::python member_loc, key.c_str() )) { if (PyErr_Occurred()) { - has_error = true; - return false; + return nullptr; } - return true; + continue; } } auto attr = Py_OWN(PyAPI_MemoObject_getattro(memo_obj, *key_obj)); if (!attr) { - has_error = true; - return false; + return nullptr; } auto res = Py_OWN(tryLoad(*attr, kwargs, nullptr, load_stack_ptr, false)); if (!res) { - has_error = true; + return nullptr; } else { PySafeDict_SetItemString(*py_result, key.c_str(), res); } - return !has_error; - }); - if (has_error) { - return nullptr; } return py_result.steal(); } diff --git a/src/dbzero/bindings/python/Memo.hpp b/src/dbzero/bindings/python/Memo.hpp index 21322489..b5f3249b 100644 --- a/src/dbzero/bindings/python/Memo.hpp +++ b/src/dbzero/bindings/python/Memo.hpp @@ -53,6 +53,9 @@ namespace db0::python template PyObject *tryLoadMemo(MemoImplT *memo_obj, PyObject* kwargs, PyObject* exclude, std::unordered_set *load_stack_ptr = nullptr, bool load_all = false); + + PyObject *tryLoadEmbeddedMemo(MemoImmutableObject *memo_obj, PyObject* kwargs, PyObject* exclude, + std::unordered_set *load_stack_ptr = nullptr, bool load_all = false); // check for a memo type (i.e. 
generated by PyAPI_wrapPyClass) template bool PyMemo_Check(PyObject *); diff --git a/src/dbzero/bindings/python/PyAPI.cpp b/src/dbzero/bindings/python/PyAPI.cpp index e82463cf..e52df80f 100644 --- a/src/dbzero/bindings/python/PyAPI.cpp +++ b/src/dbzero/bindings/python/PyAPI.cpp @@ -27,6 +27,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -258,6 +261,100 @@ namespace db0::python return runSafe(tryExists, py_id, reinterpret_cast(py_type), prefix_name); } + bool checkInternedAddress( + db0::swine_ptr &fixture, object_model::ClassFactory &classFactory, + UniqueAddress address, const std::shared_ptr &expectedType + ) + { + db0::Allocator::AllocationInfo allocation; + try { + allocation = fixture->findAllocation(address.getAddress(), object_model::ObjectImmutableImpl::REALM_ID); + } catch (const db0::AbstractException &) { + return false; + } + + auto root = object_model::ObjectImmutableImpl::tryUnloadStem( + fixture, allocation.address, address.getInstanceId(), AccessFlags {} + ); + if (!root) { + return false; + } + + if (address.getAddress() == allocation.address) { + auto type = classFactory.getTypeByClassRef(root->getClassRef()).m_class; + if (expectedType && type->getAddress() != expectedType->getAddress()) { + return false; + } + if (!type->isIntern() || !type->hasContentIndex()) { + return false; + } + return type->getContentIndex().contains(root->getObject(), address); + } + + auto offset = address.getAddress().getOffset() - allocation.address.getOffset(); + if (!root->getOffsetIndex().contains(offset)) { + return false; + } + + const auto *rootBytes = reinterpret_cast(root.operator->()); + const auto &embeddedObject = object_model::o_embedded_object::__const_ref(rootBytes + offset); + auto type = classFactory.getTypeByClassRef(embeddedObject.getClassRef()).m_class; + if (expectedType && type->getAddress() != expectedType->getAddress()) { + return false; + } + if (!type->isIntern() || !type->hasContentIndex()) { + 
return false; + } + return type->getContentIndex().contains(embeddedObject, address); + } + + PyObject *tryCheckInterned(const char *uuid, PyObject *pyType) + { + auto objectId = ObjectId::tryFromBase32(uuid); + if (!objectId || objectId.m_storage_class != db0::object_model::StorageClass::OBJECT_REF) { + Py_RETURN_FALSE; + } + + auto fixture = PyToolkit::getPyWorkspace().getWorkspace().tryGetFixture(objectId.m_fixture_uuid); + if (!fixture) { + Py_RETURN_FALSE; + } + + bool result = false; + auto &classFactory = fixture->get(); + if (pyType && pyType != Py_None) { + if (!PyType_Check(pyType)) { + THROWF(db0::InputException) << "type must be a dbzero memo type"; + } + auto *memoType = reinterpret_cast(pyType); + if (!PyAnyMemoType_Check(memoType)) { + THROWF(db0::InputException) << "type must be a dbzero memo type"; + } + + auto type = classFactory.tryGetExistingType(memoType); + if (!type || !type->isIntern() || !type->hasContentIndex()) { + Py_RETURN_FALSE; + } + return PyBool_fromBool(checkInternedAddress(fixture, classFactory, objectId.m_address, type)); + } + + result = checkInternedAddress(fixture, classFactory, objectId.m_address, nullptr); + return PyBool_fromBool(result); + } + + PyObject *PyAPI_checkInterned(PyObject *, PyObject *args) + { + const char *uuid = nullptr; + PyObject *pyType = nullptr; + if (!PyArg_ParseTuple(args, "s|O", &uuid, &pyType)) { + PyErr_SetString(PyExc_TypeError, "Invalid argument type"); + return NULL; + } + + PY_API_FUNC + return runSafe(tryCheckInterned, uuid, pyType); + } + PyObject *tryOpen(PyObject *self, PyObject *args, PyObject *kwargs) { // prefix_name, open_mode, autocommit (bool) @@ -652,6 +749,106 @@ namespace db0::python PY_API_FUNC return runSafe(tryGetPrefixStats, args, kwargs); } + + PyObject *tryGetTypeStats(PyObject *args, PyObject *kwargs) + { + PyObject *pyType = nullptr; + const char *prefixName = nullptr; + const char * const kwlist[] = {"type", "prefix", nullptr}; + if (!PyArg_ParseTupleAndKeywords(args, 
kwargs, "O|s:get_type_stats", const_cast(kwlist), + &pyType, &prefixName)) { + return nullptr; + } + if (!PyType_Check(pyType)) { + THROWF(db0::InputException) << "type must be a dbzero memo type"; + } + + auto *memoType = reinterpret_cast(pyType); + if (!PyAnyMemoType_Check(memoType)) { + THROWF(db0::InputException) << "type must be a dbzero memo type"; + } + + auto &workspace = PyToolkit::getPyWorkspace().getWorkspace(); + db0::swine_ptr fixture; + if (prefixName) { + fixture = workspace.getFixture(prefixName, AccessType::READ_ONLY); + } else { + auto fixtureUuid = MemoTypeDecoration::get(memoType).getFixtureUUID(AccessType::READ_ONLY); + fixture = fixtureUuid + ? workspace.getFixture(fixtureUuid, AccessType::READ_ONLY) + : workspace.getCurrentFixture(); + } + fixture->refreshIfUpdated(); + + auto type = fixture->get().tryGetExistingType(memoType); + if (!type && !prefixName) { + workspace.forEachFixture([&](const Fixture &existingFixture) { + if (!type && existingFixture != *fixture) { + type = existingFixture.get().tryGetExistingType(memoType); + } + return !type; + }); + } + if (!type) { + THROWF(db0::InputException) << "Class not found: " << PyToolkit::getTypeName(memoType); + } + + auto stats = Py_OWN(PyDict_New()); + if (!stats) { + return nullptr; + } + + PySafeDict_SetItemString(*stats, "name", Py_OWN(PyUnicode_FromString(type->getName().c_str()))); + if (auto moduleName = type->tryGetModuleName()) { + PySafeDict_SetItemString(*stats, "module", Py_OWN(PyUnicode_FromString(moduleName->c_str()))); + } else { + PySafeDict_SetItemString(*stats, "module", Py_BORROW(Py_None)); + } + if (auto typeId = type->getTypeId()) { + PySafeDict_SetItemString(*stats, "type_id", Py_OWN(PyUnicode_FromString(typeId->c_str()))); + } else { + PySafeDict_SetItemString(*stats, "type_id", Py_BORROW(Py_None)); + } + PySafeDict_SetItemString(*stats, "prefix", Py_OWN(PyUnicode_FromString(type->getFixture()->getPrefix().getName().c_str()))); + PySafeDict_SetItemString(*stats, "uuid", 
Py_OWN(PyLong_FromUnsignedLongLong(type->getFixture()->getUUID()))); + PySafeDict_SetItemString(*stats, "address", Py_OWN(PyLong_FromUnsignedLongLong(type->getAddress().getValue()))); + PySafeDict_SetItemString(*stats, "class_ref", Py_OWN(PyLong_FromUnsignedLong(type->getClassRef()))); + auto refCounts = type->getRefCounts(); + auto instanceCount = refCounts.second > 0 ? refCounts.second - 1 : 0; + PySafeDict_SetItemString(*stats, "fields", Py_OWN(PyLong_FromSize_t(type->size()))); + PySafeDict_SetItemString(*stats, "instances", Py_OWN(PyLong_FromUnsignedLong(instanceCount))); + PySafeDict_SetItemString(*stats, "immutable", Py_OWN(PyBool_fromBool(type->isImmutable()))); + PySafeDict_SetItemString(*stats, "intern", Py_OWN(PyBool_fromBool(type->isIntern()))); + PySafeDict_SetItemString(*stats, "singleton", Py_OWN(PyBool_fromBool(type->isSingleton()))); + PySafeDict_SetItemString(*stats, "no_default_tags", Py_OWN(PyBool_fromBool(type->isNoDefaultTags()))); + PySafeDict_SetItemString(*stats, "no_cache", Py_OWN(PyBool_fromBool(type->isNoCache()))); + + auto refCountsDict = Py_OWN(PyDict_New()); + if (!refCountsDict) { + return nullptr; + } + PySafeDict_SetItemString(*refCountsDict, "tags", Py_OWN(PyLong_FromUnsignedLong(refCounts.first))); + PySafeDict_SetItemString(*refCountsDict, "objects", Py_OWN(PyLong_FromUnsignedLong(refCounts.second))); + PySafeDict_SetItemString(*stats, "ref_counts", refCountsDict); + + if (type->isIntern()) { + auto contentIndex = Py_OWN(PyDict_New()); + if (!contentIndex) { + return nullptr; + } + auto size = type->hasContentIndex() ? 
type->getContentIndex().size() : 0; + PySafeDict_SetItemString(*contentIndex, "size", Py_OWN(PyLong_FromUnsignedLongLong(size))); + PySafeDict_SetItemString(*stats, "content_index", contentIndex); + } + + return stats.steal(); + } + + PyObject *getTypeStats(PyObject *self, PyObject *args, PyObject *kwargs) + { + PY_API_FUNC + return runSafe(tryGetTypeStats, args, kwargs); + } PyObject *PyAPI_getSnapshot(PyObject *, PyObject *args, PyObject *kwargs) { @@ -1518,7 +1715,7 @@ namespace db0::python PyErr_SetString(PyExc_TypeError, "Invalid argument type. Exclude shoud be a sequence"); return NULL; } - if (!PyAnyMemo_Check(py_object)) { + if (!PyToolkit::isAnyMemoObject(py_object)) { PyErr_SetString(PyExc_TypeError, "Exclude is only supported for memo objects"); return NULL; } diff --git a/src/dbzero/bindings/python/PyAPI.hpp b/src/dbzero/bindings/python/PyAPI.hpp index ff047379..609e4c6c 100644 --- a/src/dbzero/bindings/python/PyAPI.hpp +++ b/src/dbzero/bindings/python/PyAPI.hpp @@ -42,6 +42,10 @@ namespace db0::python PyObject *PyAPI_fetch(PyObject *, PyObject *args, PyObject *kwargs); // Similar to PyAPI_fetch, but only returns a flag True / False if object can be fetched PyObject *PyAPI_exists(PyObject *, PyObject *args, PyObject *kwargs); + + // Test/debug helper: returns true if a UUID is present in an intern ContentIndex. + // Optional second argument limits the check to a specific memo type. 
+ PyObject *PyAPI_checkInterned(PyObject *, PyObject *args); /** * Initialize dbzero Python bindings @@ -145,6 +149,8 @@ namespace db0::python PyObject *PyAPI_getMemoClasses(PyObject *self, PyObject *args, PyObject *kwargs); PyObject *getPrefixStats(PyObject *self, PyObject *args, PyObject *kwargs); + + PyObject *getTypeStats(PyObject *self, PyObject *args, PyObject *kwargs); PyObject *getStorageStats(PyObject *, PyObject *args, PyObject *kwargs); diff --git a/src/dbzero/bindings/python/PyInternalAPI.cpp b/src/dbzero/bindings/python/PyInternalAPI.cpp index a5048ac1..e873bbdf 100644 --- a/src/dbzero/bindings/python/PyInternalAPI.cpp +++ b/src/dbzero/bindings/python/PyInternalAPI.cpp @@ -5,6 +5,9 @@ #include "PyToolkit.hpp" #include "Memo.hpp" #include +#include +#include +#include #include #include #include @@ -37,6 +40,95 @@ namespace db0::python { + + namespace + { + PyObject *tryLoadSequenceAsTuple( + PyObject *sequence, PyObject *kwargs, std::unordered_set *load_stack_ptr + ) + { + auto size = PyObject_Length(sequence); + if (size < 0) { + return nullptr; + } + + auto result = Py_OWN(PyTuple_New(size)); + if (!result) { + return nullptr; + } + + for (Py_ssize_t i = 0; i < size; ++i) { + auto item = Py_OWN(PySequence_GetItem(sequence, i)); + if (!item) { + return nullptr; + } + auto loaded = Py_OWN(tryLoad(*item, kwargs, nullptr, load_stack_ptr)); + if (!loaded) { + return nullptr; + } + PySafeTuple_SetItem(*result, i, loaded); + } + return result.steal(); + } + + PyObject *tryLoadIterableAsList( + PyObject *iterable, PyObject *kwargs, std::unordered_set *load_stack_ptr + ) + { + auto iterator = Py_OWN(PyObject_GetIter(iterable)); + if (!iterator) { + return nullptr; + } + + auto result = Py_OWN(PyList_New(0)); + if (!result) { + return nullptr; + } + + Py_FOR(item, iterator) { + auto loaded = Py_OWN(tryLoad(*item, kwargs, nullptr, load_stack_ptr)); + if (!loaded) { + return nullptr; + } + if (PyList_Append(*result, *loaded) < 0) { + return nullptr; + } + } + 
return result.steal(); + } + + PyObject *tryLoadMappingAsDict( + PyObject *mapping, PyObject *kwargs, std::unordered_set *load_stack_ptr + ) + { + auto iterator = Py_OWN(PyObject_GetIter(mapping)); + if (!iterator) { + return nullptr; + } + + auto result = Py_OWN(PyDict_New()); + if (!result) { + return nullptr; + } + + Py_FOR(item, iterator) { + auto key = Py_OWN(tryLoad(*item, kwargs, nullptr, load_stack_ptr)); + if (!key) { + return nullptr; + } + auto value = Py_OWN(PyObject_GetItem(mapping, *item)); + if (!value) { + return nullptr; + } + auto loadedValue = Py_OWN(tryLoad(*value, kwargs, nullptr, load_stack_ptr)); + if (!loadedValue) { + return nullptr; + } + PySafeDict_SetItem(*result, key, loadedValue); + } + return result.steal(); + } + } LoadGuard::LoadGuard(std::unordered_set *load_stack_ptr, const void *arg_ptr) : m_load_stack_ptr(load_stack_ptr) @@ -752,6 +844,16 @@ namespace db0::python PyErr_SetString(PyExc_RecursionError, "Recursive loading detected"); return nullptr; } + + if (Py_TYPE(py_obj) == &EmbeddedTupleType) { + return tryLoadSequenceAsTuple(py_obj, kwargs, load_stack_ptr); + } + if (Py_TYPE(py_obj) == &EmbeddedSetType) { + return tryLoadIterableAsList(py_obj, kwargs, load_stack_ptr); + } + if (Py_TYPE(py_obj) == &EmbeddedDictType) { + return tryLoadMappingAsDict(py_obj, kwargs, load_stack_ptr); + } using TypeId = db0::bindings::TypeId; auto &type_manager = PyToolkit::getTypeManager(); @@ -794,6 +896,11 @@ namespace db0::python } else if (type_id == TypeId::MEMO_OBJECT) { return tryLoadMemo(reinterpret_cast(py_obj), kwargs, py_exclude, load_stack_ptr, load_all); } else if (type_id == TypeId::MEMO_IMMUTABLE_OBJECT) { + if (PyEmbeddedMemo_Check(py_obj)) { + return tryLoadEmbeddedMemo( + reinterpret_cast(py_obj), kwargs, py_exclude, load_stack_ptr, load_all + ); + } return tryLoadMemo(reinterpret_cast(py_obj), kwargs, py_exclude, load_stack_ptr, load_all); } else { THROWF(db0::InputException) << "__load__ not implemented for type: " @@ -829,7 
+936,15 @@ namespace db0::python db0::FixtureLock lock(fixture); // materialize by calling postInit memo_obj->modifyExt().setLangObject(reinterpret_cast(memo_obj)); - memo_obj->modifyExt().postInit(lock); + auto existingInternAddress = memo_obj->modifyExt().postInit(lock); + if (existingInternAddress) { + auto &classFactory = fixture->get(); + auto internObject = PyToolkit::unloadAnyObject( + fixture, existingInternAddress->getAddress(), classFactory, nullptr, + existingInternAddress->getInstanceId(), memo_obj->ext().getAccessMode() + ); + return internObject.steal(); + } if (!memo_obj->ext().getType().isNoCache()) { fixture->getLangCache().add(memo_obj->ext().getAddress(), memo_obj); } diff --git a/src/dbzero/bindings/python/PyToolkit.cpp b/src/dbzero/bindings/python/PyToolkit.cpp index bb646eb2..c1f6aa23 100644 --- a/src/dbzero/bindings/python/PyToolkit.cpp +++ b/src/dbzero/bindings/python/PyToolkit.cpp @@ -75,6 +75,33 @@ namespace db0::python return reinterpret_cast(pyObject)->ext().hasRefs(); } + bool isEmbeddedObject(PyObject *pyObject) + { + return pyObject && Py_TYPE(pyObject) == &EmbeddedObjectType; + } + + std::optional tryGetEmbeddedUniqueAddress(PyObject *pyObject) + { + if (PyEmbeddedMemo_Check(pyObject)) { + return getEmbeddedMemoRef(reinterpret_cast(pyObject)).uniqueAddress(); + } + if (isEmbeddedObject(pyObject)) { + return reinterpret_cast(pyObject)->ext().uniqueAddress(); + } + return std::nullopt; + } + + bool shouldCacheEmbeddedObject(PyObject *pyObject) + { + if (PyEmbeddedMemo_Check(pyObject)) { + return !getEmbeddedMemoRef(reinterpret_cast(pyObject)).type().isNoCache(); + } + if (isEmbeddedObject(pyObject)) { + return !reinterpret_cast(pyObject)->ext().type().isNoCache(); + } + return false; + } + PyToolkit::TypeObjectPtr resolveUnloadLangType( const PyToolkit::ClassFactory &classFactory, const std::shared_ptr &type, PyToolkit::TypeObjectSharedPtr langType, PyToolkit::TypeObjectPtr langTypeHint) @@ -250,6 +277,17 @@ namespace db0::python } 
return getTypeManager().extractAnyObject(pyObject).getType(); } + + const object_model::o_embedded_object &PyToolkit::getMemoImmutableObject(ObjectPtr pyObject) + { + if (PyEmbeddedMemo_Check(pyObject)) { + return getEmbeddedMemoRef(reinterpret_cast(pyObject)).embeddedObject(); + } + if (isEmbeddedObject(pyObject)) { + return reinterpret_cast(pyObject)->ext().embeddedObject(); + } + return reinterpret_cast(pyObject)->ext()->getObject(); + } void PyToolkit::throwErrorWithPyErrorCheck(const std::string& message, const std::string& error_detail) { if (PyErr_Occurred()) { @@ -575,6 +613,18 @@ namespace db0::python PyToolkit::TypeObjectPtr lang_type_ptr, std::uint16_t instance_id, AccessFlags access_mode, ObjectSharedPtr rootObject, const Allocator::AllocationInfo *allocationInfo) { + auto &langCache = fixture->getLangCache(); + auto cachedObject = langCache.get(address); + if (!!cachedObject) { + if (instance_id) { + auto cachedAddress = tryGetEmbeddedUniqueAddress(cachedObject.get()); + if (!cachedAddress || cachedAddress->getInstanceId() != instance_id) { + THROWF(db0::InputException) << "Invalid UUID or object has been deleted"; + } + } + return cachedObject; + } + Allocator::AllocationInfo alloc_info; if (!allocationInfo) { alloc_info = fixture->findAllocation(address, db0::object_model::ObjectImmutableImpl::REALM_ID); @@ -595,7 +645,11 @@ namespace db0::python } assert(!!rootObject); auto *rootMemo = reinterpret_cast(rootObject.get()); - return rootMemo->ext().getEmbeddedInstanceAtOffset(embeddedOffset); + auto embeddedObject = rootMemo->ext().getEmbeddedInstanceAtOffset(embeddedOffset); + if (shouldCacheEmbeddedObject(embeddedObject.get())) { + langCache.add(address, embeddedObject.get()); + } + return embeddedObject; } PyToolkit::ObjectSharedPtr PyToolkit::unloadAnyObject( @@ -618,6 +672,34 @@ namespace db0::python ); } + PyToolkit::ObjectSharedPtr PyToolkit::unloadAnyObject( + db0::swine_ptr &fixture, Address address, std::shared_ptr type_hint, + TypeObjectPtr 
lang_type_ptr, std::uint16_t instance_id, AccessFlags access_mode) + { + auto allocation = fixture->findAllocation(address, db0::object_model::ObjectImmutableImpl::REALM_ID); + auto &classFactory = fixture->get(); + if (!lang_type_ptr) { + auto langType = classFactory.hasLangType(*type_hint) + ? classFactory.getLangType(*type_hint) + : getTypeManager().getMemoBaseType(); + lang_type_ptr = langType.get(); + } + auto rootObject = unloadObject( + fixture, allocation.address, std::move(type_hint), lang_type_ptr, access_mode + ); + auto *rootMemo = reinterpret_cast(rootObject.get()); + if (instance_id && rootMemo->ext().getInstanceId() != instance_id) { + THROWF(db0::InputException) << "Invalid UUID or object has been deleted"; + } + if (allocation.address == address) { + return rootObject; + } + + return unloadEmbeddedObject( + fixture, address, classFactory, lang_type_ptr, instance_id, access_mode, rootObject, &allocation + ); + } + PyToolkit::ObjectSharedPtr PyToolkit::unloadObject(db0::swine_ptr &fixture, Address address, const ClassFactory &class_factory, TypeObjectPtr lang_type_ptr, std::uint16_t instance_id, AccessFlags access_mode) { @@ -1077,6 +1159,15 @@ namespace db0::python } } + bool PyToolkit::isIntern(TypeObjectPtr py_type) + { + if (isAnyMemoType(py_type)) { + return MemoTypeDecoration::get(py_type).getFlags()[MemoOptions::INTERN]; + } else { + return false; + } + } + bool PyToolkit::isProtectFields(TypeObjectPtr py_type) { if (isAnyMemoType(py_type)) { diff --git a/src/dbzero/bindings/python/PyToolkit.hpp b/src/dbzero/bindings/python/PyToolkit.hpp index bca746d3..330af5a2 100644 --- a/src/dbzero/bindings/python/PyToolkit.hpp +++ b/src/dbzero/bindings/python/PyToolkit.hpp @@ -23,6 +23,7 @@ namespace db0 class Fixture; class ProcessTimer; + template class v_object; } @@ -110,6 +111,9 @@ namespace db0::python TypeObjectPtr lang_class = nullptr, std::uint16_t instance_id = 0, AccessFlags = {}); static ObjectSharedPtr unloadAnyObject(db0::swine_ptr &, 
Address, const ClassFactory &, TypeObjectPtr lang_class = nullptr, std::uint16_t instance_id = 0, AccessFlags = {}); + static ObjectSharedPtr unloadAnyObject(db0::swine_ptr &, Address, + std::shared_ptr type_hint, TypeObjectPtr lang_class = nullptr, + std::uint16_t instance_id = 0, AccessFlags = {}); static ObjectSharedPtr unloadEmbeddedObject(db0::swine_ptr &, Address, const ClassFactory &, TypeObjectPtr lang_class = nullptr, std::uint16_t instance_id = 0, AccessFlags = {}, ObjectSharedPtr root_object = {}, const Allocator::AllocationInfo *allocation_info = nullptr); @@ -219,6 +223,7 @@ namespace db0::python static bool isNoCache(TypeObjectPtr); // type marked as immutable static bool isImmutable(TypeObjectPtr); + static bool isIntern(TypeObjectPtr); static bool isProtectFields(TypeObjectPtr); static FlagSet getMemoFlags(TypeObjectPtr); static bool hasMemoInstance(ObjectPtr); @@ -227,6 +232,7 @@ namespace db0::python static bool isMemoDropped(ObjectPtr); static bool hasMemoAnyRefs(ObjectPtr); static const object_model::Class &getMemoType(ObjectPtr); + static const object_model::o_embedded_object &getMemoImmutableObject(ObjectPtr); inline static void incRef(ObjectPtr py_object) { Py_INCREF(py_object); diff --git a/src/dbzero/bindings/python/PyTypeManager.cpp b/src/dbzero/bindings/python/PyTypeManager.cpp index 94770b13..b451f422 100644 --- a/src/dbzero/bindings/python/PyTypeManager.cpp +++ b/src/dbzero/bindings/python/PyTypeManager.cpp @@ -621,19 +621,6 @@ namespace db0::python return embedded_type; } - bool PyTypeManager::isEmbeddedMemoType(TypeObjectPtr type) const - { - if (!type) { - return false; - } - for (const auto &[_, embedded_type]: m_embedded_memo_types) { - if (embedded_type.get() == type) { - return true; - } - } - return false; - } - PyTypeManager::ObjectPtr PyTypeManager::getBadPrefixError() const { return m_py_bad_prefix_error.get(); } diff --git a/src/dbzero/bindings/python/PyTypeManager.hpp b/src/dbzero/bindings/python/PyTypeManager.hpp index 
9bef97e4..b9825d2f 100644 --- a/src/dbzero/bindings/python/PyTypeManager.hpp +++ b/src/dbzero/bindings/python/PyTypeManager.hpp @@ -217,7 +217,6 @@ namespace db0::python TypeObjectPtr getEmbeddedMemoType( TypeObjectPtr memo_type, const std::function &create_type ); - bool isEmbeddedMemoType(TypeObjectPtr type) const; // Decode either of: None, False or True from a lo-fi code ObjectSharedPtr getLangConstant(unsigned int) const; diff --git a/src/dbzero/bindings/python/Types.cpp b/src/dbzero/bindings/python/Types.cpp index 32f510d9..ae4827f3 100644 --- a/src/dbzero/bindings/python/Types.cpp +++ b/src/dbzero/bindings/python/Types.cpp @@ -118,7 +118,12 @@ namespace db0::python template PyObject *tryGetUUIDOf(T *self) { - auto &instance = self->ext(); + auto materialized = Py_OWN(getMaterializedMemoObject(self)); + if (!materialized) { + return nullptr; + } + auto *materializedMemo = reinterpret_cast(*materialized); + auto &instance = materializedMemo->ext(); if (!instance.hasInstance()) { THROWF(db0::InputException) << "Cannot get UUID of an uninitialized object"; } diff --git a/src/dbzero/bindings/python/dbzero.cpp b/src/dbzero/bindings/python/dbzero.cpp index d712d57d..8be88d0e 100644 --- a/src/dbzero/bindings/python/dbzero.cpp +++ b/src/dbzero/bindings/python/dbzero.cpp @@ -47,6 +47,7 @@ static PyMethodDef dbzero_methods[] = {"commit", &py::PyAPI_commit, METH_VARARGS, "Commit data to disk / persistent storage"}, {"fetch", (PyCFunction)&py::PyAPI_fetch, METH_VARARGS | METH_KEYWORDS, "Retrieve dbzero object instance by its UUID or type (in case of a singleton)"}, {"exists", (PyCFunction)&py::PyAPI_exists, METH_VARARGS | METH_KEYWORDS, "Check if a specific UUID points to a valid dbzero object instance or if singleton of a given type exists"}, + {"_check_interned", &py::PyAPI_checkInterned, METH_VARARGS, "Check if a UUID is present in an intern ContentIndex"}, {"delete", &py::PyAPI_del, METH_VARARGS, "Delete dbzero object and the corresponding Python instance"}, 
{"get_type_info", &py::PyAPI_getTypeInfo, METH_VARARGS, "Get dbzero type information"}, {"uuid", (PyCFunction)&py::PyAPI_getUUID, METH_FASTCALL, "Get unique object ID"}, @@ -64,6 +65,7 @@ static PyMethodDef dbzero_methods[] = {"refresh", (PyCFunction)&py::refresh, METH_VARARGS, ""}, {"get_state_num", (PyCFunction)&py::PyAPI_getStateNum, METH_VARARGS | METH_KEYWORDS, ""}, {"get_prefix_stats", (PyCFunction)&py::getPrefixStats, METH_VARARGS | METH_KEYWORDS, "Retrieve prefix specific statistics"}, + {"get_type_stats", (PyCFunction)&py::getTypeStats, METH_VARARGS | METH_KEYWORDS, "Retrieve memo type statistics"}, {"snapshot", (PyCFunction)&py::PyAPI_getSnapshot, METH_VARARGS | METH_KEYWORDS, "Get snapshot of dbzero state"}, {"get_snapshot_of", (PyCFunction)&py::PyAPI_getSnapshotOf, METH_FASTCALL, "Get snapshot associated with a specific object"}, {"begin_atomic", (PyCFunction)&py::PyAPI_beginAtomic, METH_FASTCALL, "Opens a new atomic operation's context"}, diff --git a/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp b/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp index 3fda8fbe..a27ed618 100644 --- a/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp +++ b/src/dbzero/bindings/python/embedded/EmbeddedObject.cpp @@ -738,7 +738,13 @@ namespace db0::python bool PyEmbeddedMemoType_Check(PyTypeObject *type) { - return PyToolkit::getTypeManager().isEmbeddedMemoType(type); + if (!type) { + return false; + } + auto expected = reinterpret_cast( + static_cast(&PyAPI_EmbeddedMemo_del) + ); + return type->tp_dealloc == expected; } bool PyEmbeddedMemo_Check(PyObject *object) diff --git a/src/dbzero/bindings/python/types/PyClass.cpp b/src/dbzero/bindings/python/types/PyClass.cpp index 7b2156d1..98de86c7 100644 --- a/src/dbzero/bindings/python/types/PyClass.cpp +++ b/src/dbzero/bindings/python/types/PyClass.cpp @@ -105,6 +105,7 @@ namespace db0::python PySafeDict_SetItemString(*py_result, "singleton", Py_OWN(PyBool_fromBool(type.isSingleton()))); 
PySafeDict_SetItemString(*py_result, "no_default_tags", Py_OWN(PyBool_fromBool(type.isNoDefaultTags()))); PySafeDict_SetItemString(*py_result, "immutable", Py_OWN(PyBool_fromBool(type.isImmutable()))); + PySafeDict_SetItemString(*py_result, "intern", Py_OWN(PyBool_fromBool(type.isIntern()))); PySafeDict_SetItemString(*py_result, "protect_fields", Py_OWN(PyBool_fromBool(type.isProtectFields()))); return py_result.steal(); } diff --git a/src/dbzero/object_model/ObjectBase.hpp b/src/dbzero/object_model/ObjectBase.hpp index 0e1847a3..199f43f4 100644 --- a/src/dbzero/object_model/ObjectBase.hpp +++ b/src/dbzero/object_model/ObjectBase.hpp @@ -81,6 +81,11 @@ namespace db0 { m_gc_registered = tryAddToGC0(*fixture, this); } + + ObjectBase(tag_no_gc, tag_from_stem, db0::swine_ptr &fixture, BaseT &&stem) + : has_fixture(typename has_fixture::tag_from_stem(), fixture, std::move(stem)) + { + } ~ObjectBase() { diff --git a/src/dbzero/object_model/class/Class.cpp b/src/dbzero/object_model/class/Class.cpp index 16e781b4..cfb0a191 100644 --- a/src/dbzero/object_model/class/Class.cpp +++ b/src/dbzero/object_model/class/Class.cpp @@ -11,7 +11,7 @@ #include #include "Schema.hpp" -DEFINE_ENUM_VALUES(db0::ClassOptions, "SINGLETON", "NO_DEFAULT_TAGS", "IMMUTABLE", "PROTECT_FIELDS") +DEFINE_ENUM_VALUES(db0::ClassOptions, "SINGLETON", "NO_DEFAULT_TAGS", "IMMUTABLE", "PROTECT_FIELDS", "INTERN") namespace db0::object_model @@ -307,6 +307,10 @@ namespace db0::object_model return (*this)->m_flags[ClassOptions::IMMUTABLE]; } + bool Class::isIntern() const { + return (*this)->m_flags[ClassOptions::INTERN]; + } + bool Class::hasOwnProtectFields() const { return (*this)->m_flags[ClassOptions::PROTECT_FIELDS]; } @@ -339,6 +343,14 @@ namespace db0::object_model } } + void Class::assertContentIndexSupported() const + { + if ((*this)->getObjVer() < CONTENT_INDEX_MIN_VERSION) { + THROWF(db0::InputException) << "Class version too low to support ContentIndex. 
Current is: " + << (*this)->getObjVer() << ", for minimum support you need " << CONTENT_INDEX_MIN_VERSION; + } + } + void Class::openFieldSafe() const { if ((*this)->getObjVer() >= FIELD_SAFE_MIN_VERSION && (*this)->m_field_safe_ptr && !m_field_safe) { @@ -346,6 +358,19 @@ namespace db0::object_model } } + void Class::openContentIndex() const + { + assertContentIndexSupported(); + if ((*this)->m_content_index_ptr && !m_content_index) { + auto fixture = getFixture(); + m_content_index.emplace( + fixture->myPtr((*this)->m_content_index_ptr.getAddress()), + fixture, + std::const_pointer_cast(shared_from_this()) + ); + } + } + FieldSafe &Class::ensureFieldSafe() { if (m_field_safe) { @@ -381,6 +406,37 @@ namespace db0::object_model return (*this)->getObjVer() >= FIELD_SAFE_MIN_VERSION && (*this)->m_field_safe_ptr; } + bool Class::hasContentIndex() const + { + if ((*this)->getObjVer() < CONTENT_INDEX_MIN_VERSION) { + return false; + } + return !!(*this)->m_content_index_ptr; + } + + ContentIndex &Class::getContentIndex() + { + assertContentIndexSupported(); + openContentIndex(); + if (!m_content_index) { + auto fixture = getFixture(); + auto type = shared_from_this(); + m_content_index.emplace(fixture, type); + modify().m_content_index_ptr = *m_content_index; + } + return *m_content_index; + } + + const ContentIndex &Class::getContentIndex() const + { + assertContentIndexSupported(); + openContentIndex(); + if (!m_content_index) { + THROWF(db0::InputException) << "ContentIndex is not initialized for class " << getName(); + } + return *m_content_index; + } + FieldSafe &Class::getFieldSafe() { if (!m_field_safe) { @@ -706,6 +762,9 @@ namespace db0::object_model if (m_field_safe) { m_field_safe->detach(); } + if (m_content_index) { + m_content_index->detach(); + } super_t::detach(); } @@ -715,10 +774,16 @@ namespace db0::object_model void Class::flush() const { m_schema.flush(); + if (m_content_index) { + m_content_index->flush(); + } } void Class::rollback() { 
m_schema.rollback(); + if (m_content_index) { + m_content_index->rollback(); + } } void Class::commit() const @@ -729,6 +794,9 @@ namespace db0::object_model if (m_field_safe) { m_field_safe->commit(); } + if (m_content_index) { + m_content_index->commit(); + } super_t::commit(); } diff --git a/src/dbzero/object_model/class/Class.hpp b/src/dbzero/object_model/class/Class.hpp index 696620db..7c871e1a 100644 --- a/src/dbzero/object_model/class/Class.hpp +++ b/src/dbzero/object_model/class/Class.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -37,14 +38,15 @@ namespace db0 // instances of this type opted out of auto-assigned type tags NO_DEFAULT_TAGS = 0x0002, IMMUTABLE = 0x0004, - PROTECT_FIELDS = 0x0008 + PROTECT_FIELDS = 0x0008, + INTERN = 0x0010 }; using ClassFlags = db0::FlagSet; } -DECLARE_ENUM_VALUES(db0::ClassOptions, 4) +DECLARE_ENUM_VALUES(db0::ClassOptions, 5) namespace db0::object_model @@ -86,6 +88,7 @@ DB0_PACKED_BEGIN // Version 1 fields. 
db0_ptr m_field_safe_ptr; + db0_ptr m_content_index_ptr; o_class(RC_LimitedStringPool &, const std::string &name, std::optional module_name, const VFieldMatrix &, const VFidelityVector &, const Schema &, const char *type_id, const char *prefix_name, ClassFlags, @@ -114,6 +117,7 @@ DB0_PACKED_END static constexpr std::uint32_t SLOT_NUM = Fixture::TYPE_SLOT_NUM; static constexpr unsigned int PRIMARY_FIDELITY = 2; static constexpr std::uint16_t FIELD_SAFE_MIN_VERSION = 1; + static constexpr std::uint16_t CONTENT_INDEX_MIN_VERSION = 1; struct Member { @@ -174,6 +178,7 @@ DB0_PACKED_END bool isSingleton() const; bool isNoDefaultTags() const; bool isImmutable() const; + bool isIntern() const; bool assignDefaultTags() const; bool isProtectFields() const; bool hasOwnProtectFields() const; @@ -182,6 +187,9 @@ DB0_PACKED_END bool hasFieldSafe() const; FieldSafe &getFieldSafe(); const FieldSafe &getFieldSafe() const; + bool hasContentIndex() const; + ContentIndex &getContentIndex(); + const ContentIndex &getContentIndex() const; void setFieldAccess(const std::vector &account_ids, FieldMaskFlags mask, const std::vector &field_names); std::optional tryGetFieldAccessByMember(std::uint64_t account_id, const Member &) const; @@ -334,6 +342,7 @@ DB0_PACKED_END VFidelityVector m_fidelities; Schema m_schema; mutable std::optional m_field_safe; + mutable std::optional m_content_index; std::shared_ptr m_base_class_ptr; mutable std::optional m_protect_fields_cache; @@ -356,9 +365,11 @@ DB0_PACKED_END // callback for MemberID updates void onMemberIDUpdated(const MemberID &) const; void assertFieldSafeSupported() const; + void assertContentIndexSupported() const; void resetProtectFieldsCache() const; FieldSafe &ensureFieldSafe(); void openFieldSafe() const; + void openContentIndex() const; // translate member's field ID into a unique key FieldID getPrimaryKey(unsigned int index) const; diff --git a/src/dbzero/object_model/class/ClassFactory.cpp 
b/src/dbzero/object_model/class/ClassFactory.cpp index 752b9eb9..f31ea248 100644 --- a/src/dbzero/object_model/class/ClassFactory.cpp +++ b/src/dbzero/object_model/class/ClassFactory.cpp @@ -71,6 +71,22 @@ namespace db0::object_model return false; } + void validateInternFlag(const Class &type, ClassFactory::TypeObjectPtr lang_type) + { + if (lang_type && type.isIntern() != ClassFactory::LangToolkit::isIntern(lang_type)) { + THROWF(db0::InputException) + << "Cannot change intern flag after memo class materialization: " << type.getName(); + } + } + + void validateImmutableFlag(const Class &type, ClassFactory::TypeObjectPtr lang_type) + { + if (lang_type && type.isImmutable() && !ClassFactory::LangToolkit::isImmutable(lang_type)) { + THROWF(db0::InputException) + << "Cannot change immutable flag after memo class materialization: " << type.getName(); + } + } + o_class_factory::o_class_factory(Memspace &memspace) : m_class_map_ptrs { VClassMap(memspace), VClassMap(memspace), VClassMap(memspace), VClassMap(memspace) } { @@ -159,6 +175,8 @@ namespace db0::object_model if (memo_base) { getOrCreateType(memo_base); } + validateImmutableFlag(*type, lang_type); + validateInternFlag(*type, lang_type); if (LangToolkit::isProtectFields(lang_type) && !type->hasOwnProtectFields()) { type->setProtectFields(); } @@ -176,6 +194,7 @@ namespace db0::object_model } flags.set(ClassOptions::NO_DEFAULT_TAGS, LangToolkit::isNoDefaultTags(lang_type)); flags.set(ClassOptions::IMMUTABLE, LangToolkit::isImmutable(lang_type)); + flags.set(ClassOptions::INTERN, LangToolkit::isIntern(lang_type)); auto memo_base = LangToolkit::getBaseMemoType(lang_type); std::shared_ptr base_class; if (memo_base) { @@ -211,6 +230,8 @@ namespace db0::object_model if (memo_base) { getOrCreateType(memo_base); } + validateImmutableFlag(*it_cached->second, lang_type); + validateInternFlag(*it_cached->second, lang_type); if (LangToolkit::isProtectFields(lang_type) && !it_cached->second->hasOwnProtectFields()) { 
it_cached->second->setProtectFields(); } @@ -246,6 +267,8 @@ namespace db0::object_model m_pending_ptrs.push_back(ptr); } if (lang_type && !it_cached->second.m_lang_type) { + validateImmutableFlag(*it_cached->second.m_class, lang_type); + validateInternFlag(*it_cached->second.m_class, lang_type); it_cached->second.m_lang_type = lang_type; it_cached->second.m_class->setInitVars(LangToolkit::getInitVars(lang_type)); it_cached->second.m_class->setRuntimeFlags(LangToolkit::getMemoFlags(lang_type)); @@ -315,6 +338,8 @@ namespace db0::object_model } // initialize the language model if (lang_type) { + validateImmutableFlag(*type, lang_type); + validateInternFlag(*type, lang_type); type->setInitVars(LangToolkit::getInitVars(lang_type)); type->setRuntimeFlags(LangToolkit::getMemoFlags(lang_type)); if (LangToolkit::isProtectFields(lang_type) && !type->hasOwnProtectFields()) { @@ -327,6 +352,8 @@ namespace db0::object_model } // register the lang type mapping if missing if (lang_type && !it_cached->second.m_lang_type) { + validateImmutableFlag(*it_cached->second.m_class, lang_type); + validateInternFlag(*it_cached->second.m_class, lang_type); it_cached->second.m_lang_type = lang_type; it_cached->second.m_class->setInitVars(LangToolkit::getInitVars(lang_type)); it_cached->second.m_class->setRuntimeFlags(LangToolkit::getMemoFlags(lang_type)); diff --git a/src/dbzero/object_model/dict/o_dict.cpp b/src/dbzero/object_model/dict/o_dict.cpp index 88159465..27b3436e 100644 --- a/src/dbzero/object_model/dict/o_dict.cpp +++ b/src/dbzero/object_model/dict/o_dict.cpp @@ -284,6 +284,38 @@ namespace db0::object_model return nullptr; } + const o_dict::Item *o_dict::get(const Item &key) const + { + auto capacity = hashIndexCapacity(size()); + if (capacity == 0) { + return nullptr; + } + + auto element = elementFromItem(key); + const auto *entries = beginOfHashIndex(); + auto slot = itemHash(key) % capacity; + const auto &entry = entries[slot]; + if (entry.isEmpty()) { + return nullptr; + } + 
+ auto offset = entry.offset(); + if (!entry.isBucket()) { + const auto &pair = pairAtOffset(offset); + return itemEqualsElement(pair.key(), element) ? &pair.value() : nullptr; + } + + const auto &bucket = bucketAtOffset(offset); + auto keyIt = bucket.keys().begin(); + auto valueIt = bucket.values().begin(); + for (; keyIt != bucket.keys().end(); ++keyIt, ++valueIt) { + if (itemEqualsElement(*keyIt, element)) { + return &*valueIt; + } + } + return nullptr; + } + o_dict::const_iterator o_dict::begin() const { return const_iterator(reinterpret_cast(beginOfPairs())); diff --git a/src/dbzero/object_model/dict/o_dict.hpp b/src/dbzero/object_model/dict/o_dict.hpp index a4856bff..7bdde8cc 100644 --- a/src/dbzero/object_model/dict/o_dict.hpp +++ b/src/dbzero/object_model/dict/o_dict.hpp @@ -138,6 +138,7 @@ DB0_PACKED_BEGIN bool empty() const; bool contains(const Element &key) const; const Item *get(const Element &key) const; + const Item *get(const Item &key) const; const_iterator begin() const; const_iterator end() const; std::size_t sizeOf() const; diff --git a/src/dbzero/object_model/object/ContentIndex.cpp b/src/dbzero/object_model/object/ContentIndex.cpp new file mode 100644 index 00000000..af8acfc3 --- /dev/null +++ b/src/dbzero/object_model/object/ContentIndex.cpp @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2026 DBZero Software sp. z o.o. 
+ +#include "ContentIndex.hpp" + +#include +#include +#include +#include + +namespace db0::object_model +{ + namespace + { + ContentIndex::TypeObjectSharedPtr tryResolveLangType( + const db0::swine_ptr &fixture, const std::shared_ptr &type) + { + auto &classFactory = fixture->get(); + if (classFactory.hasLangType(*type)) { + return classFactory.getLangType(*type); + } + return {}; + } + } + + ContentIndex::ContentIndex(db0::swine_ptr &fixture, std::shared_ptr type) + : super_t(*fixture) + , m_fixture(fixture) + , m_class(std::move(type)) + , m_lang_type(tryResolveLangType(fixture, m_class)) + , m_base_index(*fixture) + { + modify().m_base_index_ptr = m_base_index.getAddress(); + } + + ContentIndex::ContentIndex(mptr ptr, db0::swine_ptr &fixture, std::shared_ptr type) + : super_t(ptr) + , m_fixture(fixture) + , m_class(std::move(type)) + , m_lang_type(tryResolveLangType(fixture, m_class)) + , m_base_index(myPtr((*this)->m_base_index_ptr)) + { + } + + ContentIndex::~ContentIndex() + { + assert(m_pending_updates.empty() && "ContentIndex::flush() or close() must be called before destruction"); + } + + void ContentIndex::resyncBucket(typename BaseIndexT::iterator &iterator, const BucketIndexT &bucket) const + { + iterator.modifyItem().value.m_index_address = bucket.getAddress(); + iterator.modifyItem().value.m_type = bucket.getIndexType(); + } + + void ContentIndex::incrementSize() const + { + ++const_cast(this)->modify().m_size; + } + + void ContentIndex::decrementSize() const + { + --const_cast(this)->modify().m_size; + } + + void ContentIndex::applyInsert(HashT hash, UniqueAddress address) const + { + auto iterator = m_base_index.find(hash); + if (iterator == m_base_index.end()) { + BucketIndexT bucket(getMemspace(), address); + m_base_index.insert({hash, bucket}); + incrementSize(); + return; + } + + auto bucket = (*iterator).value.getIndex(getMemspace()); + if (bucket.contains(address)) { + return; + } + auto oldAddress = bucket.getAddress(); + 
bucket.insert(address); + if (bucket.getAddress() != oldAddress) { + resyncBucket(iterator, bucket); + } + incrementSize(); + } + + void ContentIndex::applyRemove(HashT hash, UniqueAddress address) const + { + auto iterator = m_base_index.find(hash); + if (iterator == m_base_index.end()) { + return; + } + + auto bucket = (*iterator).value.getIndex(getMemspace()); + if (!bucket.contains(address)) { + return; + } + + if (bucket.size() == 1) { + m_base_index.erase(iterator); + bucket.destroy(); + decrementSize(); + return; + } + + auto oldAddress = bucket.getAddress(); + bucket.erase(address); + if (bucket.getAddress() != oldAddress) { + resyncBucket(iterator, bucket); + } + decrementSize(); + } + + void ContentIndex::insert(const o_embedded_object &key, UniqueAddress address) const + { + auto fixture = m_fixture; + auto hash = intern_hash(fixture, key); + m_pending_updates.push_back({true, hash, address}); + } + + void ContentIndex::remove(const o_embedded_object &key, UniqueAddress address) const + { + auto fixture = m_fixture; + auto hash = intern_hash(fixture, key); + m_pending_updates.push_back({false, hash, address}); + } + + bool ContentIndex::contains(HashT hash, UniqueAddress address) const + { + flush(); + + auto iterator = m_base_index.find(hash); + if (iterator == m_base_index.end()) { + return false; + } + + auto bucket = (*iterator).value.getIndex(getMemspace()); + return bucket.contains(address); + } + + bool ContentIndex::contains(const o_embedded_object &key, UniqueAddress address) const + { + auto fixture = m_fixture; + return contains(intern_hash(fixture, key), address); + } + + bool ContentIndex::contains(const ImmutableObjectInitializer &initializer, UniqueAddress address) const + { + auto fixture = m_fixture; + return contains(intern_hash(fixture, initializer), address); + } + + ContentIndex::LangToolkit::TypeObjectPtr ContentIndex::getLangType() const + { + if (!!m_lang_type) { + return m_lang_type.get(); + } + m_lang_type = 
tryResolveLangType(m_fixture, m_class); + return m_lang_type.get(); + } + + bool ContentIndex::candidateMatches(const ImmutableObjectInitializer &initializer, UniqueAddress candidate) const + { + auto fixture = m_fixture; + auto candidateObject = LangToolkit::unloadAnyObject( + fixture, candidate.getAddress(), m_class, getLangType(), candidate.getInstanceId(), AccessFlags {} + ); + return intern_compare(fixture, initializer, LangToolkit::getMemoImmutableObject(candidateObject.get())) == 0; + } + + std::optional ContentIndex::lookup(const ImmutableObjectInitializer &initializer) const + { + flush(); + + auto fixture = m_fixture; + auto iterator = m_base_index.find(intern_hash(fixture, initializer)); + if (iterator == m_base_index.end()) { + return std::nullopt; + } + + auto bucket = (*iterator).value.getIndex(getMemspace()); + auto bucketIterator = bucket.beginJoin(1); + while (!bucketIterator.is_end()) { + auto candidateAddress = *bucketIterator; + if (candidateMatches(initializer, candidateAddress)) { + return candidateAddress; + } + ++bucketIterator; + } + return std::nullopt; + } + + void ContentIndex::rollback() + { + m_pending_updates.clear(); + } + + void ContentIndex::flush() const + { + if (m_pending_updates.empty()) { + return; + } + + auto pendingUpdates = std::move(m_pending_updates); + m_pending_updates.clear(); + for (const auto &update : pendingUpdates) { + if (update.m_insert) { + applyInsert(update.m_hash, update.m_address); + } else { + applyRemove(update.m_hash, update.m_address); + } + } + } + + void ContentIndex::commit() const + { + flush(); + m_base_index.commit(); + super_t::commit(); + } + + void ContentIndex::detach() const + { + m_base_index.detach(); + super_t::detach(); + } + + void ContentIndex::close() + { + m_pending_updates.clear(); + } + + bool ContentIndex::empty() const + { + return m_pending_updates.empty() && m_base_index.empty(); + } + + std::uint64_t ContentIndex::size() const + { + flush(); + return (*this)->m_size; + } + +} 
diff --git a/src/dbzero/object_model/object/ContentIndex.hpp b/src/dbzero/object_model/object/ContentIndex.hpp new file mode 100644 index 00000000..78141404 --- /dev/null +++ b/src/dbzero/object_model/object/ContentIndex.hpp @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2026 DBZero Software sp. z o.o. + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace db0 +{ + class Fixture; +} + +namespace db0::object_model +{ + + class Class; + +DB0_PACKED_BEGIN + struct DB0_PACKED_ATTR o_content_index: public db0::o_fixed_versioned + { + Address m_base_index_ptr = {}; + std::uint64_t m_size = 0; + std::array m_reserved = {0, 0, 0}; + }; +DB0_PACKED_END + + using ContentBucketIndex = db0::MorphingBIndex; + +DB0_PACKED_BEGIN + struct DB0_PACKED_ATTR ContentBucketRef + { + db0::Address m_index_address = {}; + db0::bindex::type m_type = db0::bindex::type::empty; + + ContentBucketRef() = default; + + ContentBucketRef(const ContentBucketIndex &index) + : m_index_address(index.getAddress()) + , m_type(index.getIndexType()) + { + } + + ContentBucketIndex getIndex(db0::Memspace &memspace) const + { + return {memspace, m_index_address, m_type}; + } + }; +DB0_PACKED_END + + class ContentIndex: public db0::v_object + { + public: + using super_t = db0::v_object; + using HashT = std::uint64_t; + using BucketIndexT = ContentBucketIndex; + using BucketItemT = db0::key_value; + using BaseIndexT = db0::v_bindex; + using LangToolkit = LangConfig::LangToolkit; + using TypeObjectSharedPtr = typename LangToolkit::TypeObjectSharedPtr; + + ContentIndex(db0::swine_ptr &, std::shared_ptr); + ContentIndex(mptr, db0::swine_ptr &, std::shared_ptr); + ~ContentIndex(); + + void insert(const o_embedded_object &, UniqueAddress) const; + void remove(const o_embedded_object &, UniqueAddress) const; + bool contains(const o_embedded_object &, UniqueAddress) const; + 
bool contains(const ImmutableObjectInitializer &, UniqueAddress) const; + std::optional lookup(const ImmutableObjectInitializer &) const; + + void rollback(); + void flush() const; + void commit() const; + void detach() const; + void close(); + bool empty() const; + std::uint64_t size() const; + + private: + db0::swine_ptr m_fixture; + std::shared_ptr m_class; + mutable TypeObjectSharedPtr m_lang_type; + mutable BaseIndexT m_base_index; + struct PendingUpdate + { + bool m_insert = false; + HashT m_hash = 0; + UniqueAddress m_address = {}; + }; + mutable std::vector m_pending_updates; + + void applyInsert(HashT, UniqueAddress) const; + void applyRemove(HashT, UniqueAddress) const; + void incrementSize() const; + void decrementSize() const; + bool contains(HashT, UniqueAddress) const; + void resyncBucket(typename BaseIndexT::iterator &, const BucketIndexT &) const; + typename LangToolkit::TypeObjectPtr getLangType() const; + bool candidateMatches(const ImmutableObjectInitializer &, UniqueAddress) const; + }; + +} diff --git a/src/dbzero/object_model/object/InternContent.cpp b/src/dbzero/object_model/object/InternContent.cpp new file mode 100644 index 00000000..9d1e7c1d --- /dev/null +++ b/src/dbzero/object_model/object/InternContent.cpp @@ -0,0 +1,2456 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#include "InternContent.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace db0::object_model +{ + namespace + { + // These tags describe the normalized intern-content stream, including structural + // boundaries that are not represented by StorageClass values. The numeric values + // are part of the intern hash format and must remain stable. 
+ enum class Token: std::uint8_t + { + Object = 1, + Field = 2, + Tuple = 3, + Set = 4, + Dict = 5, + Pair = 6, + None = 7, + Bool = 8, + Int = 9, + Double = 10, + String = 11, + Bytes = 12, + UInt = 13 + }; + + struct FieldValue + { + std::uint32_t m_index = 0; + StorageClass m_kind = StorageClass::UNDEFINED; + Value m_value; + const o_tuple_item *m_item = nullptr; + PyObject *m_object = nullptr; + bool m_valid = false; + }; + + // Interned objects are expected to be small value objects. The 1MB stream + // budget is intentionally generous and mainly guards accidental cycles or + // unexpectedly large content from unbounded hash/compare traversal. + constexpr std::size_t INTERN_CONTENT_STREAM_LIMIT = 1024 * 1024; + constexpr std::size_t INTERN_REFERENCE_TRAVERSAL_CHARGE = 1024; + + struct StreamBudget + { + void add(std::size_t size) + { + if (size > INTERN_CONTENT_STREAM_LIMIT || m_consumed > INTERN_CONTENT_STREAM_LIMIT - size) { + THROWF(db0::InputException) + << "Intern content stream size threshold exceeded; possible reference cycle"; + } + m_consumed += size; + } + + std::size_t m_consumed = 0; + }; + + class HashSink + { + public: + HashSink() + : m_budget(std::make_shared()) + { + } + + void writeByte(std::uint8_t value) + { + write(&value, sizeof(value)); + } + + void writeBytes(const void *data, std::size_t size) + { + write(data, size); + } + + template void writeScalar(T value) + { + write(&value, sizeof(value)); + } + + std::uint64_t getValue() const + { + return m_hash; + } + + void charge(std::size_t size) + { + m_budget->add(size); + } + + private: + void write(const void *data, std::size_t size) + { + charge(size); + const auto *cursor = static_cast(data); + for (std::size_t i = 0; i < size; ++i) { + m_hash ^= cursor[i]; + m_hash *= 1099511628211ULL; + } + } + + std::shared_ptr m_budget; + std::uint64_t m_hash = 1469598103934665603ULL; + }; + + template void writeToken(SinkT &sink, Token value) + { + sink.writeByte(static_cast(value)); + } + + 
StorageClass getNormalizedKind(StorageClass kind) + { + switch (kind) { + case StorageClass::PACKED_INT32: + return StorageClass::INT64; + case StorageClass::STRING_REF: + case StorageClass::POOLED_STRING: + case StorageClass::STR64: + case StorageClass::EMBEDDED_STRING: + return StorageClass::EMBEDDED_STRING; + case StorageClass::DB0_BYTES: + case StorageClass::DB0_BYTES_ARRAY: + case StorageClass::EMBEDDED_BYTES: + return StorageClass::EMBEDDED_BYTES; + case StorageClass::DB0_TUPLE: + case StorageClass::DB0_LIST: + return StorageClass::EMBEDDED_TUPLE; + case StorageClass::DB0_SET: + return StorageClass::EMBEDDED_SET; + case StorageClass::DB0_DICT: + return StorageClass::EMBEDDED_DICT; + case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_OBJECT_REF: + case StorageClass::EMBEDDED_OBJECT: + return StorageClass::EMBEDDED_OBJECT; + default: + return kind; + } + } + + int compareWithFixture( + db0::swine_ptr *fixture, const o_tuple_item &lhs, const o_tuple_item &rhs + ); + int compareWithFixture( + db0::swine_ptr *fixture, const o_dict_pair &lhs, const o_dict_pair &rhs + ); + int compareWithFixture(db0::swine_ptr *fixture, PyObject *lhs, PyObject *rhs); + std::uint64_t hashWithFixture(db0::swine_ptr *fixture, const o_tuple_item &item); + std::uint64_t hashWithFixture(db0::swine_ptr *fixture, PyObject *object); + std::uint64_t hashPairWithFixture(db0::swine_ptr *fixture, const o_dict_pair &pair); + std::uint64_t hashPythonDictPairWithFixture( + db0::swine_ptr *fixture, PyObject *dict, PyObject *key + ); + std::uint64_t hashFieldWithFixture(db0::swine_ptr *fixture, const FieldValue &field); + + db0::python::PyToolkit::ObjectSharedPtr resolveObjectRef( + db0::swine_ptr *fixture, db0::UniqueAddress address + ) + { + assert(fixture && *fixture); + + auto &classFactory = (*fixture)->template get(); + return db0::python::PyToolkit::unloadAnyObject( + *fixture, address.getAddress(), classFactory, nullptr, address.getInstanceId() + ); + } + + 
db0::python::PyToolkit::ObjectSharedPtr resolveObjectRef( + db0::swine_ptr *fixture, StorageClass kind, Value value + ) + { + assert(fixture && *fixture); + + if (kind == StorageClass::OBJECT_REF) { + auto &classFactory = (*fixture)->template get(); + return db0::python::PyToolkit::unloadObject(*fixture, value.asAddress(), classFactory); + } + + auto uniqueAddress = value.asUniqueAddress(); + if (!uniqueAddress.hasInstanceId()) { + THROWF(db0::InputException) << "Invalid intern object reference"; + } + return resolveObjectRef(fixture, uniqueAddress); + } + + class InternStreamer + { + public: + InternStreamer(HashSink &sink, db0::swine_ptr *fixture) + : m_sink(sink) + , m_fixture(fixture) + { + } + + void writeObject(const o_embedded_object &object) + { + writeToken(m_sink, Token::Object); + m_sink.writeScalar(object.getClassRef()); + writeFields(object); + } + + void writeInitializer(const ImmutableObjectInitializer &initializer) + { + writeToken(m_sink, Token::Object); + m_sink.writeScalar(initializer.getClassPtr()->getClassRef()); + writeFields(initializer); + } + + void writeTupleItemForHash(const o_tuple_item &item) + { + writeTupleItem(item); + } + + void writePythonItemForHash(PyObject *object) + { + writePythonItem(object); + } + + void writeDictPairForHash(const o_dict_pair &pair) + { + writeToken(m_sink, Token::Pair); + writeTupleItem(pair.key()); + writeTupleItem(pair.value()); + } + + void writePythonDictPairForHash(PyObject *dict, PyObject *key) + { + auto *value = PyDict_GetItemWithError(dict, key); + if (!value) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to read Python dict value"; + } + writeToken(m_sink, Token::Pair); + writePythonItem(key); + writePythonItem(value); + } + + void writeFieldForHash(const FieldValue &field) + { + writeToken(m_sink, Token::Field); + m_sink.writeScalar(field.m_index); + writeFieldValue(field); + } + + private: + void writeFields(const o_embedded_object &object) + { + 
writeUnorderedFields(countObjectFields(object), [&](auto &&emit) { + forEachObjectField(object, emit); + }); + } + + void writeFields(const ImmutableObjectInitializer &initializer) + { + writeUnorderedFields(countInitializerFields(initializer), [&](auto &&emit) { + forEachInitializerField(initializer, emit); + }); + } + + template void writeUnorderedFields( + std::uint64_t count, FieldIteratorT iterateFields + ) + { + writeToken(m_sink, Token::Field); + m_sink.writeScalar(count); + std::uint64_t fieldHashSum = 0; + std::uint64_t emitted = 0; + iterateFields([&](const FieldValue &field) { + fieldHashSum += hashFieldWithFixture(m_fixture, field); + ++emitted; + }); + if (emitted != count) { + THROWF(db0::InternalException) << "Unable to normalize intern fields"; + } + m_sink.writeScalar(fieldHashSum); + } + + std::uint64_t countObjectFields(const o_embedded_object &object) const + { + std::uint64_t count = 0; + const auto &types = object.pos_vt().types(); + for (std::size_t i = 0; i < object.pos_vt().size(); ++i) { + count += isFieldValue(types[i]) ? 1 : 0; + } + for (const auto &value: object.index_vt().xvalues()) { + count += isFieldValue(value.m_type) ? 1 : 0; + } + for (const auto &entry: object.field_map()) { + (void)entry; + ++count; + } + return count; + } + + std::uint64_t countInitializerFields(const ImmutableObjectInitializer &initializer) const + { + std::uint64_t count = 0; + PosVT::Data posVtData; + unsigned int posVtOffset = 0; + auto indexVtData = initializer.getData(posVtData, posVtOffset); + + for (std::size_t i = 0; i < posVtData.size(); ++i) { + count += isFieldValue(posVtData.m_types[i]) ? 1 : 0; + } + for (auto value = indexVtData.first; value != indexVtData.second; ++value) { + count += isFieldValue(value->m_type) ? 1 : 0; + } + for (const auto &value: initializer.objects()) { + count += !!value.m_object && value.m_storage_class != StorageClass::DELETED ? 
1 : 0; + } + return count; + } + + static bool isFieldValue(StorageClass kind) + { + return kind != StorageClass::UNDEFINED && kind != StorageClass::DELETED; + } + + static FieldValue makeField(std::uint32_t index, StorageClass kind, Value value) + { + FieldValue field; + field.m_index = index; + field.m_kind = kind; + field.m_value = value; + field.m_valid = true; + return field; + } + + static FieldValue makeField(std::uint32_t index, const o_tuple_item &item) + { + FieldValue field; + field.m_index = index; + field.m_item = &item; + field.m_valid = true; + return field; + } + + static FieldValue makeField(std::uint32_t index, StorageClass kind, PyObject *object) + { + FieldValue field; + field.m_index = index; + field.m_kind = kind; + field.m_object = object; + field.m_valid = true; + return field; + } + + template void forEachObjectField(const o_embedded_object &object, EmitT emit) + { + const auto &types = object.pos_vt().types(); + const auto &values = object.pos_vt().values(); + for (std::size_t i = 0; i < object.pos_vt().size(); ++i) { + if (isFieldValue(types[i])) { + emit(makeField( + static_cast(object.pos_vt().offset() + i), types[i], values[i] + )); + } + } + for (const auto &value: object.index_vt().xvalues()) { + if (isFieldValue(value.m_type)) { + emit(makeField(value.getIndex(), value.m_type, value.m_value)); + } + } + for (const auto &entry: object.field_map()) { + emit(makeField(getFieldIndex(entry.key()), entry.value())); + } + } + + template void forEachInitializerField( + const ImmutableObjectInitializer &initializer, EmitT emit + ) + { + PosVT::Data posVtData; + unsigned int posVtOffset = 0; + auto indexVtData = initializer.getData(posVtData, posVtOffset); + + for (std::size_t i = 0; i < posVtData.size(); ++i) { + if (isFieldValue(posVtData.m_types[i])) { + emit(makeField( + static_cast(posVtOffset + i), + posVtData.m_types[i], posVtData.m_values[i] + )); + } + } + for (auto value = indexVtData.first; value != indexVtData.second; ++value) { + 
if (isFieldValue(value->m_type)) { + emit(makeField(value->getIndex(), value->m_type, value->m_value)); + } + } + for (const auto &value: initializer.objects()) { + if (!!value.m_object && value.m_storage_class != StorageClass::DELETED) { + emit(makeField(value.m_loc.first, value.m_storage_class, value.m_object.get())); + } + } + } + + void writeFieldValue(const FieldValue &field) + { + if (field.m_item) { + writeTupleItem(*field.m_item); + return; + } + if (field.m_object) { + writePythonObject(field.m_kind, field.m_object); + return; + } + writeFixedValue(field.m_kind, field.m_value); + } + + std::uint32_t getFieldIndex(const o_tuple_item &key) const + { + if (key.itemKind() == StorageClass::PACKED_INT32) { + return key.packedIntPayload().value(); + } + if (key.itemKind() == StorageClass::INT64) { + return static_cast(key.intPayload().value()); + } + THROWF(db0::InternalException) << "Embedded object field map key is not an integer"; + return 0; + } + + void writeFixedValue(StorageClass kind, Value value) + { + switch (kind) { + case StorageClass::NONE: + writeToken(m_sink, Token::None); + return; + case StorageClass::BOOLEAN: + writeToken(m_sink, Token::Bool); + m_sink.writeScalar(value.m_store != 0 ? 
1 : 0); + return; + case StorageClass::INT64: + case StorageClass::PACKED_INT32: + writeToken(m_sink, Token::Int); + m_sink.writeScalar(static_cast(value.m_store)); + return; + case StorageClass::FP_NUMERIC64: + writeToken(m_sink, Token::Double); + m_sink.writeScalar(value.m_store); + return; + case StorageClass::PTIME64: + case StorageClass::DATE: + case StorageClass::DATETIME: + case StorageClass::DATETIME_TZ: + case StorageClass::TIME: + case StorageClass::TIME_TZ: + case StorageClass::DECIMAL: + writeToken(m_sink, Token::UInt); + m_sink.writeByte(static_cast(kind)); + m_sink.writeScalar(value.m_store); + return; + case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_OBJECT_REF: + writeObjectRef(kind, value); + return; + default: + THROWF(db0::InternalException) << "Unsupported fixed intern content kind: " << kind; + } + } + + void writeTupleItem(const o_tuple_item &item) + { + switch (item.itemKind()) { + case StorageClass::NONE: + writeToken(m_sink, Token::None); + return; + case StorageClass::BOOLEAN: + writeToken(m_sink, Token::Bool); + m_sink.writeScalar(item.boolPayload().value() ? 
1 : 0); + return; + case StorageClass::PACKED_INT32: + writeToken(m_sink, Token::Int); + m_sink.writeScalar(static_cast(item.packedIntPayload().value())); + return; + case StorageClass::INT64: + writeToken(m_sink, Token::Int); + m_sink.writeScalar(item.intPayload().value()); + return; + case StorageClass::FP_NUMERIC64: + writeToken(m_sink, Token::Double); + m_sink.writeScalar(item.doublePayload().value()); + return; + case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: { + auto value = item.stringPayload().get(); + writeBytes(Token::String, value.get_raw(), value.size()); + return; + } + case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: + writeBytes(Token::Bytes, item.bytesPayload().begin(), item.bytesPayload().size()); + return; + case StorageClass::PTIME64: + case StorageClass::DATE: + case StorageClass::DATETIME: + case StorageClass::DATETIME_TZ: + case StorageClass::TIME: + case StorageClass::TIME_TZ: + case StorageClass::DECIMAL: + writeToken(m_sink, Token::UInt); + m_sink.writeByte(static_cast(item.itemKind())); + m_sink.writeScalar(item.uint64Payload().value()); + return; + case StorageClass::EMBEDDED_TUPLE: + writeTuple(o_py_tuple::__const_ref(item.embeddedPayload().begin())); + return; + case StorageClass::EMBEDDED_SET: + writeSet(o_py_set::__const_ref(item.embeddedPayload().begin())); + return; + case StorageClass::EMBEDDED_DICT: + writeDict(o_py_dict::__const_ref(item.embeddedPayload().begin())); + return; + case StorageClass::EMBEDDED_OBJECT: + writeObject(o_embedded_object::__const_ref(item.embeddedPayload().begin())); + return; + default: + THROWF(db0::InternalException) << "Unsupported tuple intern content kind: " << item.itemKind(); + } + } + + void writeTuple(const o_py_tuple &value) + { + writeToken(m_sink, Token::Tuple); + m_sink.writeScalar(value.size()); + for (const auto &item: value) { + writeTupleItem(item); + } + } + + void writeSet(const o_py_set &value) + { + writeToken(m_sink, Token::Set); + 
m_sink.writeScalar(value.size()); + std::uint64_t itemHashSum = 0; + for (const auto &item: value) { + itemHashSum += hashWithFixture(m_fixture, item); + } + m_sink.writeScalar(itemHashSum); + } + + void writeDict(const o_py_dict &value) + { + writeToken(m_sink, Token::Dict); + m_sink.writeScalar(value.size()); + std::uint64_t pairHashSum = 0; + for (const auto &pair: value) { + pairHashSum += hashPairWithFixture(m_fixture, pair); + } + m_sink.writeScalar(pairHashSum); + } + + int compare(const o_tuple_item &lhs, const o_tuple_item &rhs) + { + return compareWithFixture(m_fixture, lhs, rhs); + } + + int compare(const o_dict_pair &lhs, const o_dict_pair &rhs) + { + return compareWithFixture(m_fixture, lhs, rhs); + } + + int compare(PyObject *lhs, PyObject *rhs) + { + return compareWithFixture(m_fixture, lhs, rhs); + } + + void writeBytes(Token kind, const void *data, std::size_t size) + { + writeToken(m_sink, kind); + m_sink.writeScalar(size); + m_sink.writeBytes(data, size); + } + + void writeObjectRef(StorageClass kind, Value value) + { + writeObjectRefObject(resolveObjectRef(m_fixture, kind, value)); + } + + void writeObjectRef(db0::UniqueAddress address) + { + writeObjectRefObject(resolveObjectRef(m_fixture, address)); + } + + void writeObjectRefObject(const db0::python::PyToolkit::ObjectSharedPtr &pyObject) + { + if (!m_fixture || !*m_fixture) { + THROWF(db0::InputException) << "Fixture is required for intern object references"; + } + m_sink.charge(INTERN_REFERENCE_TRAVERSAL_CHARGE); + if (db0::python::PyEmbeddedMemo_Check(pyObject.get())) { + writeObject(db0::python::getEmbeddedMemoRef( + reinterpret_cast(pyObject.get()) + ).embeddedObject()); + return; + } + if (!db0::python::PyToolkit::isMemoImmutableObject(pyObject.get())) { + THROWF(db0::InputException) << "intern object reference does not resolve to an immutable object"; + } + const auto &memo = db0::python::PyToolkit::getTypeManager() + .template extractObject(pyObject.get()); + 
writeObject(memo.getData()->getObject()); + } + + void writePythonObject(StorageClass storageClass, PyObject *pyObject) + { + switch (getNormalizedKind(storageClass)) { + case StorageClass::EMBEDDED_STRING: { + auto value = db0::python::PyToolkit::getTypeManager().extractString(pyObject); + writeBytes(Token::String, value, std::strlen(value)); + return; + } + case StorageClass::EMBEDDED_BYTES: { + auto value = db0::python::PyToolkit::getTypeManager().extractBytes(pyObject); + writeBytes(Token::Bytes, value.m_data, value.m_size); + return; + } + case StorageClass::EMBEDDED_TUPLE: + writePythonTuple(pyObject); + return; + case StorageClass::EMBEDDED_SET: + writePythonSet(pyObject); + return; + case StorageClass::EMBEDDED_DICT: + writePythonDict(pyObject); + return; + case StorageClass::EMBEDDED_OBJECT: + writePythonMemoObject(pyObject); + return; + default: + THROWF(db0::InternalException) << "Unsupported initializer intern object kind: " << storageClass; + } + } + + void writePythonTuple(PyObject *sequence) + { + writeToken(m_sink, Token::Tuple); + auto count = getPythonSequenceSize(sequence); + m_sink.writeScalar(count); + for (std::size_t i = 0; i < count; ++i) { + writePythonItem(getPythonSequenceItem(sequence, i)); + } + } + + void writePythonSet(PyObject *set) + { + if (!PySet_Check(set)) { + THROWF(db0::InputException) << "Intern set content expects a Python set"; + } + auto size = PySet_GET_SIZE(set); + if (size < 0) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to read Python set size"; + } + auto count = static_cast(size); + writeToken(m_sink, Token::Set); + m_sink.writeScalar(count); + std::uint64_t itemHashSum = 0; + auto iterator = Py_OWN(PyObject_GetIter(set)); + if (!iterator) { + PyErr_Clear(); + THROWF(db0::InputException) << "Intern set content expects a Python set"; + } + Py_FOR(item, iterator) { + itemHashSum += hashWithFixture(m_fixture, *item); + } + if (PyErr_Occurred()) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable 
to iterate Python set"; + } + m_sink.writeScalar(itemHashSum); + } + + void writePythonDict(PyObject *dict) + { + if (!PyDict_Check(dict)) { + THROWF(db0::InputException) << "Intern dict content expects a Python dict"; + } + auto size = PyDict_Size(dict); + if (size < 0) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to read Python dict size"; + } + auto count = static_cast(size); + writeToken(m_sink, Token::Dict); + m_sink.writeScalar(count); + std::uint64_t pairHashSum = 0; + auto iterator = Py_OWN(PyObject_GetIter(dict)); + if (!iterator) { + PyErr_Clear(); + THROWF(db0::InputException) << "Intern dict content expects a Python dict"; + } + Py_FOR(key, iterator) { + pairHashSum += hashPythonDictPairWithFixture(m_fixture, dict, *key); + } + if (PyErr_Occurred()) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to iterate Python dict"; + } + m_sink.writeScalar(pairHashSum); + } + + std::size_t getPythonSequenceSize(PyObject *sequence) const + { + if (PyTuple_Check(sequence)) { + return static_cast(PyTuple_GET_SIZE(sequence)); + } + if (PyList_Check(sequence)) { + return static_cast(PyList_GET_SIZE(sequence)); + } + THROWF(db0::InputException) << "Intern tuple content expects a Python tuple or list"; + return 0; + } + + PyObject *getPythonSequenceItem(PyObject *sequence, std::size_t index) const + { + if (PyTuple_Check(sequence)) { + return PyTuple_GET_ITEM(sequence, static_cast(index)); + } + return PyList_GET_ITEM(sequence, static_cast(index)); + } + + void writePythonItem(PyObject *object) + { + auto &typeManager = db0::python::PyToolkit::getTypeManager(); + auto typeId = typeManager.getTypeId(object); + switch (typeId) { + case db0::bindings::TypeId::NONE: + writeToken(m_sink, Token::None); + return; + case db0::bindings::TypeId::BOOLEAN: + writeToken(m_sink, Token::Bool); + m_sink.writeScalar(object == Py_True ? 
1 : 0); + return; + case db0::bindings::TypeId::INTEGER: { + auto value = PyLong_AsLongLong(object); + if (PyErr_Occurred()) { + PyErr_Clear(); + THROWF(db0::InputException) << "Python integer is out of int64 range"; + } + writeToken(m_sink, Token::Int); + m_sink.writeScalar(value); + return; + } + case db0::bindings::TypeId::FLOAT: + writeToken(m_sink, Token::Double); + m_sink.writeScalar(PyFloat_AsDouble(object)); + return; + case db0::bindings::TypeId::DATETIME: + writePythonUIntItem(StorageClass::DATETIME, typeManager.extractUInt64(typeId, object)); + return; + case db0::bindings::TypeId::DATETIME_TZ: + writePythonUIntItem(StorageClass::DATETIME_TZ, typeManager.extractUInt64(typeId, object)); + return; + case db0::bindings::TypeId::DATE: + writePythonUIntItem(StorageClass::DATE, typeManager.extractUInt64(typeId, object)); + return; + case db0::bindings::TypeId::TIME: + writePythonUIntItem(StorageClass::TIME, typeManager.extractUInt64(typeId, object)); + return; + case db0::bindings::TypeId::TIME_TZ: + writePythonUIntItem(StorageClass::TIME_TZ, typeManager.extractUInt64(typeId, object)); + return; + case db0::bindings::TypeId::DECIMAL: + writePythonUIntItem(StorageClass::DECIMAL, typeManager.extractUInt64(typeId, object)); + return; + case db0::bindings::TypeId::STRING: { + auto value = typeManager.extractString(object); + writeBytes(Token::String, value, std::strlen(value)); + return; + } + case db0::bindings::TypeId::BYTES: { + auto value = typeManager.extractBytes(object); + writeBytes(Token::Bytes, value.m_data, value.m_size); + return; + } + case db0::bindings::TypeId::LIST: + case db0::bindings::TypeId::TUPLE: + writePythonTuple(object); + return; + case db0::bindings::TypeId::SET: + writePythonSet(object); + return; + case db0::bindings::TypeId::DICT: + writePythonDict(object); + return; + case db0::bindings::TypeId::MEMO_IMMUTABLE_OBJECT: + writePythonMemoObject(object); + return; + default: + break; + } + THROWF(db0::InputException) << "Unsupported 
intern content Python type: " << Py_TYPE(object)->tp_name; + } + + void writePythonUIntItem(StorageClass kind, std::uint64_t value) + { + writeToken(m_sink, Token::UInt); + m_sink.writeByte(static_cast(kind)); + m_sink.writeScalar(value); + } + + void writePythonMemoObject(PyObject *pyObject) + { + using MemoImmutableObject = db0::python::PyToolkit::TypeManager::MemoImmutableObject; + + if (db0::python::PyEmbeddedMemo_Check(pyObject)) { + auto &embeddedObject = db0::python::getEmbeddedMemoRef( + reinterpret_cast(pyObject) + ).embeddedObject(); + writeObject(embeddedObject); + return; + } + + const auto &memo = db0::python::PyToolkit::getTypeManager() + .template extractObject(pyObject); + if (!memo.hasInstance()) { + auto *initializer = dynamic_cast( + InitManager::instance.findInitializer(memo) + ); + if (!initializer) { + THROWF(db0::InputException) << "Non-materialized intern memo object has no initializer"; + } + this->writeInitializer(*initializer); + return; + } + + writeObjectRef(memo.getUniqueAddress()); + } + + HashSink &m_sink; + db0::swine_ptr *m_fixture = nullptr; + }; + + std::uint64_t hashWithFixture(db0::swine_ptr *fixture, const o_tuple_item &item) + { + HashSink sink; + InternStreamer(sink, fixture).writeTupleItemForHash(item); + return sink.getValue(); + } + + std::uint64_t hashWithFixture(db0::swine_ptr *fixture, PyObject *object) + { + HashSink sink; + InternStreamer(sink, fixture).writePythonItemForHash(object); + return sink.getValue(); + } + + std::uint64_t hashPairWithFixture(db0::swine_ptr *fixture, const o_dict_pair &pair) + { + HashSink sink; + InternStreamer(sink, fixture).writeDictPairForHash(pair); + return sink.getValue(); + } + + std::uint64_t hashPythonDictPairWithFixture( + db0::swine_ptr *fixture, PyObject *dict, PyObject *key + ) + { + HashSink sink; + InternStreamer(sink, fixture).writePythonDictPairForHash(dict, key); + return sink.getValue(); + } + + std::uint64_t hashFieldWithFixture(db0::swine_ptr *fixture, const FieldValue 
&field) + { + HashSink sink; + InternStreamer(sink, fixture).writeFieldForHash(field); + return sink.getValue(); + } + + class InternComparator + { + public: + explicit InternComparator(db0::swine_ptr *fixture) + : m_fixture(fixture) + { + } + + int compare(const o_embedded_object &lhs, const o_embedded_object &rhs) + { + if (auto result = compareToken(Token::Object, Token::Object)) { + return result; + } + if (auto result = compareScalar(lhs.getClassRef(), rhs.getClassRef())) { + return result; + } + return compareObjectFields(lhs, rhs); + } + + int compare(const ImmutableObjectInitializer &lhs, const o_embedded_object &rhs) + { + if (auto result = compareToken(Token::Object, Token::Object)) { + return result; + } + if (auto result = compareScalar(lhs.getClassPtr()->getClassRef(), rhs.getClassRef())) { + return result; + } + return compareInitializerFields(lhs, rhs); + } + + int compare(const o_embedded_object &lhs, const ImmutableObjectInitializer &rhs) + { + return -compare(rhs, lhs); + } + + int compare(const ImmutableObjectInitializer &lhs, const ImmutableObjectInitializer &rhs) + { + if (auto result = compareToken(Token::Object, Token::Object)) { + return result; + } + if (auto result = compareScalar(lhs.getClassPtr()->getClassRef(), rhs.getClassPtr()->getClassRef())) { + return result; + } + return compareInitializerFields(lhs, rhs); + } + + int compare(const o_tuple_item &lhs, const o_tuple_item &rhs) + { + switch (lhs.itemKind()) { + case StorageClass::NONE: + return compareNoneToItem(rhs); + case StorageClass::BOOLEAN: + return compare(lhs.boolPayload().value(), rhs); + case StorageClass::PACKED_INT32: + return compare(static_cast(lhs.packedIntPayload().value()), rhs); + case StorageClass::INT64: + return compare(lhs.intPayload().value(), rhs); + case StorageClass::FP_NUMERIC64: + return compare(lhs.doublePayload().value(), rhs); + case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: { + auto value = lhs.stringPayload().get(); + return 
compareBytesWithToken(Token::String, value.get_raw(), value.size(), rhs); + } + case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: + return compareBytesWithToken(Token::Bytes, lhs.bytesPayload().begin(), lhs.bytesPayload().size(), rhs); + case StorageClass::PTIME64: + case StorageClass::DATE: + case StorageClass::DATETIME: + case StorageClass::DATETIME_TZ: + case StorageClass::TIME: + case StorageClass::TIME_TZ: + case StorageClass::DECIMAL: + return compare(lhs.itemKind(), lhs.uint64Payload().value(), rhs); + case StorageClass::EMBEDDED_TUPLE: + return compare(o_py_tuple::__const_ref(lhs.embeddedPayload().begin()), rhs); + case StorageClass::EMBEDDED_SET: + return compare(o_py_set::__const_ref(lhs.embeddedPayload().begin()), rhs); + case StorageClass::EMBEDDED_DICT: + return compare(o_py_dict::__const_ref(lhs.embeddedPayload().begin()), rhs); + case StorageClass::EMBEDDED_OBJECT: + return compare(o_embedded_object::__const_ref(lhs.embeddedPayload().begin()), rhs); + default: + THROWF(db0::InternalException) << "Unsupported tuple intern content kind: " << lhs.itemKind(); + return 0; + } + } + + int compare(const o_dict_pair &lhs, const o_dict_pair &rhs) + { + if (auto result = compareToken(Token::Pair, Token::Pair)) { + return result; + } + if (auto result = compare(lhs.key(), rhs.key())) { + return result; + } + return compare(lhs.value(), rhs.value()); + } + + int compare(PyObject *lhs, PyObject *rhs) + { + auto &typeManager = db0::python::PyToolkit::getTypeManager(); + auto lhsType = typeManager.getTypeId(lhs); + auto rhsType = typeManager.getTypeId(rhs); + + switch (lhsType) { + case db0::bindings::TypeId::NONE: + return compareToken(Token::None, getToken(rhs)); + case db0::bindings::TypeId::BOOLEAN: + return compare(lhs == Py_True, rhs); + case db0::bindings::TypeId::INTEGER: { + auto value = PyLong_AsLongLong(lhs); + if (PyErr_Occurred()) { + PyErr_Clear(); + THROWF(db0::InputException) << "Python integer is out of int64 range"; + } + return 
compare(static_cast(value), rhs); + } + case db0::bindings::TypeId::FLOAT: + return compare(PyFloat_AsDouble(lhs), rhs); + case db0::bindings::TypeId::DATETIME: + return compare(StorageClass::DATETIME, typeManager.extractUInt64(lhsType, lhs), rhs); + case db0::bindings::TypeId::DATETIME_TZ: + return compare(StorageClass::DATETIME_TZ, typeManager.extractUInt64(lhsType, lhs), rhs); + case db0::bindings::TypeId::DATE: + return compare(StorageClass::DATE, typeManager.extractUInt64(lhsType, lhs), rhs); + case db0::bindings::TypeId::TIME: + return compare(StorageClass::TIME, typeManager.extractUInt64(lhsType, lhs), rhs); + case db0::bindings::TypeId::TIME_TZ: + return compare(StorageClass::TIME_TZ, typeManager.extractUInt64(lhsType, lhs), rhs); + case db0::bindings::TypeId::DECIMAL: + return compare(StorageClass::DECIMAL, typeManager.extractUInt64(lhsType, lhs), rhs); + case db0::bindings::TypeId::STRING: { + auto value = typeManager.extractString(lhs); + return compareBytesToPythonItem(Token::String, value, std::strlen(value), rhs); + } + case db0::bindings::TypeId::BYTES: { + auto value = typeManager.extractBytes(lhs); + return compareBytesToPythonItem(Token::Bytes, value.m_data, value.m_size, rhs); + } + case db0::bindings::TypeId::LIST: + case db0::bindings::TypeId::TUPLE: + return comparePythonTupleToPythonItem(lhs, rhs); + case db0::bindings::TypeId::SET: + return comparePythonSetToPythonItem(lhs, rhs); + case db0::bindings::TypeId::DICT: + return comparePythonDictToPythonItem(lhs, rhs); + case db0::bindings::TypeId::MEMO_IMMUTABLE_OBJECT: + return comparePythonMemoToPythonItem(lhs, rhs); + default: + break; + } + (void)rhsType; + THROWF(db0::InputException) << "Unsupported intern content Python type: " << Py_TYPE(lhs)->tp_name; + return 0; + } + + private: + template int compareScalar(T lhs, T rhs) + { + return compareBytes(&lhs, &rhs, sizeof(T)); + } + + int compareToken(Token lhs, Token rhs) + { + return compareScalar(static_cast(lhs), static_cast(rhs)); + } + + 
int compareBytes(const void *lhs, const void *rhs, std::size_t size) + { + m_lhsBudget.add(size); + m_rhsBudget.add(size); + auto result = std::memcmp(lhs, rhs, size); + return result < 0 ? -1 : result > 0 ? 1 : 0; + } + + int compareBytesPayload(const void *lhs, std::size_t lhsSize, const void *rhs, std::size_t rhsSize) + { + if (auto result = compareScalar(lhsSize, rhsSize)) { + return result; + } + auto prefix = lhsSize < rhsSize ? lhsSize : rhsSize; + if (auto result = compareBytes(lhs, rhs, prefix)) { + return result; + } + return 0; + } + + static bool isFieldValue(StorageClass kind) + { + return kind != StorageClass::UNDEFINED && kind != StorageClass::DELETED; + } + + std::uint64_t countObjectFields(const o_embedded_object &object) const + { + std::uint64_t count = 0; + const auto &types = object.pos_vt().types(); + for (std::size_t i = 0; i < object.pos_vt().size(); ++i) { + count += isFieldValue(types[i]) ? 1 : 0; + } + for (const auto &value: object.index_vt().xvalues()) { + count += isFieldValue(value.m_type) ? 1 : 0; + } + for (const auto &entry: object.field_map()) { + (void)entry; + ++count; + } + return count; + } + + std::uint64_t countInitializerFields(const ImmutableObjectInitializer &initializer) const + { + std::uint64_t count = 0; + PosVT::Data posVtData; + unsigned int posVtOffset = 0; + auto indexVtData = initializer.getData(posVtData, posVtOffset); + for (std::size_t i = 0; i < posVtData.size(); ++i) { + count += isFieldValue(posVtData.m_types[i]) ? 1 : 0; + } + for (auto value = indexVtData.first; value != indexVtData.second; ++value) { + count += isFieldValue(value->m_type) ? 1 : 0; + } + for (const auto &value: initializer.objects()) { + count += !!value.m_object && value.m_storage_class != StorageClass::DELETED ? 
1 : 0; + } + return count; + } + + template int compareFields(const LhsT &lhs, const RhsT &rhs) + { + auto lhsCount = countFields(lhs); + auto rhsCount = countFields(rhs); + if (auto result = compareToken(Token::Field, Token::Field)) { + return result; + } + if (auto result = compareScalar(lhsCount, rhsCount)) { + return result; + } + auto rhsLookup = makeFieldLookup(rhs); + int compareResult = 0; + std::uint64_t emitted = 0; + forEachField(lhs, [&](const FieldValue &lhsField) { + ++emitted; + if (compareResult) { + return; + } + auto rhsField = rhsLookup.find(lhsField.m_index); + if (!rhsField.m_valid) { + compareResult = -1; + return; + } + compareResult = compareFieldValue(lhsField, rhsField); + }); + if (emitted != lhsCount) { + THROWF(db0::InternalException) << "Unable to compare intern fields"; + } + return compareResult; + } + + std::uint64_t countFields(const o_embedded_object &object) const + { + return countObjectFields(object); + } + + std::uint64_t countFields(const ImmutableObjectInitializer &initializer) const + { + return countInitializerFields(initializer); + } + + int compareObjectFields(const o_embedded_object &lhs, const o_embedded_object &rhs) + { + return compareFields(lhs, rhs); + } + + int compareInitializerFields(const ImmutableObjectInitializer &lhs, const o_embedded_object &rhs) + { + return compareFields(lhs, rhs); + } + + int compareInitializerFields(const ImmutableObjectInitializer &lhs, const ImmutableObjectInitializer &rhs) + { + return compareSequentialInitializerFields(lhs, rhs); + } + + static FieldValue makeField(std::uint32_t index, StorageClass kind, Value value) + { + FieldValue field; + field.m_index = index; + field.m_kind = kind; + field.m_value = value; + field.m_valid = true; + return field; + } + + static FieldValue makeField(std::uint32_t index, const o_tuple_item &item) + { + FieldValue field; + field.m_index = index; + field.m_item = &item; + field.m_valid = true; + return field; + } + + static FieldValue 
makeField(std::uint32_t index, StorageClass kind, PyObject *object) + { + FieldValue field; + field.m_index = index; + field.m_kind = kind; + field.m_object = object; + field.m_valid = true; + return field; + } + + struct ObjectFieldLookup + { + explicit ObjectFieldLookup(const o_embedded_object &object) + : m_object(object) + { + } + + FieldValue find(std::uint32_t index) const + { + std::pair value; + if (m_object.pos_vt().find(index, value) && InternComparator::isFieldValue(value.first)) { + return InternComparator::makeField(index, value.first, value.second); + } + if (m_object.index_vt().find(index, value) && InternComparator::isFieldValue(value.first)) { + return InternComparator::makeField(index, value.first, value.second); + } + if (auto *item = m_object.variableValue(index)) { + return InternComparator::makeField(index, *item); + } + return {}; + } + + const o_embedded_object &m_object; + }; + + ObjectFieldLookup makeFieldLookup(const o_embedded_object &object) const + { + return ObjectFieldLookup(object); + } + + struct InitializerFieldCursor + { + explicit InitializerFieldCursor(const ImmutableObjectInitializer &initializer) + : m_objects(&initializer.objects()) + { + m_indexVtData = initializer.getData(m_posVtData, m_posVtOffset); + m_indexValue = m_indexVtData.first; + } + + FieldValue next() + { + while (m_pos < m_posVtData.size()) { + auto pos = m_pos++; + if (InternComparator::isFieldValue(m_posVtData.m_types[pos])) { + return InternComparator::makeField( + static_cast(m_posVtOffset + pos), + m_posVtData.m_types[pos], m_posVtData.m_values[pos] + ); + } + } + while (m_indexValue != m_indexVtData.second) { + auto *value = m_indexValue++; + if (InternComparator::isFieldValue(value->m_type)) { + return InternComparator::makeField( + value->getIndex(), value->m_type, value->m_value + ); + } + } + while (m_objectPos < m_objects->size()) { + const auto &value = (*m_objects)[m_objectPos++]; + if (!!value.m_object && value.m_storage_class != 
StorageClass::DELETED) { + return InternComparator::makeField( + value.m_loc.first, value.m_storage_class, value.m_object.get() + ); + } + } + return {}; + } + + PosVT::Data m_posVtData; + unsigned int m_posVtOffset = 0; + std::size_t m_pos = 0; + std::pair m_indexVtData = { nullptr, nullptr }; + const XValue *m_indexValue = nullptr; + const std::vector *m_objects = nullptr; + std::size_t m_objectPos = 0; + }; + + int compareSequentialInitializerFields( + const ImmutableObjectInitializer &lhs, const ImmutableObjectInitializer &rhs + ) + { + auto lhsCount = countInitializerFields(lhs); + auto rhsCount = countInitializerFields(rhs); + if (auto result = compareToken(Token::Field, Token::Field)) { + return result; + } + if (auto result = compareScalar(lhsCount, rhsCount)) { + return result; + } + InitializerFieldCursor lhsCursor(lhs); + InitializerFieldCursor rhsCursor(rhs); + for (std::uint64_t i = 0; i < lhsCount; ++i) { + auto lhsField = lhsCursor.next(); + auto rhsField = rhsCursor.next(); + if (!lhsField.m_valid || !rhsField.m_valid) { + THROWF(db0::InternalException) << "Unable to compare intern fields"; + } + if (auto result = compareScalar(lhsField.m_index, rhsField.m_index)) { + return result; + } + if (auto result = compareFieldValue(lhsField, rhsField)) { + return result; + } + } + return 0; + } + + template void forEachField(const o_embedded_object &object, EmitT emit) + { + const auto &types = object.pos_vt().types(); + const auto &values = object.pos_vt().values(); + for (std::size_t i = 0; i < object.pos_vt().size(); ++i) { + if (isFieldValue(types[i])) { + emit(makeField( + static_cast(object.pos_vt().offset() + i), types[i], values[i] + )); + } + } + for (const auto &value: object.index_vt().xvalues()) { + if (isFieldValue(value.m_type)) { + emit(makeField(value.getIndex(), value.m_type, value.m_value)); + } + } + for (const auto &entry: object.field_map()) { + emit(makeField(getFieldIndex(entry.key()), entry.value())); + } + } + + template void 
forEachField(const ImmutableObjectInitializer &initializer, EmitT emit) + { + PosVT::Data posVtData; + unsigned int posVtOffset = 0; + auto indexVtData = initializer.getData(posVtData, posVtOffset); + for (std::size_t i = 0; i < posVtData.size(); ++i) { + if (isFieldValue(posVtData.m_types[i])) { + emit(makeField( + static_cast(posVtOffset + i), + posVtData.m_types[i], posVtData.m_values[i] + )); + } + } + for (auto value = indexVtData.first; value != indexVtData.second; ++value) { + if (isFieldValue(value->m_type)) { + emit(makeField(value->getIndex(), value->m_type, value->m_value)); + } + } + for (const auto &value: initializer.objects()) { + if (!!value.m_object && value.m_storage_class != StorageClass::DELETED) { + emit(makeField(value.m_loc.first, value.m_storage_class, value.m_object.get())); + } + } + } + + std::uint32_t getFieldIndex(const o_tuple_item &key) const + { + if (key.itemKind() == StorageClass::PACKED_INT32) { + return key.packedIntPayload().value(); + } + if (key.itemKind() == StorageClass::INT64) { + return static_cast(key.intPayload().value()); + } + THROWF(db0::InternalException) << "Embedded object field map key is not an integer"; + return 0; + } + + int compareFieldValue(const FieldValue &lhs, const FieldValue &rhs) + { + if (lhs.m_item) { + return compareItemToField(*lhs.m_item, rhs); + } + if (lhs.m_object) { + return comparePythonObjectToField(lhs.m_kind, lhs.m_object, rhs); + } + return compareFixedToField(lhs.m_kind, lhs.m_value, rhs); + } + + int compareItemToField(const o_tuple_item &lhs, const FieldValue &rhs) + { + if (rhs.m_item) { + return compare(lhs, *rhs.m_item); + } + if (rhs.m_object) { + return compareItemToPythonObject(lhs, rhs.m_kind, rhs.m_object); + } + return compare(lhs, rhs.m_kind, rhs.m_value); + } + + int compareFixedToField(StorageClass kind, Value value, const FieldValue &rhs) + { + if (rhs.m_item) { + return compare(kind, value, *rhs.m_item); + } + if (rhs.m_object) { + return compareFixedToPythonObject(kind, 
value, rhs.m_kind, rhs.m_object); + } + return compareFixed(kind, value, rhs.m_kind, rhs.m_value); + } + + int comparePythonObjectToField(StorageClass kind, PyObject *object, const FieldValue &rhs) + { + if (rhs.m_item) { + return comparePythonObjectToItem(kind, object, *rhs.m_item); + } + if (rhs.m_object) { + return comparePythonObject(kind, object, rhs.m_kind, rhs.m_object); + } + return -compareFixedToPythonObject(rhs.m_kind, rhs.m_value, kind, object); + } + + Token getToken(StorageClass kind) const + { + switch (kind) { + case StorageClass::NONE: + return Token::None; + case StorageClass::BOOLEAN: + return Token::Bool; + case StorageClass::INT64: + case StorageClass::PACKED_INT32: + return Token::Int; + case StorageClass::FP_NUMERIC64: + return Token::Double; + case StorageClass::PTIME64: + case StorageClass::DATE: + case StorageClass::DATETIME: + case StorageClass::DATETIME_TZ: + case StorageClass::TIME: + case StorageClass::TIME_TZ: + case StorageClass::DECIMAL: + return Token::UInt; + case StorageClass::OBJECT_REF: + case StorageClass::EMBEDDED_OBJECT_REF: + return Token::Object; + default: + THROWF(db0::InternalException) << "Unsupported fixed intern content kind: " << kind; + return Token::None; + } + } + + Token getToken(const o_tuple_item &item) const + { + switch (item.itemKind()) { + case StorageClass::NONE: + return Token::None; + case StorageClass::BOOLEAN: + return Token::Bool; + case StorageClass::PACKED_INT32: + case StorageClass::INT64: + return Token::Int; + case StorageClass::FP_NUMERIC64: + return Token::Double; + case StorageClass::STRING_REF: + case StorageClass::EMBEDDED_STRING: + return Token::String; + case StorageClass::DB0_BYTES: + case StorageClass::EMBEDDED_BYTES: + return Token::Bytes; + case StorageClass::PTIME64: + case StorageClass::DATE: + case StorageClass::DATETIME: + case StorageClass::DATETIME_TZ: + case StorageClass::TIME: + case StorageClass::TIME_TZ: + case StorageClass::DECIMAL: + return Token::UInt; + case 
StorageClass::EMBEDDED_TUPLE: + return Token::Tuple; + case StorageClass::EMBEDDED_SET: + return Token::Set; + case StorageClass::EMBEDDED_DICT: + return Token::Dict; + case StorageClass::EMBEDDED_OBJECT: + return Token::Object; + default: + THROWF(db0::InternalException) << "Unsupported tuple intern content kind: " << item.itemKind(); + return Token::None; + } + } + + Token getToken(PyObject *object) const + { + auto &typeManager = db0::python::PyToolkit::getTypeManager(); + switch (typeManager.getTypeId(object)) { + case db0::bindings::TypeId::NONE: + return Token::None; + case db0::bindings::TypeId::BOOLEAN: + return Token::Bool; + case db0::bindings::TypeId::INTEGER: + return Token::Int; + case db0::bindings::TypeId::FLOAT: + return Token::Double; + case db0::bindings::TypeId::DATETIME: + case db0::bindings::TypeId::DATETIME_TZ: + case db0::bindings::TypeId::DATE: + case db0::bindings::TypeId::TIME: + case db0::bindings::TypeId::TIME_TZ: + case db0::bindings::TypeId::DECIMAL: + return Token::UInt; + case db0::bindings::TypeId::STRING: + return Token::String; + case db0::bindings::TypeId::BYTES: + return Token::Bytes; + case db0::bindings::TypeId::LIST: + case db0::bindings::TypeId::TUPLE: + return Token::Tuple; + case db0::bindings::TypeId::SET: + return Token::Set; + case db0::bindings::TypeId::DICT: + return Token::Dict; + case db0::bindings::TypeId::MEMO_IMMUTABLE_OBJECT: + return Token::Object; + default: + break; + } + THROWF(db0::InputException) << "Unsupported intern content Python type: " << Py_TYPE(object)->tp_name; + return Token::None; + } + + int compareFixed(StorageClass lhsKind, Value lhs, StorageClass rhsKind, Value rhs) + { + if (lhsKind == StorageClass::OBJECT_REF || lhsKind == StorageClass::EMBEDDED_OBJECT_REF) { + return compareObjectRefToFixed(lhsKind, lhs, rhsKind, rhs); + } + if (rhsKind == StorageClass::OBJECT_REF || rhsKind == StorageClass::EMBEDDED_OBJECT_REF) { + return -compareObjectRefToFixed(rhsKind, rhs, lhsKind, lhs); + } + if 
(auto result = compareToken(getToken(lhsKind), getToken(rhsKind))) { + return result; + } + switch (lhsKind) { + case StorageClass::NONE: + return 0; + case StorageClass::BOOLEAN: { + auto lhsBool = static_cast(lhs.m_store != 0 ? 1 : 0); + auto rhsBool = static_cast(rhs.m_store != 0 ? 1 : 0); + return compareScalar(lhsBool, rhsBool); + } + case StorageClass::INT64: + case StorageClass::PACKED_INT32: + return compareScalar(static_cast(lhs.m_store), static_cast(rhs.m_store)); + case StorageClass::FP_NUMERIC64: + return compareScalar(lhs.m_store, rhs.m_store); + case StorageClass::PTIME64: + case StorageClass::DATE: + case StorageClass::DATETIME: + case StorageClass::DATETIME_TZ: + case StorageClass::TIME: + case StorageClass::TIME_TZ: + case StorageClass::DECIMAL: { + auto result = compareScalar(static_cast(lhsKind), static_cast(rhsKind)); + return result ? result : compareScalar(lhs.m_store, rhs.m_store); + } + default: + THROWF(db0::InternalException) << "Unsupported fixed intern content kind: " << lhsKind; + return 0; + } + } + + int compare(StorageClass kind, Value value, const o_tuple_item &item) + { + if (kind == StorageClass::OBJECT_REF || kind == StorageClass::EMBEDDED_OBJECT_REF) { + return compareObjectRefToItem(kind, value, item); + } + if (auto result = compareToken(getToken(kind), getToken(item))) { + return result; + } + switch (kind) { + case StorageClass::NONE: + return 0; + case StorageClass::BOOLEAN: + return compare(value.m_store != 0, item); + case StorageClass::INT64: + case StorageClass::PACKED_INT32: + return compare(static_cast(value.m_store), item); + case StorageClass::FP_NUMERIC64: { + auto rhsValue = item.doublePayload().value(); + return compareBytes(&value.m_store, &rhsValue, sizeof(value.m_store)); + } + case StorageClass::PTIME64: + case StorageClass::DATE: + case StorageClass::DATETIME: + case StorageClass::DATETIME_TZ: + case StorageClass::TIME: + case StorageClass::TIME_TZ: + case StorageClass::DECIMAL: + return compare(kind, 
value.m_store, item); + default: + THROWF(db0::InternalException) << "Unsupported fixed intern content kind: " << kind; + return 0; + } + } + + int compare(const o_tuple_item &item, StorageClass kind, Value value) + { + return -compare(kind, value, item); + } + + int compareNoneToItem(const o_tuple_item &rhs) + { + return compareToken(Token::None, getToken(rhs)); + } + + int compare(bool lhs, const o_tuple_item &rhs) + { + if (auto result = compareToken(Token::Bool, getToken(rhs))) { + return result; + } + auto lhsValue = static_cast(lhs ? 1 : 0); + auto rhsValue = static_cast(rhs.boolPayload().value() ? 1 : 0); + return compareScalar(lhsValue, rhsValue); + } + + int compare(std::int64_t lhs, const o_tuple_item &rhs) + { + if (auto result = compareToken(Token::Int, getToken(rhs))) { + return result; + } + auto rhsValue = rhs.itemKind() == StorageClass::PACKED_INT32 + ? static_cast(rhs.packedIntPayload().value()) + : rhs.intPayload().value(); + return compareScalar(lhs, static_cast(rhsValue)); + } + + int compare(double lhs, const o_tuple_item &rhs) + { + if (auto result = compareToken(Token::Double, getToken(rhs))) { + return result; + } + return compareScalar(lhs, rhs.doublePayload().value()); + } + + int compare(StorageClass kind, std::uint64_t lhs, const o_tuple_item &rhs) + { + if (auto result = compareToken(Token::UInt, getToken(rhs))) { + return result; + } + if (auto result = compareScalar(static_cast(kind), static_cast(rhs.itemKind()))) { + return result; + } + return compareScalar(lhs, rhs.uint64Payload().value()); + } + + int compareBytesWithToken(Token tokenValue, const void *lhs, std::size_t lhsSize, const o_tuple_item &rhs) + { + if (auto result = compareToken(tokenValue, getToken(rhs))) { + return result; + } + if (tokenValue == Token::String) { + auto value = rhs.stringPayload().get(); + return compareBytesPayload(lhs, lhsSize, value.get_raw(), value.size()); + } + return compareBytesPayload(lhs, lhsSize, rhs.bytesPayload().begin(), 
rhs.bytesPayload().size()); + } + + int compare(const o_py_tuple &lhs, const o_tuple_item &rhs) + { + if (auto result = compareToken(Token::Tuple, getToken(rhs))) { + return result; + } + return compare(lhs, o_py_tuple::__const_ref(rhs.embeddedPayload().begin())); + } + + int compare(const o_py_set &lhs, const o_tuple_item &rhs) + { + if (auto result = compareToken(Token::Set, getToken(rhs))) { + return result; + } + return compare(lhs, o_py_set::__const_ref(rhs.embeddedPayload().begin())); + } + + int compare(const o_py_dict &lhs, const o_tuple_item &rhs) + { + if (auto result = compareToken(Token::Dict, getToken(rhs))) { + return result; + } + return compare(lhs, o_py_dict::__const_ref(rhs.embeddedPayload().begin())); + } + + int compare(const o_embedded_object &lhs, const o_tuple_item &rhs) + { + if (auto result = compareToken(Token::Object, getToken(rhs))) { + return result; + } + return compare(lhs, o_embedded_object::__const_ref(rhs.embeddedPayload().begin())); + } + + int compare(const o_py_tuple &lhs, const o_py_tuple &rhs) + { + if (auto result = compareToken(Token::Tuple, Token::Tuple)) { + return result; + } + if (auto result = compareScalar(lhs.size(), rhs.size())) { + return result; + } + auto lhsIt = lhs.begin(); + auto rhsIt = rhs.begin(); + for (; lhsIt != lhs.end(); ++lhsIt, ++rhsIt) { + if (auto result = compare(*lhsIt, *rhsIt)) { + return result; + } + } + return 0; + } + + int compare(const o_py_set &lhs, const o_py_set &rhs) + { + if (auto result = compareToken(Token::Set, Token::Set)) { + return result; + } + if (auto result = compareScalar(lhs.size(), rhs.size())) { + return result; + } + for (const auto &lhsItem: lhs) { + if (!rhs.contains(lhsItem)) { + return -1; + } + } + return 0; + } + + int compare(const o_py_dict &lhs, const o_py_dict &rhs) + { + if (auto result = compareToken(Token::Dict, Token::Dict)) { + return result; + } + if (auto result = compareScalar(lhs.size(), rhs.size())) { + return result; + } + for (const auto &lhsPair: 
lhs) { + auto *rhsValue = rhs.get(lhsPair.key()); + if (!rhsValue) { + return -1; + } + if (auto result = compare(lhsPair.value(), *rhsValue)) { + return result; + } + } + return 0; + } + + int compareObjectRefToFixed(StorageClass lhsKind, Value lhs, StorageClass rhsKind, Value rhs) + { + return withObjectRef(lhsKind, lhs, [&](const auto &lhsObject) { + return compareObjectToFixed(lhsObject, rhsKind, rhs); + }); + } + + int compareObjectRefToItem(StorageClass kind, Value value, const o_tuple_item &item) + { + return withObjectRef(kind, value, [&](const auto &lhs) { + return compareObjectToItem(lhs, item); + }); + } + + template int compareObjectToFixed(const ObjectT &lhs, StorageClass kind, Value value) + { + if (kind == StorageClass::OBJECT_REF || kind == StorageClass::EMBEDDED_OBJECT_REF) { + return withObjectRef(kind, value, [&](const auto &rhs) { + return compare(lhs, rhs); + }); + } + return compareToken(Token::Object, getToken(kind)); + } + + template int compareObjectToItem(const ObjectT &lhs, const o_tuple_item &item) + { + if (auto result = compareToken(Token::Object, getToken(item))) { + return result; + } + return compare(lhs, o_embedded_object::__const_ref(item.embeddedPayload().begin())); + } + + template int withObjectRef(StorageClass kind, Value value, FnT fn) + { + return withObjectRefObject(resolveObjectRef(m_fixture, kind, value), fn); + } + + template int withObjectRef(db0::UniqueAddress address, FnT fn) + { + return withObjectRefObject(resolveObjectRef(m_fixture, address), fn); + } + + template + int withObjectRefObject(const db0::python::PyToolkit::ObjectSharedPtr &pyObject, FnT fn) + { + if (!m_fixture || !*m_fixture) { + THROWF(db0::InputException) << "Fixture is required for intern object references"; + } + m_lhsBudget.add(INTERN_REFERENCE_TRAVERSAL_CHARGE); + m_rhsBudget.add(INTERN_REFERENCE_TRAVERSAL_CHARGE); + if (db0::python::PyEmbeddedMemo_Check(pyObject.get())) { + return fn(db0::python::getEmbeddedMemoRef( + 
reinterpret_cast(pyObject.get()) + ).embeddedObject()); + } + if (!db0::python::PyToolkit::isMemoImmutableObject(pyObject.get())) { + THROWF(db0::InputException) << "Intern object reference does not resolve to an immutable object"; + } + const auto &memo = db0::python::PyToolkit::getTypeManager() + .template extractObject(pyObject.get()); + return fn(memo.getData()->getObject()); + } + + int compareItemToPythonObject(const o_tuple_item &item, StorageClass kind, PyObject *object) + { + return -comparePythonObjectToItem(kind, object, item); + } + + int comparePythonObjectToItem(StorageClass storageClass, PyObject *object, const o_tuple_item &item) + { + switch (getNormalizedKind(storageClass)) { + case StorageClass::EMBEDDED_STRING: { + auto value = db0::python::PyToolkit::getTypeManager().extractString(object); + return compareBytesWithToken(Token::String, value, std::strlen(value), item); + } + case StorageClass::EMBEDDED_BYTES: { + auto value = db0::python::PyToolkit::getTypeManager().extractBytes(object); + return compareBytesWithToken(Token::Bytes, value.m_data, value.m_size, item); + } + case StorageClass::EMBEDDED_TUPLE: + return comparePythonTuple(object, item); + case StorageClass::EMBEDDED_SET: + return comparePythonSet(object, item); + case StorageClass::EMBEDDED_DICT: + return comparePythonDict(object, item); + case StorageClass::EMBEDDED_OBJECT: + return comparePythonMemo(object, item); + default: + THROWF(db0::InternalException) << "Unsupported initializer intern object kind: " << storageClass; + return 0; + } + } + + int compareFixedToPythonObject(StorageClass lhsKind, Value lhs, StorageClass rhsKind, PyObject *rhs) + { + if (lhsKind == StorageClass::OBJECT_REF || lhsKind == StorageClass::EMBEDDED_OBJECT_REF) { + return withObjectRef(lhsKind, lhs, [&](const auto &lhsObject) { + return compareObjectToPythonObject(lhsObject, rhsKind, rhs); + }); + } + return -comparePythonObjectToFixed(rhsKind, rhs, lhsKind, lhs); + } + + int 
comparePythonObjectToFixed(StorageClass lhsKind, PyObject *lhs, StorageClass rhsKind, Value rhs) + { + switch (getNormalizedKind(lhsKind)) { + case StorageClass::EMBEDDED_OBJECT: + return comparePythonMemoToFixed(lhs, rhsKind, rhs); + default: + return compareToken(getPythonObjectToken(lhsKind), getToken(rhsKind)); + } + } + + int comparePythonObject(StorageClass lhsKind, PyObject *lhs, StorageClass rhsKind, PyObject *rhs) + { + switch (getNormalizedKind(lhsKind)) { + case StorageClass::EMBEDDED_STRING: { + auto lhsValue = db0::python::PyToolkit::getTypeManager().extractString(lhs); + auto rhsValue = db0::python::PyToolkit::getTypeManager().extractString(rhs); + if (auto result = compareToken(Token::String, Token::String)) { + return result; + } + return compareBytesPayload(lhsValue, std::strlen(lhsValue), rhsValue, std::strlen(rhsValue)); + } + case StorageClass::EMBEDDED_BYTES: { + auto lhsValue = db0::python::PyToolkit::getTypeManager().extractBytes(lhs); + auto rhsValue = db0::python::PyToolkit::getTypeManager().extractBytes(rhs); + if (auto result = compareToken(Token::Bytes, Token::Bytes)) { + return result; + } + return compareBytesPayload(lhsValue.m_data, lhsValue.m_size, rhsValue.m_data, rhsValue.m_size); + } + case StorageClass::EMBEDDED_TUPLE: + return comparePythonTuple(lhs, rhs); + case StorageClass::EMBEDDED_SET: + return comparePythonSet(lhs, rhs); + case StorageClass::EMBEDDED_DICT: + return comparePythonDict(lhs, rhs); + case StorageClass::EMBEDDED_OBJECT: + return comparePythonMemo(lhs, rhs); + default: + THROWF(db0::InternalException) << "Unsupported initializer intern object kind: " << lhsKind; + return 0; + } + } + + Token getPythonObjectToken(StorageClass kind) const + { + switch (getNormalizedKind(kind)) { + case StorageClass::EMBEDDED_STRING: + return Token::String; + case StorageClass::EMBEDDED_BYTES: + return Token::Bytes; + case StorageClass::EMBEDDED_TUPLE: + return Token::Tuple; + case StorageClass::EMBEDDED_SET: + return Token::Set; + 
case StorageClass::EMBEDDED_DICT: + return Token::Dict; + case StorageClass::EMBEDDED_OBJECT: + return Token::Object; + default: + THROWF(db0::InternalException) << "Unsupported initializer intern object kind: " << kind; + return Token::None; + } + } + + template int compareObjectToPythonObject(const ObjectT &lhs, StorageClass rhsKind, PyObject *rhs) + { + if (auto result = compareToken(Token::Object, getPythonObjectToken(rhsKind))) { + return result; + } + return compareObjectToPythonMemo(lhs, rhs); + } + + int comparePythonTuple(PyObject *lhs, const o_tuple_item &rhs) + { + if (auto result = compareToken(Token::Tuple, getToken(rhs))) { + return result; + } + return comparePythonTupleToTuple(lhs, o_py_tuple::__const_ref(rhs.embeddedPayload().begin())); + } + + int comparePythonSet(PyObject *lhs, const o_tuple_item &rhs) + { + if (auto result = compareToken(Token::Set, getToken(rhs))) { + return result; + } + return comparePythonSetToSet(lhs, o_py_set::__const_ref(rhs.embeddedPayload().begin())); + } + + int comparePythonDict(PyObject *lhs, const o_tuple_item &rhs) + { + if (auto result = compareToken(Token::Dict, getToken(rhs))) { + return result; + } + return comparePythonDictToDict(lhs, o_py_dict::__const_ref(rhs.embeddedPayload().begin())); + } + + int comparePythonMemo(PyObject *lhs, const o_tuple_item &rhs) + { + if (auto result = compareToken(Token::Object, getToken(rhs))) { + return result; + } + return comparePythonMemoToObject(lhs, o_embedded_object::__const_ref(rhs.embeddedPayload().begin())); + } + + int compare(bool lhs, PyObject *rhs) + { + if (auto result = compareToken(Token::Bool, getToken(rhs))) { + return result; + } + auto lhsValue = static_cast(lhs ? 1 : 0); + auto rhsValue = static_cast(rhs == Py_True ? 
1 : 0); + return compareScalar(lhsValue, rhsValue); + } + + int compare(std::int64_t lhs, PyObject *rhs) + { + if (auto result = compareToken(Token::Int, getToken(rhs))) { + return result; + } + auto rhsValue = PyLong_AsLongLong(rhs); + if (PyErr_Occurred()) { + PyErr_Clear(); + THROWF(db0::InputException) << "Python integer is out of int64 range"; + } + return compareScalar(lhs, static_cast(rhsValue)); + } + + int compare(double lhs, PyObject *rhs) + { + if (auto result = compareToken(Token::Double, getToken(rhs))) { + return result; + } + return compareScalar(lhs, PyFloat_AsDouble(rhs)); + } + + int compare(StorageClass kind, std::uint64_t lhs, PyObject *rhs) + { + if (auto result = compareToken(Token::UInt, getToken(rhs))) { + return result; + } + auto &typeManager = db0::python::PyToolkit::getTypeManager(); + auto rhsType = typeManager.getTypeId(rhs); + auto rhsKind = getStorageClassForPythonUInt(rhsType); + if (auto result = compareScalar(static_cast(kind), static_cast(rhsKind))) { + return result; + } + return compareScalar(lhs, typeManager.extractUInt64(rhsType, rhs)); + } + + StorageClass getStorageClassForPythonUInt(db0::bindings::TypeId typeId) const + { + switch (typeId) { + case db0::bindings::TypeId::DATETIME: + return StorageClass::DATETIME; + case db0::bindings::TypeId::DATETIME_TZ: + return StorageClass::DATETIME_TZ; + case db0::bindings::TypeId::DATE: + return StorageClass::DATE; + case db0::bindings::TypeId::TIME: + return StorageClass::TIME; + case db0::bindings::TypeId::TIME_TZ: + return StorageClass::TIME_TZ; + case db0::bindings::TypeId::DECIMAL: + return StorageClass::DECIMAL; + default: + THROWF(db0::InputException) << "Unsupported intern content Python uint type"; + return StorageClass::UNDEFINED; + } + } + + int compareBytesToPythonItem(Token tokenValue, const void *lhs, std::size_t lhsSize, PyObject *rhs) + { + if (auto result = compareToken(tokenValue, getToken(rhs))) { + return result; + } + if (tokenValue == Token::String) { + auto 
value = db0::python::PyToolkit::getTypeManager().extractString(rhs); + return compareBytesPayload(lhs, lhsSize, value, std::strlen(value)); + } + auto value = db0::python::PyToolkit::getTypeManager().extractBytes(rhs); + return compareBytesPayload(lhs, lhsSize, value.m_data, value.m_size); + } + + int comparePythonTupleToPythonItem(PyObject *lhs, PyObject *rhs) + { + if (auto result = compareToken(Token::Tuple, getToken(rhs))) { + return result; + } + return comparePythonTuple(lhs, rhs); + } + + int comparePythonSetToPythonItem(PyObject *lhs, PyObject *rhs) + { + if (auto result = compareToken(Token::Set, getToken(rhs))) { + return result; + } + return comparePythonSet(lhs, rhs); + } + + int comparePythonDictToPythonItem(PyObject *lhs, PyObject *rhs) + { + if (auto result = compareToken(Token::Dict, getToken(rhs))) { + return result; + } + return comparePythonDict(lhs, rhs); + } + + int comparePythonMemoToPythonItem(PyObject *lhs, PyObject *rhs) + { + if (auto result = compareToken(Token::Object, getToken(rhs))) { + return result; + } + return comparePythonMemo(lhs, rhs); + } + + int comparePythonTuple(PyObject *lhs, PyObject *rhs) + { + if (auto result = compareToken(Token::Tuple, Token::Tuple)) { + return result; + } + auto lhsCount = getPythonSequenceSize(lhs); + auto rhsCount = getPythonSequenceSize(rhs); + if (auto result = compareScalar(lhsCount, rhsCount)) { + return result; + } + for (std::size_t i = 0; i < lhsCount; ++i) { + if (auto result = compare(getPythonSequenceItem(lhs, i), getPythonSequenceItem(rhs, i))) { + return result; + } + } + return 0; + } + + int comparePythonTupleToTuple(PyObject *lhs, const o_py_tuple &rhs) + { + if (auto result = compareToken(Token::Tuple, Token::Tuple)) { + return result; + } + auto lhsCount = getPythonSequenceSize(lhs); + if (auto result = compareScalar(lhsCount, rhs.size())) { + return result; + } + auto rhsIt = rhs.begin(); + for (std::size_t i = 0; i < lhsCount; ++i, ++rhsIt) { + if (auto result = 
comparePythonItemToTupleItem(getPythonSequenceItem(lhs, i), *rhsIt)) { + return result; + } + } + return 0; + } + + int comparePythonItemToTupleItem(PyObject *lhs, const o_tuple_item &rhs) + { + switch (getToken(lhs)) { + case Token::None: + return compareToken(Token::None, getToken(rhs)); + case Token::Bool: + return compare(lhs == Py_True, rhs); + case Token::Int: { + auto value = PyLong_AsLongLong(lhs); + if (PyErr_Occurred()) { + PyErr_Clear(); + THROWF(db0::InputException) << "Python integer is out of int64 range"; + } + return compare(static_cast(value), rhs); + } + case Token::Double: + return compare(PyFloat_AsDouble(lhs), rhs); + case Token::String: { + auto value = db0::python::PyToolkit::getTypeManager().extractString(lhs); + return compareBytesWithToken(Token::String, value, std::strlen(value), rhs); + } + case Token::Bytes: { + auto value = db0::python::PyToolkit::getTypeManager().extractBytes(lhs); + return compareBytesWithToken(Token::Bytes, value.m_data, value.m_size, rhs); + } + case Token::Tuple: + return comparePythonTuple(lhs, rhs); + case Token::Set: + return comparePythonSet(lhs, rhs); + case Token::Dict: + return comparePythonDict(lhs, rhs); + case Token::Object: + return comparePythonMemo(lhs, rhs); + case Token::UInt: { + auto &typeManager = db0::python::PyToolkit::getTypeManager(); + auto typeId = typeManager.getTypeId(lhs); + return compare(getStorageClassForPythonUInt(typeId), typeManager.extractUInt64(typeId, lhs), rhs); + } + default: + break; + } + THROWF(db0::InputException) << "Unsupported intern content Python type: " << Py_TYPE(lhs)->tp_name; + return 0; + } + + int comparePythonSet(PyObject *lhs, PyObject *rhs) + { + if (!PySet_Check(lhs) || !PySet_Check(rhs)) { + THROWF(db0::InputException) << "Intern set content expects a Python set"; + } + auto lhsSize = PySet_GET_SIZE(lhs); + auto rhsSize = PySet_GET_SIZE(rhs); + if (lhsSize < 0 || rhsSize < 0) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to read Python set 
size"; + } + if (auto result = compareToken(Token::Set, Token::Set)) { + return result; + } + if (auto result = compareScalar(lhsSize, rhsSize)) { + return result; + } + auto iterator = Py_OWN(PyObject_GetIter(lhs)); + if (!iterator) { + PyErr_Clear(); + THROWF(db0::InputException) << "Intern set content expects a Python set"; + } + Py_FOR(item, iterator) { + auto contains = PySet_Contains(rhs, *item); + if (contains < 0) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to lookup Python set item"; + } + if (!contains) { + return -1; + } + } + if (PyErr_Occurred()) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to iterate Python set"; + } + return 0; + } + + int comparePythonSetToSet(PyObject *lhs, const o_py_set &rhs) + { + if (!PySet_Check(lhs)) { + THROWF(db0::InputException) << "Intern set content expects a Python set"; + } + auto lhsSize = PySet_GET_SIZE(lhs); + if (lhsSize < 0) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to read Python set size"; + } + if (auto result = compareToken(Token::Set, Token::Set)) { + return result; + } + if (auto result = compareScalar(lhsSize, rhs.size())) { + return result; + } + auto iterator = Py_OWN(PyObject_GetIter(lhs)); + if (!iterator) { + PyErr_Clear(); + THROWF(db0::InputException) << "Intern set content expects a Python set"; + } + Py_FOR(item, iterator) { + auto element = o_py_set::elementFromPythonObject(*item); + if (!rhs.contains(element)) { + return -1; + } + } + if (PyErr_Occurred()) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to iterate Python set"; + } + return 0; + } + + int comparePythonDict(PyObject *lhs, PyObject *rhs) + { + if (!PyDict_Check(lhs) || !PyDict_Check(rhs)) { + THROWF(db0::InputException) << "Intern dict content expects a Python dict"; + } + auto lhsSize = PyDict_Size(lhs); + auto rhsSize = PyDict_Size(rhs); + if (lhsSize < 0 || rhsSize < 0) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to read Python dict size"; + } + if (auto 
result = compareToken(Token::Dict, Token::Dict)) { + return result; + } + if (auto result = compareScalar(lhsSize, rhsSize)) { + return result; + } + auto iterator = Py_OWN(PyObject_GetIter(lhs)); + if (!iterator) { + PyErr_Clear(); + THROWF(db0::InputException) << "Intern dict content expects a Python dict"; + } + Py_FOR(key, iterator) { + auto *lhsValue = getPythonDictValue(lhs, *key); + auto *rhsValue = PyDict_GetItemWithError(rhs, *key); + if (!rhsValue) { + if (PyErr_Occurred()) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to lookup Python dict key"; + } + return -1; + } + if (auto result = compare(lhsValue, rhsValue)) { + return result; + } + } + if (PyErr_Occurred()) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to iterate Python dict"; + } + return 0; + } + + int comparePythonDictToDict(PyObject *lhs, const o_py_dict &rhs) + { + if (!PyDict_Check(lhs)) { + THROWF(db0::InputException) << "Intern dict content expects a Python dict"; + } + auto lhsSize = PyDict_Size(lhs); + if (lhsSize < 0) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to read Python dict size"; + } + if (auto result = compareToken(Token::Dict, Token::Dict)) { + return result; + } + if (auto result = compareScalar(lhsSize, rhs.size())) { + return result; + } + auto iterator = Py_OWN(PyObject_GetIter(lhs)); + if (!iterator) { + PyErr_Clear(); + THROWF(db0::InputException) << "Intern dict content expects a Python dict"; + } + Py_FOR(key, iterator) { + auto *lhsValue = getPythonDictValue(lhs, *key); + auto element = o_py_dict::elementFromPythonObject(*key); + auto *rhsValue = rhs.get(element); + if (!rhsValue) { + return -1; + } + if (auto result = comparePythonItemToTupleItem(lhsValue, *rhsValue)) { + return result; + } + } + if (PyErr_Occurred()) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to iterate Python dict"; + } + return 0; + } + + PyObject *getPythonDictValue(PyObject *dict, PyObject *key) + { + auto *value = 
PyDict_GetItemWithError(dict, key); + if (!value) { + PyErr_Clear(); + THROWF(db0::InputException) << "Unable to read Python dict value"; + } + return value; + } + + int comparePythonMemo(PyObject *lhs, PyObject *rhs) + { + return withPythonMemo(lhs, [&](const auto &lhsObject) { + return compareObjectToPythonMemo(lhsObject, rhs); + }); + } + + template int compareObjectToPythonMemo(const ObjectT &lhs, PyObject *rhs) + { + return withPythonMemo(rhs, [&](const auto &rhsObject) { + return compare(lhs, rhsObject); + }); + } + + int comparePythonMemoToObject(PyObject *lhs, const o_embedded_object &rhs) + { + return withPythonMemo(lhs, [&](const auto &lhsObject) { + return compare(lhsObject, rhs); + }); + } + + int comparePythonMemoToFixed(PyObject *lhs, StorageClass rhsKind, Value rhs) + { + return withPythonMemo(lhs, [&](const auto &lhsObject) { + return compareObjectToFixed(lhsObject, rhsKind, rhs); + }); + } + + template int withPythonMemo(PyObject *pyObject, FnT fn) + { + using MemoImmutableObject = db0::python::PyToolkit::TypeManager::MemoImmutableObject; + if (db0::python::PyEmbeddedMemo_Check(pyObject)) { + return fn(db0::python::getEmbeddedMemoRef(reinterpret_cast(pyObject)).embeddedObject()); + } + if (!db0::python::PyToolkit::isMemoImmutableObject(pyObject)) { + THROWF(db0::InputException) << "Interned object content can only reference immutable memo objects"; + } + const auto &memo = db0::python::PyToolkit::getTypeManager() + .template extractObject(pyObject); + if (!memo.hasInstance()) { + auto *initializer = dynamic_cast( + InitManager::instance.findInitializer(memo) + ); + if (!initializer) { + THROWF(db0::InputException) << "Non-materialized intern memo object has no initializer"; + } + return fn(*initializer); + } + return withObjectRef(memo.getUniqueAddress(), fn); + } + + std::size_t getPythonSequenceSize(PyObject *sequence) const + { + if (PyTuple_Check(sequence)) { + return static_cast(PyTuple_GET_SIZE(sequence)); + } + if (PyList_Check(sequence)) { 
+ return static_cast(PyList_GET_SIZE(sequence)); + } + THROWF(db0::InputException) << "Intern tuple content expects a Python tuple or list"; + return 0; + } + + PyObject *getPythonSequenceItem(PyObject *sequence, std::size_t index) const + { + if (PyTuple_Check(sequence)) { + return PyTuple_GET_ITEM(sequence, static_cast(index)); + } + return PyList_GET_ITEM(sequence, static_cast(index)); + } + + StreamBudget m_lhsBudget; + StreamBudget m_rhsBudget; + db0::swine_ptr *m_fixture = nullptr; + }; + + template + int compareStreams(db0::swine_ptr &fixture, const LhsT &lhs, const RhsT &rhs) + { + return InternComparator(&fixture).compare(lhs, rhs); + } + + int compareWithFixture( + db0::swine_ptr *fixture, const o_tuple_item &lhs, const o_tuple_item &rhs + ) + { + return InternComparator(fixture).compare(lhs, rhs); + } + + int compareWithFixture( + db0::swine_ptr *fixture, const o_dict_pair &lhs, const o_dict_pair &rhs + ) + { + return InternComparator(fixture).compare(lhs, rhs); + } + + int compareWithFixture(db0::swine_ptr *fixture, PyObject *lhs, PyObject *rhs) + { + return InternComparator(fixture).compare(lhs, rhs); + } + + } + + std::uint64_t intern_hash(db0::swine_ptr &fixture, const o_embedded_object &object) + { + HashSink sink; + InternStreamer(sink, &fixture).writeObject(object); + return sink.getValue(); + } + + std::uint64_t intern_hash( + db0::swine_ptr &fixture, const ImmutableObjectInitializer &initializer + ) + { + HashSink sink; + InternStreamer(sink, &fixture).writeInitializer(initializer); + return sink.getValue(); + } + + int intern_compare( + db0::swine_ptr &fixture, const o_embedded_object &lhs, const o_embedded_object &rhs + ) + { + return compareStreams(fixture, lhs, rhs); + } + + int intern_compare( + db0::swine_ptr &fixture, const ImmutableObjectInitializer &lhs, + const o_embedded_object &rhs + ) + { + return compareStreams(fixture, lhs, rhs); + } + + int intern_compare( + db0::swine_ptr &fixture, const o_embedded_object &lhs, + const 
ImmutableObjectInitializer &rhs + ) + { + return compareStreams(fixture, lhs, rhs); + } + + int intern_compare( + db0::swine_ptr &fixture, const ImmutableObjectInitializer &lhs, + const ImmutableObjectInitializer &rhs + ) + { + return compareStreams(fixture, lhs, rhs); + } +} diff --git a/src/dbzero/object_model/object/InternContent.hpp b/src/dbzero/object_model/object/InternContent.hpp new file mode 100644 index 00000000..0be2e95b --- /dev/null +++ b/src/dbzero/object_model/object/InternContent.hpp @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2025 DBZero Software sp. z o.o. + +#pragma once + +#include + +#include +#include +#include + +namespace db0 +{ + class Fixture; +} + +namespace db0::object_model +{ + std::uint64_t intern_hash(db0::swine_ptr &fixture, const o_embedded_object &object); + std::uint64_t intern_hash( + db0::swine_ptr &fixture, const ImmutableObjectInitializer &initializer + ); + int intern_compare( + db0::swine_ptr &fixture, const o_embedded_object &lhs, const o_embedded_object &rhs + ); + int intern_compare( + db0::swine_ptr &fixture, const ImmutableObjectInitializer &lhs, + const o_embedded_object &rhs + ); + int intern_compare( + db0::swine_ptr &fixture, const o_embedded_object &lhs, + const ImmutableObjectInitializer &rhs + ); + int intern_compare( + db0::swine_ptr &fixture, const ImmutableObjectInitializer &lhs, + const ImmutableObjectInitializer &rhs + ); +} diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp index 86bb24c0..6b532e74 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.cpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -198,10 +199,10 @@ namespace db0::object_model } unsigned int assignDefaultTypeTags( - db0::swine_ptr &fixture, ObjectImmutableImpl::ObjectPtr object + db0::swine_ptr &fixture, 
ObjectImmutableImpl::ObjectPtr object, const Class &type ) { - auto *classPtr = &db0::python::PyToolkit::getMemoType(object); + auto *classPtr = &type; if (!classPtr->assignDefaultTags()) { return 0; } @@ -216,7 +217,7 @@ namespace db0::object_model return result; } - unsigned int assignEmbeddedDefaultTypeTags( + unsigned int processEmbeddedObjects( db0::swine_ptr &fixture, ObjectImmutableImpl &object, ObjectImmutableImpl::ObjectPtr rootObject ) @@ -230,31 +231,79 @@ namespace db0::object_model return 0; } + auto &classFactory = fixture->get(); + const auto *root = reinterpret_cast(object.operator->()); + auto rootAddress = object.getAddress(); + auto rootInstanceId = object.getUniqueAddress().getInstanceId(); unsigned int result = 0; for (auto offset: offsetIndex) { - auto embeddedObject = object.getEmbeddedInstanceAtOffset(offset); - result += assignDefaultTypeTags(fixture, embeddedObject.get()); + const auto &embeddedObject = o_embedded_object::__const_ref(root + offset); + auto type = classFactory.getTypeByClassRef(embeddedObject.getClassRef()).m_class; + if (type->isIntern()) { + type->getContentIndex().insert( + embeddedObject, UniqueAddress(rootAddress + offset, rootInstanceId) + ); + } + + auto embeddedObjectView = makeEmbeddedObjectView(fixture, rootObject, embeddedObject); + result += assignDefaultTypeTags(fixture, embeddedObjectView.get(), *type); } return result; } + void removeInternContentIndexEntries( + db0::swine_ptr &fixture, const ObjectImmutableImpl &object, Class &rootType + ) + { + const auto &rootEmbeddedObject = object->getObject(); + if (rootType.isIntern() && rootType.hasContentIndex()) { + rootType.getContentIndex().remove(rootEmbeddedObject, object.getUniqueAddress()); + } + + const auto &offsetIndex = object->getOffsetIndex(); + if (offsetIndex.size() == 0) { + return; + } + + auto &classFactory = fixture->get(); + const auto *root = reinterpret_cast(object.operator->()); + auto rootAddress = object.getAddress(); + auto rootInstanceId = 
object.getUniqueAddress().getInstanceId(); + for (auto offset: offsetIndex) { + const auto &embeddedObject = o_embedded_object::__const_ref(root + offset); + auto type = classFactory.getTypeByClassRef(embeddedObject.getClassRef()).m_class; + if (!type->isIntern() || !type->hasContentIndex()) { + continue; + } + type->getContentIndex().remove(embeddedObject, UniqueAddress(rootAddress + offset, rootInstanceId)); + } + } + } - void ObjectImmutableImpl::postInit(FixtureLock &fixture) + std::optional ObjectImmutableImpl::postInit(FixtureLock &fixture) { if (!this->hasInstance()) { auto &initializer = InitManager::instance.getInitializer(*this); auto *immutableInitializer = dynamic_cast(&initializer); assert(immutableInitializer); - PosVT::Data posVtData; - unsigned int posVtOffset = 0; - auto indexVtData = initializer.getData(posVtData, posVtOffset); - this->m_type = initializer.getClassPtr(); assert(this->m_type); auto &type = *this->m_type; + if (type.isIntern()) { + auto candidate = type.getContentIndex().lookup(*immutableInitializer); + if (candidate) { + initializer.close(); + return candidate; + } + } + + PosVT::Data posVtData; + unsigned int posVtOffset = 0; + auto indexVtData = initializer.getData(posVtData, posVtOffset); + auto numTypeTags = safeNumTypeTags(type.getNumBases() + 1); this->init(*fixture, type.getClassRef(), initializer.getRefCounts(), numTypeTags, @@ -274,13 +323,17 @@ namespace db0::object_model transformEmbeddedObjectValues(*fixture, *this, m_lang_object, *immutableInitializer); if (m_lang_object) { this->modify().m_num_type_tags = safeNumTypeTags( - (*this)->m_num_type_tags + assignEmbeddedDefaultTypeTags(*fixture, *this, m_lang_object) + (*this)->m_num_type_tags + processEmbeddedObjects(*fixture, *this, m_lang_object) ); } + if (type.isIntern()) { + type.getContentIndex().insert((*this)->getObject(), this->getUniqueAddress()); + } initializer.close(); } assert(this->hasInstance()); + return std::nullopt; } void 
ObjectImmutableImpl::setLangObject(ObjectPtr object) const @@ -355,17 +408,22 @@ namespace db0::object_model ObjectImmutableImpl::ObjectSharedPtr ObjectImmutableImpl::getEmbeddedInstanceAtOffset( std::uint64_t offset ) const + { + auto fixture = this->getFixture(); + auto rootObject = getLangObject(); + + const auto &embeddedObject = getEmbeddedObjectAtOffset(offset); + return makeEmbeddedObjectView(fixture, rootObject, embeddedObject); + } + + const o_embedded_object &ObjectImmutableImpl::getEmbeddedObjectAtOffset(std::uint64_t offset) const { if (!this->hasInstance() || !(*this)->getOffsetIndex().contains(offset)) { THROWF(db0::BadAddressException) << "Invalid embedded immutable object offset: " << offset; } - auto fixture = this->getFixture(); - auto rootObject = getLangObject(); - const auto *root = reinterpret_cast(this->operator->()); - const auto &embeddedObject = o_embedded_object::__const_ref(root + offset); - return makeEmbeddedObjectView(fixture, rootObject, embeddedObject); + return o_embedded_object::__const_ref(root + offset); } ObjectImmutableImpl::ObjectSharedPtr ObjectImmutableImpl::tryGetEmbeddedField( @@ -501,6 +559,7 @@ namespace db0::object_model void ObjectImmutableImpl::dropMembers(db0::swine_ptr &fixture, Class &classRef) const { + removeInternContentIndexEntries(fixture, *this, classRef); super_t::dropMembers(fixture, classRef); unrefEmbeddedObject(fixture, (*this)->getObject()); } diff --git a/src/dbzero/object_model/object/ObjectImmutableImpl.hpp b/src/dbzero/object_model/object/ObjectImmutableImpl.hpp index 615e04fe..f977a02c 100644 --- a/src/dbzero/object_model/object/ObjectImmutableImpl.hpp +++ b/src/dbzero/object_model/object/ObjectImmutableImpl.hpp @@ -7,6 +7,7 @@ #include "o_immutable_object.hpp" #include +#include namespace db0::object_model @@ -30,8 +31,12 @@ namespace db0::object_model ObjectSharedPtr tryGet(const char *field_name, bool *is_auto_generated = nullptr) const; ObjectSharedPtr get(const char *field_name) const; 
ObjectSharedPtr getEmbeddedInstanceAtOffset(std::uint64_t offset) const; + const o_embedded_object &getEmbeddedObjectAtOffset(std::uint64_t offset) const; - void postInit(FixtureLock &); + // Returns the address of an existing durable instance on intern-index hit. + // Returns std::nullopt when this call created a new durable instance or + // when the object was already initialized. + std::optional postInit(FixtureLock &); void setLangObject(ObjectPtr) const; void destroy(); void dropInstance(FixtureLock &); diff --git a/src/dbzero/object_model/object/ObjectImplBase.cpp b/src/dbzero/object_model/object/ObjectImplBase.cpp index 21b47982..7c13b18e 100644 --- a/src/dbzero/object_model/object/ObjectImplBase.cpp +++ b/src/dbzero/object_model/object/ObjectImplBase.cpp @@ -15,7 +15,10 @@ #include #include #include +#include +#include #include +#include namespace db0::object_model @@ -52,6 +55,85 @@ namespace db0::object_model } } + void validateInternMemoReference( + db0::swine_ptr &fixture, const db0::python::PyToolkit::ObjectPtr pyObject + ); + + void validateInternContainerReferences( + db0::swine_ptr &fixture, const db0::python::PyToolkit::ObjectPtr pyObject + ) + { + if (PyTuple_Check(pyObject) || PyList_Check(pyObject) || PySet_Check(pyObject)) { + auto iterator = Py_OWN(PyObject_GetIter(pyObject)); + Py_FOR(item, iterator) { + validateInternMemoReference(fixture, item.get()); + } + return; + } + + if (PyDict_Check(pyObject)) { + PyObject *key = nullptr; + PyObject *value = nullptr; + Py_ssize_t pos = 0; + while (PyDict_Next(pyObject, &pos, &key, &value)) { + validateInternMemoReference(fixture, key); + validateInternMemoReference(fixture, value); + } + } + } + + void validateInternMemoReference( + db0::swine_ptr &fixture, const db0::python::PyToolkit::ObjectPtr pyObject + ) + { + using MemoImmutableObject = db0::python::PyToolkit::TypeManager::MemoImmutableObject; + + if (db0::python::PyEmbeddedMemo_Check(pyObject)) { + auto embeddedFixture = 
db0::python::getEmbeddedMemoFixture(pyObject); + if (*embeddedFixture != *fixture) { + THROWF(db0::InputException) + << "Embedded immutable object references cannot cross prefixes"; + } + auto &classFactory = fixture->get(); + auto &embeddedObject = db0::python::getEmbeddedMemoRef( + reinterpret_cast(pyObject) + ).embeddedObject(); + auto type = classFactory.getTypeByClassRef(embeddedObject.getClassRef()).m_class; + if (!type->isIntern()) { + THROWF(db0::InputException) << "intern object reference must point to an intern memo class"; + } + return; + } + + if (db0::python::PyAnyMemo_Check(pyObject)) { + if (!db0::python::PyToolkit::isMemoImmutableObject(pyObject)) { + THROWF(db0::InputException) << "intern object reference must point to an immutable memo object"; + } + + const auto &memo = db0::python::PyToolkit::getTypeManager() + .template extractObject(pyObject); + const Class *type = nullptr; + if (memo.hasInstance()) { + type = &memo.getType(); + } else { + auto *initializer = dynamic_cast( + InitManager::instance.findInitializer(memo) + ); + if (!initializer) { + THROWF(db0::InputException) << "Non-materialized intern memo object has no initializer"; + } + type = &initializer->getClass(); + } + + if (!type->isIntern()) { + THROWF(db0::InputException) << "intern object reference must point to an intern memo class"; + } + return; + } + + validateInternContainerReferences(fixture, pyObject); + } + bool shouldEmbedd(TypeId typeId, StorageClass storageClass, LangConfig::ObjectPtr value) { return canStorePreInitEmbeddedValue(storageClass) && shouldEmbedValue(typeId, storageClass, value); @@ -62,6 +144,16 @@ namespace db0::object_model : super_t(tag_as_dropped(), addr, ext_refs) { } + + template + ObjectImplBase::ObjectImplBase( + typename super_t::tag_no_gc, db0::swine_ptr &fixture, ObjectStem &&stem, std::shared_ptr type + ) + : super_t(typename super_t::tag_no_gc(), typename super_t::tag_from_stem(), fixture, std::move(stem)) + { + this->m_type = type; + 
assert(hasValidClassRef()); + } template ObjectImplBase::ObjectImplBase(std::shared_ptr db0_class) @@ -260,6 +352,11 @@ namespace db0::object_model auto fixture = initializer.getFixture(); auto &type = initializer.getClass(); auto storage_class = recognizeType(*fixture, type_id, obj_ptr); + if constexpr (std::is_same_v) { + if (type.isIntern()) { + validateInternMemoReference(fixture, obj_ptr); + } + } bool embedValue = false; if constexpr (std::is_same_v) { embedValue = shouldEmbedd(type_id, storage_class, obj_ptr); diff --git a/src/dbzero/object_model/object/ObjectImplBase.hpp b/src/dbzero/object_model/object/ObjectImplBase.hpp index 54b6f4eb..94a8df8b 100644 --- a/src/dbzero/object_model/object/ObjectImplBase.hpp +++ b/src/dbzero/object_model/object/ObjectImplBase.hpp @@ -51,6 +51,7 @@ namespace db0::object_model // Construct as null / dropped object ObjectImplBase(tag_as_dropped, UniqueAddress, unsigned int ext_refs); + ObjectImplBase(typename super_t::tag_no_gc, db0::swine_ptr &, ObjectStem &&, std::shared_ptr); ObjectImplBase(const ObjectImplBase &) = delete; ObjectImplBase(ObjectImplBase &&) = delete; diff --git a/src/dbzero/object_model/object/Options.cpp b/src/dbzero/object_model/object/Options.cpp index 4a14bce0..e627cd5e 100644 --- a/src/dbzero/object_model/object/Options.cpp +++ b/src/dbzero/object_model/object/Options.cpp @@ -3,4 +3,4 @@ #include "Options.hpp" -DEFINE_ENUM_VALUES(db0::object_model::MemoOptions, "NO_DEFAULT_TAGS", "NO_CACHE", "IMMUTABLE", "PROTECT_FIELDS") +DEFINE_ENUM_VALUES(db0::object_model::MemoOptions, "NO_DEFAULT_TAGS", "NO_CACHE", "IMMUTABLE", "PROTECT_FIELDS", "INTERN") diff --git a/src/dbzero/object_model/object/Options.hpp b/src/dbzero/object_model/object/Options.hpp index b3f964c0..7f43ba8b 100644 --- a/src/dbzero/object_model/object/Options.hpp +++ b/src/dbzero/object_model/object/Options.hpp @@ -17,11 +17,12 @@ namespace db0::object_model // instances of this type opted out of caching NO_CACHE = 0x0002, IMMUTABLE = 0x0004, 
- PROTECT_FIELDS = 0x0008 + PROTECT_FIELDS = 0x0008, + INTERN = 0x0010 }; using MemoFlags = db0::FlagSet; } -DECLARE_ENUM_VALUES(db0::object_model::MemoOptions, 4) +DECLARE_ENUM_VALUES(db0::object_model::MemoOptions, 5) diff --git a/src/dbzero/object_model/set/o_py_set.hpp b/src/dbzero/object_model/set/o_py_set.hpp index 904651f5..f38cea34 100644 --- a/src/dbzero/object_model/set/o_py_set.hpp +++ b/src/dbzero/object_model/set/o_py_set.hpp @@ -24,6 +24,7 @@ DB0_PACKED_BEGIN o_py_set(PyObject *iterable, EmbeddedObjectOffsetCollector &offsetCollector); static std::size_t measure(PyObject *iterable); + static Element elementFromPythonObject(PyObject *object); template static std::size_t safeSizeOf(BufT buf) { @@ -41,7 +42,6 @@ DB0_PACKED_BEGIN static db0::Foundation::Type type(); private: - static Element elementFromPythonObject(PyObject *object); static Element elementFromPythonObject( PyObject *object, EmbeddedObjectOffsetCollector *offsetCollector ); diff --git a/src/dbzero/object_model/set/o_set.cpp b/src/dbzero/object_model/set/o_set.cpp index 0e5d1b3e..6a0dbb88 100644 --- a/src/dbzero/object_model/set/o_set.cpp +++ b/src/dbzero/object_model/set/o_set.cpp @@ -184,6 +184,36 @@ namespace db0::object_model return false; } + bool o_set::contains(const Item &item) const + { + auto capacity = hashIndexCapacity(size()); + if (capacity == 0) { + return false; + } + + auto element = elementFromItem(item); + auto hash = itemHash(item); + const auto *entries = beginOfHashIndex(); + auto slot = hash % capacity; + const auto &entry = entries[slot]; + if (entry.isEmpty()) { + return false; + } + + auto offset = entry.offset(); + if (!entry.isBucket()) { + return itemEqualsElement(itemAtOffset(offset), element); + } + + const auto &bucket = bucketAtOffset(offset); + for (const auto &bucketItem: bucket) { + if (itemEqualsElement(bucketItem, element)) { + return true; + } + } + return false; + } + const o_set::Item &o_set::item(std::size_t index) const { auto it = begin(); diff 
--git a/src/dbzero/object_model/set/o_set.hpp b/src/dbzero/object_model/set/o_set.hpp index 5265ef72..4ed8ac5c 100644 --- a/src/dbzero/object_model/set/o_set.hpp +++ b/src/dbzero/object_model/set/o_set.hpp @@ -62,6 +62,7 @@ DB0_PACKED_BEGIN std::size_t size() const; bool empty() const; bool contains(const Element &element) const; + bool contains(const Item &item) const; const_iterator begin() const; const_iterator end() const; std::size_t sizeOf() const; diff --git a/tests/unit_tests/ContentIndexTest.cpp b/tests/unit_tests/ContentIndexTest.cpp new file mode 100644 index 00000000..df1e5906 --- /dev/null +++ b/tests/unit_tests/ContentIndexTest.cpp @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// Copyright (c) 2026 DBZero Software sp. z o.o. + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace tests +{ + + using namespace db0; + using namespace db0::object_model; + + class ContentIndexTest: public testing::Test + { + protected: + ContentIndexTest() + : m_workspace("", {}, {}, {}, {}, db0::object_model::initializer()) + { + } + + void SetUp() override + { + m_fixture = m_workspace.getFixture("content-index-test"); + } + + void TearDown() override + { + m_workspace.close(); + } + + std::shared_ptr makeClass(const char *name) + { + static std::uint64_t typeIndex = 0; + auto typeId = std::string("tests/content-index/") + name + "/" + std::to_string(typeIndex++); + ClassFlags flags; + flags.set(ClassOptions::IMMUTABLE, true); + return std::shared_ptr(new SubClass( + m_fixture, name, std::nullopt, typeId.c_str(), "test_prefix", {}, flags, nullptr + )); + } + + std::unique_ptr makeObject(const std::shared_ptr &type, std::int64_t value) + { + auto object = std::make_unique(type); + setInitializerValue(*object, type, value); + { + db0::FixtureLock lock(m_fixture); + object->postInit(lock); + } + object->incRef(false); + return object; + } + + static ImmutableObjectInitializer 
&setInitializerValue( + ObjectImmutableImpl &object, const std::shared_ptr &type, std::int64_t value + ) + { + auto memberLoc = type->findField("value"); + if (!memberLoc.first) { + memberLoc.first = type->addField("value", 0); + } + auto field = memberLoc.first.get(0).getIndexAndOffset(); + auto *initializer = dynamic_cast( + InitManager::instance.findInitializer(object) + ); + if (!initializer) { + THROWF(db0::InternalException) << "Immutable initializer not found" << THROWF_END; + } + initializer->set(field, StorageClass::INT64, Value(value)); + return *initializer; + } + + Workspace m_workspace; + db0::swine_ptr m_fixture; + }; + + TEST_F(ContentIndexTest, testContainsFindsInsertedAddress) + { + auto type = makeClass("ContentIndexLookup"); + auto object = makeObject(type, 42); + auto &index = type->getContentIndex(); + + index.insert((*object)->getObject(), object->getUniqueAddress()); + + ASSERT_TRUE(index.contains((*object)->getObject(), object->getUniqueAddress())); + } + + TEST_F(ContentIndexTest, testLookupMissesDifferentContent) + { + auto type = makeClass("ContentIndexDifferentContent"); + auto object = makeObject(type, 42); + auto &index = type->getContentIndex(); + index.insert((*object)->getObject(), object->getUniqueAddress()); + + ObjectImmutableImpl probe(type); + auto &initializer = setInitializerValue(probe, type, 43); + + ASSERT_FALSE(index.lookup(initializer).has_value()); + } + + TEST_F(ContentIndexTest, testLookupMissesSameFieldsFromDifferentClass) + { + auto indexedType = makeClass("ContentIndexIndexedType"); + auto lookupType = makeClass("ContentIndexLookupType"); + auto object = makeObject(indexedType, 42); + auto &index = indexedType->getContentIndex(); + index.insert((*object)->getObject(), object->getUniqueAddress()); + + ObjectImmutableImpl probe(lookupType); + auto &initializer = setInitializerValue(probe, lookupType, 42); + + ASSERT_FALSE(index.lookup(initializer).has_value()); + } + + TEST_F(ContentIndexTest, 
testRollbackDiscardsPendingInsert) + { + auto type = makeClass("ContentIndexRollback"); + auto object = makeObject(type, 42); + auto &index = type->getContentIndex(); + index.insert((*object)->getObject(), object->getUniqueAddress()); + index.rollback(); + ASSERT_EQ(index.size(), 0); + + ASSERT_FALSE(index.contains((*object)->getObject(), object->getUniqueAddress())); + } + + TEST_F(ContentIndexTest, testRemoveHidesInsertedCandidate) + { + auto type = makeClass("ContentIndexRemove"); + auto object = makeObject(type, 42); + auto &index = type->getContentIndex(); + index.insert((*object)->getObject(), object->getUniqueAddress()); + + ASSERT_TRUE(index.contains((*object)->getObject(), object->getUniqueAddress())); + ASSERT_EQ(index.size(), 1); + + index.remove((*object)->getObject(), object->getUniqueAddress()); + ASSERT_EQ(index.size(), 0); + ASSERT_FALSE(index.contains((*object)->getObject(), object->getUniqueAddress())); + } + + TEST_F(ContentIndexTest, testDuplicateInsertIsCountedOncePerAddress) + { + auto type = makeClass("ContentIndexDuplicateInsert"); + auto object = makeObject(type, 42); + auto equivalentObject = makeObject(type, 42); + auto &index = type->getContentIndex(); + + ASSERT_NO_THROW(index.insert((*object)->getObject(), object->getUniqueAddress())); + ASSERT_NO_THROW(index.insert((*equivalentObject)->getObject(), equivalentObject->getUniqueAddress())); + ASSERT_EQ(index.size(), 2); + ASSERT_TRUE(index.contains((*object)->getObject(), object->getUniqueAddress())); + ASSERT_TRUE(index.contains((*equivalentObject)->getObject(), equivalentObject->getUniqueAddress())); + } + + TEST_F(ContentIndexTest, testBucketRemainsReachableAcrossMorphingInsertAndRemove) + { + auto type = makeClass("ContentIndexBucketMorphing"); + auto &index = type->getContentIndex(); + std::vector> objects; + objects.reserve(8); + + for (std::int64_t i = 0; i < 8; ++i) { + auto object = makeObject(type, 42); + index.insert((*object)->getObject(), object->getUniqueAddress()); + 
objects.push_back(std::move(object)); + } + + ASSERT_EQ(index.size(), objects.size()); + for (const auto &object : objects) { + ASSERT_TRUE(index.contains((*object)->getObject(), object->getUniqueAddress())) + << "address=" << object->getUniqueAddress(); + } + + std::vector removed(objects.size(), false); + std::vector removedIndexes = {3, 6, 1, 5}; + for (auto removedIndex : removedIndexes) { + auto removedAddress = objects[removedIndex]->getUniqueAddress(); + index.remove((*objects[removedIndex])->getObject(), removedAddress); + removed[removedIndex] = true; + + ASSERT_FALSE(index.contains((*objects[removedIndex])->getObject(), removedAddress)) + << "removedIndex=" << removedIndex; + for (std::size_t i = 0; i < objects.size(); ++i) { + if (removed[i]) { + continue; + } + ASSERT_TRUE(index.contains((*objects[i])->getObject(), objects[i]->getUniqueAddress())) + << "remainingIndex=" << i << " removedIndex=" << removedIndex; + } + } + ASSERT_EQ(index.size(), objects.size() - removedIndexes.size()); + } + + TEST_F(ContentIndexTest, testContainsManyRealImmutableObjectReferences) + { + auto type = makeClass("ContentIndexManyImmutableObjects"); + auto &index = type->getContentIndex(); + std::vector> objects; + objects.reserve(100); + + for (std::int64_t value = 0; value < 100; ++value) { + auto object = makeObject(type, value); + index.insert((*object)->getObject(), object->getUniqueAddress()); + objects.push_back(std::move(object)); + } + + for (std::int64_t value = 0; value < 100; ++value) { + ASSERT_TRUE(index.contains( + (*objects[static_cast(value)])->getObject(), + objects[static_cast(value)]->getUniqueAddress() + )) << "value=" << value; + } + } + + TEST_F(ContentIndexTest, testClassCreatesContentIndexLazilyAndReopensIt) + { + auto type = makeClass("ContentIndexClassIntegration"); + ASSERT_FALSE(type->hasContentIndex()); + + auto object = makeObject(type, 42); + auto &index = type->getContentIndex(); + ASSERT_TRUE(type->hasContentIndex()); + 
index.insert((*object)->getObject(), object->getUniqueAddress()); + type->commit(); + + auto reopened = std::make_shared(m_fixture, type->getAddress()); + ASSERT_TRUE(reopened->hasContentIndex()); + ASSERT_TRUE(reopened->getContentIndex().contains((*object)->getObject(), object->getUniqueAddress())); + } + + TEST_F(ContentIndexTest, testLegacyClassVersionDoesNotExposeContentIndex) + { + auto type = std::static_pointer_cast(makeClass("ContentIndexLegacyClass")); + type->forceObjVersionForTest(0); + + ASSERT_FALSE(type->hasContentIndex()); + ASSERT_THROW(type->getContentIndex(), db0::InputException); + + const Class &constType = *type; + ASSERT_THROW(constType.getContentIndex(), db0::InputException); + } + +} diff --git a/tests/unit_tests/EmbeddedObjectTest.cpp b/tests/unit_tests/EmbeddedObjectTest.cpp index bb9a7a3d..cb57d41d 100644 --- a/tests/unit_tests/EmbeddedObjectTest.cpp +++ b/tests/unit_tests/EmbeddedObjectTest.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -13,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +22,7 @@ #include #include +#include namespace tests { @@ -46,6 +49,18 @@ namespace tests return *initializer; } + static ImmutableObjectInitializer &makeInitializer( + ObjectInitializerManager &manager, int &object, std::shared_ptr type + ) + { + manager.addInitializerFor(object, std::move(type)); + auto *initializer = dynamic_cast(manager.findInitializer(object)); + if (!initializer) { + throw std::runtime_error("immutable initializer not found"); + } + return *initializer; + } + static db0::python::shared_py_object makeMemoType() { static std::uint64_t memoTypeIndex = 0; @@ -126,6 +141,335 @@ namespace tests ASSERT_FALSE(object->fixedValue(999).has_value()); } + TEST_F( EmbeddedObjectTest , testInternContentInitializerAndEmbeddedObjectMatch ) + { + db0::tests::drop("intern-content-init.db0"); + db0::tests::drop("intern-content-init.db0.lock"); + Workspace workspace("", {}, {}, 
{}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture("intern-content-init"); + auto type = getTestClass(fixture); + type->addField("a", 0); + type->addField("b", 0); + type->flush(); + + auto memspace = getMemspace(); + int sourceA = 0; + ObjectInitializerManager managerA; + auto &initializerA = makeInitializer(managerA, sourceA, type); + initializerA.set({2, 0}, StorageClass::INT64, Value(20)); + initializerA.set({0, 0}, StorageClass::INT64, Value(10)); + + int sourceB = 0; + ObjectInitializerManager managerB; + auto &initializerB = makeInitializer(managerB, sourceB, type); + initializerB.set({0, 0}, StorageClass::INT64, Value(10)); + initializerB.set({2, 0}, StorageClass::INT64, Value(20)); + + v_object object(memspace, type->getClassRef(), initializerA); + + ASSERT_EQ(intern_compare(fixture, initializerB, *object.getData()), 0); + ASSERT_EQ(intern_compare(fixture, initializerA, initializerB), 0); + ASSERT_EQ(intern_hash(fixture, initializerB), intern_hash(fixture, *object.getData())); + + workspace.close(); + db0::tests::drop("intern-content-init.db0"); + db0::tests::drop("intern-content-init.db0.lock"); + } + + TEST_F( EmbeddedObjectTest , testInternCompareComplexInitializerMatchesEmbeddedObjectWithEmbeddedValues ) + { + Py_Initialize(); + + db0::tests::drop("intern-content-complex.db0"); + db0::tests::drop("intern-content-complex.db0.lock"); + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture("intern-content-complex"); + auto type = getTestClass(fixture); + type->addField("number", 0); + type->addField("when", 0); + type->addField("items", 0); + type->addField("attrs", 0); + type->flush(); + + auto makeItems = [] { + auto items = Py_OWN(PyList_New(3)); + db0::python::PySafeList_SetItem(items.get(), 0, Py_OWN(PyLong_FromLong(7))); + db0::python::PySafeList_SetItem(items.get(), 1, Py_OWN(PyUnicode_FromString("seven"))); + db0::python::PySafeList_SetItem(items.get(), 2, 
Py_OWN(PyBool_FromLong(1))); + return items; + }; + + auto makeAttrs = [] { + auto attrs = Py_OWN(PyDict_New()); + db0::python::PySafeDict_SetItem( + attrs.get(), Py_OWN(PyUnicode_FromString("count")), Py_OWN(PyLong_FromLong(3)) + ); + db0::python::PySafeDict_SetItem( + attrs.get(), Py_OWN(PyUnicode_FromString("name")), Py_OWN(PyUnicode_FromString("dbzero")) + ); + return attrs; + }; + + auto memspace = getMemspace(); + auto objectItems = makeItems(); + auto objectAttrs = makeAttrs(); + int sourceObject = 0; + ObjectInitializerManager objectManager; + auto &objectInitializer = makeInitializer(objectManager, sourceObject, type); + objectInitializer.set({0, 0}, StorageClass::INT64, Value(123)); + objectInitializer.set({20, 0}, StorageClass::DATE, Value(20260519)); + objectInitializer.setObject( + {100, 0}, StorageClass::DB0_LIST, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(objectItems.get()) + ); + objectInitializer.setObject( + {102, 0}, StorageClass::DB0_DICT, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(objectAttrs.get()) + ); + v_object object(memspace, type->getClassRef(), objectInitializer); + + auto initializerItems = makeItems(); + auto initializerAttrs = makeAttrs(); + int sourceInitializer = 0; + ObjectInitializerManager initializerManager; + auto &initializer = makeInitializer(initializerManager, sourceInitializer, type); + initializer.set({20, 0}, StorageClass::DATE, Value(20260519)); + initializer.setObject( + {102, 0}, StorageClass::DB0_DICT, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(initializerAttrs.get()) + ); + initializer.set({0, 0}, StorageClass::INT64, Value(123)); + initializer.setObject( + {100, 0}, StorageClass::DB0_LIST, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(initializerItems.get()) + ); + + ASSERT_EQ(intern_compare(fixture, initializer, *object.getData()), 0); + ASSERT_EQ(intern_compare(fixture, *object.getData(), initializer), 0); + ASSERT_EQ(intern_hash(fixture, initializer), 
intern_hash(fixture, *object.getData())); + + workspace.close(); + db0::tests::drop("intern-content-complex.db0"); + db0::tests::drop("intern-content-complex.db0.lock"); + } + + TEST_F( EmbeddedObjectTest , testInternCompareInitializerMatchesEmbeddedObjectWithReferences ) + { + Py_Initialize(); + + db0::tests::drop("intern-content-complex-ref.db0"); + db0::tests::drop("intern-content-complex-ref.db0.lock"); + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture("intern-content-complex-ref"); + auto nestedClass = getTestClass(fixture); + auto rootClass = getTestClass(fixture); + rootClass->addField("embedded", 0); + rootClass->addField("resolved", 0); + rootClass->flush(); + auto pyMemoType = makeMemoType(); + ASSERT_TRUE(pyMemoType.get()); + + auto makeMemo = [&](std::int64_t value) { + auto pyMemo = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(pyMemoType.get()) + )); + pyMemo->makeNew(nestedClass); + auto *initializer = dynamic_cast( + InitManager::instance.findInitializer(pyMemo->ext()) + ); + if (!initializer) { + throw std::runtime_error("memo initializer not found"); + } + initializer->set({0, 0}, StorageClass::INT64, Value(value)); + return pyMemo; + }; + + auto embeddedMemoForObject = makeMemo(11); + auto embeddedMemoForInitializer = makeMemo(11); + auto resolvedMemo = makeMemo(22); + + { + db0::FixtureLock lock(fixture); + auto &memo = resolvedMemo->modifyExt(); + memo.setLangObject(reinterpret_cast(resolvedMemo.get())); + memo.postInit(lock); + fixture->getLangCache().add(memo.getAddress(), reinterpret_cast(resolvedMemo.get())); + } + + auto memspace = getMemspace(); + int sourceObject = 0; + ObjectInitializerManager objectManager; + auto &objectInitializer = makeInitializer(objectManager, sourceObject, rootClass); + objectInitializer.setObject( + {0, 0}, StorageClass::OBJECT_REF, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(reinterpret_cast(embeddedMemoForObject.get())) + 
); + objectInitializer.set( + {1, 0}, StorageClass::OBJECT_REF, Value(resolvedMemo->ext().getAddress()) + ); + v_object object(memspace, rootClass->getClassRef(), objectInitializer); + + int sourceInitializer = 0; + ObjectInitializerManager initializerManager; + auto &initializer = makeInitializer(initializerManager, sourceInitializer, rootClass); + initializer.set( + {1, 0}, StorageClass::OBJECT_REF, Value(resolvedMemo->ext().getAddress()) + ); + initializer.setObject( + {0, 0}, StorageClass::OBJECT_REF, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(reinterpret_cast(embeddedMemoForInitializer.get())) + ); + + ASSERT_EQ(intern_compare(fixture, initializer, *object.getData()), 0); + ASSERT_EQ(intern_compare(fixture, *object.getData(), initializer), 0); + ASSERT_EQ(intern_hash(fixture, initializer), intern_hash(fixture, *object.getData())); + + workspace.close(); + db0::tests::drop("intern-content-complex-ref.db0"); + db0::tests::drop("intern-content-complex-ref.db0.lock"); + } + + TEST_F( EmbeddedObjectTest , testInternContentTraversesResolvedObjectReferences ) + { + Py_Initialize(); + + db0::tests::drop("intern-content-ref.db0"); + db0::tests::drop("intern-content-ref.db0.lock"); + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture("intern-content-ref"); + auto type = getTestClass(fixture); + auto pyMemoType = makeMemoType(); + ASSERT_TRUE(pyMemoType.get()); + + auto pyMemo = Py_OWN(reinterpret_cast( + db0::python::MemoObjectStub_new(pyMemoType.get()) + )); + pyMemo->makeNew(type); + auto *memoInitializer = dynamic_cast( + InitManager::instance.findInitializer(pyMemo->ext()) + ); + ASSERT_NE(memoInitializer, nullptr); + memoInitializer->set({0, 0}, StorageClass::INT64, Value(7)); + + { + db0::FixtureLock lock(fixture); + auto &memo = pyMemo->modifyExt(); + memo.setLangObject(reinterpret_cast(pyMemo.get())); + memo.postInit(lock); + fixture->getLangCache().add(memo.getAddress(), 
reinterpret_cast(pyMemo.get())); + } + + auto memspace = getMemspace(); + int refSourceA = 0; + ObjectInitializerManager refManagerA; + auto &refInitializerA = makeInitializer(refManagerA, refSourceA); + refInitializerA.set({0, 0}, StorageClass::OBJECT_REF, Value(pyMemo->ext().getAddress())); + v_object refObjectA(memspace, 88u, refInitializerA); + + int refSourceB = 0; + ObjectInitializerManager refManagerB; + auto &refInitializerB = makeInitializer(refManagerB, refSourceB); + refInitializerB.set({0, 0}, StorageClass::OBJECT_REF, Value(pyMemo->ext().getAddress())); + v_object refObjectB(memspace, 88u, refInitializerB); + + ASSERT_EQ(intern_hash(fixture, *refObjectA.getData()), intern_hash(fixture, *refObjectB.getData())); + ASSERT_EQ(intern_compare(fixture, *refObjectA.getData(), *refObjectB.getData()), 0); + + workspace.close(); + db0::tests::drop("intern-content-ref.db0"); + db0::tests::drop("intern-content-ref.db0.lock"); + } + + TEST_F( EmbeddedObjectTest , testInternContentOversizedValueFailsByStreamLimit ) + { + Py_Initialize(); + + db0::tests::drop("intern-content-limit.db0"); + db0::tests::drop("intern-content-limit.db0.lock"); + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture("intern-content-limit"); + auto type = getTestClass(fixture); + + int source = 0; + ObjectInitializerManager manager; + auto &initializer = makeInitializer(manager, source, type); + auto bytes = Py_OWN(PyBytes_FromStringAndSize(nullptr, 1024 * 1024 + 1)); + ASSERT_TRUE(bytes.get()); + initializer.setObject( + {0, 0}, StorageClass::DB0_BYTES, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(bytes.get()) + ); + + ASSERT_THROW((void)intern_hash(fixture, initializer), db0::InputException); + + workspace.close(); + db0::tests::drop("intern-content-limit.db0"); + db0::tests::drop("intern-content-limit.db0.lock"); + } + + TEST_F( EmbeddedObjectTest , testInternContentNormalizesDictInsertionOrder ) + { + Py_Initialize(); 
+ + auto makeDict = [](bool reverse) { + auto dict = Py_OWN(PyDict_New()); + if (!dict) { + throw std::runtime_error("dict allocation failed"); + } + + auto add = [&](long keyValue, long itemValue) { + auto key = Py_OWN(PyLong_FromLong(keyValue)); + auto value = Py_OWN(PyLong_FromLong(itemValue)); + if (!key || !value) { + throw std::runtime_error("dict item allocation failed"); + } + db0::python::PySafeDict_SetItem(dict.get(), std::move(key), value); + }; + + if (reverse) { + add(2, 20); + add(1, 10); + } else { + add(1, 10); + add(2, 20); + } + return dict; + }; + + db0::tests::drop("intern-content-dict.db0"); + db0::tests::drop("intern-content-dict.db0.lock"); + Workspace workspace("", {}, {}, {}, {}, db0::object_model::initializer()); + auto fixture = workspace.getFixture("intern-content-dict"); + + auto memspace = getMemspace(); + auto dictA = makeDict(false); + int sourceA = 0; + ObjectInitializerManager managerA; + auto &initializerA = makeInitializer(managerA, sourceA); + initializerA.setObject( + {0, 0}, StorageClass::DB0_DICT, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(dictA.get()) + ); + v_object objectA(memspace, 99u, initializerA); + + auto dictB = makeDict(true); + int sourceB = 0; + ObjectInitializerManager managerB; + auto &initializerB = makeInitializer(managerB, sourceB); + initializerB.setObject( + {0, 0}, StorageClass::DB0_DICT, Value(0), + ImmutableObjectInitializer::ObjectSharedPtr(dictB.get()) + ); + v_object objectB(memspace, 99u, initializerB); + + ASSERT_EQ(intern_hash(fixture, *objectA.getData()), intern_hash(fixture, *objectB.getData())); + ASSERT_EQ(intern_compare(fixture, *objectA.getData(), *objectB.getData()), 0); + + workspace.close(); + db0::tests::drop("intern-content-dict.db0"); + db0::tests::drop("intern-content-dict.db0.lock"); + } + TEST_F( EmbeddedObjectTest , testImmutableRootEncapsulatesEmbeddedObjectStorage ) { Py_Initialize(); diff --git a/tests/unit_tests/ObjectInitializerTest.cpp 
b/tests/unit_tests/ObjectInitializerTest.cpp index 1f67268c..57f81257 100644 --- a/tests/unit_tests/ObjectInitializerTest.cpp +++ b/tests/unit_tests/ObjectInitializerTest.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include