diff --git a/knowledge_graph/conftest.py b/knowledge_graph/conftest.py new file mode 100644 index 0000000..63c8602 --- /dev/null +++ b/knowledge_graph/conftest.py @@ -0,0 +1,20 @@ +import pytest + +from knowledge_graph import common_relations, triplets_index + +APPLE = "apple" +ORANGE = "orange" +FRUIT = "fruit" +CHAIR = "chair" +TABLE = "table" + + +@pytest.fixture(scope="session") +def kg(): + return triplets_index.from_triplets( + [ + common_relations.type_triplet(APPLE, FRUIT), + common_relations.type_triplet(ORANGE, FRUIT), + common_relations.association_triplet(CHAIR, TABLE), + ] + ) diff --git a/knowledge_graph/querying_raw_test.py b/knowledge_graph/querying_raw_test.py index e83e7eb..a9bfab1 100644 --- a/knowledge_graph/querying_raw_test.py +++ b/knowledge_graph/querying_raw_test.py @@ -1,33 +1,17 @@ -import pytest - -from knowledge_graph import common_relations, querying_raw, triplets_index - -_APPLE = "apple" -_ORANGE = "orange" -_FRUIT = "fruit" -_CHAIR = "chair" -_TABLE = "table" - - -@pytest.fixture(scope="module") -def kg(): - return triplets_index.from_triplets( - [ - common_relations.type_triplet(_APPLE, _FRUIT), - common_relations.type_triplet(_ORANGE, _FRUIT), - common_relations.association_triplet(_CHAIR, _TABLE), - ] - ) +from knowledge_graph import common_relations, conftest, querying_raw def test_instances_of_type(kg): - assert set(querying_raw.instances_of_type(_FRUIT)(kg)) == {_APPLE, _ORANGE} + assert set(querying_raw.instances_of_type(conftest.FRUIT)(kg)) == { + conftest.APPLE, + conftest.ORANGE, + } def test_triplets_with_relation(kg): assert set(querying_raw.triplets_with_relation(common_relations.TYPE)(kg)) == { - common_relations.type_triplet(_APPLE, _FRUIT), - common_relations.type_triplet(_ORANGE, _FRUIT), + common_relations.type_triplet(conftest.APPLE, conftest.FRUIT), + common_relations.type_triplet(conftest.ORANGE, conftest.FRUIT), } @@ -37,7 +21,19 @@ def test_triplets_with_relations(kg): [common_relations.TYPE, common_relations.ASSOCIATION] )(kg) ) == { - common_relations.type_triplet(_APPLE, _FRUIT), - common_relations.type_triplet(_ORANGE, _FRUIT), - common_relations.association_triplet(_CHAIR, _TABLE), + common_relations.type_triplet(conftest.APPLE, conftest.FRUIT), + common_relations.type_triplet(conftest.ORANGE, conftest.FRUIT), + common_relations.association_triplet(conftest.CHAIR, conftest.TABLE), } + + +def test_neighbors(kg): + assert querying_raw.neighbors( + common_relations.TYPE, kg, conftest.APPLE + ) == frozenset([conftest.FRUIT]) + + +def test_neighbors_doesnt_exist(kg): + assert ( + querying_raw.neighbors(common_relations.TYPE, kg, conftest.CHAIR) == frozenset() + ) diff --git a/knowledge_graph/storage.py b/knowledge_graph/storage.py index 4eb749a..b3446a2 100644 --- a/knowledge_graph/storage.py +++ b/knowledge_graph/storage.py @@ -10,7 +10,12 @@ GraphHash = str - +_FIELDS_TO_SERIALIZE = [ + "triplets", + "_subject_index", + # "_relation_index", + # "_object_index", +] _REGISTERED_GRAPHS: Dict[GraphHash, triplets_index.TripletsWithIndex] = {} @@ -77,16 +82,29 @@ def inner(node: Node) -> Nodes: return inner -_from_json = gamla.compose_left( - gamla.itemgetter("triplets"), - gamla.map( - triplet.transform_object(gamla.when(gamla.is_instance(dict), gamla.freeze_deep)) +def _from_triplets_and_index(kg_dict: Dict) -> triplets_index.TripletsWithIndex: + ... + + +from_json = gamla.compose_left( + gamla.transform_item( + "triplets", + gamla.map( + triplet.transform_object( + gamla.when(gamla.is_instance(dict), gamla.freeze_deep) + ) + ), ), - triplets_index.from_triplets, + _from_triplets_and_index, ) + to_json: Callable[[triplets_index.TripletsWithIndex], Dict] = gamla.compose_left( - triplets_index.triplets, sorted, gamla.wrap_dict("triplets") + gamla.side_effect(triplets_index.TripletsWithIndex.trigger_cached_properties), + gamla.apply_spec( + {field: gamla.attrgetter(field) for field in _FIELDS_TO_SERIALIZE} + ), + gamla.transform_item("triplets", sorted), ) @@ -101,7 +119,7 @@ def load_to_kg( file_store.load_by_hash( environment == environment_local, bucket_name ), - _from_json, + from_json, ), exception_type=KeyError, ) @@ -111,4 +129,4 @@ def load_to_kg( ) -load_knowledge_graph_from_file = gamla.compose_left(json.load, _from_json) +load_knowledge_graph_from_file = gamla.compose_left(json.load, from_json) diff --git a/knowledge_graph/storage_test.py b/knowledge_graph/storage_test.py new file mode 100644 index 0000000..1ff1602 --- /dev/null +++ b/knowledge_graph/storage_test.py @@ -0,0 +1,7 @@ +import gamla + +from knowledge_graph import storage + + +def test_serialize_and_load(kg): + assert kg == gamla.pipe(kg, storage.to_json, storage.from_json) diff --git a/knowledge_graph/transform_test.py b/knowledge_graph/transform_test.py index 09a95c2..716211d 100644 --- a/knowledge_graph/transform_test.py +++ b/knowledge_graph/transform_test.py @@ -2,22 +2,26 @@ import gamla # noqa -from knowledge_graph import common_relations, transform, triplets_index +from knowledge_graph import common_relations, conftest, transform, triplets_index -APPLE = "apple" -ORANGE = "orange" -FRUIT = "fruit" +conftest.APPLE = "conftest.APPLE" +conftest.ORANGE = "conftest.ORANGE" +conftest.FRUIT = "conftest.FRUIT" def test_merge_graphs_nodes_by_id(): - kg1 = triplets_index.from_triplet(common_relations.type_triplet(APPLE, FRUIT)) - kg2 = triplets_index.from_triplet(common_relations.type_triplet(ORANGE, FRUIT)) + kg1 = triplets_index.from_triplet( + common_relations.type_triplet(conftest.APPLE, conftest.FRUIT) + ) + kg2 = triplets_index.from_triplet( + common_relations.type_triplet(conftest.ORANGE, conftest.FRUIT) + ) assert transform.merge_graphs_nodes_by_id( [kg1, kg2] ) == triplets_index.from_triplets( [ - common_relations.type_triplet(APPLE, FRUIT), - common_relations.type_triplet(ORANGE, FRUIT), + common_relations.type_triplet(conftest.APPLE, conftest.FRUIT), + common_relations.type_triplet(conftest.ORANGE, conftest.FRUIT), ] ) # Make sure merge implementation is better than just concatenating the triplets. diff --git a/knowledge_graph/triplets_index.py b/knowledge_graph/triplets_index.py index 74d84c9..a1e2e78 100644 --- a/knowledge_graph/triplets_index.py +++ b/knowledge_graph/triplets_index.py @@ -5,6 +5,7 @@ import dataclasses_json import gamla import immutables +from gamla import nested_index from . import common_relations, triplet @@ -28,60 +29,73 @@ def trigger_cached_properties(self): self.object_relation_index self.subject_relation_and_object_type_index + @functools.cached_property + def _subject_index(self) -> nested_index.NestedIndex: + return nested_index.build([gamla.groupby(triplet.subject)], self.triplets) + @functools.cached_property def subject_index(self) -> _OneLevelIndex: - return gamla.pipe( - self, triplets, gamla.make_index(map(gamla.groupby, [triplet.subject])) - ) + return nested_index.to_query(self._subject_index) + + @functools.cached_property + def _relation_index(self) -> nested_index.NestedIndex: + return nested_index.build([gamla.groupby(triplet.relation)], self.triplets) @functools.cached_property def relation_index(self) -> _OneLevelIndex: - return gamla.pipe( - self, triplets, gamla.make_index(map(gamla.groupby, [triplet.relation])) - ) + return nested_index.to_query(self._relation_index) + + @functools.cached_property + def _object_index(self) -> nested_index.NestedIndex: + return nested_index.build([gamla.groupby(triplet.object)], self.triplets) @functools.cached_property def object_index(self) -> _OneLevelIndex: - return gamla.pipe( - self, triplets, gamla.make_index(map(gamla.groupby, [triplet.object])) + return nested_index.to_query(self._object_index) + + @functools.cached_property + def _subject_relation_index(self) -> nested_index.NestedIndex: + return nested_index.build( + map(gamla.groupby, [triplet.subject, triplet.relation]), self.triplets ) @functools.cached_property def subject_relation_index(self) -> _TwoLevelIndex: - return gamla.pipe( - self, - triplets, - gamla.make_index(map(gamla.groupby, [triplet.subject, triplet.relation])), + return nested_index.to_query(self._subject_relation_index) + + @functools.cached_property + def _object_relation_index(self) -> nested_index.NestedIndex: + return nested_index.build( + map(gamla.groupby, [triplet.object, triplet.relation]), self.triplets ) @functools.cached_property def object_relation_index(self) -> _TwoLevelIndex: - return gamla.pipe( - self, - triplets, - gamla.make_index(map(gamla.groupby, [triplet.object, triplet.relation])), + return nested_index.to_query(self._object_relation_index) + + @functools.cached_property + def _subject_relation_and_object_type_index( + self, + ) -> nested_index.NestedIndex: + return nested_index.build( + [ + gamla.groupby(triplet.subject), + gamla.groupby(triplet.relation), + gamla.groupby_many( + gamla.compose_left( + triplet.object, + self.subject_relation_index, + gamla.apply(common_relations.TYPE), + gamla.map(triplet.object), + ) + ), + ], + self.triplets, ) @functools.cached_property def subject_relation_and_object_type_index(self): - return gamla.pipe( - self, - triplets, - gamla.make_index( - [ - gamla.groupby(triplet.subject), - gamla.groupby(triplet.relation), - gamla.groupby_many( - gamla.compose_left( - triplet.object, - self.subject_relation_index, - gamla.apply(common_relations.TYPE), - gamla.map(triplet.object), - ) - ), - ] - ), - ) + return nested_index.to_query(self._subject_relation_and_object_type_index) triplets = gamla.attrgetter("triplets")