From d561eb52d07e4d82e2d53654ee4a63bd2c90efc2 Mon Sep 17 00:00:00 2001 From: itayzit Date: Wed, 27 Apr 2022 14:47:15 +0300 Subject: [PATCH 1/5] change index to save the dict and index function separately --- knowledge_graph/conftest.py | 20 +++++++ knowledge_graph/querying_raw_test.py | 48 ++++++++-------- knowledge_graph/transform_test.py | 20 ++++--- knowledge_graph/triplets_index.py | 82 ++++++++++++++++------------ 4 files changed, 102 insertions(+), 68 deletions(-) create mode 100644 knowledge_graph/conftest.py diff --git a/knowledge_graph/conftest.py b/knowledge_graph/conftest.py new file mode 100644 index 0000000..63c8602 --- /dev/null +++ b/knowledge_graph/conftest.py @@ -0,0 +1,20 @@ +import pytest + +from knowledge_graph import common_relations, triplets_index + +APPLE = "apple" +ORANGE = "orange" +FRUIT = "fruit" +CHAIR = "chair" +TABLE = "table" + + +@pytest.fixture(scope="session") +def kg(): + return triplets_index.from_triplets( + [ + common_relations.type_triplet(APPLE, FRUIT), + common_relations.type_triplet(ORANGE, FRUIT), + common_relations.association_triplet(CHAIR, TABLE), + ] + ) diff --git a/knowledge_graph/querying_raw_test.py b/knowledge_graph/querying_raw_test.py index e83e7eb..a9bfab1 100644 --- a/knowledge_graph/querying_raw_test.py +++ b/knowledge_graph/querying_raw_test.py @@ -1,33 +1,17 @@ -import pytest - -from knowledge_graph import common_relations, querying_raw, triplets_index - -_APPLE = "apple" -_ORANGE = "orange" -_FRUIT = "fruit" -_CHAIR = "chair" -_TABLE = "table" - - -@pytest.fixture(scope="module") -def kg(): - return triplets_index.from_triplets( - [ - common_relations.type_triplet(_APPLE, _FRUIT), - common_relations.type_triplet(_ORANGE, _FRUIT), - common_relations.association_triplet(_CHAIR, _TABLE), - ] - ) +from knowledge_graph import common_relations, conftest, querying_raw def test_instances_of_type(kg): - assert set(querying_raw.instances_of_type(_FRUIT)(kg)) == {_APPLE, _ORANGE} + assert set(querying_raw.instances_of_type(conftest.FRUIT)(kg)) == { + conftest.APPLE, + conftest.ORANGE, + } def test_triplets_with_relation(kg): assert set(querying_raw.triplets_with_relation(common_relations.TYPE)(kg)) == { - common_relations.type_triplet(_APPLE, _FRUIT), - common_relations.type_triplet(_ORANGE, _FRUIT), + common_relations.type_triplet(conftest.APPLE, conftest.FRUIT), + common_relations.type_triplet(conftest.ORANGE, conftest.FRUIT), } @@ -37,7 +21,19 @@ def test_triplets_with_relations(kg): [common_relations.TYPE, common_relations.ASSOCIATION] )(kg) ) == { - common_relations.type_triplet(_APPLE, _FRUIT), - common_relations.type_triplet(_ORANGE, _FRUIT), - common_relations.association_triplet(_CHAIR, _TABLE), + common_relations.type_triplet(conftest.APPLE, conftest.FRUIT), + common_relations.type_triplet(conftest.ORANGE, conftest.FRUIT), + common_relations.association_triplet(conftest.CHAIR, conftest.TABLE), } + + +def test_neighbors(kg): + assert querying_raw.neighbors( + common_relations.TYPE, kg, conftest.APPLE + ) == frozenset([conftest.FRUIT]) + + +def test_neighbors_doesnt_exist(kg): + assert ( + querying_raw.neighbors(common_relations.TYPE, kg, conftest.CHAIR) == frozenset() + ) diff --git a/knowledge_graph/transform_test.py b/knowledge_graph/transform_test.py index 09a95c2..716211d 100644 --- a/knowledge_graph/transform_test.py +++ b/knowledge_graph/transform_test.py @@ -2,22 +2,26 @@ import gamla # noqa -from knowledge_graph import common_relations, transform, triplets_index +from knowledge_graph import common_relations, conftest, transform, triplets_index -APPLE = "apple" -ORANGE = "orange" -FRUIT = "fruit" +conftest.APPLE = "conftest.APPLE" +conftest.ORANGE = "conftest.ORANGE" +conftest.FRUIT = "conftest.FRUIT" def test_merge_graphs_nodes_by_id(): - kg1 = triplets_index.from_triplet(common_relations.type_triplet(APPLE, FRUIT)) - kg2 = triplets_index.from_triplet(common_relations.type_triplet(ORANGE, FRUIT)) + kg1 = triplets_index.from_triplet( + common_relations.type_triplet(conftest.APPLE, conftest.FRUIT) + ) + kg2 = triplets_index.from_triplet( + common_relations.type_triplet(conftest.ORANGE, conftest.FRUIT) + ) assert transform.merge_graphs_nodes_by_id( [kg1, kg2] ) == triplets_index.from_triplets( [ - common_relations.type_triplet(APPLE, FRUIT), - common_relations.type_triplet(ORANGE, FRUIT), + common_relations.type_triplet(conftest.APPLE, conftest.FRUIT), + common_relations.type_triplet(conftest.ORANGE, conftest.FRUIT), ] ) # Make sure merge implementation is better than just concatenating the triplets. diff --git a/knowledge_graph/triplets_index.py b/knowledge_graph/triplets_index.py index 74d84c9..3009210 100644 --- a/knowledge_graph/triplets_index.py +++ b/knowledge_graph/triplets_index.py @@ -5,6 +5,7 @@ import dataclasses_json import gamla import immutables +from gamla import nested_index from . import common_relations, triplet @@ -28,60 +29,73 @@ def trigger_cached_properties(self): self.object_relation_index self.subject_relation_and_object_type_index + @functools.cached_property + def _subject_index(self) -> nested_index.HierarchicalIndex: + return nested_index.build([gamla.groupby(triplet.subject)], self.triplets) + @functools.cached_property def subject_index(self) -> _OneLevelIndex: - return gamla.pipe( - self, triplets, gamla.make_index(map(gamla.groupby, [triplet.subject])) - ) + return nested_index.to_query(self._subject_index) + + @functools.cached_property + def _relation_index(self) -> nested_index.HierarchicalIndex: + return nested_index.build([gamla.groupby(triplet.relation)], self.triplets) @functools.cached_property def relation_index(self) -> _OneLevelIndex: - return gamla.pipe( - self, triplets, gamla.make_index(map(gamla.groupby, [triplet.relation])) - ) + return nested_index.to_query(self._relation_index) + + @functools.cached_property + def _object_index(self) -> nested_index.HierarchicalIndex: + return nested_index.build([gamla.groupby(triplet.object)], self.triplets) @functools.cached_property def object_index(self) -> _OneLevelIndex: - return gamla.pipe( - self, triplets, gamla.make_index(map(gamla.groupby, [triplet.object])) + return nested_index.to_query(self._object_index) + + @functools.cached_property + def _subject_relation_index(self) -> nested_index.HierarchicalIndex: + return nested_index.build( + map(gamla.groupby, [triplet.subject, triplet.relation]), self.triplets ) @functools.cached_property def subject_relation_index(self) -> _TwoLevelIndex: - return gamla.pipe( - self, - triplets, - gamla.make_index(map(gamla.groupby, [triplet.subject, triplet.relation])), + return nested_index.to_query(self._subject_relation_index) + + @functools.cached_property + def _object_relation_index(self) -> nested_index.HierarchicalIndex: + return nested_index.build( + map(gamla.groupby, [triplet.object, triplet.relation]), self.triplets ) @functools.cached_property def object_relation_index(self) -> _TwoLevelIndex: - return gamla.pipe( - self, - triplets, - gamla.make_index(map(gamla.groupby, [triplet.object, triplet.relation])), + return nested_index.to_query(self._object_relation_index) + + @functools.cached_property + def _subject_relation_and_object_type_index( + self, + ) -> nested_index.HierarchicalIndex: + return nested_index.build( + [ + gamla.groupby(triplet.subject), + gamla.groupby(triplet.relation), + gamla.groupby_many( + gamla.compose_left( + triplet.object, + self.subject_relation_index, + gamla.apply(common_relations.TYPE), + gamla.map(triplet.object), + ) + ), + ], + self.triplets, ) @functools.cached_property def subject_relation_and_object_type_index(self): - return gamla.pipe( - self, - triplets, - gamla.make_index( - [ - gamla.groupby(triplet.subject), - gamla.groupby(triplet.relation), - gamla.groupby_many( - gamla.compose_left( - triplet.object, - self.subject_relation_index, - gamla.apply(common_relations.TYPE), - gamla.map(triplet.object), - ) - ), - ] - ), - ) + return nested_index.to_query(self._subject_relation_and_object_type_index) triplets = gamla.attrgetter("triplets") From e9d4e2182cbb9f9a778b1c5e1391894fc6b45a80 Mon Sep 17 00:00:00 2001 From: itayzit Date: Wed, 27 Apr 2022 15:22:10 +0300 Subject: [PATCH 2/5] change name --- knowledge_graph/triplets_index.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/knowledge_graph/triplets_index.py b/knowledge_graph/triplets_index.py index 3009210..a1e2e78 100644 --- a/knowledge_graph/triplets_index.py +++ b/knowledge_graph/triplets_index.py @@ -30,7 +30,7 @@ def trigger_cached_properties(self): self.subject_relation_and_object_type_index @functools.cached_property - def _subject_index(self) -> nested_index.HierarchicalIndex: + def _subject_index(self) -> nested_index.NestedIndex: return nested_index.build([gamla.groupby(triplet.subject)], self.triplets) @functools.cached_property @@ -38,7 +38,7 @@ def subject_index(self) -> _OneLevelIndex: return nested_index.to_query(self._subject_index) @functools.cached_property - def _relation_index(self) -> nested_index.HierarchicalIndex: + def _relation_index(self) -> nested_index.NestedIndex: return nested_index.build([gamla.groupby(triplet.relation)], self.triplets) @functools.cached_property @@ -46,7 +46,7 @@ def relation_index(self) -> _OneLevelIndex: return nested_index.to_query(self._relation_index) @functools.cached_property - def _object_index(self) -> nested_index.HierarchicalIndex: + def _object_index(self) -> nested_index.NestedIndex: return nested_index.build([gamla.groupby(triplet.object)], self.triplets) @functools.cached_property @@ -54,7 +54,7 @@ def object_index(self) -> _OneLevelIndex: return nested_index.to_query(self._object_index) @functools.cached_property - def _subject_relation_index(self) -> nested_index.HierarchicalIndex: + def _subject_relation_index(self) -> nested_index.NestedIndex: return nested_index.build( map(gamla.groupby, [triplet.subject, triplet.relation]), self.triplets ) @@ -64,7 +64,7 @@ def subject_relation_index(self) -> _TwoLevelIndex: return nested_index.to_query(self._subject_relation_index) @functools.cached_property - def _object_relation_index(self) -> nested_index.HierarchicalIndex: + def _object_relation_index(self) -> nested_index.NestedIndex: return nested_index.build( map(gamla.groupby, [triplet.object, triplet.relation]), self.triplets ) @@ -76,7 +76,7 @@ def object_relation_index(self) -> _TwoLevelIndex: @functools.cached_property def _subject_relation_and_object_type_index( self, - ) -> nested_index.HierarchicalIndex: + ) -> nested_index.NestedIndex: return nested_index.build( [ gamla.groupby(triplet.subject), From b9f1c93ec8a755ae2638372c829c543575c78d14 Mon Sep 17 00:00:00 2001 From: itayzit Date: Thu, 28 Apr 2022 13:24:48 +0300 Subject: [PATCH 3/5] add storage_test.py --- knowledge_graph/storage.py | 6 +++--- knowledge_graph/storage_test.py | 7 +++++++ 2 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 knowledge_graph/storage_test.py diff --git a/knowledge_graph/storage.py b/knowledge_graph/storage.py index 4eb749a..1d8208a 100644 --- a/knowledge_graph/storage.py +++ b/knowledge_graph/storage.py @@ -77,7 +77,7 @@ def inner(node: Node) -> Nodes: return inner -_from_json = gamla.compose_left( +from_json = gamla.compose_left( gamla.itemgetter("triplets"), gamla.map( triplet.transform_object(gamla.when(gamla.is_instance(dict), gamla.freeze_deep)) @@ -101,7 +101,7 @@ def load_to_kg( file_store.load_by_hash( environment == environment_local, bucket_name ), - _from_json, + from_json, ), exception_type=KeyError, ) @@ -111,4 +111,4 @@ def load_to_kg( ) -load_knowledge_graph_from_file = gamla.compose_left(json.load, _from_json) +load_knowledge_graph_from_file = gamla.compose_left(json.load, from_json) diff --git a/knowledge_graph/storage_test.py b/knowledge_graph/storage_test.py new file mode 100644 index 0000000..1ff1602 --- /dev/null +++ b/knowledge_graph/storage_test.py @@ -0,0 +1,7 @@ +import gamla + +from knowledge_graph import storage + + +def test_serialize_and_load(kg): + assert kg == gamla.pipe(kg, storage.to_json, storage.from_json) From b1c70bfe38ca66b60b778d5a836940d3d065d658 Mon Sep 17 00:00:00 2001 From: itayzit Date: Thu, 28 Apr 2022 17:45:55 +0300 Subject: [PATCH 4/5] add indices in to_json --- knowledge_graph/storage.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/knowledge_graph/storage.py b/knowledge_graph/storage.py index 1d8208a..c0de07f 100644 --- a/knowledge_graph/storage.py +++ b/knowledge_graph/storage.py @@ -10,7 +10,12 @@ GraphHash = str - +_FIELDS_TO_SERIALIZE = [ + "triplets", + "_subject_index", + "_relation_index", + "_object_index", +] _REGISTERED_GRAPHS: Dict[GraphHash, triplets_index.TripletsWithIndex] = {} @@ -86,7 +91,11 @@ def inner(node: Node) -> Nodes: ) to_json: Callable[[triplets_index.TripletsWithIndex], Dict] = gamla.compose_left( - triplets_index.triplets, sorted, gamla.wrap_dict("triplets") + gamla.side_effect(triplets_index.TripletsWithIndex.trigger_cached_properties), + gamla.apply_spec( + {field: gamla.attrgetter(field) for field in _FIELDS_TO_SERIALIZE} + ), + gamla.transform_item("triplets", sorted), ) From 35009e1ab54635e02caa6701e94fff76538d187c Mon Sep 17 00:00:00 2001 From: itayzit Date: Thu, 28 Apr 2022 23:44:10 +0300 Subject: [PATCH 5/5] start working on from_json --- knowledge_graph/storage.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/knowledge_graph/storage.py b/knowledge_graph/storage.py index c0de07f..b3446a2 100644 --- a/knowledge_graph/storage.py +++ b/knowledge_graph/storage.py @@ -13,8 +13,8 @@ _FIELDS_TO_SERIALIZE = [ "triplets", "_subject_index", - "_relation_index", - "_object_index", + # "_relation_index", + # "_object_index", ] _REGISTERED_GRAPHS: Dict[GraphHash, triplets_index.TripletsWithIndex] = {} @@ -82,14 +82,23 @@ def inner(node: Node) -> Nodes: return inner +def _from_triplets_and_index(kg_dict: Dict) -> triplets_index.TripletsWithIndex: + ... + + from_json = gamla.compose_left( - gamla.itemgetter("triplets"), - gamla.map( - triplet.transform_object(gamla.when(gamla.is_instance(dict), gamla.freeze_deep)) + gamla.transform_item( + "triplets", + gamla.map( + triplet.transform_object( + gamla.when(gamla.is_instance(dict), gamla.freeze_deep) + ) + ), ), - triplets_index.from_triplets, + _from_triplets_and_index, ) + to_json: Callable[[triplets_index.TripletsWithIndex], Dict] = gamla.compose_left( gamla.side_effect(triplets_index.TripletsWithIndex.trigger_cached_properties), gamla.apply_spec(