Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions knowledge_graph/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pytest

from knowledge_graph import common_relations, triplets_index

APPLE = "apple"
ORANGE = "orange"
FRUIT = "fruit"
CHAIR = "chair"
TABLE = "table"


@pytest.fixture(scope="session")
def kg():
    """Session-scoped graph fixture shared by the test modules.

    Built from two `type_triplet`s (APPLE and ORANGE, each with FRUIT) followed
    by one `association_triplet` (CHAIR, TABLE).
    """
    type_triplets = [
        common_relations.type_triplet(instance, FRUIT) for instance in (APPLE, ORANGE)
    ]
    association_triplets = [common_relations.association_triplet(CHAIR, TABLE)]
    return triplets_index.from_triplets(type_triplets + association_triplets)
48 changes: 22 additions & 26 deletions knowledge_graph/querying_raw_test.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,17 @@
import pytest

from knowledge_graph import common_relations, querying_raw, triplets_index

_APPLE = "apple"
_ORANGE = "orange"
_FRUIT = "fruit"
_CHAIR = "chair"
_TABLE = "table"


@pytest.fixture(scope="module")
def kg():
    """Module-scoped graph fixture for the querying tests.

    Contains a `type_triplet` for each of _APPLE and _ORANGE (with _FRUIT) and
    an `association_triplet` for (_CHAIR, _TABLE), in that order.
    """
    fruit_triplets = [
        common_relations.type_triplet(fruit, _FRUIT) for fruit in (_APPLE, _ORANGE)
    ]
    return triplets_index.from_triplets(
        [*fruit_triplets, common_relations.association_triplet(_CHAIR, _TABLE)]
    )
from knowledge_graph import common_relations, conftest, querying_raw


def test_instances_of_type(kg):
assert set(querying_raw.instances_of_type(_FRUIT)(kg)) == {_APPLE, _ORANGE}
assert set(querying_raw.instances_of_type(conftest.FRUIT)(kg)) == {
conftest.APPLE,
conftest.ORANGE,
}


def test_triplets_with_relation(kg):
assert set(querying_raw.triplets_with_relation(common_relations.TYPE)(kg)) == {
common_relations.type_triplet(_APPLE, _FRUIT),
common_relations.type_triplet(_ORANGE, _FRUIT),
common_relations.type_triplet(conftest.APPLE, conftest.FRUIT),
common_relations.type_triplet(conftest.ORANGE, conftest.FRUIT),
}


Expand All @@ -37,7 +21,19 @@ def test_triplets_with_relations(kg):
[common_relations.TYPE, common_relations.ASSOCIATION]
)(kg)
) == {
common_relations.type_triplet(_APPLE, _FRUIT),
common_relations.type_triplet(_ORANGE, _FRUIT),
common_relations.association_triplet(_CHAIR, _TABLE),
common_relations.type_triplet(conftest.APPLE, conftest.FRUIT),
common_relations.type_triplet(conftest.ORANGE, conftest.FRUIT),
common_relations.association_triplet(conftest.CHAIR, conftest.TABLE),
}


def test_neighbors(kg):
    """`neighbors` of APPLE under the TYPE relation is exactly {FRUIT}."""
    apple_neighbors = querying_raw.neighbors(common_relations.TYPE, kg, conftest.APPLE)
    expected = frozenset({conftest.FRUIT})
    assert apple_neighbors == expected


def test_neighbors_doesnt_exist(kg):
    """`neighbors` of CHAIR under the TYPE relation is an empty frozenset."""
    chair_neighbors = querying_raw.neighbors(common_relations.TYPE, kg, conftest.CHAIR)
    assert chair_neighbors == frozenset()
36 changes: 27 additions & 9 deletions knowledge_graph/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@

GraphHash = str


# Attribute names that `to_json` reads off a `TripletsWithIndex` (one
# `gamla.attrgetter` per entry) to build the serialized dict.
# NOTE(review): the two commented-out entries look like indexes that are not
# serialized yet; confirm whether they should be enabled or deleted.
_FIELDS_TO_SERIALIZE = [
"triplets",
"_subject_index",
# "_relation_index",
# "_object_index",
]
_REGISTERED_GRAPHS: Dict[GraphHash, triplets_index.TripletsWithIndex] = {}


Expand Down Expand Up @@ -77,16 +82,29 @@ def inner(node: Node) -> Nodes:
return inner


_from_json = gamla.compose_left(
gamla.itemgetter("triplets"),
gamla.map(
triplet.transform_object(gamla.when(gamla.is_instance(dict), gamla.freeze_deep))
def _from_triplets_and_index(kg_dict: Dict) -> triplets_index.TripletsWithIndex:
    """Build a `TripletsWithIndex` from a deserialized graph dict.

    Args:
        kg_dict: Mapping with a "triplets" entry holding the graph's triplets.
            Any other entries (e.g. a serialized "_subject_index") are
            currently ignored.

    Returns:
        The graph rebuilt via `triplets_index.from_triplets`.

    Raises:
        KeyError: If `kg_dict` has no "triplets" entry.
    """
    # A bare `...` body returns None, which made `from_json` (and therefore
    # `load_to_kg` and `load_knowledge_graph_from_file`) produce no graph at
    # all. Rebuilding from the triplets restores a working round trip.
    # TODO: reuse the serialized index instead of recomputing it.
    return triplets_index.from_triplets(kg_dict["triplets"])


from_json = gamla.compose_left(
gamla.transform_item(
"triplets",
gamla.map(
triplet.transform_object(
gamla.when(gamla.is_instance(dict), gamla.freeze_deep)
)
),
),
triplets_index.from_triplets,
_from_triplets_and_index,
)


to_json: Callable[[triplets_index.TripletsWithIndex], Dict] = gamla.compose_left(
triplets_index.triplets, sorted, gamla.wrap_dict("triplets")
gamla.side_effect(triplets_index.TripletsWithIndex.trigger_cached_properties),
gamla.apply_spec(
{field: gamla.attrgetter(field) for field in _FIELDS_TO_SERIALIZE}
),
gamla.transform_item("triplets", sorted),
)


Expand All @@ -101,7 +119,7 @@ def load_to_kg(
file_store.load_by_hash(
environment == environment_local, bucket_name
),
_from_json,
from_json,
),
exception_type=KeyError,
)
Expand All @@ -111,4 +129,4 @@ def load_to_kg(
)


load_knowledge_graph_from_file = gamla.compose_left(json.load, _from_json)
load_knowledge_graph_from_file = gamla.compose_left(json.load, from_json)
7 changes: 7 additions & 0 deletions knowledge_graph/storage_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import gamla

from knowledge_graph import storage


def test_serialize_and_load(kg):
    """Serializing with `to_json` and loading with `from_json` gives back an equal graph."""
    serialized = storage.to_json(kg)
    round_tripped = storage.from_json(serialized)
    assert kg == round_tripped
20 changes: 12 additions & 8 deletions knowledge_graph/transform_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,26 @@

import gamla # noqa

from knowledge_graph import common_relations, transform, triplets_index
from knowledge_graph import common_relations, conftest, transform, triplets_index

APPLE = "apple"
ORANGE = "orange"
FRUIT = "fruit"
# The shared constants (APPLE, ORANGE, FRUIT) are defined in `conftest`; read
# them as `conftest.<NAME>` and do not reassign them in this module. Assigning
# to `conftest.<NAME>` at import time would change the values seen by every
# other test module that is collected in the same session.


def test_merge_graphs_nodes_by_id():
kg1 = triplets_index.from_triplet(common_relations.type_triplet(APPLE, FRUIT))
kg2 = triplets_index.from_triplet(common_relations.type_triplet(ORANGE, FRUIT))
kg1 = triplets_index.from_triplet(
common_relations.type_triplet(conftest.APPLE, conftest.FRUIT)
)
kg2 = triplets_index.from_triplet(
common_relations.type_triplet(conftest.ORANGE, conftest.FRUIT)
)
assert transform.merge_graphs_nodes_by_id(
[kg1, kg2]
) == triplets_index.from_triplets(
[
common_relations.type_triplet(APPLE, FRUIT),
common_relations.type_triplet(ORANGE, FRUIT),
common_relations.type_triplet(conftest.APPLE, conftest.FRUIT),
common_relations.type_triplet(conftest.ORANGE, conftest.FRUIT),
]
)
# Make sure merge implementation is better than just concatenating the triplets.
Expand Down
82 changes: 48 additions & 34 deletions knowledge_graph/triplets_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import dataclasses_json
import gamla
import immutables
from gamla import nested_index

from . import common_relations, triplet

Expand All @@ -28,60 +29,73 @@ def trigger_cached_properties(self):
self.object_relation_index
self.subject_relation_and_object_type_index

@functools.cached_property
def _subject_index(self) -> nested_index.NestedIndex:
    """One-level nested index over `self.triplets`, grouped by `triplet.subject`.

    Computed on first access and cached on the instance.
    """
    by_subject = gamla.groupby(triplet.subject)
    return nested_index.build([by_subject], self.triplets)

@functools.cached_property
def subject_index(self) -> _OneLevelIndex:
return gamla.pipe(
self, triplets, gamla.make_index(map(gamla.groupby, [triplet.subject]))
)
return nested_index.to_query(self._subject_index)

@functools.cached_property
def _relation_index(self) -> nested_index.NestedIndex:
    """One-level nested index over `self.triplets`, grouped by `triplet.relation`.

    Computed on first access and cached on the instance.
    """
    by_relation = gamla.groupby(triplet.relation)
    return nested_index.build([by_relation], self.triplets)

@functools.cached_property
def relation_index(self) -> _OneLevelIndex:
return gamla.pipe(
self, triplets, gamla.make_index(map(gamla.groupby, [triplet.relation]))
)
return nested_index.to_query(self._relation_index)

@functools.cached_property
def _object_index(self) -> nested_index.NestedIndex:
    """One-level nested index over `self.triplets`, grouped by `triplet.object`.

    Computed on first access and cached on the instance.
    """
    by_object = gamla.groupby(triplet.object)
    return nested_index.build([by_object], self.triplets)

@functools.cached_property
def object_index(self) -> _OneLevelIndex:
return gamla.pipe(
self, triplets, gamla.make_index(map(gamla.groupby, [triplet.object]))
return nested_index.to_query(self._object_index)

@functools.cached_property
def _subject_relation_index(self) -> nested_index.NestedIndex:
    """Two-level nested index over `self.triplets`: subject first, then relation.

    Computed on first access and cached on the instance.
    """
    level_keys = [triplet.subject, triplet.relation]
    # Kept as a lazy `map` so `nested_index.build` receives the same kind of
    # iterable as before.
    grouping_steps = map(gamla.groupby, level_keys)
    return nested_index.build(grouping_steps, self.triplets)

@functools.cached_property
def subject_relation_index(self) -> _TwoLevelIndex:
return gamla.pipe(
self,
triplets,
gamla.make_index(map(gamla.groupby, [triplet.subject, triplet.relation])),
return nested_index.to_query(self._subject_relation_index)

@functools.cached_property
def _object_relation_index(self) -> nested_index.NestedIndex:
    """Two-level nested index over `self.triplets`: object first, then relation.

    Computed on first access and cached on the instance.
    """
    level_keys = [triplet.object, triplet.relation]
    # Kept as a lazy `map` so `nested_index.build` receives the same kind of
    # iterable as before.
    grouping_steps = map(gamla.groupby, level_keys)
    return nested_index.build(grouping_steps, self.triplets)

@functools.cached_property
def object_relation_index(self) -> _TwoLevelIndex:
return gamla.pipe(
self,
triplets,
gamla.make_index(map(gamla.groupby, [triplet.object, triplet.relation])),
return nested_index.to_query(self._object_relation_index)

@functools.cached_property
def _subject_relation_and_object_type_index(
    self,
) -> nested_index.NestedIndex:
    """Three-level nested index over `self.triplets`.

    Levels are: `triplet.subject`, then `triplet.relation`, then the keys
    produced for the triplet's object by the TYPE lookup below (presumably the
    types of that object; confirm against `groupby_many` semantics).
    Computed on first access and cached on the instance.
    """
    # For a triplet: take its object, pass it to `subject_relation_index`,
    # apply the result to the TYPE relation, and map `triplet.object` over
    # what comes back.
    keys_for_object = gamla.compose_left(
        triplet.object,
        self.subject_relation_index,
        gamla.apply(common_relations.TYPE),
        gamla.map(triplet.object),
    )
    grouping_steps = [
        gamla.groupby(triplet.subject),
        gamla.groupby(triplet.relation),
        gamla.groupby_many(keys_for_object),
    ]
    return nested_index.build(grouping_steps, self.triplets)

@functools.cached_property
def subject_relation_and_object_type_index(self):
return gamla.pipe(
self,
triplets,
gamla.make_index(
[
gamla.groupby(triplet.subject),
gamla.groupby(triplet.relation),
gamla.groupby_many(
gamla.compose_left(
triplet.object,
self.subject_relation_index,
gamla.apply(common_relations.TYPE),
gamla.map(triplet.object),
)
),
]
),
)
return nested_index.to_query(self._subject_relation_and_object_type_index)


triplets = gamla.attrgetter("triplets")
Expand Down