diff --git a/knowledge_graph/querying.py b/knowledge_graph/querying.py index 40ea0be..a17b6b1 100644 --- a/knowledge_graph/querying.py +++ b/knowledge_graph/querying.py @@ -22,7 +22,7 @@ class NodeTitleMissing(Exception): # noqa pass -_node_to_graph: Callable[ +node_to_graph: Callable[ [storage.Node], triplets_index.TripletsWithIndex ] = gamla.compose_left(gamla.attrgetter("graph_id"), storage.get_graph) @@ -258,13 +258,7 @@ def get_node_inner_value( def filter_entities_by_attribute( entities: storage.Nodes, attributes: storage.Nodes ) -> storage.Nodes: - return gamla.pipe( - entities, - gamla.filter( - gamla.compose_left(get_node_edges, gamla.anymap(gamla.contains(attributes))) - ), - frozenset, - ) + return entities & pointing_to(attributes) @gamla.curry @@ -425,7 +419,7 @@ def get_attribute_first_value(attribute: str, node: storage.Node) -> str: try: return gamla.head( find_attr_display_text( - node, find_exactly_bare(attribute, _node_to_graph(node)) + node, find_exactly_bare(attribute, node_to_graph(node)) ) ) except StopIteration: @@ -457,7 +451,7 @@ def query_by_primitive(kg: triplets_index.TripletsWithIndex): def get_entity_attribute( type: storage.Node, node: storage.Node ) -> Iterable[storage.Node]: - graph = _node_to_graph(node) + graph = node_to_graph(node) return gamla.pipe( graph.subject_relation_and_object_type_index(node.node_id)( common_relations.ASSOCIATION @@ -474,7 +468,7 @@ def get_entity_attribute_with_text( ) -> Iterable[storage.Node]: return gamla.pipe( node, - gamla.pair_with(gamla.compose_left(_node_to_graph, find_exactly_bare(type))), + gamla.pair_with(gamla.compose_left(node_to_graph, find_exactly_bare(type))), gamla.star(get_entity_attribute), ) @@ -491,7 +485,7 @@ def is_neighbor_by_id(node: storage.Node) -> bool: return gamla.pipe( node, get_node_edges, - gamla.inside(find_exactly_bare(text, _node_to_graph(node))), + gamla.inside(find_exactly_bare(text, node_to_graph(node))), ) return is_neighbor_by_id @@ -499,7 +493,7 @@ def is_neighbor_by_id(node: storage.Node) -> bool: def is_instance_by_id(text: str) -> Callable[[storage.Node], bool]: def is_instance_by_id(node: storage.Node) -> bool: - graph = _node_to_graph(node) + graph = node_to_graph(node) if not querying_raw.is_node_in_graph(graph, text): return False return gamla.pipe( @@ -594,7 +588,7 @@ def nodes_of_type_related_to_node( ) -> storage.Nodes: return get_node_reverse_edges(kg_node) & gamla.pipe( kg_node, - _node_to_graph, + node_to_graph, find_exactly_bare(nodes_type), get_node_instances, ) diff --git a/knowledge_graph/triplets_index.py b/knowledge_graph/triplets_index.py index 798f0b0..79de3f2 100644 --- a/knowledge_graph/triplets_index.py +++ b/knowledge_graph/triplets_index.py @@ -7,9 +7,6 @@ from . import common_relations, triplet -# This is a workaround for trading memory to query time in very large graphs -MAX_TRIPLETS_FOR_DEEP_INDICES = 5_000_000 - _OneLevelIndex = Callable[[triplet.Element], FrozenSet[triplet.Triplet]] _TwoLevelIndex = Callable[[triplet.Element], _OneLevelIndex] _ThreeLevelIndex = Callable[[triplet.Element], _TwoLevelIndex] @@ -30,6 +27,7 @@ def trigger_cached_properties(self): self.object_index self.object_relation_index self.subject_relation_and_object_type_index + self.subject_relation_index @functools.cached_property def subject_index(self) -> _OneLevelIndex: @@ -63,16 +61,15 @@ def object_index(self) -> _OneLevelIndex: @functools.cached_property def subject_relation_index(self) -> _TwoLevelIndex: - if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: - return gamla.pipe( - self, - triplets, - gamla.timeit_with_label("building subject_relation_index")( - gamla.make_index( - map(gamla.groupby, [triplet.subject, triplet.relation]) - ) - ), - ) + return gamla.pipe( + self, + triplets, + gamla.timeit_with_label("building subject_relation_index")( + gamla.make_index( + map(gamla.groupby, [triplet.subject, triplet.relation]) + ) + ), + ) def subject_relation_index(subject: triplet.Element) -> _OneLevelIndex: def relation_for_subject(relation): @@ -84,16 +81,15 @@ def relation_for_subject(relation): @functools.cached_property def object_relation_index(self) -> _TwoLevelIndex: - if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: - return gamla.pipe( - self, - triplets, - gamla.timeit_with_label("building object_relation_index")( - gamla.make_index( - map(gamla.groupby, [triplet.object, triplet.relation]) - ) - ), - ) + return gamla.pipe( + self, + triplets, + gamla.timeit_with_label("building object_relation_index")( + gamla.make_index( + map(gamla.groupby, [triplet.object, triplet.relation]) + ) + ), + ) def object_relation_index(object: triplet.Element) -> _OneLevelIndex: def relation_for_object(relation): @@ -105,29 +101,28 @@ def relation_for_object(relation): @functools.cached_property def subject_relation_and_object_type_index(self) -> _ThreeLevelIndex: - if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: - return gamla.pipe( - self, - triplets, - gamla.timeit_with_label( - "building subject_relation_and_object_type_index" - )( - gamla.make_index( - [ - gamla.groupby(triplet.subject), - gamla.groupby(triplet.relation), - gamla.groupby_many( - gamla.compose_left( - triplet.object, - self.subject_relation_index, - gamla.apply(common_relations.TYPE), - gamla.map(triplet.object), - ) - ), - ] - ) - ), - ) + return gamla.pipe( + self, + triplets, + gamla.timeit_with_label( + "building subject_relation_and_object_type_index" + )( + gamla.make_index( + [ + gamla.groupby(triplet.subject), + gamla.groupby(triplet.relation), + gamla.groupby_many( + gamla.compose_left( + triplet.object, + self.subject_relation_index, + gamla.apply(common_relations.TYPE), + gamla.map(triplet.object), + ) + ), + ] + ) + ), + ) def subject_relation_and_object_type_index( subject: triplet.Element, diff --git a/setup.py b/setup.py index e90fd03..cc26d49 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setuptools.setup( name="knowledge-graph", - version="0.0.32", + version="0.0.33", python_requires=">=3.11", description="A library to store data in a knowledge graph", long_description=_LONG_DESCRIPTION,