From 0c57983e6393eacd2028f6fcf8bafaf4db86de51 Mon Sep 17 00:00:00 2001 From: danik Date: Thu, 24 Apr 2025 16:57:44 +0300 Subject: [PATCH 01/18] remove MAX_TRIPLETS_FOR_DEEP_INDICES dependency --- knowledge_graph/triplets_index.py | 86 +++++++++++++++---------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/knowledge_graph/triplets_index.py b/knowledge_graph/triplets_index.py index 798f0b0..13784d8 100644 --- a/knowledge_graph/triplets_index.py +++ b/knowledge_graph/triplets_index.py @@ -63,16 +63,16 @@ def object_index(self) -> _OneLevelIndex: @functools.cached_property def subject_relation_index(self) -> _TwoLevelIndex: - if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: - return gamla.pipe( - self, - triplets, - gamla.timeit_with_label("building subject_relation_index")( - gamla.make_index( - map(gamla.groupby, [triplet.subject, triplet.relation]) - ) - ), - ) + # if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: + return gamla.pipe( + self, + triplets, + gamla.timeit_with_label("building subject_relation_index")( + gamla.make_index( + map(gamla.groupby, [triplet.subject, triplet.relation]) + ) + ), + ) def subject_relation_index(subject: triplet.Element) -> _OneLevelIndex: def relation_for_subject(relation): @@ -84,16 +84,16 @@ def relation_for_subject(relation): @functools.cached_property def object_relation_index(self) -> _TwoLevelIndex: - if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: - return gamla.pipe( - self, - triplets, - gamla.timeit_with_label("building object_relation_index")( - gamla.make_index( - map(gamla.groupby, [triplet.object, triplet.relation]) - ) - ), - ) + # if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: + return gamla.pipe( + self, + triplets, + gamla.timeit_with_label("building object_relation_index")( + gamla.make_index( + map(gamla.groupby, [triplet.object, triplet.relation]) + ) + ), + ) def object_relation_index(object: triplet.Element) -> _OneLevelIndex: def relation_for_object(relation): @@ -105,29 +105,29 @@ def relation_for_object(relation): @functools.cached_property def subject_relation_and_object_type_index(self) -> _ThreeLevelIndex: - if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: - return gamla.pipe( - self, - triplets, - gamla.timeit_with_label( - "building subject_relation_and_object_type_index" - )( - gamla.make_index( - [ - gamla.groupby(triplet.subject), - gamla.groupby(triplet.relation), - gamla.groupby_many( - gamla.compose_left( - triplet.object, - self.subject_relation_index, - gamla.apply(common_relations.TYPE), - gamla.map(triplet.object), - ) - ), - ] - ) - ), - ) + # if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: + return gamla.pipe( + self, + triplets, + gamla.timeit_with_label( + "building subject_relation_and_object_type_index" + )( + gamla.make_index( + [ + gamla.groupby(triplet.subject), + gamla.groupby(triplet.relation), + gamla.groupby_many( + gamla.compose_left( + triplet.object, + self.subject_relation_index, + gamla.apply(common_relations.TYPE), + gamla.map(triplet.object), + ) + ), + ] + ) + ), + ) def subject_relation_and_object_type_index( subject: triplet.Element, From 78cf2c9ba85e11bb1549bdc977a478d309938c56 Mon Sep 17 00:00:00 2001 From: danik Date: Mon, 28 Apr 2025 12:47:07 +0300 Subject: [PATCH 02/18] more logs and fix for get_nodes_by_relations --- knowledge_graph/querying.py | 4 +--- knowledge_graph/triplets_index.py | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/knowledge_graph/querying.py b/knowledge_graph/querying.py index 40ea0be..d1694c1 100644 --- a/knowledge_graph/querying.py +++ b/knowledge_graph/querying.py @@ -29,9 +29,7 @@ class NodeTitleMissing(Exception): # noqa get_nodes_by_relations: Callable[ [Iterable[triplet.Element]], Callable[[storage.Node], storage.Nodes] ] = gamla.compose_left( - gamla.map(querying_raw.neighbors), - frozenset, - gamla.star(gamla.juxtcat), + gamla.mapcat(querying_raw.neighbors), storage.run_on_kg_and_node, ) diff --git a/knowledge_graph/triplets_index.py b/knowledge_graph/triplets_index.py index 13784d8..5a070bc 100644 --- a/knowledge_graph/triplets_index.py +++ b/knowledge_graph/triplets_index.py @@ -172,6 +172,7 @@ def object_type(type_: triplet.Element): def retrieve( relation: triplet.Element, index: Callable[[TripletsWithIndex], _TwoLevelIndex] ): + @gamla.timeit def retrieve(graph: TripletsWithIndex, node: triplet.Element): return index(graph)(node)(relation) From 860eadb380390db494bde1f537afcce0aba8dbf9 Mon Sep 17 00:00:00 2001 From: danik Date: Mon, 28 Apr 2025 12:52:44 +0300 Subject: [PATCH 03/18] use MAX_TRIPLETS_FOR_DEEP_INDICES --- knowledge_graph/triplets_index.py | 86 +++++++++++++++---------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/knowledge_graph/triplets_index.py b/knowledge_graph/triplets_index.py index 5a070bc..c2734e2 100644 --- a/knowledge_graph/triplets_index.py +++ b/knowledge_graph/triplets_index.py @@ -63,16 +63,16 @@ def object_index(self) -> _OneLevelIndex: @functools.cached_property def subject_relation_index(self) -> _TwoLevelIndex: - # if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: - return gamla.pipe( - self, - triplets, - gamla.timeit_with_label("building subject_relation_index")( - gamla.make_index( - map(gamla.groupby, [triplet.subject, triplet.relation]) - ) - ), - ) + if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: + return gamla.pipe( + self, + triplets, + gamla.timeit_with_label("building subject_relation_index")( + gamla.make_index( + map(gamla.groupby, [triplet.subject, triplet.relation]) + ) + ), + ) def subject_relation_index(subject: triplet.Element) -> _OneLevelIndex: def relation_for_subject(relation): @@ -84,16 +84,16 @@ def relation_for_subject(relation): @functools.cached_property def object_relation_index(self) -> _TwoLevelIndex: - # if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: - return gamla.pipe( - self, - triplets, - gamla.timeit_with_label("building object_relation_index")( - gamla.make_index( - map(gamla.groupby, [triplet.object, triplet.relation]) - ) - ), - ) + if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: + return gamla.pipe( + self, + triplets, + gamla.timeit_with_label("building object_relation_index")( + gamla.make_index( + map(gamla.groupby, [triplet.object, triplet.relation]) + ) + ), + ) def object_relation_index(object: triplet.Element) -> _OneLevelIndex: def relation_for_object(relation): @@ -105,29 +105,29 @@ def relation_for_object(relation): @functools.cached_property def subject_relation_and_object_type_index(self) -> _ThreeLevelIndex: - # if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: - return gamla.pipe( - self, - triplets, - gamla.timeit_with_label( - "building subject_relation_and_object_type_index" - )( - gamla.make_index( - [ - gamla.groupby(triplet.subject), - gamla.groupby(triplet.relation), - gamla.groupby_many( - gamla.compose_left( - triplet.object, - self.subject_relation_index, - gamla.apply(common_relations.TYPE), - gamla.map(triplet.object), - ) - ), - ] - ) - ), - ) + if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: + return gamla.pipe( + self, + triplets, + gamla.timeit_with_label( + "building subject_relation_and_object_type_index" + )( + gamla.make_index( + [ + gamla.groupby(triplet.subject), + gamla.groupby(triplet.relation), + gamla.groupby_many( + gamla.compose_left( + triplet.object, + self.subject_relation_index, + gamla.apply(common_relations.TYPE), + gamla.map(triplet.object), + ) + ), + ] + ) + ), + ) def subject_relation_and_object_type_index( subject: triplet.Element, From 364c354cbeaecfd9addbc2d124fc1cffc761473c Mon Sep 17 00:00:00 2001 From: danik Date: Mon, 28 Apr 2025 12:59:10 +0300 Subject: [PATCH 04/18] use MAX_TRIPLETS_FOR_DEEP_INDICES --- knowledge_graph/triplets_index.py | 1 + 1 file changed, 1 insertion(+) diff --git a/knowledge_graph/triplets_index.py b/knowledge_graph/triplets_index.py index c2734e2..2449cfd 100644 --- a/knowledge_graph/triplets_index.py +++ b/knowledge_graph/triplets_index.py @@ -10,6 +10,7 @@ # This is a workaround for trading memory to query time in very large graphs MAX_TRIPLETS_FOR_DEEP_INDICES = 5_000_000 + _OneLevelIndex = Callable[[triplet.Element], FrozenSet[triplet.Triplet]] _TwoLevelIndex = Callable[[triplet.Element], _OneLevelIndex] _ThreeLevelIndex = Callable[[triplet.Element], _TwoLevelIndex] From 4eaa051112098b183d79227d118c4eb3e1a3cdcc Mon Sep 17 00:00:00 2001 From: danik Date: Mon, 28 Apr 2025 14:18:30 +0300 Subject: [PATCH 05/18] fix get_nodes_by_relations --- knowledge_graph/querying.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/knowledge_graph/querying.py b/knowledge_graph/querying.py index d1694c1..40ea0be 100644 --- a/knowledge_graph/querying.py +++ b/knowledge_graph/querying.py @@ -29,7 +29,9 @@ class NodeTitleMissing(Exception): # noqa get_nodes_by_relations: Callable[ [Iterable[triplet.Element]], Callable[[storage.Node], storage.Nodes] ] = gamla.compose_left( - gamla.mapcat(querying_raw.neighbors), + gamla.map(querying_raw.neighbors), + frozenset, + gamla.star(gamla.juxtcat), storage.run_on_kg_and_node, ) From 550ad56e4a658b1127d8d453cb79e31812b32395 Mon Sep 17 00:00:00 2001 From: danik Date: Mon, 28 Apr 2025 16:31:56 +0300 Subject: [PATCH 06/18] remove timeit from retrieve add timeit to get_node_edges --- knowledge_graph/querying.py | 2 +- knowledge_graph/triplets_index.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/knowledge_graph/querying.py b/knowledge_graph/querying.py index 40ea0be..b7b16df 100644 --- a/knowledge_graph/querying.py +++ b/knowledge_graph/querying.py @@ -261,7 +261,7 @@ def filter_entities_by_attribute( return gamla.pipe( entities, gamla.filter( - gamla.compose_left(get_node_edges, gamla.anymap(gamla.contains(attributes))) + gamla.compose_left(gamla.timeit(get_node_edges), gamla.anymap(gamla.contains(attributes))) ), frozenset, ) diff --git a/knowledge_graph/triplets_index.py b/knowledge_graph/triplets_index.py index 2449cfd..c5c2f42 100644 --- a/knowledge_graph/triplets_index.py +++ b/knowledge_graph/triplets_index.py @@ -173,7 +173,6 @@ def object_type(type_: triplet.Element): def retrieve( relation: triplet.Element, index: Callable[[TripletsWithIndex], _TwoLevelIndex] ): - @gamla.timeit def retrieve(graph: TripletsWithIndex, node: triplet.Element): return index(graph)(node)(relation) From 4473dc4650a39bcc8397a3628b7dad857f2ea48a Mon Sep 17 00:00:00 2001 From: danik Date: Tue, 29 Apr 2025 09:24:27 +0300 Subject: [PATCH 07/18] add subject_relation_index to trigger_cached_properties --- knowledge_graph/querying.py | 2 +- knowledge_graph/triplets_index.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/knowledge_graph/querying.py b/knowledge_graph/querying.py index b7b16df..40ea0be 100644 --- a/knowledge_graph/querying.py +++ b/knowledge_graph/querying.py @@ -261,7 +261,7 @@ def filter_entities_by_attribute( return gamla.pipe( entities, gamla.filter( - gamla.compose_left(gamla.timeit(get_node_edges), gamla.anymap(gamla.contains(attributes))) + gamla.compose_left(get_node_edges, gamla.anymap(gamla.contains(attributes))) ), frozenset, ) diff --git a/knowledge_graph/triplets_index.py b/knowledge_graph/triplets_index.py index c5c2f42..c4ab6b3 100644 --- a/knowledge_graph/triplets_index.py +++ b/knowledge_graph/triplets_index.py @@ -10,7 +10,6 @@ # This is a workaround for trading memory to query time in very large graphs MAX_TRIPLETS_FOR_DEEP_INDICES = 5_000_000 - _OneLevelIndex = Callable[[triplet.Element], FrozenSet[triplet.Triplet]] _TwoLevelIndex = Callable[[triplet.Element], _OneLevelIndex] _ThreeLevelIndex = Callable[[triplet.Element], _TwoLevelIndex] @@ -31,6 +30,7 @@ def trigger_cached_properties(self): self.object_index self.object_relation_index self.subject_relation_and_object_type_index + self.subject_relation_index @functools.cached_property def subject_index(self) -> _OneLevelIndex: @@ -64,7 +64,7 @@ def object_index(self) -> _OneLevelIndex: @functools.cached_property def subject_relation_index(self) -> _TwoLevelIndex: - if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: + if True or len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: return gamla.pipe( self, triplets, @@ -85,7 +85,7 @@ def relation_for_subject(relation): @functools.cached_property def object_relation_index(self) -> _TwoLevelIndex: - if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: + if True or len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: return gamla.pipe( self, triplets, @@ -106,7 +106,7 @@ def relation_for_object(relation): @functools.cached_property def subject_relation_and_object_type_index(self) -> _ThreeLevelIndex: - if len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: + if True or len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: return gamla.pipe( self, triplets, From 3052ae8ccc3af5ced93b5e8ea9f03ae542a825fc Mon Sep 17 00:00:00 2001 From: danik Date: Tue, 29 Apr 2025 12:30:59 +0300 Subject: [PATCH 08/18] add multithreading for filtering --- knowledge_graph/querying.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/knowledge_graph/querying.py b/knowledge_graph/querying.py index 40ea0be..ec8b299 100644 --- a/knowledge_graph/querying.py +++ b/knowledge_graph/querying.py @@ -1,6 +1,7 @@ # Queries that use a global storage of kgs. Best avoid and use `querying_raw.py`. import functools from typing import Callable, Dict, FrozenSet, Iterable, Optional, Tuple +import concurrent.futures import gamla @@ -253,18 +254,30 @@ def get_node_inner_value( ) +# @functools.cache +# @gamla.curry +# def filter_entities_by_attribute( +# entities: storage.Nodes, attributes: storage.Nodes +# ) -> storage.Nodes: +# return gamla.pipe( +# entities, +# gamla.filter( +# gamla.compose_left(get_node_edges, gamla.anymap(gamla.contains(attributes))) +# ), +# frozenset, +# ) + @functools.cache @gamla.curry -def filter_entities_by_attribute( - entities: storage.Nodes, attributes: storage.Nodes -) -> storage.Nodes: - return gamla.pipe( - entities, - gamla.filter( - gamla.compose_left(get_node_edges, gamla.anymap(gamla.contains(attributes))) - ), - frozenset, - ) +def filter_entities_by_attribute(entities: storage.Nodes, attributes: storage.Nodes) -> storage.Nodes: + + def filter_fn(entity): + edges = gamla.timeit(get_node_edges)(entity) + return any(attr in attributes for attr in edges) + + with concurrent.futures.ThreadPoolExecutor() as executor: + filtered_entities = filter(filter_fn, entities) + return frozenset(executor.map(lambda x: x, filtered_entities)) @gamla.curry From 110e8273084ac20cf963061e12ecaa0203081cf3 Mon Sep 17 00:00:00 2001 From: danik Date: Tue, 29 Apr 2025 18:59:57 +0300 Subject: [PATCH 09/18] remove multithreading from kg --- knowledge_graph/querying.py | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/knowledge_graph/querying.py b/knowledge_graph/querying.py index ec8b299..b9e855a 100644 --- a/knowledge_graph/querying.py +++ b/knowledge_graph/querying.py @@ -254,30 +254,18 @@ def get_node_inner_value( ) -# @functools.cache -# @gamla.curry -# def filter_entities_by_attribute( -# entities: storage.Nodes, attributes: storage.Nodes -# ) -> storage.Nodes: -# return gamla.pipe( -# entities, -# gamla.filter( -# gamla.compose_left(get_node_edges, gamla.anymap(gamla.contains(attributes))) -# ), -# frozenset, -# ) - @functools.cache @gamla.curry -def filter_entities_by_attribute(entities: storage.Nodes, attributes: storage.Nodes) -> storage.Nodes: - - def filter_fn(entity): - edges = gamla.timeit(get_node_edges)(entity) - return any(attr in attributes for attr in edges) - - with concurrent.futures.ThreadPoolExecutor() as executor: - filtered_entities = filter(filter_fn, entities) - return frozenset(executor.map(lambda x: x, filtered_entities)) +def filter_entities_by_attribute( + entities: storage.Nodes, attributes: storage.Nodes +) -> storage.Nodes: + return gamla.pipe( + entities, + gamla.filter( + gamla.compose_left(get_node_edges, gamla.anymap(gamla.contains(attributes))) + ), + frozenset, + ) @gamla.curry From bdfc6382ad0ab61f6cd6b8a01d39f3f95e55dfc5 Mon Sep 17 00:00:00 2001 From: yoni17 Date: Wed, 30 Apr 2025 13:32:37 +0300 Subject: [PATCH 10/18] improve performance by using intersection of sets --- knowledge_graph/querying.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/knowledge_graph/querying.py b/knowledge_graph/querying.py index b9e855a..2d5b8ba 100644 --- a/knowledge_graph/querying.py +++ b/knowledge_graph/querying.py @@ -1,7 +1,6 @@ # Queries that use a global storage of kgs. Best avoid and use `querying_raw.py`. import functools from typing import Callable, Dict, FrozenSet, Iterable, Optional, Tuple -import concurrent.futures import gamla @@ -259,13 +258,7 @@ def get_node_inner_value( def filter_entities_by_attribute( entities: storage.Nodes, attributes: storage.Nodes ) -> storage.Nodes: - return gamla.pipe( - entities, - gamla.filter( - gamla.compose_left(get_node_edges, gamla.anymap(gamla.contains(attributes))) - ), - frozenset, - ) + return entities & pointing_to(attributes) @gamla.curry From 106dd25505cb9e3ba746fca2dfc671e66c6e8a1d Mon Sep 17 00:00:00 2001 From: yoni17 Date: Tue, 6 May 2025 14:27:43 +0300 Subject: [PATCH 11/18] getting attribute values with intersection for optimization --- knowledge_graph/querying.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/knowledge_graph/querying.py b/knowledge_graph/querying.py index 2d5b8ba..d173dcf 100644 --- a/knowledge_graph/querying.py +++ b/knowledge_graph/querying.py @@ -261,6 +261,12 @@ def filter_entities_by_attribute( return entities & pointing_to(attributes) +def get_attribute_values( + attribute: storage.Node, entity: storage.Node +) -> storage.Nodes: + return get_node_edges(entity) & get_node_instances(attribute) + + @gamla.curry def node_similarity(node1_id: storage.Node, node2_id: storage.Node) -> float: edges_union_len = len(get_node_edges(node1_id) | get_node_edges(node2_id)) From 410123d03e6825386b2e280507abf82d14cc6387 Mon Sep 17 00:00:00 2001 From: danik Date: Wed, 7 May 2025 16:20:46 +0300 Subject: [PATCH 12/18] remove get_attribute_values MAX_TRIPLETS_FOR_DEEP_INDICES limitation --- knowledge_graph/querying.py | 6 --- knowledge_graph/triplets_index.py | 86 ++++++++++++++----------------- 2 files changed, 40 insertions(+), 52 deletions(-) diff --git a/knowledge_graph/querying.py b/knowledge_graph/querying.py index d173dcf..2d5b8ba 100644 --- a/knowledge_graph/querying.py +++ b/knowledge_graph/querying.py @@ -261,12 +261,6 @@ def filter_entities_by_attribute( return entities & pointing_to(attributes) -def get_attribute_values( - attribute: storage.Node, entity: storage.Node -) -> storage.Nodes: - return get_node_edges(entity) & get_node_instances(attribute) - - @gamla.curry def node_similarity(node1_id: storage.Node, node2_id: storage.Node) -> float: edges_union_len = len(get_node_edges(node1_id) | get_node_edges(node2_id)) diff --git a/knowledge_graph/triplets_index.py b/knowledge_graph/triplets_index.py index c4ab6b3..79de3f2 100644 --- a/knowledge_graph/triplets_index.py +++ b/knowledge_graph/triplets_index.py @@ -7,9 +7,6 @@ from . import common_relations, triplet -# This is a workaround for trading memory to query time in very large graphs -MAX_TRIPLETS_FOR_DEEP_INDICES = 5_000_000 - _OneLevelIndex = Callable[[triplet.Element], FrozenSet[triplet.Triplet]] _TwoLevelIndex = Callable[[triplet.Element], _OneLevelIndex] _ThreeLevelIndex = Callable[[triplet.Element], _TwoLevelIndex] @@ -64,16 +61,15 @@ def object_index(self) -> _OneLevelIndex: @functools.cached_property def subject_relation_index(self) -> _TwoLevelIndex: - if True or len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: - return gamla.pipe( - self, - triplets, - gamla.timeit_with_label("building subject_relation_index")( - gamla.make_index( - map(gamla.groupby, [triplet.subject, triplet.relation]) - ) - ), - ) + return gamla.pipe( + self, + triplets, + gamla.timeit_with_label("building subject_relation_index")( + gamla.make_index( + map(gamla.groupby, [triplet.subject, triplet.relation]) + ) + ), + ) def subject_relation_index(subject: triplet.Element) -> _OneLevelIndex: def relation_for_subject(relation): @@ -85,16 +81,15 @@ def relation_for_subject(relation): @functools.cached_property def object_relation_index(self) -> _TwoLevelIndex: - if True or len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: - return gamla.pipe( - self, - triplets, - gamla.timeit_with_label("building object_relation_index")( - gamla.make_index( - map(gamla.groupby, [triplet.object, triplet.relation]) - ) - ), - ) + return gamla.pipe( + self, + triplets, + gamla.timeit_with_label("building object_relation_index")( + gamla.make_index( + map(gamla.groupby, [triplet.object, triplet.relation]) + ) + ), + ) def object_relation_index(object: triplet.Element) -> _OneLevelIndex: def relation_for_object(relation): @@ -106,29 +101,28 @@ def relation_for_object(relation): @functools.cached_property def subject_relation_and_object_type_index(self) -> _ThreeLevelIndex: - if True or len(self.triplets) <= MAX_TRIPLETS_FOR_DEEP_INDICES: - return gamla.pipe( - self, - triplets, - gamla.timeit_with_label( - "building subject_relation_and_object_type_index" - )( - gamla.make_index( - [ - gamla.groupby(triplet.subject), - gamla.groupby(triplet.relation), - gamla.groupby_many( - gamla.compose_left( - triplet.object, - self.subject_relation_index, - gamla.apply(common_relations.TYPE), - gamla.map(triplet.object), - ) - ), - ] - ) - ), - ) + return gamla.pipe( + self, + triplets, + gamla.timeit_with_label( + "building subject_relation_and_object_type_index" + )( + gamla.make_index( + [ + gamla.groupby(triplet.subject), + gamla.groupby(triplet.relation), + gamla.groupby_many( + gamla.compose_left( + triplet.object, + self.subject_relation_index, + gamla.apply(common_relations.TYPE), + gamla.map(triplet.object), + ) + ), + ] + ) + ), + ) def subject_relation_and_object_type_index( subject: triplet.Element, From 58969fc905589fc077fcee842d0469ba84d89933 Mon Sep 17 00:00:00 2001 From: danik Date: Thu, 8 May 2025 16:50:03 +0300 Subject: [PATCH 13/18] "Add pip dependency caching to GitHub Actions workflow" --- .github/workflows/python-tests.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 21c9aa0..b545de9 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -21,6 +21,13 @@ jobs: uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} + - name: Cache pip dependencies + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- - name: Install dependencies run: | python -m pip install --upgrade pip From 5998222dbe1e7f1ab6522e6627ef913482f817ad Mon Sep 17 00:00:00 2001 From: danik Date: Thu, 8 May 2025 16:55:32 +0300 Subject: [PATCH 14/18] update cache key in python-tests.yml to include runner architecture --- .github/workflows/python-tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index b545de9..511f20b 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -25,8 +25,9 @@ jobs: uses: actions/cache@v3 with: path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ runner.arch }} restore-keys: | + ${{ runner.os }}-pip-${{ runner.arch }}- ${{ runner.os }}-pip- - name: Install dependencies run: | From a76d2462464dcc022ebbae40eae3796b4d19ceba Mon Sep 17 00:00:00 2001 From: danik Date: Thu, 8 May 2025 17:04:26 +0300 Subject: [PATCH 15/18] Revert "update cache key in python-tests.yml to include runner architecture" This reverts commit 5998222dbe1e7f1ab6522e6627ef913482f817ad. --- .github/workflows/python-tests.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 511f20b..b545de9 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -25,9 +25,8 @@ jobs: uses: actions/cache@v3 with: path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ runner.arch }} + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} restore-keys: | - ${{ runner.os }}-pip-${{ runner.arch }}- ${{ runner.os }}-pip- - name: Install dependencies run: | From f0fcec8d6d2f1cbd541b520003e92001b96640b6 Mon Sep 17 00:00:00 2001 From: danik Date: Thu, 8 May 2025 17:04:26 +0300 Subject: [PATCH 16/18] Revert ""Add pip dependency caching to GitHub Actions workflow"" This reverts commit 58969fc905589fc077fcee842d0469ba84d89933. --- .github/workflows/python-tests.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index b545de9..21c9aa0 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -21,13 +21,6 @@ jobs: uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - - name: Cache pip dependencies - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip- - name: Install dependencies run: | python -m pip install --upgrade pip From d0f3e138c070e86a073dfc74ac288bb3263e870b Mon Sep 17 00:00:00 2001 From: danik Date: Sun, 11 May 2025 09:21:30 +0300 Subject: [PATCH 17/18] make node_to_graph public --- knowledge_graph/querying.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/knowledge_graph/querying.py b/knowledge_graph/querying.py index 2d5b8ba..a17b6b1 100644 --- a/knowledge_graph/querying.py +++ b/knowledge_graph/querying.py @@ -22,7 +22,7 @@ class NodeTitleMissing(Exception): # noqa pass -_node_to_graph: Callable[ +node_to_graph: Callable[ [storage.Node], triplets_index.TripletsWithIndex ] = gamla.compose_left(gamla.attrgetter("graph_id"), storage.get_graph) @@ -419,7 +419,7 @@ def get_attribute_first_value(attribute: str, node: storage.Node) -> str: try: return gamla.head( find_attr_display_text( - node, find_exactly_bare(attribute, _node_to_graph(node)) + node, find_exactly_bare(attribute, node_to_graph(node)) ) ) except StopIteration: @@ -451,7 +451,7 @@ def query_by_primitive(kg: triplets_index.TripletsWithIndex): def get_entity_attribute( type: storage.Node, node: storage.Node ) -> Iterable[storage.Node]: - graph = _node_to_graph(node) + graph = node_to_graph(node) return gamla.pipe( graph.subject_relation_and_object_type_index(node.node_id)( common_relations.ASSOCIATION @@ -468,7 +468,7 @@ def get_entity_attribute_with_text( ) -> Iterable[storage.Node]: return gamla.pipe( node, - gamla.pair_with(gamla.compose_left(_node_to_graph, find_exactly_bare(type))), + gamla.pair_with(gamla.compose_left(node_to_graph, find_exactly_bare(type))), gamla.star(get_entity_attribute), ) @@ -485,7 +485,7 @@ def is_neighbor_by_id(node: storage.Node) -> bool: return gamla.pipe( node, get_node_edges, - gamla.inside(find_exactly_bare(text, _node_to_graph(node))), + gamla.inside(find_exactly_bare(text, node_to_graph(node))), ) return is_neighbor_by_id @@ -493,7 +493,7 @@ def is_neighbor_by_id(node: storage.Node) -> bool: def is_instance_by_id(text: str) -> Callable[[storage.Node], bool]: def is_instance_by_id(node: storage.Node) -> bool: - graph = _node_to_graph(node) + graph = node_to_graph(node) if not querying_raw.is_node_in_graph(graph, text): return False return gamla.pipe( @@ -588,7 +588,7 @@ def nodes_of_type_related_to_node( ) -> storage.Nodes: return get_node_reverse_edges(kg_node) & gamla.pipe( kg_node, - _node_to_graph, + node_to_graph, find_exactly_bare(nodes_type), get_node_instances, ) From cbc8ecf30588ed5ba38fd0ec5416e9446ab9d32e Mon Sep 17 00:00:00 2001 From: danik Date: Thu, 15 May 2025 10:25:46 +0300 Subject: [PATCH 18/18] update version to 0.0.33 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e90fd03..cc26d49 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setuptools.setup( name="knowledge-graph", - version="0.0.32", + version="0.0.33", python_requires=">=3.11", description="A library to store data in a knowledge graph", long_description=_LONG_DESCRIPTION,