From 471d25a7cc718c067da1ef81b21820fd222e59b1 Mon Sep 17 00:00:00 2001 From: milanvdm Date: Sun, 21 Jan 2018 17:36:32 +0100 Subject: [PATCH 1/4] ADD merge_2_dicts speedup and small comments --- trumania/core/relationship.py | 25 ++++++++++--------- trumania/core/util_functions.py | 44 ++++++++++++++++++++------------- 2 files changed, 41 insertions(+), 28 deletions(-) diff --git a/trumania/core/relationship.py b/trumania/core/relationship.py index 6e6b943..0fb83d5 100644 --- a/trumania/core/relationship.py +++ b/trumania/core/relationship.py @@ -11,15 +11,15 @@ # There are a lot of somewhat ugly optimizations here like in-place mutations, # caching, or usage of numpy instead of a more readable pandas alternative. The -# reason is the methods of this filetend to be called a large amount of time +# reason is the methods of this file tend to be called a large amount of time # in inner loop of the simulation, optimizing them make the whole simulation # faster. -class Relations(object): +class OutgoingRelationships(object): """ - This entity contains all the "to" sides of the relationships of a given - "from", together with the related weights. + For a given "from", this entity contains all the "to" sides of the + relationship with the related weights. This data structure seems to be the most optimal since it corresponds to a cached group-by result, and those group-by are expensive in the select_one @@ -45,8 +45,10 @@ def from_tuples(from_ids, to_ids, weights): a relationship is built here for each "line" read across those 3 arrays. - This methods builds one instance of Relations for each unique from_id + This methods builds one instance of OutgoingRelationships for each unique from_id value, containing all the to_id's it is related to. + + :returns Dictionary { id1 -> OutgoingRelationships1, id2 -> OutgoingRelationships2, ... } """ from_ids = np.array(from_ids) @@ -60,11 +62,12 @@ def from_tuples(from_ids, to_ids, weights): order = from_ids.argsort() ordered = zip(from_ids[order], to_ids[order], weights[order]) + # Find for every unique id in from_ids their matching "to" relations def _relations(): # itertools.groupby is much faster than pandas for from_id, tuples in itertools.groupby(ordered, lambda t: t[0]): - to_ids, weights = list(zip(*tuples))[1: 3] - yield from_id, Relations(list(to_ids), list(weights)) + to_ids, weights = list(zip(*tuples))[1:3] + yield from_id, OutgoingRelationships(list(to_ids), list(weights)) return {from_id: relz for from_id, relz in _relations()} @@ -72,7 +75,7 @@ def plus(self, other): """ Merge function for 2 sets of relations all starting from the same "from" """ - return Relations( + return OutgoingRelationships( np.hstack([self.to_ids, other.to_ids]), np.hstack([self.weights, other.weights])) @@ -83,7 +86,7 @@ def minus(self, other): """ removed_indices = np.argwhere( [idx in other.to_ids for idx in self.to_ids]) - return Relations( + return OutgoingRelationships( np.delete(self.to_ids, removed_indices), np.delete(self.weights, removed_indices)) @@ -157,7 +160,7 @@ def add_relations(self, from_ids, to_ids, weights=1): self.grouped = utils.merge_2_dicts( self.grouped, - Relations.from_tuples(from_ids, to_ids, weights), + OutgoingRelationships.from_tuples(from_ids, to_ids, weights), lambda r1, r2: r1.plus(r2)) def add_grouped_relations(self, from_ids, grouped_ids): @@ -185,7 +188,7 @@ def remove_relations(self, from_ids, to_ids): self.grouped = utils.merge_2_dicts( self.grouped, - Relations.from_tuples(from_ids, to_ids, weights=0), + OutgoingRelationships.from_tuples(from_ids, to_ids, weights=0), lambda r1, r2: r1.minus(r2)) def get_relations(self, from_ids=None): diff --git a/trumania/core/util_functions.py b/trumania/core/util_functions.py index f5495d5..fdee423 100644 --- a/trumania/core/util_functions.py +++ b/trumania/core/util_functions.py @@ -89,23 +89,33 @@ def merge_2_dicts(dict1, dict2, value_merge_func=None): if dict1 is None: return dict2 - def merged_value(key): - if key not in dict1: - return dict2[key] - elif key not in dict2: - return dict1[key] - else: - if value_merge_func is None: - raise ValueError( - "Conflict in merged dictionaries: merge function not " - "provided but key {} exists in both dictionaries".format( - key)) - - return value_merge_func(dict1[key], dict2[key]) - - keys = set(dict1.keys()) | set(dict2.keys()) - - return {key: merged_value(key) for key in keys} + if dict1 == dict2: + for k, v in dict1.items(): + dict1[k] = value_merge_func(v, v) + return dict1 + + dict1_set = set(dict1) + dict2_set = set(dict2) + + keys_to_merge = dict1_set.intersection(dict2_set) + + if len(keys_to_merge) == 0 and value_merge_func is None: + raise ValueError( + "Conflict in merged dictionaries: merge function not " + "provided but keys {} exists in both dictionaries".format( + keys_to_merge)) + + values_merged = dict() + + for key_to_merge in keys_to_merge: + old_value1 = dict1.pop(key_to_merge) + old_value2 = dict2.pop(key_to_merge) + + new_value = value_merge_func(old_value1, old_value2) + + values_merged[key_to_merge] = new_value + + return {**dict1, **dict2, **values_merged} def df_concat(d1, d2): From ff02d7d830dc97d44cd5ba83cab51b5d05dca717 Mon Sep 17 00:00:00 2001 From: milanvdm Date: Sat, 27 Jan 2018 19:23:17 +0100 Subject: [PATCH 2/4] REMOVE pop --- trumania/core/util_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/trumania/core/util_functions.py b/trumania/core/util_functions.py index fdee423..a929f1d 100644 --- a/trumania/core/util_functions.py +++ b/trumania/core/util_functions.py @@ -108,8 +108,8 @@ def merge_2_dicts(dict1, dict2, value_merge_func=None): values_merged = dict() for key_to_merge in keys_to_merge: - old_value1 = dict1.pop(key_to_merge) - old_value2 = dict2.pop(key_to_merge) + old_value1 = dict1[key_to_merge] + old_value2 = dict2[key_to_merge] new_value = value_merge_func(old_value1, old_value2) From ee58cc16a5b6b5e60dc47a7d2a2d56596576f86d Mon Sep 17 00:00:00 2001 From: milanvdm Date: Sat, 27 Jan 2018 19:48:32 +0100 Subject: [PATCH 3/4] RENAME to OutgoingRelations --- trumania/core/relationship.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/trumania/core/relationship.py b/trumania/core/relationship.py index 0fb83d5..78f8107 100644 --- a/trumania/core/relationship.py +++ b/trumania/core/relationship.py @@ -16,7 +16,7 @@ # faster. -class OutgoingRelationships(object): +class OutgoingRelations(object): """ For a given "from", this entity contains all the "to" sides of the relationship with the related weights. @@ -45,10 +45,10 @@ def from_tuples(from_ids, to_ids, weights): a relationship is built here for each "line" read across those 3 arrays. - This methods builds one instance of OutgoingRelationships for each unique from_id + This methods builds one instance of OutgoingRelations for each unique from_id value, containing all the to_id's it is related to. - :returns Dictionary { id1 -> OutgoingRelationships1, id2 -> OutgoingRelationships2, ... } + :returns Dictionary { id1 -> OutgoingRelations1, id2 -> OutgoingRelations2, ... } """ from_ids = np.array(from_ids) @@ -67,7 +67,7 @@ def _relations(): # itertools.groupby is much faster than pandas for from_id, tuples in itertools.groupby(ordered, lambda t: t[0]): to_ids, weights = list(zip(*tuples))[1:3] - yield from_id, OutgoingRelationships(list(to_ids), list(weights)) + yield from_id, OutgoingRelations(list(to_ids), list(weights)) return {from_id: relz for from_id, relz in _relations()} @@ -75,7 +75,7 @@ def plus(self, other): """ Merge function for 2 sets of relations all starting from the same "from" """ - return OutgoingRelationships( + return OutgoingRelations( np.hstack([self.to_ids, other.to_ids]), np.hstack([self.weights, other.weights])) @@ -86,7 +86,7 @@ def minus(self, other): """ removed_indices = np.argwhere( [idx in other.to_ids for idx in self.to_ids]) - return OutgoingRelationships( + return OutgoingRelations( np.delete(self.to_ids, removed_indices), np.delete(self.weights, removed_indices)) @@ -160,7 +160,7 @@ def add_relations(self, from_ids, to_ids, weights=1): self.grouped = utils.merge_2_dicts( self.grouped, - OutgoingRelationships.from_tuples(from_ids, to_ids, weights), + OutgoingRelations.from_tuples(from_ids, to_ids, weights), lambda r1, r2: r1.plus(r2)) def add_grouped_relations(self, from_ids, grouped_ids): @@ -188,7 +188,7 @@ def remove_relations(self, from_ids, to_ids): self.grouped = utils.merge_2_dicts( self.grouped, - OutgoingRelationships.from_tuples(from_ids, to_ids, weights=0), + OutgoingRelations.from_tuples(from_ids, to_ids, weights=0), lambda r1, r2: r1.minus(r2)) def get_relations(self, from_ids=None): From c0f47fe6bc208279b0b3c5f71f52f79fe5367a4d Mon Sep 17 00:00:00 2001 From: milanvdm Date: Sun, 11 Feb 2018 21:26:23 +0100 Subject: [PATCH 4/4] FIX bug on merge function test --- trumania/core/util_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trumania/core/util_functions.py b/trumania/core/util_functions.py index a929f1d..727c8ef 100644 --- a/trumania/core/util_functions.py +++ b/trumania/core/util_functions.py @@ -99,7 +99,7 @@ def merge_2_dicts(dict1, dict2, value_merge_func=None): keys_to_merge = dict1_set.intersection(dict2_set) - if len(keys_to_merge) == 0 and value_merge_func is None: + if len(keys_to_merge) != 0 and value_merge_func is None: raise ValueError( "Conflict in merged dictionaries: merge function not " "provided but keys {} exists in both dictionaries".format(