From c4b198711be9ec865646d118667c2e5526958442 Mon Sep 17 00:00:00 2001 From: Juvid Aryaman Date: Sun, 21 Jun 2020 19:58:26 +0100 Subject: [PATCH 1/5] Fix networkx node deprecation error (#1) --- causality/inference/search/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/causality/inference/search/__init__.py b/causality/inference/search/__init__.py index 4f95a5c..8fd6046 100644 --- a/causality/inference/search/__init__.py +++ b/causality/inference/search/__init__.py @@ -44,9 +44,9 @@ def _build_g(self, variable_types): self._g = nx.Graph() self._g.add_nodes_from(variable_types.keys()) for var, var_type in variable_types.items(): - self._g.node[var]['type'] = var_type + self._g.nodes[var]['type'] = var_type edges_to_add = [] - for (node_a, node_b) in itertools.combinations(self._g.node.keys(), 2): + for (node_a, node_b) in itertools.combinations(self._g.nodes.keys(), 2): edges_to_add.append((node_a,node_b)) self._g.add_edges_from(edges_to_add, marked=False) @@ -121,7 +121,7 @@ def _find_skeleton(self, data, variable_types): """ self.separating_sets = {} if not self.max_k: - self.max_k = len(self._g.node)+1 + self.max_k = len(self._g.nodes)+1 for N in range(self.max_k + 1): for (x, y) in list(self._g.edges()): x_neighbors = list(self._g.neighbors(x)) From 8d9dc5bfc164e0e8fba785a73ed0e1f044f38ade Mon Sep 17 00:00:00 2001 From: Juvid Aryaman Date: Sun, 21 Jun 2020 20:03:50 +0100 Subject: [PATCH 2/5] 001 fix networkx node deprecation (#2) * Fix networkx node deprecation error * Fix test_build_g --- tests/unit/test_IC.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_IC.py b/tests/unit/test_IC.py index 12371a3..8a7ffb7 100644 --- a/tests/unit/test_IC.py +++ b/tests/unit/test_IC.py @@ -36,7 +36,7 @@ def test_build_g(self): assert(len(self.ic._g.edges()) == (V-1)*V / 2) assert(set(self.ic._g.nodes()) == set(self.variable_types.keys())) for node, variable_type in self.variable_types.items(): - assert(self.ic._g.node[node]['type'] == variable_type) + assert(self.ic._g.nodes[node]['type'] == variable_type) for i, j in self.ic._g.edges(): assert(self.ic._g.get_edge_data(i, j)['marked'] == False) From d3bea20efeac0555a0bffc5a29bd2b5dd8cedf68 Mon Sep 17 00:00:00 2001 From: Juvid Aryaman Date: Sat, 27 Jun 2020 11:00:47 +0100 Subject: [PATCH 3/5] 004 chi2 broken (#5) * Fix networkx node deprecation error * Fix test_build_g * Resolve #4 by replacing tuple with list --- causality/inference/independence_tests/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causality/inference/independence_tests/__init__.py b/causality/inference/independence_tests/__init__.py index fa7740e..94765f3 100644 --- a/causality/inference/independence_tests/__init__.py +++ b/causality/inference/independence_tests/__init__.py @@ -35,7 +35,7 @@ def __init__(self, y, x, z, data, alpha): self.total_dof = 0 for xi, yi in itertools.product(x,y): tables = data[[xi]+[yi]+z].copy() - groupby_key = tuple([zi for zi in z] + [xi]) + groupby_key = list([zi for zi in z] + [xi]) tables = tables.join(pd.get_dummies(data[yi],prefix=yi)).groupby(groupby_key).sum() del tables[yi] From db48186424f01df0eef047bf6c2b21c15f458ff9 Mon Sep 17 00:00:00 2001 From: Juvid Aryaman Date: Sat, 27 Jun 2020 12:18:05 +0100 Subject: [PATCH 4/5] Resolve #6 (#7) --- causality/plot/__init__.py | 47 ++++++++++++++++++++++++++++++++++++++ tests/unit/plot.py | 36 +++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 causality/plot/__init__.py create mode 100644 tests/unit/plot.py diff --git a/causality/plot/__init__.py b/causality/plot/__init__.py new file mode 100644 index 0000000..8596f20 --- /dev/null +++ b/causality/plot/__init__.py @@ -0,0 +1,47 @@ +import networkx as nx +import matplotlib.pyplot as plt + +def _drop_dummy_edges(graph, digraph): + edges_to_drop = [] + for edge, edge_metadata in digraph.edges.items(): + if edge not in list(graph.edges): + edges_to_drop.append(edge) + for edge in edges_to_drop: + digraph.remove_edge(*edge) + + +def _split_marked_edges(digraph): + marked_edges = [] + unmarked_edges = [] + for edge, edge_metadata in digraph.edges.items(): + if edge_metadata['marked']: + marked_edges.append(edge) + else: + unmarked_edges.append(edge) + return marked_edges, unmarked_edges + + +def plot_DAG(graph, plot_attributes=None): + if plot_attributes is None: + plot_attributes = {} + + unmarked_edge_color = plot_attributes.get('unmarked_edge_color', 'black') + marked_edge_color = plot_attributes.get('unmarked_edge_color', 'red') + arrowsize = plot_attributes.get('arrowsize', 25) + + digraph = graph.to_directed() + + _drop_dummy_edges(graph, digraph) + + marked_edges, unmarked_edges = _split_marked_edges(digraph) + + pos = nx.spring_layout(digraph) + nx.draw_networkx_nodes(digraph, pos) + nx.draw_networkx_labels(digraph, pos) + nx.draw_networkx_edges(digraph, pos, arrows=True, edgelist=unmarked_edges, + edge_color=unmarked_edge_color, + arrowsize=arrowsize) + nx.draw_networkx_edges(digraph, pos, arrows=True, edgelist=marked_edges, + edge_color=marked_edge_color, arrowsize=arrowsize) + plt.axis('off') + return pos, digraph diff --git a/tests/unit/plot.py b/tests/unit/plot.py new file mode 100644 index 0000000..27583cd --- /dev/null +++ b/tests/unit/plot.py @@ -0,0 +1,36 @@ +from causality.plot import plot_DAG + +import numpy +import pandas as pd + +from causality.inference.search import IC +from causality.inference.independence_tests import RobustRegressionTest +from networkx.classes.digraph import DiGraph + + +def _make_DAG(): + # generate some toy data: + SIZE = 2000 + x1 = numpy.random.normal(size=SIZE) + x2 = x1 + numpy.random.normal(size=SIZE) + x3 = x1 + numpy.random.normal(size=SIZE) + x4 = x2 + x3 + numpy.random.normal(size=SIZE) + x5 = x4 + numpy.random.normal(size=SIZE) + + X = pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'x4': x4, 'x5': x5}) + + # define the variable types: 'c' is 'continuous'. The variables defined here + # are the ones the search is performed over -- NOT all the variables defined + # in the data frame. + variable_types = {'x1': 'c', 'x2': 'c', 'x3': 'c', 'x4': 'c', 'x5': 'c'} + + # run the search + ic_algorithm = IC(RobustRegressionTest) + graph = ic_algorithm.search(X, variable_types) + return graph + +def test_plot_DAG(): + graph = _make_DAG() + pos, digraph = plot_DAG(graph) + assert isinstance(pos, dict) + assert isinstance(digraph, DiGraph) From 259f9be6f9ad95fa2b59de413abbd084146ed8cf Mon Sep 17 00:00:00 2001 From: Juvid Aryaman Date: Sun, 28 Jun 2020 15:13:49 +0100 Subject: [PATCH 5/5] 008 plot marked dag (#9) * Resolve #6 * Resolve #8 --- causality/inference/__init__.py | 41 ++++++++++++++++++ causality/plot/__init__.py | 51 ++++++++++------------- tests/unit/inference.py | 74 +++++++++++++++++++++++++++++++++ tests/unit/plot.py | 11 +++-- 4 files changed, 144 insertions(+), 33 deletions(-) create mode 100644 tests/unit/inference.py diff --git a/causality/inference/__init__.py b/causality/inference/__init__.py index e69de29..b63a4cb 100644 --- a/causality/inference/__init__.py +++ b/causality/inference/__init__.py @@ -0,0 +1,41 @@ +def get_directed_edges(edge: tuple, arrows: list) -> list: + assert len(arrows) >= 1 + if len(arrows) == 1: + endVertex = arrows[0] + startVertex = edge[int(not edge.index(endVertex))] + return [(startVertex, endVertex)] + else: + return [edge, edge[::-1]] + + +def as_digraph(graph): + digraph = graph.to_directed() + drop_dummy_edges(graph, digraph) + return digraph + +def drop_dummy_edges(graph, digraph): + edges_to_drop = [] + for edge, edge_metadata in digraph.edges.items(): + if edge not in list(graph.edges): + edges_to_drop.append(edge) + for edge in edges_to_drop: + digraph.remove_edge(*edge) + +def get_edges_ICstar(digraph): + edges_ICstar = {'marked':[], + 'undirected': [], + 'directed': [], + } + for edge, metadata in digraph.edges.items(): + # marked + if metadata['marked']: + edges_ICstar['marked'].append(get_directed_edges(edge, metadata['arrows'])[0]) + else: + # undirected + if len(metadata['arrows']) == 0: + edges_ICstar['undirected'].append(edge) + else: + directed_edges = get_directed_edges(edge, metadata['arrows']) + for e in directed_edges: edges_ICstar['directed'].append(e) + return edges_ICstar + diff --git a/causality/plot/__init__.py b/causality/plot/__init__.py index 8596f20..524321c 100644 --- a/causality/plot/__init__.py +++ b/causality/plot/__init__.py @@ -1,47 +1,38 @@ import networkx as nx import matplotlib.pyplot as plt -def _drop_dummy_edges(graph, digraph): - edges_to_drop = [] - for edge, edge_metadata in digraph.edges.items(): - if edge not in list(graph.edges): - edges_to_drop.append(edge) - for edge in edges_to_drop: - digraph.remove_edge(*edge) +from causality.inference import get_edges_ICstar +from causality.inference import as_digraph - -def _split_marked_edges(digraph): - marked_edges = [] - unmarked_edges = [] - for edge, edge_metadata in digraph.edges.items(): - if edge_metadata['marked']: - marked_edges.append(edge) - else: - unmarked_edges.append(edge) - return marked_edges, unmarked_edges - - -def plot_DAG(graph, plot_attributes=None): +def plot_marked_partially_directed_graph(graph, plot_attributes=None): if plot_attributes is None: plot_attributes = {} unmarked_edge_color = plot_attributes.get('unmarked_edge_color', 'black') - marked_edge_color = plot_attributes.get('unmarked_edge_color', 'red') + marked_edge_color = plot_attributes.get('unmarked_edge_color', 'black') arrowsize = plot_attributes.get('arrowsize', 25) - digraph = graph.to_directed() - - _drop_dummy_edges(graph, digraph) - - marked_edges, unmarked_edges = _split_marked_edges(digraph) + digraph = as_digraph(graph) + edges_ICstar = get_edges_ICstar(digraph) pos = nx.spring_layout(digraph) nx.draw_networkx_nodes(digraph, pos) nx.draw_networkx_labels(digraph, pos) - nx.draw_networkx_edges(digraph, pos, arrows=True, edgelist=unmarked_edges, + + # directed edges + nx.draw_networkx_edges(digraph, pos, arrows=True, edgelist=edges_ICstar['directed'], + edge_color=unmarked_edge_color, + arrowsize=arrowsize) + # undirected edges + nx.draw_networkx_edges(digraph, pos, arrows=False, edgelist=edges_ICstar['undirected'], edge_color=unmarked_edge_color, arrowsize=arrowsize) - nx.draw_networkx_edges(digraph, pos, arrows=True, edgelist=marked_edges, - edge_color=marked_edge_color, arrowsize=arrowsize) + + # marked edges + nx.draw_networkx_edges(digraph, pos, arrows=True, edgelist=edges_ICstar['marked'], + edge_color=marked_edge_color, + arrowsize=arrowsize) + nx.draw_networkx_edge_labels(digraph, pos, arrows=True, edgelist=edges_ICstar['marked'], + edge_labels={e: '*' for e in edges_ICstar['marked']}, arrowsize=arrowsize) plt.axis('off') - return pos, digraph + return pos, digraph, edges_ICstar diff --git a/tests/unit/inference.py b/tests/unit/inference.py new file mode 100644 index 0000000..76e2425 --- /dev/null +++ b/tests/unit/inference.py @@ -0,0 +1,74 @@ +import numpy as np +import pandas as pd +from causality.inference.search import IC +from causality.inference.independence_tests import RobustRegressionTest + +from causality.inference import drop_dummy_edges +from causality.inference import get_directed_edges +from causality.inference import get_edges_ICstar +from causality.inference import as_digraph + +from networkx.classes.digraph import DiGraph + +def _make_DAG(): + # generate some toy data: + np.random.seed(1) + SIZE = 2000 + x1 = np.random.normal(size=SIZE) + x2 = x1 + np.random.normal(size=SIZE) + x3 = x1 + np.random.normal(size=SIZE) + x4 = x2 + x3 + np.random.normal(size=SIZE) + x5 = x4 + np.random.normal(size=SIZE) + + X = pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'x4': x4, 'x5': x5}) + + # define the variable types: 'c' is 'continuous'. The variables defined here + # are the ones the search is performed over -- NOT all the variables defined + # in the data frame. + variable_types = {'x1': 'c', 'x2': 'c', 'x3': 'c', 'x4': 'c', 'x5': 'c'} + + # run the search + ic_algorithm = IC(RobustRegressionTest) + graph = ic_algorithm.search(X, variable_types) + return graph + + +def test_get_directed_edges(): + edge = ('a', 'b') + arrows = ['a'] + directed_edges = get_directed_edges(edge, arrows) + assert directed_edges == [('b', 'a')] + + arrows = ['b'] + directed_edges = get_directed_edges(edge, arrows) + assert directed_edges == [('a', 'b')] + + arrows = ['a','b'] + directed_edges = get_directed_edges(edge, arrows) + assert ('a', 'b') in directed_edges + assert ('b', 'a') in directed_edges + assert len(directed_edges) == 2 + + +def test_as_digraph(): + graph = _make_DAG() + digraph = as_digraph(graph) + for edge, metadata in graph.edges.items(): + assert edge in digraph.edges + assert isinstance(digraph, DiGraph) + + +def test_drop_dummy_edges(): + graph = _make_DAG() + digraph = as_digraph(graph) + drop_dummy_edges(graph, digraph) + assert set(graph.edges) == set(digraph.edges) + return digraph + + +def test_get_edges_ICstar(): + digraph = test_drop_dummy_edges() + edges_ICstar = get_edges_ICstar(digraph) + assert edges_ICstar['marked'] == [('x4', 'x5')] + assert set(edges_ICstar['undirected']) == {('x1', 'x2'), ('x1', 'x3')} + assert set(edges_ICstar['directed']) == {('x2', 'x4'), ('x3', 'x4')} diff --git a/tests/unit/plot.py b/tests/unit/plot.py index 27583cd..3e86eb3 100644 --- a/tests/unit/plot.py +++ b/tests/unit/plot.py @@ -1,11 +1,12 @@ -from causality.plot import plot_DAG +from causality.plot import plot_marked_partially_directed_graph +from networkx.classes.digraph import DiGraph import numpy import pandas as pd from causality.inference.search import IC from causality.inference.independence_tests import RobustRegressionTest -from networkx.classes.digraph import DiGraph + def _make_DAG(): @@ -31,6 +32,10 @@ def _make_DAG(): def test_plot_DAG(): graph = _make_DAG() - pos, digraph = plot_DAG(graph) + pos, digraph, edges_ICstar = plot_marked_partially_directed_graph(graph) assert isinstance(pos, dict) assert isinstance(digraph, DiGraph) + assert isinstance(edges_ICstar, dict) + assert 'marked' in edges_ICstar + assert 'directed' in edges_ICstar + assert 'undirected' in edges_ICstar