def get_directed_edges(edge: tuple, arrows: list) -> list:
    """Expand an edge and its arrowheads into explicit ``(start, end)`` pairs.

    :param edge: the two endpoints of the edge.
    :param arrows: the endpoints of ``edge`` that carry an arrowhead; must
        contain at least one entry.
    :return: ``[(start, end)]`` when there is exactly one arrowhead, where
        ``end`` is that arrowhead and ``start`` is the other endpoint;
        otherwise both orientations, ``[edge, edge[::-1]]``.
    :raises AssertionError: if ``arrows`` is empty (when assertions are on).
    :raises ValueError: if the single arrowhead is not an endpoint of ``edge``.
    """
    assert len(arrows) >= 1
    if len(arrows) != 1:
        return [edge, edge[::-1]]
    end_vertex = arrows[0]
    # The start is "the other endpoint": edge[1] when the arrowhead sits at
    # position 0, edge[0] otherwise.
    start_vertex = edge[0] if edge.index(end_vertex) else edge[1]
    return [(start_vertex, end_vertex)]


def as_digraph(graph):
    """Return ``graph.to_directed()`` with its dummy edges removed.

    The directed copy is pruned by ``drop_dummy_edges`` so that it only keeps
    the edge tuples that ``graph.edges`` itself yields.
    """
    digraph = graph.to_directed()
    drop_dummy_edges(graph, digraph)
    return digraph


def drop_dummy_edges(graph, digraph):
    """Remove from ``digraph``, in place, every edge not yielded by ``graph.edges``.

    Membership is an exact tuple match, so orientation matters: ``(u, v)`` is
    kept only if ``graph.edges`` yields ``(u, v)`` itself, not ``(v, u)``.

    :param graph: the reference graph whose edge tuples are kept.
    :param digraph: the directed graph to prune; modified in place.
    """
    # Build the lookup once instead of once per edge; set membership on the
    # edge tuples keeps the orientation-sensitive comparison.
    reference_edges = set(graph.edges)
    # Collect first: edges must not be removed while the view is iterated.
    edges_to_drop = [edge for edge in digraph.edges
                     if edge not in reference_edges]
    for edge in edges_to_drop:
        digraph.remove_edge(*edge)


def get_edges_ICstar(digraph):
    """Sort the edges of ``digraph`` into marked, undirected and directed lists.

    Each edge's metadata must provide the keys ``'marked'`` and ``'arrows'``.

    :param digraph: object whose ``edges.items()`` yields ``(edge, metadata)``.
    :return: dict with the keys ``'marked'``, ``'undirected'`` and
        ``'directed'``, each mapping to a list of edge tuples:

        * marked edges contribute the first pair returned by
          ``get_directed_edges``;
        * unmarked edges without arrowheads are listed unchanged as undirected;
        * the remaining edges contribute every pair returned by
          ``get_directed_edges`` (both orientations when they carry two
          arrowheads).
    """
    edges_ICstar = {'marked': [],
                    'undirected': [],
                    'directed': [],
                    }
    for edge, metadata in digraph.edges.items():
        if metadata['marked']:
            oriented = get_directed_edges(edge, metadata['arrows'])
            edges_ICstar['marked'].append(oriented[0])
            continue
        arrows = metadata['arrows']
        if len(arrows) == 0:
            edges_ICstar['undirected'].append(edge)
        else:
            edges_ICstar['directed'].extend(get_directed_edges(edge, arrows))
    return edges_ICstar
def plot_marked_partially_directed_graph(graph, plot_attributes=None):
    """Draw a marked, partially directed graph with matplotlib.

    The graph is converted with ``as_digraph`` and its edges are grouped by
    ``get_edges_ICstar``. Directed and marked edges are drawn with arrows,
    undirected edges without, and every marked edge is labelled with ``'*'``.

    :param graph: the graph to draw; passed to ``as_digraph``.
    :param plot_attributes: optional dict of drawing options:

        * ``'unmarked_edge_color'`` -- colour of directed and undirected
          edges (default ``'black'``);
        * ``'marked_edge_color'`` -- colour of marked edges; when absent it
          follows ``'unmarked_edge_color'``, so callers that only set the
          unmarked colour keep the rendering they had before;
        * ``'arrowsize'`` -- arrow head size (default ``25``).
    :return: the tuple ``(pos, digraph, edges_ICstar)``: the spring-layout
        positions, the directed graph that was drawn, and the edge grouping.
    """
    if plot_attributes is None:
        plot_attributes = {}

    unmarked_edge_color = plot_attributes.get('unmarked_edge_color', 'black')
    # Read the marked colour from its own key (it used to be looked up under
    # 'unmarked_edge_color', which made it impossible to set independently).
    marked_edge_color = plot_attributes.get('marked_edge_color',
                                            unmarked_edge_color)
    arrowsize = plot_attributes.get('arrowsize', 25)

    digraph = as_digraph(graph)
    edges_ICstar = get_edges_ICstar(digraph)

    pos = nx.spring_layout(digraph)
    nx.draw_networkx_nodes(digraph, pos)
    nx.draw_networkx_labels(digraph, pos)

    # directed edges
    nx.draw_networkx_edges(digraph, pos, arrows=True,
                           edgelist=edges_ICstar['directed'],
                           edge_color=unmarked_edge_color,
                           arrowsize=arrowsize)
    # undirected edges: drawn without arrow heads, so no arrowsize is passed
    nx.draw_networkx_edges(digraph, pos, arrows=False,
                           edgelist=edges_ICstar['undirected'],
                           edge_color=unmarked_edge_color)

    # marked edges
    marked_edges = edges_ICstar['marked']
    nx.draw_networkx_edges(digraph, pos, arrows=True,
                           edgelist=marked_edges,
                           edge_color=marked_edge_color,
                           arrowsize=arrowsize)
    # Only edge_labels selects what is labelled; arrows/edgelist/arrowsize are
    # not parameters of draw_networkx_edge_labels.
    nx.draw_networkx_edge_labels(digraph, pos,
                                 edge_labels={e: '*' for e in marked_edges})
    plt.axis('off')
    return pos, digraph, edges_ICstar
def test_get_directed_edges():
    """get_directed_edges orients an edge towards its arrowhead(s)."""
    edge = ('a', 'b')

    # a single arrowhead becomes the end of the one returned edge
    assert get_directed_edges(edge, ['a']) == [('b', 'a')]
    assert get_directed_edges(edge, ['b']) == [('a', 'b')]

    # two arrowheads yield both orientations
    directed_edges = get_directed_edges(edge, ['a', 'b'])
    assert ('a', 'b') in directed_edges
    assert ('b', 'a') in directed_edges
    assert len(directed_edges) == 2


def test_as_digraph():
    """as_digraph returns a DiGraph that keeps every edge of the input."""
    graph = _make_DAG()
    digraph = as_digraph(graph)
    for edge in graph.edges:
        assert edge in digraph.edges
    assert isinstance(digraph, DiGraph)


def _make_pruned_digraph():
    """Build the toy graph and its directed copy with dummy edges dropped.

    Shared setup for the tests below, so that no test has to call (or return
    a value from) another test function.
    """
    graph = _make_DAG()
    digraph = as_digraph(graph)
    drop_dummy_edges(graph, digraph)
    return graph, digraph


def test_drop_dummy_edges():
    """After pruning, the digraph holds exactly the edges of the graph."""
    graph, digraph = _make_pruned_digraph()
    assert set(graph.edges) == set(digraph.edges)


def test_get_edges_ICstar():
    """The toy graph's edges are sorted into the expected IC* groups."""
    _, digraph = _make_pruned_digraph()
    edges_ICstar = get_edges_ICstar(digraph)
    assert edges_ICstar['marked'] == [('x4', 'x5')]
    assert set(edges_ICstar['undirected']) == {('x1', 'x2'), ('x1', 'x3')}
    assert set(edges_ICstar['directed']) == {('x2', 'x4'), ('x3', 'x4')}
def test_plot_DAG():
    """Plotting the toy graph returns positions, a DiGraph and the edge groups."""
    result = plot_marked_partially_directed_graph(_make_DAG())
    pos, digraph, edges_ICstar = result

    # types of the three returned values
    assert isinstance(pos, dict)
    assert isinstance(digraph, DiGraph)
    assert isinstance(edges_ICstar, dict)

    # every edge category is reported, even when it is empty
    for category in ('marked', 'directed', 'undirected'):
        assert category in edges_ICstar