Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 89 additions & 3 deletions causality/estimation/adjustments.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
from networkx.algorithms import is_directed_acyclic_graph
import networkx as nx
#from pgmpy.models import BayesianModel
from causality.estimation.utils import utils
from itertools import combinations

class AdjustmentException(Exception):
    """Raised when an adjustment set cannot be derived for the given inputs."""

class AdjustForDirectCauses(object):
def __init__(self):
def __init__(self):
pass

def find_predecessors(self, g, causes):
Expand All @@ -17,11 +21,93 @@ def assumptions_satisfied(self, g, causes, effects, predecessors):
if not is_directed_acyclic_graph(g):
return False
if not len(set(effects).intersection(set(causes).union(predecessors))) == 0:
return False
return True
return False
return True

def admissable_set(self, g, causes, effects):
    """
    Return the predecessors of ``causes`` as the adjustment set.

    Parameters
    ----------
    g: graph passed through to ``find_predecessors`` and
        ``assumptions_satisfied``
    causes: the cause nodes
    effects: the effect nodes

    Returns
    -------
    Whatever ``find_predecessors(g, causes)`` returns.

    Raises
    ------
    AdjustmentException
        If ``assumptions_satisfied`` rejects the graph/cause/effect
        combination.
    """
    candidates = self.find_predecessors(g, causes)
    # Success path first: hand back the predecessors only when they pass
    # the assumption check.
    if self.assumptions_satisfied(g, causes, effects, candidates):
        return candidates
    raise AdjustmentException("Failed to satisfy adjustment assumptions")

class backDoorAdjustments(object):
    """
    Enumerate minimal adjustment sets using the back-door graph.

    The back-door graph is a copy of the input graph with every edge leaving
    a cause removed. A candidate set is accepted when, given that set, no
    cause has an active trail to any effect in the back-door graph
    (d-separation is delegated to ``utils().is_active_trail``).
    """

    def __init__(self,):
        pass

    def assumptions_satisfied(self, g, causes, effects):
        """
        Validate the inputs of the back-door search.

        Parameters
        ----------
        g: graph checked with ``is_directed_acyclic_graph``
        causes: sized collection of cause nodes
        effects: sized collection of effect nodes

        Returns
        -------
        True when every check passes (failures are reported by raising).

        Raises
        ------
        AdjustmentException
            If ``g`` is not a directed acyclic graph, or if ``causes`` or
            ``effects`` is empty.
        """
        if not is_directed_acyclic_graph(g):
            raise AdjustmentException("Suplied Graph is not Directed and Acyclic")
        if len(causes) == 0 or len(effects) == 0:
            raise AdjustmentException("Causes/Effects can not be empty")
        return True

    def __are_causes_dseparated_from_effects(self, g, s, causes, effects):
        """
        Return True when no cause has an active trail to any effect in ``g``
        given the observed nodes ``s``.

        ``s`` is forwarded unchanged as the ``observed`` argument of
        ``utils().is_active_trail``. The search stops at the first active
        trail found.
        """
        trail_checker = utils()
        return not any(
            trail_checker.is_active_trail(g, cause, effect, observed=s)
            for cause in causes
            for effect in effects
        )

    def minimal_backdoor_admissable_sets(self, g, causes, effects):
        """
        Generate the minimal sets of nodes that d-separate ``causes`` from
        ``effects`` in the back-door graph of ``g``.

        This is a generator, so nothing (including input validation) runs
        until the first item is requested.

        Parameters
        ----------
        g: directed acyclic graph; it is copied, never modified
        causes: non-empty collection of cause nodes
        effects: non-empty collection of effect nodes

        Yields
        ------
        frozenset
            Each admissible set that is not a superset of one already
            yielded. Candidates are tried in order of increasing size, so
            every yielded set is minimal. If the empty set is already
            admissible, ``frozenset()`` is the only item yielded.

        Raises
        ------
        AdjustmentException
            If the inputs fail ``assumptions_satisfied``, or if no admissible
            set exists among the candidate nodes.
        """
        # Check arguments
        self.assumptions_satisfied(g, causes, effects)

        # Copy nodes *and* edges so that causes/effects without any edge are
        # still present in the working graph.
        backDoorGraph = nx.DiGraph(g)
        descendantsOfCauses = set()

        # Create back door graph and collect descendants from causes.
        # Descendants are gathered before this cause's out-edges are removed.
        for cause in causes:
            descendantsOfCauses.update(nx.descendants(backDoorGraph, cause))
            # Snapshot the edges first: removing edges while iterating the
            # live out_edges view mutates the dict being iterated.
            outEdgesOfCause = list(backDoorGraph.out_edges(cause))
            backDoorGraph.remove_edges_from(outEdgesOfCause)

        # Possible adjustment nodes are those from the original graph that:
        #   i) Are not causes
        #  ii) Are not effects
        # iii) Are not descendants of the causes
        possibleAdjustmentNodes = set(backDoorGraph.nodes()).difference(
            set(causes),
            set(effects),
            descendantsOfCauses,
        )

        # If the empty set d-separates causes and effects in the back door
        # graph then the empty set is the unique minimal admissible set.
        if self.__are_causes_dseparated_from_effects(backDoorGraph, set(), causes, effects):
            yield frozenset()
            return

        # Keep track of which sets have been yielded
        minAdmissablesSets = set()

        # Check all non-empty subsets of possibleAdjustmentNodes, smallest first
        for size in range(1, len(possibleAdjustmentNodes) + 1):
            for s in combinations(possibleAdjustmentNodes, size):
                candidate = frozenset(s)
                # A superset of an already accepted set cannot be minimal
                if any(candidate.issuperset(found) for found in minAdmissablesSets):
                    continue
                # Accept only if every cause is d-separated from every effect
                if self.__are_causes_dseparated_from_effects(backDoorGraph, s, causes, effects):
                    minAdmissablesSets.add(candidate)
                    yield candidate

        # If after checking all combinations we don't find any admissible set
        # then raise an Exception
        if not minAdmissablesSets:
            raise AdjustmentException("Failed to satisfy adjustment assumptions")
138 changes: 138 additions & 0 deletions causality/estimation/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import networkx as nx

# This file contains the functions ported from pgmpy that Causality needs
# in order to check for active trails in the sense of d-connectedness.
# They carry only the minimal changes required to work as standalone
# functions.
# The function doing all the heavy work is active_trail_nodes.

class utils(object):
    """
    Active-trail (d-connection) helpers for a directed graph.

    Every method takes the graph ``g`` explicitly and only relies on
    ``g.nodes()``, ``g.predecessors(node)`` and ``g.successors(node)``.
    """

    # Containers that are interpreted as "several nodes" rather than as one
    # node. A set is never hashable, so it can never be a node itself; a
    # tuple or frozenset argument is always treated as a collection.
    _COLLECTION_TYPES = (list, tuple, set, frozenset)

    def __init__(self):
        pass

    def _get_ancestors_of(self, g, obs_nodes_list):
        """
        Returns the set of all ancestors of the observed nodes, including the
        observed nodes themselves.

        Parameters
        ----------
        g: directed graph
        obs_nodes_list: a single node, or a list/tuple/set/frozenset of nodes
            the observed nodes

        Returns
        -------
        set
            The observed nodes plus every node reachable from them by
            repeatedly following ``g.predecessors``.

        Raises
        ------
        ValueError
            If any observed node is not in ``g.nodes()``.

        Examples
        --------
        For the edges ('D', 'G'), ('I', 'G'), ('G', 'L'), ('I', 'L'):

        >>> utils()._get_ancestors_of(g, 'G') == {'D', 'G', 'I'}
        True
        >>> utils()._get_ancestors_of(g, ['G', 'I']) == {'D', 'G', 'I'}
        True
        """
        if not isinstance(obs_nodes_list, self._COLLECTION_TYPES):
            obs_nodes_list = [obs_nodes_list]

        for node in obs_nodes_list:
            if node not in g.nodes():
                raise ValueError('Node {s} not in graph'.format(s=node))

        ancestors_list = set()
        nodes_list = set(obs_nodes_list)
        while nodes_list:
            node = nodes_list.pop()
            if node not in ancestors_list:
                nodes_list.update(g.predecessors(node))
            ancestors_list.add(node)
        return ancestors_list

    def active_trail_nodes(self, g, variables, observed=None):
        """
        Returns a dictionary with the given variables as keys and the set of
        all nodes reachable from that respective variable through an active
        trail as values.

        Parameters
        ----------
        g: directed graph
        variables: a single node, or a list/tuple/set/frozenset of nodes
            variables whose active trails are to be found.
        observed: a single node, or a list/tuple/set/frozenset of nodes (optional)
            If given the active trails would be computed assuming these nodes
            to be observed. ``None`` means nothing is observed.

        Returns
        -------
        dict
            Maps every start variable to the set of non-observed nodes
            visited from it (the start itself included unless observed).

        Raises
        ------
        ValueError
            If an observed node is not in the graph.

        Examples
        --------
        For the edges ('diff', 'grades'), ('intel', 'grades'):

        >>> utils().active_trail_nodes(g, 'diff') == {'diff': {'diff', 'grades'}}
        True
        >>> utils().active_trail_nodes(g, ['diff', 'intel'], observed='grades') == {
        ...     'diff': {'diff', 'intel'}, 'intel': {'diff', 'intel'}}
        True

        References
        ----------
        Details of the algorithm can be found in 'Probabilistic Graphical Model
        Principles and Techniques' - Koller and Friedman
        Page 75 Algorithm 3.1
        """
        # Test against None (not truthiness) so that a falsy node such as 0
        # can be observed.
        if observed is None:
            observed_list = []
        elif isinstance(observed, self._COLLECTION_TYPES):
            observed_list = list(observed)
        else:
            observed_list = [observed]
        observed_nodes = set(observed_list)  # O(1) membership in the loop below
        ancestors_list = self._get_ancestors_of(g, observed_list)

        if isinstance(variables, self._COLLECTION_TYPES):
            start_nodes = list(variables)
        else:
            start_nodes = [variables]

        # Direction in which a node is entered:
        #   'up'   -> reached while moving towards a parent (coming from a child)
        #   'down' -> reached while moving towards a child (coming from a parent)
        active_trails = {}
        for start in start_nodes:
            visit_list = {(start, 'up')}
            traversed_list = set()
            active_nodes = set()
            while visit_list:
                node, direction = visit_list.pop()
                if (node, direction) in traversed_list:
                    continue
                traversed_list.add((node, direction))

                is_observed = node in observed_nodes
                if not is_observed:
                    active_nodes.add(node)

                if direction == 'up':
                    # An observed node blocks a trail that enters it from a child.
                    if not is_observed:
                        for parent in g.predecessors(node):
                            visit_list.add((parent, 'up'))
                        for child in g.successors(node):
                            visit_list.add((child, 'down'))
                elif direction == 'down':
                    if not is_observed:
                        for child in g.successors(node):
                            visit_list.add((child, 'down'))
                    # Collider case: the trail may turn back up only when the
                    # node is observed or has an observed descendant.
                    if node in ancestors_list:
                        for parent in g.predecessors(node):
                            visit_list.add((parent, 'up'))
            active_trails[start] = active_nodes
        return active_trails

    def is_active_trail(self, g, start, end, observed=None):
        """
        Returns True if there is any active trail between start and end node

        Parameters
        ----------
        g: directed graph
        start : Graph Node
        end : Graph Node
        observed : a single node, or a list/tuple/set/frozenset of nodes (optional)
            If given the active trail would be computed assuming these nodes
            to be observed.

        Examples
        --------
        For the edges ('diff', 'grades'), ('intel', 'grades'),
        ('grades', 'letter'), ('intel', 'sat'):

        >>> utils().is_active_trail(g, 'diff', 'intel')
        False
        >>> utils().is_active_trail(g, 'grades', 'sat')
        True
        """
        # Wrap start in a list so a tuple-valued node is treated as one node
        # instead of being expanded into several start variables.
        return end in self.active_trail_nodes(g, [start], observed)[start]
Loading