Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions causy/causal_discovery/constraint/algorithms/pc.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from causy.causal_discovery.constraint.independence_tests.conditional_independence_calculations import (
PearsonStudentsTTest,
FishersZTest,
)
from causy.causal_effect_estimation.multivariate_regression import (
ComputeDirectEffectsMultivariateRegression,
)
Expand All @@ -19,6 +23,7 @@
)
from causy.common_pipeline_steps.calculation import (
CalculatePearsonCorrelations,
CalculateEdgeCorrelations,
)
from causy.interfaces import AS_MANY_AS_FIELDS
from causy.models import ComparisonSettings, Algorithm
Expand All @@ -33,6 +38,8 @@
FloatVariable,
VariableReference,
IntegerVariable,
CausyObjectParameter,
CausyObjectVariable,
)

PC_DEFAULT_THRESHOLD = 0.005
Expand Down Expand Up @@ -63,18 +70,32 @@
PC = graph_model_factory(
Algorithm(
pipeline_steps=[
CalculatePearsonCorrelations(display_name="Calculate Pearson Correlations"),
CalculateEdgeCorrelations(
display_name="Calculate Edge Correlations",
conditional_independence_test=VariableReference(
name="conditional_independence_test"
),
),
CorrelationCoefficientTest(
threshold=VariableReference(name="threshold"),
display_name="Correlation Coefficient Test",
conditional_independence_test=VariableReference(
name="conditional_independence_test"
),
),
PartialCorrelationTest(
threshold=VariableReference(name="threshold"),
display_name="Partial Correlation Test",
conditional_independence_test=VariableReference(
name="conditional_independence_test"
),
),
ExtendedPartialCorrelationTestMatrix(
threshold=VariableReference(name="threshold"),
display_name="Extended Partial Correlation Test Matrix",
conditional_independence_test=VariableReference(
name="conditional_independence_test"
),
),
*PC_ORIENTATION_RULES,
ComputeDirectEffectsMultivariateRegression(
Expand All @@ -84,7 +105,12 @@
edge_types=PC_EDGE_TYPES,
extensions=[PC_GRAPH_UI_EXTENSION],
name="PC",
variables=[FloatVariable(name="threshold", value=PC_DEFAULT_THRESHOLD)],
variables=[
FloatVariable(name="threshold", value=PC_DEFAULT_THRESHOLD),
CausyObjectVariable(
name="conditional_independence_test", value=PearsonStudentsTTest()
),
],
)
)

Expand Down
92 changes: 18 additions & 74 deletions causy/causal_discovery/constraint/independence_tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@

import torch

from causy.causal_discovery.constraint.independence_tests.conditional_independence_calculations import (
FishersZTest,
PearsonStudentsTTest,
ConditionalIndependenceTestInterface,
)
from causy.generators import AllCombinationsGenerator, PairsWithNeighboursGenerator
from causy.math_utils import get_t_and_critical_t
from causy.interfaces import (
Expand All @@ -15,7 +20,7 @@
PipelineStepInterfaceType,
)
from causy.models import ComparisonSettings, TestResultAction, TestResult
from causy.variables import IntegerParameter, BoolParameter
from causy.variables import IntegerParameter, BoolParameter, CausyObjectParameter

logger = logging.getLogger(__name__)

Expand All @@ -28,6 +33,7 @@ class CorrelationCoefficientTest(
)
chunk_size_parallel_processing: IntegerParameter = 1
parallel: BoolParameter = False
conditional_independence_test: CausyObjectParameter = PearsonStudentsTTest()

def process(
self, nodes: List[str], graph: BaseGraphInterface
Expand All @@ -40,14 +46,9 @@ def process(
x = graph.nodes[nodes[0]]
y = graph.nodes[nodes[1]]

# make t test for independency of u and v
sample_size = len(x.values)
nb_of_control_vars = 0
corr = graph.edge_value(x, y)["correlation"]
t, critical_t = get_t_and_critical_t(
sample_size, nb_of_control_vars, corr, self.threshold
)
if abs(t) < critical_t:
if self.conditional_independence_test.test(
graph, x.name, y.name, [], self.threshold
):
logger.debug(f"Nodes {x.name} and {y.name} are uncorrelated")
return TestResult(
u=x,
Expand All @@ -65,6 +66,7 @@ class PartialCorrelationTest(
)
chunk_size_parallel_processing: IntegerParameter = 1
parallel: BoolParameter = False
conditional_independence_test: CausyObjectParameter = PearsonStudentsTTest()

def process(
self, nodes: Tuple[str], graph: BaseGraphInterface
Expand Down Expand Up @@ -92,30 +94,9 @@ def process(
if not graph.edge_exists(x, y) or (y, x) in already_deleted_edges:
continue

try:
cor_xy = graph.edge_value(x, y)["correlation"]
cor_xz = graph.edge_value(x, z)["correlation"]
cor_yz = graph.edge_value(y, z)["correlation"]
except (KeyError, TypeError):
return

numerator = cor_xy - cor_xz * cor_yz
denominator = ((1 - cor_xz**2) * (1 - cor_yz**2)) ** 0.5

# Avoid division by zero
if denominator == 0:
return

par_corr = numerator / denominator

# make t test for independency of u and v given z
sample_size = len(x.values)
nb_of_control_vars = len(nodes) - 2
t, critical_t = get_t_and_critical_t(
sample_size, nb_of_control_vars, par_corr, self.threshold
)

if abs(t) < critical_t:
if self.conditional_independence_test.test(
graph, x.name, y.name, [z.name], self.threshold
):
logger.debug(
f"Nodes {x.name} and {y.name} are uncorrelated given {z.name}"
)
Expand All @@ -142,6 +123,7 @@ class ExtendedPartialCorrelationTestMatrix(
)
chunk_size_parallel_processing: IntegerParameter = 1000
parallel: BoolParameter = False
conditional_independence_test: CausyObjectParameter = PearsonStudentsTTest()

def process(
self, nodes: List[str], graph: BaseGraphInterface
Expand Down Expand Up @@ -170,48 +152,10 @@ def process(

if not set(nodes[2:]).issubset(set([on for on in list(other_neighbours)])):
return
cov_matrix = torch.cov(
torch.stack([graph.nodes[node].values for node in nodes])
)
# check if the covariance matrix is ill-conditioned
if torch.det(cov_matrix) == 0:
logger.warning(
"The covariance matrix is ill-conditioned. The precision matrix is not reliable."
)
return

inverse_cov_matrix = torch.inverse(cov_matrix)

n = inverse_cov_matrix.size(0)
diagonal = torch.diag(inverse_cov_matrix)
diagonal_matrix = torch.zeros((n, n), dtype=torch.float64)
for i in range(n):
diagonal_matrix[i, i] = diagonal[i]

helper = torch.mm(torch.sqrt(diagonal_matrix), inverse_cov_matrix)
precision_matrix = torch.mm(helper, torch.sqrt(diagonal_matrix))

sample_size = len(graph.nodes[nodes[0]].values)
nb_of_control_vars = len(nodes) - 2

# prevent math domain error
try:
t, critical_t = get_t_and_critical_t(
sample_size,
nb_of_control_vars,
(
(-1 * precision_matrix[0][1])
/ torch.sqrt(precision_matrix[0][0] * precision_matrix[1][1])
).item(),
self.threshold,
)
except ValueError:
logger.warning(
"Math domain error. The covariance matrix is ill-conditioned. The precision matrix is not reliable."
)
return

if abs(t) < critical_t:
if self.conditional_independence_test.test(
graph, nodes[0], nodes[1], nodes[2:], self.threshold
):
logger.debug(
f"Nodes {graph.nodes[nodes[0]].name} and {graph.nodes[nodes[1]].name} are uncorrelated given nodes {','.join([graph.nodes[on].name for on in other_neighbours])}"
)
Expand Down
Loading