causy-dev · LilithWittmann · Aug 6, 2024 · Aug 6, 2024 · Aug 6, 2024
diff --git a/causy/causal_discovery/constraint/algorithms/pc.py b/causy/causal_discovery/constraint/algorithms/pc.py
@@ -1,3 +1,7 @@
+from causy.causal_discovery.constraint.independence_tests.conditional_independence_calculations import (
+    PearsonStudentsTTest,
+    FishersZTest,
+)
 from causy.causal_effect_estimation.multivariate_regression import (
     ComputeDirectEffectsMultivariateRegression,
 )
@@ -19,6 +23,7 @@
 )
 from causy.common_pipeline_steps.calculation import (
     CalculatePearsonCorrelations,
+    CalculateEdgeCorrelations,
 )
 from causy.interfaces import AS_MANY_AS_FIELDS
 from causy.models import ComparisonSettings, Algorithm
@@ -33,6 +38,8 @@
     FloatVariable,
     VariableReference,
     IntegerVariable,
+    CausyObjectParameter,
+    CausyObjectVariable,
 )
 
 PC_DEFAULT_THRESHOLD = 0.005
@@ -63,18 +70,32 @@
 PC = graph_model_factory(
     Algorithm(
         pipeline_steps=[
-            CalculatePearsonCorrelations(display_name="Calculate Pearson Correlations"),
+            CalculateEdgeCorrelations(
+                display_name="Calculate Edge Correlations",
+                conditional_independence_test=VariableReference(
+                    name="conditional_independence_test"
+                ),
+            ),
             CorrelationCoefficientTest(
                 threshold=VariableReference(name="threshold"),
                 display_name="Correlation Coefficient Test",
+                conditional_independence_test=VariableReference(
+                    name="conditional_independence_test"
+                ),
             ),
             PartialCorrelationTest(
                 threshold=VariableReference(name="threshold"),
                 display_name="Partial Correlation Test",
+                conditional_independence_test=VariableReference(
+                    name="conditional_independence_test"
+                ),
             ),
             ExtendedPartialCorrelationTestMatrix(
                 threshold=VariableReference(name="threshold"),
                 display_name="Extended Partial Correlation Test Matrix",
+                conditional_independence_test=VariableReference(
+                    name="conditional_independence_test"
+                ),
             ),
             *PC_ORIENTATION_RULES,
             ComputeDirectEffectsMultivariateRegression(
@@ -84,7 +105,12 @@
         edge_types=PC_EDGE_TYPES,
         extensions=[PC_GRAPH_UI_EXTENSION],
         name="PC",
-        variables=[FloatVariable(name="threshold", value=PC_DEFAULT_THRESHOLD)],
+        variables=[
+            FloatVariable(name="threshold", value=PC_DEFAULT_THRESHOLD),
+            CausyObjectVariable(
+                name="conditional_independence_test", value=PearsonStudentsTTest()
+            ),
+        ],
     )
 )
 

diff --git a/causy/causal_discovery/constraint/independence_tests/common.py b/causy/causal_discovery/constraint/independence_tests/common.py
@@ -4,6 +4,11 @@
 
 import torch
 
+from causy.causal_discovery.constraint.independence_tests.conditional_independence_calculations import (
+    FishersZTest,
+    PearsonStudentsTTest,
+    ConditionalIndependenceTestInterface,
+)
 from causy.generators import AllCombinationsGenerator, PairsWithNeighboursGenerator
 from causy.math_utils import get_t_and_critical_t
 from causy.interfaces import (
@@ -15,7 +20,7 @@
     PipelineStepInterfaceType,
 )
 from causy.models import ComparisonSettings, TestResultAction, TestResult
-from causy.variables import IntegerParameter, BoolParameter
+from causy.variables import IntegerParameter, BoolParameter, CausyObjectParameter
 
 logger = logging.getLogger(__name__)
 
@@ -28,6 +33,7 @@ class CorrelationCoefficientTest(
     )
     chunk_size_parallel_processing: IntegerParameter = 1
     parallel: BoolParameter = False
+    conditional_independence_test: CausyObjectParameter = PearsonStudentsTTest()
 
     def process(
         self, nodes: List[str], graph: BaseGraphInterface
@@ -40,14 +46,9 @@ def process(
         x = graph.nodes[nodes[0]]
         y = graph.nodes[nodes[1]]
 
-        # make t test for independency of u and v
-        sample_size = len(x.values)
-        nb_of_control_vars = 0
-        corr = graph.edge_value(x, y)["correlation"]
-        t, critical_t = get_t_and_critical_t(
-            sample_size, nb_of_control_vars, corr, self.threshold
-        )
-        if abs(t) < critical_t:
+        if self.conditional_independence_test.test(
+            graph, x.name, y.name, [], self.threshold
+        ):
             logger.debug(f"Nodes {x.name} and {y.name} are uncorrelated")
             return TestResult(
                 u=x,
@@ -65,6 +66,7 @@ class PartialCorrelationTest(
     )
     chunk_size_parallel_processing: IntegerParameter = 1
     parallel: BoolParameter = False
+    conditional_independence_test: CausyObjectParameter = PearsonStudentsTTest()
 
     def process(
         self, nodes: Tuple[str], graph: BaseGraphInterface
@@ -92,30 +94,9 @@ def process(
             if not graph.edge_exists(x, y) or (y, x) in already_deleted_edges:
                 continue
 
-            try:
-                cor_xy = graph.edge_value(x, y)["correlation"]
-                cor_xz = graph.edge_value(x, z)["correlation"]
-                cor_yz = graph.edge_value(y, z)["correlation"]
-            except (KeyError, TypeError):
-                return
-
-            numerator = cor_xy - cor_xz * cor_yz
-            denominator = ((1 - cor_xz**2) * (1 - cor_yz**2)) ** 0.5
-
-            # Avoid division by zero
-            if denominator == 0:
-                return
-
-            par_corr = numerator / denominator
-
-            # make t test for independency of u and v given z
-            sample_size = len(x.values)
-            nb_of_control_vars = len(nodes) - 2
-            t, critical_t = get_t_and_critical_t(
-                sample_size, nb_of_control_vars, par_corr, self.threshold
-            )
-
-            if abs(t) < critical_t:
+            if self.conditional_independence_test.test(
+                graph, x.name, y.name, [z.name], self.threshold
+            ):
                 logger.debug(
                     f"Nodes {x.name} and {y.name} are uncorrelated given {z.name}"
                 )
@@ -142,6 +123,7 @@ class ExtendedPartialCorrelationTestMatrix(
     )
     chunk_size_parallel_processing: IntegerParameter = 1000
     parallel: BoolParameter = False
+    conditional_independence_test: CausyObjectParameter = PearsonStudentsTTest()
 
     def process(
         self, nodes: List[str], graph: BaseGraphInterface
@@ -170,48 +152,10 @@ def process(
 
         if not set(nodes[2:]).issubset(set([on for on in list(other_neighbours)])):
             return
-        cov_matrix = torch.cov(
-            torch.stack([graph.nodes[node].values for node in nodes])
-        )
-        # check if the covariance matrix is ill-conditioned
-        if torch.det(cov_matrix) == 0:
-            logger.warning(
-                "The covariance matrix is ill-conditioned. The precision matrix is not reliable."
-            )
-            return
-
-        inverse_cov_matrix = torch.inverse(cov_matrix)
-
-        n = inverse_cov_matrix.size(0)
-        diagonal = torch.diag(inverse_cov_matrix)
-        diagonal_matrix = torch.zeros((n, n), dtype=torch.float64)
-        for i in range(n):
-            diagonal_matrix[i, i] = diagonal[i]
 
-        helper = torch.mm(torch.sqrt(diagonal_matrix), inverse_cov_matrix)
-        precision_matrix = torch.mm(helper, torch.sqrt(diagonal_matrix))
-
-        sample_size = len(graph.nodes[nodes[0]].values)
-        nb_of_control_vars = len(nodes) - 2
-
-        # prevent math domain error
-        try:
-            t, critical_t = get_t_and_critical_t(
-                sample_size,
-                nb_of_control_vars,
-                (
-                    (-1 * precision_matrix[0][1])
-                    / torch.sqrt(precision_matrix[0][0] * precision_matrix[1][1])
-                ).item(),
-                self.threshold,
-            )
-        except ValueError:
-            logger.warning(
-                "Math domain error. The covariance matrix is ill-conditioned. The precision matrix is not reliable."
-            )
-            return
-
-        if abs(t) < critical_t:
+        if self.conditional_independence_test.test(
+            graph, nodes[0], nodes[1], nodes[2:], self.threshold
+        ):
             logger.debug(
                 f"Nodes {graph.nodes[nodes[0]].name} and {graph.nodes[nodes[1]].name} are uncorrelated given nodes {','.join([graph.nodes[on].name for on in other_neighbours])}"
             )