Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,12 @@ For more information on how to use the Causal Testing Framework, please refer to

2. If you do not already have causal test cases, you can convert your causal DAG to causal tests by running the following command.
```
python -m causal_testing generate --dag-path $PATH_TO_DAG --output $PATH_TO_TESTS
causal-testing generate --dag-path $PATH_TO_DAG --output $PATH_TO_TESTS
```

3. You can now execute your tests by running the following command.
```
python -m causal_testing test --dag-path $PATH_TO_DAG --data-paths $PATH_TO_DATA --test-config $PATH_TO_TESTS --output $OUTPUT
causal-testing test --dag-path $PATH_TO_DAG --data-paths $PATH_TO_DATA --test-config $PATH_TO_TESTS --output $OUTPUT
```
The results will be saved for inspection in a JSON file located at `$OUTPUT`.
In the future, we hope to add a visualisation tool to assist with this.
Expand Down
66 changes: 25 additions & 41 deletions causal_testing/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,16 @@
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Union
from importlib.metadata import entry_points

import numpy as np
import pandas as pd
from tqdm import tqdm

from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
from causal_testing.estimation.logistic_regression_estimator import LogisticRegressionEstimator
from causal_testing.specification.causal_dag import CausalDAG
from causal_testing.specification.scenario import Scenario
from causal_testing.specification.variable import Input, Output
from causal_testing.testing.base_test_case import BaseTestCase
from causal_testing.testing.causal_effect import Negative, NoEffect, Positive, SomeEffect
from causal_testing.testing.causal_test_adequacy import DataAdequacy
from causal_testing.testing.causal_test_case import CausalTestCase
from causal_testing.testing.causal_test_result import CausalTestResult
Expand Down Expand Up @@ -259,48 +257,30 @@ def create_causal_test(self, test: dict, base_test: BaseTestCase) -> CausalTestC
:return: CausalTestCase object
:raises: ValueError if invalid estimator or configuration is provided
"""
# Map effect string to effect class
effect_map = {
"NoEffect": NoEffect(),
"SomeEffect": SomeEffect(),
"Positive": Positive(),
"Negative": Negative(),
}

# Map estimator string to estimator class
estimator_map = {
"LinearRegressionEstimator": LinearRegressionEstimator,
"LogisticRegressionEstimator": LogisticRegressionEstimator,
}
estimator_map = {ff.name: ff for ff in entry_points(group="estimators")}
effect_map = {ff.name: ff for ff in entry_points(group="causal_effects")}

if "estimator" not in test:
raise ValueError("Test configuration must specify an estimator")

# Get the estimator class
estimator_class = estimator_map.get(test["estimator"])
if estimator_class is None:
raise ValueError(f"Unknown estimator: {test['estimator']}")

# Handle combined queries (global and test-specific)
test_query = test.get("query")
combined_query = None

if self.query and test_query:
combined_query = f"({self.query}) and ({test_query})"
logger.info(
f"Combining global query '{self.query}' with test-specific query "
f"'{test_query}' for test '{test['name']}'"
if test["estimator"] not in estimator_map:
raise ValueError(
f"Unsupported estimator {test['estimator']}. Supported: {sorted(estimator_map)}. "
"If you have implemented a custom estimator, you will need to add this to your entrypoints via your "
"pyproject.toml file."
)
elif test_query:
combined_query = test_query
logger.info(f"Using test-specific query for '{test['name']}': {test_query}")
elif self.query:
combined_query = self.query
logger.info(f"Using global query for '{test['name']}': {self.query}")

Comment on lines -284 to 300
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jmafoster1 Are we dropping functionality for test-specific queries now?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the estimators all handle it anyway, so there's no need to do it here as well. The alternative would be to move the "query" argument from the estimators, which I could also do. That's possibly a bit DRY-er, but I'm not sure whether it might be nice to keep it as an option if people want to use the estimators without driving it from the main frontend?

filtered_df = self.data.query(combined_query) if combined_query else self.data
# Handle global queries
# Test-specific queries are handled by the estimator as not all estimators support them
filtered_df = self.data
if self.query:
filtered_df = self.data.query(self.query)

# Create the estimator with correct parameters
estimator_class = estimator_map.get(test["estimator"]).load()
estimator_kwargs = test.get("estimator_kwargs", {})
if "query" in test:
estimator_kwargs["query"] = test["query"]
estimator = estimator_class(
base_test_case=base_test,
treatment_value=test.get("treatment_value"),
Expand All @@ -310,15 +290,19 @@ def create_causal_test(self, test: dict, base_test: BaseTestCase) -> CausalTestC
self.dag.identification(base_test, self.scenario.hidden_variables()),
),
df=filtered_df,
effect_modifiers=None,
formula=test.get("formula"),
alpha=test.get("alpha", 0.05),
query=combined_query,
**estimator_kwargs,
)

# Get effect type and create expected effect
effect_type = test["expected_effect"][base_test.outcome_variable.name]
expected_effect = effect_map[effect_type]
if effect_type not in effect_map:
raise ValueError(
f"Unsupported causal effect {effect_type}. Supported: {sorted(effect_map)}. "
"If you have implemented a custom causal effect, you will need to add this to your entrypoints via "
"your pyproject.toml file."
)
expected_effect = effect_map[effect_type].load()(**test.get("effect_kwargs", {}))

return CausalTestCase(
base_test_case=base_test,
Expand Down
18 changes: 13 additions & 5 deletions causal_testing/testing/metamorphic_relation.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,26 @@ def to_json_stub(
:param estimator: The name of the estimator class to use when evaluating the test
:param alpha: The significance level to use when calculating the confidence intervals
"""
if estimator not in ["LinearRegressionEstimator", "LogisticRegressionEstimator"]:
raise ValueError(
f"Unsupported estimator {estimator}. "
"We only support autogeneration using LinearRegressionEstimator or LogisticRegressionEstimator."
"More advanced estimators require careful thought that cannot be easily automated."
)
return {
"name": str(self),
"estimator": estimator,
"estimate_type": estimate_type,
"effect": effect_type,
"treatment_variable": self.base_test_case.treatment_variable,
"formula": (
f"{self.base_test_case.outcome_variable} ~ "
f"{' + '.join([self.base_test_case.treatment_variable] + self.adjustment_vars)}"
),
"alpha": alpha,
"skip": skip,
"estimator_kwargs": {
"formula": (
f"{self.base_test_case.outcome_variable} ~ "
f"{' + '.join([self.base_test_case.treatment_variable] + self.adjustment_vars)}"
),
},
}


Expand Down Expand Up @@ -271,7 +279,7 @@ def generate_causal_tests(

logger.warning(
"The skip parameter is hard-coded to False during test generation for better integration with the "
"causal testing component (python -m causal_testing test ...)"
"causal testing component (causal-testing test ...)"
"Please carefully review the generated tests and decide which to skip."
)

Expand Down
6 changes: 3 additions & 3 deletions dafni/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ if [ "$EXECUTION_MODE" = "generate" ]; then
echo "Running causal_testing GENERATE..."
echo "Will write causal tests to: $CAUSAL_TESTS_OUTPUT_PATH"

python -m causal_testing generate \
causal-testing generate \
-D "$DAG_PATH" \
-o "$CAUSAL_TESTS_OUTPUT_PATH" \
-e "$ESTIMATOR" \
Expand Down Expand Up @@ -107,7 +107,7 @@ elif [ "$EXECUTION_MODE" = "test" ]; then
# Build command with adequacy flags only when ADEQUACY is true
if [ "$ADEQUACY" = "true" ]; then
echo "DEBUG: Executing WITH adequacy flags"
python -m causal_testing test \
causal-testing test \
-D "$DAG_PATH" \
-d $DATA_PATHS \
-t "$CAUSAL_TESTS_INPUT_PATH" \
Expand All @@ -120,7 +120,7 @@ elif [ "$EXECUTION_MODE" = "test" ]; then
$([ "$BATCH_SIZE" != "0" ] && echo "--batch-size $BATCH_SIZE")
else
echo "DEBUG: Executing WITHOUT adequacy flags"
python -m causal_testing test \
causal-testing test \
-D "$DAG_PATH" \
-d $DATA_PATHS \
-t "$CAUSAL_TESTS_INPUT_PATH" \
Expand Down
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ If you have any questions about our framework, you can also reach us by `email <

/modules/causal_specification
/modules/estimators
/modules/custom_estimators
/modules/causal_testing

.. toctree::
Expand Down
4 changes: 2 additions & 2 deletions docs/source/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,11 @@ Next Steps
* Read about :doc:`modules/causal_specification` to understand causal specifications and :doc:`modules/causal_testing` for the end-to-end causal testing process.
* Run the command for guidance on how to generate your causal tests directly from your input DAG::

python -m causal_testing generate --help
causal-testing generate --help

* and the command for guidance on how to execute your causal tests::

python -m causal_testing test --help
causal-testing test --help


Using the CTF on DAFNI
Expand Down
2 changes: 1 addition & 1 deletion docs/source/modules/causal_testing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ In the following sections, we describe the end-to-end process of ``causal testin
In particular, suppose we're interested in how various precautions, such as hand-washing and mask-wearing, can prevent the spread of a virus within a classroom.

1. Modelling Scenario
----------------
---------------------

For our modelling scenario, suppose we define the scenario with the following constraints:

Expand Down
65 changes: 65 additions & 0 deletions docs/source/modules/custom_estimators.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
Custom Estimators
=================

If the supported :ref:`estimators` are not sufficient for your needs, you can implement your own custom estimator by extending the :code:`Estimator` class and implementing the abstract :code:`add_modelling_assumptions` method and the estimation method for the causal effect measure you wish to calculate.
For example, if you wished to estimate the ATE using the empirical mean of the recorded outcome under the control and treatment values, you would need to implement a method called :code:`estimate_ate`.
If you wished to estimate the risk ratio, you would need to call your method :code:`estimate_risk_ratio`.
The code for the :code:`EmpiricalMeanEstimator` is shown below.

.. code-block:: python

from causal_testing.estimation.abstract_estimator import Estimator
from scipy.stats import bootstrap

class EmpiricalMeanEstimator(Estimator):
"""
Custom estimator class to estimate the causal effect based on the empirical mean.
"""

def add_modelling_assumptions(self):
"""
Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
must hold if the resulting causal inference is to be considered valid.
"""
self.modelling_assumptions += "The data must contain runs with the exact configuration of interest."

def estimate_ate(self) -> EffectEstimate:
"""Estimate the outcomes under control and treatment.
:return: The empirical average treatment effect.
"""
treatment_variable = self.base_test_case.treatment_variable.name
outcome_variable = self.base_test_case.outcome_variable.name

control_results = self.df.where(self.df[treatment_variable] == self.control_value)[outcome_variable].dropna()
treatment_results = self.df.where(self.df[treatment_variable] == self.treatment_value)[
outcome_variable
].dropna()

def risk_ratio(sample1, sample2):
return sample1.mean() - sample2.mean()

bootstraps = bootstrap((treatment_results, control_results), risk_ratio, confidence_level=self.alpha)
return EffectEstimate(
type="risk_ratio",
value=risk_ratio(treatment_results, control_results),
ci_low=bootstraps.confidence_interval.low,
ci_high=bootstraps.confidence_interval.high,
)

Once you have implemented your estimator, you will need to register it as an extra entry point in your project's :code:`pyproject.toml` file so that the Causal Testing Framework can find it.
For example, if you had defined your :code:`EmpiricalMeanEstimator` class in a module called :code:`empirical_mean_estimator` in a folder called :code:`custom_estimators`, you would register it as follows.
You will also need to reinstall your project (e.g. with :code:`pip install -e .`) each time you add a new estimator to your :code:`pyproject.toml`.
Ordinary source code edits do not require reinstallation; only changes to the entry points in :code:`pyproject.toml` do.


.. code-block:: ini

[project.entry-points."estimators"]
CustomFlakefighter = "custom_estimators.empirical_mean_estimator:EmpiricalMeanEstimator"

Of course, for this to work, your module needs to be discoverable on your Python path.
That is, you should be able to execute :code:`from custom_estimators.empirical_mean_estimator import EmpiricalMeanEstimator` successfully from within the current working directory.

You can also add your custom estimator to causal test cases specified in JSON.
To do so, you can simply set the :code:`estimator` property to the name of your estimator class and the :code:`estimate_type` property to the name of your causal effect measure.
In the above :code:`EmpiricalMeanEstimator` example, :code:`estimator` would be set to :code:`"EmpiricalMeanEstimator"` and :code:`estimate_type` would be set to :code:`"ate"`.
2 changes: 2 additions & 0 deletions docs/source/modules/estimators.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.. _estimators:

Estimators Overview
===================

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"id": "5adf7cdc-fd96-47a4-a194-f1f060a4c0c5",
"metadata": {},
"source": [
"## Overview"
"# Statistical Metamorphic Testing using the API"
]
},
{
Expand All @@ -26,14 +26,6 @@
"Before diving into the details, a good first step is to define your file paths, including your input configurations:"
]
},
{
"cell_type": "markdown",
"id": "56965fba-b90b-4233-a819-bb747ecd9d81",
"metadata": {},
"source": [
"# Statistical Metamorphic Testing using the API"
]
},
{
"cell_type": "code",
"execution_count": 1,
Expand Down Expand Up @@ -841,7 +833,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.14"
"version": "3.11.15"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@
"cum_vaccinated": "NoEffect"
},
"alpha": 0.05,
"formula": "cum_vaccinated ~ cum_vaccinations + vaccine",
"formula": "cum_vaccinated ~ cum_vaccinations+vaccine",
"skip": false,
"passed": false,
"result": {
Expand Down Expand Up @@ -206,7 +206,7 @@
"cum_infections": "NoEffect"
},
"alpha": 0.05,
"formula": "cum_infections ~ cum_vaccinations + vaccine",
"formula": "cum_infections ~ cum_vaccinations+vaccine",
"skip": false,
"passed": true,
"result": {
Expand Down Expand Up @@ -236,7 +236,7 @@
"cum_infections": "NoEffect"
},
"alpha": 0.05,
"formula": "cum_infections ~ cum_vaccinated + vaccine",
"formula": "cum_infections ~ cum_vaccinated+vaccine",
"skip": false,
"passed": true,
"result": {
Expand Down
Loading
Loading