Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,12 @@ For more information on how to use the Causal Testing Framework, please refer to

2. If you do not already have causal test cases, you can convert your causal DAG to causal tests by running the following command.
```
python -m causal_testing generate --dag-path $PATH_TO_DAG --output $PATH_TO_TESTS
causal-testing generate --dag-path $PATH_TO_DAG --output $PATH_TO_TESTS
```

3. You can now execute your tests by running the following command.
```
python -m causal_testing test --dag-path $PATH_TO_DAG --data-paths $PATH_TO_DATA --test-config $PATH_TO_TESTS --output $OUTPUT
causal-testing test --dag-path $PATH_TO_DAG --data-paths $PATH_TO_DATA --test-config $PATH_TO_TESTS --output $OUTPUT
```
The results will be saved for inspection in a JSON file located at `$OUTPUT`.
In the future, we hope to add a visualisation tool to assist with this.
Expand Down
66 changes: 25 additions & 41 deletions causal_testing/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,16 @@
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Union
from importlib.metadata import entry_points

import numpy as np
import pandas as pd
from tqdm import tqdm

from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
from causal_testing.estimation.logistic_regression_estimator import LogisticRegressionEstimator
from causal_testing.specification.causal_dag import CausalDAG
from causal_testing.specification.scenario import Scenario
from causal_testing.specification.variable import Input, Output
from causal_testing.testing.base_test_case import BaseTestCase
from causal_testing.testing.causal_effect import Negative, NoEffect, Positive, SomeEffect
from causal_testing.testing.causal_test_adequacy import DataAdequacy
from causal_testing.testing.causal_test_case import CausalTestCase
from causal_testing.testing.causal_test_result import CausalTestResult
Expand Down Expand Up @@ -259,48 +257,30 @@ def create_causal_test(self, test: dict, base_test: BaseTestCase) -> CausalTestC
:return: CausalTestCase object
:raises: ValueError if invalid estimator or configuration is provided
"""
# Map effect string to effect class
effect_map = {
"NoEffect": NoEffect(),
"SomeEffect": SomeEffect(),
"Positive": Positive(),
"Negative": Negative(),
}

# Map estimator string to estimator class
estimator_map = {
"LinearRegressionEstimator": LinearRegressionEstimator,
"LogisticRegressionEstimator": LogisticRegressionEstimator,
}
estimator_map = {ff.name: ff for ff in entry_points(group="estimators")}
effect_map = {ff.name: ff for ff in entry_points(group="causal_effects")}

if "estimator" not in test:
raise ValueError("Test configuration must specify an estimator")

# Get the estimator class
estimator_class = estimator_map.get(test["estimator"])
if estimator_class is None:
raise ValueError(f"Unknown estimator: {test['estimator']}")

# Handle combined queries (global and test-specific)
test_query = test.get("query")
combined_query = None

if self.query and test_query:
combined_query = f"({self.query}) and ({test_query})"
logger.info(
f"Combining global query '{self.query}' with test-specific query "
f"'{test_query}' for test '{test['name']}'"
if test["estimator"] not in estimator_map:
raise ValueError(
f"Unsupported estimator {test['estimator']}. Supported: {sorted(estimator_map)}. "
"If you have implemented a custom estimator, you will need to add this to your entrypoints via your "
"pyproject.toml file."
)
elif test_query:
combined_query = test_query
logger.info(f"Using test-specific query for '{test['name']}': {test_query}")
elif self.query:
combined_query = self.query
logger.info(f"Using global query for '{test['name']}': {self.query}")

Comment on lines -284 to 300
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jmafoster1 Are we dropping functionality for test-specific queries now?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the estimators all handle it anyway, so there's no need to do it here as well. The alternative would be to move the "query" argument from the estimators, which I could also do. That's possibly a bit DRY-er, but I'm not sure whether it might be nice to keep it as an option if people want to use the estimators without driving it from the main frontend?

filtered_df = self.data.query(combined_query) if combined_query else self.data
# Handle global queries
# Test-specific queries are handled by the estimator as not all estimators support them
filtered_df = self.data
if self.query:
filtered_df = self.data.query(self.query)

# Create the estimator with correct parameters
estimator_class = estimator_map.get(test["estimator"]).load()
estimator_kwargs = test.get("estimator_kwargs", {})
if "query" in test:
estimator_kwargs["query"] = test["query"]
estimator = estimator_class(
base_test_case=base_test,
treatment_value=test.get("treatment_value"),
Expand All @@ -310,15 +290,19 @@ def create_causal_test(self, test: dict, base_test: BaseTestCase) -> CausalTestC
self.dag.identification(base_test, self.scenario.hidden_variables()),
),
df=filtered_df,
effect_modifiers=None,
formula=test.get("formula"),
alpha=test.get("alpha", 0.05),
query=combined_query,
**estimator_kwargs,
)

# Get effect type and create expected effect
effect_type = test["expected_effect"][base_test.outcome_variable.name]
expected_effect = effect_map[effect_type]
if effect_type not in effect_map:
raise ValueError(
f"Unsupported causal effect {effect_type}. Supported: {sorted(effect_map)}. "
"If you have implemented a custom causal effect, you will need to add this to your entrypoints via "
"your pyproject.toml file."
)
expected_effect = effect_map[effect_type].load()(**test.get("effect_kwargs", {}))

return CausalTestCase(
base_test_case=base_test,
Expand Down
18 changes: 13 additions & 5 deletions causal_testing/testing/metamorphic_relation.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,26 @@ def to_json_stub(
:param estimator: The name of the estimator class to use when evaluating the test
:param alpha: The significance level to use when calculating the confidence intervals
"""
if estimator not in ["LinearRegressionEstimator", "LogisticRegressionEstimator"]:
raise ValueError(
f"Unsupported estimator {estimator}. "
"We only support autogeneration using LinearRegressionEstimator or LogisticRegressionEstimator."
"More advanced estimators require careful thought that cannot be easily automated."
)
return {
"name": str(self),
"estimator": estimator,
"estimate_type": estimate_type,
"effect": effect_type,
"treatment_variable": self.base_test_case.treatment_variable,
"formula": (
f"{self.base_test_case.outcome_variable} ~ "
f"{' + '.join([self.base_test_case.treatment_variable] + self.adjustment_vars)}"
),
"alpha": alpha,
"skip": skip,
"estimator_kwargs": {
"formula": (
f"{self.base_test_case.outcome_variable} ~ "
f"{' + '.join([self.base_test_case.treatment_variable] + self.adjustment_vars)}"
),
},
}


Expand Down Expand Up @@ -271,7 +279,7 @@ def generate_causal_tests(

logger.warning(
"The skip parameter is hard-coded to False during test generation for better integration with the "
"causal testing component (python -m causal_testing test ...)"
"causal testing component (causal-testing test ...)"
"Please carefully review the generated tests and decide which to skip."
)

Expand Down
6 changes: 3 additions & 3 deletions dafni/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ if [ "$EXECUTION_MODE" = "generate" ]; then
echo "Running causal_testing GENERATE..."
echo "Will write causal tests to: $CAUSAL_TESTS_OUTPUT_PATH"

python -m causal_testing generate \
causal-testing generate \
-D "$DAG_PATH" \
-o "$CAUSAL_TESTS_OUTPUT_PATH" \
-e "$ESTIMATOR" \
Expand Down Expand Up @@ -107,7 +107,7 @@ elif [ "$EXECUTION_MODE" = "test" ]; then
# Build command with adequacy flags only when ADEQUACY is true
if [ "$ADEQUACY" = "true" ]; then
echo "DEBUG: Executing WITH adequacy flags"
python -m causal_testing test \
causal-testing test \
-D "$DAG_PATH" \
-d $DATA_PATHS \
-t "$CAUSAL_TESTS_INPUT_PATH" \
Expand All @@ -120,7 +120,7 @@ elif [ "$EXECUTION_MODE" = "test" ]; then
$([ "$BATCH_SIZE" != "0" ] && echo "--batch-size $BATCH_SIZE")
else
echo "DEBUG: Executing WITHOUT adequacy flags"
python -m causal_testing test \
causal-testing test \
-D "$DAG_PATH" \
-d $DATA_PATHS \
-t "$CAUSAL_TESTS_INPUT_PATH" \
Expand Down
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ If you have any questions about our framework, you can also reach us by `email <

/modules/causal_specification
/modules/estimators
/modules/custom_estimators
/modules/causal_testing

.. toctree::
Expand Down
4 changes: 2 additions & 2 deletions docs/source/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,11 @@ Next Steps
* Read about :doc:`modules/causal_specification` to understand causal specifications and :doc:`modules/causal_testing` for the end-to-end causal testing process.
* Run the command for guidance on how to generate your causal tests directly from your input DAG::

python -m causal_testing generate --help
causal-testing generate --help

* and the command for guidance on how to execute your causal tests::

python -m causal_testing test --help
causal-testing test --help


Using the CTF on DAFNI
Expand Down
2 changes: 1 addition & 1 deletion docs/source/modules/causal_testing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ In the following sections, we describe the end-to-end process of ``causal testin
In particular, suppose we're interested in how various precautions, such as hand-washing and mask-wearing, can prevent the spread of a virus within a classroom.

1. Modelling Scenario
----------------
---------------------

For our modelling scenario, suppose we define the scenario with the following constraints:

Expand Down
65 changes: 65 additions & 0 deletions docs/source/modules/custom_estimators.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
Custom Estimators
=================

If the supported :ref:`estimators` are not sufficient for your needs, you can implement your own custom estimator by extending the :code:`Estimator` class and implementing the abstract :code:`add_modelling_assumptions` method and the estimation method for the causal effect measure you wish to calculate.
For example, if you wished to estimate the ATE using the empirical mean of the recorded outcome under the control and treatment values, you would need to implement a method called :code:`estimate_ate`.
If you wished to estimate the risk ratio, you would need to call your method :code:`estimate_risk_ratio`.
The code for the :code:`EmpiricalMeanEstimator` is shown below.

.. code-block:: python

from causal_testing.estimation.abstract_estimator import Estimator
from scipy.stats import bootstrap

class EmpiricalMeanEstimator(Estimator):
"""
Custom estimator class to estimate the causal effect based on the empirical mean.
"""

def add_modelling_assumptions(self):
"""
Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
must hold if the resulting causal inference is to be considered valid.
"""
self.modelling_assumptions += "The data must contain runs with the exact configuration of interest."

def estimate_ate(self) -> EffectEstimate:
"""Estimate the outcomes under control and treatment.
:return: The empirical average treatment effect.
"""
treatment_variable = self.base_test_case.treatment_variable.name
outcome_variable = self.base_test_case.outcome_variable.name

control_results = self.df.where(self.df[treatment_variable] == self.control_value)[outcome_variable].dropna()
treatment_results = self.df.where(self.df[treatment_variable] == self.treatment_value)[
outcome_variable
].dropna()

def risk_ratio(sample1, sample2):
return sample1.mean() - sample2.mean()

bootstraps = bootstrap((treatment_results, control_results), risk_ratio, confidence_level=self.alpha)
return EffectEstimate(
type="risk_ratio",
value=risk_ratio(treatment_results, control_results),
ci_low=bootstraps.confidence_interval.low,
ci_high=bootstraps.confidence_interval.high,
)

Once you have implemented your estimator, you will need to register it as an extra entry point in your project's :code:`pyproject.toml` file so that the Causal Testing Framework can find it.
For example, if you had defined your :code:`EmpiricalMeanEstimator` class in a module called :code:`empirical_mean_estimator` in a folder called :code:`custom_estimators`, you would register it as follows.
You will also need to reinstall your project (e.g. with :code:`pip install -e .`) each time you add a new estimator to your :code:`pyproject.toml`.
Ordinary source code edits do not require reinstallation; only changes to the entry points in :code:`pyproject.toml` do.


.. code-block:: ini

[project.entry-points."estimators"]
CustomFlakefighter = "custom_estimators.empirical_mean_estimator:EmpiricalMeanEstimator"

Of course, for this to work, your module needs to be discoverable on your Python path.
That is, you should be able to execute :code:`from custom_estimators.empirical_mean_estimator import EmpiricalMeanEstimator` successfully from within the current working directory.

You can also add your custom estimator to causal test cases specified in JSON.
To do so, you can simply set the :code:`estimator` property to the name of your estimator class and the :code:`estimate_type` property to the name of your causal effect measure.
In the above :code:`EmpiricalMeanEstimator` example, :code:`estimator` would be set to :code:`"EmpiricalMeanEstimator"` and :code:`estimate_type` would be set to :code:`"ate"`.
2 changes: 2 additions & 0 deletions docs/source/modules/estimators.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.. _estimators:

Estimators Overview
===================

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"id": "5adf7cdc-fd96-47a4-a194-f1f060a4c0c5",
"metadata": {},
"source": [
"## Overview"
"# Statistical Metamorphic Testing using the API"
]
},
{
Expand All @@ -26,14 +26,6 @@
"Before diving into the details, a good first step is to define your file paths, including your input configurations:"
]
},
{
"cell_type": "markdown",
"id": "56965fba-b90b-4233-a819-bb747ecd9d81",
"metadata": {},
"source": [
"# Statistical Metamorphic Testing using the API"
]
},
{
"cell_type": "code",
"execution_count": 1,
Expand Down Expand Up @@ -841,7 +833,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.14"
"version": "3.11.15"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@
"cum_vaccinated": "NoEffect"
},
"alpha": 0.05,
"formula": "cum_vaccinated ~ cum_vaccinations + vaccine",
"formula": "cum_vaccinated ~ cum_vaccinations+vaccine",
"skip": false,
"passed": false,
"result": {
Expand Down Expand Up @@ -206,7 +206,7 @@
"cum_infections": "NoEffect"
},
"alpha": 0.05,
"formula": "cum_infections ~ cum_vaccinations + vaccine",
"formula": "cum_infections ~ cum_vaccinations+vaccine",
"skip": false,
"passed": true,
"result": {
Expand Down Expand Up @@ -236,7 +236,7 @@
"cum_infections": "NoEffect"
},
"alpha": 0.05,
"formula": "cum_infections ~ cum_vaccinated + vaccine",
"formula": "cum_infections ~ cum_vaccinated+vaccine",
"skip": false,
"passed": true,
"result": {
Expand Down
Loading
Loading