From 98bc868b9ae1b70f6e6eef522166a082a3d51c30 Mon Sep 17 00:00:00 2001
From: Fdvanleeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Wed, 31 Dec 2025 11:32:30 +0100
Subject: [PATCH 01/18] Add conformal prediction for missing data extension

Introduces the CP_missing_data extension for TabPFNRegressor, providing conformal prediction intervals in the presence of missing data. Includes implementation, example usage, and tests for calibration and prediction with missing data patterns.
---
 .../CP_missing_data_example.py                |  49 ++++
 .../CP_missing_data/CP_missing_data.py        | 238 ++++++++++++++++++
 .../CP_missing_data/__init__.py               |  11 +
 tests/test_CP_missing_data.py                 |  98 ++++++++
 4 files changed, 396 insertions(+)
 create mode 100644 examples/CP_missing_data/CP_missing_data_example.py
 create mode 100644 src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
 create mode 100644 src/tabpfn_extensions/CP_missing_data/__init__.py
 create mode 100644 tests/test_CP_missing_data.py

diff --git a/examples/CP_missing_data/CP_missing_data_example.py b/examples/CP_missing_data/CP_missing_data_example.py
new file mode 100644
index 00000000..5453f54a
--- /dev/null
+++ b/examples/CP_missing_data/CP_missing_data_example.py
@@ -0,0 +1,49 @@
+"""Provides a detailed example of obtaining conformalised prediction intervals when there is missing data.
+
+This script demonstrates the complete workflow for obtaining conformal prediction intervals
+for the TabPFNRegressor when these are missing values in the dataset. The process is shown
+in two steps. Using the training data to train the model and obtain correction terms for
+each mask, and appying the corrcetion terms with the trained model to a new dataset.
+
+Note: This algorithms works well then the missing pattern is small.
+"""
+
+import numpy as np
+import pandas as pd
+import warnings
+
+import tabpfn
+from tabpfn import TabPFNRegressor
+
+from sklearn.model_selection import train_test_split
+from tabpfn_extensions.CP_missing_data import CP_MDA_TabPFNRegressor, CP_MDA_TabPFNRegressor_newdata
+
+# generate some data
+np.random.seed(42)  # For reproducibility
+X = np.random.rand(100, 5)
+Y = np.random.rand(100)
+
+# add missing values in X under MCAR
+X[np.random.randint(0, 100, 10), np.random.randint(0, 5, 10)] = np.nan
+
+# Check how many unique patterns there are 
+unique_patterns = pd.DataFrame(X).isnull().astype(int).drop_duplicates()
+print(f"Number of unique missing data patterns: {len(unique_patterns)}")
+print("\nUnique patterns:")
+print(unique_patterns)
+
+# Use TabPFN+CP-MDA
+model = CP_MDA_TabPFNRegressor(X, Y, quantiles=[0.05, 0.5, 0.95], val_size=0.5, seed = 123)
+calibration_results, model_fit = model.fit()
+print(calibration_results)
+
+# Apply the model to new cases 
+cp_apply = CP_MDA_TabPFNRegressor_newdata(model_fit, X_new = X, quantiles=[0.05, 0.5, 0.95], calibration_results=calibration_results)
+CP_results = cp_apply.fit()
+
+print("\nConformal prediction results:")
+print(f"Lower bound (corrected): {CP_results[0][:5]}")  # Show first 5
+print(f"Predictions: {CP_results[1][:5]}")
+print(f"Upper bound (corrected): {CP_results[2][:5]}")
+print(f"Lower bound (uncorrected): {CP_results[3][:5]}")
+print(f"Upper bound (uncorrected): {CP_results[4][:5]}")
\ No newline at end of file
diff --git a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
new file mode 100644
index 00000000..bb7b187d
--- /dev/null
+++ b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
@@ -0,0 +1,238 @@
+"""Conformal prediction for TabPFN with missing data patterns.
+
+This module provides conformal prediction intervals that are calibrated
+for different missing data patterns in the input features.
+"""
+
+from __future__ import annotations
+
+import warnings
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from tabpfn import TabPFNRegressor
+
+
+class CP_MDA_TabPFNRegressor:
+    """
+    Compute the correction terms for missing data masks using conformal prediction.
+
+    Parameters:
+        X_train : matrix-like of shape (n_samples, n_predictors)
+
+        Y_train : array-like of continious outcome with shape (n_samples,)
+
+        quantiles : array with three arumgnent denoting the qualitens of intrest.
+            The default is [0.05, 0.5, 0.95], where the first indicates the lowerbound,
+            the second the median, and the third the upperbound.
+
+        val_size : float between 0 and 1, indicating the size of the validation set
+            as a fraction of the training data.
+
+
+    Returns:
+     mask_unique: DataFrame with the correction terms for each mask.
+
+     model: Fitted TabPFNRegressor model.
+
+    """
+
+    def __init__(self, X_train, Y_train, quantiles, val_size, seed):
+        self.X = pd.DataFrame(X_train)
+        self.Y = Y_train
+        self.quantiles = quantiles
+        self.val_size = val_size
+        self.alpha = quantiles[0] * 2
+        self.seed = seed
+
+    def calc_correction_term(self, predictions, y_val, alpha):
+        """Calculate the correction term for conformal prediction."""
+        # obtain the lowerbound, median, and upperbound
+        lb, pred, ub = predictions
+        # calculate difference between bounds and observed values
+        error_lb = (lb - y_val)
+        error_ub = (y_val - ub)
+        s = np.maximum(error_lb, error_ub)
+        # obtain the right quantile
+
+        Q_use = (1 - alpha) / (1 + 1/len(s))
+        correction_term = np.quantile(s, Q_use)
+        return correction_term
+
+    def split_data(self):
+        """Split data into training and validation sets."""
+        # create df with missing data indicator
+        missing_bool_df = self.X.isnull().astype(int)
+        self.X_train, self.X_val, Y_train_arr, Y_val_arr, self.Mask_train, self.Mask_val = train_test_split(
+            self.X, self.Y, missing_bool_df, test_size=self.val_size, random_state = self.seed
+        )
+
+        # Convert Y arrays back to pandas Series to maintain .iloc functionality
+        self.Y_train = pd.Series(Y_train_arr, index=self.X_train.index)
+        self.Y_val = pd.Series(Y_val_arr, index=self.X_val.index)
+
+    def run_TABPFN(self):
+        """Fit the TabPFN model."""
+        # fit model
+        m_fit = TabPFNRegressor()
+        m_fit.fit(self.X_train, self.Y_train)
+        self.model = m_fit
+
+    def mask_preprocess(self):
+        """Preprocess masks and identify nested relationships."""
+        # drop duplicates masks
+        mask_unique = self.Mask_val.drop_duplicates().copy()
+        # add mask id
+        mask_unique["mask_id"] = range(1, len(mask_unique) + 1)
+        # Get mask columns (all columns except mask_id)
+        mask_cols = [col for col in mask_unique.columns if col != 'mask_id']
+
+        # Check nesting for all pairs of masks
+        results = []
+        for i, row_a in mask_unique.iterrows():
+            mask_a = row_a[mask_cols].values
+            mask_a_id = row_a['mask_id']
+            nested_masks = []
+
+            for j, row_b in mask_unique.iterrows():
+                if i == j:  # Skip comparing mask with itself
+                    continue
+                mask_b = row_b[mask_cols].values
+                mask_b_id = row_b['mask_id']
+
+                if ((mask_b == 1) & (mask_a == 0)).sum() == 0:
+                    nested_masks.append(mask_b_id)
+
+            results.append({
+                'mask_id': mask_a_id,
+                'nested_masks': nested_masks
+            })
+
+        self.mask_unique = mask_unique  
+        self.mask_nested = pd.DataFrame(results)
+
+    def create_calibration_sets(self):
+        """Create calibration sets for each mask pattern."""
+        # obtain list of columns
+        mask_cols = list(self.Mask_val.columns.values)
+
+        # Using merge to add the id of the mask
+        df_with_ids = self.Mask_val.merge(
+            self.mask_unique,  
+            on=mask_cols,
+            how='left'
+        )
+
+        for i in self.mask_unique["mask_id"]:
+            # select the nested masks
+            nested_masks = self.mask_nested[self.mask_nested["mask_id"] == i]["nested_masks"].values[0]
+            # add the mask itself
+            nested_masks_with_self = nested_masks + [i]  # Create new list instead of append
+
+            # obtain indexes for the rows
+            indexes = df_with_ids[df_with_ids["mask_id"].isin(nested_masks_with_self)].index
+
+            # select the validation data based on the indices
+            X_val_nested = self.X_val.iloc[indexes]
+            Y_val_nested = self.Y_val.iloc[indexes]
+
+            # obtain predictions
+            predictions = self.model.predict(
+                X_val_nested,
+                output_type="quantiles",
+                quantiles=self.quantiles
+            )
+
+            # calculate correction term
+            correction_term = self.calc_correction_term(predictions, Y_val_nested, self.alpha)
+
+            # save the correction term to the mask_unique dataframe
+            self.mask_unique.loc[self.mask_unique["mask_id"] == i, "correction_term"] = correction_term
+            self.mask_unique.loc[self.mask_unique["mask_id"] == i, "val_size"] =  X_val_nested.shape[0]
+
+
+        return self.mask_unique, self.model
+
+    def fit(self):
+        """Convenience method to run the entire pipeline"""
+        self.split_data()
+        self.run_TABPFN()
+        self.mask_preprocess()
+        mask_unique, model = self.create_calibration_sets()
+
+        return mask_unique, model
+
+class CP_MDA_TabPFNRegressor_newdata:
+    """
+    Compute the correction terms for missing data masks using conformal prediction.
+
+    Parameters:
+
+    TabPFN: Fitted TabPFNRegressor model.
+
+    X_train : matrix-like of shape (n_samples, n_predictors)
+
+    quantiles : array with three arumgnent denoting the qualitens of intrest used
+                in fitting the model. The default is [0.05, 0.5, 0.95].
+
+    calibration_results : matrix with the correction terms for each mask.
+
+
+    Returns:
+     CP_results: DataFrame with shape (n_samples, 5). Included are the corrected lower bound,
+     prediction, corrected upper bound, non-corrected lower bound, and non-corrected upper bound.
+
+    """
+
+    def __init__(self,TabPFN, X_new, quantiles, calibration_results):
+        self.TabPFN = TabPFN
+        self.X = pd.DataFrame(X_new)
+        self.quantiles = quantiles
+        self.calibration_results = calibration_results
+
+    def obtain_preds(self):
+        """Obtain predictions from fitted model."""
+        preds_test = self.TabPFN.predict(
+            self.X,
+            output_type="quantiles",
+            quantiles=self.quantiles
+        )
+        self.preds_test = preds_test
+
+    def match_mask(self):
+      """Add correction terms to the new masks from the test set."""
+      mask_test = self.X.isnull().astype(int)
+      mask_cols = list(mask_test.columns.values)
+
+      mask_test_cor = mask_test.merge(
+            self.calibration_results,
+            on=mask_cols,
+            how='left'
+        )
+
+      # check if there are masks in the test set that are not in the calibration set
+      new_masks = mask_test_cor[mask_test_cor["correction_term"].isnull()][mask_cols]
+
+      if new_masks.shape[0] > 0:
+          warnings.warn(
+              "The following masks are not in the calibration set:\n"
+              f"{new_masks.to_string()}\n"
+              "The baseline quantile estimates will be returned for those cases."
+          )
+
+      self.mask_test_cor = mask_test_cor
+
+    def perf_correction(self):
+      """Add correction terms to the new masks from the test set."""
+      preds_test = self.preds_test.copy()
+      lb_corr = preds_test[0] - self.mask_test_cor["correction_term"].values
+      ub_corr = preds_test[2] + self.mask_test_cor["correction_term"].values
+
+      return lb_corr, preds_test[1], ub_corr, preds_test[0], preds_test[2]
+
+    def fit(self):
+        """Convenience method to run the entire pipeline"""
+        self.obtain_preds()
+        self.match_mask()
+        CP_results =  self.perf_correction()
+        return CP_results
diff --git a/src/tabpfn_extensions/CP_missing_data/__init__.py b/src/tabpfn_extensions/CP_missing_data/__init__.py
new file mode 100644
index 00000000..594e90ba
--- /dev/null
+++ b/src/tabpfn_extensions/CP_missing_data/__init__.py
@@ -0,0 +1,11 @@
+"""Conformal prediction for missing data module for tabpfn_extensions package."""
+
+from .CP_missing_data import (
+   CP_MDA_TabPFNRegressor,
+    CP_MDA_TabPFNRegressor_newdata,
+)
+
+__all__ = [
+    "CP_MDA_TabPFNRegressor",
+    "CP_MDA_TabPFNRegressor_newdata",
+]
diff --git a/tests/test_CP_missing_data.py b/tests/test_CP_missing_data.py
new file mode 100644
index 00000000..1c8e2458
--- /dev/null
+++ b/tests/test_CP_missing_data.py
@@ -0,0 +1,98 @@
+"""Tests for the CP_missing_data extension.
+
+This file tests the CP_MDA_TabPFNRegressor and CP_MDA_TabPFNRegressor_newdata functions,
+which attempts to obtain correct uncertainity estimates in case if missing data. 
+"""
+
+from __future__ import annotations
+
+import numpy as np
+import pandas as pd
+import pytest
+from numpy.testing import assert_array_equal
+
+try:
+    from tabpfn_extensions.CP_missing_data import (
+        CP_MDA_TabPFNRegressor,
+        CP_MDA_TabPFNRegressor_newdata,
+    )
+except ImportError:
+    pytest.skip("Required libraries (tabpfn) not installed", allow_module_level=True)
+
+# -------- Fixtures --------
+
+@pytest.fixture
+def X_train():
+    return np.array([
+        [0.1, np.nan], [0.3, 0.4], [np.nan, 0.6], [0.7, 0.8],
+        [0.2, np.nan], [0.2, np.nan], [0.9, 0.4], [np.nan, 0.4],
+        [0.3, 0.2], [np.nan, 0.9], [0.8, np.nan], [0.1, 0.2],
+        [np.nan, 0.5], [0.3, 0.7], [0.7, np.nan], [0.7, np.nan],
+        [0.3, 0.4], [np.nan, 0.2], [0.9, 0.7], [np.nan, 0.3],
+        [0.3, 0.7], [0.4, 0.8], [0.5, 0.4], [0.7, 0.2], [0.8, 0.3],
+    ])
+
+
+@pytest.fixture
+def Y_train():
+    return np.array([1,3,1,2,3,4,5,6,1,2,3,4,5,6,7,2,3,5,6,8,4,2,1,2,3])
+
+
+@pytest.fixture
+def X_new():
+    return np.array([
+        [0.1, 0.1],
+        [0.3, np.nan],
+        [np.nan, 0.6],
+    ])
+
+
+@pytest.fixture
+def seed():
+    return 123
+
+
+# -- Test --
+
+def test_model_CP(X_train, Y_train, seed):
+    """Tests if the calibration corrections are of the correct shape and type."""
+    model = CP_MDA_TabPFNRegressor(X_train, Y_train,  quantiles = [0.05, 0.5, 0.95], val_size = 0.5, seed = seed)
+    calibration_results, model_fit = model.fit()
+
+    # not the best check since we do not control which cases are in the valset
+    missing_df = pd.DataFrame(X_train).isnull().astype(int).drop_duplicates()
+
+    # check type, size of the calibration results
+    assert calibration_results.shape[0] == missing_df.shape[0]
+    assert calibration_results.shape[1] == 5
+    assert isinstance(calibration_results, pd.DataFrame)
+
+
+def test_reproducibility(X_train, Y_train, seed):
+    """Tests that random_state ensures deterministic correction terms."""
+
+    model_1 = CP_MDA_TabPFNRegressor(X_train, Y_train, quantiles=[0.05, 0.5, 0.95], val_size = 0.5, seed = seed)
+    calibration_results_1, model_fit_1 = model_1.fit()
+
+    # Second model with the same seed
+    model_2 = CP_MDA_TabPFNRegressor(X_train, Y_train, quantiles=[0.05, 0.5, 0.95] , val_size = 0.5, seed = seed)
+    calibration_results_2, model_fit_2 = model_2.fit()
+
+    # Assert that the outputs are identical
+    assert_array_equal(calibration_results_1, calibration_results_2)
+
+
+def test_predict(X_train, Y_train, seed, X_new):
+    """Tests if the predictions have the correct shape and type."""
+
+    # fit model 
+    model = CP_MDA_TabPFNRegressor(X_train, Y_train, quantiles=[0.05, 0.5, 0.95], val_size = 0.5, seed = seed)
+    calibration_results, model_fit = model.fit()
+
+    # Apply the model to new cases 
+    cp_apply = CP_MDA_TabPFNRegressor_newdata(model_fit, X_new = X_new, quantiles=[0.05, 0.5, 0.95], calibration_results=calibration_results)
+    CP_results = cp_apply.fit()
+
+    assert CP_results[1].size== X_new.shape[0]
+    assert isinstance(CP_results[1], np.ndarray)
+    assert len(CP_results)== 5
\ No newline at end of file

From 840ca599dc7c5710f7cc96f80cac1bd68cac6b6e Mon Sep 17 00:00:00 2001
From: Fdvanleeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Wed, 31 Dec 2025 11:46:35 +0100
Subject: [PATCH 02/18] Import TabPFN in a flexible way

---
 examples/CP_missing_data/CP_missing_data_example.py      | 8 ++++++--
 src/tabpfn_extensions/CP_missing_data/CP_missing_data.py | 8 +++++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/examples/CP_missing_data/CP_missing_data_example.py b/examples/CP_missing_data/CP_missing_data_example.py
index 5453f54a..0babc385 100644
--- a/examples/CP_missing_data/CP_missing_data_example.py
+++ b/examples/CP_missing_data/CP_missing_data_example.py
@@ -12,8 +12,12 @@
 import pandas as pd
 import warnings
 
-import tabpfn
-from tabpfn import TabPFNRegressor
+try:
+    # Try standard TabPFN package first
+    from tabpfn import TabPFNRegressor
+except ImportError:
+    # Fall back to TabPFN client
+    from tabpfn_client import  TabPFNRegressor
 
 from sklearn.model_selection import train_test_split
 from tabpfn_extensions.CP_missing_data import CP_MDA_TabPFNRegressor, CP_MDA_TabPFNRegressor_newdata
diff --git a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
index bb7b187d..f4cb6317 100644
--- a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
+++ b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
@@ -10,7 +10,13 @@
 import numpy as np
 import pandas as pd
 from sklearn.model_selection import train_test_split
-from tabpfn import TabPFNRegressor
+
+try:
+    # Try standard TabPFN package first
+    from tabpfn import TabPFNRegressor
+except ImportError:
+    # Fall back to TabPFN client
+    from tabpfn_client import  TabPFNRegressor
 
 
 class CP_MDA_TabPFNRegressor:

From baf8528ac5c72196a5b32aef5b5d30698d9aace4 Mon Sep 17 00:00:00 2001
From: Fdvanleeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Wed, 31 Dec 2025 11:55:56 +0100
Subject: [PATCH 03/18] Update example

Use fewer features to reduce the number of missing-data masks in the example.
---
 examples/CP_missing_data/CP_missing_data_example.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/CP_missing_data/CP_missing_data_example.py b/examples/CP_missing_data/CP_missing_data_example.py
index 0babc385..c7561043 100644
--- a/examples/CP_missing_data/CP_missing_data_example.py
+++ b/examples/CP_missing_data/CP_missing_data_example.py
@@ -24,11 +24,11 @@
 
 # generate some data
 np.random.seed(42)  # For reproducibility
-X = np.random.rand(100, 5)
+X = np.random.rand(100, 2)
 Y = np.random.rand(100)
 
 # add missing values in X under MCAR
-X[np.random.randint(0, 100, 10), np.random.randint(0, 5, 10)] = np.nan
+X[np.random.randint(0, 100, 40), np.random.randint(0, 2, 40)] = np.nan
 
 # Check how many unique patterns there are 
 unique_patterns = pd.DataFrame(X).isnull().astype(int).drop_duplicates()

From de6575f8a93f42edb4424f59c6ba31ce344a972f Mon Sep 17 00:00:00 2001
From: Fdvanleeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Wed, 31 Dec 2025 12:19:00 +0100
Subject: [PATCH 04/18] Correct the quantile level used for the correction term

---
 src/tabpfn_extensions/CP_missing_data/CP_missing_data.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
index f4cb6317..efa5d686 100644
--- a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
+++ b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
@@ -59,9 +59,9 @@ def calc_correction_term(self, predictions, y_val, alpha):
         error_lb = (lb - y_val)
         error_ub = (y_val - ub)
         s = np.maximum(error_lb, error_ub)
-        # obtain the right quantile
 
-        Q_use = (1 - alpha) / (1 + 1/len(s))
+        # obtain the emperical quantile
+        Q_use = (1 - alpha) * (1 + 1/len(s))
         correction_term = np.quantile(s, Q_use)
         return correction_term
 
@@ -132,6 +132,7 @@ def create_calibration_sets(self):
         for i in self.mask_unique["mask_id"]:
             # select the nested masks
             nested_masks = self.mask_nested[self.mask_nested["mask_id"] == i]["nested_masks"].values[0]
+            
             # add the mask itself
             nested_masks_with_self = nested_masks + [i]  # Create new list instead of append
 

From 7ba1129d71f7fade3fccae16d8d20256aaa575f9 Mon Sep 17 00:00:00 2001
From: Florian D van Leeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Fri, 2 Jan 2026 12:23:35 +0100
Subject: [PATCH 05/18] Update
 src/tabpfn_extensions/CP_missing_data/CP_missing_data.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 src/tabpfn_extensions/CP_missing_data/CP_missing_data.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
index efa5d686..65b836a1 100644
--- a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
+++ b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
@@ -26,11 +26,11 @@ class CP_MDA_TabPFNRegressor:
     Parameters:
         X_train : matrix-like of shape (n_samples, n_predictors)
 
-        Y_train : array-like of continious outcome with shape (n_samples,)
+        Y_train : array-like of continuous outcome with shape (n_samples,)
 
-        quantiles : array with three arumgnent denoting the qualitens of intrest.
-            The default is [0.05, 0.5, 0.95], where the first indicates the lowerbound,
-            the second the median, and the third the upperbound.
+        quantiles : array with three arguments denoting the quantiles of interest.
+            The default is [0.05, 0.5, 0.95], where the first indicates the lower bound,
+            the second the median, and the third the upper bound.
 
         val_size : float between 0 and 1, indicating the size of the validation set
             as a fraction of the training data.

From 2fdf17bb66004aab8387c7c8d2b0620b73a23da8 Mon Sep 17 00:00:00 2001
From: Florian D van Leeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Fri, 2 Jan 2026 12:24:01 +0100
Subject: [PATCH 06/18] Update
 src/tabpfn_extensions/CP_missing_data/__init__.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 src/tabpfn_extensions/CP_missing_data/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tabpfn_extensions/CP_missing_data/__init__.py b/src/tabpfn_extensions/CP_missing_data/__init__.py
index 594e90ba..4c76af49 100644
--- a/src/tabpfn_extensions/CP_missing_data/__init__.py
+++ b/src/tabpfn_extensions/CP_missing_data/__init__.py
@@ -1,7 +1,7 @@
 """Conformal prediction for missing data module for tabpfn_extensions package."""
 
 from .CP_missing_data import (
-   CP_MDA_TabPFNRegressor,
+    CP_MDA_TabPFNRegressor,
     CP_MDA_TabPFNRegressor_newdata,
 )
 

From 1f2ae5578815b6e2dedcf08fc3070ca65aef7d60 Mon Sep 17 00:00:00 2001
From: Florian D van Leeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Fri, 2 Jan 2026 12:24:19 +0100
Subject: [PATCH 07/18] Update
 src/tabpfn_extensions/CP_missing_data/CP_missing_data.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 src/tabpfn_extensions/CP_missing_data/CP_missing_data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
index 65b836a1..6a474497 100644
--- a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
+++ b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
@@ -177,9 +177,9 @@ class CP_MDA_TabPFNRegressor_newdata:
 
     TabPFN: Fitted TabPFNRegressor model.
 
-    X_train : matrix-like of shape (n_samples, n_predictors)
+    X_new : matrix-like of shape (n_samples, n_predictors)
 
-    quantiles : array with three arumgnent denoting the qualitens of intrest used
+    quantiles : array with three arguments denoting the quantiles of interest used
                 in fitting the model. The default is [0.05, 0.5, 0.95].
 
     calibration_results : matrix with the correction terms for each mask.

From 662a17d37b5c23fba535e44707afcb28b856a2f5 Mon Sep 17 00:00:00 2001
From: Florian D van Leeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Fri, 2 Jan 2026 12:24:39 +0100
Subject: [PATCH 08/18] Update
 examples/CP_missing_data/CP_missing_data_example.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 examples/CP_missing_data/CP_missing_data_example.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/CP_missing_data/CP_missing_data_example.py b/examples/CP_missing_data/CP_missing_data_example.py
index c7561043..0735eb7d 100644
--- a/examples/CP_missing_data/CP_missing_data_example.py
+++ b/examples/CP_missing_data/CP_missing_data_example.py
@@ -5,7 +5,7 @@
 in two steps. Using the training data to train the model and obtain correction terms for
 each mask, and appying the corrcetion terms with the trained model to a new dataset.
 
-Note: This algorithms works well then the missing pattern is small.
+Note: This algorithm works well when the missing pattern is small.
 """
 
 import numpy as np

From 4c0d9c63c358e80c3b2fea5dd160a0e7aeb9bc37 Mon Sep 17 00:00:00 2001
From: Florian D van Leeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Fri, 2 Jan 2026 12:25:09 +0100
Subject: [PATCH 09/18] Update
 src/tabpfn_extensions/CP_missing_data/CP_missing_data.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 src/tabpfn_extensions/CP_missing_data/CP_missing_data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
index 6a474497..fe9844ea 100644
--- a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
+++ b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
@@ -229,8 +229,8 @@ def match_mask(self):
 
       self.mask_test_cor = mask_test_cor
 
-    def perf_correction(self):
-      """Add correction terms to the new masks from the test set."""
+    def perform_correction(self):
+      """Apply correction terms to the prediction intervals."""
       preds_test = self.preds_test.copy()
       lb_corr = preds_test[0] - self.mask_test_cor["correction_term"].values
       ub_corr = preds_test[2] + self.mask_test_cor["correction_term"].values

From 73ef78172d448697afb8db68e11edc2bf8ece5ab Mon Sep 17 00:00:00 2001
From: Fdvanleeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Fri, 2 Jan 2026 12:30:12 +0100
Subject: [PATCH 10/18] Update based on gemini-code-assist

---
 .../CP_missing_data_example.py                |  1 -
 .../CP_missing_data/CP_missing_data.py        | 46 +++++++++----------
 2 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/examples/CP_missing_data/CP_missing_data_example.py b/examples/CP_missing_data/CP_missing_data_example.py
index 0735eb7d..d042d67c 100644
--- a/examples/CP_missing_data/CP_missing_data_example.py
+++ b/examples/CP_missing_data/CP_missing_data_example.py
@@ -19,7 +19,6 @@
     # Fall back to TabPFN client
     from tabpfn_client import  TabPFNRegressor
 
-from sklearn.model_selection import train_test_split
 from tabpfn_extensions.CP_missing_data import CP_MDA_TabPFNRegressor, CP_MDA_TabPFNRegressor_newdata
 
 # generate some data
diff --git a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
index fe9844ea..fc586fdf 100644
--- a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
+++ b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
@@ -207,35 +207,35 @@ def obtain_preds(self):
         self.preds_test = preds_test
 
     def match_mask(self):
-      """Add correction terms to the new masks from the test set."""
-      mask_test = self.X.isnull().astype(int)
-      mask_cols = list(mask_test.columns.values)
-
-      mask_test_cor = mask_test.merge(
-            self.calibration_results,
-            on=mask_cols,
-            how='left'
-        )
+        """Add correction terms to the new masks from the test set."""
+        mask_test = self.X.isnull().astype(int)
+        mask_cols = list(mask_test.columns.values)
+
+        mask_test_cor = mask_test.merge(
+                self.calibration_results,
+                on=mask_cols,
+                how='left'
+            )
 
-      # check if there are masks in the test set that are not in the calibration set
-      new_masks = mask_test_cor[mask_test_cor["correction_term"].isnull()][mask_cols]
+        # check if there are masks in the test set that are not in the calibration set
+        new_masks = mask_test_cor[mask_test_cor["correction_term"].isnull()][mask_cols]
 
-      if new_masks.shape[0] > 0:
-          warnings.warn(
-              "The following masks are not in the calibration set:\n"
-              f"{new_masks.to_string()}\n"
-              "The baseline quantile estimates will be returned for those cases."
-          )
+        if new_masks.shape[0] > 0:
+            warnings.warn(
+                "The following masks are not in the calibration set:\n"
+                f"{new_masks.to_string()}\n"
+                "The baseline quantile estimates will be returned for those cases."
+            )
 
-      self.mask_test_cor = mask_test_cor
+        self.mask_test_cor = mask_test_cor
 
     def perform_correction(self):
-      """Apply correction terms to the prediction intervals."""
-      preds_test = self.preds_test.copy()
-      lb_corr = preds_test[0] - self.mask_test_cor["correction_term"].values
-      ub_corr = preds_test[2] + self.mask_test_cor["correction_term"].values
+        """Apply correction terms to the prediction intervals."""
+        preds_test = self.preds_test.copy()
+        lb_corr = preds_test[0] - self.mask_test_cor["correction_term"].values
+        ub_corr = preds_test[2] + self.mask_test_cor["correction_term"].values
 
-      return lb_corr, preds_test[1], ub_corr, preds_test[0], preds_test[2]
+        return lb_corr, preds_test[1], ub_corr, preds_test[0], preds_test[2]
 
     def fit(self):
         """Convenience method to run the entire pipeline"""

From 57bcccc245edefe4e9f6d73e03b7e0f799ccd6a9 Mon Sep 17 00:00:00 2001
From: Florian D van Leeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Fri, 2 Jan 2026 12:30:59 +0100
Subject: [PATCH 11/18] Update tests/test_CP_missing_data.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 tests/test_CP_missing_data.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/test_CP_missing_data.py b/tests/test_CP_missing_data.py
index 1c8e2458..7b9b4a4b 100644
--- a/tests/test_CP_missing_data.py
+++ b/tests/test_CP_missing_data.py
@@ -59,8 +59,10 @@ def test_model_CP(X_train, Y_train, seed):
     model = CP_MDA_TabPFNRegressor(X_train, Y_train,  quantiles = [0.05, 0.5, 0.95], val_size = 0.5, seed = seed)
     calibration_results, model_fit = model.fit()
 
-    # not the best check since we do not control which cases are in the valset
-    missing_df = pd.DataFrame(X_train).isnull().astype(int).drop_duplicates()
+    # Replicate the split to get the validation set and find its unique masks.
+    from sklearn.model_selection import train_test_split
+    _, X_val, _, _ = train_test_split(X_train, Y_train, test_size=0.5, random_state=seed)
+    missing_df = pd.DataFrame(X_val).isnull().astype(int).drop_duplicates()
 
     # check type, size of the calibration results
     assert calibration_results.shape[0] == missing_df.shape[0]

From 512cf47d46b4bca3838e5246162d0612f45eaa7c Mon Sep 17 00:00:00 2001
From: Fdvanleeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Sat, 3 Jan 2026 11:06:16 +0100
Subject: [PATCH 12/18] Tidy up the code

---
 .../CP_missing_data/CP_missing_data.py           | 16 ++++++++++++----
 tests/test_CP_missing_data.py                    |  2 +-
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
index fc586fdf..9f735b0c 100644
--- a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
+++ b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
@@ -62,6 +62,13 @@ def calc_correction_term(self, predictions, y_val, alpha):
 
         # obtain the emperical quantile
         Q_use = (1 - alpha) * (1 + 1/len(s))
+
+        # Check is Q_use if not larger then 1
+        if Q_use > 1:
+            Q_use = 1
+            warnings.warn(
+                "Some masks have very small calibration sets")
+
         correction_term = np.quantile(s, Q_use)
         return correction_term
 
@@ -123,7 +130,8 @@ def create_calibration_sets(self):
         mask_cols = list(self.Mask_val.columns.values)
 
         # Using merge to add the id of the mask
-        df_with_ids = self.Mask_val.merge(
+        # use original index values
+        df_with_ids = self.Mask_val.reset_index().merge(
             self.mask_unique,  
             on=mask_cols,
             how='left'
@@ -137,11 +145,11 @@ def create_calibration_sets(self):
             nested_masks_with_self = nested_masks + [i]  # Create new list instead of append
 
             # obtain indexes for the rows
-            indexes = df_with_ids[df_with_ids["mask_id"].isin(nested_masks_with_self)].index
+            indexes = df_with_ids[df_with_ids["mask_id"].isin(nested_masks_with_self)]["index"]
 
             # select the validation data based on the indices
-            X_val_nested = self.X_val.iloc[indexes]
-            Y_val_nested = self.Y_val.iloc[indexes]
+            X_val_nested = self.X_val.loc[indexes]
+            Y_val_nested = self.Y_val.loc[indexes]
 
             # obtain predictions
             predictions = self.model.predict(
diff --git a/tests/test_CP_missing_data.py b/tests/test_CP_missing_data.py
index 7b9b4a4b..a24bbb5e 100644
--- a/tests/test_CP_missing_data.py
+++ b/tests/test_CP_missing_data.py
@@ -10,6 +10,7 @@
 import pandas as pd
 import pytest
 from numpy.testing import assert_array_equal
+from sklearn.model_selection import train_test_split
 
 try:
     from tabpfn_extensions.CP_missing_data import (
@@ -60,7 +61,6 @@ def test_model_CP(X_train, Y_train, seed):
     calibration_results, model_fit = model.fit()
 
     # Replicate the split to get the validation set and find its unique masks.
-    from sklearn.model_selection import train_test_split
     _, X_val, _, _ = train_test_split(X_train, Y_train, test_size=0.5, random_state=seed)
     missing_df = pd.DataFrame(X_val).isnull().astype(int).drop_duplicates()
 

From 07ee7e93f705aa9f8cf1747ecc2fd05df7df1ecc Mon Sep 17 00:00:00 2001
From: Fdvanleeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Mon, 5 Jan 2026 13:52:19 +0100
Subject: [PATCH 13/18] Update CP_missing_data.py

Make seed optional and update the pipeline to call the renamed internal function (perf_correction -> perform_correction).
---
 src/tabpfn_extensions/CP_missing_data/CP_missing_data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
index 9f735b0c..50ff3808 100644
--- a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
+++ b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
@@ -43,7 +43,7 @@ class CP_MDA_TabPFNRegressor:
 
     """
 
-    def __init__(self, X_train, Y_train, quantiles, val_size, seed):
+    def __init__(self, X_train, Y_train, quantiles, val_size, seed=None):
         self.X = pd.DataFrame(X_train)
         self.Y = Y_train
         self.quantiles = quantiles
@@ -249,5 +249,5 @@ def fit(self):
         """Convenience method to run the entire pipeline"""
         self.obtain_preds()
         self.match_mask()
-        CP_results =  self.perf_correction()
+        CP_results =  self.perform_correction()
         return CP_results

From 22b16853b066973bda7267d8d75dbad98b68d6ef Mon Sep 17 00:00:00 2001
From: Fdvanleeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Mon, 5 Jan 2026 18:05:10 +0100
Subject: [PATCH 14/18] Add the masking of nested columns

---
 src/tabpfn_extensions/CP_missing_data/CP_missing_data.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
index 50ff3808..2d32013f 100644
--- a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
+++ b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
@@ -151,6 +151,14 @@ def create_calibration_sets(self):
             X_val_nested = self.X_val.loc[indexes]
             Y_val_nested = self.Y_val.loc[indexes]
 
+            # SET ENTIRE COLUMNS TO NaN WHERE THE MASK HAS MISSING VALUES
+            current_mask = self.mask_unique[self.mask_unique["mask_id"] == i][mask_cols].iloc[0]
+
+            # For each column where the mask indicates missing (value = 1), set entire column to NaN
+            for col_idx, col_name in enumerate(mask_cols):
+                if current_mask.iloc[col_idx] == 1:
+                    X_val_nested.loc[:, col_name] = np.nan
+
             # obtain predictions
             predictions = self.model.predict(
                 X_val_nested,

From c9aa878a8698d3387101adc7a6a8a5ebdc10a424 Mon Sep 17 00:00:00 2001
From: Fdvanleeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Sat, 10 Jan 2026 19:51:18 +0100
Subject: [PATCH 15/18] Changes based on review

---
 .../CP_missing_data_example.py                |  17 +-
 .../CP_missing_data/CP_missing_data.py        | 246 +++++++++++-------
 .../CP_missing_data/__init__.py               |  12 +-
 tests/test_CP_missing_data.py                 |  28 +-
 4 files changed, 177 insertions(+), 126 deletions(-)

diff --git a/examples/CP_missing_data/CP_missing_data_example.py b/examples/CP_missing_data/CP_missing_data_example.py
index d042d67c..407c3c9d 100644
--- a/examples/CP_missing_data/CP_missing_data_example.py
+++ b/examples/CP_missing_data/CP_missing_data_example.py
@@ -12,14 +12,9 @@
 import pandas as pd
 import warnings
 
-try:
-    # Try standard TabPFN package first
-    from tabpfn import TabPFNRegressor
-except ImportError:
-    # Fall back to TabPFN client
-    from tabpfn_client import  TabPFNRegressor
+from tabpfn_extensions.utils import TabPFNClassifier, TabPFNRegressor
 
-from tabpfn_extensions.CP_missing_data import CP_MDA_TabPFNRegressor, CP_MDA_TabPFNRegressor_newdata
+from tabpfn_extensions.cp_missing_data import CPMDATabPFNRegressor, CPMDATabPFNRegressorNewData
 
 # generate some data
 np.random.seed(42)  # For reproducibility
@@ -36,13 +31,13 @@
 print(unique_patterns)
 
 # Use TabPFN+CP-MDA
-model = CP_MDA_TabPFNRegressor(X, Y, quantiles=[0.05, 0.5, 0.95], val_size=0.5, seed = 123)
-calibration_results, model_fit = model.fit()
+model = CPMDATabPFNRegressor(quantiles=[0.05, 0.5, 0.95], val_size=0.5, seed = 123)
+calibration_results, model_fit = model.fit(X, Y)
 print(calibration_results)
 
 # Apply the model to new cases 
-cp_apply = CP_MDA_TabPFNRegressor_newdata(model_fit, X_new = X, quantiles=[0.05, 0.5, 0.95], calibration_results=calibration_results)
-CP_results = cp_apply.fit()
+cp_apply = CPMDATabPFNRegressorNewData(model_fit, quantiles=[0.05, 0.5, 0.95], calibration_results=calibration_results)
+CP_results = cp_apply.predict(X)
 
 print("\nConformal prediction results:")
 print(f"Lower bound (corrected): {CP_results[0][:5]}")  # Show first 5
diff --git a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
index 2d32013f..25a80a44 100644
--- a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
+++ b/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
@@ -7,26 +7,19 @@
 from __future__ import annotations
 
 import warnings
+from typing import Optional
+
 import numpy as np
 import pandas as pd
+from numpy.typing import ArrayLike
 from sklearn.model_selection import train_test_split
 
-try:
-    # Try standard TabPFN package first
-    from tabpfn import TabPFNRegressor
-except ImportError:
-    # Fall back to TabPFN client
-    from tabpfn_client import  TabPFNRegressor
-
+from tabpfn_extensions.utils import TabPFNRegressor
 
-class CP_MDA_TabPFNRegressor:
-    """
-    Compute the correction terms for missing data masks using conformal prediction.
+class CPMDATabPFNRegressor:
+    """Compute the correction terms for missing data masks using conformal prediction.
 
     Parameters:
-        X_train : matrix-like of shape (n_samples, n_predictors)
-
-        Y_train : array-like of continuous outcome with shape (n_samples,)
 
         quantiles : array with three arguments denoting the quantiles of interest.
             The default is [0.05, 0.5, 0.95], where the first indicates the lower bound,
@@ -43,15 +36,23 @@ class CP_MDA_TabPFNRegressor:
 
     """
 
-    def __init__(self, X_train, Y_train, quantiles, val_size, seed=None):
-        self.X = pd.DataFrame(X_train)
-        self.Y = Y_train
+    def __init__(
+        self, 
+        quantiles: list[float], 
+        val_size: float, 
+        seed: Optional[int] = None
+    ) -> None:
         self.quantiles = quantiles
         self.val_size = val_size
         self.alpha = quantiles[0] * 2
         self.seed = seed
 
-    def calc_correction_term(self, predictions, y_val, alpha):
+    def calc_correction_term(
+        self,
+        predictions: tuple[np.ndarray, np.ndarray, np.ndarray], 
+        y_val: pd.Series, 
+        alpha: float
+    ) -> float:
         """Calculate the correction term for conformal prediction."""
         # obtain the lowerbound, median, and upperbound
         lb, pred, ub = predictions
@@ -67,134 +68,168 @@ def calc_correction_term(self, predictions, y_val, alpha):
         if Q_use > 1:
             Q_use = 1
             warnings.warn(
-                "Some masks have very small calibration sets")
+                "Some masks have very small calibration sets",  stacklevel=2)
 
         correction_term = np.quantile(s, Q_use)
         return correction_term
 
-    def split_data(self):
+    def split_data(self,
+        x: pd.DataFrame, 
+        y: np.ndarray
+    ) -> tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series, pd.DataFrame, pd.DataFrame]:
         """Split data into training and validation sets."""
         # create df with missing data indicator
-        missing_bool_df = self.X.isnull().astype(int)
-        self.X_train, self.X_val, Y_train_arr, Y_val_arr, self.Mask_train, self.Mask_val = train_test_split(
-            self.X, self.Y, missing_bool_df, test_size=self.val_size, random_state = self.seed
+        missing_bool_df = x.isna().astype(int)
+        x_train, x_val, y_train_arr, y_val_arr, mask_train, mask_val = train_test_split(
+            x, y, missing_bool_df, test_size=self.val_size, random_state = self.seed
         )
 
-        # Convert Y arrays back to pandas Series to maintain .iloc functionality
-        self.Y_train = pd.Series(Y_train_arr, index=self.X_train.index)
-        self.Y_val = pd.Series(Y_val_arr, index=self.X_val.index)
+        # Convert y arrays back to pandas Series to maintain .iloc functionality
+        y_train = pd.Series(y_train_arr, index=x_train.index)
+        y_val = pd.Series(y_val_arr, index=x_val.index)
+        
+        return x_train, x_val, y_train, y_val, mask_train, mask_val
 
-    def run_TABPFN(self):
+    def run_TABPFN(self, 
+        x_train: pd.DataFrame, 
+        y_train: pd.Series
+    ) -> TabPFNRegressor:
         """Fit the TabPFN model."""
         # fit model
-        m_fit = TabPFNRegressor()
-        m_fit.fit(self.X_train, self.Y_train)
-        self.model = m_fit
-
-    def mask_preprocess(self):
+        model = TabPFNRegressor()
+        model.fit(x_train, y_train)
+        return(model)
+
+    def mask_preprocess(
+        self, 
+        mask_val: pd.DataFrame
+    ) -> tuple[pd.DataFrame, pd.DataFrame]:
         """Preprocess masks and identify nested relationships."""
         # drop duplicates masks
-        mask_unique = self.Mask_val.drop_duplicates().copy()
+        mask_unique = mask_val.drop_duplicates().copy()
         # add mask id
         mask_unique["mask_id"] = range(1, len(mask_unique) + 1)
         # Get mask columns (all columns except mask_id)
-        mask_cols = [col for col in mask_unique.columns if col != 'mask_id']
+        mask_cols = [col for col in mask_unique.columns if col != "mask_id"]
 
         # Check nesting for all pairs of masks
         results = []
         for i, row_a in mask_unique.iterrows():
             mask_a = row_a[mask_cols].values
-            mask_a_id = row_a['mask_id']
+            mask_a_id = row_a["mask_id"]
             nested_masks = []
 
             for j, row_b in mask_unique.iterrows():
                 if i == j:  # Skip comparing mask with itself
                     continue
                 mask_b = row_b[mask_cols].values
-                mask_b_id = row_b['mask_id']
+                mask_b_id = row_b["mask_id"]
 
                 if ((mask_b == 1) & (mask_a == 0)).sum() == 0:
                     nested_masks.append(mask_b_id)
 
             results.append({
-                'mask_id': mask_a_id,
-                'nested_masks': nested_masks
+                "mask_id": mask_a_id,
+                "nested_masks": nested_masks
             })
 
-        self.mask_unique = mask_unique  
-        self.mask_nested = pd.DataFrame(results)
-
-    def create_calibration_sets(self):
+        mask_nested = pd.DataFrame(results)
+        return mask_unique, mask_nested
+
+    def create_calibration_sets(
+        self,
+        x_val: pd.DataFrame,
+        y_val: pd.Series,
+        mask_val: pd.DataFrame,
+        mask_unique: pd.DataFrame,
+        mask_nested: pd.DataFrame,
+        model: TabPFNRegressor
+    ) -> tuple[pd.DataFrame, TabPFNRegressor]:
         """Create calibration sets for each mask pattern."""
         # obtain list of columns
-        mask_cols = list(self.Mask_val.columns.values)
+        mask_cols = list(mask_val.columns.values)
 
         # Using merge to add the id of the mask
         # use original index values
-        df_with_ids = self.Mask_val.reset_index().merge(
-            self.mask_unique,  
+        df_with_ids = mask_val.reset_index().merge(
+            mask_unique,  
             on=mask_cols,
-            how='left'
+            how="left"
         )
 
-        for i in self.mask_unique["mask_id"]:
+        for i in mask_unique["mask_id"]:
             # select the nested masks
-            nested_masks = self.mask_nested[self.mask_nested["mask_id"] == i]["nested_masks"].values[0]
+            nested_masks = mask_nested[mask_nested["mask_id"] == i]["nested_masks"].values[0]
             
             # add the mask itself
-            nested_masks_with_self = nested_masks + [i]  # Create new list instead of append
+            nested_masks_with_self = [*nested_masks, i] 
 
             # obtain indexes for the rows
             indexes = df_with_ids[df_with_ids["mask_id"].isin(nested_masks_with_self)]["index"]
 
             # select the validation data based on the indices
-            X_val_nested = self.X_val.loc[indexes]
-            Y_val_nested = self.Y_val.loc[indexes]
+            x_val_nested = x_val.loc[indexes]
+            y_val_nested = y_val.loc[indexes]
 
             # SET ENTIRE COLUMNS TO NaN WHERE THE MASK HAS MISSING VALUES
-            current_mask = self.mask_unique[self.mask_unique["mask_id"] == i][mask_cols].iloc[0]
+            current_mask = mask_unique[mask_unique["mask_id"] == i][mask_cols].iloc[0]
 
             # For each column where the mask indicates missing (value = 1), set entire column to NaN
             for col_idx, col_name in enumerate(mask_cols):
                 if current_mask.iloc[col_idx] == 1:
-                    X_val_nested.loc[:, col_name] = np.nan
+                    x_val_nested.loc[:, col_name] = np.nan
 
             # obtain predictions
-            predictions = self.model.predict(
-                X_val_nested,
+            predictions = model.predict(
+                x_val_nested,
                 output_type="quantiles",
                 quantiles=self.quantiles
             )
 
             # calculate correction term
-            correction_term = self.calc_correction_term(predictions, Y_val_nested, self.alpha)
+            correction_term = self.calc_correction_term(predictions, y_val_nested, self.alpha)
 
             # save the correction term to the mask_unique dataframe
-            self.mask_unique.loc[self.mask_unique["mask_id"] == i, "correction_term"] = correction_term
-            self.mask_unique.loc[self.mask_unique["mask_id"] == i, "val_size"] =  X_val_nested.shape[0]
+            mask_unique.loc[mask_unique["mask_id"] == i, "correction_term"] = correction_term
+            mask_unique.loc[mask_unique["mask_id"] == i, "val_size"] =  x_val_nested.shape[0]
+
+
+        return mask_unique, model
+
+    def fit(
+        self, 
+        x_train: ArrayLike, 
+        y_train: ArrayLike
+    ) -> tuple[pd.DataFrame, TabPFNRegressor]:
+        """Convenience method to run the entire pipeline
 
+        Parameters:
+            x_train : matrix-like of shape (n_samples, n_predictors)
 
-        return self.mask_unique, self.model
+            y_train : array-like of continuous outcome with shape (n_samples,)
+        """
 
-    def fit(self):
-        """Convenience method to run the entire pipeline"""
-        self.split_data()
-        self.run_TABPFN()
-        self.mask_preprocess()
-        mask_unique, model = self.create_calibration_sets()
+        # Store and parse the data
+
+        x = pd.DataFrame(x_train)
+        y = y_train
+        
+        # Run trough all the functions
+        x_train, x_val, y_train, y_val, mask_train, mask_val = self.split_data(x, y)
+        model = self.run_TABPFN(x_train, y_train)
+        mask_unique, mask_nested = self.mask_preprocess(mask_val)
+        mask_unique, model = self.create_calibration_sets(
+            x_val, y_val, mask_val, mask_unique, mask_nested, model)
 
         return mask_unique, model
 
-class CP_MDA_TabPFNRegressor_newdata:
-    """
-    Compute the correction terms for missing data masks using conformal prediction.
+class CPMDATabPFNRegressorNewData:
+    """Compute the correction terms for missing data masks using conformal prediction.
 
     Parameters:
 
     TabPFN: Fitted TabPFNRegressor model.
 
-    X_new : matrix-like of shape (n_samples, n_predictors)
-
     quantiles : array with three arguments denoting the quantiles of interest used
                 in fitting the model. The default is [0.05, 0.5, 0.95].
 
@@ -207,55 +242,76 @@ class CP_MDA_TabPFNRegressor_newdata:
 
     """
 
-    def __init__(self,TabPFN, X_new, quantiles, calibration_results):
-        self.TabPFN = TabPFN
-        self.X = pd.DataFrame(X_new)
+    def __init__(
+        self, 
+        tabpfn: TabPFNRegressor, 
+        quantiles: list[float], 
+        calibration_results: pd.DataFrame
+    ) -> None:
+        self.tabpfn = tabpfn
         self.quantiles = quantiles
         self.calibration_results = calibration_results
 
-    def obtain_preds(self):
+    def obtain_preds(self, 
+        x: pd.DataFrame) -> np.ndarray:
         """Obtain predictions from fitted model."""
-        preds_test = self.TabPFN.predict(
-            self.X,
+        preds = self.tabpfn.predict(
+            x,
             output_type="quantiles",
             quantiles=self.quantiles
         )
-        self.preds_test = preds_test
+        return preds
 
-    def match_mask(self):
+    def match_mask(self,
+        x: pd.DataFrame) -> pd.DataFrame:
         """Add correction terms to the new masks from the test set."""
-        mask_test = self.X.isnull().astype(int)
+        mask_test = x.isna().astype(int)
         mask_cols = list(mask_test.columns.values)
 
         mask_test_cor = mask_test.merge(
                 self.calibration_results,
                 on=mask_cols,
-                how='left'
+                how="left"
             )
 
         # check if there are masks in the test set that are not in the calibration set
-        new_masks = mask_test_cor[mask_test_cor["correction_term"].isnull()][mask_cols]
+        new_masks = mask_test_cor[mask_test_cor["correction_term"].isna()][mask_cols]
 
         if new_masks.shape[0] > 0:
             warnings.warn(
                 "The following masks are not in the calibration set:\n"
                 f"{new_masks.to_string()}\n"
-                "The baseline quantile estimates will be returned for those cases."
+                "The baseline quantile estimates will be returned for those cases.",  stacklevel=2
             )
 
-        self.mask_test_cor = mask_test_cor
+        return mask_test_cor
 
-    def perform_correction(self):
+    def perform_correction(
+        self, 
+        preds: np.ndarray, 
+        mask_test_cor: pd.DataFrame
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
         """Apply correction terms to the prediction intervals."""
-        preds_test = self.preds_test.copy()
-        lb_corr = preds_test[0] - self.mask_test_cor["correction_term"].values
-        ub_corr = preds_test[2] + self.mask_test_cor["correction_term"].values
-
-        return lb_corr, preds_test[1], ub_corr, preds_test[0], preds_test[2]
-
-    def fit(self):
-        """Convenience method to run the entire pipeline"""
-        self.obtain_preds()
-        self.match_mask()
-        CP_results =  self.perform_correction()
-        return CP_results
+
+        lb_corr = preds[0] - mask_test_cor["correction_term"].values
+        ub_corr = preds[2] + mask_test_cor["correction_term"].values
+
+        return lb_corr, preds[1], ub_corr, preds[0], preds[2]
+
+    def predict(
+        self, 
+        x_new: ArrayLike
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+        """Convenience method to run the entire pipeline
+           Parameters:
+
+            x_new : matrix-like of shape (n_samples, n_predictors)
+        """
+
+        x = pd.DataFrame(x_new)
+
+        preds = self.obtain_preds(x)
+        mask_test_cor = self.match_mask(x)
+        cp_results = self.perform_correction(preds, mask_test_cor)
+
+        return cp_results
diff --git a/src/tabpfn_extensions/CP_missing_data/__init__.py b/src/tabpfn_extensions/CP_missing_data/__init__.py
index 4c76af49..18f6e08d 100644
--- a/src/tabpfn_extensions/CP_missing_data/__init__.py
+++ b/src/tabpfn_extensions/CP_missing_data/__init__.py
@@ -1,11 +1,11 @@
 """Conformal prediction for missing data module for tabpfn_extensions package."""
 
-from .CP_missing_data import (
-    CP_MDA_TabPFNRegressor,
-    CP_MDA_TabPFNRegressor_newdata,
+from .cp_missing_data import (
+    CPMDATabPFNRegressor,
+    CPMDATabPFNRegressorNewData,
 )
 
 __all__ = [
-    "CP_MDA_TabPFNRegressor",
-    "CP_MDA_TabPFNRegressor_newdata",
-]
+    "CPMDATabPFNRegressor",
+    "CPMDATabPFNRegressorNewData",
+]
\ No newline at end of file
diff --git a/tests/test_CP_missing_data.py b/tests/test_CP_missing_data.py
index a24bbb5e..45b1e97d 100644
--- a/tests/test_CP_missing_data.py
+++ b/tests/test_CP_missing_data.py
@@ -1,6 +1,6 @@
 """Tests for the CP_missing_data extension.
 
-This file tests the CP_MDA_TabPFNRegressor and CP_MDA_TabPFNRegressor_newdata functions,
+This file tests the CPMDATabPFNRegressor and CPMDATabPFNRegressorNewData functions,
 which attempts to obtain correct uncertainity estimates in case if missing data. 
 """
 
@@ -13,9 +13,9 @@
 from sklearn.model_selection import train_test_split
 
 try:
-    from tabpfn_extensions.CP_missing_data import (
-        CP_MDA_TabPFNRegressor,
-        CP_MDA_TabPFNRegressor_newdata,
+    from tabpfn_extensions.cp_missing_data import (
+        CPMDATabPFNRegressor,
+        CPMDATabPFNRegressorNewData,
     )
 except ImportError:
     pytest.skip("Required libraries (tabpfn) not installed", allow_module_level=True)
@@ -57,8 +57,8 @@ def seed():
 
 def test_model_CP(X_train, Y_train, seed):
     """Tests if the calibration corrections are of the correct shape and type."""
-    model = CP_MDA_TabPFNRegressor(X_train, Y_train,  quantiles = [0.05, 0.5, 0.95], val_size = 0.5, seed = seed)
-    calibration_results, model_fit = model.fit()
+    model = CPMDATabPFNRegressor(quantiles = [0.05, 0.5, 0.95], val_size = 0.5, seed = seed)
+    calibration_results, model_fit = model.fit(X_train, Y_train)
 
     # Replicate the split to get the validation set and find its unique masks.
     _, X_val, _, _ = train_test_split(X_train, Y_train, test_size=0.5, random_state=seed)
@@ -73,12 +73,12 @@ def test_model_CP(X_train, Y_train, seed):
 def test_reproducibility(X_train, Y_train, seed):
     """Tests that random_state ensures deterministic correction terms."""
 
-    model_1 = CP_MDA_TabPFNRegressor(X_train, Y_train, quantiles=[0.05, 0.5, 0.95], val_size = 0.5, seed = seed)
-    calibration_results_1, model_fit_1 = model_1.fit()
+    model_1 = CPMDATabPFNRegressor(quantiles=[0.05, 0.5, 0.95], val_size = 0.5, seed = seed)
+    calibration_results_1, model_fit_1 = model_1.fit(X_train, Y_train)
 
     # Second model with the same seed
-    model_2 = CP_MDA_TabPFNRegressor(X_train, Y_train, quantiles=[0.05, 0.5, 0.95] , val_size = 0.5, seed = seed)
-    calibration_results_2, model_fit_2 = model_2.fit()
+    model_2 = CPMDATabPFNRegressor(quantiles=[0.05, 0.5, 0.95] , val_size = 0.5, seed = seed)
+    calibration_results_2, model_fit_2 = model_2.fit(X_train, Y_train)
 
     # Assert that the outputs are identical
     assert_array_equal(calibration_results_1, calibration_results_2)
@@ -88,12 +88,12 @@ def test_predict(X_train, Y_train, seed, X_new):
     """Tests if the predictions have the correct shape and type."""
 
     # fit model 
-    model = CP_MDA_TabPFNRegressor(X_train, Y_train, quantiles=[0.05, 0.5, 0.95], val_size = 0.5, seed = seed)
-    calibration_results, model_fit = model.fit()
+    model = CPMDATabPFNRegressor(quantiles=[0.05, 0.5, 0.95], val_size = 0.5, seed = seed)
+    calibration_results, model_fit = model.fit(X_train, Y_train)
 
     # Apply the model to new cases 
-    cp_apply = CP_MDA_TabPFNRegressor_newdata(model_fit, X_new = X_new, quantiles=[0.05, 0.5, 0.95], calibration_results=calibration_results)
-    CP_results = cp_apply.fit()
+    cp_apply = CPMDATabPFNRegressorNewData(model_fit, quantiles=[0.05, 0.5, 0.95], calibration_results=calibration_results)
+    CP_results = cp_apply.predict(X_new)
 
     assert CP_results[1].size== X_new.shape[0]
     assert isinstance(CP_results[1], np.ndarray)

From ce539a418e0f47e78bd79e30fd2736c3696b235f Mon Sep 17 00:00:00 2001
From: Fdvanleeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Tue, 3 Feb 2026 12:18:10 +0100
Subject: [PATCH 16/18] Fix folder capitalization

---
 .../cp_missing_data_example.py}                                   | 0
 .../{CP_missing_data => cp_missing_data}/__init__.py              | 0
 .../CP_missing_data.py => cp_missing_data/cp_missing_data.py}     | 0
 tests/{test_CP_missing_data.py => test_cp_missing_data.py}        | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 rename examples/{CP_missing_data/CP_missing_data_example.py => cp_missing_data/cp_missing_data_example.py} (100%)
 rename src/tabpfn_extensions/{CP_missing_data => cp_missing_data}/__init__.py (100%)
 rename src/tabpfn_extensions/{CP_missing_data/CP_missing_data.py => cp_missing_data/cp_missing_data.py} (100%)
 rename tests/{test_CP_missing_data.py => test_cp_missing_data.py} (100%)

diff --git a/examples/CP_missing_data/CP_missing_data_example.py b/examples/cp_missing_data/cp_missing_data_example.py
similarity index 100%
rename from examples/CP_missing_data/CP_missing_data_example.py
rename to examples/cp_missing_data/cp_missing_data_example.py
diff --git a/src/tabpfn_extensions/CP_missing_data/__init__.py b/src/tabpfn_extensions/cp_missing_data/__init__.py
similarity index 100%
rename from src/tabpfn_extensions/CP_missing_data/__init__.py
rename to src/tabpfn_extensions/cp_missing_data/__init__.py
diff --git a/src/tabpfn_extensions/CP_missing_data/CP_missing_data.py b/src/tabpfn_extensions/cp_missing_data/cp_missing_data.py
similarity index 100%
rename from src/tabpfn_extensions/CP_missing_data/CP_missing_data.py
rename to src/tabpfn_extensions/cp_missing_data/cp_missing_data.py
diff --git a/tests/test_CP_missing_data.py b/tests/test_cp_missing_data.py
similarity index 100%
rename from tests/test_CP_missing_data.py
rename to tests/test_cp_missing_data.py

From d42d29da76d6a9710b93ab7f9bf814a8bda63e5a Mon Sep 17 00:00:00 2001
From: Fdvanleeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Tue, 3 Feb 2026 13:07:55 +0100
Subject: [PATCH 17/18] Changes based on the ruff linting & formatting

---
 .../cp_missing_data/cp_missing_data.py        | 96 ++++++++-----------
 tests/test_cp_missing_data.py                 | 13 +--
 2 files changed, 45 insertions(+), 64 deletions(-)

diff --git a/src/tabpfn_extensions/cp_missing_data/cp_missing_data.py b/src/tabpfn_extensions/cp_missing_data/cp_missing_data.py
index 25a80a44..f9714efd 100644
--- a/src/tabpfn_extensions/cp_missing_data/cp_missing_data.py
+++ b/src/tabpfn_extensions/cp_missing_data/cp_missing_data.py
@@ -7,40 +7,38 @@
 from __future__ import annotations
 
 import warnings
-from typing import Optional
+from typing import TYPE_CHECKING
 
 import numpy as np
 import pandas as pd
-from numpy.typing import ArrayLike
 from sklearn.model_selection import train_test_split
 
 from tabpfn_extensions.utils import TabPFNRegressor
 
+if TYPE_CHECKING:
+    from numpy.typing import ArrayLike
+
+
 class CPMDATabPFNRegressor:
     """Compute the correction terms for missing data masks using conformal prediction.
 
     Parameters:
-
         quantiles : array with three arguments denoting the quantiles of interest.
             The default is [0.05, 0.5, 0.95], where the first indicates the lower bound,
             the second the median, and the third the upper bound.
-
         val_size : float between 0 and 1, indicating the size of the validation set
             as a fraction of the training data.
 
-
     Returns:
-     mask_unique: DataFrame with the correction terms for each mask.
-
-     model: Fitted TabPFNRegressor model.
-
+        mask_unique: DataFrame with the correction terms for each mask.
+        model: Fitted TabPFNRegressor model.
     """
 
     def __init__(
-        self, 
-        quantiles: list[float], 
-        val_size: float, 
-        seed: Optional[int] = None
+        self,
+        quantiles: list[float],
+        val_size: float,
+        seed: int | None = None
     ) -> None:
         self.quantiles = quantiles
         self.val_size = val_size
@@ -49,8 +47,8 @@ def __init__(
 
     def calc_correction_term(
         self,
-        predictions: tuple[np.ndarray, np.ndarray, np.ndarray], 
-        y_val: pd.Series, 
+        predictions: tuple[np.ndarray, np.ndarray, np.ndarray],
+        y_val: pd.Series,
         alpha: float
     ) -> float:
         """Calculate the correction term for conformal prediction."""
@@ -74,7 +72,7 @@ def calc_correction_term(
         return correction_term
 
     def split_data(self,
-        x: pd.DataFrame, 
+        x: pd.DataFrame,
         y: np.ndarray
     ) -> tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series, pd.DataFrame, pd.DataFrame]:
         """Split data into training and validation sets."""
@@ -87,11 +85,11 @@ def split_data(self,
         # Convert y arrays back to pandas Series to maintain .iloc functionality
         y_train = pd.Series(y_train_arr, index=x_train.index)
         y_val = pd.Series(y_val_arr, index=x_val.index)
-        
+
         return x_train, x_val, y_train, y_val, mask_train, mask_val
 
-    def run_TABPFN(self, 
-        x_train: pd.DataFrame, 
+    def run_TABPFN(self,
+        x_train: pd.DataFrame,
         y_train: pd.Series
     ) -> TabPFNRegressor:
         """Fit the TabPFN model."""
@@ -101,7 +99,7 @@ def run_TABPFN(self,
         return(model)
 
     def mask_preprocess(
-        self, 
+        self,
         mask_val: pd.DataFrame
     ) -> tuple[pd.DataFrame, pd.DataFrame]:
         """Preprocess masks and identify nested relationships."""
@@ -152,7 +150,7 @@ def create_calibration_sets(
         # Using merge to add the id of the mask
         # use original index values
         df_with_ids = mask_val.reset_index().merge(
-            mask_unique,  
+            mask_unique,
             on=mask_cols,
             how="left"
         )
@@ -160,9 +158,9 @@ def create_calibration_sets(
         for i in mask_unique["mask_id"]:
             # select the nested masks
             nested_masks = mask_nested[mask_nested["mask_id"] == i]["nested_masks"].values[0]
-            
+
             # add the mask itself
-            nested_masks_with_self = [*nested_masks, i] 
+            nested_masks_with_self = [*nested_masks, i]
 
             # obtain indexes for the rows
             indexes = df_with_ids[df_with_ids["mask_id"].isin(nested_masks_with_self)]["index"]
@@ -193,27 +191,23 @@ def create_calibration_sets(
             mask_unique.loc[mask_unique["mask_id"] == i, "correction_term"] = correction_term
             mask_unique.loc[mask_unique["mask_id"] == i, "val_size"] =  x_val_nested.shape[0]
 
-
         return mask_unique, model
 
     def fit(
-        self, 
-        x_train: ArrayLike, 
+        self,
+        x_train: ArrayLike,
         y_train: ArrayLike
     ) -> tuple[pd.DataFrame, TabPFNRegressor]:
-        """Convenience method to run the entire pipeline
+        """Convenience method to run the entire pipeline.
 
         Parameters:
             x_train : matrix-like of shape (n_samples, n_predictors)
-
             y_train : array-like of continuous outcome with shape (n_samples,)
         """
-
         # Store and parse the data
-
         x = pd.DataFrame(x_train)
         y = y_train
-        
+
         # Run trough all the functions
         x_train, x_val, y_train, y_val, mask_train, mask_val = self.split_data(x, y)
         model = self.run_TABPFN(x_train, y_train)
@@ -223,36 +217,28 @@ def fit(
 
         return mask_unique, model
 
+
 class CPMDATabPFNRegressorNewData:
     """Compute the correction terms for missing data masks using conformal prediction.
 
     Parameters:
-
-    TabPFN: Fitted TabPFNRegressor model.
-
-    quantiles : array with three arguments denoting the quantiles of interest used
-                in fitting the model. The default is [0.05, 0.5, 0.95].
-
-    calibration_results : matrix with the correction terms for each mask.
-
-
-    Returns:
-     CP_results: DataFrame with shape (n_samples, 5). Included are the corrected lower bound,
-     prediction, corrected upper bound, non-corrected lower bound, and non-corrected upper bound.
-
+        tabpfn : Fitted TabPFNRegressor model.
+        quantiles : Array with three arguments denoting the quantiles of interest used
+            in fitting the model, e.g. [0.05, 0.5, 0.95] (required; no default).
+        calibration_results : Matrix with the correction terms for each mask.
     """
 
     def __init__(
-        self, 
-        tabpfn: TabPFNRegressor, 
-        quantiles: list[float], 
+        self,
+        tabpfn: TabPFNRegressor,
+        quantiles: list[float],
         calibration_results: pd.DataFrame
     ) -> None:
         self.tabpfn = tabpfn
         self.quantiles = quantiles
         self.calibration_results = calibration_results
 
-    def obtain_preds(self, 
+    def obtain_preds(self,
         x: pd.DataFrame) -> np.ndarray:
         """Obtain predictions from fitted model."""
         preds = self.tabpfn.predict(
@@ -287,31 +273,29 @@ def match_mask(self,
         return mask_test_cor
 
     def perform_correction(
-        self, 
-        preds: np.ndarray, 
+        self,
+        preds: np.ndarray,
         mask_test_cor: pd.DataFrame
     ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
         """Apply correction terms to the prediction intervals."""
-
         lb_corr = preds[0] - mask_test_cor["correction_term"].values
         ub_corr = preds[2] + mask_test_cor["correction_term"].values
 
         return lb_corr, preds[1], ub_corr, preds[0], preds[2]
 
     def predict(
-        self, 
+        self,
         x_new: ArrayLike
     ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-        """Convenience method to run the entire pipeline
-           Parameters:
+        """Convenience method to run the entire pipeline.
 
+        Parameters:
             x_new : matrix-like of shape (n_samples, n_predictors)
         """
-
         x = pd.DataFrame(x_new)
 
         preds = self.obtain_preds(x)
         mask_test_cor = self.match_mask(x)
         cp_results = self.perform_correction(preds, mask_test_cor)
 
-        return cp_results
+        return cp_results
\ No newline at end of file
diff --git a/tests/test_cp_missing_data.py b/tests/test_cp_missing_data.py
index 45b1e97d..9eaa219d 100644
--- a/tests/test_cp_missing_data.py
+++ b/tests/test_cp_missing_data.py
@@ -1,7 +1,7 @@
 """Tests for the CP_missing_data extension.
 
 This file tests the CPMDATabPFNRegressor and CPMDATabPFNRegressorNewData functions,
-which attempts to obtain correct uncertainity estimates in case if missing data. 
+which attempt to obtain correct uncertainty estimates in case of missing data.
 """
 
 from __future__ import annotations
@@ -20,8 +20,8 @@
 except ImportError:
     pytest.skip("Required libraries (tabpfn) not installed", allow_module_level=True)
 
-# -------- Fixtures --------
 
+# -------- Fixtures --------
 @pytest.fixture
 def X_train():
     return np.array([
@@ -54,7 +54,6 @@ def seed():
 
 
 # -- Test --
-
 def test_model_CP(X_train, Y_train, seed):
     """Tests if the calibration corrections are of the correct shape and type."""
     model = CPMDATabPFNRegressor(quantiles = [0.05, 0.5, 0.95], val_size = 0.5, seed = seed)
@@ -62,7 +61,7 @@ def test_model_CP(X_train, Y_train, seed):
 
     # Replicate the split to get the validation set and find its unique masks.
     _, X_val, _, _ = train_test_split(X_train, Y_train, test_size=0.5, random_state=seed)
-    missing_df = pd.DataFrame(X_val).isnull().astype(int).drop_duplicates()
+    missing_df = pd.DataFrame(X_val).isna().astype(int).drop_duplicates()
 
     # check type, size of the calibration results
     assert calibration_results.shape[0] == missing_df.shape[0]
@@ -72,7 +71,6 @@ def test_model_CP(X_train, Y_train, seed):
 
 def test_reproducibility(X_train, Y_train, seed):
     """Tests that random_state ensures deterministic correction terms."""
-
     model_1 = CPMDATabPFNRegressor(quantiles=[0.05, 0.5, 0.95], val_size = 0.5, seed = seed)
     calibration_results_1, model_fit_1 = model_1.fit(X_train, Y_train)
 
@@ -86,12 +84,11 @@ def test_reproducibility(X_train, Y_train, seed):
 
 def test_predict(X_train, Y_train, seed, X_new):
     """Tests if the predictions have the correct shape and type."""
-
-    # fit model 
+    # fit model
     model = CPMDATabPFNRegressor(quantiles=[0.05, 0.5, 0.95], val_size = 0.5, seed = seed)
     calibration_results, model_fit = model.fit(X_train, Y_train)
 
-    # Apply the model to new cases 
+    # Apply the model to new cases
     cp_apply = CPMDATabPFNRegressorNewData(model_fit, quantiles=[0.05, 0.5, 0.95], calibration_results=calibration_results)
     CP_results = cp_apply.predict(X_new)
 

From f20d0fdb776c24793a3dcf59fd33d61014cf22bf Mon Sep 17 00:00:00 2001
From: Fdvanleeuwen <94541170+Fdvanleeuwen@users.noreply.github.com>
Date: Tue, 3 Feb 2026 15:13:11 +0100
Subject: [PATCH 18/18] Apply ruff formatting

---
 .../cp_missing_data/__init__.py               |   2 +-
 .../cp_missing_data/cp_missing_data.py        | 109 ++++++++----------
 tests/test_cp_missing_data.py                 |  73 ++++++++----
 3 files changed, 100 insertions(+), 84 deletions(-)

diff --git a/src/tabpfn_extensions/cp_missing_data/__init__.py b/src/tabpfn_extensions/cp_missing_data/__init__.py
index 18f6e08d..2391856f 100644
--- a/src/tabpfn_extensions/cp_missing_data/__init__.py
+++ b/src/tabpfn_extensions/cp_missing_data/__init__.py
@@ -8,4 +8,4 @@
 __all__ = [
     "CPMDATabPFNRegressor",
     "CPMDATabPFNRegressorNewData",
-]
\ No newline at end of file
+]
diff --git a/src/tabpfn_extensions/cp_missing_data/cp_missing_data.py b/src/tabpfn_extensions/cp_missing_data/cp_missing_data.py
index f9714efd..40aaf9f4 100644
--- a/src/tabpfn_extensions/cp_missing_data/cp_missing_data.py
+++ b/src/tabpfn_extensions/cp_missing_data/cp_missing_data.py
@@ -35,10 +35,7 @@ class CPMDATabPFNRegressor:
     """
 
     def __init__(
-        self,
-        quantiles: list[float],
-        val_size: float,
-        seed: int | None = None
+        self, quantiles: list[float], val_size: float, seed: int | None = None
     ) -> None:
         self.quantiles = quantiles
         self.val_size = val_size
@@ -49,37 +46,37 @@ def calc_correction_term(
         self,
         predictions: tuple[np.ndarray, np.ndarray, np.ndarray],
         y_val: pd.Series,
-        alpha: float
+        alpha: float,
     ) -> float:
         """Calculate the correction term for conformal prediction."""
         # obtain the lowerbound, median, and upperbound
         lb, pred, ub = predictions
         # calculate difference between bounds and observed values
-        error_lb = (lb - y_val)
-        error_ub = (y_val - ub)
+        error_lb = lb - y_val
+        error_ub = y_val - ub
         s = np.maximum(error_lb, error_ub)
 
         # obtain the emperical quantile
-        Q_use = (1 - alpha) * (1 + 1/len(s))
+        Q_use = (1 - alpha) * (1 + 1 / len(s))
 
         # Check is Q_use if not larger then 1
         if Q_use > 1:
             Q_use = 1
-            warnings.warn(
-                "Some masks have very small calibration sets",  stacklevel=2)
+            warnings.warn("Some masks have very small calibration sets", stacklevel=2)
 
         correction_term = np.quantile(s, Q_use)
         return correction_term
 
-    def split_data(self,
-        x: pd.DataFrame,
-        y: np.ndarray
-    ) -> tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series, pd.DataFrame, pd.DataFrame]:
+    def split_data(
+        self, x: pd.DataFrame, y: np.ndarray
+    ) -> tuple[
+        pd.DataFrame, pd.DataFrame, pd.Series, pd.Series, pd.DataFrame, pd.DataFrame
+    ]:
         """Split data into training and validation sets."""
         # create df with missing data indicator
         missing_bool_df = x.isna().astype(int)
         x_train, x_val, y_train_arr, y_val_arr, mask_train, mask_val = train_test_split(
-            x, y, missing_bool_df, test_size=self.val_size, random_state = self.seed
+            x, y, missing_bool_df, test_size=self.val_size, random_state=self.seed
         )
 
         # Convert y arrays back to pandas Series to maintain .iloc functionality
@@ -88,19 +85,15 @@ def split_data(self,
 
         return x_train, x_val, y_train, y_val, mask_train, mask_val
 
-    def run_TABPFN(self,
-        x_train: pd.DataFrame,
-        y_train: pd.Series
-    ) -> TabPFNRegressor:
+    def run_TABPFN(self, x_train: pd.DataFrame, y_train: pd.Series) -> TabPFNRegressor:
         """Fit the TabPFN model."""
         # fit model
         model = TabPFNRegressor()
         model.fit(x_train, y_train)
-        return(model)
+        return model
 
     def mask_preprocess(
-        self,
-        mask_val: pd.DataFrame
+        self, mask_val: pd.DataFrame
     ) -> tuple[pd.DataFrame, pd.DataFrame]:
         """Preprocess masks and identify nested relationships."""
         # drop duplicates masks
@@ -126,10 +119,7 @@ def mask_preprocess(
                 if ((mask_b == 1) & (mask_a == 0)).sum() == 0:
                     nested_masks.append(mask_b_id)
 
-            results.append({
-                "mask_id": mask_a_id,
-                "nested_masks": nested_masks
-            })
+            results.append({"mask_id": mask_a_id, "nested_masks": nested_masks})
 
         mask_nested = pd.DataFrame(results)
         return mask_unique, mask_nested
@@ -141,7 +131,7 @@ def create_calibration_sets(
         mask_val: pd.DataFrame,
         mask_unique: pd.DataFrame,
         mask_nested: pd.DataFrame,
-        model: TabPFNRegressor
+        model: TabPFNRegressor,
     ) -> tuple[pd.DataFrame, TabPFNRegressor]:
         """Create calibration sets for each mask pattern."""
         # obtain list of columns
@@ -150,20 +140,22 @@ def create_calibration_sets(
         # Using merge to add the id of the mask
         # use original index values
         df_with_ids = mask_val.reset_index().merge(
-            mask_unique,
-            on=mask_cols,
-            how="left"
+            mask_unique, on=mask_cols, how="left"
         )
 
         for i in mask_unique["mask_id"]:
             # select the nested masks
-            nested_masks = mask_nested[mask_nested["mask_id"] == i]["nested_masks"].values[0]
+            nested_masks = mask_nested[mask_nested["mask_id"] == i][
+                "nested_masks"
+            ].values[0]
 
             # add the mask itself
             nested_masks_with_self = [*nested_masks, i]
 
             # obtain indexes for the rows
-            indexes = df_with_ids[df_with_ids["mask_id"].isin(nested_masks_with_self)]["index"]
+            indexes = df_with_ids[df_with_ids["mask_id"].isin(nested_masks_with_self)][
+                "index"
+            ]
 
             # select the validation data based on the indices
             x_val_nested = x_val.loc[indexes]
@@ -179,24 +171,26 @@ def create_calibration_sets(
 
             # obtain predictions
             predictions = model.predict(
-                x_val_nested,
-                output_type="quantiles",
-                quantiles=self.quantiles
+                x_val_nested, output_type="quantiles", quantiles=self.quantiles
             )
 
             # calculate correction term
-            correction_term = self.calc_correction_term(predictions, y_val_nested, self.alpha)
+            correction_term = self.calc_correction_term(
+                predictions, y_val_nested, self.alpha
+            )
 
             # save the correction term to the mask_unique dataframe
-            mask_unique.loc[mask_unique["mask_id"] == i, "correction_term"] = correction_term
-            mask_unique.loc[mask_unique["mask_id"] == i, "val_size"] =  x_val_nested.shape[0]
+            mask_unique.loc[mask_unique["mask_id"] == i, "correction_term"] = (
+                correction_term
+            )
+            mask_unique.loc[mask_unique["mask_id"] == i, "val_size"] = (
+                x_val_nested.shape[0]
+            )
 
         return mask_unique, model
 
     def fit(
-        self,
-        x_train: ArrayLike,
-        y_train: ArrayLike
+        self, x_train: ArrayLike, y_train: ArrayLike
     ) -> tuple[pd.DataFrame, TabPFNRegressor]:
         """Convenience method to run the entire pipeline.
 
@@ -213,7 +207,8 @@ def fit(
         model = self.run_TABPFN(x_train, y_train)
         mask_unique, mask_nested = self.mask_preprocess(mask_val)
         mask_unique, model = self.create_calibration_sets(
-            x_val, y_val, mask_val, mask_unique, mask_nested, model)
+            x_val, y_val, mask_val, mask_unique, mask_nested, model
+        )
 
         return mask_unique, model
 
@@ -232,33 +227,27 @@ def __init__(
         self,
         tabpfn: TabPFNRegressor,
         quantiles: list[float],
-        calibration_results: pd.DataFrame
+        calibration_results: pd.DataFrame,
     ) -> None:
         self.tabpfn = tabpfn
         self.quantiles = quantiles
         self.calibration_results = calibration_results
 
-    def obtain_preds(self,
-        x: pd.DataFrame) -> np.ndarray:
+    def obtain_preds(self, x: pd.DataFrame) -> np.ndarray:
         """Obtain predictions from fitted model."""
         preds = self.tabpfn.predict(
-            x,
-            output_type="quantiles",
-            quantiles=self.quantiles
+            x, output_type="quantiles", quantiles=self.quantiles
         )
         return preds
 
-    def match_mask(self,
-        x: pd.DataFrame) -> pd.DataFrame:
+    def match_mask(self, x: pd.DataFrame) -> pd.DataFrame:
         """Add correction terms to the new masks from the test set."""
         mask_test = x.isna().astype(int)
         mask_cols = list(mask_test.columns.values)
 
         mask_test_cor = mask_test.merge(
-                self.calibration_results,
-                on=mask_cols,
-                how="left"
-            )
+            self.calibration_results, on=mask_cols, how="left"
+        )
 
         # check if there are masks in the test set that are not in the calibration set
         new_masks = mask_test_cor[mask_test_cor["correction_term"].isna()][mask_cols]
@@ -267,15 +256,14 @@ def match_mask(self,
             warnings.warn(
                 "The following masks are not in the calibration set:\n"
                 f"{new_masks.to_string()}\n"
-                "The baseline quantile estimates will be returned for those cases.",  stacklevel=2
+                "The baseline quantile estimates will be returned for those cases.",
+                stacklevel=2,
             )
 
         return mask_test_cor
 
     def perform_correction(
-        self,
-        preds: np.ndarray,
-        mask_test_cor: pd.DataFrame
+        self, preds: np.ndarray, mask_test_cor: pd.DataFrame
     ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
         """Apply correction terms to the prediction intervals."""
         lb_corr = preds[0] - mask_test_cor["correction_term"].values
@@ -284,8 +272,7 @@ def perform_correction(
         return lb_corr, preds[1], ub_corr, preds[0], preds[2]
 
     def predict(
-        self,
-        x_new: ArrayLike
+        self, x_new: ArrayLike
     ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
         """Convenience method to run the entire pipeline.
 
@@ -298,4 +285,4 @@ def predict(
         mask_test_cor = self.match_mask(x)
         cp_results = self.perform_correction(preds, mask_test_cor)
 
-        return cp_results
\ No newline at end of file
+        return cp_results
diff --git a/tests/test_cp_missing_data.py b/tests/test_cp_missing_data.py
index 9eaa219d..0530cbc1 100644
--- a/tests/test_cp_missing_data.py
+++ b/tests/test_cp_missing_data.py
@@ -24,28 +24,53 @@
 # -------- Fixtures --------
 @pytest.fixture
 def X_train():
-    return np.array([
-        [0.1, np.nan], [0.3, 0.4], [np.nan, 0.6], [0.7, 0.8],
-        [0.2, np.nan], [0.2, np.nan], [0.9, 0.4], [np.nan, 0.4],
-        [0.3, 0.2], [np.nan, 0.9], [0.8, np.nan], [0.1, 0.2],
-        [np.nan, 0.5], [0.3, 0.7], [0.7, np.nan], [0.7, np.nan],
-        [0.3, 0.4], [np.nan, 0.2], [0.9, 0.7], [np.nan, 0.3],
-        [0.3, 0.7], [0.4, 0.8], [0.5, 0.4], [0.7, 0.2], [0.8, 0.3],
-    ])
+    return np.array(
+        [
+            [0.1, np.nan],
+            [0.3, 0.4],
+            [np.nan, 0.6],
+            [0.7, 0.8],
+            [0.2, np.nan],
+            [0.2, np.nan],
+            [0.9, 0.4],
+            [np.nan, 0.4],
+            [0.3, 0.2],
+            [np.nan, 0.9],
+            [0.8, np.nan],
+            [0.1, 0.2],
+            [np.nan, 0.5],
+            [0.3, 0.7],
+            [0.7, np.nan],
+            [0.7, np.nan],
+            [0.3, 0.4],
+            [np.nan, 0.2],
+            [0.9, 0.7],
+            [np.nan, 0.3],
+            [0.3, 0.7],
+            [0.4, 0.8],
+            [0.5, 0.4],
+            [0.7, 0.2],
+            [0.8, 0.3],
+        ]
+    )
 
 
 @pytest.fixture
 def Y_train():
-    return np.array([1,3,1,2,3,4,5,6,1,2,3,4,5,6,7,2,3,5,6,8,4,2,1,2,3])
+    return np.array(
+        [1, 3, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 7, 2, 3, 5, 6, 8, 4, 2, 1, 2, 3]
+    )
 
 
 @pytest.fixture
 def X_new():
-    return np.array([
-        [0.1, 0.1],
-        [0.3, np.nan],
-        [np.nan, 0.6],
-    ])
+    return np.array(
+        [
+            [0.1, 0.1],
+            [0.3, np.nan],
+            [np.nan, 0.6],
+        ]
+    )
 
 
 @pytest.fixture
@@ -56,11 +81,13 @@ def seed():
 # -- Test --
 def test_model_CP(X_train, Y_train, seed):
     """Tests if the calibration corrections are of the correct shape and type."""
-    model = CPMDATabPFNRegressor(quantiles = [0.05, 0.5, 0.95], val_size = 0.5, seed = seed)
+    model = CPMDATabPFNRegressor(quantiles=[0.05, 0.5, 0.95], val_size=0.5, seed=seed)
     calibration_results, model_fit = model.fit(X_train, Y_train)
 
     # Replicate the split to get the validation set and find its unique masks.
-    _, X_val, _, _ = train_test_split(X_train, Y_train, test_size=0.5, random_state=seed)
+    _, X_val, _, _ = train_test_split(
+        X_train, Y_train, test_size=0.5, random_state=seed
+    )
     missing_df = pd.DataFrame(X_val).isna().astype(int).drop_duplicates()
 
     # check type, size of the calibration results
@@ -71,11 +98,11 @@ def test_model_CP(X_train, Y_train, seed):
 
 def test_reproducibility(X_train, Y_train, seed):
     """Tests that random_state ensures deterministic correction terms."""
-    model_1 = CPMDATabPFNRegressor(quantiles=[0.05, 0.5, 0.95], val_size = 0.5, seed = seed)
+    model_1 = CPMDATabPFNRegressor(quantiles=[0.05, 0.5, 0.95], val_size=0.5, seed=seed)
     calibration_results_1, model_fit_1 = model_1.fit(X_train, Y_train)
 
     # Second model with the same seed
-    model_2 = CPMDATabPFNRegressor(quantiles=[0.05, 0.5, 0.95] , val_size = 0.5, seed = seed)
+    model_2 = CPMDATabPFNRegressor(quantiles=[0.05, 0.5, 0.95], val_size=0.5, seed=seed)
     calibration_results_2, model_fit_2 = model_2.fit(X_train, Y_train)
 
     # Assert that the outputs are identical
@@ -85,13 +112,15 @@ def test_reproducibility(X_train, Y_train, seed):
 def test_predict(X_train, Y_train, seed, X_new):
     """Tests if the predictions have the correct shape and type."""
     # fit model
-    model = CPMDATabPFNRegressor(quantiles=[0.05, 0.5, 0.95], val_size = 0.5, seed = seed)
+    model = CPMDATabPFNRegressor(quantiles=[0.05, 0.5, 0.95], val_size=0.5, seed=seed)
     calibration_results, model_fit = model.fit(X_train, Y_train)
 
     # Apply the model to new cases
-    cp_apply = CPMDATabPFNRegressorNewData(model_fit, quantiles=[0.05, 0.5, 0.95], calibration_results=calibration_results)
+    cp_apply = CPMDATabPFNRegressorNewData(
+        model_fit, quantiles=[0.05, 0.5, 0.95], calibration_results=calibration_results
+    )
     CP_results = cp_apply.predict(X_new)
 
-    assert CP_results[1].size== X_new.shape[0]
+    assert CP_results[1].size == X_new.shape[0]
     assert isinstance(CP_results[1], np.ndarray)
-    assert len(CP_results)== 5
\ No newline at end of file
+    assert len(CP_results) == 5