From 39d6a8cbb1d6f1ba7eb9e4afa4613d6a8b309685 Mon Sep 17 00:00:00 2001
From: noahho <Noah.homa@gmail.com>
Date: Mon, 1 Sep 2025 13:34:09 +0200
Subject: [PATCH 1/5] Add verbose logging to DecisionTreeTabPFN fit/predict

---
 .../sklearn_based_decision_tree_tabpfn.py     | 120 +++++++++++++++++-
 1 file changed, 117 insertions(+), 3 deletions(-)

diff --git a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py
index 3874a5f8..92dc11aa 100644
--- a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py
+++ b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py
@@ -5,6 +5,7 @@
 
 import random
 import warnings
+import logging
 
 # For type checking only
 from typing import TYPE_CHECKING, Any
@@ -35,6 +36,11 @@
 )
 from tabpfn_extensions.utils import softmax
 
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
 ###############################################################################
 #                             BASE DECISION TREE                              #
 ###############################################################################
@@ -296,6 +302,8 @@ def _fit(
         self : DecisionTreeTabPFNBase
             The fitted model.
         """
+        if self.verbose:
+            logging.info("Starting DecisionTreeTabPFN fit process...")
         # Initialize attributes (per scikit-learn conventions)
         self._leaf_nodes = []
         self._leaf_train_data = {}
@@ -317,6 +325,8 @@ def _fit(
             y,
             ensure_all_finite=False,  # scikit-learn sets self.n_features_in_ automatically
         )
+        if self.verbose:
+            logging.info(f"Input data shape: X={X.shape}, y={y.shape}")
 
         if self.task_type == "multiclass":
             self.classes_ = unique_labels(y)
@@ -345,6 +355,8 @@ def _fit(
 
         # If adaptive_tree is on, do a train/validation split
         if self.adaptive_tree:
+            if self.verbose:
+                logging.info("Adaptive tree is enabled. Preparing train/validation split.")
             stratify = y_ if (self.task_type == "multiclass") else None
 
             # Basic checks for classification to see if splitting is feasible
@@ -352,6 +364,10 @@ def _fit(
                 unique_classes, counts = np.unique(y_, return_counts=True)
                 # Disable adaptive tree in extreme cases
                 if counts.min() == 1 or len(unique_classes) < 2:
+                    if self.verbose:
+                        logging.info(
+                            "Disabling adaptive tree: minimum class count is 1 or only one class present."
+                        )
                     self.adaptive_tree = False
                 elif len(unique_classes) > int(len(y_) * self.adaptive_tree_test_size):
                     self.adaptive_tree_test_size = min(
@@ -359,6 +375,8 @@ def _fit(
                         len(unique_classes) / len(y_) * 1.5,
                     )
             if len(y_) < 10:
+                if self.verbose:
+                    logging.info("Disabling adaptive tree: fewer than 10 samples.")
                 self.adaptive_tree = False
 
             if self.adaptive_tree:
@@ -380,9 +398,18 @@ def _fit(
                     random_state=self.random_state,
                     stratify=stratify,
                 )
+                if self.verbose:
+                    logging.info(
+                        f"Train/Valid split created: "
+                        f"Train size={len(y_train)}, Valid size={len(y_valid)}"
+                    )
 
                 # Safety check - if split is empty, revert
                 if len(y_train) == 0 or len(y_valid) == 0:
+                    if self.verbose:
+                        logging.info(
+                            "Disabling adaptive tree: train or validation split is empty."
+                        )
                     self.adaptive_tree = False
                     X_train, X_preproc_train, y_train, sw_train = (
                         X,
@@ -398,6 +425,10 @@ def _fit(
                     and self.adaptive_tree
                     and (len(np.unique(y_train)) != len(np.unique(y_valid)))
                 ):
+                    if self.verbose:
+                        logging.info(
+                            "Disabling adaptive tree: train and validation sets have different classes."
+                        )
                     self.adaptive_tree = False
             else:
                 # If we were disabled, keep all data as training
@@ -410,6 +441,8 @@ def _fit(
                 X_valid = X_preproc_valid = y_valid = sw_valid = None
         else:
             # Not adaptive, everything is train
+            if self.verbose:
+                logging.info("Adaptive tree is disabled. Using all data for training.")
             X_train, X_preproc_train, y_train, sw_train = (
                 X,
                 X_preprocessed,
@@ -419,9 +452,15 @@ def _fit(
             X_valid = X_preproc_valid = y_valid = sw_valid = None
 
         # Build the sklearn decision tree
+        if self.verbose:
+            logging.info("Fitting the initial scikit-learn decision tree structure...")
         self._decision_tree = self._init_decision_tree()
         self._decision_tree.fit(X_preproc_train, y_train, sample_weight=sw_train)
         self._tree = self._decision_tree  # for sklearn compatibility
+        if self.verbose:
+            logging.info(
+                f"Decision tree fitting complete. Tree has {self._tree.tree_.node_count} nodes."
+            )
 
         # Keep references for potential post-fitting (leaf-level fitting)
         self.X = X
@@ -439,6 +478,8 @@ def _fit(
 
         # We will do a leaf-fitting step on demand (lazy) in predict
         self._need_post_fit = True
+        if self.verbose:
+            logging.info("Leaf fitting is deferred until the first predict() call.")
 
         # If verbose, optionally do it right away:
         if self.verbose:
@@ -461,7 +502,7 @@ def _init_decision_tree(self) -> BaseDecisionTree:
     def _post_fit(self) -> None:
         """Hook after the decision tree is fitted. Can be used for final prints/logs."""
         if self.verbose:
-            pass
+            logging.info("Base tree structure has been fitted.")
 
     def _preprocess_data_for_tree(self, X: np.ndarray) -> np.ndarray:
         """Handle missing data prior to feeding into the decision tree.
@@ -620,15 +661,23 @@ def _predict_internal(
         """
         # If we haven't yet done the final leaf fit, do it here
         if self._need_post_fit:
+            if self.verbose:
+                logging.info("First predict call: executing deferred leaf fitting.")
             self._need_post_fit = False
             if self.adaptive_tree:
                 # Fit leaves on train data, check performance on valid data if available
+                if self.verbose:
+                    logging.info("Fitting leaves on training data for adaptive pruning...")
                 self.fit_leaves(self.train_X, self.train_y)
                 if (
                     hasattr(self, "valid_X")
                     and self.valid_X is not None
                     and self.valid_y is not None
                 ):
+                    if self.verbose:
+                        logging.info(
+                            "Evaluating node performance on validation set for pruning decisions."
+                        )
                     # Force a pass to evaluate node performance
                     # so we can prune or decide node updates
                     self._predict_internal(
@@ -637,6 +686,8 @@ def _predict_internal(
                         check_input=False,
                     )
             # Now fit leaves again using the entire dataset (train + valid, effectively)
+            if self.verbose:
+                logging.info("Fitting leaves on the full dataset.")
             self.fit_leaves(self.X, self.y)
 
         # Assign TabPFNs categorical features if needed
@@ -646,6 +697,10 @@ def _predict_internal(
         # Find leaf membership in X
         X_leaf_nodes = self._apply_tree(X)
         n_samples, n_nodes, n_estims = X_leaf_nodes.shape
+        if self.verbose:
+            logging.info(
+                f"Starting prediction for {n_samples} samples across {n_nodes} nodes."
+            )
 
         # Track intermediate predictions
         y_prob: dict[int, dict[int, np.ndarray]] = {}
@@ -701,6 +756,13 @@ def _predict_internal(
                     X_leaf_nodes[test_sample_indices, leaf_id + 1 :, est_id].sum()
                     == 0.0
                 )
+                if self.verbose:
+                    logging.info(
+                        f"Processing Node {leaf_id}: "
+                        f"Train Samples={X_train_leaf.shape[0]}, "
+                        f"Test Samples={len(test_sample_indices)}, "
+                        f"Is Final Leaf={is_leaf}"
+                    )
 
                 # If it's not a leaf and we are not fitting internal nodes, skip
                 # (unless leaf_id==0 and we do a top-level check for adaptive_tree)
@@ -709,6 +771,10 @@ def _predict_internal(
                     and (not self.fit_nodes)
                     and not (leaf_id == 0 and self.adaptive_tree)
                 ):
+                    if self.verbose:
+                        logging.info(
+                            f"  -> Skipping Node {leaf_id}: Not a final leaf and fit_nodes is False."
+                        )
                     if do_pruning:
                         self._node_prediction_type[est_id][leaf_id] = "previous"
                     continue
@@ -725,6 +791,10 @@ def _predict_internal(
                             should_skip_previously_pruned = True
 
                     if should_skip_previously_pruned:
+                        if self.verbose:
+                            logging.info(
+                                f"  -> Skipping Node {leaf_id}: Node was previously pruned."
+                            )
                         continue
 
                     # Skip if classification is missing a class
@@ -733,6 +803,10 @@ def _predict_internal(
                         and len(np.unique(y_train_leaf)) < self.n_classes_
                         and self.adaptive_tree_skip_class_missing
                     ):
+                        if self.verbose:
+                            logging.info(
+                                f"  -> Skipping Node {leaf_id}: Not all classes are present in training data."
+                            )
                         self._node_prediction_type[est_id][leaf_id] = "previous"
                         continue
 
@@ -749,6 +823,10 @@ def _predict_internal(
                             and not is_leaf
                         )
                     ):
+                        if self.verbose:
+                            logging.info(
+                                f"  -> Skipping Node {leaf_id}: Does not meet sample size requirements for adaptive fitting."
+                            )
                         if do_pruning:
                             self._node_prediction_type[est_id][leaf_id] = "previous"
                         continue
@@ -797,10 +875,18 @@ def _predict_internal(
                         y,
                         y_prob[est_id][leaf_id],
                     )
+                    if self.verbose:
+                        logging.info(
+                            f"  -> Pruning Result for Node {leaf_id}: "
+                            f"Type='{self._node_prediction_type[est_id][leaf_id]}', "
+                            f"Score={y_metric[est_id][leaf_id]:.4f}"
+                        )
                 else:
                     # If not validating and not adaptive, just use replacement
                     y_prob[est_id][leaf_id] = y_prob_replacement
 
+        if self.verbose:
+            logging.info("Prediction process finished.")
         # Final predictions come from the last estimators last node
         return y_prob[n_estims - 1][n_nodes - 1]
 
@@ -1151,12 +1237,18 @@ def _predict_leaf(
 
         # If only one class, fill probability 1.0 for that class
         if len(classes_in_leaf) == 1:
+            if self.verbose:
+                logging.info(
+                    f"  -> Node {leaf_id}: Only one class present. Predicting 1.0 for class {classes_in_leaf[0]}."
+                )
             y_eval_prob[indices, classes_in_leaf[0]] = 1.0
             return y_eval_prob
 
         # Otherwise, fit TabPFN
         leaf_seed = leaf_id + self.tree_seed
         try:
+            if self.verbose:
+                logging.info(f"  -> Node {leaf_id}: Fitting TabPFNClassifier.")
             self.tabpfn.random_state = leaf_seed
             self.tabpfn.fit(X_train_leaf, y_train_leaf)
 
@@ -1182,6 +1274,10 @@ def _predict_leaf(
                 "One node has constant features for TabPFN. Using class-ratio fallback.",
                 stacklevel=2,
             )
+            if self.verbose:
+                logging.warning(
+                    f"  -> Node {leaf_id}: TabPFN failed due to constant features. Using class ratio fallback."
+                )
             _, counts = np.unique(y_train_leaf, return_counts=True)
             ratio = counts / counts.sum()
             for i, c in enumerate(classes_in_leaf):
@@ -1231,7 +1327,7 @@ def predict_proba(self, X: np.ndarray, check_input: bool = True) -> np.ndarray:
     def _post_fit(self) -> None:
         """Optional hook after the decision tree is fitted."""
         if self.verbose:
-            pass
+            logging.info("Classifier tree structure has been fitted.")
 
 
 ###############################################################################
@@ -1354,23 +1450,37 @@ def _predict_leaf(
 
         # If no training data or just 1 sample, fall back to 0 or single value
         if len(X_train_leaf) < 1:
+            if self.verbose:
+                logging.info(
+                    f"  -> Node {leaf_id}: No training samples. Predicting 0.0."
+                )
             warnings.warn(
                 f"Leaf {leaf_id} has zero training samples. Returning 0.0 predictions.",
                 stacklevel=2,
             )
             return y_eval
         elif len(X_train_leaf) == 1:
+            if self.verbose:
+                logging.info(
+                    f"  -> Node {leaf_id}: Only one training sample. Predicting its value."
+                )
             y_eval[indices] = y_train_leaf[0]
             return y_eval
 
         # If all y are identical, return that constant
         if np.all(y_train_leaf == y_train_leaf[0]):
+            if self.verbose:
+                logging.info(
+                    f"  -> Node {leaf_id}: All target values are constant. Predicting {y_train_leaf[0]}."
+                )
             y_eval[indices] = y_train_leaf[0]
             return y_eval
 
         # Fit TabPFNRegressor
         leaf_seed = leaf_id + self.tree_seed
         try:
+            if self.verbose:
+                logging.info(f"  -> Node {leaf_id}: Fitting TabPFNRegressor.")
             self.tabpfn.random_state = leaf_seed
             self.tabpfn.fit(X_train_leaf, y_train_leaf)
 
@@ -1389,6 +1499,10 @@ def _predict_leaf(
                 f"TabPFN fit/predict failed at leaf {leaf_id}: {e}. Using mean fallback.",
                 stacklevel=2,
             )
+            if self.verbose:
+                logging.warning(
+                    f"  -> Node {leaf_id}: TabPFN failed ({e}). Using mean fallback."
+                )
             y_eval[indices] = np.mean(y_train_leaf)
 
         return y_eval
@@ -1442,4 +1556,4 @@ def predict_full(self, X: np.ndarray) -> np.ndarray:
     def _post_fit(self) -> None:
         """Optional hook after the regressor's tree is fitted."""
         if self.verbose:
-            pass
+            logging.info("Regressor tree structure has been fitted.")
\ No newline at end of file

From 75a52a14933ee6ed1d9022dfab88751761366b9d Mon Sep 17 00:00:00 2001
From: noahho <Noah.homa@gmail.com>
Date: Mon, 1 Sep 2025 13:34:40 +0200
Subject: [PATCH 2/5] Apply ruff fixes: sort imports, wrap long logging calls

---
 .../rf_pfn/sklearn_based_decision_tree_tabpfn.py     | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py
index 92dc11aa..3a6c8bae 100644
--- a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py
+++ b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py
@@ -3,9 +3,9 @@
 
 from __future__ import annotations
 
+import logging
 import random
 import warnings
-import logging
 
 # For type checking only
 from typing import TYPE_CHECKING, Any
@@ -356,7 +356,9 @@ def _fit(
         # If adaptive_tree is on, do a train/validation split
         if self.adaptive_tree:
             if self.verbose:
-                logging.info("Adaptive tree is enabled. Preparing train/validation split.")
+                logging.info(
+                    "Adaptive tree is enabled. Preparing train/validation split."
+                )
             stratify = y_ if (self.task_type == "multiclass") else None
 
             # Basic checks for classification to see if splitting is feasible
@@ -667,7 +669,9 @@ def _predict_internal(
             if self.adaptive_tree:
                 # Fit leaves on train data, check performance on valid data if available
                 if self.verbose:
-                    logging.info("Fitting leaves on training data for adaptive pruning...")
+                    logging.info(
+                        "Fitting leaves on training data for adaptive pruning..."
+                    )
                 self.fit_leaves(self.train_X, self.train_y)
                 if (
                     hasattr(self, "valid_X")
@@ -1556,4 +1560,4 @@ def predict_full(self, X: np.ndarray) -> np.ndarray:
     def _post_fit(self) -> None:
         """Optional hook after the regressor's tree is fitted."""
         if self.verbose:
-            logging.info("Regressor tree structure has been fitted.")
\ No newline at end of file
+            logging.info("Regressor tree structure has been fitted.")

From 5a2b190c873b99b204324b9070dbdbb7e80b133a Mon Sep 17 00:00:00 2001
From: noahho <Noah.homa@gmail.com>
Date: Tue, 2 Sep 2025 20:29:09 +0200
Subject: [PATCH 3/5] Use module-level logger instead of logging.basicConfig

---
 .../sklearn_based_decision_tree_tabpfn.py     | 76 +++++++++----------
 1 file changed, 37 insertions(+), 39 deletions(-)

diff --git a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py
index 3a6c8bae..0929a31f 100644
--- a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py
+++ b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py
@@ -36,10 +36,8 @@
 )
 from tabpfn_extensions.utils import softmax
 
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
-)
+# Define a module-level logger
+logger = logging.getLogger(__name__)
 
 ###############################################################################
 #                             BASE DECISION TREE                              #
@@ -303,7 +301,7 @@ def _fit(
             The fitted model.
         """
         if self.verbose:
-            logging.info("Starting DecisionTreeTabPFN fit process...")
+            logger.info("Starting DecisionTreeTabPFN fit process...")
         # Initialize attributes (per scikit-learn conventions)
         self._leaf_nodes = []
         self._leaf_train_data = {}
@@ -326,7 +324,7 @@ def _fit(
             ensure_all_finite=False,  # scikit-learn sets self.n_features_in_ automatically
         )
         if self.verbose:
-            logging.info(f"Input data shape: X={X.shape}, y={y.shape}")
+            logger.info(f"Input data shape: X={X.shape}, y={y.shape}")
 
         if self.task_type == "multiclass":
             self.classes_ = unique_labels(y)
@@ -356,7 +354,7 @@ def _fit(
         # If adaptive_tree is on, do a train/validation split
         if self.adaptive_tree:
             if self.verbose:
-                logging.info(
+                logger.info(
                     "Adaptive tree is enabled. Preparing train/validation split."
                 )
             stratify = y_ if (self.task_type == "multiclass") else None
@@ -367,7 +365,7 @@ def _fit(
                 # Disable adaptive tree in extreme cases
                 if counts.min() == 1 or len(unique_classes) < 2:
                     if self.verbose:
-                        logging.info(
+                        logger.info(
                             "Disabling adaptive tree: minimum class count is 1 or only one class present."
                         )
                     self.adaptive_tree = False
@@ -378,7 +376,7 @@ def _fit(
                     )
             if len(y_) < 10:
                 if self.verbose:
-                    logging.info("Disabling adaptive tree: fewer than 10 samples.")
+                    logger.info("Disabling adaptive tree: fewer than 10 samples.")
                 self.adaptive_tree = False
 
             if self.adaptive_tree:
@@ -401,7 +399,7 @@ def _fit(
                     stratify=stratify,
                 )
                 if self.verbose:
-                    logging.info(
+                    logger.info(
                         f"Train/Valid split created: "
                         f"Train size={len(y_train)}, Valid size={len(y_valid)}"
                     )
@@ -409,7 +407,7 @@ def _fit(
                 # Safety check - if split is empty, revert
                 if len(y_train) == 0 or len(y_valid) == 0:
                     if self.verbose:
-                        logging.info(
+                        logger.info(
                             "Disabling adaptive tree: train or validation split is empty."
                         )
                     self.adaptive_tree = False
@@ -428,7 +426,7 @@ def _fit(
                     and (len(np.unique(y_train)) != len(np.unique(y_valid)))
                 ):
                     if self.verbose:
-                        logging.info(
+                        logger.info(
                             "Disabling adaptive tree: train and validation sets have different classes."
                         )
                     self.adaptive_tree = False
@@ -444,7 +442,7 @@ def _fit(
         else:
             # Not adaptive, everything is train
             if self.verbose:
-                logging.info("Adaptive tree is disabled. Using all data for training.")
+                logger.info("Adaptive tree is disabled. Using all data for training.")
             X_train, X_preproc_train, y_train, sw_train = (
                 X,
                 X_preprocessed,
@@ -455,12 +453,12 @@ def _fit(
 
         # Build the sklearn decision tree
         if self.verbose:
-            logging.info("Fitting the initial scikit-learn decision tree structure...")
+            logger.info("Fitting the initial scikit-learn decision tree structure...")
         self._decision_tree = self._init_decision_tree()
         self._decision_tree.fit(X_preproc_train, y_train, sample_weight=sw_train)
         self._tree = self._decision_tree  # for sklearn compatibility
         if self.verbose:
-            logging.info(
+            logger.info(
                 f"Decision tree fitting complete. Tree has {self._tree.tree_.node_count} nodes."
             )
 
@@ -481,7 +479,7 @@ def _fit(
         # We will do a leaf-fitting step on demand (lazy) in predict
         self._need_post_fit = True
         if self.verbose:
-            logging.info("Leaf fitting is deferred until the first predict() call.")
+            logger.info("Leaf fitting is deferred until the first predict() call.")
 
         # If verbose, optionally do it right away:
         if self.verbose:
@@ -504,7 +502,7 @@ def _init_decision_tree(self) -> BaseDecisionTree:
     def _post_fit(self) -> None:
         """Hook after the decision tree is fitted. Can be used for final prints/logs."""
         if self.verbose:
-            logging.info("Base tree structure has been fitted.")
+            logger.info("Base tree structure has been fitted.")
 
     def _preprocess_data_for_tree(self, X: np.ndarray) -> np.ndarray:
         """Handle missing data prior to feeding into the decision tree.
@@ -664,12 +662,12 @@ def _predict_internal(
         # If we haven't yet done the final leaf fit, do it here
         if self._need_post_fit:
             if self.verbose:
-                logging.info("First predict call: executing deferred leaf fitting.")
+                logger.info("First predict call: executing deferred leaf fitting.")
             self._need_post_fit = False
             if self.adaptive_tree:
                 # Fit leaves on train data, check performance on valid data if available
                 if self.verbose:
-                    logging.info(
+                    logger.info(
                         "Fitting leaves on training data for adaptive pruning..."
                     )
                 self.fit_leaves(self.train_X, self.train_y)
@@ -679,7 +677,7 @@ def _predict_internal(
                     and self.valid_y is not None
                 ):
                     if self.verbose:
-                        logging.info(
+                        logger.info(
                             "Evaluating node performance on validation set for pruning decisions."
                         )
                     # Force a pass to evaluate node performance
@@ -691,7 +689,7 @@ def _predict_internal(
                     )
             # Now fit leaves again using the entire dataset (train + valid, effectively)
             if self.verbose:
-                logging.info("Fitting leaves on the full dataset.")
+                logger.info("Fitting leaves on the full dataset.")
             self.fit_leaves(self.X, self.y)
 
         # Assign TabPFNs categorical features if needed
@@ -702,7 +700,7 @@ def _predict_internal(
         X_leaf_nodes = self._apply_tree(X)
         n_samples, n_nodes, n_estims = X_leaf_nodes.shape
         if self.verbose:
-            logging.info(
+            logger.info(
                 f"Starting prediction for {n_samples} samples across {n_nodes} nodes."
             )
 
@@ -761,7 +759,7 @@ def _predict_internal(
                     == 0.0
                 )
                 if self.verbose:
-                    logging.info(
+                    logger.info(
                         f"Processing Node {leaf_id}: "
                         f"Train Samples={X_train_leaf.shape[0]}, "
                         f"Test Samples={len(test_sample_indices)}, "
@@ -776,7 +774,7 @@ def _predict_internal(
                     and not (leaf_id == 0 and self.adaptive_tree)
                 ):
                     if self.verbose:
-                        logging.info(
+                        logger.info(
                             f"  -> Skipping Node {leaf_id}: Not a final leaf and fit_nodes is False."
                         )
                     if do_pruning:
@@ -796,7 +794,7 @@ def _predict_internal(
 
                     if should_skip_previously_pruned:
                         if self.verbose:
-                            logging.info(
+                            logger.info(
                                 f"  -> Skipping Node {leaf_id}: Node was previously pruned."
                             )
                         continue
@@ -808,7 +806,7 @@ def _predict_internal(
                         and self.adaptive_tree_skip_class_missing
                     ):
                         if self.verbose:
-                            logging.info(
+                            logger.info(
                                 f"  -> Skipping Node {leaf_id}: Not all classes are present in training data."
                             )
                         self._node_prediction_type[est_id][leaf_id] = "previous"
@@ -828,7 +826,7 @@ def _predict_internal(
                         )
                     ):
                         if self.verbose:
-                            logging.info(
+                            logger.info(
                                 f"  -> Skipping Node {leaf_id}: Does not meet sample size requirements for adaptive fitting."
                             )
                         if do_pruning:
@@ -880,7 +878,7 @@ def _predict_internal(
                         y_prob[est_id][leaf_id],
                     )
                     if self.verbose:
-                        logging.info(
+                        logger.info(
                             f"  -> Pruning Result for Node {leaf_id}: "
                             f"Type='{self._node_prediction_type[est_id][leaf_id]}', "
                             f"Score={y_metric[est_id][leaf_id]:.4f}"
@@ -890,7 +888,7 @@ def _predict_internal(
                     y_prob[est_id][leaf_id] = y_prob_replacement
 
         if self.verbose:
-            logging.info("Prediction process finished.")
+            logger.info("Prediction process finished.")
         # Final predictions come from the last estimators last node
         return y_prob[n_estims - 1][n_nodes - 1]
 
@@ -1242,7 +1240,7 @@ def _predict_leaf(
         # If only one class, fill probability 1.0 for that class
         if len(classes_in_leaf) == 1:
             if self.verbose:
-                logging.info(
+                logger.info(
                     f"  -> Node {leaf_id}: Only one class present. Predicting 1.0 for class {classes_in_leaf[0]}."
                 )
             y_eval_prob[indices, classes_in_leaf[0]] = 1.0
@@ -1252,7 +1250,7 @@ def _predict_leaf(
         leaf_seed = leaf_id + self.tree_seed
         try:
             if self.verbose:
-                logging.info(f"  -> Node {leaf_id}: Fitting TabPFNClassifier.")
+                logger.info(f"  -> Node {leaf_id}: Fitting TabPFNClassifier.")
             self.tabpfn.random_state = leaf_seed
             self.tabpfn.fit(X_train_leaf, y_train_leaf)
 
@@ -1279,7 +1277,7 @@ def _predict_leaf(
                 stacklevel=2,
             )
             if self.verbose:
-                logging.warning(
+                logger.warning(
                     f"  -> Node {leaf_id}: TabPFN failed due to constant features. Using class ratio fallback."
                 )
             _, counts = np.unique(y_train_leaf, return_counts=True)
@@ -1331,7 +1329,7 @@ def predict_proba(self, X: np.ndarray, check_input: bool = True) -> np.ndarray:
     def _post_fit(self) -> None:
         """Optional hook after the decision tree is fitted."""
         if self.verbose:
-            logging.info("Classifier tree structure has been fitted.")
+            logger.info("Classifier tree structure has been fitted.")
 
 
 ###############################################################################
@@ -1455,7 +1453,7 @@ def _predict_leaf(
         # If no training data or just 1 sample, fall back to 0 or single value
         if len(X_train_leaf) < 1:
             if self.verbose:
-                logging.info(
+                logger.info(
                     f"  -> Node {leaf_id}: No training samples. Predicting 0.0."
                 )
             warnings.warn(
@@ -1465,7 +1463,7 @@ def _predict_leaf(
             return y_eval
         elif len(X_train_leaf) == 1:
             if self.verbose:
-                logging.info(
+                logger.info(
                     f"  -> Node {leaf_id}: Only one training sample. Predicting its value."
                 )
             y_eval[indices] = y_train_leaf[0]
@@ -1474,7 +1472,7 @@ def _predict_leaf(
         # If all y are identical, return that constant
         if np.all(y_train_leaf == y_train_leaf[0]):
             if self.verbose:
-                logging.info(
+                logger.info(
                     f"  -> Node {leaf_id}: All target values are constant. Predicting {y_train_leaf[0]}."
                 )
             y_eval[indices] = y_train_leaf[0]
@@ -1484,7 +1482,7 @@ def _predict_leaf(
         leaf_seed = leaf_id + self.tree_seed
         try:
             if self.verbose:
-                logging.info(f"  -> Node {leaf_id}: Fitting TabPFNRegressor.")
+                logger.info(f"  -> Node {leaf_id}: Fitting TabPFNRegressor.")
             self.tabpfn.random_state = leaf_seed
             self.tabpfn.fit(X_train_leaf, y_train_leaf)
 
@@ -1504,7 +1502,7 @@ def _predict_leaf(
                 stacklevel=2,
             )
             if self.verbose:
-                logging.warning(
+                logger.warning(
                     f"  -> Node {leaf_id}: TabPFN failed ({e}). Using mean fallback."
                 )
             y_eval[indices] = np.mean(y_train_leaf)
@@ -1560,4 +1558,4 @@ def predict_full(self, X: np.ndarray) -> np.ndarray:
     def _post_fit(self) -> None:
         """Optional hook after the regressor's tree is fitted."""
         if self.verbose:
-            logging.info("Regressor tree structure has been fitted.")
+            logger.info("Regressor tree structure has been fitted.")
\ No newline at end of file

From 6c9db2721e0f40506c099fbf2a061d69cd199116 Mon Sep 17 00:00:00 2001
From: noahho <Noah.homa@gmail.com>
Date: Wed, 3 Sep 2025 09:25:57 +0200
Subject: [PATCH 4/5] Add missing newline at end of file

---
 .../rf_pfn/sklearn_based_decision_tree_tabpfn.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py
index 0929a31f..0cbcb272 100644
--- a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py
+++ b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py
@@ -1558,4 +1558,4 @@ def predict_full(self, X: np.ndarray) -> np.ndarray:
     def _post_fit(self) -> None:
         """Optional hook after the regressor's tree is fitted."""
         if self.verbose:
-            logger.info("Regressor tree structure has been fitted.")
\ No newline at end of file
+            logger.info("Regressor tree structure has been fitted.")

From ac68e133a8f7f5c2f0e465ec5dce108406364d3a Mon Sep 17 00:00:00 2001
From: Noah Hollmann <noah@priorlabs.ai>
Date: Mon, 23 Mar 2026 10:54:14 +0100
Subject: [PATCH 5/5] Remove redundant module-level logger comment

Co-authored-by: Oscar Key <oscar@priorlabs.ai>
---
 .../rf_pfn/sklearn_based_decision_tree_tabpfn.py                 | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py
index 8046f810..b58334ba 100644
--- a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py
+++ b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py
@@ -39,7 +39,6 @@
 )
 from tabpfn_extensions.utils import softmax
 
-# Define a module-level logger
 logger = logging.getLogger(__name__)
 
 ###############################################################################