From 39d6a8cbb1d6f1ba7eb9e4afa4613d6a8b309685 Mon Sep 17 00:00:00 2001 From: noahho Date: Mon, 1 Sep 2025 13:34:09 +0200 Subject: [PATCH 1/5] - add logging for dt pfn --- .../sklearn_based_decision_tree_tabpfn.py | 120 +++++++++++++++++- 1 file changed, 117 insertions(+), 3 deletions(-) diff --git a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py index 3874a5f8..92dc11aa 100644 --- a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py +++ b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py @@ -5,6 +5,7 @@ import random import warnings +import logging # For type checking only from typing import TYPE_CHECKING, Any @@ -35,6 +36,11 @@ ) from tabpfn_extensions.utils import softmax +# Configure logging +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) + ############################################################################### # BASE DECISION TREE # ############################################################################### @@ -296,6 +302,8 @@ def _fit( self : DecisionTreeTabPFNBase The fitted model. """ + if self.verbose: + logging.info("Starting DecisionTreeTabPFN fit process...") # Initialize attributes (per scikit-learn conventions) self._leaf_nodes = [] self._leaf_train_data = {} @@ -317,6 +325,8 @@ def _fit( y, ensure_all_finite=False, # scikit-learn sets self.n_features_in_ automatically ) + if self.verbose: + logging.info(f"Input data shape: X={X.shape}, y={y.shape}") if self.task_type == "multiclass": self.classes_ = unique_labels(y) @@ -345,6 +355,8 @@ def _fit( # If adaptive_tree is on, do a train/validation split if self.adaptive_tree: + if self.verbose: + logging.info("Adaptive tree is enabled. Preparing train/validation split.") stratify = y_ if (self.task_type == "multiclass") else None # Basic checks for classification to see if splitting is feasible @@ -352,6 +364,10 @@ def _fit( unique_classes, counts = np.unique(y_, return_counts=True) # Disable adaptive tree in extreme cases if counts.min() == 1 or len(unique_classes) < 2: + if self.verbose: + logging.info( + "Disabling adaptive tree: minimum class count is 1 or only one class present." + ) self.adaptive_tree = False elif len(unique_classes) > int(len(y_) * self.adaptive_tree_test_size): self.adaptive_tree_test_size = min( @@ -359,6 +375,8 @@ def _fit( len(unique_classes) / len(y_) * 1.5, ) if len(y_) < 10: + if self.verbose: + logging.info("Disabling adaptive tree: fewer than 10 samples.") self.adaptive_tree = False if self.adaptive_tree: @@ -380,9 +398,18 @@ def _fit( random_state=self.random_state, stratify=stratify, ) + if self.verbose: + logging.info( + f"Train/Valid split created: " + f"Train size={len(y_train)}, Valid size={len(y_valid)}" + ) # Safety check - if split is empty, revert if len(y_train) == 0 or len(y_valid) == 0: + if self.verbose: + logging.info( + "Disabling adaptive tree: train or validation split is empty." + ) self.adaptive_tree = False X_train, X_preproc_train, y_train, sw_train = ( X, @@ -398,6 +425,10 @@ def _fit( and self.adaptive_tree and (len(np.unique(y_train)) != len(np.unique(y_valid))) ): + if self.verbose: + logging.info( + "Disabling adaptive tree: train and validation sets have different classes." + ) self.adaptive_tree = False else: # If we were disabled, keep all data as training @@ -410,6 +441,8 @@ def _fit( X_valid = X_preproc_valid = y_valid = sw_valid = None else: # Not adaptive, everything is train + if self.verbose: + logging.info("Adaptive tree is disabled. Using all data for training.") X_train, X_preproc_train, y_train, sw_train = ( X, X_preprocessed, @@ -419,9 +452,15 @@ def _fit( X_valid = X_preproc_valid = y_valid = sw_valid = None # Build the sklearn decision tree + if self.verbose: + logging.info("Fitting the initial scikit-learn decision tree structure...") self._decision_tree = self._init_decision_tree() self._decision_tree.fit(X_preproc_train, y_train, sample_weight=sw_train) self._tree = self._decision_tree # for sklearn compatibility + if self.verbose: + logging.info( + f"Decision tree fitting complete. Tree has {self._tree.tree_.node_count} nodes." + ) # Keep references for potential post-fitting (leaf-level fitting) self.X = X @@ -439,6 +478,8 @@ def _fit( # We will do a leaf-fitting step on demand (lazy) in predict self._need_post_fit = True + if self.verbose: + logging.info("Leaf fitting is deferred until the first predict() call.") # If verbose, optionally do it right away: if self.verbose: @@ -461,7 +502,7 @@ def _init_decision_tree(self) -> BaseDecisionTree: def _post_fit(self) -> None: """Hook after the decision tree is fitted. Can be used for final prints/logs.""" if self.verbose: - pass + logging.info("Base tree structure has been fitted.") def _preprocess_data_for_tree(self, X: np.ndarray) -> np.ndarray: """Handle missing data prior to feeding into the decision tree. @@ -620,15 +661,23 @@ def _predict_internal( """ # If we haven't yet done the final leaf fit, do it here if self._need_post_fit: + if self.verbose: + logging.info("First predict call: executing deferred leaf fitting.") self._need_post_fit = False if self.adaptive_tree: # Fit leaves on train data, check performance on valid data if available + if self.verbose: + logging.info("Fitting leaves on training data for adaptive pruning...") self.fit_leaves(self.train_X, self.train_y) if ( hasattr(self, "valid_X") and self.valid_X is not None and self.valid_y is not None ): + if self.verbose: + logging.info( + "Evaluating node performance on validation set for pruning decisions." + ) # Force a pass to evaluate node performance # so we can prune or decide node updates self._predict_internal( @@ -637,6 +686,8 @@ def _predict_internal( check_input=False, ) # Now fit leaves again using the entire dataset (train + valid, effectively) + if self.verbose: + logging.info("Fitting leaves on the full dataset.") self.fit_leaves(self.X, self.y) # Assign TabPFNs categorical features if needed @@ -646,6 +697,10 @@ def _predict_internal( # Find leaf membership in X X_leaf_nodes = self._apply_tree(X) n_samples, n_nodes, n_estims = X_leaf_nodes.shape + if self.verbose: + logging.info( + f"Starting prediction for {n_samples} samples across {n_nodes} nodes." + ) # Track intermediate predictions y_prob: dict[int, dict[int, np.ndarray]] = {} @@ -701,6 +756,13 @@ def _predict_internal( X_leaf_nodes[test_sample_indices, leaf_id + 1 :, est_id].sum() == 0.0 ) + if self.verbose: + logging.info( + f"Processing Node {leaf_id}: " + f"Train Samples={X_train_leaf.shape[0]}, " + f"Test Samples={len(test_sample_indices)}, " + f"Is Final Leaf={is_leaf}" + ) # If it's not a leaf and we are not fitting internal nodes, skip # (unless leaf_id==0 and we do a top-level check for adaptive_tree) @@ -709,6 +771,10 @@ def _predict_internal( and (not self.fit_nodes) and not (leaf_id == 0 and self.adaptive_tree) ): + if self.verbose: + logging.info( + f" -> Skipping Node {leaf_id}: Not a final leaf and fit_nodes is False." + ) if do_pruning: self._node_prediction_type[est_id][leaf_id] = "previous" continue @@ -725,6 +791,10 @@ def _predict_internal( should_skip_previously_pruned = True if should_skip_previously_pruned: + if self.verbose: + logging.info( + f" -> Skipping Node {leaf_id}: Node was previously pruned." + ) continue # Skip if classification is missing a class @@ -733,6 +803,10 @@ def _predict_internal( and len(np.unique(y_train_leaf)) < self.n_classes_ and self.adaptive_tree_skip_class_missing ): + if self.verbose: + logging.info( + f" -> Skipping Node {leaf_id}: Not all classes are present in training data." + ) self._node_prediction_type[est_id][leaf_id] = "previous" continue @@ -749,6 +823,10 @@ def _predict_internal( and not is_leaf ) ): + if self.verbose: + logging.info( + f" -> Skipping Node {leaf_id}: Does not meet sample size requirements for adaptive fitting." + ) if do_pruning: self._node_prediction_type[est_id][leaf_id] = "previous" continue @@ -797,10 +875,18 @@ def _predict_internal( y, y_prob[est_id][leaf_id], ) + if self.verbose: + logging.info( + f" -> Pruning Result for Node {leaf_id}: " + f"Type='{self._node_prediction_type[est_id][leaf_id]}', " + f"Score={y_metric[est_id][leaf_id]:.4f}" + ) else: # If not validating and not adaptive, just use replacement y_prob[est_id][leaf_id] = y_prob_replacement + if self.verbose: + logging.info("Prediction process finished.") # Final predictions come from the last estimators last node return y_prob[n_estims - 1][n_nodes - 1] @@ -1151,12 +1237,18 @@ def _predict_leaf( # If only one class, fill probability 1.0 for that class if len(classes_in_leaf) == 1: + if self.verbose: + logging.info( + f" -> Node {leaf_id}: Only one class present. Predicting 1.0 for class {classes_in_leaf[0]}." + ) y_eval_prob[indices, classes_in_leaf[0]] = 1.0 return y_eval_prob # Otherwise, fit TabPFN leaf_seed = leaf_id + self.tree_seed try: + if self.verbose: + logging.info(f" -> Node {leaf_id}: Fitting TabPFNClassifier.") self.tabpfn.random_state = leaf_seed self.tabpfn.fit(X_train_leaf, y_train_leaf) @@ -1182,6 +1274,10 @@ def _predict_leaf( "One node has constant features for TabPFN. Using class-ratio fallback.", stacklevel=2, ) + if self.verbose: + logging.warning( + f" -> Node {leaf_id}: TabPFN failed due to constant features. Using class ratio fallback." + ) _, counts = np.unique(y_train_leaf, return_counts=True) ratio = counts / counts.sum() for i, c in enumerate(classes_in_leaf): @@ -1231,7 +1327,7 @@ def predict_proba(self, X: np.ndarray, check_input: bool = True) -> np.ndarray: def _post_fit(self) -> None: """Optional hook after the decision tree is fitted.""" if self.verbose: - pass + logging.info("Classifier tree structure has been fitted.") ############################################################################### @@ -1354,23 +1450,37 @@ def _predict_leaf( # If no training data or just 1 sample, fall back to 0 or single value if len(X_train_leaf) < 1: + if self.verbose: + logging.info( + f" -> Node {leaf_id}: No training samples. Predicting 0.0." + ) warnings.warn( f"Leaf {leaf_id} has zero training samples. Returning 0.0 predictions.", stacklevel=2, ) return y_eval elif len(X_train_leaf) == 1: + if self.verbose: + logging.info( + f" -> Node {leaf_id}: Only one training sample. Predicting its value." + ) y_eval[indices] = y_train_leaf[0] return y_eval # If all y are identical, return that constant if np.all(y_train_leaf == y_train_leaf[0]): + if self.verbose: + logging.info( + f" -> Node {leaf_id}: All target values are constant. Predicting {y_train_leaf[0]}." + ) y_eval[indices] = y_train_leaf[0] return y_eval # Fit TabPFNRegressor leaf_seed = leaf_id + self.tree_seed try: + if self.verbose: + logging.info(f" -> Node {leaf_id}: Fitting TabPFNRegressor.") self.tabpfn.random_state = leaf_seed self.tabpfn.fit(X_train_leaf, y_train_leaf) @@ -1389,6 +1499,10 @@ def _predict_leaf( f"TabPFN fit/predict failed at leaf {leaf_id}: {e}. Using mean fallback.", stacklevel=2, ) + if self.verbose: + logging.warning( + f" -> Node {leaf_id}: TabPFN failed ({e}). Using mean fallback." + ) y_eval[indices] = np.mean(y_train_leaf) return y_eval @@ -1442,4 +1556,4 @@ def predict_full(self, X: np.ndarray) -> np.ndarray: def _post_fit(self) -> None: """Optional hook after the regressor's tree is fitted.""" if self.verbose: - pass + logging.info("Regressor tree structure has been fitted.") \ No newline at end of file From 75a52a14933ee6ed1d9022dfab88751761366b9d Mon Sep 17 00:00:00 2001 From: noahho Date: Mon, 1 Sep 2025 13:34:40 +0200 Subject: [PATCH 2/5] ruff --- .../rf_pfn/sklearn_based_decision_tree_tabpfn.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py index 92dc11aa..3a6c8bae 100644 --- a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py +++ b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py @@ -3,9 +3,9 @@ from __future__ import annotations +import logging import random import warnings -import logging # For type checking only from typing import TYPE_CHECKING, Any @@ -356,7 +356,9 @@ def _fit( # If adaptive_tree is on, do a train/validation split if self.adaptive_tree: if self.verbose: - logging.info("Adaptive tree is enabled. Preparing train/validation split.") + logging.info( + "Adaptive tree is enabled. Preparing train/validation split." + ) stratify = y_ if (self.task_type == "multiclass") else None # Basic checks for classification to see if splitting is feasible @@ -667,7 +669,9 @@ def _predict_internal( if self.adaptive_tree: # Fit leaves on train data, check performance on valid data if available if self.verbose: - logging.info("Fitting leaves on training data for adaptive pruning...") + logging.info( + "Fitting leaves on training data for adaptive pruning..." + ) self.fit_leaves(self.train_X, self.train_y) if ( hasattr(self, "valid_X") @@ -1556,4 +1560,4 @@ def predict_full(self, X: np.ndarray) -> np.ndarray: def _post_fit(self) -> None: """Optional hook after the regressor's tree is fitted.""" if self.verbose: - logging.info("Regressor tree structure has been fitted.") \ No newline at end of file + logging.info("Regressor tree structure has been fitted.") From 5a2b190c873b99b204324b9070dbdbb7e80b133a Mon Sep 17 00:00:00 2001 From: noahho Date: Tue, 2 Sep 2025 20:29:09 +0200 Subject: [PATCH 3/5] ruff --- .../sklearn_based_decision_tree_tabpfn.py | 76 +++++++++---------- 1 file changed, 37 insertions(+), 39 deletions(-) diff --git a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py index 3a6c8bae..0929a31f 100644 --- a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py +++ b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py @@ -36,10 +36,8 @@ ) from tabpfn_extensions.utils import softmax -# Configure logging -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) +# Define a module-level logger +logger = logging.getLogger(__name__) ############################################################################### # BASE DECISION TREE # @@ -303,7 +301,7 @@ def _fit( The fitted model. """ if self.verbose: - logging.info("Starting DecisionTreeTabPFN fit process...") + logger.info("Starting DecisionTreeTabPFN fit process...") # Initialize attributes (per scikit-learn conventions) self._leaf_nodes = [] self._leaf_train_data = {} @@ -326,7 +324,7 @@ def _fit( ensure_all_finite=False, # scikit-learn sets self.n_features_in_ automatically ) if self.verbose: - logging.info(f"Input data shape: X={X.shape}, y={y.shape}") + logger.info(f"Input data shape: X={X.shape}, y={y.shape}") if self.task_type == "multiclass": self.classes_ = unique_labels(y) @@ -356,7 +354,7 @@ def _fit( # If adaptive_tree is on, do a train/validation split if self.adaptive_tree: if self.verbose: - logging.info( + logger.info( "Adaptive tree is enabled. Preparing train/validation split." ) stratify = y_ if (self.task_type == "multiclass") else None @@ -367,7 +365,7 @@ def _fit( # Disable adaptive tree in extreme cases if counts.min() == 1 or len(unique_classes) < 2: if self.verbose: - logging.info( + logger.info( "Disabling adaptive tree: minimum class count is 1 or only one class present." ) self.adaptive_tree = False @@ -378,7 +376,7 @@ def _fit( ) if len(y_) < 10: if self.verbose: - logging.info("Disabling adaptive tree: fewer than 10 samples.") + logger.info("Disabling adaptive tree: fewer than 10 samples.") self.adaptive_tree = False if self.adaptive_tree: @@ -401,7 +399,7 @@ def _fit( stratify=stratify, ) if self.verbose: - logging.info( + logger.info( f"Train/Valid split created: " f"Train size={len(y_train)}, Valid size={len(y_valid)}" ) @@ -409,7 +407,7 @@ def _fit( # Safety check - if split is empty, revert if len(y_train) == 0 or len(y_valid) == 0: if self.verbose: - logging.info( + logger.info( "Disabling adaptive tree: train or validation split is empty." ) self.adaptive_tree = False @@ -428,7 +426,7 @@ def _fit( and (len(np.unique(y_train)) != len(np.unique(y_valid))) ): if self.verbose: - logging.info( + logger.info( "Disabling adaptive tree: train and validation sets have different classes." ) self.adaptive_tree = False @@ -444,7 +442,7 @@ def _fit( else: # Not adaptive, everything is train if self.verbose: - logging.info("Adaptive tree is disabled. Using all data for training.") + logger.info("Adaptive tree is disabled. Using all data for training.") X_train, X_preproc_train, y_train, sw_train = ( X, X_preprocessed, @@ -455,12 +453,12 @@ def _fit( # Build the sklearn decision tree if self.verbose: - logging.info("Fitting the initial scikit-learn decision tree structure...") + logger.info("Fitting the initial scikit-learn decision tree structure...") self._decision_tree = self._init_decision_tree() self._decision_tree.fit(X_preproc_train, y_train, sample_weight=sw_train) self._tree = self._decision_tree # for sklearn compatibility if self.verbose: - logging.info( + logger.info( f"Decision tree fitting complete. Tree has {self._tree.tree_.node_count} nodes." ) @@ -481,7 +479,7 @@ def _fit( # We will do a leaf-fitting step on demand (lazy) in predict self._need_post_fit = True if self.verbose: - logging.info("Leaf fitting is deferred until the first predict() call.") + logger.info("Leaf fitting is deferred until the first predict() call.") # If verbose, optionally do it right away: if self.verbose: @@ -504,7 +502,7 @@ def _init_decision_tree(self) -> BaseDecisionTree: def _post_fit(self) -> None: """Hook after the decision tree is fitted. Can be used for final prints/logs.""" if self.verbose: - logging.info("Base tree structure has been fitted.") + logger.info("Base tree structure has been fitted.") def _preprocess_data_for_tree(self, X: np.ndarray) -> np.ndarray: """Handle missing data prior to feeding into the decision tree. @@ -664,12 +662,12 @@ def _predict_internal( # If we haven't yet done the final leaf fit, do it here if self._need_post_fit: if self.verbose: - logging.info("First predict call: executing deferred leaf fitting.") + logger.info("First predict call: executing deferred leaf fitting.") self._need_post_fit = False if self.adaptive_tree: # Fit leaves on train data, check performance on valid data if available if self.verbose: - logging.info( + logger.info( "Fitting leaves on training data for adaptive pruning..." ) self.fit_leaves(self.train_X, self.train_y) @@ -679,7 +677,7 @@ def _predict_internal( and self.valid_y is not None ): if self.verbose: - logging.info( + logger.info( "Evaluating node performance on validation set for pruning decisions." ) # Force a pass to evaluate node performance @@ -691,7 +689,7 @@ def _predict_internal( ) # Now fit leaves again using the entire dataset (train + valid, effectively) if self.verbose: - logging.info("Fitting leaves on the full dataset.") + logger.info("Fitting leaves on the full dataset.") self.fit_leaves(self.X, self.y) # Assign TabPFNs categorical features if needed @@ -702,7 +700,7 @@ def _predict_internal( X_leaf_nodes = self._apply_tree(X) n_samples, n_nodes, n_estims = X_leaf_nodes.shape if self.verbose: - logging.info( + logger.info( f"Starting prediction for {n_samples} samples across {n_nodes} nodes." ) @@ -761,7 +759,7 @@ def _predict_internal( == 0.0 ) if self.verbose: - logging.info( + logger.info( f"Processing Node {leaf_id}: " f"Train Samples={X_train_leaf.shape[0]}, " f"Test Samples={len(test_sample_indices)}, " @@ -776,7 +774,7 @@ def _predict_internal( and not (leaf_id == 0 and self.adaptive_tree) ): if self.verbose: - logging.info( + logger.info( f" -> Skipping Node {leaf_id}: Not a final leaf and fit_nodes is False." ) if do_pruning: @@ -796,7 +794,7 @@ def _predict_internal( if should_skip_previously_pruned: if self.verbose: - logging.info( + logger.info( f" -> Skipping Node {leaf_id}: Node was previously pruned." ) continue @@ -808,7 +806,7 @@ def _predict_internal( and self.adaptive_tree_skip_class_missing ): if self.verbose: - logging.info( + logger.info( f" -> Skipping Node {leaf_id}: Not all classes are present in training data." ) self._node_prediction_type[est_id][leaf_id] = "previous" @@ -828,7 +826,7 @@ def _predict_internal( ) ): if self.verbose: - logging.info( + logger.info( f" -> Skipping Node {leaf_id}: Does not meet sample size requirements for adaptive fitting." ) if do_pruning: @@ -880,7 +878,7 @@ def _predict_internal( y_prob[est_id][leaf_id], ) if self.verbose: - logging.info( + logger.info( f" -> Pruning Result for Node {leaf_id}: " f"Type='{self._node_prediction_type[est_id][leaf_id]}', " f"Score={y_metric[est_id][leaf_id]:.4f}" @@ -890,7 +888,7 @@ def _predict_internal( y_prob[est_id][leaf_id] = y_prob_replacement if self.verbose: - logging.info("Prediction process finished.") + logger.info("Prediction process finished.") # Final predictions come from the last estimators last node return y_prob[n_estims - 1][n_nodes - 1] @@ -1242,7 +1240,7 @@ def _predict_leaf( # If only one class, fill probability 1.0 for that class if len(classes_in_leaf) == 1: if self.verbose: - logging.info( + logger.info( f" -> Node {leaf_id}: Only one class present. Predicting 1.0 for class {classes_in_leaf[0]}." ) y_eval_prob[indices, classes_in_leaf[0]] = 1.0 @@ -1252,7 +1250,7 @@ def _predict_leaf( leaf_seed = leaf_id + self.tree_seed try: if self.verbose: - logging.info(f" -> Node {leaf_id}: Fitting TabPFNClassifier.") + logger.info(f" -> Node {leaf_id}: Fitting TabPFNClassifier.") self.tabpfn.random_state = leaf_seed self.tabpfn.fit(X_train_leaf, y_train_leaf) @@ -1279,7 +1277,7 @@ def _predict_leaf( stacklevel=2, ) if self.verbose: - logging.warning( + logger.warning( f" -> Node {leaf_id}: TabPFN failed due to constant features. Using class ratio fallback." ) _, counts = np.unique(y_train_leaf, return_counts=True) @@ -1331,7 +1329,7 @@ def predict_proba(self, X: np.ndarray, check_input: bool = True) -> np.ndarray: def _post_fit(self) -> None: """Optional hook after the decision tree is fitted.""" if self.verbose: - logging.info("Classifier tree structure has been fitted.") + logger.info("Classifier tree structure has been fitted.") ############################################################################### @@ -1455,7 +1453,7 @@ def _predict_leaf( # If no training data or just 1 sample, fall back to 0 or single value if len(X_train_leaf) < 1: if self.verbose: - logging.info( + logger.info( f" -> Node {leaf_id}: No training samples. Predicting 0.0." ) warnings.warn( @@ -1465,7 +1463,7 @@ def _predict_leaf( return y_eval elif len(X_train_leaf) == 1: if self.verbose: - logging.info( + logger.info( f" -> Node {leaf_id}: Only one training sample. Predicting its value." ) y_eval[indices] = y_train_leaf[0] @@ -1474,7 +1472,7 @@ def _predict_leaf( # If all y are identical, return that constant if np.all(y_train_leaf == y_train_leaf[0]): if self.verbose: - logging.info( + logger.info( f" -> Node {leaf_id}: All target values are constant. Predicting {y_train_leaf[0]}." ) y_eval[indices] = y_train_leaf[0] @@ -1484,7 +1482,7 @@ def _predict_leaf( leaf_seed = leaf_id + self.tree_seed try: if self.verbose: - logging.info(f" -> Node {leaf_id}: Fitting TabPFNRegressor.") + logger.info(f" -> Node {leaf_id}: Fitting TabPFNRegressor.") self.tabpfn.random_state = leaf_seed self.tabpfn.fit(X_train_leaf, y_train_leaf) @@ -1504,7 +1502,7 @@ def _predict_leaf( stacklevel=2, ) if self.verbose: - logging.warning( + logger.warning( f" -> Node {leaf_id}: TabPFN failed ({e}). Using mean fallback." ) y_eval[indices] = np.mean(y_train_leaf) @@ -1560,4 +1558,4 @@ def predict_full(self, X: np.ndarray) -> np.ndarray: def _post_fit(self) -> None: """Optional hook after the regressor's tree is fitted.""" if self.verbose: - logging.info("Regressor tree structure has been fitted.") + logger.info("Regressor tree structure has been fitted.") \ No newline at end of file From 6c9db2721e0f40506c099fbf2a061d69cd199116 Mon Sep 17 00:00:00 2001 From: noahho Date: Wed, 3 Sep 2025 09:25:57 +0200 Subject: [PATCH 4/5] ruff --- .../rf_pfn/sklearn_based_decision_tree_tabpfn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py index 0929a31f..0cbcb272 100644 --- a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py +++ b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py @@ -1558,4 +1558,4 @@ def predict_full(self, X: np.ndarray) -> np.ndarray: def _post_fit(self) -> None: """Optional hook after the regressor's tree is fitted.""" if self.verbose: - logger.info("Regressor tree structure has been fitted.") \ No newline at end of file + logger.info("Regressor tree structure has been fitted.") From ac68e133a8f7f5c2f0e465ec5dce108406364d3a Mon Sep 17 00:00:00 2001 From: Noah Hollmann Date: Mon, 23 Mar 2026 10:54:14 +0100 Subject: [PATCH 5/5] Update src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py Co-authored-by: Oscar Key --- .../rf_pfn/sklearn_based_decision_tree_tabpfn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py index 8046f810..b58334ba 100644 --- a/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py +++ b/src/tabpfn_extensions/rf_pfn/sklearn_based_decision_tree_tabpfn.py @@ -39,7 +39,6 @@ ) from tabpfn_extensions.utils import softmax -# Define a module-level logger logger = logging.getLogger(__name__) ###############################################################################