From 4d15a875a45e57a21578830b5d26d1ee305d42b4 Mon Sep 17 00:00:00 2001 From: Georg Grab Date: Wed, 6 May 2026 18:14:43 +0000 Subject: [PATCH 1/6] feat: replace enhanced_fit_mode with effort/effort_timeout_s/effort_metric TabPFNClassifier and TabPFNRegressor now take: - effort: Literal["medium", "high"] | None (None disables the autogluon- wrapped fit; medium/high pick the portfolio count server-side) - effort_timeout_s: float | None (user budget; server forwards 0.9x to AG) - effort_metric: str | None (eval metric for the sweep) Replaces enhanced_fit_mode (bool) plus the old enhanced_fit_mode_metric and enhanced_fit_mode_time_limit_s. validate_effort() enforces the literal range, the 2400 s cap, and rejects timeout/metric set without effort. client.py lifts the three fields to top-level FitRequest siblings and keeps tabpfn_systems consistent ("enhanced" present iff effort is set). --- src/tabpfn_client/api_models.py | 14 ++- src/tabpfn_client/client.py | 40 ++++---- src/tabpfn_client/estimator.py | 138 ++++++++++++++++----------- tests/unit/test_tabpfn_classifier.py | 6 +- tests/unit/test_tabpfn_regressor.py | 6 +- 5 files changed, 114 insertions(+), 90 deletions(-) diff --git a/src/tabpfn_client/api_models.py b/src/tabpfn_client/api_models.py index b68f21b..27b9ef3 100644 --- a/src/tabpfn_client/api_models.py +++ b/src/tabpfn_client/api_models.py @@ -103,14 +103,12 @@ class FitRequest(BaseModel): # `tabpfn_systems` values on the server need this at fit time; the # server ignores it otherwise. tabpfn_config: TabPFNConfig = None - # Drives model selection + ensemble weighting during the enhanced-fit - # sweep. Only consulted when `"enhanced"` is in `tabpfn_systems`. None - # falls back to the sweep's default per problem type. - enhanced_fit_mode_metric: Optional[str] = None - # Ceiling on the enhanced-fit sweep (seconds). Only consulted when - # `"enhanced"` is in `tabpfn_systems`. None falls back to the server - # default (300s). 
- enhanced_fit_time_limit_s: Optional[float] = None + # User-facing effort level ("medium" or "high"). None disables it. + effort: Optional[str] = None + # Budget for the fit (seconds). Only consulted when `effort` is set. + effort_timeout_s: Optional[float] = None + # Optimization metric for the fit. Only consulted when `effort` is set. + effort_metric: Optional[str] = None class FitResponse(BaseModel): diff --git a/src/tabpfn_client/client.py b/src/tabpfn_client/client.py index c5448cc..6241baa 100644 --- a/src/tabpfn_client/client.py +++ b/src/tabpfn_client/client.py @@ -388,27 +388,24 @@ def fit( raise tabpfn_systems = ["preprocessing", "text"] + effort = tabpfn_config.get("effort") if tabpfn_config else None if tabpfn_config: if tabpfn_config.get("paper_version") is True: tabpfn_systems = [] - elif tabpfn_config.get("enhanced_fit_mode") is True: + elif effort is not None: # Enhanced mode runs on top of the base systems rather than # replacing them — keep preprocessing + text alongside it. tabpfn_systems = ["preprocessing", "text", "enhanced"] - # `enhanced_fit_mode_metric` and `enhanced_fit_mode_time_limit_s` - # are top-level FitRequest fields on the server (siblings to - # `tabpfn_systems`), not part of `tabpfn_config`. Lift them out - # before stripping the rest of the client-only keys. The server - # field drops the `mode_` infix (`enhanced_fit_time_limit_s`); - # units are seconds on both sides, no conversion. - enhanced_fit_mode_metric = ( - tabpfn_config.get("enhanced_fit_mode_metric") if tabpfn_config else None + # `effort`, `effort_timeout_s`, `effort_metric` are top-level + # FitRequest fields on the server (siblings to `tabpfn_systems`), + # not part of `tabpfn_config`. Lift them out before stripping the + # rest of the client-only keys. 
+ effort_timeout_s = ( + tabpfn_config.get("effort_timeout_s") if tabpfn_config else None ) - enhanced_fit_time_limit_s = ( - tabpfn_config.get("enhanced_fit_mode_time_limit_s") - if tabpfn_config - else None + effort_metric = ( + tabpfn_config.get("effort_metric") if tabpfn_config else None ) # Strip client-only keys that the server does not expect (mirrors @@ -420,9 +417,9 @@ def fit( if k not in { "paper_version", - "enhanced_fit_mode", - "enhanced_fit_mode_metric", - "enhanced_fit_mode_time_limit_s", + "effort", + "effort_timeout_s", + "effort_metric", } } if tabpfn_config is not None @@ -436,8 +433,9 @@ def fit( tabpfn_systems=tabpfn_systems, force_refit=force_refit or force_refit_enabled(), tabpfn_config=server_tabpfn_config, - enhanced_fit_mode_metric=enhanced_fit_mode_metric, - enhanced_fit_time_limit_s=enhanced_fit_time_limit_s, + effort=effort, + effort_timeout_s=effort_timeout_s, + effort_metric=effort_metric, ), timeout=client_options.timeout, headers=client_options.headers, @@ -584,9 +582,9 @@ def predict( if k not in { "paper_version", - "enhanced_fit_mode", - "enhanced_fit_mode_metric", - "enhanced_fit_mode_time_limit_s", + "effort", + "effort_timeout_s", + "effort_metric", } } diff --git a/src/tabpfn_client/estimator.py b/src/tabpfn_client/estimator.py index 27d5b2f..03dae6e 100644 --- a/src/tabpfn_client/estimator.py +++ b/src/tabpfn_client/estimator.py @@ -54,7 +54,10 @@ # is kept as a backward-compatible alias. 
_AUTO_MODEL_PATH_ALIASES = frozenset({"auto", "default"}) -ENHANCED_FIT_MODE_MAX_TIME_LIMIT_S = 40 * 60 +EFFORT_TIMEOUT_MAX_S = 40 * 60 + +EffortLevel = Literal["medium", "high"] +_VALID_EFFORT_LEVELS = frozenset({"medium", "high"}) class TabPFNModelSelection: @@ -180,9 +183,9 @@ def __init__( ] = 0, inference_config: Optional[Dict] = None, paper_version: bool = False, - enhanced_fit_mode: bool = False, - enhanced_fit_mode_metric: Optional[str] = None, - enhanced_fit_mode_time_limit_s: Optional[float] = None, + effort: Optional[EffortLevel] = None, + effort_timeout_s: Optional[float] = None, + effort_metric: Optional[str] = None, force_refit: bool = False, client_options: ClientOptions | None = None, ): @@ -240,24 +243,33 @@ def __init__( paper_version: bool, default=False If True, will use the model described in the paper, instead of the newest version available on the API, which e.g handles text features better. - enhanced_fit_mode: bool, default=False - If True, trades off fit time for precision by running an - automated feature-engineering pipeline on top of TabPFN during - fit. - enhanced_fit_mode_metric: str or None, default=None - Only consulted when `enhanced_fit_mode=True`. Drives model - selection + ensemble weighting during the enhanced-fit sweep - (e.g. "accuracy"/"log_loss"/"roc_auc"/"balanced_accuracy"/ - "f1" for classification). None falls back to the sweep's - default for the problem type. Distinct from the local - `eval_metric`/`tuning_config` knobs used for decision-threshold - tuning on the standalone TabPFN classifier. - enhanced_fit_mode_time_limit_s: float or None, default=None - Only consulted when `enhanced_fit_mode=True`. Ceiling on the - enhanced-fit sweep (seconds). Raise for larger datasets where - the default ~5-minute sweep leaves performance on the table. - None falls back to the server-side default (300s). Capped at - 2400 seconds (40 minutes); higher values raise ValueError at fit. 
+ effort: {"medium", "high"} or None, default=None + Spends extra fit-time compute for higher precision. None + disables it. + effort_timeout_s: float or None, default=None + Budget for the fit, in seconds. Only consulted when `effort` + is set. Capped at 2400. + effort_metric: str or None, default=None + Optimization metric for the fit. Only consulted when `effort` + is set. + + Binary classification: + "accuracy", "balanced_accuracy", "mcc", "log_loss", + "pac", "quadratic_kappa", "roc_auc", "average_precision", + "precision", "precision_macro", "precision_micro", + "precision_weighted", "recall", "recall_macro", + "recall_micro", "recall_weighted", "f1", "f1_macro", + "f1_micro", "f1_weighted". + Multiclass classification: + "accuracy", "balanced_accuracy", "mcc", "log_loss", + "pac", "quadratic_kappa", "precision_macro", + "precision_micro", "precision_weighted", "recall_macro", + "recall_micro", "recall_weighted", "f1_macro", + "f1_micro", "f1_weighted", "roc_auc_ovo", + "roc_auc_ovo_macro", "roc_auc_ovr", "roc_auc_ovr_macro", + "roc_auc_ovr_micro", "roc_auc_ovr_weighted". + + Aliases "acc", "nll", "pac_score" are also accepted. 
""" self.model_path = model_path self.n_estimators = n_estimators @@ -269,9 +281,9 @@ def __init__( self.random_state = random_state self.inference_config = inference_config self.paper_version = paper_version - self.enhanced_fit_mode = enhanced_fit_mode - self.enhanced_fit_mode_metric = enhanced_fit_mode_metric - self.enhanced_fit_mode_time_limit_s = enhanced_fit_mode_time_limit_s + self.effort = effort + self.effort_timeout_s = effort_timeout_s + self.effort_metric = effort_metric self.force_refit = force_refit self.client_options = client_options or ClientOptions() @@ -294,7 +306,7 @@ def fit( estimator_param = self._get_estimator_params_with_model_path("classification") validate_train_set(X, y) - validate_enhanced_fit_mode_time_limit(self.enhanced_fit_mode_time_limit_s) + validate_effort(self.effort, self.effort_timeout_s, self.effort_metric) X = _clean_text_features(X) self._validate_targets_and_classes(y) @@ -467,9 +479,9 @@ def __init__( ] = 0, inference_config: Optional[Dict] = None, paper_version: bool = False, - enhanced_fit_mode: bool = False, - enhanced_fit_mode_metric: Optional[str] = None, - enhanced_fit_mode_time_limit_s: Optional[float] = None, + effort: Optional[EffortLevel] = None, + effort_timeout_s: Optional[float] = None, + effort_metric: Optional[str] = None, force_refit: bool = False, client_options: ClientOptions | None = None, ): @@ -519,21 +531,25 @@ def __init__( paper_version: bool, default=False If True, will use the model described in the paper, instead of the newest version available on the API, which e.g handles text features better. - enhanced_fit_mode: bool, default=False - If True, trades off fit time for precision by running an - automated feature-engineering pipeline on top of TabPFN during - fit. - enhanced_fit_mode_metric: str or None, default=None - Only consulted when `enhanced_fit_mode=True`. Drives model - selection + ensemble weighting during the enhanced-fit sweep - (e.g. "rmse"/"mae"/"r2"/"mape" for regression). 
None falls - back to the sweep's default for the problem type. - enhanced_fit_mode_time_limit_s: float or None, default=None - Only consulted when `enhanced_fit_mode=True`. Ceiling on the - enhanced-fit sweep (seconds). Raise for larger datasets where - the default ~5-minute sweep leaves performance on the table. - None falls back to the server-side default (300s). Capped at - 2400 seconds (40 minutes); higher values raise ValueError at fit. + effort: {"medium", "high"} or None, default=None + Spends extra fit-time compute for higher precision. None + disables it. + effort_timeout_s: float or None, default=None + Budget for the fit, in seconds. Only consulted when `effort` + is set. Capped at 2400. + effort_metric: str or None, default=None + Optimization metric for the fit. Only consulted when `effort` + is set. + + Regression: + "r2", "mean_squared_error", "root_mean_squared_error", + "mean_absolute_error", "median_absolute_error", + "mean_absolute_percentage_error", + "symmetric_mean_absolute_percentage_error", "spearmanr", + "pearsonr". + + Aliases "mse", "rmse", "mae", "mape", "smape" are also + accepted. force_refit: bool, default=False Whether to force refit the model even if the model has already been fitted. 
client_options : ClientOptions, default=None @@ -548,9 +564,9 @@ def __init__( self.random_state = random_state self.inference_config = inference_config self.paper_version = paper_version - self.enhanced_fit_mode = enhanced_fit_mode - self.enhanced_fit_mode_metric = enhanced_fit_mode_metric - self.enhanced_fit_mode_time_limit_s = enhanced_fit_mode_time_limit_s + self.effort = effort + self.effort_timeout_s = effort_timeout_s + self.effort_metric = effort_metric self.force_refit = force_refit self.client_options = client_options or ClientOptions() @@ -573,7 +589,7 @@ def fit( estimator_param = self._get_estimator_params_with_model_path("regression") validate_train_set(X, y) - validate_enhanced_fit_mode_time_limit(self.enhanced_fit_mode_time_limit_s) + validate_effort(self.effort, self.effort_timeout_s, self.effort_metric) self._validate_targets(y) X = _clean_text_features(X) @@ -718,14 +734,26 @@ def _validate_targets(self, y) -> np.ndarray: raise ValueError("Input y contains NaN.") -def validate_enhanced_fit_mode_time_limit(time_limit_s: Optional[float]) -> None: - if time_limit_s is None: - return - if time_limit_s > ENHANCED_FIT_MODE_MAX_TIME_LIMIT_S: +def validate_effort( + effort: Optional[str], + effort_timeout_s: Optional[float], + effort_metric: Optional[str], +) -> None: + if effort is not None and effort not in _VALID_EFFORT_LEVELS: + raise ValueError( + f"effort must be one of {sorted(_VALID_EFFORT_LEVELS)} or None, " + f"got {effort!r}." + ) + if effort is None and (effort_timeout_s is not None or effort_metric is not None): + raise ValueError( + "effort_timeout_s and effort_metric are only consulted when " + "`effort` is set; pass effort='medium' or effort='high' to use them." 
+ ) + if effort_timeout_s is not None and effort_timeout_s > EFFORT_TIMEOUT_MAX_S: raise ValueError( - f"enhanced_fit_mode_time_limit_s ({time_limit_s}) exceeds the " - f"maximum allowed of {ENHANCED_FIT_MODE_MAX_TIME_LIMIT_S} seconds " - f"({ENHANCED_FIT_MODE_MAX_TIME_LIMIT_S // 60} minutes)." + f"effort_timeout_s ({effort_timeout_s}) exceeds the " + f"maximum allowed of {EFFORT_TIMEOUT_MAX_S} seconds " + f"({EFFORT_TIMEOUT_MAX_S // 60} minutes)." ) diff --git a/tests/unit/test_tabpfn_classifier.py b/tests/unit/test_tabpfn_classifier.py index 1bc9b10..d2de6ef 100644 --- a/tests/unit/test_tabpfn_classifier.py +++ b/tests/unit/test_tabpfn_classifier.py @@ -475,9 +475,9 @@ def test_only_allowed_parameters_passed_to_config(self): "model_path", "balance_probabilities", "paper_version", - "enhanced_fit_mode", - "enhanced_fit_mode_metric", - "enhanced_fit_mode_time_limit_s", + "effort", + "effort_timeout_s", + "effort_metric", } OPTIONAL_PARAMS = { # These may be emitted by newer model versions, but are not required. 
diff --git a/tests/unit/test_tabpfn_regressor.py b/tests/unit/test_tabpfn_regressor.py index 912f9f2..ac97b72 100644 --- a/tests/unit/test_tabpfn_regressor.py +++ b/tests/unit/test_tabpfn_regressor.py @@ -469,9 +469,9 @@ def test_only_allowed_parameters_passed_to_config(self): "inference_config", "model_path", "paper_version", - "enhanced_fit_mode", - "enhanced_fit_mode_metric", - "enhanced_fit_mode_time_limit_s", + "effort", + "effort_timeout_s", + "effort_metric", } OPTIONAL_PARAMS = { "thinking", From ea2ce7be93992abc12c7c161c4474ba2d0a8be8c Mon Sep 17 00:00:00 2001 From: Georg Grab Date: Fri, 8 May 2026 14:24:26 +0200 Subject: [PATCH 2/6] feat: gate enhanced fit on enhanced_fit_mode and rename tunables The estimator-side knobs are renamed to make their relationship explicit: - effort -> enhanced_effort, defaulting to "medium" (only consulted when enhanced_fit_mode=True) - effort_timeout_s -> enhanced_timeout_s - effort_metric -> enhanced_effort_metric Activation moves from "effort is not None" back to a dedicated boolean (enhanced_fit_mode, default False). enhanced_effort is a Literal ["medium", "high"] with a documented default of "medium" so users can flip on enhanced fit mode without picking an effort level. Wire format is unchanged: client.py still sends effort, effort_timeout_s and effort_metric on FitRequest, so no server companion change is needed. 
--- src/tabpfn_client/client.py | 42 +++++---- src/tabpfn_client/estimator.py | 127 ++++++++++++++++----------- tests/unit/test_tabpfn_classifier.py | 7 +- tests/unit/test_tabpfn_regressor.py | 7 +- 4 files changed, 110 insertions(+), 73 deletions(-) diff --git a/src/tabpfn_client/client.py b/src/tabpfn_client/client.py index 6241baa..739b9ce 100644 --- a/src/tabpfn_client/client.py +++ b/src/tabpfn_client/client.py @@ -388,25 +388,29 @@ def fit( raise tabpfn_systems = ["preprocessing", "text"] - effort = tabpfn_config.get("effort") if tabpfn_config else None + enhanced_fit_mode = ( + bool(tabpfn_config.get("enhanced_fit_mode")) if tabpfn_config else False + ) if tabpfn_config: if tabpfn_config.get("paper_version") is True: tabpfn_systems = [] - elif effort is not None: + elif enhanced_fit_mode: # Enhanced mode runs on top of the base systems rather than # replacing them — keep preprocessing + text alongside it. tabpfn_systems = ["preprocessing", "text", "enhanced"] - # `effort`, `effort_timeout_s`, `effort_metric` are top-level - # FitRequest fields on the server (siblings to `tabpfn_systems`), - # not part of `tabpfn_config`. Lift them out before stripping the - # rest of the client-only keys. - effort_timeout_s = ( - tabpfn_config.get("effort_timeout_s") if tabpfn_config else None - ) - effort_metric = ( - tabpfn_config.get("effort_metric") if tabpfn_config else None - ) + # The client-side `enhanced_*` knobs are translated to the server's + # top-level FitRequest fields (`effort`, `effort_timeout_s`, + # `effort_metric`). They are only consulted when enhanced fit mode + # is enabled; otherwise we send None. 
+ if enhanced_fit_mode and tabpfn_config: + effort = tabpfn_config.get("enhanced_effort", "medium") + effort_timeout_s = tabpfn_config.get("enhanced_timeout_s") + effort_metric = tabpfn_config.get("enhanced_effort_metric") + else: + effort = None + effort_timeout_s = None + effort_metric = None # Strip client-only keys that the server does not expect (mirrors # the predict path's filter below). @@ -417,9 +421,10 @@ def fit( if k not in { "paper_version", - "effort", - "effort_timeout_s", - "effort_metric", + "enhanced_fit_mode", + "enhanced_effort", + "enhanced_timeout_s", + "enhanced_effort_metric", } } if tabpfn_config is not None @@ -582,9 +587,10 @@ def predict( if k not in { "paper_version", - "effort", - "effort_timeout_s", - "effort_metric", + "enhanced_fit_mode", + "enhanced_effort", + "enhanced_timeout_s", + "enhanced_effort_metric", } } diff --git a/src/tabpfn_client/estimator.py b/src/tabpfn_client/estimator.py index 03dae6e..9071bae 100644 --- a/src/tabpfn_client/estimator.py +++ b/src/tabpfn_client/estimator.py @@ -54,10 +54,10 @@ # is kept as a backward-compatible alias. 
_AUTO_MODEL_PATH_ALIASES = frozenset({"auto", "default"}) -EFFORT_TIMEOUT_MAX_S = 40 * 60 +ENHANCED_TIMEOUT_MAX_S = 40 * 60 -EffortLevel = Literal["medium", "high"] -_VALID_EFFORT_LEVELS = frozenset({"medium", "high"}) +EnhancedEffort = Literal["medium", "high"] +_VALID_ENHANCED_EFFORT_LEVELS = frozenset({"medium", "high"}) class TabPFNModelSelection: @@ -183,9 +183,10 @@ def __init__( ] = 0, inference_config: Optional[Dict] = None, paper_version: bool = False, - effort: Optional[EffortLevel] = None, - effort_timeout_s: Optional[float] = None, - effort_metric: Optional[str] = None, + enhanced_fit_mode: bool = False, + enhanced_effort: EnhancedEffort = "medium", + enhanced_timeout_s: Optional[float] = None, + enhanced_effort_metric: Optional[str] = None, force_refit: bool = False, client_options: ClientOptions | None = None, ): @@ -243,15 +244,19 @@ def __init__( paper_version: bool, default=False If True, will use the model described in the paper, instead of the newest version available on the API, which e.g handles text features better. - effort: {"medium", "high"} or None, default=None - Spends extra fit-time compute for higher precision. None - disables it. - effort_timeout_s: float or None, default=None - Budget for the fit, in seconds. Only consulted when `effort` - is set. Capped at 2400. - effort_metric: str or None, default=None - Optimization metric for the fit. Only consulted when `effort` - is set. + enhanced_fit_mode: bool, default=False + If True, spend extra fit-time compute for higher precision. + The remaining `enhanced_*` parameters are only consulted when + this is True. + enhanced_effort: {"medium", "high"}, default="medium" + Effort level for enhanced fit mode. Only consulted when + `enhanced_fit_mode=True`. + enhanced_timeout_s: float or None, default=None + Budget for the fit, in seconds. Only consulted when + `enhanced_fit_mode=True`. Capped at 2400. + enhanced_effort_metric: str or None, default=None + Optimization metric for the fit. 
Only consulted when + `enhanced_fit_mode=True`. Binary classification: "accuracy", "balanced_accuracy", "mcc", "log_loss", @@ -281,9 +286,10 @@ def __init__( self.random_state = random_state self.inference_config = inference_config self.paper_version = paper_version - self.effort = effort - self.effort_timeout_s = effort_timeout_s - self.effort_metric = effort_metric + self.enhanced_fit_mode = enhanced_fit_mode + self.enhanced_effort = enhanced_effort + self.enhanced_timeout_s = enhanced_timeout_s + self.enhanced_effort_metric = enhanced_effort_metric self.force_refit = force_refit self.client_options = client_options or ClientOptions() @@ -306,7 +312,12 @@ def fit( estimator_param = self._get_estimator_params_with_model_path("classification") validate_train_set(X, y) - validate_effort(self.effort, self.effort_timeout_s, self.effort_metric) + validate_enhanced_fit_mode( + self.enhanced_fit_mode, + self.enhanced_effort, + self.enhanced_timeout_s, + self.enhanced_effort_metric, + ) X = _clean_text_features(X) self._validate_targets_and_classes(y) @@ -479,9 +490,10 @@ def __init__( ] = 0, inference_config: Optional[Dict] = None, paper_version: bool = False, - effort: Optional[EffortLevel] = None, - effort_timeout_s: Optional[float] = None, - effort_metric: Optional[str] = None, + enhanced_fit_mode: bool = False, + enhanced_effort: EnhancedEffort = "medium", + enhanced_timeout_s: Optional[float] = None, + enhanced_effort_metric: Optional[str] = None, force_refit: bool = False, client_options: ClientOptions | None = None, ): @@ -531,15 +543,19 @@ def __init__( paper_version: bool, default=False If True, will use the model described in the paper, instead of the newest version available on the API, which e.g handles text features better. - effort: {"medium", "high"} or None, default=None - Spends extra fit-time compute for higher precision. None - disables it. - effort_timeout_s: float or None, default=None - Budget for the fit, in seconds. 
Only consulted when `effort` - is set. Capped at 2400. - effort_metric: str or None, default=None - Optimization metric for the fit. Only consulted when `effort` - is set. + enhanced_fit_mode: bool, default=False + If True, spend extra fit-time compute for higher precision. + The remaining `enhanced_*` parameters are only consulted when + this is True. + enhanced_effort: {"medium", "high"}, default="medium" + Effort level for enhanced fit mode. Only consulted when + `enhanced_fit_mode=True`. + enhanced_timeout_s: float or None, default=None + Budget for the fit, in seconds. Only consulted when + `enhanced_fit_mode=True`. Capped at 2400. + enhanced_effort_metric: str or None, default=None + Optimization metric for the fit. Only consulted when + `enhanced_fit_mode=True`. Regression: "r2", "mean_squared_error", "root_mean_squared_error", @@ -564,9 +580,10 @@ def __init__( self.random_state = random_state self.inference_config = inference_config self.paper_version = paper_version - self.effort = effort - self.effort_timeout_s = effort_timeout_s - self.effort_metric = effort_metric + self.enhanced_fit_mode = enhanced_fit_mode + self.enhanced_effort = enhanced_effort + self.enhanced_timeout_s = enhanced_timeout_s + self.enhanced_effort_metric = enhanced_effort_metric self.force_refit = force_refit self.client_options = client_options or ClientOptions() @@ -589,7 +606,12 @@ def fit( estimator_param = self._get_estimator_params_with_model_path("regression") validate_train_set(X, y) - validate_effort(self.effort, self.effort_timeout_s, self.effort_metric) + validate_enhanced_fit_mode( + self.enhanced_fit_mode, + self.enhanced_effort, + self.enhanced_timeout_s, + self.enhanced_effort_metric, + ) self._validate_targets(y) X = _clean_text_features(X) @@ -734,26 +756,33 @@ def _validate_targets(self, y) -> np.ndarray: raise ValueError("Input y contains NaN.") -def validate_effort( - effort: Optional[str], - effort_timeout_s: Optional[float], - effort_metric: Optional[str], +def 
validate_enhanced_fit_mode( + enhanced_fit_mode: bool, + enhanced_effort: str, + enhanced_timeout_s: Optional[float], + enhanced_effort_metric: Optional[str], ) -> None: - if effort is not None and effort not in _VALID_EFFORT_LEVELS: + if enhanced_effort not in _VALID_ENHANCED_EFFORT_LEVELS: raise ValueError( - f"effort must be one of {sorted(_VALID_EFFORT_LEVELS)} or None, " - f"got {effort!r}." + f"enhanced_effort must be one of " + f"{sorted(_VALID_ENHANCED_EFFORT_LEVELS)}, got {enhanced_effort!r}." ) - if effort is None and (effort_timeout_s is not None or effort_metric is not None): + if not enhanced_fit_mode and ( + enhanced_timeout_s is not None or enhanced_effort_metric is not None + ): raise ValueError( - "effort_timeout_s and effort_metric are only consulted when " - "`effort` is set; pass effort='medium' or effort='high' to use them." + "enhanced_timeout_s and enhanced_effort_metric are only consulted " + "when `enhanced_fit_mode=True`; pass enhanced_fit_mode=True to use " + "them." ) - if effort_timeout_s is not None and effort_timeout_s > EFFORT_TIMEOUT_MAX_S: + if ( + enhanced_timeout_s is not None + and enhanced_timeout_s > ENHANCED_TIMEOUT_MAX_S + ): raise ValueError( - f"effort_timeout_s ({effort_timeout_s}) exceeds the " - f"maximum allowed of {EFFORT_TIMEOUT_MAX_S} seconds " - f"({EFFORT_TIMEOUT_MAX_S // 60} minutes)." + f"enhanced_timeout_s ({enhanced_timeout_s}) exceeds the " + f"maximum allowed of {ENHANCED_TIMEOUT_MAX_S} seconds " + f"({ENHANCED_TIMEOUT_MAX_S // 60} minutes)." 
) diff --git a/tests/unit/test_tabpfn_classifier.py b/tests/unit/test_tabpfn_classifier.py index d2de6ef..ee1466c 100644 --- a/tests/unit/test_tabpfn_classifier.py +++ b/tests/unit/test_tabpfn_classifier.py @@ -475,9 +475,10 @@ def test_only_allowed_parameters_passed_to_config(self): "model_path", "balance_probabilities", "paper_version", - "effort", - "effort_timeout_s", - "effort_metric", + "enhanced_fit_mode", + "enhanced_effort", + "enhanced_timeout_s", + "enhanced_effort_metric", } OPTIONAL_PARAMS = { # These may be emitted by newer model versions, but are not required. diff --git a/tests/unit/test_tabpfn_regressor.py b/tests/unit/test_tabpfn_regressor.py index ac97b72..b026747 100644 --- a/tests/unit/test_tabpfn_regressor.py +++ b/tests/unit/test_tabpfn_regressor.py @@ -469,9 +469,10 @@ def test_only_allowed_parameters_passed_to_config(self): "inference_config", "model_path", "paper_version", - "effort", - "effort_timeout_s", - "effort_metric", + "enhanced_fit_mode", + "enhanced_effort", + "enhanced_timeout_s", + "enhanced_effort_metric", } OPTIONAL_PARAMS = { "thinking", From 43451a9003b99b73522bc764e1896a7fe75997b2 Mon Sep 17 00:00:00 2001 From: Georg Grab Date: Sat, 9 May 2026 21:33:59 +0200 Subject: [PATCH 3/6] rename enhanced_* knobs to thinking_* --- src/tabpfn_client/client.py | 38 ++++---- src/tabpfn_client/estimator.py | 128 +++++++++++++-------------- tests/unit/test_tabpfn_classifier.py | 8 +- tests/unit/test_tabpfn_regressor.py | 8 +- 4 files changed, 91 insertions(+), 91 deletions(-) diff --git a/src/tabpfn_client/client.py b/src/tabpfn_client/client.py index 739b9ce..41dae22 100644 --- a/src/tabpfn_client/client.py +++ b/src/tabpfn_client/client.py @@ -388,25 +388,25 @@ def fit( raise tabpfn_systems = ["preprocessing", "text"] - enhanced_fit_mode = ( - bool(tabpfn_config.get("enhanced_fit_mode")) if tabpfn_config else False + thinking_mode = ( + bool(tabpfn_config.get("thinking_mode")) if tabpfn_config else False ) if tabpfn_config: if 
tabpfn_config.get("paper_version") is True: tabpfn_systems = [] - elif enhanced_fit_mode: - # Enhanced mode runs on top of the base systems rather than + elif thinking_mode: + # Thinking mode runs on top of the base systems rather than # replacing them — keep preprocessing + text alongside it. - tabpfn_systems = ["preprocessing", "text", "enhanced"] + tabpfn_systems = ["preprocessing", "text", "thinking"] - # The client-side `enhanced_*` knobs are translated to the server's + # The client-side `thinking_*` knobs are translated to the server's # top-level FitRequest fields (`effort`, `effort_timeout_s`, - # `effort_metric`). They are only consulted when enhanced fit mode + # `effort_metric`). They are only consulted when thinking mode # is enabled; otherwise we send None. - if enhanced_fit_mode and tabpfn_config: - effort = tabpfn_config.get("enhanced_effort", "medium") - effort_timeout_s = tabpfn_config.get("enhanced_timeout_s") - effort_metric = tabpfn_config.get("enhanced_effort_metric") + if thinking_mode and tabpfn_config: + effort = tabpfn_config.get("thinking_effort", "medium") + effort_timeout_s = tabpfn_config.get("thinking_timeout_s") + effort_metric = tabpfn_config.get("thinking_effort_metric") else: effort = None effort_timeout_s = None @@ -421,10 +421,10 @@ def fit( if k not in { "paper_version", - "enhanced_fit_mode", - "enhanced_effort", - "enhanced_timeout_s", - "enhanced_effort_metric", + "thinking_mode", + "thinking_effort", + "thinking_timeout_s", + "thinking_effort_metric", } } if tabpfn_config is not None @@ -587,10 +587,10 @@ def predict( if k not in { "paper_version", - "enhanced_fit_mode", - "enhanced_effort", - "enhanced_timeout_s", - "enhanced_effort_metric", + "thinking_mode", + "thinking_effort", + "thinking_timeout_s", + "thinking_effort_metric", } } diff --git a/src/tabpfn_client/estimator.py b/src/tabpfn_client/estimator.py index 9071bae..c7a216e 100644 --- a/src/tabpfn_client/estimator.py +++ b/src/tabpfn_client/estimator.py @@ -54,10 
+54,10 @@ # is kept as a backward-compatible alias. _AUTO_MODEL_PATH_ALIASES = frozenset({"auto", "default"}) -ENHANCED_TIMEOUT_MAX_S = 40 * 60 +THINKING_TIMEOUT_MAX_S = 40 * 60 -EnhancedEffort = Literal["medium", "high"] -_VALID_ENHANCED_EFFORT_LEVELS = frozenset({"medium", "high"}) +ThinkingEffort = Literal["medium", "high"] +_VALID_THINKING_EFFORT_LEVELS = frozenset({"medium", "high"}) class TabPFNModelSelection: @@ -183,10 +183,10 @@ def __init__( ] = 0, inference_config: Optional[Dict] = None, paper_version: bool = False, - enhanced_fit_mode: bool = False, - enhanced_effort: EnhancedEffort = "medium", - enhanced_timeout_s: Optional[float] = None, - enhanced_effort_metric: Optional[str] = None, + thinking_mode: bool = False, + thinking_effort: ThinkingEffort = "medium", + thinking_timeout_s: Optional[float] = None, + thinking_effort_metric: Optional[str] = None, force_refit: bool = False, client_options: ClientOptions | None = None, ): @@ -244,19 +244,19 @@ def __init__( paper_version: bool, default=False If True, will use the model described in the paper, instead of the newest version available on the API, which e.g handles text features better. - enhanced_fit_mode: bool, default=False + thinking_mode: bool, default=False If True, spend extra fit-time compute for higher precision. - The remaining `enhanced_*` parameters are only consulted when + The remaining `thinking_*` parameters are only consulted when this is True. - enhanced_effort: {"medium", "high"}, default="medium" - Effort level for enhanced fit mode. Only consulted when - `enhanced_fit_mode=True`. - enhanced_timeout_s: float or None, default=None + thinking_effort: {"medium", "high"}, default="medium" + Effort level for thinking mode. Only consulted when + `thinking_mode=True`. + thinking_timeout_s: float or None, default=None Budget for the fit, in seconds. Only consulted when - `enhanced_fit_mode=True`. Capped at 2400. - enhanced_effort_metric: str or None, default=None + `thinking_mode=True`. 
Capped at 2400. + thinking_effort_metric: str or None, default=None Optimization metric for the fit. Only consulted when - `enhanced_fit_mode=True`. + `thinking_mode=True`. Binary classification: "accuracy", "balanced_accuracy", "mcc", "log_loss", @@ -286,10 +286,10 @@ def __init__( self.random_state = random_state self.inference_config = inference_config self.paper_version = paper_version - self.enhanced_fit_mode = enhanced_fit_mode - self.enhanced_effort = enhanced_effort - self.enhanced_timeout_s = enhanced_timeout_s - self.enhanced_effort_metric = enhanced_effort_metric + self.thinking_mode = thinking_mode + self.thinking_effort = thinking_effort + self.thinking_timeout_s = thinking_timeout_s + self.thinking_effort_metric = thinking_effort_metric self.force_refit = force_refit self.client_options = client_options or ClientOptions() @@ -312,11 +312,11 @@ def fit( estimator_param = self._get_estimator_params_with_model_path("classification") validate_train_set(X, y) - validate_enhanced_fit_mode( - self.enhanced_fit_mode, - self.enhanced_effort, - self.enhanced_timeout_s, - self.enhanced_effort_metric, + validate_thinking_mode( + self.thinking_mode, + self.thinking_effort, + self.thinking_timeout_s, + self.thinking_effort_metric, ) X = _clean_text_features(X) self._validate_targets_and_classes(y) @@ -490,10 +490,10 @@ def __init__( ] = 0, inference_config: Optional[Dict] = None, paper_version: bool = False, - enhanced_fit_mode: bool = False, - enhanced_effort: EnhancedEffort = "medium", - enhanced_timeout_s: Optional[float] = None, - enhanced_effort_metric: Optional[str] = None, + thinking_mode: bool = False, + thinking_effort: ThinkingEffort = "medium", + thinking_timeout_s: Optional[float] = None, + thinking_effort_metric: Optional[str] = None, force_refit: bool = False, client_options: ClientOptions | None = None, ): @@ -543,19 +543,19 @@ def __init__( paper_version: bool, default=False If True, will use the model described in the paper, instead of the newest 
version available on the API, which e.g handles text features better. - enhanced_fit_mode: bool, default=False + thinking_mode: bool, default=False If True, spend extra fit-time compute for higher precision. - The remaining `enhanced_*` parameters are only consulted when + The remaining `thinking_*` parameters are only consulted when this is True. - enhanced_effort: {"medium", "high"}, default="medium" - Effort level for enhanced fit mode. Only consulted when - `enhanced_fit_mode=True`. - enhanced_timeout_s: float or None, default=None + thinking_effort: {"medium", "high"}, default="medium" + Effort level for thinking mode. Only consulted when + `thinking_mode=True`. + thinking_timeout_s: float or None, default=None Budget for the fit, in seconds. Only consulted when - `enhanced_fit_mode=True`. Capped at 2400. - enhanced_effort_metric: str or None, default=None + `thinking_mode=True`. Capped at 2400. + thinking_effort_metric: str or None, default=None Optimization metric for the fit. Only consulted when - `enhanced_fit_mode=True`. + `thinking_mode=True`. 
Regression: "r2", "mean_squared_error", "root_mean_squared_error", @@ -580,10 +580,10 @@ def __init__( self.random_state = random_state self.inference_config = inference_config self.paper_version = paper_version - self.enhanced_fit_mode = enhanced_fit_mode - self.enhanced_effort = enhanced_effort - self.enhanced_timeout_s = enhanced_timeout_s - self.enhanced_effort_metric = enhanced_effort_metric + self.thinking_mode = thinking_mode + self.thinking_effort = thinking_effort + self.thinking_timeout_s = thinking_timeout_s + self.thinking_effort_metric = thinking_effort_metric self.force_refit = force_refit self.client_options = client_options or ClientOptions() @@ -606,11 +606,11 @@ def fit( estimator_param = self._get_estimator_params_with_model_path("regression") validate_train_set(X, y) - validate_enhanced_fit_mode( - self.enhanced_fit_mode, - self.enhanced_effort, - self.enhanced_timeout_s, - self.enhanced_effort_metric, + validate_thinking_mode( + self.thinking_mode, + self.thinking_effort, + self.thinking_timeout_s, + self.thinking_effort_metric, ) self._validate_targets(y) X = _clean_text_features(X) @@ -756,33 +756,33 @@ def _validate_targets(self, y) -> np.ndarray: raise ValueError("Input y contains NaN.") -def validate_enhanced_fit_mode( - enhanced_fit_mode: bool, - enhanced_effort: str, - enhanced_timeout_s: Optional[float], - enhanced_effort_metric: Optional[str], +def validate_thinking_mode( + thinking_mode: bool, + thinking_effort: str, + thinking_timeout_s: Optional[float], + thinking_effort_metric: Optional[str], ) -> None: - if enhanced_effort not in _VALID_ENHANCED_EFFORT_LEVELS: + if thinking_effort not in _VALID_THINKING_EFFORT_LEVELS: raise ValueError( - f"enhanced_effort must be one of " - f"{sorted(_VALID_ENHANCED_EFFORT_LEVELS)}, got {enhanced_effort!r}." + f"thinking_effort must be one of " + f"{sorted(_VALID_THINKING_EFFORT_LEVELS)}, got {thinking_effort!r}." 
) - if not enhanced_fit_mode and ( - enhanced_timeout_s is not None or enhanced_effort_metric is not None + if not thinking_mode and ( + thinking_timeout_s is not None or thinking_effort_metric is not None ): raise ValueError( - "enhanced_timeout_s and enhanced_effort_metric are only consulted " - "when `enhanced_fit_mode=True`; pass enhanced_fit_mode=True to use " + "thinking_timeout_s and thinking_effort_metric are only consulted " + "when `thinking_mode=True`; pass thinking_mode=True to use " "them." ) if ( - enhanced_timeout_s is not None - and enhanced_timeout_s > ENHANCED_TIMEOUT_MAX_S + thinking_timeout_s is not None + and thinking_timeout_s > THINKING_TIMEOUT_MAX_S ): raise ValueError( - f"enhanced_timeout_s ({enhanced_timeout_s}) exceeds the " - f"maximum allowed of {ENHANCED_TIMEOUT_MAX_S} seconds " - f"({ENHANCED_TIMEOUT_MAX_S // 60} minutes)." + f"thinking_timeout_s ({thinking_timeout_s}) exceeds the " + f"maximum allowed of {THINKING_TIMEOUT_MAX_S} seconds " + f"({THINKING_TIMEOUT_MAX_S // 60} minutes)." ) diff --git a/tests/unit/test_tabpfn_classifier.py b/tests/unit/test_tabpfn_classifier.py index ee1466c..a884e86 100644 --- a/tests/unit/test_tabpfn_classifier.py +++ b/tests/unit/test_tabpfn_classifier.py @@ -475,10 +475,10 @@ def test_only_allowed_parameters_passed_to_config(self): "model_path", "balance_probabilities", "paper_version", - "enhanced_fit_mode", - "enhanced_effort", - "enhanced_timeout_s", - "enhanced_effort_metric", + "thinking_mode", + "thinking_effort", + "thinking_timeout_s", + "thinking_effort_metric", } OPTIONAL_PARAMS = { # These may be emitted by newer model versions, but are not required. 
diff --git a/tests/unit/test_tabpfn_regressor.py b/tests/unit/test_tabpfn_regressor.py index b026747..4a7f13a 100644 --- a/tests/unit/test_tabpfn_regressor.py +++ b/tests/unit/test_tabpfn_regressor.py @@ -469,10 +469,10 @@ def test_only_allowed_parameters_passed_to_config(self): "inference_config", "model_path", "paper_version", - "enhanced_fit_mode", - "enhanced_effort", - "enhanced_timeout_s", - "enhanced_effort_metric", + "thinking_mode", + "thinking_effort", + "thinking_timeout_s", + "thinking_effort_metric", } OPTIONAL_PARAMS = { "thinking", From ce8a75eb076e8cae4f65364fa098d19ad2c9b548 Mon Sep 17 00:00:00 2001 From: Georg Grab Date: Sun, 10 May 2026 10:58:07 +0200 Subject: [PATCH 4/6] feat: pass thinking_* through to server FitRequest 1:1 Server-side wire fields renamed effort/effort_timeout_s/effort_metric -> thinking_effort/thinking_timeout_s/thinking_effort_metric. Drop the client-side translation layer and forward the user-facing knobs directly. --- src/tabpfn_client/api_models.py | 12 ++++++------ src/tabpfn_client/client.py | 23 +++++++++++------------ 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/tabpfn_client/api_models.py b/src/tabpfn_client/api_models.py index 27b9ef3..9a26fff 100644 --- a/src/tabpfn_client/api_models.py +++ b/src/tabpfn_client/api_models.py @@ -103,12 +103,12 @@ class FitRequest(BaseModel): # `tabpfn_systems` values on the server need this at fit time; the # server ignores it otherwise. tabpfn_config: TabPFNConfig = None - # User-facing effort level ("medium" or "high"). None disables it. - effort: Optional[str] = None - # Budget for the fit (seconds). Only consulted when `effort` is set. - effort_timeout_s: Optional[float] = None - # Optimization metric for the fit. Only consulted when `effort` is set. - effort_metric: Optional[str] = None + # User-facing thinking-effort level ("medium" or "high"). None disables it. + thinking_effort: Optional[str] = None + # Budget for the fit (seconds). 
Only consulted when `thinking_effort` is set. + thinking_timeout_s: Optional[float] = None + # Optimization metric for the fit. Only consulted when `thinking_effort` is set. + thinking_effort_metric: Optional[str] = None class FitResponse(BaseModel): diff --git a/src/tabpfn_client/client.py b/src/tabpfn_client/client.py index 41dae22..a2e79bd 100644 --- a/src/tabpfn_client/client.py +++ b/src/tabpfn_client/client.py @@ -399,18 +399,17 @@ def fit( # replacing them — keep preprocessing + text alongside it. tabpfn_systems = ["preprocessing", "text", "thinking"] - # The client-side `thinking_*` knobs are translated to the server's - # top-level FitRequest fields (`effort`, `effort_timeout_s`, - # `effort_metric`). They are only consulted when thinking mode + # The client-side `thinking_*` knobs forward 1:1 to the server's + # top-level FitRequest fields. Only consulted when thinking_mode # is enabled; otherwise we send None. if thinking_mode and tabpfn_config: - effort = tabpfn_config.get("thinking_effort", "medium") - effort_timeout_s = tabpfn_config.get("thinking_timeout_s") - effort_metric = tabpfn_config.get("thinking_effort_metric") + thinking_effort = tabpfn_config.get("thinking_effort", "medium") + thinking_timeout_s = tabpfn_config.get("thinking_timeout_s") + thinking_effort_metric = tabpfn_config.get("thinking_effort_metric") else: - effort = None - effort_timeout_s = None - effort_metric = None + thinking_effort = None + thinking_timeout_s = None + thinking_effort_metric = None # Strip client-only keys that the server does not expect (mirrors # the predict path's filter below). 
@@ -438,9 +437,9 @@ def fit( tabpfn_systems=tabpfn_systems, force_refit=force_refit or force_refit_enabled(), tabpfn_config=server_tabpfn_config, - effort=effort, - effort_timeout_s=effort_timeout_s, - effort_metric=effort_metric, + thinking_effort=thinking_effort, + thinking_timeout_s=thinking_timeout_s, + thinking_effort_metric=thinking_effort_metric, ), timeout=client_options.timeout, headers=client_options.headers, From bebe98d338b35be003aee0fdf97b79003475ab48 Mon Sep 17 00:00:00 2001 From: Georg Grab Date: Sun, 10 May 2026 11:02:38 +0200 Subject: [PATCH 5/6] feat: thinking_effort=... implies thinking is enabled Drops the "medium" default on thinking_effort (now Optional, default None) so the constructor can tell whether the user set it. Either signal turns thinking on: - thinking_mode=True alone -> defaults thinking_effort to "medium" - thinking_effort="medium" or "high" alone -> implies thinking_mode=True Updates validate_thinking_mode and the FitRequest builder in client.py to honour the unified rule. Adds test_thinking_validation.py pinning the contract. --- src/tabpfn_client/client.py | 21 +++++---- src/tabpfn_client/estimator.py | 63 +++++++++++++++----------- tests/unit/test_thinking_validation.py | 62 +++++++++++++++++++++++++ 3 files changed, 112 insertions(+), 34 deletions(-) create mode 100644 tests/unit/test_thinking_validation.py diff --git a/src/tabpfn_client/client.py b/src/tabpfn_client/client.py index a2e79bd..2bd1a9b 100644 --- a/src/tabpfn_client/client.py +++ b/src/tabpfn_client/client.py @@ -388,22 +388,27 @@ def fit( raise tabpfn_systems = ["preprocessing", "text"] - thinking_mode = ( - bool(tabpfn_config.get("thinking_mode")) if tabpfn_config else False + # Thinking is enabled when either flag is set: explicit `thinking_mode=True`, + # or any non-None `thinking_effort`. 
Setting `thinking_effort` alone is + # enough — the server-side validator on FitRequest also normalises this, + # but doing it here means the request body itself is consistent. + thinking_enabled = bool(tabpfn_config) and ( + bool(tabpfn_config.get("thinking_mode")) + or tabpfn_config.get("thinking_effort") is not None ) if tabpfn_config: if tabpfn_config.get("paper_version") is True: tabpfn_systems = [] - elif thinking_mode: - # Thinking mode runs on top of the base systems rather than + elif thinking_enabled: + # Thinking runs on top of the base systems rather than # replacing them — keep preprocessing + text alongside it. tabpfn_systems = ["preprocessing", "text", "thinking"] # The client-side `thinking_*` knobs forward 1:1 to the server's - # top-level FitRequest fields. Only consulted when thinking_mode - # is enabled; otherwise we send None. - if thinking_mode and tabpfn_config: - thinking_effort = tabpfn_config.get("thinking_effort", "medium") + # top-level FitRequest fields. When the user enabled thinking via + # `thinking_mode=True` without picking a level, default to "medium". + if thinking_enabled and tabpfn_config: + thinking_effort = tabpfn_config.get("thinking_effort") or "medium" thinking_timeout_s = tabpfn_config.get("thinking_timeout_s") thinking_effort_metric = tabpfn_config.get("thinking_effort_metric") else: diff --git a/src/tabpfn_client/estimator.py b/src/tabpfn_client/estimator.py index c7a216e..9eca2fa 100644 --- a/src/tabpfn_client/estimator.py +++ b/src/tabpfn_client/estimator.py @@ -184,7 +184,7 @@ def __init__( inference_config: Optional[Dict] = None, paper_version: bool = False, thinking_mode: bool = False, - thinking_effort: ThinkingEffort = "medium", + thinking_effort: Optional[ThinkingEffort] = None, thinking_timeout_s: Optional[float] = None, thinking_effort_metric: Optional[str] = None, force_refit: bool = False, @@ -246,17 +246,19 @@ def __init__( version available on the API, which e.g handles text features better. 
thinking_mode: bool, default=False If True, spend extra fit-time compute for higher precision. - The remaining `thinking_*` parameters are only consulted when - this is True. - thinking_effort: {"medium", "high"}, default="medium" - Effort level for thinking mode. Only consulted when - `thinking_mode=True`. + Equivalent to passing `thinking_effort="medium"` — setting any + `thinking_effort` value also enables thinking, so this flag is + optional when you've set the level explicitly. + thinking_effort: {"medium", "high"} or None, default=None + Effort level for thinking mode. When set, thinking is enabled + (you don't also need `thinking_mode=True`). When None and + `thinking_mode=True`, defaults to "medium". thinking_timeout_s: float or None, default=None - Budget for the fit, in seconds. Only consulted when - `thinking_mode=True`. Capped at 2400. + Budget for the fit, in seconds. Only consulted when thinking is + enabled. Capped at 2400. thinking_effort_metric: str or None, default=None - Optimization metric for the fit. Only consulted when - `thinking_mode=True`. + Optimization metric for the fit. Only consulted when thinking + is enabled. Binary classification: "accuracy", "balanced_accuracy", "mcc", "log_loss", @@ -491,7 +493,7 @@ def __init__( inference_config: Optional[Dict] = None, paper_version: bool = False, thinking_mode: bool = False, - thinking_effort: ThinkingEffort = "medium", + thinking_effort: Optional[ThinkingEffort] = None, thinking_timeout_s: Optional[float] = None, thinking_effort_metric: Optional[str] = None, force_refit: bool = False, @@ -545,17 +547,19 @@ def __init__( version available on the API, which e.g handles text features better. thinking_mode: bool, default=False If True, spend extra fit-time compute for higher precision. - The remaining `thinking_*` parameters are only consulted when - this is True. - thinking_effort: {"medium", "high"}, default="medium" - Effort level for thinking mode. Only consulted when - `thinking_mode=True`. 
+ Equivalent to passing `thinking_effort="medium"` — setting any + `thinking_effort` value also enables thinking, so this flag is + optional when you've set the level explicitly. + thinking_effort: {"medium", "high"} or None, default=None + Effort level for thinking mode. When set, thinking is enabled + (you don't also need `thinking_mode=True`). When None and + `thinking_mode=True`, defaults to "medium". thinking_timeout_s: float or None, default=None - Budget for the fit, in seconds. Only consulted when - `thinking_mode=True`. Capped at 2400. + Budget for the fit, in seconds. Only consulted when thinking is + enabled. Capped at 2400. thinking_effort_metric: str or None, default=None - Optimization metric for the fit. Only consulted when - `thinking_mode=True`. + Optimization metric for the fit. Only consulted when thinking + is enabled. Regression: "r2", "mean_squared_error", "root_mean_squared_error", @@ -758,22 +762,29 @@ def _validate_targets(self, y) -> np.ndarray: def validate_thinking_mode( thinking_mode: bool, - thinking_effort: str, + thinking_effort: Optional[str], thinking_timeout_s: Optional[float], thinking_effort_metric: Optional[str], ) -> None: - if thinking_effort not in _VALID_THINKING_EFFORT_LEVELS: + if ( + thinking_effort is not None + and thinking_effort not in _VALID_THINKING_EFFORT_LEVELS + ): raise ValueError( f"thinking_effort must be one of " f"{sorted(_VALID_THINKING_EFFORT_LEVELS)}, got {thinking_effort!r}." ) - if not thinking_mode and ( + # Setting `thinking_effort` is itself a way to enable thinking, so the + # effective state is "either flag set". Knobs that only make sense when + # thinking is on are rejected only when neither is set. 
+ thinking_enabled = thinking_mode or thinking_effort is not None + if not thinking_enabled and ( thinking_timeout_s is not None or thinking_effort_metric is not None ): raise ValueError( - "thinking_timeout_s and thinking_effort_metric are only consulted " - "when `thinking_mode=True`; pass thinking_mode=True to use " - "them." + "thinking_timeout_s and thinking_effort_metric are only " + "consulted when thinking is enabled; pass `thinking_mode=True` " + "or `thinking_effort=...` to use them." ) if ( thinking_timeout_s is not None diff --git a/tests/unit/test_thinking_validation.py b/tests/unit/test_thinking_validation.py new file mode 100644 index 0000000..3bfdf16 --- /dev/null +++ b/tests/unit/test_thinking_validation.py @@ -0,0 +1,62 @@ +"""Validator contract for the thinking_* knobs on TabPFNClassifier/Regressor. + +Pins the rule that thinking is enabled when either `thinking_mode=True` OR +`thinking_effort` is set, so callers can pass either or both without surprise. +""" + +import pytest + +from tabpfn_client.estimator import ( + THINKING_TIMEOUT_MAX_S, + validate_thinking_mode, +) + + +def _v(**overrides): + args = dict( + thinking_mode=False, + thinking_effort=None, + thinking_timeout_s=None, + thinking_effort_metric=None, + ) + args.update(overrides) + return validate_thinking_mode(**args) + + +class TestThinkingValidator: + def test_neither_flag_is_off(self): + # No flags: thinking off, no errors. + _v() + + def test_thinking_mode_alone_is_on(self): + # Just `thinking_mode=True` is enough; downstream defaults effort to "medium". + _v(thinking_mode=True) + + def test_thinking_effort_alone_implies_on(self): + # The whole point of this contract: setting thinking_effort enables + # thinking even without thinking_mode=True. + _v(thinking_effort="medium") + _v(thinking_effort="high") + + def test_extra_knobs_with_thinking_effort_set_are_allowed(self): + # If thinking is on (via either flag), the budget/metric knobs apply. 
+        _v(thinking_effort="high", thinking_timeout_s=60.0, thinking_effort_metric="rmse")
+        _v(thinking_mode=True, thinking_timeout_s=60.0, thinking_effort_metric="rmse")
+
+    def test_extra_knobs_without_thinking_are_rejected(self):
+        # Knobs that only matter when thinking is on must error if neither flag is set.
+        with pytest.raises(ValueError, match="thinking is enabled"):
+            _v(thinking_timeout_s=60.0)
+        with pytest.raises(ValueError, match="thinking is enabled"):
+            _v(thinking_effort_metric="rmse")
+
+    def test_invalid_effort_level_rejected(self):
+        with pytest.raises(ValueError, match="thinking_effort must be one of"):
+            _v(thinking_effort="extreme")
+
+    def test_timeout_above_cap_rejected(self):
+        with pytest.raises(ValueError, match="exceeds the"):
+            _v(thinking_effort="high", thinking_timeout_s=THINKING_TIMEOUT_MAX_S + 1)
+
+    def test_timeout_at_cap_allowed(self):
+        _v(thinking_effort="high", thinking_timeout_s=THINKING_TIMEOUT_MAX_S)

From 10a09500cc580e3cf6140599a457c9a79f3448e2 Mon Sep 17 00:00:00 2001
From: Georg Grab
Date: Sun, 10 May 2026 12:42:17 +0200
Subject: [PATCH 6/6] chore: address review comments -- rename thinking_effort_metric to thinking_metric

---
 src/tabpfn_client/api_models.py        |  8 ++++----
 src/tabpfn_client/client.py            | 10 +++++-----
 src/tabpfn_client/estimator.py         | 22 +++++++++++-----------
 tests/unit/test_tabpfn_classifier.py   |  2 +-
 tests/unit/test_tabpfn_regressor.py    |  2 +-
 tests/unit/test_thinking_validation.py |  8 ++++----
 6 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/src/tabpfn_client/api_models.py b/src/tabpfn_client/api_models.py
index 9a26fff..cf1a5a8 100644
--- a/src/tabpfn_client/api_models.py
+++ b/src/tabpfn_client/api_models.py
@@ -1,5 +1,5 @@
 from uuid import UUID
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Literal, Optional, Union
 from pydantic import BaseModel, Field
 
 # Classification output_type="preds" preserves the original label type, so
@@ -103,12 +103,12 @@ class 
FitRequest(BaseModel): # `tabpfn_systems` values on the server need this at fit time; the # server ignores it otherwise. tabpfn_config: TabPFNConfig = None - # User-facing thinking-effort level ("medium" or "high"). None disables it. - thinking_effort: Optional[str] = None + # User-facing thinking-effort level. None disables it. + thinking_effort: Optional[Literal["medium", "high"]] = None # Budget for the fit (seconds). Only consulted when `thinking_effort` is set. thinking_timeout_s: Optional[float] = None # Optimization metric for the fit. Only consulted when `thinking_effort` is set. - thinking_effort_metric: Optional[str] = None + thinking_metric: Optional[str] = None class FitResponse(BaseModel): diff --git a/src/tabpfn_client/client.py b/src/tabpfn_client/client.py index 2bd1a9b..8ab3a4c 100644 --- a/src/tabpfn_client/client.py +++ b/src/tabpfn_client/client.py @@ -410,11 +410,11 @@ def fit( if thinking_enabled and tabpfn_config: thinking_effort = tabpfn_config.get("thinking_effort") or "medium" thinking_timeout_s = tabpfn_config.get("thinking_timeout_s") - thinking_effort_metric = tabpfn_config.get("thinking_effort_metric") + thinking_metric = tabpfn_config.get("thinking_metric") else: thinking_effort = None thinking_timeout_s = None - thinking_effort_metric = None + thinking_metric = None # Strip client-only keys that the server does not expect (mirrors # the predict path's filter below). 
@@ -428,7 +428,7 @@ def fit( "thinking_mode", "thinking_effort", "thinking_timeout_s", - "thinking_effort_metric", + "thinking_metric", } } if tabpfn_config is not None @@ -444,7 +444,7 @@ def fit( tabpfn_config=server_tabpfn_config, thinking_effort=thinking_effort, thinking_timeout_s=thinking_timeout_s, - thinking_effort_metric=thinking_effort_metric, + thinking_metric=thinking_metric, ), timeout=client_options.timeout, headers=client_options.headers, @@ -594,7 +594,7 @@ def predict( "thinking_mode", "thinking_effort", "thinking_timeout_s", - "thinking_effort_metric", + "thinking_metric", } } diff --git a/src/tabpfn_client/estimator.py b/src/tabpfn_client/estimator.py index 9eca2fa..27e2015 100644 --- a/src/tabpfn_client/estimator.py +++ b/src/tabpfn_client/estimator.py @@ -186,7 +186,7 @@ def __init__( thinking_mode: bool = False, thinking_effort: Optional[ThinkingEffort] = None, thinking_timeout_s: Optional[float] = None, - thinking_effort_metric: Optional[str] = None, + thinking_metric: Optional[str] = None, force_refit: bool = False, client_options: ClientOptions | None = None, ): @@ -256,7 +256,7 @@ def __init__( thinking_timeout_s: float or None, default=None Budget for the fit, in seconds. Only consulted when thinking is enabled. Capped at 2400. - thinking_effort_metric: str or None, default=None + thinking_metric: str or None, default=None Optimization metric for the fit. Only consulted when thinking is enabled. 
@@ -291,7 +291,7 @@ def __init__( self.thinking_mode = thinking_mode self.thinking_effort = thinking_effort self.thinking_timeout_s = thinking_timeout_s - self.thinking_effort_metric = thinking_effort_metric + self.thinking_metric = thinking_metric self.force_refit = force_refit self.client_options = client_options or ClientOptions() @@ -318,7 +318,7 @@ def fit( self.thinking_mode, self.thinking_effort, self.thinking_timeout_s, - self.thinking_effort_metric, + self.thinking_metric, ) X = _clean_text_features(X) self._validate_targets_and_classes(y) @@ -495,7 +495,7 @@ def __init__( thinking_mode: bool = False, thinking_effort: Optional[ThinkingEffort] = None, thinking_timeout_s: Optional[float] = None, - thinking_effort_metric: Optional[str] = None, + thinking_metric: Optional[str] = None, force_refit: bool = False, client_options: ClientOptions | None = None, ): @@ -557,7 +557,7 @@ def __init__( thinking_timeout_s: float or None, default=None Budget for the fit, in seconds. Only consulted when thinking is enabled. Capped at 2400. - thinking_effort_metric: str or None, default=None + thinking_metric: str or None, default=None Optimization metric for the fit. Only consulted when thinking is enabled. 
@@ -587,7 +587,7 @@ def __init__( self.thinking_mode = thinking_mode self.thinking_effort = thinking_effort self.thinking_timeout_s = thinking_timeout_s - self.thinking_effort_metric = thinking_effort_metric + self.thinking_metric = thinking_metric self.force_refit = force_refit self.client_options = client_options or ClientOptions() @@ -614,7 +614,7 @@ def fit( self.thinking_mode, self.thinking_effort, self.thinking_timeout_s, - self.thinking_effort_metric, + self.thinking_metric, ) self._validate_targets(y) X = _clean_text_features(X) @@ -764,7 +764,7 @@ def validate_thinking_mode( thinking_mode: bool, thinking_effort: Optional[str], thinking_timeout_s: Optional[float], - thinking_effort_metric: Optional[str], + thinking_metric: Optional[str], ) -> None: if ( thinking_effort is not None @@ -779,10 +779,10 @@ def validate_thinking_mode( # thinking is on are rejected only when neither is set. thinking_enabled = thinking_mode or thinking_effort is not None if not thinking_enabled and ( - thinking_timeout_s is not None or thinking_effort_metric is not None + thinking_timeout_s is not None or thinking_metric is not None ): raise ValueError( - "thinking_timeout_s and thinking_effort_metric are only " + "thinking_timeout_s and thinking_metric are only " "consulted when thinking is enabled; pass `thinking_mode=True` " "or `thinking_effort=...` to use them." ) diff --git a/tests/unit/test_tabpfn_classifier.py b/tests/unit/test_tabpfn_classifier.py index a884e86..fd9f361 100644 --- a/tests/unit/test_tabpfn_classifier.py +++ b/tests/unit/test_tabpfn_classifier.py @@ -478,7 +478,7 @@ def test_only_allowed_parameters_passed_to_config(self): "thinking_mode", "thinking_effort", "thinking_timeout_s", - "thinking_effort_metric", + "thinking_metric", } OPTIONAL_PARAMS = { # These may be emitted by newer model versions, but are not required. 
diff --git a/tests/unit/test_tabpfn_regressor.py b/tests/unit/test_tabpfn_regressor.py index 4a7f13a..da2bb0c 100644 --- a/tests/unit/test_tabpfn_regressor.py +++ b/tests/unit/test_tabpfn_regressor.py @@ -472,7 +472,7 @@ def test_only_allowed_parameters_passed_to_config(self): "thinking_mode", "thinking_effort", "thinking_timeout_s", - "thinking_effort_metric", + "thinking_metric", } OPTIONAL_PARAMS = { "thinking", diff --git a/tests/unit/test_thinking_validation.py b/tests/unit/test_thinking_validation.py index 3bfdf16..27e7604 100644 --- a/tests/unit/test_thinking_validation.py +++ b/tests/unit/test_thinking_validation.py @@ -17,7 +17,7 @@ def _v(**overrides): thinking_mode=False, thinking_effort=None, thinking_timeout_s=None, - thinking_effort_metric=None, + thinking_metric=None, ) args.update(overrides) return validate_thinking_mode(**args) @@ -40,15 +40,15 @@ def test_thinking_effort_alone_implies_on(self): def test_extra_knobs_with_thinking_effort_set_are_allowed(self): # If thinking is on (via either flag), the budget/metric knobs apply. - _v(thinking_effort="high", thinking_timeout_s=60.0, thinking_effort_metric="rmse") - _v(thinking_mode=True, thinking_timeout_s=60.0, thinking_effort_metric="rmse") + _v(thinking_effort="high", thinking_timeout_s=60.0, thinking_metric="rmse") + _v(thinking_mode=True, thinking_timeout_s=60.0, thinking_metric="rmse") def test_extra_knobs_without_thinking_are_rejected(self): # Knobs that only matter when thinking is on must error if neither flag is set. with pytest.raises(ValueError, match="thinking is enabled"): _v(thinking_timeout_s=60.0) with pytest.raises(ValueError, match="thinking is enabled"): - _v(thinking_effort_metric="rmse") + _v(thinking_metric="rmse") def test_invalid_effort_level_rejected(self): with pytest.raises(ValueError, match="thinking_effort must be one of"):