From 4d15a875a45e57a21578830b5d26d1ee305d42b4 Mon Sep 17 00:00:00 2001 From: Georg Grab Date: Wed, 6 May 2026 18:14:43 +0000 Subject: [PATCH 1/6] feat: replace enhanced_fit_mode with effort/effort_timeout_s/effort_metric TabPFNClassifier and TabPFNRegressor now take: - effort: Literal["medium", "high"] | None (None disables the autogluon- wrapped fit; medium/high pick the portfolio count server-side) - effort_timeout_s: float | None (user budget; server forwards 0.9x to AG) - effort_metric: str | None (eval metric for the sweep) Replaces enhanced_fit_mode (bool) plus the old enhanced_fit_mode_metric and enhanced_fit_mode_time_limit_s. validate_effort() enforces the literal range, the 2400 s cap, and rejects timeout/metric set without effort. client.py lifts the three fields to top-level FitRequest siblings and keeps tabpfn_systems consistent ("enhanced" present iff effort is set). --- src/tabpfn_client/api_models.py | 14 ++- src/tabpfn_client/client.py | 40 ++++---- src/tabpfn_client/estimator.py | 138 ++++++++++++++++----------- tests/unit/test_tabpfn_classifier.py | 6 +- tests/unit/test_tabpfn_regressor.py | 6 +- 5 files changed, 114 insertions(+), 90 deletions(-) diff --git a/src/tabpfn_client/api_models.py b/src/tabpfn_client/api_models.py index b68f21b..27b9ef3 100644 --- a/src/tabpfn_client/api_models.py +++ b/src/tabpfn_client/api_models.py @@ -103,14 +103,12 @@ class FitRequest(BaseModel): # `tabpfn_systems` values on the server need this at fit time; the # server ignores it otherwise. tabpfn_config: TabPFNConfig = None - # Drives model selection + ensemble weighting during the enhanced-fit - # sweep. Only consulted when `"enhanced"` is in `tabpfn_systems`. None - # falls back to the sweep's default per problem type. - enhanced_fit_mode_metric: Optional[str] = None - # Ceiling on the enhanced-fit sweep (seconds). Only consulted when - # `"enhanced"` is in `tabpfn_systems`. None falls back to the server - # default (300s). 
- enhanced_fit_time_limit_s: Optional[float] = None + # User-facing effort level ("medium" or "high"). None disables it. + effort: Optional[str] = None + # Budget for the fit (seconds). Only consulted when `effort` is set. + effort_timeout_s: Optional[float] = None + # Optimization metric for the fit. Only consulted when `effort` is set. + effort_metric: Optional[str] = None class FitResponse(BaseModel): diff --git a/src/tabpfn_client/client.py b/src/tabpfn_client/client.py index c5448cc..6241baa 100644 --- a/src/tabpfn_client/client.py +++ b/src/tabpfn_client/client.py @@ -388,27 +388,24 @@ def fit( raise tabpfn_systems = ["preprocessing", "text"] + effort = tabpfn_config.get("effort") if tabpfn_config else None if tabpfn_config: if tabpfn_config.get("paper_version") is True: tabpfn_systems = [] - elif tabpfn_config.get("enhanced_fit_mode") is True: + elif effort is not None: # Enhanced mode runs on top of the base systems rather than # replacing them — keep preprocessing + text alongside it. tabpfn_systems = ["preprocessing", "text", "enhanced"] - # `enhanced_fit_mode_metric` and `enhanced_fit_mode_time_limit_s` - # are top-level FitRequest fields on the server (siblings to - # `tabpfn_systems`), not part of `tabpfn_config`. Lift them out - # before stripping the rest of the client-only keys. The server - # field drops the `mode_` infix (`enhanced_fit_time_limit_s`); - # units are seconds on both sides, no conversion. - enhanced_fit_mode_metric = ( - tabpfn_config.get("enhanced_fit_mode_metric") if tabpfn_config else None + # `effort`, `effort_timeout_s`, `effort_metric` are top-level + # FitRequest fields on the server (siblings to `tabpfn_systems`), + # not part of `tabpfn_config`. Lift them out before stripping the + # rest of the client-only keys. 
+ effort_timeout_s = ( + tabpfn_config.get("effort_timeout_s") if tabpfn_config else None ) - enhanced_fit_time_limit_s = ( - tabpfn_config.get("enhanced_fit_mode_time_limit_s") - if tabpfn_config - else None + effort_metric = ( + tabpfn_config.get("effort_metric") if tabpfn_config else None ) # Strip client-only keys that the server does not expect (mirrors @@ -420,9 +417,9 @@ def fit( if k not in { "paper_version", - "enhanced_fit_mode", - "enhanced_fit_mode_metric", - "enhanced_fit_mode_time_limit_s", + "effort", + "effort_timeout_s", + "effort_metric", } } if tabpfn_config is not None @@ -436,8 +433,9 @@ def fit( tabpfn_systems=tabpfn_systems, force_refit=force_refit or force_refit_enabled(), tabpfn_config=server_tabpfn_config, - enhanced_fit_mode_metric=enhanced_fit_mode_metric, - enhanced_fit_time_limit_s=enhanced_fit_time_limit_s, + effort=effort, + effort_timeout_s=effort_timeout_s, + effort_metric=effort_metric, ), timeout=client_options.timeout, headers=client_options.headers, @@ -584,9 +582,9 @@ def predict( if k not in { "paper_version", - "enhanced_fit_mode", - "enhanced_fit_mode_metric", - "enhanced_fit_mode_time_limit_s", + "effort", + "effort_timeout_s", + "effort_metric", } } diff --git a/src/tabpfn_client/estimator.py b/src/tabpfn_client/estimator.py index 27d5b2f..03dae6e 100644 --- a/src/tabpfn_client/estimator.py +++ b/src/tabpfn_client/estimator.py @@ -54,7 +54,10 @@ # is kept as a backward-compatible alias. 
_AUTO_MODEL_PATH_ALIASES = frozenset({"auto", "default"}) -ENHANCED_FIT_MODE_MAX_TIME_LIMIT_S = 40 * 60 +EFFORT_TIMEOUT_MAX_S = 40 * 60 + +EffortLevel = Literal["medium", "high"] +_VALID_EFFORT_LEVELS = frozenset({"medium", "high"}) class TabPFNModelSelection: @@ -180,9 +183,9 @@ def __init__( ] = 0, inference_config: Optional[Dict] = None, paper_version: bool = False, - enhanced_fit_mode: bool = False, - enhanced_fit_mode_metric: Optional[str] = None, - enhanced_fit_mode_time_limit_s: Optional[float] = None, + effort: Optional[EffortLevel] = None, + effort_timeout_s: Optional[float] = None, + effort_metric: Optional[str] = None, force_refit: bool = False, client_options: ClientOptions | None = None, ): @@ -240,24 +243,33 @@ def __init__( paper_version: bool, default=False If True, will use the model described in the paper, instead of the newest version available on the API, which e.g handles text features better. - enhanced_fit_mode: bool, default=False - If True, trades off fit time for precision by running an - automated feature-engineering pipeline on top of TabPFN during - fit. - enhanced_fit_mode_metric: str or None, default=None - Only consulted when `enhanced_fit_mode=True`. Drives model - selection + ensemble weighting during the enhanced-fit sweep - (e.g. "accuracy"/"log_loss"/"roc_auc"/"balanced_accuracy"/ - "f1" for classification). None falls back to the sweep's - default for the problem type. Distinct from the local - `eval_metric`/`tuning_config` knobs used for decision-threshold - tuning on the standalone TabPFN classifier. - enhanced_fit_mode_time_limit_s: float or None, default=None - Only consulted when `enhanced_fit_mode=True`. Ceiling on the - enhanced-fit sweep (seconds). Raise for larger datasets where - the default ~5-minute sweep leaves performance on the table. - None falls back to the server-side default (300s). Capped at - 2400 seconds (40 minutes); higher values raise ValueError at fit. 
+ effort: {"medium", "high"} or None, default=None + Spends extra fit-time compute for higher precision. None + disables it. + effort_timeout_s: float or None, default=None + Budget for the fit, in seconds. Only consulted when `effort` + is set. Capped at 2400. + effort_metric: str or None, default=None + Optimization metric for the fit. Only consulted when `effort` + is set. + + Binary classification: + "accuracy", "balanced_accuracy", "mcc", "log_loss", + "pac", "quadratic_kappa", "roc_auc", "average_precision", + "precision", "precision_macro", "precision_micro", + "precision_weighted", "recall", "recall_macro", + "recall_micro", "recall_weighted", "f1", "f1_macro", + "f1_micro", "f1_weighted". + Multiclass classification: + "accuracy", "balanced_accuracy", "mcc", "log_loss", + "pac", "quadratic_kappa", "precision_macro", + "precision_micro", "precision_weighted", "recall_macro", + "recall_micro", "recall_weighted", "f1_macro", + "f1_micro", "f1_weighted", "roc_auc_ovo", + "roc_auc_ovo_macro", "roc_auc_ovr", "roc_auc_ovr_macro", + "roc_auc_ovr_micro", "roc_auc_ovr_weighted". + + Aliases "acc", "nll", "pac_score" are also accepted. 
""" self.model_path = model_path self.n_estimators = n_estimators @@ -269,9 +281,9 @@ def __init__( self.random_state = random_state self.inference_config = inference_config self.paper_version = paper_version - self.enhanced_fit_mode = enhanced_fit_mode - self.enhanced_fit_mode_metric = enhanced_fit_mode_metric - self.enhanced_fit_mode_time_limit_s = enhanced_fit_mode_time_limit_s + self.effort = effort + self.effort_timeout_s = effort_timeout_s + self.effort_metric = effort_metric self.force_refit = force_refit self.client_options = client_options or ClientOptions() @@ -294,7 +306,7 @@ def fit( estimator_param = self._get_estimator_params_with_model_path("classification") validate_train_set(X, y) - validate_enhanced_fit_mode_time_limit(self.enhanced_fit_mode_time_limit_s) + validate_effort(self.effort, self.effort_timeout_s, self.effort_metric) X = _clean_text_features(X) self._validate_targets_and_classes(y) @@ -467,9 +479,9 @@ def __init__( ] = 0, inference_config: Optional[Dict] = None, paper_version: bool = False, - enhanced_fit_mode: bool = False, - enhanced_fit_mode_metric: Optional[str] = None, - enhanced_fit_mode_time_limit_s: Optional[float] = None, + effort: Optional[EffortLevel] = None, + effort_timeout_s: Optional[float] = None, + effort_metric: Optional[str] = None, force_refit: bool = False, client_options: ClientOptions | None = None, ): @@ -519,21 +531,25 @@ def __init__( paper_version: bool, default=False If True, will use the model described in the paper, instead of the newest version available on the API, which e.g handles text features better. - enhanced_fit_mode: bool, default=False - If True, trades off fit time for precision by running an - automated feature-engineering pipeline on top of TabPFN during - fit. - enhanced_fit_mode_metric: str or None, default=None - Only consulted when `enhanced_fit_mode=True`. Drives model - selection + ensemble weighting during the enhanced-fit sweep - (e.g. "rmse"/"mae"/"r2"/"mape" for regression). 
None falls - back to the sweep's default for the problem type. - enhanced_fit_mode_time_limit_s: float or None, default=None - Only consulted when `enhanced_fit_mode=True`. Ceiling on the - enhanced-fit sweep (seconds). Raise for larger datasets where - the default ~5-minute sweep leaves performance on the table. - None falls back to the server-side default (300s). Capped at - 2400 seconds (40 minutes); higher values raise ValueError at fit. + effort: {"medium", "high"} or None, default=None + Spends extra fit-time compute for higher precision. None + disables it. + effort_timeout_s: float or None, default=None + Budget for the fit, in seconds. Only consulted when `effort` + is set. Capped at 2400. + effort_metric: str or None, default=None + Optimization metric for the fit. Only consulted when `effort` + is set. + + Regression: + "r2", "mean_squared_error", "root_mean_squared_error", + "mean_absolute_error", "median_absolute_error", + "mean_absolute_percentage_error", + "symmetric_mean_absolute_percentage_error", "spearmanr", + "pearsonr". + + Aliases "mse", "rmse", "mae", "mape", "smape" are also + accepted. force_refit: bool, default=False Whether to force refit the model even if the model has already been fitted. 
client_options : ClientOptions, default=None @@ -548,9 +564,9 @@ def __init__( self.random_state = random_state self.inference_config = inference_config self.paper_version = paper_version - self.enhanced_fit_mode = enhanced_fit_mode - self.enhanced_fit_mode_metric = enhanced_fit_mode_metric - self.enhanced_fit_mode_time_limit_s = enhanced_fit_mode_time_limit_s + self.effort = effort + self.effort_timeout_s = effort_timeout_s + self.effort_metric = effort_metric self.force_refit = force_refit self.client_options = client_options or ClientOptions() @@ -573,7 +589,7 @@ def fit( estimator_param = self._get_estimator_params_with_model_path("regression") validate_train_set(X, y) - validate_enhanced_fit_mode_time_limit(self.enhanced_fit_mode_time_limit_s) + validate_effort(self.effort, self.effort_timeout_s, self.effort_metric) self._validate_targets(y) X = _clean_text_features(X) @@ -718,14 +734,26 @@ def _validate_targets(self, y) -> np.ndarray: raise ValueError("Input y contains NaN.") -def validate_enhanced_fit_mode_time_limit(time_limit_s: Optional[float]) -> None: - if time_limit_s is None: - return - if time_limit_s > ENHANCED_FIT_MODE_MAX_TIME_LIMIT_S: +def validate_effort( + effort: Optional[str], + effort_timeout_s: Optional[float], + effort_metric: Optional[str], +) -> None: + if effort is not None and effort not in _VALID_EFFORT_LEVELS: + raise ValueError( + f"effort must be one of {sorted(_VALID_EFFORT_LEVELS)} or None, " + f"got {effort!r}." + ) + if effort is None and (effort_timeout_s is not None or effort_metric is not None): + raise ValueError( + "effort_timeout_s and effort_metric are only consulted when " + "`effort` is set; pass effort='medium' or effort='high' to use them." 
+ ) + if effort_timeout_s is not None and effort_timeout_s > EFFORT_TIMEOUT_MAX_S: raise ValueError( - f"enhanced_fit_mode_time_limit_s ({time_limit_s}) exceeds the " - f"maximum allowed of {ENHANCED_FIT_MODE_MAX_TIME_LIMIT_S} seconds " - f"({ENHANCED_FIT_MODE_MAX_TIME_LIMIT_S // 60} minutes)." + f"effort_timeout_s ({effort_timeout_s}) exceeds the " + f"maximum allowed of {EFFORT_TIMEOUT_MAX_S} seconds " + f"({EFFORT_TIMEOUT_MAX_S // 60} minutes)." ) diff --git a/tests/unit/test_tabpfn_classifier.py b/tests/unit/test_tabpfn_classifier.py index 1bc9b10..d2de6ef 100644 --- a/tests/unit/test_tabpfn_classifier.py +++ b/tests/unit/test_tabpfn_classifier.py @@ -475,9 +475,9 @@ def test_only_allowed_parameters_passed_to_config(self): "model_path", "balance_probabilities", "paper_version", - "enhanced_fit_mode", - "enhanced_fit_mode_metric", - "enhanced_fit_mode_time_limit_s", + "effort", + "effort_timeout_s", + "effort_metric", } OPTIONAL_PARAMS = { # These may be emitted by newer model versions, but are not required. 
diff --git a/tests/unit/test_tabpfn_regressor.py b/tests/unit/test_tabpfn_regressor.py index 912f9f2..ac97b72 100644 --- a/tests/unit/test_tabpfn_regressor.py +++ b/tests/unit/test_tabpfn_regressor.py @@ -469,9 +469,9 @@ def test_only_allowed_parameters_passed_to_config(self): "inference_config", "model_path", "paper_version", - "enhanced_fit_mode", - "enhanced_fit_mode_metric", - "enhanced_fit_mode_time_limit_s", + "effort", + "effort_timeout_s", + "effort_metric", } OPTIONAL_PARAMS = { "thinking", From ea2ce7be93992abc12c7c161c4474ba2d0a8be8c Mon Sep 17 00:00:00 2001 From: Georg Grab Date: Fri, 8 May 2026 14:24:26 +0200 Subject: [PATCH 2/6] feat: gate enhanced fit on enhanced_fit_mode and rename tunables The estimator-side knobs are renamed to make their relationship explicit: - effort -> enhanced_effort, defaulting to "medium" (only consulted when enhanced_fit_mode=True) - effort_timeout_s -> enhanced_timeout_s - effort_metric -> enhanced_effort_metric Activation moves from "effort is not None" back to a dedicated boolean (enhanced_fit_mode, default False). enhanced_effort is a Literal ["medium", "high"] with a documented default of "medium" so users can flip on enhanced fit mode without picking an effort level. Wire format is unchanged: client.py still sends effort, effort_timeout_s and effort_metric on FitRequest, so no server companion change is needed. 
--- src/tabpfn_client/client.py | 42 +++++---- src/tabpfn_client/estimator.py | 127 ++++++++++++++++----------- tests/unit/test_tabpfn_classifier.py | 7 +- tests/unit/test_tabpfn_regressor.py | 7 +- 4 files changed, 110 insertions(+), 73 deletions(-) diff --git a/src/tabpfn_client/client.py b/src/tabpfn_client/client.py index 6241baa..739b9ce 100644 --- a/src/tabpfn_client/client.py +++ b/src/tabpfn_client/client.py @@ -388,25 +388,29 @@ def fit( raise tabpfn_systems = ["preprocessing", "text"] - effort = tabpfn_config.get("effort") if tabpfn_config else None + enhanced_fit_mode = ( + bool(tabpfn_config.get("enhanced_fit_mode")) if tabpfn_config else False + ) if tabpfn_config: if tabpfn_config.get("paper_version") is True: tabpfn_systems = [] - elif effort is not None: + elif enhanced_fit_mode: # Enhanced mode runs on top of the base systems rather than # replacing them — keep preprocessing + text alongside it. tabpfn_systems = ["preprocessing", "text", "enhanced"] - # `effort`, `effort_timeout_s`, `effort_metric` are top-level - # FitRequest fields on the server (siblings to `tabpfn_systems`), - # not part of `tabpfn_config`. Lift them out before stripping the - # rest of the client-only keys. - effort_timeout_s = ( - tabpfn_config.get("effort_timeout_s") if tabpfn_config else None - ) - effort_metric = ( - tabpfn_config.get("effort_metric") if tabpfn_config else None - ) + # The client-side `enhanced_*` knobs are translated to the server's + # top-level FitRequest fields (`effort`, `effort_timeout_s`, + # `effort_metric`). They are only consulted when enhanced fit mode + # is enabled; otherwise we send None. 
+ if enhanced_fit_mode and tabpfn_config: + effort = tabpfn_config.get("enhanced_effort", "medium") + effort_timeout_s = tabpfn_config.get("enhanced_timeout_s") + effort_metric = tabpfn_config.get("enhanced_effort_metric") + else: + effort = None + effort_timeout_s = None + effort_metric = None # Strip client-only keys that the server does not expect (mirrors # the predict path's filter below). @@ -417,9 +421,10 @@ def fit( if k not in { "paper_version", - "effort", - "effort_timeout_s", - "effort_metric", + "enhanced_fit_mode", + "enhanced_effort", + "enhanced_timeout_s", + "enhanced_effort_metric", } } if tabpfn_config is not None @@ -582,9 +587,10 @@ def predict( if k not in { "paper_version", - "effort", - "effort_timeout_s", - "effort_metric", + "enhanced_fit_mode", + "enhanced_effort", + "enhanced_timeout_s", + "enhanced_effort_metric", } } diff --git a/src/tabpfn_client/estimator.py b/src/tabpfn_client/estimator.py index 03dae6e..9071bae 100644 --- a/src/tabpfn_client/estimator.py +++ b/src/tabpfn_client/estimator.py @@ -54,10 +54,10 @@ # is kept as a backward-compatible alias. 
_AUTO_MODEL_PATH_ALIASES = frozenset({"auto", "default"}) -EFFORT_TIMEOUT_MAX_S = 40 * 60 +ENHANCED_TIMEOUT_MAX_S = 40 * 60 -EffortLevel = Literal["medium", "high"] -_VALID_EFFORT_LEVELS = frozenset({"medium", "high"}) +EnhancedEffort = Literal["medium", "high"] +_VALID_ENHANCED_EFFORT_LEVELS = frozenset({"medium", "high"}) class TabPFNModelSelection: @@ -183,9 +183,10 @@ def __init__( ] = 0, inference_config: Optional[Dict] = None, paper_version: bool = False, - effort: Optional[EffortLevel] = None, - effort_timeout_s: Optional[float] = None, - effort_metric: Optional[str] = None, + enhanced_fit_mode: bool = False, + enhanced_effort: EnhancedEffort = "medium", + enhanced_timeout_s: Optional[float] = None, + enhanced_effort_metric: Optional[str] = None, force_refit: bool = False, client_options: ClientOptions | None = None, ): @@ -243,15 +244,19 @@ def __init__( paper_version: bool, default=False If True, will use the model described in the paper, instead of the newest version available on the API, which e.g handles text features better. - effort: {"medium", "high"} or None, default=None - Spends extra fit-time compute for higher precision. None - disables it. - effort_timeout_s: float or None, default=None - Budget for the fit, in seconds. Only consulted when `effort` - is set. Capped at 2400. - effort_metric: str or None, default=None - Optimization metric for the fit. Only consulted when `effort` - is set. + enhanced_fit_mode: bool, default=False + If True, spend extra fit-time compute for higher precision. + The remaining `enhanced_*` parameters are only consulted when + this is True. + enhanced_effort: {"medium", "high"}, default="medium" + Effort level for enhanced fit mode. Only consulted when + `enhanced_fit_mode=True`. + enhanced_timeout_s: float or None, default=None + Budget for the fit, in seconds. Only consulted when + `enhanced_fit_mode=True`. Capped at 2400. + enhanced_effort_metric: str or None, default=None + Optimization metric for the fit. 
Only consulted when + `enhanced_fit_mode=True`. Binary classification: "accuracy", "balanced_accuracy", "mcc", "log_loss", @@ -281,9 +286,10 @@ def __init__( self.random_state = random_state self.inference_config = inference_config self.paper_version = paper_version - self.effort = effort - self.effort_timeout_s = effort_timeout_s - self.effort_metric = effort_metric + self.enhanced_fit_mode = enhanced_fit_mode + self.enhanced_effort = enhanced_effort + self.enhanced_timeout_s = enhanced_timeout_s + self.enhanced_effort_metric = enhanced_effort_metric self.force_refit = force_refit self.client_options = client_options or ClientOptions() @@ -306,7 +312,12 @@ def fit( estimator_param = self._get_estimator_params_with_model_path("classification") validate_train_set(X, y) - validate_effort(self.effort, self.effort_timeout_s, self.effort_metric) + validate_enhanced_fit_mode( + self.enhanced_fit_mode, + self.enhanced_effort, + self.enhanced_timeout_s, + self.enhanced_effort_metric, + ) X = _clean_text_features(X) self._validate_targets_and_classes(y) @@ -479,9 +490,10 @@ def __init__( ] = 0, inference_config: Optional[Dict] = None, paper_version: bool = False, - effort: Optional[EffortLevel] = None, - effort_timeout_s: Optional[float] = None, - effort_metric: Optional[str] = None, + enhanced_fit_mode: bool = False, + enhanced_effort: EnhancedEffort = "medium", + enhanced_timeout_s: Optional[float] = None, + enhanced_effort_metric: Optional[str] = None, force_refit: bool = False, client_options: ClientOptions | None = None, ): @@ -531,15 +543,19 @@ def __init__( paper_version: bool, default=False If True, will use the model described in the paper, instead of the newest version available on the API, which e.g handles text features better. - effort: {"medium", "high"} or None, default=None - Spends extra fit-time compute for higher precision. None - disables it. - effort_timeout_s: float or None, default=None - Budget for the fit, in seconds. 
Only consulted when `effort` - is set. Capped at 2400. - effort_metric: str or None, default=None - Optimization metric for the fit. Only consulted when `effort` - is set. + enhanced_fit_mode: bool, default=False + If True, spend extra fit-time compute for higher precision. + The remaining `enhanced_*` parameters are only consulted when + this is True. + enhanced_effort: {"medium", "high"}, default="medium" + Effort level for enhanced fit mode. Only consulted when + `enhanced_fit_mode=True`. + enhanced_timeout_s: float or None, default=None + Budget for the fit, in seconds. Only consulted when + `enhanced_fit_mode=True`. Capped at 2400. + enhanced_effort_metric: str or None, default=None + Optimization metric for the fit. Only consulted when + `enhanced_fit_mode=True`. Regression: "r2", "mean_squared_error", "root_mean_squared_error", @@ -564,9 +580,10 @@ def __init__( self.random_state = random_state self.inference_config = inference_config self.paper_version = paper_version - self.effort = effort - self.effort_timeout_s = effort_timeout_s - self.effort_metric = effort_metric + self.enhanced_fit_mode = enhanced_fit_mode + self.enhanced_effort = enhanced_effort + self.enhanced_timeout_s = enhanced_timeout_s + self.enhanced_effort_metric = enhanced_effort_metric self.force_refit = force_refit self.client_options = client_options or ClientOptions() @@ -589,7 +606,12 @@ def fit( estimator_param = self._get_estimator_params_with_model_path("regression") validate_train_set(X, y) - validate_effort(self.effort, self.effort_timeout_s, self.effort_metric) + validate_enhanced_fit_mode( + self.enhanced_fit_mode, + self.enhanced_effort, + self.enhanced_timeout_s, + self.enhanced_effort_metric, + ) self._validate_targets(y) X = _clean_text_features(X) @@ -734,26 +756,33 @@ def _validate_targets(self, y) -> np.ndarray: raise ValueError("Input y contains NaN.") -def validate_effort( - effort: Optional[str], - effort_timeout_s: Optional[float], - effort_metric: Optional[str], +def 
validate_enhanced_fit_mode( + enhanced_fit_mode: bool, + enhanced_effort: str, + enhanced_timeout_s: Optional[float], + enhanced_effort_metric: Optional[str], ) -> None: - if effort is not None and effort not in _VALID_EFFORT_LEVELS: + if enhanced_effort not in _VALID_ENHANCED_EFFORT_LEVELS: raise ValueError( - f"effort must be one of {sorted(_VALID_EFFORT_LEVELS)} or None, " - f"got {effort!r}." + f"enhanced_effort must be one of " + f"{sorted(_VALID_ENHANCED_EFFORT_LEVELS)}, got {enhanced_effort!r}." ) - if effort is None and (effort_timeout_s is not None or effort_metric is not None): + if not enhanced_fit_mode and ( + enhanced_timeout_s is not None or enhanced_effort_metric is not None + ): raise ValueError( - "effort_timeout_s and effort_metric are only consulted when " - "`effort` is set; pass effort='medium' or effort='high' to use them." + "enhanced_timeout_s and enhanced_effort_metric are only consulted " + "when `enhanced_fit_mode=True`; pass enhanced_fit_mode=True to use " + "them." ) - if effort_timeout_s is not None and effort_timeout_s > EFFORT_TIMEOUT_MAX_S: + if ( + enhanced_timeout_s is not None + and enhanced_timeout_s > ENHANCED_TIMEOUT_MAX_S + ): raise ValueError( - f"effort_timeout_s ({effort_timeout_s}) exceeds the " - f"maximum allowed of {EFFORT_TIMEOUT_MAX_S} seconds " - f"({EFFORT_TIMEOUT_MAX_S // 60} minutes)." + f"enhanced_timeout_s ({enhanced_timeout_s}) exceeds the " + f"maximum allowed of {ENHANCED_TIMEOUT_MAX_S} seconds " + f"({ENHANCED_TIMEOUT_MAX_S // 60} minutes)." 
) diff --git a/tests/unit/test_tabpfn_classifier.py b/tests/unit/test_tabpfn_classifier.py index d2de6ef..ee1466c 100644 --- a/tests/unit/test_tabpfn_classifier.py +++ b/tests/unit/test_tabpfn_classifier.py @@ -475,9 +475,10 @@ def test_only_allowed_parameters_passed_to_config(self): "model_path", "balance_probabilities", "paper_version", - "effort", - "effort_timeout_s", - "effort_metric", + "enhanced_fit_mode", + "enhanced_effort", + "enhanced_timeout_s", + "enhanced_effort_metric", } OPTIONAL_PARAMS = { # These may be emitted by newer model versions, but are not required. diff --git a/tests/unit/test_tabpfn_regressor.py b/tests/unit/test_tabpfn_regressor.py index ac97b72..b026747 100644 --- a/tests/unit/test_tabpfn_regressor.py +++ b/tests/unit/test_tabpfn_regressor.py @@ -469,9 +469,10 @@ def test_only_allowed_parameters_passed_to_config(self): "inference_config", "model_path", "paper_version", - "effort", - "effort_timeout_s", - "effort_metric", + "enhanced_fit_mode", + "enhanced_effort", + "enhanced_timeout_s", + "enhanced_effort_metric", } OPTIONAL_PARAMS = { "thinking", From 43451a9003b99b73522bc764e1896a7fe75997b2 Mon Sep 17 00:00:00 2001 From: Georg Grab Date: Sat, 9 May 2026 21:33:59 +0200 Subject: [PATCH 3/6] rename enhanced_* knobs to thinking_* --- src/tabpfn_client/client.py | 38 ++++---- src/tabpfn_client/estimator.py | 128 +++++++++++++-------------- tests/unit/test_tabpfn_classifier.py | 8 +- tests/unit/test_tabpfn_regressor.py | 8 +- 4 files changed, 91 insertions(+), 91 deletions(-) diff --git a/src/tabpfn_client/client.py b/src/tabpfn_client/client.py index 739b9ce..41dae22 100644 --- a/src/tabpfn_client/client.py +++ b/src/tabpfn_client/client.py @@ -388,25 +388,25 @@ def fit( raise tabpfn_systems = ["preprocessing", "text"] - enhanced_fit_mode = ( - bool(tabpfn_config.get("enhanced_fit_mode")) if tabpfn_config else False + thinking_mode = ( + bool(tabpfn_config.get("thinking_mode")) if tabpfn_config else False ) if tabpfn_config: if 
tabpfn_config.get("paper_version") is True: tabpfn_systems = [] - elif enhanced_fit_mode: - # Enhanced mode runs on top of the base systems rather than + elif thinking_mode: + # Thinking mode runs on top of the base systems rather than # replacing them — keep preprocessing + text alongside it. - tabpfn_systems = ["preprocessing", "text", "enhanced"] + tabpfn_systems = ["preprocessing", "text", "thinking"] - # The client-side `enhanced_*` knobs are translated to the server's + # The client-side `thinking_*` knobs are translated to the server's # top-level FitRequest fields (`effort`, `effort_timeout_s`, - # `effort_metric`). They are only consulted when enhanced fit mode + # `effort_metric`). They are only consulted when thinking mode # is enabled; otherwise we send None. - if enhanced_fit_mode and tabpfn_config: - effort = tabpfn_config.get("enhanced_effort", "medium") - effort_timeout_s = tabpfn_config.get("enhanced_timeout_s") - effort_metric = tabpfn_config.get("enhanced_effort_metric") + if thinking_mode and tabpfn_config: + effort = tabpfn_config.get("thinking_effort", "medium") + effort_timeout_s = tabpfn_config.get("thinking_timeout_s") + effort_metric = tabpfn_config.get("thinking_effort_metric") else: effort = None effort_timeout_s = None @@ -421,10 +421,10 @@ def fit( if k not in { "paper_version", - "enhanced_fit_mode", - "enhanced_effort", - "enhanced_timeout_s", - "enhanced_effort_metric", + "thinking_mode", + "thinking_effort", + "thinking_timeout_s", + "thinking_effort_metric", } } if tabpfn_config is not None @@ -587,10 +587,10 @@ def predict( if k not in { "paper_version", - "enhanced_fit_mode", - "enhanced_effort", - "enhanced_timeout_s", - "enhanced_effort_metric", + "thinking_mode", + "thinking_effort", + "thinking_timeout_s", + "thinking_effort_metric", } } diff --git a/src/tabpfn_client/estimator.py b/src/tabpfn_client/estimator.py index 9071bae..c7a216e 100644 --- a/src/tabpfn_client/estimator.py +++ b/src/tabpfn_client/estimator.py @@ -54,10 
+54,10 @@ # is kept as a backward-compatible alias. _AUTO_MODEL_PATH_ALIASES = frozenset({"auto", "default"}) -ENHANCED_TIMEOUT_MAX_S = 40 * 60 +THINKING_TIMEOUT_MAX_S = 40 * 60 -EnhancedEffort = Literal["medium", "high"] -_VALID_ENHANCED_EFFORT_LEVELS = frozenset({"medium", "high"}) +ThinkingEffort = Literal["medium", "high"] +_VALID_THINKING_EFFORT_LEVELS = frozenset({"medium", "high"}) class TabPFNModelSelection: @@ -183,10 +183,10 @@ def __init__( ] = 0, inference_config: Optional[Dict] = None, paper_version: bool = False, - enhanced_fit_mode: bool = False, - enhanced_effort: EnhancedEffort = "medium", - enhanced_timeout_s: Optional[float] = None, - enhanced_effort_metric: Optional[str] = None, + thinking_mode: bool = False, + thinking_effort: ThinkingEffort = "medium", + thinking_timeout_s: Optional[float] = None, + thinking_effort_metric: Optional[str] = None, force_refit: bool = False, client_options: ClientOptions | None = None, ): @@ -244,19 +244,19 @@ def __init__( paper_version: bool, default=False If True, will use the model described in the paper, instead of the newest version available on the API, which e.g handles text features better. - enhanced_fit_mode: bool, default=False + thinking_mode: bool, default=False If True, spend extra fit-time compute for higher precision. - The remaining `enhanced_*` parameters are only consulted when + The remaining `thinking_*` parameters are only consulted when this is True. - enhanced_effort: {"medium", "high"}, default="medium" - Effort level for enhanced fit mode. Only consulted when - `enhanced_fit_mode=True`. - enhanced_timeout_s: float or None, default=None + thinking_effort: {"medium", "high"}, default="medium" + Effort level for thinking mode. Only consulted when + `thinking_mode=True`. + thinking_timeout_s: float or None, default=None Budget for the fit, in seconds. Only consulted when - `enhanced_fit_mode=True`. Capped at 2400. - enhanced_effort_metric: str or None, default=None + `thinking_mode=True`. 
Capped at 2400. + thinking_effort_metric: str or None, default=None Optimization metric for the fit. Only consulted when - `enhanced_fit_mode=True`. + `thinking_mode=True`. Binary classification: "accuracy", "balanced_accuracy", "mcc", "log_loss", @@ -286,10 +286,10 @@ def __init__( self.random_state = random_state self.inference_config = inference_config self.paper_version = paper_version - self.enhanced_fit_mode = enhanced_fit_mode - self.enhanced_effort = enhanced_effort - self.enhanced_timeout_s = enhanced_timeout_s - self.enhanced_effort_metric = enhanced_effort_metric + self.thinking_mode = thinking_mode + self.thinking_effort = thinking_effort + self.thinking_timeout_s = thinking_timeout_s + self.thinking_effort_metric = thinking_effort_metric self.force_refit = force_refit self.client_options = client_options or ClientOptions() @@ -312,11 +312,11 @@ def fit( estimator_param = self._get_estimator_params_with_model_path("classification") validate_train_set(X, y) - validate_enhanced_fit_mode( - self.enhanced_fit_mode, - self.enhanced_effort, - self.enhanced_timeout_s, - self.enhanced_effort_metric, + validate_thinking_mode( + self.thinking_mode, + self.thinking_effort, + self.thinking_timeout_s, + self.thinking_effort_metric, ) X = _clean_text_features(X) self._validate_targets_and_classes(y) @@ -490,10 +490,10 @@ def __init__( ] = 0, inference_config: Optional[Dict] = None, paper_version: bool = False, - enhanced_fit_mode: bool = False, - enhanced_effort: EnhancedEffort = "medium", - enhanced_timeout_s: Optional[float] = None, - enhanced_effort_metric: Optional[str] = None, + thinking_mode: bool = False, + thinking_effort: ThinkingEffort = "medium", + thinking_timeout_s: Optional[float] = None, + thinking_effort_metric: Optional[str] = None, force_refit: bool = False, client_options: ClientOptions | None = None, ): @@ -543,19 +543,19 @@ def __init__( paper_version: bool, default=False If True, will use the model described in the paper, instead of the newest 
version available on the API, which e.g handles text features better. - enhanced_fit_mode: bool, default=False + thinking_mode: bool, default=False If True, spend extra fit-time compute for higher precision. - The remaining `enhanced_*` parameters are only consulted when + The remaining `thinking_*` parameters are only consulted when this is True. - enhanced_effort: {"medium", "high"}, default="medium" - Effort level for enhanced fit mode. Only consulted when - `enhanced_fit_mode=True`. - enhanced_timeout_s: float or None, default=None + thinking_effort: {"medium", "high"}, default="medium" + Effort level for thinking mode. Only consulted when + `thinking_mode=True`. + thinking_timeout_s: float or None, default=None Budget for the fit, in seconds. Only consulted when - `enhanced_fit_mode=True`. Capped at 2400. - enhanced_effort_metric: str or None, default=None + `thinking_mode=True`. Capped at 2400. + thinking_effort_metric: str or None, default=None Optimization metric for the fit. Only consulted when - `enhanced_fit_mode=True`. + `thinking_mode=True`. 
Regression: "r2", "mean_squared_error", "root_mean_squared_error", @@ -580,10 +580,10 @@ def __init__( self.random_state = random_state self.inference_config = inference_config self.paper_version = paper_version - self.enhanced_fit_mode = enhanced_fit_mode - self.enhanced_effort = enhanced_effort - self.enhanced_timeout_s = enhanced_timeout_s - self.enhanced_effort_metric = enhanced_effort_metric + self.thinking_mode = thinking_mode + self.thinking_effort = thinking_effort + self.thinking_timeout_s = thinking_timeout_s + self.thinking_effort_metric = thinking_effort_metric self.force_refit = force_refit self.client_options = client_options or ClientOptions() @@ -606,11 +606,11 @@ def fit( estimator_param = self._get_estimator_params_with_model_path("regression") validate_train_set(X, y) - validate_enhanced_fit_mode( - self.enhanced_fit_mode, - self.enhanced_effort, - self.enhanced_timeout_s, - self.enhanced_effort_metric, + validate_thinking_mode( + self.thinking_mode, + self.thinking_effort, + self.thinking_timeout_s, + self.thinking_effort_metric, ) self._validate_targets(y) X = _clean_text_features(X) @@ -756,33 +756,33 @@ def _validate_targets(self, y) -> np.ndarray: raise ValueError("Input y contains NaN.") -def validate_enhanced_fit_mode( - enhanced_fit_mode: bool, - enhanced_effort: str, - enhanced_timeout_s: Optional[float], - enhanced_effort_metric: Optional[str], +def validate_thinking_mode( + thinking_mode: bool, + thinking_effort: str, + thinking_timeout_s: Optional[float], + thinking_effort_metric: Optional[str], ) -> None: - if enhanced_effort not in _VALID_ENHANCED_EFFORT_LEVELS: + if thinking_effort not in _VALID_THINKING_EFFORT_LEVELS: raise ValueError( - f"enhanced_effort must be one of " - f"{sorted(_VALID_ENHANCED_EFFORT_LEVELS)}, got {enhanced_effort!r}." + f"thinking_effort must be one of " + f"{sorted(_VALID_THINKING_EFFORT_LEVELS)}, got {thinking_effort!r}." 
) - if not enhanced_fit_mode and ( - enhanced_timeout_s is not None or enhanced_effort_metric is not None + if not thinking_mode and ( + thinking_timeout_s is not None or thinking_effort_metric is not None ): raise ValueError( - "enhanced_timeout_s and enhanced_effort_metric are only consulted " - "when `enhanced_fit_mode=True`; pass enhanced_fit_mode=True to use " + "thinking_timeout_s and thinking_effort_metric are only consulted " + "when `thinking_mode=True`; pass thinking_mode=True to use " "them." ) if ( - enhanced_timeout_s is not None - and enhanced_timeout_s > ENHANCED_TIMEOUT_MAX_S + thinking_timeout_s is not None + and thinking_timeout_s > THINKING_TIMEOUT_MAX_S ): raise ValueError( - f"enhanced_timeout_s ({enhanced_timeout_s}) exceeds the " - f"maximum allowed of {ENHANCED_TIMEOUT_MAX_S} seconds " - f"({ENHANCED_TIMEOUT_MAX_S // 60} minutes)." + f"thinking_timeout_s ({thinking_timeout_s}) exceeds the " + f"maximum allowed of {THINKING_TIMEOUT_MAX_S} seconds " + f"({THINKING_TIMEOUT_MAX_S // 60} minutes)." ) diff --git a/tests/unit/test_tabpfn_classifier.py b/tests/unit/test_tabpfn_classifier.py index ee1466c..a884e86 100644 --- a/tests/unit/test_tabpfn_classifier.py +++ b/tests/unit/test_tabpfn_classifier.py @@ -475,10 +475,10 @@ def test_only_allowed_parameters_passed_to_config(self): "model_path", "balance_probabilities", "paper_version", - "enhanced_fit_mode", - "enhanced_effort", - "enhanced_timeout_s", - "enhanced_effort_metric", + "thinking_mode", + "thinking_effort", + "thinking_timeout_s", + "thinking_effort_metric", } OPTIONAL_PARAMS = { # These may be emitted by newer model versions, but are not required. 
diff --git a/tests/unit/test_tabpfn_regressor.py b/tests/unit/test_tabpfn_regressor.py index b026747..4a7f13a 100644 --- a/tests/unit/test_tabpfn_regressor.py +++ b/tests/unit/test_tabpfn_regressor.py @@ -469,10 +469,10 @@ def test_only_allowed_parameters_passed_to_config(self): "inference_config", "model_path", "paper_version", - "enhanced_fit_mode", - "enhanced_effort", - "enhanced_timeout_s", - "enhanced_effort_metric", + "thinking_mode", + "thinking_effort", + "thinking_timeout_s", + "thinking_effort_metric", } OPTIONAL_PARAMS = { "thinking", From ce8a75eb076e8cae4f65364fa098d19ad2c9b548 Mon Sep 17 00:00:00 2001 From: Georg Grab Date: Sun, 10 May 2026 10:58:07 +0200 Subject: [PATCH 4/6] feat: pass thinking_* through to server FitRequest 1:1 Server-side wire fields renamed effort/effort_timeout_s/effort_metric -> thinking_effort/thinking_timeout_s/thinking_effort_metric. Drop the client-side translation layer and forward the user-facing knobs directly. --- src/tabpfn_client/api_models.py | 12 ++++++------ src/tabpfn_client/client.py | 23 +++++++++++------------ 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/tabpfn_client/api_models.py b/src/tabpfn_client/api_models.py index 27b9ef3..9a26fff 100644 --- a/src/tabpfn_client/api_models.py +++ b/src/tabpfn_client/api_models.py @@ -103,12 +103,12 @@ class FitRequest(BaseModel): # `tabpfn_systems` values on the server need this at fit time; the # server ignores it otherwise. tabpfn_config: TabPFNConfig = None - # User-facing effort level ("medium" or "high"). None disables it. - effort: Optional[str] = None - # Budget for the fit (seconds). Only consulted when `effort` is set. - effort_timeout_s: Optional[float] = None - # Optimization metric for the fit. Only consulted when `effort` is set. - effort_metric: Optional[str] = None + # User-facing thinking-effort level ("medium" or "high"). None disables it. + thinking_effort: Optional[str] = None + # Budget for the fit (seconds). 
Only consulted when `thinking_effort` is set. + thinking_timeout_s: Optional[float] = None + # Optimization metric for the fit. Only consulted when `thinking_effort` is set. + thinking_effort_metric: Optional[str] = None class FitResponse(BaseModel): diff --git a/src/tabpfn_client/client.py b/src/tabpfn_client/client.py index 41dae22..a2e79bd 100644 --- a/src/tabpfn_client/client.py +++ b/src/tabpfn_client/client.py @@ -399,18 +399,17 @@ def fit( # replacing them — keep preprocessing + text alongside it. tabpfn_systems = ["preprocessing", "text", "thinking"] - # The client-side `thinking_*` knobs are translated to the server's - # top-level FitRequest fields (`effort`, `effort_timeout_s`, - # `effort_metric`). They are only consulted when thinking mode + # The client-side `thinking_*` knobs forward 1:1 to the server's + # top-level FitRequest fields. Only consulted when thinking_mode # is enabled; otherwise we send None. if thinking_mode and tabpfn_config: - effort = tabpfn_config.get("thinking_effort", "medium") - effort_timeout_s = tabpfn_config.get("thinking_timeout_s") - effort_metric = tabpfn_config.get("thinking_effort_metric") + thinking_effort = tabpfn_config.get("thinking_effort", "medium") + thinking_timeout_s = tabpfn_config.get("thinking_timeout_s") + thinking_effort_metric = tabpfn_config.get("thinking_effort_metric") else: - effort = None - effort_timeout_s = None - effort_metric = None + thinking_effort = None + thinking_timeout_s = None + thinking_effort_metric = None # Strip client-only keys that the server does not expect (mirrors # the predict path's filter below). 
@@ -438,9 +437,9 @@ def fit( tabpfn_systems=tabpfn_systems, force_refit=force_refit or force_refit_enabled(), tabpfn_config=server_tabpfn_config, - effort=effort, - effort_timeout_s=effort_timeout_s, - effort_metric=effort_metric, + thinking_effort=thinking_effort, + thinking_timeout_s=thinking_timeout_s, + thinking_effort_metric=thinking_effort_metric, ), timeout=client_options.timeout, headers=client_options.headers, From bebe98d338b35be003aee0fdf97b79003475ab48 Mon Sep 17 00:00:00 2001 From: Georg Grab Date: Sun, 10 May 2026 11:02:38 +0200 Subject: [PATCH 5/6] feat: thinking_effort=... implies thinking is enabled Drops the "medium" default on thinking_effort (now Optional, default None) so the constructor can tell whether the user set it. Either signal turns thinking on: - thinking_mode=True alone -> defaults thinking_effort to "medium" - thinking_effort="medium" or "high" alone -> implies thinking_mode=True Updates validate_thinking_mode and the FitRequest builder in client.py to honour the unified rule. Adds test_thinking_validation.py pinning the contract. --- src/tabpfn_client/client.py | 21 +++++---- src/tabpfn_client/estimator.py | 63 +++++++++++++++----------- tests/unit/test_thinking_validation.py | 62 +++++++++++++++++++++++++ 3 files changed, 112 insertions(+), 34 deletions(-) create mode 100644 tests/unit/test_thinking_validation.py diff --git a/src/tabpfn_client/client.py b/src/tabpfn_client/client.py index a2e79bd..2bd1a9b 100644 --- a/src/tabpfn_client/client.py +++ b/src/tabpfn_client/client.py @@ -388,22 +388,27 @@ def fit( raise tabpfn_systems = ["preprocessing", "text"] - thinking_mode = ( - bool(tabpfn_config.get("thinking_mode")) if tabpfn_config else False + # Thinking is enabled when either flag is set: explicit `thinking_mode=True`, + # or any non-None `thinking_effort`. 
Setting `thinking_effort` alone is + # enough — the server-side validator on FitRequest also normalises this, + # but doing it here means the request body itself is consistent. + thinking_enabled = bool(tabpfn_config) and ( + bool(tabpfn_config.get("thinking_mode")) + or tabpfn_config.get("thinking_effort") is not None ) if tabpfn_config: if tabpfn_config.get("paper_version") is True: tabpfn_systems = [] - elif thinking_mode: - # Thinking mode runs on top of the base systems rather than + elif thinking_enabled: + # Thinking runs on top of the base systems rather than # replacing them — keep preprocessing + text alongside it. tabpfn_systems = ["preprocessing", "text", "thinking"] # The client-side `thinking_*` knobs forward 1:1 to the server's - # top-level FitRequest fields. Only consulted when thinking_mode - # is enabled; otherwise we send None. - if thinking_mode and tabpfn_config: - thinking_effort = tabpfn_config.get("thinking_effort", "medium") + # top-level FitRequest fields. When the user enabled thinking via + # `thinking_mode=True` without picking a level, default to "medium". + if thinking_enabled and tabpfn_config: + thinking_effort = tabpfn_config.get("thinking_effort") or "medium" thinking_timeout_s = tabpfn_config.get("thinking_timeout_s") thinking_effort_metric = tabpfn_config.get("thinking_effort_metric") else: diff --git a/src/tabpfn_client/estimator.py b/src/tabpfn_client/estimator.py index c7a216e..9eca2fa 100644 --- a/src/tabpfn_client/estimator.py +++ b/src/tabpfn_client/estimator.py @@ -184,7 +184,7 @@ def __init__( inference_config: Optional[Dict] = None, paper_version: bool = False, thinking_mode: bool = False, - thinking_effort: ThinkingEffort = "medium", + thinking_effort: Optional[ThinkingEffort] = None, thinking_timeout_s: Optional[float] = None, thinking_effort_metric: Optional[str] = None, force_refit: bool = False, @@ -246,17 +246,19 @@ def __init__( version available on the API, which e.g handles text features better. 
thinking_mode: bool, default=False If True, spend extra fit-time compute for higher precision. - The remaining `thinking_*` parameters are only consulted when - this is True. - thinking_effort: {"medium", "high"}, default="medium" - Effort level for thinking mode. Only consulted when - `thinking_mode=True`. + Equivalent to passing `thinking_effort="medium"` — setting any + `thinking_effort` value also enables thinking, so this flag is + optional when you've set the level explicitly. + thinking_effort: {"medium", "high"} or None, default=None + Effort level for thinking mode. When set, thinking is enabled + (you don't also need `thinking_mode=True`). When None and + `thinking_mode=True`, defaults to "medium". thinking_timeout_s: float or None, default=None - Budget for the fit, in seconds. Only consulted when - `thinking_mode=True`. Capped at 2400. + Budget for the fit, in seconds. Only consulted when thinking is + enabled. Capped at 2400. thinking_effort_metric: str or None, default=None - Optimization metric for the fit. Only consulted when - `thinking_mode=True`. + Optimization metric for the fit. Only consulted when thinking + is enabled. Binary classification: "accuracy", "balanced_accuracy", "mcc", "log_loss", @@ -491,7 +493,7 @@ def __init__( inference_config: Optional[Dict] = None, paper_version: bool = False, thinking_mode: bool = False, - thinking_effort: ThinkingEffort = "medium", + thinking_effort: Optional[ThinkingEffort] = None, thinking_timeout_s: Optional[float] = None, thinking_effort_metric: Optional[str] = None, force_refit: bool = False, @@ -545,17 +547,19 @@ def __init__( version available on the API, which e.g handles text features better. thinking_mode: bool, default=False If True, spend extra fit-time compute for higher precision. - The remaining `thinking_*` parameters are only consulted when - this is True. - thinking_effort: {"medium", "high"}, default="medium" - Effort level for thinking mode. Only consulted when - `thinking_mode=True`. 
+ Equivalent to passing `thinking_effort="medium"` — setting any + `thinking_effort` value also enables thinking, so this flag is + optional when you've set the level explicitly. + thinking_effort: {"medium", "high"} or None, default=None + Effort level for thinking mode. When set, thinking is enabled + (you don't also need `thinking_mode=True`). When None and + `thinking_mode=True`, defaults to "medium". thinking_timeout_s: float or None, default=None - Budget for the fit, in seconds. Only consulted when - `thinking_mode=True`. Capped at 2400. + Budget for the fit, in seconds. Only consulted when thinking is + enabled. Capped at 2400. thinking_effort_metric: str or None, default=None - Optimization metric for the fit. Only consulted when - `thinking_mode=True`. + Optimization metric for the fit. Only consulted when thinking + is enabled. Regression: "r2", "mean_squared_error", "root_mean_squared_error", @@ -758,22 +762,29 @@ def _validate_targets(self, y) -> np.ndarray: def validate_thinking_mode( thinking_mode: bool, - thinking_effort: str, + thinking_effort: Optional[str], thinking_timeout_s: Optional[float], thinking_effort_metric: Optional[str], ) -> None: - if thinking_effort not in _VALID_THINKING_EFFORT_LEVELS: + if ( + thinking_effort is not None + and thinking_effort not in _VALID_THINKING_EFFORT_LEVELS + ): raise ValueError( f"thinking_effort must be one of " f"{sorted(_VALID_THINKING_EFFORT_LEVELS)}, got {thinking_effort!r}." ) - if not thinking_mode and ( + # Setting `thinking_effort` is itself a way to enable thinking, so the + # effective state is "either flag set". Knobs that only make sense when + # thinking is on are rejected only when neither is set. 
+ thinking_enabled = thinking_mode or thinking_effort is not None + if not thinking_enabled and ( thinking_timeout_s is not None or thinking_effort_metric is not None ): raise ValueError( - "thinking_timeout_s and thinking_effort_metric are only consulted " - "when `thinking_mode=True`; pass thinking_mode=True to use " - "them." + "thinking_timeout_s and thinking_effort_metric are only " + "consulted when thinking is enabled; pass `thinking_mode=True` " + "or `thinking_effort=...` to use them." ) if ( thinking_timeout_s is not None diff --git a/tests/unit/test_thinking_validation.py b/tests/unit/test_thinking_validation.py new file mode 100644 index 0000000..3bfdf16 --- /dev/null +++ b/tests/unit/test_thinking_validation.py @@ -0,0 +1,62 @@ +"""Validator contract for the thinking_* knobs on TabPFNClassifier/Regressor. + +Pins the rule that thinking is enabled when either `thinking_mode=True` OR +`thinking_effort` is set, so callers can pass either or both without surprise. +""" + +import pytest + +from tabpfn_client.estimator import ( + THINKING_TIMEOUT_MAX_S, + validate_thinking_mode, +) + + +def _v(**overrides): + args = dict( + thinking_mode=False, + thinking_effort=None, + thinking_timeout_s=None, + thinking_effort_metric=None, + ) + args.update(overrides) + return validate_thinking_mode(**args) + + +class TestThinkingValidator: + def test_neither_flag_is_off(self): + # No flags: thinking off, no errors. + _v() + + def test_thinking_mode_alone_is_on(self): + # Just `thinking_mode=True` is enough; downstream defaults effort to "medium". + _v(thinking_mode=True) + + def test_thinking_effort_alone_implies_on(self): + # The whole point of this contract: setting thinking_effort enables + # thinking even without thinking_mode=True. + _v(thinking_effort="medium") + _v(thinking_effort="high") + + def test_extra_knobs_with_thinking_effort_set_are_allowed(self): + # If thinking is on (via either flag), the budget/metric knobs apply. 
+        _v(thinking_effort="high", thinking_timeout_s=60.0, thinking_effort_metric="rmse")
+        _v(thinking_mode=True, thinking_timeout_s=60.0, thinking_effort_metric="rmse")
+
+    def test_extra_knobs_without_thinking_are_rejected(self):
+        # Knobs that only matter when thinking is on must error if neither flag is set.
+        with pytest.raises(ValueError, match="thinking is enabled"):
+            _v(thinking_timeout_s=60.0)
+        with pytest.raises(ValueError, match="thinking is enabled"):
+            _v(thinking_effort_metric="rmse")
+
+    def test_invalid_effort_level_rejected(self):
+        with pytest.raises(ValueError, match="thinking_effort must be one of"):
+            _v(thinking_effort="extreme")
+
+    def test_timeout_above_cap_rejected(self):
+        with pytest.raises(ValueError, match="exceeds the"):
+            _v(thinking_effort="high", thinking_timeout_s=THINKING_TIMEOUT_MAX_S + 1)
+
+    def test_timeout_at_cap_allowed(self):
+        _v(thinking_effort="high", thinking_timeout_s=THINKING_TIMEOUT_MAX_S)

From 10a09500cc580e3cf6140599a457c9a79f3448e2 Mon Sep 17 00:00:00 2001
From: Georg Grab
Date: Sun, 10 May 2026 12:42:17 +0200
Subject: [PATCH 6/6] chore: address review comments -- rename thinking_effort_metric to thinking_metric

---
 src/tabpfn_client/api_models.py        |  8 ++++----
 src/tabpfn_client/client.py            | 10 +++++-----
 src/tabpfn_client/estimator.py         | 22 +++++++++++-----------
 tests/unit/test_tabpfn_classifier.py   |  2 +-
 tests/unit/test_tabpfn_regressor.py    |  2 +-
 tests/unit/test_thinking_validation.py |  8 ++++----
 6 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/src/tabpfn_client/api_models.py b/src/tabpfn_client/api_models.py
index 9a26fff..cf1a5a8 100644
--- a/src/tabpfn_client/api_models.py
+++ b/src/tabpfn_client/api_models.py
@@ -1,5 +1,5 @@
 from uuid import UUID
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Literal, Optional, Union
 from pydantic import BaseModel, Field
 
 # Classification output_type="preds" preserves the original label type, so
@@ -103,12 +103,12 @@ class 
FitRequest(BaseModel): # `tabpfn_systems` values on the server need this at fit time; the # server ignores it otherwise. tabpfn_config: TabPFNConfig = None - # User-facing thinking-effort level ("medium" or "high"). None disables it. - thinking_effort: Optional[str] = None + # User-facing thinking-effort level. None disables it. + thinking_effort: Optional[Literal["medium", "high"]] = None # Budget for the fit (seconds). Only consulted when `thinking_effort` is set. thinking_timeout_s: Optional[float] = None # Optimization metric for the fit. Only consulted when `thinking_effort` is set. - thinking_effort_metric: Optional[str] = None + thinking_metric: Optional[str] = None class FitResponse(BaseModel): diff --git a/src/tabpfn_client/client.py b/src/tabpfn_client/client.py index 2bd1a9b..8ab3a4c 100644 --- a/src/tabpfn_client/client.py +++ b/src/tabpfn_client/client.py @@ -410,11 +410,11 @@ def fit( if thinking_enabled and tabpfn_config: thinking_effort = tabpfn_config.get("thinking_effort") or "medium" thinking_timeout_s = tabpfn_config.get("thinking_timeout_s") - thinking_effort_metric = tabpfn_config.get("thinking_effort_metric") + thinking_metric = tabpfn_config.get("thinking_metric") else: thinking_effort = None thinking_timeout_s = None - thinking_effort_metric = None + thinking_metric = None # Strip client-only keys that the server does not expect (mirrors # the predict path's filter below). 
@@ -428,7 +428,7 @@ def fit( "thinking_mode", "thinking_effort", "thinking_timeout_s", - "thinking_effort_metric", + "thinking_metric", } } if tabpfn_config is not None @@ -444,7 +444,7 @@ def fit( tabpfn_config=server_tabpfn_config, thinking_effort=thinking_effort, thinking_timeout_s=thinking_timeout_s, - thinking_effort_metric=thinking_effort_metric, + thinking_metric=thinking_metric, ), timeout=client_options.timeout, headers=client_options.headers, @@ -594,7 +594,7 @@ def predict( "thinking_mode", "thinking_effort", "thinking_timeout_s", - "thinking_effort_metric", + "thinking_metric", } } diff --git a/src/tabpfn_client/estimator.py b/src/tabpfn_client/estimator.py index 9eca2fa..27e2015 100644 --- a/src/tabpfn_client/estimator.py +++ b/src/tabpfn_client/estimator.py @@ -186,7 +186,7 @@ def __init__( thinking_mode: bool = False, thinking_effort: Optional[ThinkingEffort] = None, thinking_timeout_s: Optional[float] = None, - thinking_effort_metric: Optional[str] = None, + thinking_metric: Optional[str] = None, force_refit: bool = False, client_options: ClientOptions | None = None, ): @@ -256,7 +256,7 @@ def __init__( thinking_timeout_s: float or None, default=None Budget for the fit, in seconds. Only consulted when thinking is enabled. Capped at 2400. - thinking_effort_metric: str or None, default=None + thinking_metric: str or None, default=None Optimization metric for the fit. Only consulted when thinking is enabled. 
@@ -291,7 +291,7 @@ def __init__( self.thinking_mode = thinking_mode self.thinking_effort = thinking_effort self.thinking_timeout_s = thinking_timeout_s - self.thinking_effort_metric = thinking_effort_metric + self.thinking_metric = thinking_metric self.force_refit = force_refit self.client_options = client_options or ClientOptions() @@ -318,7 +318,7 @@ def fit( self.thinking_mode, self.thinking_effort, self.thinking_timeout_s, - self.thinking_effort_metric, + self.thinking_metric, ) X = _clean_text_features(X) self._validate_targets_and_classes(y) @@ -495,7 +495,7 @@ def __init__( thinking_mode: bool = False, thinking_effort: Optional[ThinkingEffort] = None, thinking_timeout_s: Optional[float] = None, - thinking_effort_metric: Optional[str] = None, + thinking_metric: Optional[str] = None, force_refit: bool = False, client_options: ClientOptions | None = None, ): @@ -557,7 +557,7 @@ def __init__( thinking_timeout_s: float or None, default=None Budget for the fit, in seconds. Only consulted when thinking is enabled. Capped at 2400. - thinking_effort_metric: str or None, default=None + thinking_metric: str or None, default=None Optimization metric for the fit. Only consulted when thinking is enabled. 
@@ -587,7 +587,7 @@ def __init__( self.thinking_mode = thinking_mode self.thinking_effort = thinking_effort self.thinking_timeout_s = thinking_timeout_s - self.thinking_effort_metric = thinking_effort_metric + self.thinking_metric = thinking_metric self.force_refit = force_refit self.client_options = client_options or ClientOptions() @@ -614,7 +614,7 @@ def fit( self.thinking_mode, self.thinking_effort, self.thinking_timeout_s, - self.thinking_effort_metric, + self.thinking_metric, ) self._validate_targets(y) X = _clean_text_features(X) @@ -764,7 +764,7 @@ def validate_thinking_mode( thinking_mode: bool, thinking_effort: Optional[str], thinking_timeout_s: Optional[float], - thinking_effort_metric: Optional[str], + thinking_metric: Optional[str], ) -> None: if ( thinking_effort is not None @@ -779,10 +779,10 @@ def validate_thinking_mode( # thinking is on are rejected only when neither is set. thinking_enabled = thinking_mode or thinking_effort is not None if not thinking_enabled and ( - thinking_timeout_s is not None or thinking_effort_metric is not None + thinking_timeout_s is not None or thinking_metric is not None ): raise ValueError( - "thinking_timeout_s and thinking_effort_metric are only " + "thinking_timeout_s and thinking_metric are only " "consulted when thinking is enabled; pass `thinking_mode=True` " "or `thinking_effort=...` to use them." ) diff --git a/tests/unit/test_tabpfn_classifier.py b/tests/unit/test_tabpfn_classifier.py index a884e86..fd9f361 100644 --- a/tests/unit/test_tabpfn_classifier.py +++ b/tests/unit/test_tabpfn_classifier.py @@ -478,7 +478,7 @@ def test_only_allowed_parameters_passed_to_config(self): "thinking_mode", "thinking_effort", "thinking_timeout_s", - "thinking_effort_metric", + "thinking_metric", } OPTIONAL_PARAMS = { # These may be emitted by newer model versions, but are not required. 
diff --git a/tests/unit/test_tabpfn_regressor.py b/tests/unit/test_tabpfn_regressor.py index 4a7f13a..da2bb0c 100644 --- a/tests/unit/test_tabpfn_regressor.py +++ b/tests/unit/test_tabpfn_regressor.py @@ -472,7 +472,7 @@ def test_only_allowed_parameters_passed_to_config(self): "thinking_mode", "thinking_effort", "thinking_timeout_s", - "thinking_effort_metric", + "thinking_metric", } OPTIONAL_PARAMS = { "thinking", diff --git a/tests/unit/test_thinking_validation.py b/tests/unit/test_thinking_validation.py index 3bfdf16..27e7604 100644 --- a/tests/unit/test_thinking_validation.py +++ b/tests/unit/test_thinking_validation.py @@ -17,7 +17,7 @@ def _v(**overrides): thinking_mode=False, thinking_effort=None, thinking_timeout_s=None, - thinking_effort_metric=None, + thinking_metric=None, ) args.update(overrides) return validate_thinking_mode(**args) @@ -40,15 +40,15 @@ def test_thinking_effort_alone_implies_on(self): def test_extra_knobs_with_thinking_effort_set_are_allowed(self): # If thinking is on (via either flag), the budget/metric knobs apply. - _v(thinking_effort="high", thinking_timeout_s=60.0, thinking_effort_metric="rmse") - _v(thinking_mode=True, thinking_timeout_s=60.0, thinking_effort_metric="rmse") + _v(thinking_effort="high", thinking_timeout_s=60.0, thinking_metric="rmse") + _v(thinking_mode=True, thinking_timeout_s=60.0, thinking_metric="rmse") def test_extra_knobs_without_thinking_are_rejected(self): # Knobs that only matter when thinking is on must error if neither flag is set. with pytest.raises(ValueError, match="thinking is enabled"): _v(thinking_timeout_s=60.0) with pytest.raises(ValueError, match="thinking is enabled"): - _v(thinking_effort_metric="rmse") + _v(thinking_metric="rmse") def test_invalid_effort_level_rejected(self): with pytest.raises(ValueError, match="thinking_effort must be one of"):