From 56d37131600268679a5d8fbc0749a0b595a71829 Mon Sep 17 00:00:00 2001 From: Benjamin Jaeger Date: Wed, 6 May 2026 11:12:14 +0200 Subject: [PATCH 01/11] disable torch svd step --- src/tabpfn/preprocessing/pipeline_factory.py | 26 ++++++++++++-------- src/tabpfn/preprocessing/torch/factory.py | 7 ++---- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/tabpfn/preprocessing/pipeline_factory.py b/src/tabpfn/preprocessing/pipeline_factory.py index 70da147d4..465b0e77e 100644 --- a/src/tabpfn/preprocessing/pipeline_factory.py +++ b/src/tabpfn/preprocessing/pipeline_factory.py @@ -73,12 +73,24 @@ def create_preprocessing_pipeline( steps.append(RemoveConstantFeaturesStep()) + # Hardcode svd_on_gpu to False for now. + svd_on_gpu = False + has_svd = ( + not pconfig.differentiable + and pconfig.global_transformer_name is not None + and pconfig.global_transformer_name != "None" + ) + # Decide whether the quantile transform moves to GPU. # The reshape step still runs on CPU (handling subsampling, categorical # reclassification, append_to_original) but uses "none" (identity) as the # transform so the actual quantile work happens on GPU. - schedule_quantile_for_gpu = enable_gpu_preprocessing and is_gpu_quantile_eligible( - pconfig.name + # When SVD is configured but stays on CPU, quantile must also stay on CPU + # so that SVD sees quantile-transformed data (SVD runs after quantile). + schedule_quantile_for_gpu = ( + enable_gpu_preprocessing + and is_gpu_quantile_eligible(pconfig.name) + and not (has_svd and not svd_on_gpu) ) if pconfig.differentiable: @@ -104,8 +116,7 @@ def create_preprocessing_pipeline( ) ) - # SVD moves to GPU when enable_gpu_preprocessing is on. - if not enable_gpu_preprocessing: + if not svd_on_gpu: use_global_transformer = ( pconfig.global_transformer_name is not None and pconfig.global_transformer_name != "None" @@ -122,12 +133,7 @@ def create_preprocessing_pipeline( # GPU (those steps precede fingerprint and change the data the hash sees). # When neither is on GPU, fingerprint stays on CPU at its original position. fingerprint_on_gpu = enable_gpu_preprocessing and ( - schedule_quantile_for_gpu - or ( - not pconfig.differentiable - and pconfig.global_transformer_name is not None - and pconfig.global_transformer_name != "None" - ) + schedule_quantile_for_gpu or svd_on_gpu ) if config.add_fingerprint_feature and not fingerprint_on_gpu: steps.append(AddFingerprintFeaturesStep()) diff --git a/src/tabpfn/preprocessing/torch/factory.py b/src/tabpfn/preprocessing/torch/factory.py index 1c0f33512..97060332b 100644 --- a/src/tabpfn/preprocessing/torch/factory.py +++ b/src/tabpfn/preprocessing/torch/factory.py @@ -74,11 +74,8 @@ def create_gpu_preprocessing_pipeline( ) # SVD features - svd_on_gpu = ( - pconfig.global_transformer_name is not None - and pconfig.global_transformer_name != "None" - and not pconfig.differentiable - ) + # TODO: We disable SVD on GPU for now because we've seen memory issues. + svd_on_gpu = False if svd_on_gpu and pconfig.global_transformer_name is not None: steps.append( ( From 079e596ac60d77df83b8262a286338fc4e3dc40b Mon Sep 17 00:00:00 2001 From: Benjamin Jaeger Date: Wed, 6 May 2026 14:35:33 +0200 Subject: [PATCH 02/11] make v3 the default --- src/tabpfn/model_loading.py | 23 ++++++++++++----------- src/tabpfn/settings.py | 2 +- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/tabpfn/model_loading.py b/src/tabpfn/model_loading.py index aeda15e39..ab6d887db 100644 --- a/src/tabpfn/model_loading.py +++ b/src/tabpfn/model_loading.py @@ -169,22 +169,22 @@ def get_regressor_v2_6(cls) -> ModelSource: # noqa: D102 @classmethod def get_classifier_v3(cls) -> ModelSource: # noqa: D102 filenames = [ - "tabpfn-v3-classifier-v3_default.ckpt", + "tabpfn-v3-classifier-20260506.ckpt", ] return cls( repo_id="Prior-Labs/tabpfn_3", - default_filename="tabpfn-v3-classifier-v3_default.ckpt", + default_filename="tabpfn-v3-classifier-20260506.ckpt", filenames=filenames, ) @classmethod def get_regressor_v3(cls) -> ModelSource: # noqa: D102 filenames = [ - "tabpfn-v3-regressor-v3_default.ckpt", + "tabpfn-v3-regressor-20260506.ckpt", ] return cls( repo_id="Prior-Labs/tabpfn_3", - default_filename="tabpfn-v3-regressor-v3_default.ckpt", + default_filename="tabpfn-v3-regressor-20260506.ckpt", filenames=filenames, ) @@ -499,13 +499,14 @@ def _download_model( ModelVersion.V2_6: "tabpfn_2_6", ModelVersion.V3: "tabpfn_3", } - if version in _HF_REPOS: - try: - from tabpfn.browser_auth import ensure_license_accepted # noqa: PLC0415 - - ensure_license_accepted(hf_repo_id=_HF_REPOS[version]) - except Exception as e: # noqa: BLE001 - return [e] + # Skip license check for now until tabpfn_3 is public + # if version in _HF_REPOS: + # try: + # from tabpfn.browser_auth import ensure_license_accepted + + # ensure_license_accepted(hf_repo_id=_HF_REPOS[version]) + # except Exception as e: + # return [e] try: model_source = _get_model_source(version, ModelType(which)) diff --git a/src/tabpfn/settings.py b/src/tabpfn/settings.py index 0a07b3584..8b3be469f 100644 --- a/src/tabpfn/settings.py +++ b/src/tabpfn/settings.py @@ -33,7 +33,7 @@ class TabPFNSettings(BaseSettings): "If not set, uses platform-specific user cache directory.", ) model_version: ModelVersion = Field( - default=ModelVersion.V2_6, + default=ModelVersion.V3, description="The version of the TabPFN model to use by default.", ) From fa8c889b3361594165772bc155f2cf311e2408dd Mon Sep 17 00:00:00 2001 From: Benjamin Jaeger Date: Wed, 6 May 2026 15:29:44 +0200 Subject: [PATCH 03/11] introduce embedding_dim API in Architecture --- src/tabpfn/architectures/base/config.py | 2 +- src/tabpfn/architectures/base/transformer.py | 16 ++++++++-------- src/tabpfn/architectures/interface.py | 8 ++++++++ src/tabpfn/architectures/tabpfn_v2_5.py | 2 +- src/tabpfn/architectures/tabpfn_v2_6.py | 2 +- src/tabpfn/architectures/tabpfn_v3.py | 8 +++++++- tests/test_inference.py | 4 ++-- tests/test_regressor_interface.py | 2 +- 8 files changed, 29 insertions(+), 15 deletions(-) diff --git a/src/tabpfn/architectures/base/config.py b/src/tabpfn/architectures/base/config.py index 86a8a3184..c16c75b2b 100644 --- a/src/tabpfn/architectures/base/config.py +++ b/src/tabpfn/architectures/base/config.py @@ -53,7 +53,7 @@ class ModelConfig(ArchitectureConfig): nan_handling_enabled: Literal[True] = True nan_handling_y_encoder: Literal[True] = True nhid_factor: int = 4 - """Hidden dimension in the MLP layers is ninp * nhid_factor.""" + """Hidden dimension in the MLP layers is emsize * nhid_factor.""" nlayers: int = 12 """Number of layers in the encoder, each consisting of a multi-head attention and an MLP layer.""" diff --git a/src/tabpfn/architectures/base/transformer.py b/src/tabpfn/architectures/base/transformer.py index 04caba84d..0d581616d 100644 --- a/src/tabpfn/architectures/base/transformer.py +++ b/src/tabpfn/architectures/base/transformer.py @@ -121,10 +121,10 @@ def __init__( # noqa: D417, PLR0913 Args: encoder: Pass a nn.Module that takes in a batch of sequences of inputs and - returns something of the shape (seq_len, batch_size, ninp) + returns something of the shape (seq_len, batch_size, emsize) y_encoder: A nn.Module that takes in a batch of sequences of outputs and - returns something of the shape (seq_len, batch_size, ninp) + returns something of the shape (seq_len, batch_size, emsize) activation: An activation function, "gelu" or "relu" min_num_layers_layer_dropout: If this is set, it enables to drop the last @@ -185,9 +185,9 @@ def __init__( # noqa: D417, PLR0913 self.encoder = encoder self.y_encoder = y_encoder - self.ninp = config.emsize + self.emsize = config.emsize self.nhid_factor = config.nhid_factor - nhid = self.ninp * self.nhid_factor + nhid = self.emsize * self.nhid_factor self.features_per_group = config.features_per_group self.cache_trainset_representation = cache_trainset_representation self.cached_embeddings: torch.Tensor | None = None @@ -238,7 +238,7 @@ def __init__( # noqa: D417, PLR0913 self.global_att_embeddings_for_compression = nn.Embedding( num_global_att_tokens_for_compression, - self.ninp, + self.emsize, ) self.encoder_compression_layer = LayerStack.of_repeated_layer( @@ -250,7 +250,7 @@ def __init__( # noqa: D417, PLR0913 self.decoder_dict = nn.ModuleDict( { "standard": nn.Sequential( - nn.Linear(self.ninp, nhid), + nn.Linear(self.emsize, nhid), nn.GELU(), nn.Linear(nhid, n_out), ) @@ -260,11 +260,11 @@ def __init__( # noqa: D417, PLR0913 self.feature_positional_embedding = config.feature_positional_embedding if self.feature_positional_embedding == "learned": self.feature_positional_embedding_embeddings = nn.Embedding( - 1_000, self.ninp + 1_000, self.emsize ) elif self.feature_positional_embedding == "subspace": self.feature_positional_embedding_embeddings = nn.Linear( - self.ninp // 4, self.ninp + self.emsize // 4, self.emsize ) self.dag_pos_enc_dim = config.dag_pos_enc_dim diff --git a/src/tabpfn/architectures/interface.py b/src/tabpfn/architectures/interface.py index 44c26528b..38bdc5c3c 100644 --- a/src/tabpfn/architectures/interface.py +++ b/src/tabpfn/architectures/interface.py @@ -247,3 +247,11 @@ def get_default_performance_options(self) -> PerformanceOptions: force_recompute_layer=False, use_chunkwise_inference=False, ) + + @property + def embedding_dim(self) -> int: + """Returns the dimension of the embeddings. + + This is the size of the embeddings that can be used for further processing. + """ + return self.emsize diff --git a/src/tabpfn/architectures/tabpfn_v2_5.py b/src/tabpfn/architectures/tabpfn_v2_5.py index 2cf94af73..707f90ff7 100644 --- a/src/tabpfn/architectures/tabpfn_v2_5.py +++ b/src/tabpfn/architectures/tabpfn_v2_5.py @@ -570,7 +570,7 @@ def __init__( self._do_encoder_nan_check = True # TODO(Phil): This is here to not fail the memory computation. We should make # this a proper API. - self.ninp = config.emsize + self.emsize = config.emsize def _get_feature_group_embedder(self, config: TabPFNV2p5Config) -> nn.Module: """Get the feature group embedder.""" diff --git a/src/tabpfn/architectures/tabpfn_v2_6.py b/src/tabpfn/architectures/tabpfn_v2_6.py index de0b90a02..342556c51 100644 --- a/src/tabpfn/architectures/tabpfn_v2_6.py +++ b/src/tabpfn/architectures/tabpfn_v2_6.py @@ -581,7 +581,7 @@ def __init__( self._do_encoder_nan_check = True # TODO(Phil): This is here to not fail the memory computation. We should make # this a proper API. - self.ninp = config.emsize + self.emsize = config.emsize def _get_feature_group_embedder(self, config: TabPFNV2p6Config) -> nn.Module: """Get the feature group embedder.""" diff --git a/src/tabpfn/architectures/tabpfn_v3.py b/src/tabpfn/architectures/tabpfn_v3.py index 66894adbb..d6af6d803 100644 --- a/src/tabpfn/architectures/tabpfn_v3.py +++ b/src/tabpfn/architectures/tabpfn_v3.py @@ -1640,10 +1640,16 @@ def __init__( ) self.standard_scaler = TorchStandardScaler() self._nan_safe_output = True - self.ninp = config.embed_dim + self.emsize = config.embed_dim + self.num_cls_tokens = config.feat_agg_num_cls_tokens self.inference_row_chunk_size = config.inference_row_chunk_size self.inference_col_chunk_size = config.inference_col_chunk_size + @property + @override + def embedding_dim(self) -> int: + return self.icl_emsize + @override def forward( self, diff --git a/tests/test_inference.py b/tests/test_inference.py index b273a47df..3b8f29c93 100644 --- a/tests/test_inference.py +++ b/tests/test_inference.py @@ -74,7 +74,7 @@ def forward( return x.sum(-2, keepdim=True).sum(-1, keepdim=True).reshape(-1, test_rows) @property - def ninp(self) -> int: + def emsize(self) -> int: return 2 @property @@ -116,7 +116,7 @@ def forward( return x.sum(-2, keepdim=True).sum(-1, keepdim=True).reshape(-1, test_rows) @property - def ninp(self) -> int: + def emsize(self) -> int: return 2 @property diff --git a/tests/test_regressor_interface.py b/tests/test_regressor_interface.py index 7b9956802..c150c41bb 100644 --- a/tests/test_regressor_interface.py +++ b/tests/test_regressor_interface.py @@ -534,7 +534,7 @@ def test_get_embeddings( # Need to access the model through the executor model_instance = next(iter(model.executor_.model_caches[0]._models.values())) - hidden_size = model_instance.ninp + hidden_size = model_instance.embedding_dim assert isinstance(embeddings, np.ndarray) assert embeddings.shape[0] == n_estimators From 0e777fa0477173072add0c279310b034764f380f Mon Sep 17 00:00:00 2001 From: Benjamin Jaeger Date: Wed, 6 May 2026 16:43:52 +0200 Subject: [PATCH 04/11] fix saving and loading of weights --- src/tabpfn/model_loading.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/tabpfn/model_loading.py b/src/tabpfn/model_loading.py index ab6d887db..e0db5668e 100644 --- a/src/tabpfn/model_loading.py +++ b/src/tabpfn/model_loading.py @@ -976,10 +976,10 @@ def load_model( ) if "test_targets_MB" in inspect.signature(model.forward).parameters: - # The model computes the loss internally. Support for this was only added after - # v2.5, so we can safely assume that the inference config is stored in the - # checkpoint. - model.load_state_dict(full_state) + # The model computes the loss internally. Strip criterion keys that + # save_tabpfn_model may have written so load_state_dict doesn't reject them. + model_state = {k: v for k, v in full_state.items() if "criterion." not in k} + model.load_state_dict(model_state) model.eval() inference_config = InferenceConfig( **_rename_old_inference_config_keys(checkpoint["inference_config"]) @@ -1225,9 +1225,14 @@ def load_fitted_tabpfn_model( def _resolve_architecture_name(config: ArchitectureConfig) -> str: """Resolve the architecture name from the config.""" - name = getattr(config, "name", "") - if "2.6" in name: + from tabpfn.architectures.tabpfn_v2_5 import TabPFNV2p5Config # noqa: PLC0415 + from tabpfn.architectures.tabpfn_v2_6 import TabPFNV2p6Config # noqa: PLC0415 + from tabpfn.architectures.tabpfn_v3 import TabPFNV3Config # noqa: PLC0415 + + if isinstance(config, TabPFNV3Config): + return "tabpfn_v3" + if isinstance(config, TabPFNV2p6Config): return "tabpfn_v2_6" - if "2.5" in name: + if isinstance(config, TabPFNV2p5Config): return "tabpfn_v2_5" return "base" From f9e6228b08cb25581bf8aa35da21c131169dd619 Mon Sep 17 00:00:00 2001 From: Benjamin Jaeger Date: Wed, 6 May 2026 17:54:38 +0200 Subject: [PATCH 05/11] fix tests --- src/tabpfn/model_loading.py | 1 + tests/test_classifier_interface.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tabpfn/model_loading.py b/src/tabpfn/model_loading.py index e0db5668e..ba5d01769 100644 --- a/src/tabpfn/model_loading.py +++ b/src/tabpfn/model_loading.py @@ -1102,6 +1102,7 @@ def save_tabpfn_model( "state_dict": state_dict, "config": asdict(config), "architecture_name": architecture_name, + "inference_config": asdict(model.inference_config_), } if additional_fields is not None: diff --git a/tests/test_classifier_interface.py b/tests/test_classifier_interface.py index c0d0060b8..f021166c1 100644 --- a/tests/test_classifier_interface.py +++ b/tests/test_classifier_interface.py @@ -800,7 +800,7 @@ def test_get_embeddings( assert isinstance(embeddings, np.ndarray) assert embeddings.shape[0] == n_estimators assert embeddings.shape[1] == X.shape[0] - assert embeddings.shape[2] == model.models_[0].input_size + assert embeddings.shape[2] == model.models_[0].embedding_dim def test_pandas_output_config(X_y: tuple[np.ndarray, np.ndarray]): From c8ef9287bdfc39caa6f9d04f27f917ada2696645 Mon Sep 17 00:00:00 2001 From: Benjamin Jaeger Date: Thu, 7 May 2026 09:56:09 +0200 Subject: [PATCH 06/11] add math backend as fallback --- src/tabpfn/architectures/tabpfn_v3.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tabpfn/architectures/tabpfn_v3.py b/src/tabpfn/architectures/tabpfn_v3.py index d6af6d803..5d1acec2e 100644 --- a/src/tabpfn/architectures/tabpfn_v3.py +++ b/src/tabpfn/architectures/tabpfn_v3.py @@ -260,8 +260,9 @@ def cache_size_mb(self) -> int: SDPBackend.FLASH_ATTENTION, SDPBackend.EFFICIENT_ATTENTION, SDPBackend.CUDNN_ATTENTION, + SDPBackend.MATH, # fallback for older GPUs or unsupported configurations ] -_SDPA_BACKENDS_CPU = [*_SDPA_BACKENDS, SDPBackend.MATH] +_SDPA_BACKENDS_CPU = [*_SDPA_BACKENDS] # --------------------------------------------------------------------------- # Utility helpers From 5d87ae238fc6b372c54e206b619d3cf6a1f0750c Mon Sep 17 00:00:00 2001 From: Benjamin Jaeger Date: Thu, 7 May 2026 12:20:23 +0200 Subject: [PATCH 07/11] add V3 to integration tests --- tests/conftest.py | 26 ------------------- .../classifier_tiny_dataset_v3.json | 14 ++++++++++ .../regressor_tiny_dataset_v3.json | 5 ++++ tests/test_classifier_interface.py | 14 +++++----- tests/test_consistency.py | 14 ++++++++-- tests/test_estimators.py | 3 ++- tests/test_regressor_interface.py | 14 +++++----- 7 files changed, 49 insertions(+), 41 deletions(-) create mode 100644 tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_v3.json create mode 100644 tests/reference_predictions/darwin_arm64/regressor_tiny_dataset_v3.json diff --git a/tests/conftest.py b/tests/conftest.py index f2ecee196..b4b5f98d5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,8 +14,6 @@ import pytest import torch -from tabpfn.model_loading import ModelSource, get_cache_dir - def pytest_configure(config: pytest.Config) -> None: # noqa: ARG001 """Configure pytest with global settings.""" @@ -29,27 +27,3 @@ def set_global_seed() -> None: torch.manual_seed(seed) np.random.seed(seed) # noqa: NPY002 random.seed(seed) - - -def _is_v3_classifier_in_cache() -> bool: - cache_dir = get_cache_dir() - return (cache_dir / ModelSource.get_classifier_v3().default_filename).exists() - - -def _is_v3_regressor_in_cache() -> bool: - cache_dir = get_cache_dir() - return (cache_dir / ModelSource.get_regressor_v3().default_filename).exists() - - -@pytest.fixture -def skip_if_v3_classifier_unavailable() -> None: - """Skip the test when the V3 classifier model is not in the local cache.""" - if not _is_v3_classifier_in_cache(): - pytest.skip("V3 classifier model not in cache; skipping V3-specific test.") - - -@pytest.fixture -def skip_if_v3_regressor_unavailable() -> None: - """Skip the test when the V3 regressor model is not in the local cache.""" - if not _is_v3_regressor_in_cache(): - pytest.skip("V3 regressor model not in cache; skipping V3-specific test.") diff --git a/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_v3.json b/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_v3.json new file mode 100644 index 000000000..ea0801da0 --- /dev/null +++ b/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_v3.json @@ -0,0 +1,14 @@ +[ + [ + 0.678566575050354, + 0.3214334547519684 + ], + [ + 0.46058961749076843, + 0.5394103527069092 + ], + [ + 0.6697312593460083, + 0.3302687108516693 + ] +] \ No newline at end of file diff --git a/tests/reference_predictions/darwin_arm64/regressor_tiny_dataset_v3.json b/tests/reference_predictions/darwin_arm64/regressor_tiny_dataset_v3.json new file mode 100644 index 000000000..4a8c68fbf --- /dev/null +++ b/tests/reference_predictions/darwin_arm64/regressor_tiny_dataset_v3.json @@ -0,0 +1,5 @@ +[ + 5.081058502197266, + 4.23805046081543, + 4.6899261474609375 +] \ No newline at end of file diff --git a/tests/test_classifier_interface.py b/tests/test_classifier_interface.py index 7e5ab86d8..c4f729e4c 100644 --- a/tests/test_classifier_interface.py +++ b/tests/test_classifier_interface.py @@ -34,7 +34,6 @@ from tabpfn.preprocessing import PreprocessorConfig from tabpfn.utils import infer_devices -from .conftest import _is_v3_classifier_in_cache from .utils import ( get_pytest_devices, is_cpu_float16_supported, @@ -62,7 +61,11 @@ def X_y() -> tuple[np.ndarray, np.ndarray]: ) -model_sources = [ModelSource.get_classifier_v2(), ModelSource.get_classifier_v2_5()] +model_sources = [ + ModelSource.get_classifier_v2(), + ModelSource.get_classifier_v2_5(), + ModelSource.get_classifier_v3(), +] fit_modes = ["low_memory", "fit_preprocessors"] @@ -523,8 +526,9 @@ def test_balance_probabilities_alters_proba_output() -> None: ) -# Only v2 and 2.5 support the KV cache at the moment. -@pytest.mark.parametrize("model_version", [ModelVersion.V2, ModelVersion.V2_5]) +@pytest.mark.parametrize( + "model_version", [ModelVersion.V2, ModelVersion.V2_5, ModelVersion.V3] +) # Disable MPS as it doesn't support float64. @pytest.mark.parametrize("device", [d for d in get_pytest_devices() if d != "mps"]) def test__fit_preprocessors_and_with_cache_produce_equal_results( @@ -562,8 +566,6 @@ def test__fit_preprocessors_and_with_cache_produce_equal_results( def test__fit_preprocessors_and_low_memory_produce_equal_results( X_y: tuple[np.ndarray, np.ndarray], model_version: ModelVersion, device: str ) -> None: - if model_version == ModelVersion.V3 and not _is_v3_classifier_in_cache(): - pytest.skip("V3 classifier model not in cache; skipping V3-specific test.") kwargs = { "version": model_version, "n_estimators": 2, diff --git a/tests/test_consistency.py b/tests/test_consistency.py index e00157750..3bbc79607 100644 --- a/tests/test_consistency.py +++ b/tests/test_consistency.py @@ -115,7 +115,12 @@ class _ConsistencyCase: **DEFAULT_CONFIG, ), ) - for version in [ModelVersion.V2, ModelVersion.V2_5, ModelVersion.V2_6] + for version in [ + ModelVersion.V2, + ModelVersion.V2_5, + ModelVersion.V2_6, + ModelVersion.V3, + ] }, **{ f"regressor_tiny_dataset_{version.value}": _ConsistencyCase( @@ -126,7 +131,12 @@ class _ConsistencyCase: **DEFAULT_CONFIG, ), ) - for version in [ModelVersion.V2, ModelVersion.V2_5, ModelVersion.V2_6] + for version in [ + ModelVersion.V2, + ModelVersion.V2_5, + ModelVersion.V2_6, + ModelVersion.V3, + ] }, "classifier_tiny_dataset_differentiable_input": _ConsistencyCase( data=lambda: _to_tensors(_get_tiny_classification_data()), diff --git a/tests/test_estimators.py b/tests/test_estimators.py index 3b9b7653a..4c6df0dfe 100644 --- a/tests/test_estimators.py +++ b/tests/test_estimators.py @@ -125,7 +125,8 @@ def test__to__after_fit__no_tensors_left_on_old_device( @pytest.mark.parametrize("estimator_class", [TabPFNRegressor, TabPFNClassifier]) @pytest.mark.parametrize("fit_mode", ["fit_preprocessors", "low_memory"]) @pytest.mark.parametrize( - "model_version", [ModelVersion.V2, ModelVersion.V2_5, ModelVersion.V2_6] + "model_version", + [ModelVersion.V2, ModelVersion.V2_5, ModelVersion.V2_6, ModelVersion.V3], ) def test__to__after_fit_and_predict__no_tensors_left_on_old_device( estimator_class: type[TabPFNClassifier] | type[TabPFNRegressor], diff --git a/tests/test_regressor_interface.py b/tests/test_regressor_interface.py index 9878d078b..7a958af5e 100644 --- a/tests/test_regressor_interface.py +++ b/tests/test_regressor_interface.py @@ -27,7 +27,6 @@ from tabpfn.settings import settings from tabpfn.utils import infer_devices -from .conftest import _is_v3_regressor_in_cache from .utils import ( get_pytest_devices, is_cpu_float16_supported, @@ -38,7 +37,11 @@ devices = get_pytest_devices() -model_sources = [ModelSource.get_regressor_v2(), ModelSource.get_regressor_v2_5()] +model_sources = [ + ModelSource.get_regressor_v2(), + ModelSource.get_regressor_v2_5(), + ModelSource.get_regressor_v3(), +] fit_modes = ["low_memory", "fit_preprocessors"] @@ -277,8 +280,9 @@ def test__fit_predict__specify_inference_config__outputs_correct_shape( assert model.predict(X).shape == (X.shape[0],) -# Only v2 and 2.5 support the KV cache at the moment. -@pytest.mark.parametrize("model_version", [ModelVersion.V2, ModelVersion.V2_5]) +@pytest.mark.parametrize( + "model_version", [ModelVersion.V2, ModelVersion.V2_5, ModelVersion.V3] +) # Disable MPS as it doesn't support float64. @pytest.mark.parametrize("device", [d for d in get_pytest_devices() if d != "mps"]) def test__fit_preprocessors_and_with_cache_produce_equal_results( @@ -314,8 +318,6 @@ def test__fit_preprocessors_and_with_cache_produce_equal_results( def test__fit_preprocessors_and_low_memory_produce_equal_results( X_y: tuple[np.ndarray, np.ndarray], model_version: ModelVersion, device: str ) -> None: - if model_version == ModelVersion.V3 and not _is_v3_regressor_in_cache(): - pytest.skip("V3 regressor model not in cache; skipping V3-specific test.") kwargs = { "version": model_version, "n_estimators": 2, From e77a3912500e72cc10a96ff7974f75972f6b4408 Mon Sep 17 00:00:00 2001 From: Benjamin Jaeger Date: Thu, 7 May 2026 13:56:13 +0200 Subject: [PATCH 08/11] add v3 consistency tests --- ...ifier_iris_dataset_several_devices_v3.json | 17 +++++++++ .../classifier_iris_dataset_v3.json | 17 +++++++++ .../classifier_tiny_dataset_3_estimators.json | 14 +++++++ ...assifier_tiny_dataset_3_estimators_v3.json | 14 +++++++ .../classifier_tiny_dataset_5_estimators.json | 14 ------- ..._tiny_dataset_differentiable_input_v3.json | 14 +++++++ ...essor_tiny_dataset_several_devices_v3.json | 5 +++ tests/test_consistency.py | 38 ++++++++++++++++++- 8 files changed, 117 insertions(+), 16 deletions(-) create mode 100644 tests/reference_predictions/darwin_arm64/classifier_iris_dataset_several_devices_v3.json create mode 100644 tests/reference_predictions/darwin_arm64/classifier_iris_dataset_v3.json create mode 100644 tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_3_estimators.json create mode 100644 tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_3_estimators_v3.json delete mode 100644 tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_5_estimators.json create mode 100644 tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_differentiable_input_v3.json create mode 100644 tests/reference_predictions/darwin_arm64/regressor_tiny_dataset_several_devices_v3.json diff --git a/tests/reference_predictions/darwin_arm64/classifier_iris_dataset_several_devices_v3.json b/tests/reference_predictions/darwin_arm64/classifier_iris_dataset_several_devices_v3.json new file mode 100644 index 000000000..bdccf809b --- /dev/null +++ b/tests/reference_predictions/darwin_arm64/classifier_iris_dataset_several_devices_v3.json @@ -0,0 +1,17 @@ +[ + [ + 0.9999734163284302, + 1.3569095244747587e-05, + 1.2983076885575429e-05 + ], + [ + 0.00036331909359432757, + 0.9991976618766785, + 0.0004389923997223377 + ], + [ + 2.672282062121667e-05, + 0.0005141739966347814, + 0.9994590878486633 + ] +] \ No newline at end of file diff --git a/tests/reference_predictions/darwin_arm64/classifier_iris_dataset_v3.json b/tests/reference_predictions/darwin_arm64/classifier_iris_dataset_v3.json new file mode 100644 index 000000000..bdccf809b --- /dev/null +++ b/tests/reference_predictions/darwin_arm64/classifier_iris_dataset_v3.json @@ -0,0 +1,17 @@ +[ + [ + 0.9999734163284302, + 1.3569095244747587e-05, + 1.2983076885575429e-05 + ], + [ + 0.00036331909359432757, + 0.9991976618766785, + 0.0004389923997223377 + ], + [ + 2.672282062121667e-05, + 0.0005141739966347814, + 0.9994590878486633 + ] +] \ No newline at end of file diff --git a/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_3_estimators.json b/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_3_estimators.json new file mode 100644 index 000000000..0eecbfbe6 --- /dev/null +++ b/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_3_estimators.json @@ -0,0 +1,14 @@ +[ + [ + 0.745369017124176, + 0.2546309530735016 + ], + [ + 0.42508718371391296, + 0.5749127864837646 + ], + [ + 0.6312955617904663, + 0.3687044084072113 + ] +] \ No newline at end of file diff --git a/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_3_estimators_v3.json b/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_3_estimators_v3.json new file mode 100644 index 000000000..220fa9c68 --- /dev/null +++ b/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_3_estimators_v3.json @@ -0,0 +1,14 @@ +[ + [ + 0.6735507845878601, + 0.3264492154121399 + ], + [ + 0.44950684905052185, + 0.5504931807518005 + ], + [ + 0.6495934128761292, + 0.35040655732154846 + ] +] \ No newline at end of file diff --git a/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_5_estimators.json b/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_5_estimators.json deleted file mode 100644 index e6ac22e13..000000000 --- a/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_5_estimators.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - [ - 0.6925381422042847, - 0.3074618875980377 - ], - [ - 0.4254949390888214, - 0.574505090713501 - ], - [ - 0.6501684188842773, - 0.34983158111572266 - ] -] \ No newline at end of file diff --git a/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_differentiable_input_v3.json b/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_differentiable_input_v3.json new file mode 100644 index 000000000..bd4ff24a3 --- /dev/null +++ b/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_differentiable_input_v3.json @@ -0,0 +1,14 @@ +[ + [ + 0.7227111458778381, + 0.27728888392448425 + ], + [ + 0.47503453493118286, + 0.5249655246734619 + ], + [ + 0.4784924387931824, + 0.5215075612068176 + ] +] \ No newline at end of file diff --git a/tests/reference_predictions/darwin_arm64/regressor_tiny_dataset_several_devices_v3.json b/tests/reference_predictions/darwin_arm64/regressor_tiny_dataset_several_devices_v3.json new file mode 100644 index 000000000..4a8c68fbf --- /dev/null +++ b/tests/reference_predictions/darwin_arm64/regressor_tiny_dataset_several_devices_v3.json @@ -0,0 +1,5 @@ +[ + 5.081058502197266, + 4.23805046081543, + 4.6899261474609375 +] \ No newline at end of file diff --git a/tests/test_consistency.py b/tests/test_consistency.py index 3bbc79607..c34a651d6 100644 --- a/tests/test_consistency.py +++ b/tests/test_consistency.py @@ -144,12 +144,24 @@ class _ConsistencyCase: version=ModelVersion.V2_6, **DEFAULT_CONFIG, differentiable_input=True ), ), + "classifier_tiny_dataset_differentiable_input_v3": _ConsistencyCase( + data=lambda: _to_tensors(_get_tiny_classification_data()), + model=lambda: TabPFNClassifier.create_default_for_version( + version=ModelVersion.V3, **DEFAULT_CONFIG, differentiable_input=True + ), + ), "classifier_iris_dataset": _ConsistencyCase( data=_get_iris_multiclass_data, model=lambda: TabPFNClassifier.create_default_for_version( version=ModelVersion.V2_6, **DEFAULT_CONFIG ), ), + "classifier_iris_dataset_v3": _ConsistencyCase( + data=_get_iris_multiclass_data, + model=lambda: TabPFNClassifier.create_default_for_version( + version=ModelVersion.V3, **DEFAULT_CONFIG + ), + ), "regressor_tiny_dataset_several_devices": _ConsistencyCase( data=_get_tiny_regression_data, model=lambda: _add_extra_devices( @@ -158,6 +170,14 @@ class _ConsistencyCase: ) ), ), + "regressor_tiny_dataset_several_devices_v3": _ConsistencyCase( + data=_get_tiny_regression_data, + model=lambda: _add_extra_devices( + TabPFNRegressor.create_default_for_version( + version=ModelVersion.V3, **DEFAULT_CONFIG + ) + ), + ), "classifier_iris_dataset_several_devices": _ConsistencyCase( data=_get_iris_multiclass_data, model=lambda: _add_extra_devices( @@ -166,10 +186,24 @@ class _ConsistencyCase: ) ), ), - "classifier_tiny_dataset_5_estimators": _ConsistencyCase( + "classifier_iris_dataset_several_devices_v3": _ConsistencyCase( + data=_get_iris_multiclass_data, + model=lambda: _add_extra_devices( + TabPFNClassifier.create_default_for_version( + version=ModelVersion.V3, **DEFAULT_CONFIG + ) + ), + ), + "classifier_tiny_dataset_3_estimators": _ConsistencyCase( + data=_get_tiny_classification_data, + model=lambda: TabPFNClassifier.create_default_for_version( + version=ModelVersion.V2_6, **DEFAULT_CONFIG | {"n_estimators": 3} + ), + ), + "classifier_tiny_dataset_3_estimators_v3": _ConsistencyCase( data=_get_tiny_classification_data, model=lambda: TabPFNClassifier.create_default_for_version( - version=ModelVersion.V2_6, **DEFAULT_CONFIG | {"n_estimators": 5} + version=ModelVersion.V3, **DEFAULT_CONFIG | {"n_estimators": 3} ), ), } From fbd4999112e2b06dd17ef6570e3a7121393683fe Mon Sep 17 00:00:00 2001 From: Benjamin Jaeger Date: Thu, 7 May 2026 14:18:07 +0200 Subject: [PATCH 09/11] add version tag to 'default' consistency reference files --- ... classifier_iris_dataset_several_devices_v2.6.json} | 0 ..._devices.json => classifier_iris_dataset_v2.6.json} | 0 ... => classifier_tiny_dataset_3_estimators_v2.6.json} | 0 ...sifier_tiny_dataset_differentiable_input_v2.6.json} | 0 ...> regressor_tiny_dataset_several_devices_v2.6.json} | 0 tests/test_consistency.py | 10 +++++----- 6 files changed, 5 insertions(+), 5 deletions(-) rename tests/reference_predictions/darwin_arm64/{classifier_iris_dataset.json => classifier_iris_dataset_several_devices_v2.6.json} (100%) rename tests/reference_predictions/darwin_arm64/{classifier_iris_dataset_several_devices.json => classifier_iris_dataset_v2.6.json} (100%) rename tests/reference_predictions/darwin_arm64/{classifier_tiny_dataset_3_estimators.json => classifier_tiny_dataset_3_estimators_v2.6.json} (100%) rename tests/reference_predictions/darwin_arm64/{classifier_tiny_dataset_differentiable_input.json => classifier_tiny_dataset_differentiable_input_v2.6.json} (100%) rename tests/reference_predictions/darwin_arm64/{regressor_tiny_dataset_several_devices.json => regressor_tiny_dataset_several_devices_v2.6.json} (100%) diff --git a/tests/reference_predictions/darwin_arm64/classifier_iris_dataset.json b/tests/reference_predictions/darwin_arm64/classifier_iris_dataset_several_devices_v2.6.json similarity index 100% rename from tests/reference_predictions/darwin_arm64/classifier_iris_dataset.json rename to tests/reference_predictions/darwin_arm64/classifier_iris_dataset_several_devices_v2.6.json diff --git a/tests/reference_predictions/darwin_arm64/classifier_iris_dataset_several_devices.json b/tests/reference_predictions/darwin_arm64/classifier_iris_dataset_v2.6.json similarity index 100% rename from tests/reference_predictions/darwin_arm64/classifier_iris_dataset_several_devices.json rename to tests/reference_predictions/darwin_arm64/classifier_iris_dataset_v2.6.json diff --git a/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_3_estimators.json b/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_3_estimators_v2.6.json similarity index 100% rename from tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_3_estimators.json rename to tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_3_estimators_v2.6.json diff --git a/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_differentiable_input.json b/tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_differentiable_input_v2.6.json similarity index 100% rename from tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_differentiable_input.json rename to tests/reference_predictions/darwin_arm64/classifier_tiny_dataset_differentiable_input_v2.6.json diff --git a/tests/reference_predictions/darwin_arm64/regressor_tiny_dataset_several_devices.json b/tests/reference_predictions/darwin_arm64/regressor_tiny_dataset_several_devices_v2.6.json similarity index 100% rename from tests/reference_predictions/darwin_arm64/regressor_tiny_dataset_several_devices.json rename to tests/reference_predictions/darwin_arm64/regressor_tiny_dataset_several_devices_v2.6.json diff --git a/tests/test_consistency.py b/tests/test_consistency.py index c34a651d6..e18ea62ac 100644 --- a/tests/test_consistency.py +++ b/tests/test_consistency.py @@ -138,7 +138,7 @@ class _ConsistencyCase: ModelVersion.V3, ] }, - "classifier_tiny_dataset_differentiable_input": _ConsistencyCase( + "classifier_tiny_dataset_differentiable_input_v2.6": _ConsistencyCase( data=lambda: _to_tensors(_get_tiny_classification_data()), model=lambda: TabPFNClassifier.create_default_for_version( version=ModelVersion.V2_6, **DEFAULT_CONFIG, differentiable_input=True @@ -150,7 +150,7 @@ class _ConsistencyCase: version=ModelVersion.V3, **DEFAULT_CONFIG, differentiable_input=True ), ), - "classifier_iris_dataset": _ConsistencyCase( + "classifier_iris_dataset_v2.6": _ConsistencyCase( data=_get_iris_multiclass_data, model=lambda: TabPFNClassifier.create_default_for_version( version=ModelVersion.V2_6, **DEFAULT_CONFIG @@ -162,7 +162,7 @@ class _ConsistencyCase: version=ModelVersion.V3, **DEFAULT_CONFIG ), ), - "regressor_tiny_dataset_several_devices": _ConsistencyCase( + "regressor_tiny_dataset_several_devices_v2.6": _ConsistencyCase( data=_get_tiny_regression_data, model=lambda: _add_extra_devices( TabPFNRegressor.create_default_for_version( @@ -178,7 +178,7 @@ class _ConsistencyCase: ) ), ), - "classifier_iris_dataset_several_devices": _ConsistencyCase( + "classifier_iris_dataset_several_devices_v2.6": _ConsistencyCase( data=_get_iris_multiclass_data, model=lambda: _add_extra_devices( TabPFNClassifier.create_default_for_version( @@ -194,7 +194,7 @@ class _ConsistencyCase: ) ), ), - "classifier_tiny_dataset_3_estimators": _ConsistencyCase( + "classifier_tiny_dataset_3_estimators_v2.6": _ConsistencyCase( data=_get_tiny_classification_data, model=lambda: TabPFNClassifier.create_default_for_version( version=ModelVersion.V2_6, **DEFAULT_CONFIG | {"n_estimators": 3} From d43ca12ab5ee691771cf599081bf47775fdd4aa9 Mon Sep 17 00:00:00 2001 From: Benjamin Jaeger Date: Sun, 10 May 2026 10:46:14 +0000 Subject: [PATCH 10/11] fix tests --- tests/test_classifier_interface.py | 3 --- tests/test_regressor_interface.py | 3 --- 2 files changed, 6 deletions(-) diff --git a/tests/test_classifier_interface.py b/tests/test_classifier_interface.py index f5554bfbf..93d293f7c 100644 --- a/tests/test_classifier_interface.py +++ b/tests/test_classifier_interface.py @@ -598,9 +598,6 @@ def test__fit_preprocessors_and_low_memory_produce_equal_results( def test__fit_and_predict__on_demo_dataset__accuracy_reasonable( model_version: ModelVersion, ) -> None: - if model_version == ModelVersion.V3 and not is_v3_classifier_in_cache(): - pytest.skip("V3 classifier model not in cache.") - X, y = sklearn.datasets.load_iris(return_X_y=True) model = TabPFNClassifier.create_default_for_version( version=model_version, random_state=0 diff --git a/tests/test_regressor_interface.py b/tests/test_regressor_interface.py index 284571af9..1f6b912f2 100644 --- a/tests/test_regressor_interface.py +++ b/tests/test_regressor_interface.py @@ -347,9 +347,6 @@ def test__fit_preprocessors_and_low_memory_produce_equal_results( def test__fit_and_predict__on_demo_dataset__r2_reasonable( model_version: ModelVersion, ) -> None: - if model_version == ModelVersion.V3 and not is_v3_regressor_in_cache(): - pytest.skip("V3 regressor model not in cache.") - X, y = sklearn.datasets.make_friedman1(n_samples=200, noise=0.1, random_state=0) model = TabPFNRegressor.create_default_for_version( version=model_version, random_state=0 From 92a1fd93e4e68a442e3aeeac2973a5d0a0c36682 Mon Sep 17 00:00:00 2001 From: Brendan Roof Date: Sun, 10 May 2026 12:40:20 +0000 Subject: [PATCH 11/11] Use canonical v3 checkpoint filenames Switch ModelSource.get_{classifier,regressor}_v3() default_filename and filenames from the dated form (tabpfn-v3-{role}-20260506.ckpt) to the canonical wire-format name used everywhere else in the stack (tabpfn-v3-{role}-v3_default.ckpt). This is the form produced by tabpfn-client's `_model_name_to_path("v3_default")` and pinned by tabpfn-server's STARTUP_MODELS, so a single source of truth flows end-to-end with no rename layer in between. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/tabpfn/model_loading.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tabpfn/model_loading.py b/src/tabpfn/model_loading.py index 90d20f69b..4237b29f9 100644 --- a/src/tabpfn/model_loading.py +++ b/src/tabpfn/model_loading.py @@ -169,22 +169,22 @@ def get_regressor_v2_6(cls) -> ModelSource: # noqa: D102 @classmethod def get_classifier_v3(cls) -> ModelSource: # noqa: D102 filenames = [ - "tabpfn-v3-classifier-20260506.ckpt", + "tabpfn-v3-classifier-v3_default.ckpt", ] return cls( repo_id="Prior-Labs/tabpfn_3", - default_filename="tabpfn-v3-classifier-20260506.ckpt", + default_filename="tabpfn-v3-classifier-v3_default.ckpt", filenames=filenames, ) @classmethod def get_regressor_v3(cls) -> ModelSource: # noqa: D102 filenames = [ - "tabpfn-v3-regressor-20260506.ckpt", + "tabpfn-v3-regressor-v3_default.ckpt", ] return cls( repo_id="Prior-Labs/tabpfn_3", - default_filename="tabpfn-v3-regressor-20260506.ckpt", + default_filename="tabpfn-v3-regressor-v3_default.ckpt", filenames=filenames, )