From 8a5203accca1fde5b69c8b15ce7eb074d61e0919 Mon Sep 17 00:00:00 2001 From: Ysobel Date: Thu, 22 Jan 2026 13:27:52 +1100 Subject: [PATCH] model path, train or no train --- README.md | 23 +++++++++++++++++++++++ ecosci/models.py | 15 +++++++++++++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f36d463..53ba3f4 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,9 @@ EcoNetToolkit lets you train a shallow neural network or classical models on you - [Inspecting Saved Models](#inspecting-saved-models) - [Config reference (YAML)](#config-reference-yaml) - [Simple example (single model, classification)](#simple-example-single-model-classification) + - [Multi-output (multi-target) prediction](#multi-output-multi-target-prediction) - [Available models and key parameters](#available-models-and-key-parameters) + - [Loading a model from a saved path](#loading-a-model-from-a-saved-path) - [Notes on metrics](#notes-on-metrics) - [Additional notes](#additional-notes) - [Hyperparameter Tuning](#hyperparameter-tuning) @@ -306,6 +308,27 @@ See `configs/penguins_multilabel.yaml` and `configs/possum_multilabel.yaml` for **Linear Regression** (regression only) - `fit_intercept`: Whether to calculate the intercept (default: `true`) +## Loading a model from a saved path + +You can load a previously trained model directly from a file by specifying `model_path` in the model's `params` section of your YAML config. This is useful for reusing or updating models without retraining. + +**Example:** + +```yaml +models: + - name: random_forest + params: + model_path: outputs/possum/random_forest/model_random_forest_seed42.joblib + no_train: true # If true, use the loaded model with no further training +``` + +- If `no_train: true`, EcoNetToolkit will use the loaded model for prediction only and will not retrain it with the current data. 
+- If `no_train` is omitted or set to `false`, the loaded model will be further trained (fit) on the current data, allowing you to continue training or fine-tune it. + +If `model_path` is provided, EcoNetToolkit will load the model from disk using joblib and use it for predictions or further evaluation. When a model is loaded from a path with `no_train: true`, all other parameters are ignored. + +--- + ### Notes on metrics **Classification:** diff --git a/ecosci/models.py b/ecosci/models.py index fcf7491..24a3c8d 100644 --- a/ecosci/models.py +++ b/ecosci/models.py @@ -20,8 +20,8 @@ class ModelZoo: @staticmethod def get_model( - name: str, - problem_type: str = "classification", + name: str, + problem_type: str = "classification", params: Dict[str, Any] = None, n_outputs: int = 1 ): @@ -39,6 +39,17 @@ def get_model( Number of output targets. If > 1, wraps the model in MultiOutput wrapper. """ params = params or {} + # If model_path is provided, load the model from disk + model_path = params.get("model_path") + no_train = params.get("no_train", False) + if model_path: + import joblib + model = joblib.load(model_path) + # If no_train is True, return the loaded model directly + if no_train: + return model + # Otherwise, return the loaded model for further training (fit will be called) + return model if name.lower() == "mlp": from sklearn.neural_network import MLPClassifier, MLPRegressor