Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ runs:

baselines:
- baseline:
baseline_id: COSMO-E
label: COSMO-E
root: /store_new/mch/msopr/ml/COSMO-E
steps: 0/120/6
Expand Down
2 changes: 0 additions & 2 deletions config/forecasters-co1e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@ runs:
extra_requirements:
- git+https://github.com/ecmwf/anemoi-inference.git@0.6.3

baselines:
- baseline:
baseline_id: COSMO-1E
label: COSMO-1E
root: /store_new/mch/msopr/ml/COSMO-1E
steps: 0/33/6
Expand Down
2 changes: 0 additions & 2 deletions config/forecasters-co2-disentangled.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,7 @@ runs:
- earthkit-data<0.19.0
- git+https://github.com/MeteoSwiss/anemoi-core.git@2a90165e3f25defc55fbeb77f7b4ebfef685820d#subdirectory=models

baselines:
- baseline:
baseline_id: COSMO-E
label: COSMO-E
root: /store_new/mch/msopr/ml/COSMO-E
steps: 0/120/6
Expand Down
2 changes: 0 additions & 2 deletions config/forecasters-co2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@ runs:
extra_requirements:
- git+https://github.com/ecmwf/anemoi-inference.git@0.8.3

baselines:
- baseline:
baseline_id: COSMO-E
label: COSMO-E
root: /store_new/mch/msopr/ml/COSMO-E
steps: 0/120/6
Expand Down
1 change: 0 additions & 1 deletion config/forecasters-ich1-oper-fixed.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ runs:

baselines:
- baseline:
baseline_id: ICON-CH1-EPS
label: ICON-CH1-EPS
root: /store_new/mch/msopr/ml/ICON-CH1-EPS
steps: 0/33/6
Expand Down
4 changes: 1 addition & 3 deletions config/forecasters-ich1-oper.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,12 @@ runs:
- earthkit-data<0.19.0
- git+https://github.com/ecmwf/anemoi-inference.git@main

baselines:
- baseline:
baseline_id: ICON-CH1-EPS
label: ICON-CH1-ctrl
root: /scratch/mch/cmerker/ICON-CH1-EPS
steps: 0/33/6

- baseline:
baseline_id: ICON-CH2-EPS
label: ICON-CH2-ctrl
root: /scratch/mch/cmerker/ICON-CH2-EPS
steps: 0/120/6
Expand Down
8 changes: 3 additions & 5 deletions config/forecasters-ich1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,11 @@ runs:
# config: resources/inference/configs/sgm-forecaster-global-ich1.yaml
# disable_local_eccodes_definitions: true
# extra_requirements:
# - earthkit-utils<0.2.0
# - earthkit-data<0.19.0
# - git+https://github.com/ecmwf/anemoi-inference.git@main
# - earthkit-utils<0.2.0
# - earthkit-data<0.19.0
# - git+https://github.com/ecmwf/anemoi-inference.git@main

baselines:
- baseline:
baseline_id: ICON-CH2-EPS
label: ICON-CH2-EPS
root: /scratch/mch/cmerker/ICON-CH2-EPS
steps: 0/120/6
Expand Down
2 changes: 0 additions & 2 deletions config/interpolators-co2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ runs:
extra_requirements:
- git+https://github.com/ecmwf/anemoi-inference.git@0.8.3

baselines:
- baseline:
baseline_id: COSMO-E-1h
label: COSMO-E
root: /store_new/mch/msopr/ml/COSMO-E_hourly
steps: 0/120/1
Expand Down
16 changes: 8 additions & 8 deletions src/evalml/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,10 +151,10 @@ class InterpolatorConfig(RunConfig):
class BaselineConfig(BaseModel):
"""Configuration for a single baseline to include in the verification."""

baseline_id: str = Field(
...,
baseline_id: str | None = Field(
None,
min_length=1,
description="Identifier for the baseline, e.g. 'COSMO-E'.",
description="Deprecated compatibility field. Workflow baseline IDs are derived from the stem of `root`.",
)
label: str = Field(
...,
Expand All @@ -164,7 +164,7 @@ class BaselineConfig(BaseModel):
root: str = Field(
...,
min_length=1,
description="Root directory where the baseline data is stored.",
description="Root directory where the baseline data is stored. The workflow derives the baseline ID from the stem of this path.",
)
steps: str = Field(
...,
Expand Down Expand Up @@ -298,13 +298,13 @@ class ConfigModel(BaseModel):
description="Optional label for the experiment that will be used in the experiment directory name. Defaults to the config file name if not provided.",
)
dates: Dates | ExplicitDates
runs: List[ForecasterItem | InterpolatorItem] = Field(
runs: List[ForecasterItem | InterpolatorItem | BaselineItem] = Field(
...,
description="Dictionary of runs to execute, with run IDs as keys and configurations as values.",
description="List of experiment participants, including forecaster/interpolator ML runs and baselines.",
)
baselines: List[BaselineItem] = Field(
...,
description="Dictionary of baselines to include in the verification.",
default_factory=list,
description="Deprecated top-level baselines list. Prefer defining baseline entries directly in `runs`.",
)
truth: TruthConfig | None
stratification: Stratification
Expand Down
66 changes: 66 additions & 0 deletions tests/unit/test_config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from pathlib import Path

import pytest

from evalml.config import ConfigModel
Expand Down Expand Up @@ -25,3 +27,67 @@ def test_example_interpolators_config(example_interpolators_config):
del example_interpolators_config["runs"]
with pytest.raises(ValueError, match="Field required"):
_ = ConfigModel.model_validate(example_interpolators_config)


def test_legacy_top_level_baselines_still_supported(example_forecasters_config):
    """A config using the legacy top-level `baselines` list must still validate."""

    all_entries = example_forecasters_config["runs"]

    # Rebuild the config in the legacy layout: ML runs stay under `runs`,
    # baseline entries move to the top-level `baselines` key.
    legacy_cfg = dict(example_forecasters_config)
    legacy_cfg["runs"] = [entry for entry in all_entries if "forecaster" in entry]
    legacy_cfg["baselines"] = [entry for entry in all_entries if "baseline" in entry]

    # Validation raising is the failure signal; the parsed model is not inspected.
    ConfigModel.model_validate(legacy_cfg)


def test_workflow_parsing_excludes_baselines_from_run_configs(
    example_forecasters_config,
):
    """Baseline entries in `runs` should not be treated as ML run configs."""

    # Locate the rules file from this test module's position (tests/unit/)
    # instead of the current working directory, so the test does not depend
    # on where pytest is invoked from.
    repo_root = Path(__file__).resolve().parents[2]
    rules_path = repo_root / "workflow" / "rules" / "common.smk"
    common_rules = rules_path.read_text(encoding="utf-8")

    # The rules file reads `config` and `Path` as globals; executing it fills
    # the namespace with the module-level results inspected below.
    namespace = {
        "Path": Path,
        "config": example_forecasters_config,
    }
    exec(common_rules, namespace)

    run_configs = namespace["RUN_CONFIGS"]
    baseline_configs = namespace["BASELINE_CONFIGS"]

    assert all(
        run_config["model_type"] != "baseline" for run_config in run_configs.values()
    )
    assert baseline_configs == {
        "COSMO-E": {
            "label": "COSMO-E",
            "root": "/store_new/mch/msopr/ml/COSMO-E",
            "steps": "0/120/6",
        }
    }


def test_workflow_derives_baseline_id_from_root_stem(example_interpolators_config):
    """Workflow baseline IDs should come from the baseline root path stem."""

    # Locate the rules file from this test module's position (tests/unit/)
    # instead of the current working directory, so the test does not depend
    # on where pytest is invoked from.
    repo_root = Path(__file__).resolve().parents[2]
    rules_path = repo_root / "workflow" / "rules" / "common.smk"
    common_rules = rules_path.read_text(encoding="utf-8")

    # The rules file reads `config` and `Path` as globals; executing it fills
    # the namespace with the module-level results inspected below.
    namespace = {
        "Path": Path,
        "config": example_interpolators_config,
    }
    exec(common_rules, namespace)

    baseline_configs = namespace["BASELINE_CONFIGS"]

    # The ID is the last component of `root`, not the removed `baseline_id`.
    assert "COSMO-E_hourly" in baseline_configs
    assert "COSMO-E-1h" not in baseline_configs
    assert baseline_configs["COSMO-E_hourly"] == {
        "label": "COSMO-E",
        "root": "/store_new/mch/msopr/ml/COSMO-E_hourly",
        "steps": "0/120/1",
    }
17 changes: 15 additions & 2 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ def collect_all_runs() -> dict:
runs: dict[str, dict] = {}
for run_entry in config["runs"]:
model_type = next(iter(run_entry))
if model_type == "baseline":
continue
run_config = run_entry[model_type]
runs |= register_run(model_type, run_config)
return runs
Expand All @@ -159,11 +161,22 @@ def collect_all_candidates():
def collect_all_baselines():
    """Collect all baselines defined in the configuration.

    Baselines are read from two places, in this order:

    1. ``baseline`` entries listed in ``config["runs"]`` (preferred).
    2. The legacy top-level ``config["baselines"]`` list, if present.

    Returns:
        dict mapping baseline ID to its configuration dict. The ID is the
        stem of the baseline's ``root`` path; the deprecated ``baseline_id``
        key is removed from every returned configuration. If two entries
        resolve to the same ID, the later one wins.

    Raises:
        KeyError: if a baseline entry has no ``root``.
    """
    # Preferred location: baseline entries listed alongside the ML runs.
    entries = [
        run_entry["baseline"]
        for run_entry in config["runs"]
        if "baseline" in run_entry
    ]

    # Backward compatibility with the legacy top-level `baselines` block.
    # `or []` also covers an explicit `baselines: null` in the YAML.
    entries += [
        baseline_entry[next(iter(baseline_entry))]
        for baseline_entry in (config.get("baselines") or [])
    ]

    baselines = {}
    # Deep-copy so stripping `baseline_id` never mutates the global config.
    for baseline_config in copy.deepcopy(entries):
        # `baseline_id` is optional and deprecated: drop it if present
        # instead of requiring it.
        baseline_config.pop("baseline_id", None)
        # NOTE: `Path.stem` drops a final suffix, so a root whose last
        # component contains a dot (e.g. ".../ICON-CH1.5") gives a
        # truncated ID.
        baseline_id = Path(baseline_config["root"]).stem
        baselines[baseline_id] = baseline_config

    return baselines


Expand Down
27 changes: 18 additions & 9 deletions workflow/tools/config.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,18 @@
"description": "Configuration for a single baseline to include in the verification.",
"properties": {
"baseline_id": {
"description": "Identifier for the baseline, e.g. 'COSMO-E'.",
"minLength": 1,
"title": "Baseline Id",
"type": "string"
"anyOf": [
{
"minLength": 1,
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Deprecated compatibility field. Workflow baseline IDs are derived from the stem of `root`.",
"title": "Baseline Id"
},
"label": {
"description": "Label for the baseline that will be used in experiment results such as reports and figures.",
Expand All @@ -16,7 +24,7 @@
"type": "string"
},
"root": {
"description": "Root directory where the baseline data is stored.",
"description": "Root directory where the baseline data is stored. The workflow derives the baseline ID from the stem of this path.",
"minLength": 1,
"title": "Root",
"type": "string"
Expand All @@ -29,7 +37,6 @@
}
},
"required": [
"baseline_id",
"label",
"root",
"steps"
Expand Down Expand Up @@ -539,22 +546,25 @@
"title": "Dates"
},
"runs": {
"description": "Dictionary of runs to execute, with run IDs as keys and configurations as values.",
"description": "List of experiment participants, including forecaster/interpolator ML runs and baselines.",
"items": {
"anyOf": [
{
"$ref": "#/$defs/ForecasterItem"
},
{
"$ref": "#/$defs/InterpolatorItem"
},
{
"$ref": "#/$defs/BaselineItem"
}
]
},
"title": "Runs",
"type": "array"
},
"baselines": {
"description": "Dictionary of baselines to include in the verification.",
"description": "Deprecated top-level baselines list. Prefer defining baseline entries directly in `runs`.",
"items": {
"$ref": "#/$defs/BaselineItem"
},
Expand Down Expand Up @@ -585,7 +595,6 @@
"description",
"dates",
"runs",
"baselines",
"truth",
"stratification",
"locations",
Expand Down
Loading