From c1375ab0ed4e7c27d40daddde37bb09cde98c350 Mon Sep 17 00:00:00 2001 From: Daniele Nerini Date: Tue, 7 Oct 2025 14:01:42 +0200 Subject: [PATCH 01/34] Initial draft (pseudo code) --- workflow/rules/verif_obs.smk | 51 ++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 workflow/rules/verif_obs.smk diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk new file mode 100644 index 00000000..69e89c38 --- /dev/null +++ b/workflow/rules/verif_obs.smk @@ -0,0 +1,51 @@ +from pathlib import Path + +rule generate_mec_namelist: + input: + template="resources/mec/namelist.jinja2" + output: + namelist=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/namelist", + run: + """ + import jinja2 + + context = {"init_time": wildcards.init_time} + env = jinja2.Environment( + loader=jinja2.FileSystemLoader({Path(input.template).parent}) + ) + template = env.get_template(input.template) + namelist = template.render(**context) + + namelist_fn = Path(output.namelist) + with namelist_fn.open("w+") as f: + f.write(namelist) + """ + +rule run_mec: + input: + grib_dir=OUT_ROOT / "data/runs/{run_id}/{init_time}/grib", + ekf="path/to/ekf/file{init_time}", + namelist=generate_mec_namelist.output.namelist + output: + feedback=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/feedbacks/verSYNOP.nc + # module: sarus? + resources: + cpus_per_task=1, + runtime="1h", + shell: + """ + # some code to prepare the data + # (or use a separate rule) + # sarus command from Mary + sarus pull ... + """ + +rule rename_feedback: + input: + feedback=run_mec.output.feedback + output: + feedback=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/feedback + shell: + + +# rule ... \ No newline at end of file From 9f608f2f0dd65dade23606bee408e331e8d19757 Mon Sep 17 00:00:00 2001 From: Mary McGlohon Date: Thu, 13 Nov 2025 16:58:38 +0100 Subject: [PATCH 02/34] add namelist as resource --- resources/mec/namelist.jinja2 | 80 +++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 resources/mec/namelist.jinja2 diff --git a/resources/mec/namelist.jinja2 b/resources/mec/namelist.jinja2 new file mode 100644 index 00000000..a9f5c9dc --- /dev/null +++ b/resources/mec/namelist.jinja2 @@ -0,0 +1,80 @@ +!============================================================================== +! namelist for MEC +!============================================================================== + + !=================== + ! general parameters + !=================== + &run + method = 'GMESTAT' ! Model Equivalent Calculator + model = 'ML' ! forecast model. One of "COSMO" "ICON" "ML" + input = './input_mod' ! input data path + data = '/oprusers/osm/opr.emme/data/' ! data path for auxiliary data + obsinput = './input_obs' ! observation input data path + output = './output' ! output data to working directory + time_ana = 20201028000000 ! analysis date + read_fields = 'ps u t v q geof t2m td2m u_10m v_10m' + grib_edition = 2 + grib_library = 2 ! GRIB-API used: 1=GRIBEX 2=GRIB2-API + cosmo_refatm = 2 ! reference atmosphere to be used for COSMO:1or2 + fc_hours = 0 ! Default is 3h. Has to be set to 0 if one wants to verify +0h leadtime + nproc1 = 1 + nproc2 = 1 + / + + !=============================== + ! observation related parameters + !=============================== + &observations + !--------------------------------------------------- + ! read from CDFIN files (if not set use mon/cof/ekf) + !--------------------------------------------------- + read_cdfin = F ! (F): dont read COSMO CDFIN files get obs from ekf + vint_lin_t = T ! linear vertical interpolation for temperature + vint_lin_z = T ! linear vertical interpolation for geopotential + vint_lin_uv = T ! linear vertical interpolation for wind + ptop_lapse = 850. + pbot_lapse = 950. +! int_nn = T ! horizontal interpolation: nearest neighbor + / + + !==================== + ! Ensemble parameters + !==================== + &ENKF + k_enkf = 0 ! ensemble size (0 for det. run) + det_run = 1 ! set to 1 for deterministic run, 0 for ensemble + / + + !================================ + ! Verification related parameters + !================================ + &veri_obs + obstypes = "SYNOP" ! "SYNOP TEMP" + fc_times = 0000,1200,2400,3600,4800,6000,7200,8400,9600,10800,12000 ! forecast lead time at reference (hhmm) + prefix_in = 'ekf' + prefix_out = 'ver' + rm_old = 2 ! overwrite entries in verification file ? + fc_file = '_FCR_TIME_/lfffDDVVMMSS' ! template for forecast file name + !det_suffix = '.m000' ! for ensemble forecast must be set in order to differentiate between the input model files. + time_range = 1 + ekf_concat = F + !ekf_rm_ve = -2 -7 ! special (<0) member ids to remove + !eps_offset = 40 ! skip the first members up to this value to which is set. The default is 0 + !ekf_offset = 40 ! exclude members in ekf-file up to this value => try setting to 40? => no effect => ask Hendrik? + ref_runtype = 'any' ! accept any runtype for the reference state + / + + &report + time_b = -0029 ! (hhmm, inclusive) + time_e = 0030 ! (hhmm, exclusive) + / + + &cosmo_obs + lcd187 = .true. ! use ground based wind lidar obs + verification_start = -29 ! (min, inclusive) + verification_end = 30 ! (min, inclusive) + / + &synop_obs + version = 1 ! Harald: Für version >= 3 werden konsistent die DACE-internen Kontrollvariablen genutzt, mit ggf. Umrechnung tv,rh <-> t,td etc. DACE-Version mind. 2.22 . Random crashes (T=10000 Umrechnungen failen). Need of no undefs for version=3 + / \ No newline at end of file From e82bd9487e27c792a3c2e173b5f86ae26e3e047a Mon Sep 17 00:00:00 2001 From: Mary McGlohon Date: Thu, 13 Nov 2025 16:59:04 +0100 Subject: [PATCH 03/34] add verif_obs.smk to Snakefile --- workflow/Snakefile | 1 + 1 file changed, 1 insertion(+) diff --git a/workflow/Snakefile b/workflow/Snakefile index 8b8d99ae..f806f44d 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -15,6 +15,7 @@ include: "rules/data.smk" include: "rules/inference.smk" include: "rules/verif.smk" include: "rules/report.smk" +include: "rules/verif_obs.smk" # optional messages, log and error handling From c3ab6516e7c425afe917f5c3d1bb778db83a6725 Mon Sep 17 00:00:00 2001 From: Mary McGlohon Date: Thu, 13 Nov 2025 18:32:32 +0100 Subject: [PATCH 04/34] Add rules for observation data and namelist generation (using fake data) --- workflow/Snakefile | 8 +++++ workflow/rules/verif_obs.smk | 58 ++++++++++++++++++++---------------- 2 files changed, 40 insertions(+), 26 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index f806f44d..de216ff2 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -85,3 +85,11 @@ rule verif_metrics_plot_all: rules.verif_metrics_plot.output, experiment=EXPERIMENT_HASH, ), + +rule verif_obs_all: + input: + expand( + rules.run_mec.output, + init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], + run_id=collect_all_runs(), + ) \ No newline at end of file diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk index 69e89c38..3f3ac961 100644 --- a/workflow/rules/verif_obs.smk +++ b/workflow/rules/verif_obs.smk @@ -1,51 +1,57 @@ from pathlib import Path +rule generate_observation_data: + input: + testcase_dir="/scratch/mch/mmcgloho/MEC/2020102800", + output: + input_obs=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_obs"), + input_mod=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_mod"), + parent=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec"), + shell: + """ + cp -r {input.testcase_dir}/input_obs {output.parent}/ + cp -r {input.testcase_dir}/input_mod {output.parent}/ + ls {output.parent} + # TODO: Some data still seems to be missing. + """ + rule generate_mec_namelist: input: template="resources/mec/namelist.jinja2" output: + #namelist=OUT_ROOT / "data/runs/mec/namelist", + # TODO: get wildcards working. namelist=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/namelist", run: - """ import jinja2 - + # TODO: get wildcards working. context = {"init_time": wildcards.init_time} + template_path = Path(input.template) env = jinja2.Environment( - loader=jinja2.FileSystemLoader({Path(input.template).parent}) + loader=jinja2.FileSystemLoader({template_path.parent}) ) - template = env.get_template(input.template) + template = env.get_template(template_path.name) namelist = template.render(**context) - namelist_fn = Path(output.namelist) with namelist_fn.open("w+") as f: f.write(namelist) - """ rule run_mec: input: - grib_dir=OUT_ROOT / "data/runs/{run_id}/{init_time}/grib", - ekf="path/to/ekf/file{init_time}", - namelist=generate_mec_namelist.output.namelist + testcase_dir=directory(rules.generate_observation_data.output.parent), + namelist=rules.generate_mec_namelist.output.namelist output: - feedback=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/feedbacks/verSYNOP.nc - # module: sarus? + OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/output/verSYNOP.nc" resources: cpus_per_task=1, runtime="1h", shell: + #TODO(mmcglohon): Replace podman with sarus if needed. """ - # some code to prepare the data - # (or use a separate rule) - # sarus command from Mary - sarus pull ... - """ - -rule rename_feedback: - input: - feedback=run_mec.output.feedback - output: - feedback=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/feedback - shell: - - -# rule ... \ No newline at end of file + echo 'running mec on namelist:' + cat {input.namelist} + ls {input.testcase_dir} + podman pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main + srun --pty -N1 -c 11 -p postproc -t 2:00:00 podman run --mount=type=bind,source={input.testcase_dir},destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main + ls -l {output} + """ \ No newline at end of file From 7512d96e52756c4827c44e436943f9da15c1fd4f Mon Sep 17 00:00:00 2001 From: Mary McGlohon Date: Thu, 13 Nov 2025 18:43:14 +0100 Subject: [PATCH 05/34] add newline to namelist template --- resources/mec/namelist.jinja2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/resources/mec/namelist.jinja2 b/resources/mec/namelist.jinja2 index a9f5c9dc..6fa71063 100644 --- a/resources/mec/namelist.jinja2 +++ b/resources/mec/namelist.jinja2 @@ -77,4 +77,5 @@ / &synop_obs version = 1 ! Harald: Für version >= 3 werden konsistent die DACE-internen Kontrollvariablen genutzt, mit ggf. Umrechnung tv,rh <-> t,td etc. DACE-Version mind. 2.22 . Random crashes (T=10000 Umrechnungen failen). Need of no undefs for version=3 - / \ No newline at end of file + / + \ No newline at end of file From 13301a507664a9f2d9807910c1f2605523d7076b Mon Sep 17 00:00:00 2001 From: Mary McGlohon Date: Thu, 13 Nov 2025 19:16:01 +0100 Subject: [PATCH 06/34] somewhat working version of run_mec (with fake data) --- workflow/Snakefile | 1 + workflow/rules/verif_obs.smk | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index de216ff2..2e27e44f 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -86,6 +86,7 @@ rule verif_metrics_plot_all: experiment=EXPERIMENT_HASH, ), +# To run: snakemake --cores 1 --configfile=config/recasters.yaml verif_obs_all rule verif_obs_all: input: expand( diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk index 3f3ac961..fffa29c8 100644 --- a/workflow/rules/verif_obs.smk +++ b/workflow/rules/verif_obs.smk @@ -51,7 +51,8 @@ rule run_mec: echo 'running mec on namelist:' cat {input.namelist} ls {input.testcase_dir} - podman pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main + # Note: pull command currently redundant; may not be the case with sarus. + #podman pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main srun --pty -N1 -c 11 -p postproc -t 2:00:00 podman run --mount=type=bind,source={input.testcase_dir},destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main ls -l {output} """ \ No newline at end of file From e722e5f3ac9023305bd64a4fbcba81d6b101fd0f Mon Sep 17 00:00:00 2001 From: Mary McGlohon Date: Mon, 24 Nov 2025 18:00:28 +0100 Subject: [PATCH 07/34] correct typo and add optional script for generating namelist, in case we want to factor it out of the rule --- workflow/Snakefile | 2 +- workflow/scripts/generate_mec_namelist.py | 27 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 workflow/scripts/generate_mec_namelist.py diff --git a/workflow/Snakefile b/workflow/Snakefile index 2e27e44f..cf595cb3 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -86,7 +86,7 @@ rule verif_metrics_plot_all: experiment=EXPERIMENT_HASH, ), -# To run: snakemake --cores 1 --configfile=config/recasters.yaml verif_obs_all +# To run: snakemake --cores 1 --configfile=config/forecasters.yaml verif_obs_all rule verif_obs_all: input: expand( diff --git a/workflow/scripts/generate_mec_namelist.py b/workflow/scripts/generate_mec_namelist.py new file mode 100644 index 00000000..0e938805 --- /dev/null +++ b/workflow/scripts/generate_mec_namelist.py @@ -0,0 +1,27 @@ +import logging +import jinja2 +# snakemake object inherited by default, but this enables code completion. +from snakemake.script import snakemake +from pathlib import Path + +# Note: not currently in use; optional script in case we want to factor it out +# of the rules file +def main(args): + #TODO: get wildcards working + context = {} + #context = {"init_time": snakemake.wildcards.init_time} + template_path = Path(snakemake.input.template) + logging.info('writing namelist to {template_filename}') + env = jinja2.Environment( + loader=jinja2.FileSystemLoader({template_path.parent}) + ) + template = env.get_template(template_path.name) + namelist = template.render(**context) + namelist_fn = Path(snakemake.output['namelist']) + with namelist_fn.open("w+") as f: + f.write(namelist) + logging.info('finished writing namelist') + + +if __name__ == "__main__": + main() \ No newline at end of file From 3d9e3c10973b643ca3f7c6fc72404e3b379e9247 Mon Sep 17 00:00:00 2001 From: Francesco Zanetta <62377868+frazane@users.noreply.github.com> Date: Wed, 8 Oct 2025 14:01:59 +0200 Subject: [PATCH 08/34] fix: add localrule to inference_interpolator rule (#57) --- workflow/rules/inference.smk | 1 + 1 file changed, 1 insertion(+) diff --git a/workflow/rules/inference.smk b/workflow/rules/inference.smk index 38ebf285..91e75b73 100644 --- a/workflow/rules/inference.smk +++ b/workflow/rules/inference.smk @@ -192,6 +192,7 @@ def _get_forecaster_run_id(run_id): rule inference_interpolator: """Run the interpolator for a specific run ID.""" + localrule: True input: pyproject=rules.create_inference_pyproject.output.pyproject, image=rules.make_squashfs_image.output.image, From 918913f9188288b9e01b8c2c7ca9c25b0259455b Mon Sep 17 00:00:00 2001 From: omiralles Date: Wed, 8 Oct 2025 16:40:39 +0200 Subject: [PATCH 09/34] Fix for interpolator rule --- workflow/rules/inference.smk | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/workflow/rules/inference.smk b/workflow/rules/inference.smk index 91e75b73..f83f0fd5 100644 --- a/workflow/rules/inference.smk +++ b/workflow/rules/inference.smk @@ -143,7 +143,7 @@ rule inference_forecaster: slurm_partition=lambda wc: get_resource(wc, "slurm_partition", "short-shared"), cpus_per_task=lambda wc: get_resource(wc, "cpus_per_task", 24), mem_mb_per_cpu=lambda wc: get_resource(wc, "mem_mb_per_cpu", 8000), - runtime=lambda wc: get_resource(wc, "runtime", "20m"), + runtime=lambda wc: get_resource(wc, "runtime", "40m"), gres=lambda wc: f"gpu:{get_resource(wc, 'gpu',1)}", ntasks=lambda wc: get_resource(wc, "tasks", 1), slurm_extra=lambda wc, input: f"--uenv={Path(input.image).resolve()}:/user-environment", @@ -222,13 +222,14 @@ rule inference_interpolator: if RUN_CONFIGS[wc.run_id].get("forecaster") is None else _get_forecaster_run_id(wc.run_id) ), + image_path=lambda wc, input: f"{Path(input.image).resolve()}", log: OUT_ROOT / "logs/inference_interpolator/{run_id}-{init_time}.log", resources: slurm_partition=lambda wc: get_resource(wc, "slurm_partition", "short-shared"), cpus_per_task=lambda wc: get_resource(wc, "cpus_per_task", 24), mem_mb_per_cpu=lambda wc: get_resource(wc, "mem_mb_per_cpu", 8000), - runtime=lambda wc: get_resource(wc, "runtime", "20m"), + runtime=lambda wc: get_resource(wc, "runtime", "40m"), gres=lambda wc: f"gpu:{get_resource(wc, 'gpu',1)}", ntasks=lambda wc: get_resource(wc, "tasks", 1), slurm_extra=lambda wc, input: f"--uenv={Path(input.image).resolve()}:/user-environment", From 179eb4da3a3eb6ef72ef492cb43048208b67da74 Mon Sep 17 00:00:00 2001 From: Daniele Nerini Date: Tue, 14 Oct 2025 09:09:40 +0200 Subject: [PATCH 10/34] Consolidate multi packages into unique src/ dir (#58) --- pyproject.toml | 6 ++++++ .../src/verification.py => src/verification/__init__.py | 0 workflow/rules/verif.smk | 3 ++- workflow/scripts/src/__init__.py | 0 workflow/scripts/verif_baseline.py | 3 ++- workflow/scripts/verif_from_grib.py | 2 +- 6 files changed, 11 insertions(+), 3 deletions(-) rename workflow/scripts/src/verification.py => src/verification/__init__.py (100%) delete mode 100644 workflow/scripts/src/__init__.py diff --git a/pyproject.toml b/pyproject.toml index 4c60c9c5..043c1718 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,3 +45,9 @@ dev = [ markers = [ "longtest: mark tests that take a long time to run, e.g. integration tests", ] + +[tool.hatch.build.targets.wheel] +packages = [ + "src/evalml", + "src/verification" +] \ No newline at end of file diff --git a/workflow/scripts/src/verification.py b/src/verification/__init__.py similarity index 100% rename from workflow/scripts/src/verification.py rename to src/verification/__init__.py diff --git a/workflow/rules/verif.smk b/workflow/rules/verif.smk index 677c8d7e..d1032cb2 100644 --- a/workflow/rules/verif.smk +++ b/workflow/rules/verif.smk @@ -13,7 +13,7 @@ include: "common.smk" rule verif_metrics_baseline: input: script="workflow/scripts/verif_baseline.py", - module="workflow/scripts/src/verification.py", + module="src/verification/__init__.py", baseline_zarr=lambda wc: expand( "{root}/FCST{year}.zarr", root=BASELINE_CONFIGS[wc.baseline_id].get("root"), @@ -55,6 +55,7 @@ def _get_no_none(dict, key, replacement): rule verif_metrics: input: script="workflow/scripts/verif_from_grib.py", + module="src/verification/__init__.py", inference_okfile=_inference_routing_fn, grib_output=rules.inference_routing.output[0], analysis_zarr=config["analysis"].get("analysis_zarr"), diff --git a/workflow/scripts/src/__init__.py b/workflow/scripts/src/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/workflow/scripts/verif_baseline.py b/workflow/scripts/verif_baseline.py index 88120c57..3f034cc9 100644 --- a/workflow/scripts/verif_baseline.py +++ b/workflow/scripts/verif_baseline.py @@ -12,7 +12,8 @@ import numpy as np # noqa: E402 import xarray as xr # noqa: E402 -from src.verification import verify # noqa: E402 + +from verification import verify # noqa: E402 LOG = logging.getLogger(__name__) logging.basicConfig( diff --git a/workflow/scripts/verif_from_grib.py b/workflow/scripts/verif_from_grib.py index da7c5ece..61d62a2a 100644 --- a/workflow/scripts/verif_from_grib.py +++ b/workflow/scripts/verif_from_grib.py @@ -13,7 +13,7 @@ import numpy as np # noqa: E402 import xarray as xr # noqa: E402 -from src.verification import verify # noqa: E402 +from verification import verify # noqa: E402 LOG = logging.getLogger(__name__) logging.basicConfig( From e791a3058fb792288510afcbf3b80233ff6e4da7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oph=C3=A9lia=20Miralles?= Date: Wed, 15 Oct 2025 14:56:39 +0200 Subject: [PATCH 11/34] Update configs (#63) --- resources/inference/configs/forecaster.yaml | 1 - resources/inference/configs/interpolator.yaml | 2 +- .../configs/interpolator_from_test_data.yaml | 4 ---- ...interpolator_from_test_data_stretched.yaml | 24 +++++++++++++++++++ .../configs/interpolator_stretched.yaml | 2 +- 5 files changed, 26 insertions(+), 7 deletions(-) create mode 100644 resources/inference/configs/interpolator_from_test_data_stretched.yaml diff --git a/resources/inference/configs/forecaster.yaml b/resources/inference/configs/forecaster.yaml index dac3e496..4f558a3e 100644 --- a/resources/inference/configs/forecaster.yaml +++ b/resources/inference/configs/forecaster.yaml @@ -4,7 +4,6 @@ input: allow_nans: true - output: tee: outputs: diff --git a/resources/inference/configs/interpolator.yaml b/resources/inference/configs/interpolator.yaml index 8cbb98fe..41253c0c 100644 --- a/resources/inference/configs/interpolator.yaml +++ b/resources/inference/configs/interpolator.yaml @@ -56,7 +56,7 @@ output: templates: samples: _resources/templates_index_cosmo.yaml -forcings: +constant_forcings: test: use_original_paths: true diff --git a/resources/inference/configs/interpolator_from_test_data.yaml b/resources/inference/configs/interpolator_from_test_data.yaml index aaa938fa..07aea411 100644 --- a/resources/inference/configs/interpolator_from_test_data.yaml +++ b/resources/inference/configs/interpolator_from_test_data.yaml @@ -17,10 +17,6 @@ output: templates: samples: _resources/templates_index_cosmo.yaml -forcings: - test: - use_original_paths: true - verbosity: 1 allow_nans: true output_frequency: "1h" diff --git a/resources/inference/configs/interpolator_from_test_data_stretched.yaml b/resources/inference/configs/interpolator_from_test_data_stretched.yaml new file mode 100644 index 00000000..19cd733e --- /dev/null +++ b/resources/inference/configs/interpolator_from_test_data_stretched.yaml @@ -0,0 +1,24 @@ +runner: time_interpolator + +input: + test: + use_original_paths: true + +output: + tee: + outputs: + - extract_lam: + output: + assign_mask: + mask: "source0/trimedge_mask" + output: + grib: + path: grib/{dateTime}_{step:03}.grib + encoding: + typeOfGeneratingProcess: 2 + templates: + samples: _resources/templates_index_cosmo.yaml + +verbosity: 1 +allow_nans: true +output_frequency: "1h" diff --git a/resources/inference/configs/interpolator_stretched.yaml b/resources/inference/configs/interpolator_stretched.yaml index 0010ffe8..300d6c65 100644 --- a/resources/inference/configs/interpolator_stretched.yaml +++ b/resources/inference/configs/interpolator_stretched.yaml @@ -79,7 +79,7 @@ input: - - shortName: TOT_PREC - tp -forcings: +constant_forcings: test: use_original_paths: true From d1977121fabfd9b3d45854a47c91b8c7eedb0a92 Mon Sep 17 00:00:00 2001 From: Daniele Nerini Date: Mon, 20 Oct 2025 11:57:11 +0200 Subject: [PATCH 12/34] Adopt 'steps' instead of 'lead_time' (#62) --- README.md | 4 +-- config/forecasters-co1e.yaml | 3 +-- config/forecasters.yaml | 4 +-- config/interpolators.yaml | 4 +-- src/evalml/config.py | 38 ++++++++++++++++++++++------ workflow/rules/inference.smk | 11 +++++++-- workflow/rules/verif.smk | 2 +- workflow/tools/config.schema.json | 41 ++++++++----------------------- 8 files changed, 58 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index 2050b312..64fe254f 100644 --- a/README.md +++ b/README.md @@ -33,15 +33,15 @@ dates: end: 2020-01-10T00:00 frequency: 54h -lead_time: 120h - runs: - forecaster: mlflow_id: 2f962c89ff644ca7940072fa9cd088ec label: Stage D - N320 global grid with CERRA finetuning + steps: 0/126/6 - forecaster: mlflow_id: d0846032fc7248a58b089cbe8fa4c511 label: M-1 forecaster + steps: 0/126/6 baselines: diff --git a/config/forecasters-co1e.yaml b/config/forecasters-co1e.yaml index 490196cb..6171d62a 100644 --- a/config/forecasters-co1e.yaml +++ b/config/forecasters-co1e.yaml @@ -8,13 +8,12 @@ dates: end: 2020-01-10T00:00 frequency: 54h -lead_time: 120h - runs: - forecaster: mlflow_id: 2174c939c8844555a52843b71219d425 label: Cosmo 1km + era5 N320, finetuned on cerra checkpoint, lam resolution 11 config: resources/inference/configs/forecaster_no_trimedge_fromtraining.yaml + steps: 0/126/6 inference_resources: gpu: 4 tasks: 4 diff --git a/config/forecasters.yaml b/config/forecasters.yaml index 17d98729..413a734f 100644 --- a/config/forecasters.yaml +++ b/config/forecasters.yaml @@ -8,15 +8,15 @@ dates: # end: 2020-03-30T00:00 frequency: 36h -lead_time: 120h - runs: - forecaster: mlflow_id: 2f962c89ff644ca7940072fa9cd088ec label: Stage D - N320 global grid with CERRA finetuning + steps: 0/126/6 - forecaster: mlflow_id: d0846032fc7248a58b089cbe8fa4c511 label: M-1 forecaster + steps: 0/126/6 baselines: - baseline: diff --git a/config/interpolators.yaml b/config/interpolators.yaml index 893b579c..c8e796e4 100644 --- a/config/interpolators.yaml +++ b/config/interpolators.yaml @@ -7,8 +7,6 @@ dates: end: 2020-01-10T00:00 frequency: 54h -lead_time: 120h - runs: - interpolator: mlflow_id: 9c18b90074214d769b8b383722fc5a06 @@ -27,6 +25,7 @@ runs: forecaster: mlflow_id: d0846032fc7248a58b089cbe8fa4c511 config: resources/inference/configs/forecaster_with_global.yaml + steps: 0/126/6 extra_dependencies: - git+https://github.com/ecmwf/anemoi-inference@fix/interp_files - torch-geometric==2.6.1 @@ -39,6 +38,7 @@ runs: forecaster: mlflow_id: d0846032fc7248a58b089cbe8fa4c511 config: resources/inference/configs/forecaster_with_global.yaml + steps: 0/126/6 extra_dependencies: - git+https://github.com/ecmwf/anemoi-inference@fix/interp_files - torch-geometric==2.6.1 diff --git a/src/evalml/config.py b/src/evalml/config.py index c6bcf1e7..c4a97ce2 100644 --- a/src/evalml/config.py +++ b/src/evalml/config.py @@ -1,7 +1,7 @@ from pathlib import Path from typing import Dict, List, Any -from pydantic import BaseModel, Field, RootModel, HttpUrl +from pydantic import BaseModel, Field, RootModel, HttpUrl, field_validator PROJECT_ROOT = Path(__file__).parents[2] @@ -70,9 +70,13 @@ class RunConfig(BaseModel): None, description="The label for the run that will be used in experiment results such as reports and figures.", ) - steps: str | None = Field( - None, - description="Forecast steps to be used from interpolator, e.g. '0/126/6'.", + steps: str = Field( + ..., + description=( + "Forecast lead times in hours, formatted as 'start/end/step'. " + "The range is half-open [start, end), meaning it includes the start " + "but excludes the end. Example: '0/126/6' for lead times every 6 hours up to 120 hours." + ), ) extra_dependencies: List[str] = Field( default_factory=list, @@ -86,6 +90,29 @@ class RunConfig(BaseModel): config: Dict[str, Any] | str + @field_validator("steps") + def validate_steps(cls, v: str) -> str: + if "/" not in v: + raise ValueError( + f"Steps must follow the format 'start/stop/step', got '{v}'" + ) + parts = v.split("/") + if len(parts) != 3: + raise ValueError("Steps must be formatted as 'start/end/step'.") + try: + start, end, step = map(int, parts) + except ValueError: + raise ValueError("Start, end, and interval must be integers.") + if start >= end: + raise ValueError(f"Start ({start}) must be less than end ({end}).") + if step <= 0: + raise ValueError(f"Interval ({step}) must be a positive integer.") + if (end - start) % step != 0: + raise ValueError( + f"The step ({step}) must evenly divide the range ({end - start})." + ) + return v + class ForecasterConfig(RunConfig): """Single training run stored in MLflow.""" @@ -240,9 +267,6 @@ class ConfigModel(BaseModel): description="Description of the experiment, e.g. 'Hindcast of the 2023 season.'", ) dates: Dates | ExplicitDates - lead_time: str = Field( - ..., description="Forecast length, e.g. '120h'", pattern=r"^\d+[hmd]$" - ) runs: List[ForecasterItem | InterpolatorItem] = Field( ..., description="Dictionary of runs to execute, with run IDs as keys and configurations as values.", diff --git a/workflow/rules/inference.smk b/workflow/rules/inference.smk index f83f0fd5..ea136b97 100644 --- a/workflow/rules/inference.smk +++ b/workflow/rules/inference.smk @@ -118,6 +118,13 @@ def get_resource(wc, field: str, default): return getattr(rc["inference_resources"], field) or default +def get_leadtime(wc): + """Get the lead time from the run config.""" + start, end, step = RUN_CONFIGS[wc.run_id]["steps"].split("/") + end = int(end) - int(step) # make inclusive + return f"{end}h" + + rule inference_forecaster: localrule: True input: @@ -130,7 +137,7 @@ rule inference_forecaster: checkpoints_path=parse_input( input.pyproject, parse_toml, key="tool.anemoi.checkpoints_path" ), - lead_time=config["lead_time"], + lead_time=lambda wc: get_leadtime(wc), output_root=(OUT_ROOT / "data").resolve(), resources_root=Path("resources/inference").resolve(), reftime_to_iso=lambda wc: datetime.strptime( @@ -211,7 +218,7 @@ rule inference_interpolator: checkpoints_path=parse_input( input.pyproject, parse_toml, key="tool.anemoi.checkpoints_path" ), - lead_time=config["lead_time"], + lead_time=lambda wc: get_leadtime(wc), output_root=(OUT_ROOT / "data").resolve(), resources_root=Path("resources/inference").resolve(), reftime_to_iso=lambda wc: datetime.strptime( diff --git a/workflow/rules/verif.smk b/workflow/rules/verif.smk index d1032cb2..10acbe13 100644 --- a/workflow/rules/verif.smk +++ b/workflow/rules/verif.smk @@ -66,7 +66,7 @@ rule verif_metrics: # TODO: implement logic to use experiment name instead of run_id as wildcard params: fcst_label=lambda wc: RUN_CONFIGS[wc.run_id].get("label"), - fcst_steps=lambda wc: _get_no_none(RUN_CONFIGS[wc.run_id], "steps", "0/126/6"), + fcst_steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"], analysis_label=config["analysis"].get("label"), log: OUT_ROOT / "logs/verif_metrics/{run_id}-{init_time}.log", diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json index 5b3216ce..4bc53068 100644 --- a/workflow/tools/config.schema.json +++ b/workflow/tools/config.schema.json @@ -167,17 +167,9 @@ "title": "Label" }, "steps": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Forecast steps to be used from interpolator, e.g. '0/126/6'.", - "title": "Steps" + "description": "Forecast lead times in hours, formatted as 'start/end/step'. The range is half-open [start, end), meaning it includes the start but excludes the end. Example: '0/126/6' for lead times every 6 hours up to 120 hours.", + "title": "Steps", + "type": "string" }, "extra_dependencies": { "description": "List of extra dependencies to install for this model. These will be added to the pyproject.toml file in the run directory.", @@ -214,7 +206,8 @@ } }, "required": [ - "mlflow_id" + "mlflow_id", + "steps" ], "title": "ForecasterConfig", "type": "object" @@ -355,17 +348,9 @@ "title": "Label" }, "steps": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Forecast steps to be used from interpolator, e.g. '0/126/6'.", - "title": "Steps" + "description": "Forecast lead times in hours, formatted as 'start/end/step'. The range is half-open [start, end), meaning it includes the start but excludes the end. Example: '0/126/6' for lead times every 6 hours up to 120 hours.", + "title": "Steps", + "type": "string" }, "extra_dependencies": { "description": "List of extra dependencies to install for this model. These will be added to the pyproject.toml file in the run directory.", @@ -414,7 +399,8 @@ } }, "required": [ - "mlflow_id" + "mlflow_id", + "steps" ], "title": "InterpolatorConfig", "type": "object" @@ -509,12 +495,6 @@ ], "title": "Dates" }, - "lead_time": { - "description": "Forecast length, e.g. '120h'", - "pattern": "^\\d+[hmd]$", - "title": "Lead Time", - "type": "string" - }, "runs": { "description": "Dictionary of runs to execute, with run IDs as keys and configurations as values.", "items": { @@ -551,7 +531,6 @@ "required": [ "description", "dates", - "lead_time", "runs", "baselines", "analysis", From 956898754d8b959d4a90be0441eccfbd34330b2a Mon Sep 17 00:00:00 2001 From: Daniele Nerini Date: Mon, 20 Oct 2025 12:14:00 +0200 Subject: [PATCH 13/34] Update example config for experiment with interpolators (#70) --- config/interpolators.yaml | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/config/interpolators.yaml b/config/interpolators.yaml index c8e796e4..e04fc39c 100644 --- a/config/interpolators.yaml +++ b/config/interpolators.yaml @@ -10,29 +10,17 @@ dates: runs: - interpolator: mlflow_id: 9c18b90074214d769b8b383722fc5a06 - label: LAM Interpolator (COSMO-E analysis) + label: M-2 interpolator (KENDA) steps: 0/121/1 config: resources/inference/configs/interpolator_from_test_data.yaml forecaster: null extra_dependencies: - - git+https://github.com/ecmwf/anemoi-inference@fix/interp_files + - git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6 - torch-geometric==2.6.1 - anemoi-graphs==0.5.2 - interpolator: - mlflow_id: 9c18b90074214d769b8b383722fc5a06 - label: LAM Interpolator (M-1 forecaster) - steps: 0/121/1 - forecaster: - mlflow_id: d0846032fc7248a58b089cbe8fa4c511 - config: resources/inference/configs/forecaster_with_global.yaml - steps: 0/126/6 - extra_dependencies: - - git+https://github.com/ecmwf/anemoi-inference@fix/interp_files - - torch-geometric==2.6.1 - - anemoi-graphs==0.5.2 - - interpolator: - mlflow_id: 07c3d9698db14d859b78bb712a65bbbf - label: SGM Interpolator (M-1 forecaster) + mlflow_id: 8d1e0410ca7d4f74b368b3079878259a + label: M-2 interpolator (M-1 forecaster) steps: 0/121/1 config: resources/inference/configs/interpolator_stretched.yaml forecaster: @@ -40,9 +28,14 @@ runs: config: resources/inference/configs/forecaster_with_global.yaml steps: 0/126/6 extra_dependencies: - - git+https://github.com/ecmwf/anemoi-inference@fix/interp_files + - git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6 - torch-geometric==2.6.1 - anemoi-graphs==0.5.2 + - forecaster: + mlflow_id: d0846032fc7248a58b089cbe8fa4c511 + label: M-1 forecaster + config: resources/inference/configs/forecaster_with_global.yaml + steps: 0/126/6 baselines: - baseline: From 128eb91636631103e41dccd68f555e6bc30ab543 Mon Sep 17 00:00:00 2001 From: Daniele Nerini Date: Mon, 20 Oct 2025 14:27:18 +0200 Subject: [PATCH 14/34] Distinguish between primary runs ('candidates') and secondary runs (#64) * Distinguish between primary runs ('candidates') and secondary runs * Docstrings --- workflow/Snakefile | 6 +++--- workflow/rules/common.smk | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index cf595cb3..b65cd638 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -55,7 +55,7 @@ rule sandbox_all: input: expand( rules.create_inference_sandbox.output.sandbox, - run_id=collect_all_runs(), + run_id=collect_all_candidates(), ), @@ -65,7 +65,7 @@ rule run_inference_all: expand( OUT_ROOT / "data/runs/{run_id}/{init_time}/raw", init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], - run_id=collect_all_runs(), + run_id=collect_all_candidates(), ), @@ -74,7 +74,7 @@ rule verif_metrics_all: expand( rules.verif_metrics.output, init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], - run_id=collect_all_runs(), + run_id=collect_all_candidates(), ), diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index cea00b23..98283be0 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -66,12 +66,13 @@ REFTIMES = _reftimes() def collect_all_runs(): - """Collect all runs defined in the configuration.""" + """Collect all runs defined in the configuration, including secondary runs.""" runs = {} for run_entry in copy.deepcopy(config["runs"]): model_type = next(iter(run_entry)) run_config = run_entry[model_type] run_config["model_type"] = model_type + run_config["is_candidate"] = True run_id = run_config["mlflow_id"][0:9] if model_type == "interpolator": @@ -82,6 +83,7 @@ def collect_all_runs(): # Ensure a proper 'forecaster' entry exists with model_type fore_cfg = copy.deepcopy(run_config["forecaster"]) fore_cfg["model_type"] = "forecaster" + fore_cfg["is_candidate"] = False # exclude from outputs runs[tail_id] = fore_cfg run_id = f"{run_id}-{tail_id}" @@ -90,6 +92,16 @@ def collect_all_runs(): return runs +def collect_all_candidates(): + """Collect participating runs ('candidates') only.""" + runs = collect_all_runs() + candidates = {} + for run_id, run_config in runs.items(): + if run_config.get("is_candidate", False): + candidates[run_id] = run_config + return candidates + + def collect_all_baselines(): """Collect all baselines defined in the configuration.""" baselines = {} @@ -106,7 +118,8 @@ def collect_experiment_participants(): for base in BASELINE_CONFIGS.keys(): participants[base] = OUT_ROOT / f"data/baselines/{base}/verif_aggregated.nc" for exp in RUN_CONFIGS.keys(): - participants[exp] = OUT_ROOT / f"data/runs/{exp}/verif_aggregated.nc" + if RUN_CONFIGS[exp].get("is_candidate", False): + participants[exp] = OUT_ROOT / f"data/runs/{exp}/verif_aggregated.nc" return participants From 6315afc05cd6d1b6c100adfbcba7359bbc65aa73 Mon Sep 17 00:00:00 2001 From: Daniele Nerini Date: Tue, 21 Oct 2025 11:44:31 +0200 Subject: [PATCH 15/34] Adopt forecast intervals including the end point (#71) * Adopt forecast intervals including the end point * Fix parsing * Experiments work * Update config/forecasters.yaml * Align init times to availabiliy of COE * run pre-commit * Change README to COSMO-E availability --------- Co-authored-by: Jonas Bhend Co-authored-by: Jonas Bhend --- README.md | 8 +++---- config/forecasters-co1e.yaml | 4 ++-- config/forecasters.yaml | 15 ++++-------- config/interpolators.yaml | 20 ++++++++-------- pyproject.toml | 2 +- src/evalml/config.py | 18 +++++++------- workflow/rules/data.smk | 8 +++---- workflow/rules/inference.smk | 1 - workflow/rules/verif.smk | 6 ++--- workflow/scripts/extract_baseline.py | 34 ++++++++++++-------------- workflow/scripts/verif_baseline.py | 32 +++++++++++-------------- workflow/scripts/verif_from_grib.py | 36 +++++++++++++--------------- workflow/tools/config.schema.json | 4 ++-- 13 files changed, 85 insertions(+), 103 deletions(-) diff --git a/README.md b/README.md index 64fe254f..f2fdf5b4 100644 --- a/README.md +++ b/README.md @@ -31,17 +31,17 @@ description: | dates: start: 2020-01-01T12:00 end: 2020-01-10T00:00 - frequency: 54h + frequency: 60h runs: - forecaster: mlflow_id: 2f962c89ff644ca7940072fa9cd088ec label: Stage D - N320 global grid with CERRA finetuning - steps: 0/126/6 + steps: 0/120/6 - forecaster: mlflow_id: d0846032fc7248a58b089cbe8fa4c511 label: M-1 forecaster - steps: 0/126/6 + steps: 0/120/6 baselines: @@ -49,7 +49,7 @@ baselines: baseline_id: COSMO-E label: COSMO-E root: /store_new/mch/msopr/ml/COSMO-E - steps: 0/126/6 + steps: 0/120/6 analysis: label: COSMO KENDA diff --git a/config/forecasters-co1e.yaml b/config/forecasters-co1e.yaml index 6171d62a..c72ae546 100644 --- a/config/forecasters-co1e.yaml +++ b/config/forecasters-co1e.yaml @@ -13,7 +13,7 @@ runs: mlflow_id: 2174c939c8844555a52843b71219d425 label: Cosmo 1km + era5 N320, finetuned on cerra checkpoint, lam resolution 11 config: resources/inference/configs/forecaster_no_trimedge_fromtraining.yaml - steps: 0/126/6 + steps: 0/120/6 inference_resources: gpu: 4 tasks: 4 @@ -23,7 +23,7 @@ baselines: baseline_id: COSMO-1E label: COSMO-1E root: /scratch/mch/bhendj/COSMO-1E - steps: 0/126/6 + steps: 0/33/6 analysis: label: COSMO KENDA diff --git a/config/forecasters.yaml b/config/forecasters.yaml index 413a734f..1dbdad13 100644 --- a/config/forecasters.yaml +++ b/config/forecasters.yaml @@ -1,29 +1,24 @@ # yaml-language-server: $schema=../workflow/tools/config.schema.json description: | - This is an experiment to do blabla. + Evaluate skill of COSMO-E emulator (M-1 forecaster). dates: start: 2020-01-01T12:00 end: 2020-01-10T00:00 - # end: 2020-03-30T00:00 - frequency: 36h + frequency: 60h runs: - - forecaster: - mlflow_id: 2f962c89ff644ca7940072fa9cd088ec - label: Stage D - N320 global grid with CERRA finetuning - steps: 0/126/6 - forecaster: mlflow_id: d0846032fc7248a58b089cbe8fa4c511 label: M-1 forecaster - steps: 0/126/6 + steps: 0/120/6 baselines: - baseline: baseline_id: COSMO-E label: COSMO-E root: /store_new/mch/msopr/ml/COSMO-E - steps: 0/126/6 + steps: 0/120/6 analysis: label: COSMO KENDA @@ -38,7 +33,7 @@ locations: profile: executor: slurm global_resources: - gpus: 15 + gpus: 16 default_resources: slurm_partition: "postproc" cpus_per_task: 1 diff --git a/config/interpolators.yaml b/config/interpolators.yaml index e04fc39c..662f0679 100644 --- a/config/interpolators.yaml +++ b/config/interpolators.yaml @@ -1,18 +1,18 @@ # yaml-language-server: $schema=../workflow/tools/config.schema.json description: | - Stretched interpolator vs LAM interpolator. + Evaluate skill of SGM interpolator (M-2 interpolator). dates: start: 2020-01-01T12:00 end: 2020-01-10T00:00 - frequency: 54h + frequency: 60h runs: - interpolator: - mlflow_id: 9c18b90074214d769b8b383722fc5a06 + mlflow_id: 8d1e0410ca7d4f74b368b3079878259a label: M-2 interpolator (KENDA) - steps: 0/121/1 - config: resources/inference/configs/interpolator_from_test_data.yaml + steps: 0/120/1 + config: resources/inference/configs/interpolator_from_test_data_stretched.yaml forecaster: null extra_dependencies: - git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6 @@ -21,12 +21,12 @@ runs: - interpolator: mlflow_id: 8d1e0410ca7d4f74b368b3079878259a label: M-2 interpolator (M-1 forecaster) - steps: 0/121/1 + steps: 0/120/1 config: resources/inference/configs/interpolator_stretched.yaml forecaster: mlflow_id: d0846032fc7248a58b089cbe8fa4c511 config: resources/inference/configs/forecaster_with_global.yaml - steps: 0/126/6 + steps: 0/120/6 extra_dependencies: - git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6 - torch-geometric==2.6.1 @@ -35,14 +35,14 @@ runs: mlflow_id: d0846032fc7248a58b089cbe8fa4c511 label: M-1 forecaster config: resources/inference/configs/forecaster_with_global.yaml - steps: 0/126/6 + steps: 0/120/6 baselines: - baseline: baseline_id: COSMO-E-1h label: COSMO-E root: /scratch/mch/bhendj/COSMO-E - steps: 0/121/1 + steps: 0/120/1 analysis: label: COSMO KENDA @@ -58,7 +58,7 @@ locations: profile: executor: slurm global_resources: - gpus: 15 + gpus: 16 default_resources: slurm_partition: "postproc" cpus_per_task: 1 diff --git a/pyproject.toml b/pyproject.toml index 043c1718..bfcb9ee9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,4 +50,4 @@ markers = [ packages = [ "src/evalml", "src/verification" -] \ No newline at end of file +] diff --git a/src/evalml/config.py b/src/evalml/config.py index c4a97ce2..ec3852b4 100644 --- a/src/evalml/config.py +++ b/src/evalml/config.py @@ -74,8 +74,10 @@ class RunConfig(BaseModel): ..., description=( "Forecast lead times in hours, formatted as 'start/end/step'. " - "The range is half-open [start, end), meaning it includes the start " - "but excludes the end. Example: '0/126/6' for lead times every 6 hours up to 120 hours." + "The range includes the start lead time and continues with the given step " + "until reaching or exceeding the end lead time. " + "Example: '0/120/6' for lead times every 6 hours up to 120 h, " + "or '0/33/6' up to 30 h." ), ) extra_dependencies: List[str] = Field( @@ -102,15 +104,13 @@ def validate_steps(cls, v: str) -> str: try: start, end, step = map(int, parts) except ValueError: - raise ValueError("Start, end, and interval must be integers.") - if start >= end: - raise ValueError(f"Start ({start}) must be less than end ({end}).") - if step <= 0: - raise ValueError(f"Interval ({step}) must be a positive integer.") - if (end - start) % step != 0: + raise ValueError("Start, end, and step must be integers.") + if start > end: raise ValueError( - f"The step ({step}) must evenly divide the range ({end - start})." + f"Start ({start}) must be less than or equal to end ({end})." ) + if step <= 0: + raise ValueError(f"Step ({step}) must be a positive integer.") return v diff --git a/workflow/rules/data.smk b/workflow/rules/data.smk index 7aa0bb7c..bef818f1 100644 --- a/workflow/rules/data.smk +++ b/workflow/rules/data.smk @@ -18,7 +18,7 @@ if "extract_cosmoe" in config.get("include-optional-rules", []): runtime="24h", params: year_postfix=lambda wc: f"FCST{wc.year}", - lead_time="0/126/6", + steps="0/120/6", log: OUT_ROOT / "logs/extract-cosmoe-fcts-{year}.log", shell: @@ -26,7 +26,7 @@ if "extract_cosmoe" in config.get("include-optional-rules", []): python workflow/scripts/extract_baseline_fct.py \ --archive_dir {input.archive}/{params.year_postfix} \ --output_store {output.fcts} \ - --lead_time {params.lead_time} \ + --steps {params.steps} \ > {log} 2>&1 """ @@ -45,7 +45,7 @@ if "extract_cosmo1e" in config.get("include-optional-rules", []): runtime="24h", params: year_postfix=lambda wc: f"FCST{wc.year}", - lead_time="0/34/1", + steps="0/33/1", log: OUT_ROOT / "logs/extract-cosmo1e-fcts-{year}.log", shell: @@ -53,6 +53,6 @@ if "extract_cosmo1e" in config.get("include-optional-rules", []): python workflow/scripts/extract_baseline_fct.py \ --archive_dir {input.archive}/{params.year_postfix} \ --output_store {output.fcts} \ - --lead_time {params.lead_time} \ + --steps {params.steps} \ > {log} 2>&1 """ diff --git a/workflow/rules/inference.smk b/workflow/rules/inference.smk index ea136b97..8ec1d98a 100644 --- a/workflow/rules/inference.smk +++ b/workflow/rules/inference.smk @@ -121,7 +121,6 @@ def get_resource(wc, field: str, default): def get_leadtime(wc): """Get the lead time from the run config.""" start, end, step = RUN_CONFIGS[wc.run_id]["steps"].split("/") - end = int(end) - int(step) # make inclusive return f"{end}h" diff --git a/workflow/rules/verif.smk b/workflow/rules/verif.smk index 10acbe13..bef45226 100644 --- a/workflow/rules/verif.smk +++ b/workflow/rules/verif.smk @@ -22,7 +22,7 @@ rule verif_metrics_baseline: analysis_zarr=config["analysis"].get("analysis_zarr"), params: baseline_label=lambda wc: BASELINE_CONFIGS[wc.baseline_id].get("label"), - baseline_steps=lambda wc: BASELINE_CONFIGS[wc.baseline_id].get("steps"), + baseline_steps=lambda wc: BASELINE_CONFIGS[wc.baseline_id]["steps"], analysis_label=config["analysis"].get("label"), output: OUT_ROOT / "data/baselines/{baseline_id}/{init_time}/verif.nc", @@ -38,7 +38,7 @@ rule verif_metrics_baseline: --analysis_zarr {input.analysis_zarr} \ --baseline_zarr {input.baseline_zarr} \ --reftime {wildcards.init_time} \ - --lead_time "{params.baseline_steps}" \ + --steps "{params.baseline_steps}" \ --baseline_label "{params.baseline_label}" \ --analysis_label "{params.analysis_label}" \ --output {output} > {log} 2>&1 @@ -79,7 +79,7 @@ rule verif_metrics: uv run {input.script} \ --grib_output_dir {input.grib_output} \ --analysis_zarr {input.analysis_zarr} \ - --lead_time "{params.fcst_steps}" \ + --steps "{params.fcst_steps}" \ --fcst_label "{params.fcst_label}" \ --analysis_label "{params.analysis_label}" \ --output {output} > {log} 2>&1 diff --git a/workflow/scripts/extract_baseline.py b/workflow/scripts/extract_baseline.py index 570151bb..f450b82b 100644 --- a/workflow/scripts/extract_baseline.py +++ b/workflow/scripts/extract_baseline.py @@ -42,7 +42,7 @@ def check_reftime_consistency(tarfiles: list[Path], delta_h: int = 12): def extract( - tar: Path, lead_time: list[int], run_id: str, params: list[str] + tar: Path, lead_times: list[int], run_id: str, params: list[str] ) -> xr.Dataset: LOG.info(f"Extracting fields from {tar}.") reftime = reftime_from_tarfile(tar) @@ -54,7 +54,7 @@ def extract( raise ValueError("Currently only COSMO-E and COSMO-1E are supported.") tar_archive = tarfile.open(tar, mode="r:*") out = ekd.SimpleFieldList() - for lt in lead_time: + for lt in lead_times: filename = f"{tar.stem}/grib/{gribname}{lt:03}_{run_id}" LOG.info(f"Extracting {filename}.") stream = tar_archive.extractfile(filename) @@ -79,23 +79,19 @@ def extract( class ScriptConfig(Namespace): archive_dir: Path output_store: Path - lead_time: int + steps: list[int] run_id: str params: list[str] -def _parse_lead_time(lead_time: str) -> int: - # check that lead_time is in the format "start/stop/step" - if "/" not in lead_time: - raise ValueError( - f"Expected lead_time in format 'start/stop/step', got '{lead_time}'" - ) - if len(lead_time.split("/")) != 3: - raise ValueError( - f"Expected lead_time in format 'start/stop/step', got '{lead_time}'" - ) - - return list(range(*map(int, lead_time.split("/")))) +def _parse_steps(steps: str) -> int: + # check that steps is in the format "start/stop/step" + if "/" not in steps: + raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'") + if len(steps.split("/")) != 3: + raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'") + start, end, step = map(int, steps.split("/")) + return list(range(start, end + 1, step)) def main(cfg: ScriptConfig): @@ -135,7 +131,7 @@ def main(cfg: ScriptConfig): for i in indices: file = tarfiles[i] - ds = extract(file, cfg.lead_time, cfg.run_id, cfg.params) + ds = extract(file, cfg.steps, cfg.run_id, cfg.params) LOG.info(f"Extracted: {ds}") @@ -167,7 +163,7 @@ def main(cfg: ScriptConfig): help="Path to the output zarr store.", ) - parser.add_argument("--lead_time", type=_parse_lead_time, default="0/126/6") + parser.add_argument("--steps", type=_parse_steps, default="0/120/6") parser.add_argument("--run_id", type=str, default="000") @@ -193,10 +189,10 @@ def main(cfg: ScriptConfig): python workflow/scripts/extract_baseline_fct.py \ --archive_dir /archive/mch/msopr/osm/COSMO-E/FCST20 \ --output_store /store_new/mch/msopr/ml/COSMO-E/FCST20.zarr \ - --lead_time 0/126/6 + --steps 0/120/6 python workflow/scripts/extract_baseline_fct.py \ --archive_dir /archive/mch/s83/osm/from_GPFS/COSMO-1E/FCST20 \ --output_store /store_new/mch/msopr/ml/COSMO-1E/FCST20.zarr \ - --lead_time 0/34/1 + --steps 0/33/1 """ diff --git a/workflow/scripts/verif_baseline.py b/workflow/scripts/verif_baseline.py index 3f034cc9..f052dbab 100644 --- a/workflow/scripts/verif_baseline.py +++ b/workflow/scripts/verif_baseline.py @@ -92,18 +92,14 @@ def load_analysis_data_from_zarr( return ds -def _parse_lead_time(lead_time: str) -> int: - # check that lead_time is in the format "start/stop/step" - if "/" not in lead_time: - raise ValueError( - f"Expected lead_time in format 'start/stop/step', got '{lead_time}'" - ) - if len(lead_time.split("/")) != 3: - raise ValueError( - f"Expected lead_time in format 'start/stop/step', got '{lead_time}'" - ) - - return list(range(*map(int, lead_time.split("/")))) +def _parse_steps(steps: str) -> int: + # check that steps is in the format "start/stop/step" + if "/" not in steps: + raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'") + if len(steps.split("/")) != 3: + raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'") + start, end, step = map(int, steps.split("/")) + return list(range(start, end + 1, step)) class ScriptConfig(Namespace): @@ -114,7 +110,7 @@ class ScriptConfig(Namespace): baseline_zarr: Path = None reftime: datetime = None params: list[str] = ["T_2M", "TD_2M", "U_10M", "V_10M"] - lead_time: list[int] = _parse_lead_time("0/126/6") + steps: list[int] = _parse_steps("0/120/6") def program_summary_log(args): @@ -158,7 +154,7 @@ def main(args: ScriptConfig): ) baseline = baseline[args.params].sel( ref_time=args.reftime, - lead_time=np.array(args.lead_time, dtype="timedelta64[h]"), + lead_time=np.array(args.steps, dtype="timedelta64[h]"), method="nearest", ) baseline = baseline.assign_coords(time=baseline.ref_time + baseline.lead_time) @@ -226,10 +222,10 @@ def main(args: ScriptConfig): default=["T_2M", "TD_2M", "U_10M", "V_10M", "PS", "PMSL", "TOT_PREC"], ) parser.add_argument( - "--lead_time", - type=_parse_lead_time, - default="0/126/6", - help="Lead time in the format 'start/stop/step' (default: 0/126/6).", + "--steps", + type=_parse_steps, + default="0/120/6", + help="Forecast steps in the format 'start/stop/step' (default: 0/120/6).", ) parser.add_argument( "--baseline_label", diff --git a/workflow/scripts/verif_from_grib.py b/workflow/scripts/verif_from_grib.py index 61d62a2a..5ed09be7 100644 --- a/workflow/scripts/verif_from_grib.py +++ b/workflow/scripts/verif_from_grib.py @@ -112,12 +112,12 @@ def load_analysis_data_from_zarr( def load_fct_data_from_grib( - grib_output_dir: Path, params: list[str], step: list[int] + grib_output_dir: Path, params: list[str], steps: list[int] ) -> xr.Dataset: """Load forecast data from GRIB files for a specific valid time.""" files = sorted(grib_output_dir.glob("20*.grib")) fds = data_source.FileDataSource(datafiles=files) - ds = grib_decoder.load(fds, {"param": params, "step": step}) + ds = grib_decoder.load(fds, {"param": params, "step": steps}) for var, da in ds.items(): if "z" in da.dims and da.sizes["z"] == 1: ds[var] = da.squeeze("z", drop=True) @@ -143,18 +143,14 @@ def load_fct_data_from_grib( return ds -def _parse_lead_time(lead_time: str) -> int: - # check that lead_time is in the format "start/stop/step" - if "/" not in lead_time: - raise ValueError( - f"Expected lead_time in format 'start/stop/step', got '{lead_time}'" - ) - if len(lead_time.split("/")) != 3: - raise ValueError( - f"Expected lead_time in format 'start/stop/step', got '{lead_time}'" - ) - - return list(range(*map(int, lead_time.split("/")))) +def _parse_steps(steps: str) -> int: + # check that steps is in the format "start/stop/step" + if "/" not in steps: + raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'") + if len(steps.split("/")) != 3: + raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'") + start, end, step = map(int, steps.split("/")) + return list(range(start, end + 1, step)) class ScriptConfig(Namespace): @@ -164,7 +160,7 @@ class ScriptConfig(Namespace): analysis_zarr: Path = None forecast_zarr: Path = None params: list[str] - lead_time: list[int] = _parse_lead_time("0/126/6") + steps: list[int] = _parse_steps("0/120/6") def program_summary_log(args): @@ -189,7 +185,7 @@ def main(args: ScriptConfig): # get forecast data start = datetime.now() fct = load_fct_data_from_grib( - grib_output_dir=args.grib_output_dir, params=args.params, step=args.lead_time + grib_output_dir=args.grib_output_dir, params=args.params, steps=args.steps ) LOG.info( "Loaded forecast data from GRIB files in %.2f seconds: \n%s", @@ -254,10 +250,10 @@ def main(args: ScriptConfig): help="Comma-separated list of parameters to verify.", ) parser.add_argument( - "--lead_time", - type=_parse_lead_time, - default="0/126/6", - help="Lead time in the format 'start/stop/step'.", + "--steps", + type=_parse_steps, + default="0/120/6", + help="Forecast steps in the format 'start/stop/step'.", ) parser.add_argument( "--fcst_label", diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json index 4bc53068..94024013 100644 --- a/workflow/tools/config.schema.json +++ b/workflow/tools/config.schema.json @@ -167,7 +167,7 @@ "title": "Label" }, "steps": { - "description": "Forecast lead times in hours, formatted as 'start/end/step'. The range is half-open [start, end), meaning it includes the start but excludes the end. Example: '0/126/6' for lead times every 6 hours up to 120 hours.", + "description": "Forecast lead times in hours, formatted as 'start/end/step'. The range includes the start lead time and continues with the given step until reaching or exceeding the end lead time. Example: '0/120/6' for lead times every 6 hours up to 120 h, or '0/33/6' up to 30 h.", "title": "Steps", "type": "string" }, @@ -348,7 +348,7 @@ "title": "Label" }, "steps": { - "description": "Forecast lead times in hours, formatted as 'start/end/step'. The range is half-open [start, end), meaning it includes the start but excludes the end. Example: '0/126/6' for lead times every 6 hours up to 120 hours.", + "description": "Forecast lead times in hours, formatted as 'start/end/step'. The range includes the start lead time and continues with the given step until reaching or exceeding the end lead time. Example: '0/120/6' for lead times every 6 hours up to 120 h, or '0/33/6' up to 30 h.", "title": "Steps", "type": "string" }, From e028f59c657cf7bb68ccefa661ccf40f259210f0 Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Tue, 21 Oct 2025 18:52:59 +0200 Subject: [PATCH 16/34] Mrb 550 inconcsistent forecast initializations in evalml (#72) --- config/forecasters-co1e.yaml | 4 ++-- workflow/scripts/verif_baseline.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/config/forecasters-co1e.yaml b/config/forecasters-co1e.yaml index c72ae546..84943b76 100644 --- a/config/forecasters-co1e.yaml +++ b/config/forecasters-co1e.yaml @@ -4,8 +4,8 @@ description: | (KENDA-1) at 1km resolution. dates: - start: 2020-01-01T12:00 - end: 2020-01-10T00:00 + start: 2020-08-01T12:00 + end: 2020-08-10T00:00 frequency: 54h runs: diff --git a/workflow/scripts/verif_baseline.py b/workflow/scripts/verif_baseline.py index f052dbab..b8f0fac5 100644 --- a/workflow/scripts/verif_baseline.py +++ b/workflow/scripts/verif_baseline.py @@ -155,7 +155,6 @@ def main(args: ScriptConfig): baseline = baseline[args.params].sel( ref_time=args.reftime, lead_time=np.array(args.steps, dtype="timedelta64[h]"), - method="nearest", ) baseline = baseline.assign_coords(time=baseline.ref_time + baseline.lead_time) LOG.info( From 54067779a2a78f5e934b4efeaa1264904252e729 Mon Sep 17 00:00:00 2001 From: Francesco Zanetta <62377868+frazane@users.noreply.github.com> Date: Wed, 22 Oct 2025 11:32:51 +0200 Subject: [PATCH 17/34] Update vega-lite spec (#69) --- resources/report/dashboard/script.js | 122 ++++++++++++++++----------- 1 file changed, 74 insertions(+), 48 deletions(-) diff --git a/resources/report/dashboard/script.js b/resources/report/dashboard/script.js index 54b7933d..b422a5f0 100644 --- a/resources/report/dashboard/script.js +++ b/resources/report/dashboard/script.js @@ -1,11 +1,11 @@ // Tab switching document.querySelectorAll(".tab-link").forEach(button => { - button.addEventListener("click", () => { - document.querySelectorAll(".tab-link").forEach(btn => btn.classList.remove("active")); - document.querySelectorAll(".tab-content").forEach(tab => tab.classList.remove("active")); - button.classList.add("active"); - document.getElementById(button.dataset.tab).classList.add("active"); - }); + button.addEventListener("click", () => { + document.querySelectorAll(".tab-link").forEach(btn => btn.classList.remove("active")); + document.querySelectorAll(".tab-content").forEach(tab => tab.classList.remove("active")); + button.classList.add("active"); + document.getElementById(button.dataset.tab).classList.add("active"); + }); }); @@ -13,29 +13,29 @@ document.querySelectorAll(".tab-link").forEach(button => { const choicesInstances = {}; choicesInstances["source-select"] = new Choices("#source-select", { - searchEnabled: false, - removeItemButton: true, - shouldSort: false, - itemSelectText: "", - placeholder: false + searchEnabled: false, + removeItemButton: true, + shouldSort: false, + itemSelectText: "", + placeholder: false }); document.getElementById("source-select").addEventListener("change", updateChart); choicesInstances["metric-select"] = new Choices("#metric-select", { - searchEnabled: false, - removeItemButton: true, - shouldSort: false, - itemSelectText: "", - placeholder: false + searchEnabled: false, + removeItemButton: true, + shouldSort: false, + itemSelectText: "", + placeholder: false }); document.getElementById("metric-select").addEventListener("change", updateChart); choicesInstances["param-select"] = new Choices("#param-select", { - searchEnabled: false, - removeItemButton: true, - shouldSort: false, - itemSelectText: "", - placeholder: false + searchEnabled: false, + removeItemButton: true, + shouldSort: false, + itemSelectText: "", + placeholder: false }); document.getElementById("param-select").addEventListener("change", updateChart); @@ -44,25 +44,51 @@ data = JSON.parse(document.getElementById("verif-data").textContent) // Define base spec var spec = { - "data": { - "values": data - }, + "data": { "values": data }, + "params": [ + { + "name": "xZoom", + "select": { + "type": "interval", + "encodings": ["x"], + "zoom": "wheel![!event.shiftKey]" + }, + "bind": "scales" + } + ], "facet": { "column": { "field": "param" }, "row": { "field": "metric" } }, "spec": { - "mark": { "type": "line" }, + "mark": "line", "width": 300, "height": 200, "encoding": { - "x": { "field": "lead_time", "type": "ordinal" }, - "y": { "field": "value", "type": "quantitative" , "scale": { "zero": false }}, - "color": { "field": "source", "legend": { "orient": "top", "labelLimit": 1000, "symbolSize": 1000 } } - } + "x": { + "field": "lead_time", + "type": "quantitative", + "axis": { "labels": true, "ticks": true }, + }, + "y": { + "field": "value", + "type": "quantitative", + "scale": { "zero": false } + }, + "color": { + "field": "source", + "legend": { "orient": "top", "labelLimit": 1000, "symbolSize": 1000 } + } + }, + "transform": [ + { + "filter": { "param": "xZoom" } + } + ] }, "resolve": { "scale": { + "x": "shared", "y": "independent" } } @@ -72,32 +98,32 @@ var spec = { // Define functions function getSelectedValues(id) { - return choicesInstances[id].getValue(true) + return choicesInstances[id].getValue(true) } function updateChart() { - const selectedSources = getSelectedValues("source-select"); - const selectedparams = getSelectedValues("param-select"); - const selectedMetrics = getSelectedValues("metric-select"); + const selectedSources = getSelectedValues("source-select"); + const selectedparams = getSelectedValues("param-select"); + const selectedMetrics = getSelectedValues("metric-select"); - const newSpec = JSON.parse(JSON.stringify(spec)); - const filters = []; + const newSpec = JSON.parse(JSON.stringify(spec)); + const filters = []; - if (selectedSources.length > 0) { - filters.push({ field: "source", oneOf: selectedSources }); - } - if (selectedparams.length > 0) { - filters.push({ field: "param", oneOf: selectedparams }); - } - if (selectedMetrics.length > 0) { - filters.push({ field: "metric", oneOf: selectedMetrics }); - } + if (selectedSources.length > 0) { + filters.push({ field: "source", oneOf: selectedSources }); + } + if (selectedparams.length > 0) { + filters.push({ field: "param", oneOf: selectedparams }); + } + if (selectedMetrics.length > 0) { + filters.push({ field: "metric", oneOf: selectedMetrics }); + } - if (filters.length > 0) { - newSpec.transform = [{ filter: { and: filters } }]; - } + if (filters.length > 0) { + newSpec.transform = [{ filter: { and: filters } }]; + } - vegaEmbed('#vis', newSpec, { actions: false }); + vegaEmbed('#vis', newSpec, { actions: false }); } // Initial chart From 8d014907349b12b3621f71ab8797aae64223f553 Mon Sep 17 00:00:00 2001 From: Francesco Zanetta <62377868+frazane@users.noreply.github.com> Date: Wed, 22 Oct 2025 12:04:39 +0200 Subject: [PATCH 18/34] Decouple inference preparation and execution (#68) * draft changes * rename workspace resources dir * working for config/forecasters.yaml * improve logging * works for interpolators.yaml * re-add get_leadtime function * refactor run directives into script --- config/interpolators.yaml | 4 +- resources/inference/configs/forecaster.yaml | 2 +- .../configs/forecaster_no_trimedge.yaml | 2 +- .../forecaster_no_trimedge_fromtraining.yaml | 2 +- .../configs/forecaster_with_global.yaml | 6 +- resources/inference/configs/interpolator.yaml | 12 +- .../configs/interpolator_from_test_data.yaml | 3 +- ...interpolator_from_test_data_stretched.yaml | 2 +- .../configs/interpolator_stretched.yaml | 6 +- .../templates/templates_index_cosmo.yaml | 16 +- .../templates/templates_index_ifs.yaml | 4 +- src/evalml/helpers.py | 39 ++++ workflow/envs/anemoi_inference.toml | 2 +- workflow/rules/common.smk | 7 +- workflow/rules/inference.smk | 156 +++++++--------- workflow/rules/verif.smk | 9 +- workflow/scripts/inference_prepare.py | 173 ++++++++++++++++++ 17 files changed, 322 insertions(+), 123 deletions(-) create mode 100644 src/evalml/helpers.py create mode 100644 workflow/scripts/inference_prepare.py diff --git a/config/interpolators.yaml b/config/interpolators.yaml index 662f0679..0f5a042c 100644 --- a/config/interpolators.yaml +++ b/config/interpolators.yaml @@ -15,7 +15,7 @@ runs: config: resources/inference/configs/interpolator_from_test_data_stretched.yaml forecaster: null extra_dependencies: - - git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6 + - git+https://github.com/ecmwf/anemoi-inference@fix/cutout-preprocessors - torch-geometric==2.6.1 - anemoi-graphs==0.5.2 - interpolator: @@ -28,7 +28,7 @@ runs: config: resources/inference/configs/forecaster_with_global.yaml steps: 0/120/6 extra_dependencies: - - git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6 + - git+https://github.com/ecmwf/anemoi-inference@fix/cutout-preprocessors - torch-geometric==2.6.1 - anemoi-graphs==0.5.2 - forecaster: diff --git a/resources/inference/configs/forecaster.yaml b/resources/inference/configs/forecaster.yaml index 4f558a3e..8b318c83 100644 --- a/resources/inference/configs/forecaster.yaml +++ b/resources/inference/configs/forecaster.yaml @@ -17,7 +17,7 @@ output: encoding: typeOfGeneratingProcess: 2 templates: - samples: _resources/templates_index_cosmo.yaml + samples: resources/templates_index_cosmo.yaml - printer write_initial_state: true diff --git a/resources/inference/configs/forecaster_no_trimedge.yaml b/resources/inference/configs/forecaster_no_trimedge.yaml index 2e3417dc..306c62f6 100644 --- a/resources/inference/configs/forecaster_no_trimedge.yaml +++ b/resources/inference/configs/forecaster_no_trimedge.yaml @@ -15,7 +15,7 @@ output: encoding: typeOfGeneratingProcess: 2 templates: - samples: _resources/templates_index_cosmo.yaml + samples: resources/templates_index_cosmo.yaml - printer write_initial_state: true diff --git a/resources/inference/configs/forecaster_no_trimedge_fromtraining.yaml b/resources/inference/configs/forecaster_no_trimedge_fromtraining.yaml index 11188b9e..b5097f5b 100644 --- a/resources/inference/configs/forecaster_no_trimedge_fromtraining.yaml +++ b/resources/inference/configs/forecaster_no_trimedge_fromtraining.yaml @@ -15,7 +15,7 @@ output: encoding: typeOfGeneratingProcess: 2 templates: - samples: _resources/templates_index_cosmo.yaml + samples: resources/templates_index_cosmo.yaml - printer write_initial_state: true diff --git a/resources/inference/configs/forecaster_with_global.yaml b/resources/inference/configs/forecaster_with_global.yaml index ae00c3a7..890d3e29 100644 --- a/resources/inference/configs/forecaster_with_global.yaml +++ b/resources/inference/configs/forecaster_with_global.yaml @@ -17,18 +17,18 @@ output: encoding: typeOfGeneratingProcess: 2 templates: - samples: _resources/templates_index_cosmo.yaml + samples: resources/templates_index_cosmo.yaml - grib: path: grib/ifs-{dateTime}_{step:03}.grib encoding: typeOfGeneratingProcess: 2 templates: - samples: _resources/templates_index_ifs.yaml + samples: resources/templates_index_ifs.yaml post_processors: - extract_slice: [189699, -1] - assign_mask: "global/cutout_mask" -forcings: +constant_forcings: test: use_original_paths: true diff --git a/resources/inference/configs/interpolator.yaml b/resources/inference/configs/interpolator.yaml index 41253c0c..765b093d 100644 --- a/resources/inference/configs/interpolator.yaml +++ b/resources/inference/configs/interpolator.yaml @@ -10,7 +10,7 @@ post_processors: input: grib: - path: forecaster_grib/20*.grib # TODO: remove dirty fix to only use local files + path: forecaster/20*.grib # TODO: remove dirty fix to only use local files namer: rules: - - shortName: SKT @@ -54,12 +54,20 @@ output: encoding: typeOfGeneratingProcess: 2 templates: - samples: _resources/templates_index_cosmo.yaml + samples: resources/templates_index_cosmo.yaml constant_forcings: test: use_original_paths: true +dynamic_forcings: + test: + use_original_paths: true + +patch_metadata: + dataset: + constant_fields: [z, lsm] + verbosity: 1 allow_nans: true output_frequency: "1h" diff --git a/resources/inference/configs/interpolator_from_test_data.yaml b/resources/inference/configs/interpolator_from_test_data.yaml index 07aea411..2fdb6cda 100644 --- a/resources/inference/configs/interpolator_from_test_data.yaml +++ b/resources/inference/configs/interpolator_from_test_data.yaml @@ -1,5 +1,4 @@ runner: time_interpolator -include_forcings: true input: test: @@ -15,7 +14,7 @@ output: encoding: typeOfGeneratingProcess: 2 templates: - samples: _resources/templates_index_cosmo.yaml + samples: resources/templates_index_cosmo.yaml verbosity: 1 allow_nans: true diff --git a/resources/inference/configs/interpolator_from_test_data_stretched.yaml b/resources/inference/configs/interpolator_from_test_data_stretched.yaml index 19cd733e..21674891 100644 --- a/resources/inference/configs/interpolator_from_test_data_stretched.yaml +++ b/resources/inference/configs/interpolator_from_test_data_stretched.yaml @@ -17,7 +17,7 @@ output: encoding: typeOfGeneratingProcess: 2 templates: - samples: _resources/templates_index_cosmo.yaml + samples: resources/templates_index_cosmo.yaml verbosity: 1 allow_nans: true diff --git a/resources/inference/configs/interpolator_stretched.yaml b/resources/inference/configs/interpolator_stretched.yaml index 300d6c65..2928e76e 100644 --- a/resources/inference/configs/interpolator_stretched.yaml +++ b/resources/inference/configs/interpolator_stretched.yaml @@ -4,9 +4,9 @@ input: cutout: lam_0: grib: - path: forecaster_grib/20* pre_processors: - extract_mask: "source0/trimedge_mask" + path: forecaster/20* namer: rules: - - shortName: T @@ -43,7 +43,7 @@ input: - tp global: grib: - path: forecaster_grib/ifs* + path: forecaster/ifs* namer: rules: - - shortName: T @@ -100,7 +100,7 @@ output: encoding: typeOfGeneratingProcess: 2 templates: - samples: _resources/templates_index_cosmo.yaml + samples: resources/templates_index_cosmo.yaml verbosity: 1 allow_nans: true diff --git a/resources/inference/templates/templates_index_cosmo.yaml b/resources/inference/templates/templates_index_cosmo.yaml index 8f150041..632164ab 100644 --- a/resources/inference/templates/templates_index_cosmo.yaml +++ b/resources/inference/templates/templates_index_cosmo.yaml @@ -1,26 +1,26 @@ # COSMO-2 templates - - {grid: 0.02, levtype: pl} - - _resources/co2-typeOfLevel=isobaricInhPa.grib + - resources/co2-typeOfLevel=isobaricInhPa.grib - - {grid: 0.02, levtype: sfc, param: [T_2M, TD_2M, U_10M, V_10M]} - - _resources/co2-typeOfLevel=heightAboveGround.grib + - resources/co2-typeOfLevel=heightAboveGround.grib - - {grid: 0.02, levtype: sfc, param: [FR_LAND, TOC_PREC, PMSL, PS, FIS, T_G]} - - _resources/co2-typeOfLevel=surface.grib + - resources/co2-typeOfLevel=surface.grib - - {grid: 0.02, levtype: sfc, param: [TOT_PREC]} - - _resources/co2-shortName=TOT_PREC.grib + - resources/co2-shortName=TOT_PREC.grib # COSMO-1E templates - - {grid: 0.01, levtype: pl} - - _resources/co1e-typeOfLevel=isobaricInhPa.grib + - resources/co1e-typeOfLevel=isobaricInhPa.grib - - {grid: 0.01, levtype: sfc, param: [T_2M, TD_2M, U_10M, V_10M]} - - _resources/co1e-typeOfLevel=heightAboveGround.grib + - resources/co1e-typeOfLevel=heightAboveGround.grib - - {grid: 0.01, levtype: sfc, param: [FR_LAND, TOC_PREC, PMSL, PS, FIS, T_G]} - - _resources/co1e-typeOfLevel=surface.grib + - resources/co1e-typeOfLevel=surface.grib - - {grid: 0.01, levtype: sfc, param: [TOT_PREC]} - - _resources/co1e-shortName=TOT_PREC.grib + - resources/co1e-shortName=TOT_PREC.grib diff --git a/resources/inference/templates/templates_index_ifs.yaml b/resources/inference/templates/templates_index_ifs.yaml index a399ed92..c0700cfc 100644 --- a/resources/inference/templates/templates_index_ifs.yaml +++ b/resources/inference/templates/templates_index_ifs.yaml @@ -1,5 +1,5 @@ - - {levtype: pl} - - _resources/ifs-levtype=pl.grib + - resources/ifs-levtype=pl.grib - - {levtype: sfc} - - _resources/ifs-levtype=sfc.grib + - resources/ifs-levtype=sfc.grib diff --git a/src/evalml/helpers.py b/src/evalml/helpers.py new file mode 100644 index 00000000..bb3e03c1 --- /dev/null +++ b/src/evalml/helpers.py @@ -0,0 +1,39 @@ +import logging + + +def setup_logger(logger_name, log_file, level=logging.INFO): + """ + Setup a logger with the specified name and log file path. + + Can be used to set up loggers from python scripts `run` directives + used in the Snakemake workflow. + + Parameters + ---------- + logger_name : str + The name of the logger. + log_file : str + The file path where the log messages will be written. + level : int, optional + The logging level (default is logging.INFO). + + Returns + ------- + logging.Logger + Configured logger instance. + """ + logger = logging.getLogger(logger_name) + logger.setLevel(level) + + if not logger.handlers: + file_handler = logging.FileHandler(log_file) + file_handler.setLevel(level) + + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) + file_handler.setFormatter(formatter) + + logger.addHandler(file_handler) + + return logger diff --git a/workflow/envs/anemoi_inference.toml b/workflow/envs/anemoi_inference.toml index 22618d63..982a673d 100644 --- a/workflow/envs/anemoi_inference.toml +++ b/workflow/envs/anemoi_inference.toml @@ -8,7 +8,7 @@ dependencies = [ "torchaudio", "anemoi-datasets>=0.5.23,<0.7.0", "anemoi-graphs>=0.5.0,<0.7.0", - "anemoi-inference>=0.7.0,<0.8.0", + "anemoi-inference>=0.8.0,<0.9.0", "anemoi-models>=0.4.20,<0.6.0", "anemoi-training>=0.3.3,<0.5.0", "anemoi-transform>=0.1.10,<0.3.0", diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 98283be0..e9314cb6 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -1,3 +1,4 @@ +import logging import copy from datetime import datetime, timedelta import yaml @@ -128,9 +129,11 @@ def _inference_routing_fn(wc): run_config = RUN_CONFIGS[wc.run_id] if run_config["model_type"] == "forecaster": - input_path = f"logs/inference_forecaster/{wc.run_id}-{wc.init_time}.ok" + input_path = f"logs/prepare_inference_forecaster/{wc.run_id}-{wc.init_time}.ok" elif run_config["model_type"] == "interpolator": - input_path = f"logs/inference_interpolator/{wc.run_id}-{wc.init_time}.ok" + input_path = ( + f"logs/prepare_inference_interpolator/{wc.run_id}-{wc.init_time}.ok" + ) else: raise ValueError(f"Unsupported model type: {run_config['model_type']}") diff --git a/workflow/rules/inference.smk b/workflow/rules/inference.smk index 8ec1d98a..b490dc2e 100644 --- a/workflow/rules/inference.smk +++ b/workflow/rules/inference.smk @@ -8,6 +8,13 @@ from datetime import datetime rule create_inference_pyproject: + """ + Generate a pyproject.toml that contains the information needed + to set up a virtual environment for inference of a specific checkpoint. + The list of dependencies is taken from the checkpoint's MLFlow run metadata, + and additional dependencies can be specified under a run entry in the main + config file. + """ input: toml="workflow/envs/anemoi_inference.toml", output: @@ -25,6 +32,11 @@ rule create_inference_pyproject: rule create_inference_venv: + """ + Create a virtual environment for inference, using the pyproject.toml created above. + The virtual environment is managed with uv. The created virtual environment is relocatable, + so it can be squashed later. Pre-compilation to bytecode is done to speed up imports. + """ input: pyproject=rules.create_inference_pyproject.output.pyproject, output: @@ -56,11 +68,12 @@ rule create_inference_venv: """ -# optionally, precompile to bytecode to reduce the import times -# find {output.venv} -exec stat --format='%i' {} + | sort -u | wc -l # optionally, how many files did I create? - - rule make_squashfs_image: + """ + Create a squashfs image for the inference virtual environment of + a specific checkpoint. Find more about this at + https://docs.cscs.ch/guides/storage/#python-virtual-environments-with-uenv. + """ input: venv=rules.create_inference_venv.output.venv, output: @@ -76,7 +89,11 @@ rule make_squashfs_image: rule create_inference_sandbox: - """Generate a zipped directory that can be used as a sandbox for running inference jobs. + """ + Create a zipped directory that, when extracted, can be used as a sandbox + for running inference jobs for a specific checkpoint. Its main purpose is + to serve as a development environment for anemoi-inference and to facilitate + sharing with external collaborators. TO use this sandbox, unzip it to a target directory. @@ -124,14 +141,18 @@ def get_leadtime(wc): return f"{end}h" -rule inference_forecaster: +rule prepare_inference_forecaster: localrule: True input: pyproject=rules.create_inference_pyproject.output.pyproject, - image=rules.make_squashfs_image.output.image, config=lambda wc: Path(RUN_CONFIGS[wc.run_id]["config"]).resolve(), output: - okfile=touch(OUT_ROOT / "logs/inference_forecaster/{run_id}-{init_time}.ok"), + config=Path(OUT_ROOT / "data/runs/{run_id}/{init_time}/config.yaml"), + resources=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/resources"), + grib_out_dir=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/grib"), + okfile=touch( + OUT_ROOT / "logs/prepare_inference_forecaster/{run_id}-{init_time}.ok" + ), params: checkpoints_path=parse_input( input.pyproject, parse_toml, key="tool.anemoi.checkpoints_path" @@ -142,53 +163,10 @@ rule inference_forecaster: reftime_to_iso=lambda wc: datetime.strptime( wc.init_time, "%Y%m%d%H%M" ).strftime("%Y-%m-%dT%H:%M"), - image_path=lambda wc, input: f"{Path(input.image).resolve()}", log: - OUT_ROOT / "logs/inference_forecaster/{run_id}-{init_time}.log", - resources: - slurm_partition=lambda wc: get_resource(wc, "slurm_partition", "short-shared"), - cpus_per_task=lambda wc: get_resource(wc, "cpus_per_task", 24), - mem_mb_per_cpu=lambda wc: get_resource(wc, "mem_mb_per_cpu", 8000), - runtime=lambda wc: get_resource(wc, "runtime", "40m"), - gres=lambda wc: f"gpu:{get_resource(wc, 'gpu',1)}", - ntasks=lambda wc: get_resource(wc, "tasks", 1), - slurm_extra=lambda wc, input: f"--uenv={Path(input.image).resolve()}:/user-environment", - gpus=lambda wc: get_resource(wc, "gpu", 1), - shell: - r""" - ( - set -euo pipefail - squashfs-mount {params.image_path}:/user-environment -- bash -c ' - export TZ=UTC - source /user-environment/bin/activate - export ECCODES_DEFINITION_PATH=/user-environment/share/eccodes-cosmo-resources/definitions - - # prepare the working directory - WORKDIR={params.output_root}/runs/{wildcards.run_id}/{wildcards.init_time} - mkdir -p $WORKDIR && cd $WORKDIR && mkdir -p grib raw _resources - cp {input.config} config.yaml && cp -r {params.resources_root}/templates/* _resources/ - CMD_ARGS=( - date={params.reftime_to_iso} - checkpoint={params.checkpoints_path}/inference-last.ckpt - lead_time={params.lead_time} - ) - - # is GPU > 1, add runner=parallel to CMD_ARGS - if [ {resources.gpus} -gt 1 ]; then - CMD_ARGS+=(runner=parallel) - fi - - srun \ - --partition={resources.slurm_partition} \ - --cpus-per-task={resources.cpus_per_task} \ - --mem-per-cpu={resources.mem_mb_per_cpu} \ - --time={resources.runtime} \ - --gres={resources.gres} \ - --ntasks={resources.ntasks} \ - anemoi-inference run config.yaml "${{CMD_ARGS[@]}}" - ' - ) > {log} 2>&1 - """ + OUT_ROOT / "logs/prepare_inference_forecaster/{run_id}-{init_time}.log", + script: + "../scripts/inference_prepare.py" def _get_forecaster_run_id(run_id): @@ -196,23 +174,28 @@ def _get_forecaster_run_id(run_id): return RUN_CONFIGS[run_id]["forecaster"]["mlflow_id"][0:9] -rule inference_interpolator: +rule prepare_inference_interpolator: """Run the interpolator for a specific run ID.""" localrule: True input: pyproject=rules.create_inference_pyproject.output.pyproject, - image=rules.make_squashfs_image.output.image, config=lambda wc: Path(RUN_CONFIGS[wc.run_id]["config"]).resolve(), forecasts=lambda wc: ( [ OUT_ROOT - / f"logs/inference_forecaster/{_get_forecaster_run_id(wc.run_id)}-{wc.init_time}.ok" + / f"logs/execute_inference/{_get_forecaster_run_id(wc.run_id)}-{wc.init_time}.ok" ] if RUN_CONFIGS[wc.run_id].get("forecaster") is not None else [] ), output: - okfile=touch(OUT_ROOT / "logs/inference_interpolator/{run_id}-{init_time}.ok"), + config=Path(OUT_ROOT / "data/runs/{run_id}/{init_time}/config.yaml"), + resources=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/resources"), + grib_out_dir=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/grib"), + forecaster=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/forecaster"), + okfile=touch( + OUT_ROOT / "logs/prepare_inference_interpolator/{run_id}-{init_time}.ok" + ), params: checkpoints_path=parse_input( input.pyproject, parse_toml, key="tool.anemoi.checkpoints_path" @@ -228,9 +211,26 @@ rule inference_interpolator: if RUN_CONFIGS[wc.run_id].get("forecaster") is None else _get_forecaster_run_id(wc.run_id) ), - image_path=lambda wc, input: f"{Path(input.image).resolve()}", log: - OUT_ROOT / "logs/inference_interpolator/{run_id}-{init_time}.log", + OUT_ROOT / "logs/prepare_inference_interpolator/{run_id}-{init_time}.log", + script: + "../scripts/inference_prepare.py" + + +rule execute_inference: + localrule: True + input: + okfile=_inference_routing_fn, + image=rules.make_squashfs_image.output.image, + output: + okfile=touch(OUT_ROOT / "logs/execute_inference/{run_id}-{init_time}.ok"), + log: + OUT_ROOT / "logs/execute_inference/{run_id}-{init_time}.log", + params: + image_path=lambda wc, input: f"{Path(input.image).resolve()}", + workdir=lambda wc: ( + OUT_ROOT / f"data/runs/{wc.run_id}/{wc.init_time}" + ).resolve(), resources: slurm_partition=lambda wc: get_resource(wc, "slurm_partition", "short-shared"), cpus_per_task=lambda wc: get_resource(wc, "cpus_per_task", 24), @@ -238,35 +238,19 @@ rule inference_interpolator: runtime=lambda wc: get_resource(wc, "runtime", "40m"), gres=lambda wc: f"gpu:{get_resource(wc, 'gpu',1)}", ntasks=lambda wc: get_resource(wc, "tasks", 1), - slurm_extra=lambda wc, input: f"--uenv={Path(input.image).resolve()}:/user-environment", gpus=lambda wc: get_resource(wc, "gpu", 1), shell: - r""" + """ ( set -euo pipefail + + cd {params.workdir} + squashfs-mount {params.image_path}:/user-environment -- bash -c ' - export TZ=UTC source /user-environment/bin/activate export ECCODES_DEFINITION_PATH=/user-environment/share/eccodes-cosmo-resources/definitions - # prepare the working directory - WORKDIR={params.output_root}/runs/{wildcards.run_id}/{wildcards.init_time} - mkdir -p $WORKDIR && cd $WORKDIR && mkdir -p grib raw _resources - cp {input.config} config.yaml && cp -r {params.resources_root}/templates/* _resources/ - - # if forecaster_run_id is not "null", link the forecaster grib directory; else, run from files. - if [ "{params.forecaster_run_id}" != "null" ]; then - FORECASTER_WORKDIR={params.output_root}/runs/{params.forecaster_run_id}/{wildcards.init_time} - ln -fns $FORECASTER_WORKDIR/grib forecaster_grib - else - echo "Forecaster configuration is null; proceeding with file-based inputs." - fi - - CMD_ARGS=( - date={params.reftime_to_iso} - checkpoint={params.checkpoints_path}/inference-last.ckpt - lead_time={params.lead_time} - ) + CMD_ARGS=() # is GPU > 1, add runner=parallel to CMD_ARGS if [ {resources.gpus} -gt 1 ]; then @@ -274,6 +258,7 @@ rule inference_interpolator: fi srun \ + --unbuffered \ --partition={resources.slurm_partition} \ --cpus-per-task={resources.cpus_per_task} \ --mem-per-cpu={resources.mem_mb_per_cpu} \ @@ -284,12 +269,3 @@ rule inference_interpolator: ' ) > {log} 2>&1 """ - - -rule inference_routing: - localrule: True - input: - _inference_routing_fn, - output: - directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/grib"), - directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/raw"), diff --git a/workflow/rules/verif.smk b/workflow/rules/verif.smk index bef45226..6c732db8 100644 --- a/workflow/rules/verif.smk +++ b/workflow/rules/verif.smk @@ -55,9 +55,7 @@ def _get_no_none(dict, key, replacement): rule verif_metrics: input: script="workflow/scripts/verif_from_grib.py", - module="src/verification/__init__.py", - inference_okfile=_inference_routing_fn, - grib_output=rules.inference_routing.output[0], + inference_okfile=rules.execute_inference.output.okfile, analysis_zarr=config["analysis"].get("analysis_zarr"), output: OUT_ROOT / "data/runs/{run_id}/{init_time}/verif.nc", @@ -68,6 +66,9 @@ rule verif_metrics: fcst_label=lambda wc: RUN_CONFIGS[wc.run_id].get("label"), fcst_steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"], analysis_label=config["analysis"].get("label"), + grib_out_dir=lambda wc: ( + Path(OUT_ROOT) / f"data/runs/{wc.run_id}/{wc.init_time}/grib" + ).resolve(), log: OUT_ROOT / "logs/verif_metrics/{run_id}-{init_time}.log", resources: @@ -77,7 +78,7 @@ rule verif_metrics: shell: """ uv run {input.script} \ - --grib_output_dir {input.grib_output} \ + --grib_output_dir {params.grib_out_dir} \ --analysis_zarr {input.analysis_zarr} \ --steps "{params.fcst_steps}" \ --fcst_label "{params.fcst_label}" \ diff --git a/workflow/scripts/inference_prepare.py b/workflow/scripts/inference_prepare.py new file mode 100644 index 00000000..e3178774 --- /dev/null +++ b/workflow/scripts/inference_prepare.py @@ -0,0 +1,173 @@ +"""Script to prepare configuration and working directory for inference runs.""" + +import logging +import yaml +import shutil +from pathlib import Path + +from evalml.helpers import setup_logger + + +def prepare_config(default_config_path: str, output_config_path: str, params: dict): + """Prepare the configuration file for the inference run. + + Overrides default configuration parameters with those provided in params + and writes the updated configuration to output_config_path. + + Parameters + ---------- + default_config_path : str + Path to the default configuration file. + output_config_path : str + Path where the updated configuration file will be written. + params : dict + Dictionary of parameters to override in the default configuration. + """ + + with open(default_config_path, "r") as f: + config = yaml.safe_load(f) + + config = _override_recursive(config, params) + + with open(output_config_path, "w") as f: + yaml.safe_dump(config, f, sort_keys=False) + + +def prepare_workdir(workdir: Path, resources_root: Path): + """Prepare the working directory for the inference run. + + Creates necessary subdirectories and copies resource files. + + Parameters + ---------- + workdir : Path + Path to the working directory. + resources_root : Path + Path to the root directory containing resource files. + """ + workdir.mkdir(parents=True, exist_ok=True) + (workdir / "grib").mkdir(parents=True, exist_ok=True) + shutil.copytree(resources_root / "templates", workdir / "resources") + + +def prepare_interpolator(smk): + """Prepare the interpolator for the inference run. + + Required steps: + - prepare working directory + - prepare forecaster directory + - prepare config + """ + LOG = _setup_logger(smk) + + # prepare working directory + workdir = _get_workdir(smk) + prepare_workdir(workdir, smk.params.resources_root) + LOG.info("Prepared working directory at %s", workdir) + res_list = "\n".join([str(fn) for fn in Path(workdir / "resources").rglob("*")]) + LOG.info("Resources: \n%s", res_list) + + # prepare forecaster directory + fct_run_id = smk.params.forecaster_run_id + if fct_run_id != "null": + fct_workdir = ( + smk.params.output_root / "runs" / fct_run_id / smk.wildcards.init_time + ) + (workdir / "forecaster").symlink_to(fct_workdir / "grib") + LOG.info( + "Created symlink to forecaster grib directory at %s", workdir / "forecaster" + ) + else: + (workdir / "forecaster").mkdir(parents=True, exist_ok=True) + (workdir / "forecaster/.dataset").touch() + LOG.info( + "No forecaster run ID provided; using dataset placeholder at %s", + workdir / "forecaster/.dataset", + ) + + # prepare config + overrides = _overrides_from_params(smk) + prepare_config(smk.input.config, smk.output.config, overrides) + LOG.info("Wrote config file at %s", smk.output.config) + with open(smk.output.config, "r") as f: + config_content = f.read() + LOG.info("Config: \n%s", config_content) + + LOG.info("Interpolator preparation complete.") + + +def prepare_forecaster(smk): + """Prepare the forecaster for the inference run. + + Required steps: + - prepare working directory + - prepare config + """ + LOG = _setup_logger(smk) + + workdir = _get_workdir(smk) + prepare_workdir(workdir, smk.params.resources_root) + LOG.info("Prepared working directory at %s", workdir) + res_list = "\n".join([str(fn) for fn in Path(workdir / "resources").rglob("*")]) + LOG.info("Resources: \n%s", res_list) + + overrides = _overrides_from_params(smk) + prepare_config(smk.input.config, smk.output.config, overrides) + LOG.info("Wrote config file at %s", smk.output.config) + with open(smk.output.config, "r") as f: + config_content = f.read() + LOG.info("Config: \n%s", config_content) + + LOG.info("Forecaster preparation complete.") + + +# TODO: just pass a dictionary of config overrides to the rule's params +def _overrides_from_params(smk) -> dict: + return { + "checkpoint": f"{smk.params.checkpoints_path}/inference-last.ckpt", + "date": smk.params.reftime_to_iso, + "lead_time": smk.params.lead_time, + } + + +def _get_workdir(smk) -> Path: + run_id = smk.wildcards.run_id + init_time = smk.wildcards.init_time + return smk.params.output_root / "runs" / run_id / init_time + + +def _setup_logger(smk) -> logging.Logger: + run_id = smk.wildcards.run_id + init_time = smk.wildcards.init_time + logger_name = f"{smk.rule}_{run_id}_{init_time}" + LOG = setup_logger(logger_name, log_file=smk.log[0]) + return LOG + + +def _override_recursive(original: dict, updates: dict) -> dict: + """Recursively override values in the original dictionary with those from the updates dictionary.""" + for key, value in updates.items(): + if ( + isinstance(value, dict) + and key in original + and isinstance(original[key], dict) + ): + original[key] = _override_recursive(original[key], value) + else: + original[key] = value + return original + + +def main(smk): + """Main function to run the Snakemake workflow.""" + if smk.rule == "prepare_inference_forecaster": + prepare_forecaster(smk) + elif smk.rule == "prepare_inference_interpolator": + prepare_interpolator(smk) + else: + raise ValueError(f"Unknown rule: {smk.rule}") + + +if __name__ == "__main__": + snakemake = snakemake # type: ignore # noqa: F821 + raise SystemExit(main(snakemake)) From b7b131195b2273a1e7426920934a8772c7c64c49 Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Wed, 29 Oct 2025 09:21:32 +0100 Subject: [PATCH 19/34] Scores by Region (#75) * add region averages * add regions to config * Add regions to verification module, scripts, and rules * add stratification to forecaster config and fix typo * fix dict indexing * fix append error * read lon/lat from obs dataset * Add inner verification domain * Add missing dependency * add plots by region * Add regions to dashboard * Fix dashboard * Add region name and initializations to plot title (and remove header div) * Add support for multiple regions * Fix legend --- config/forecasters-co1e.yaml | 10 + config/forecasters.yaml | 10 + config/interpolators.yaml | 10 + pyproject.toml | 3 + resources/report/dashboard/script.js | 94 ++++++---- .../report/dashboard/template.html.jinja2 | 17 +- src/evalml/config.py | 14 ++ src/verification/__init__.py | 134 ++++++++++++-- uv.lock | 174 +++++++++++++++++- workflow/rules/common.smk | 11 ++ workflow/rules/report.smk | 5 +- workflow/rules/verif.smk | 5 + .../scripts/report_experiment_dashboard.py | 2 + workflow/scripts/verif_baseline.py | 9 +- workflow/scripts/verif_from_grib.py | 7 +- workflow/scripts/verif_plot_metrics.py | 10 +- workflow/tools/config.schema.json | 28 +++ 17 files changed, 480 insertions(+), 63 deletions(-) diff --git a/config/forecasters-co1e.yaml b/config/forecasters-co1e.yaml index 84943b76..d0685047 100644 --- a/config/forecasters-co1e.yaml +++ b/config/forecasters-co1e.yaml @@ -29,6 +29,16 @@ analysis: label: COSMO KENDA analysis_zarr: /scratch/mch/fzanetta/data/anemoi/datasets/mch-co1e-an-archive-0p01-2019-2024-1h-v1-pl13.zarr +stratification: + regions: + - jura + - mittelland + - voralpen + - alpennordhang + - innerealpentaeler + - alpensuedseite + root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 + locations: output_root: output/ mlflow_uri: diff --git a/config/forecasters.yaml b/config/forecasters.yaml index 1dbdad13..c5445dc5 100644 --- a/config/forecasters.yaml +++ b/config/forecasters.yaml @@ -24,6 +24,16 @@ analysis: label: COSMO KENDA analysis_zarr: /scratch/mch/fzanetta/data/anemoi/datasets/mch-co2-an-archive-0p02-2015-2020-6h-v3-pl13.zarr +stratification: + regions: + - jura + - mittelland + - voralpen + - alpennordhang + - innerealpentaeler + - alpensuedseite + root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 + locations: output_root: output/ mlflow_uri: diff --git a/config/interpolators.yaml b/config/interpolators.yaml index 0f5a042c..dd235440 100644 --- a/config/interpolators.yaml +++ b/config/interpolators.yaml @@ -48,6 +48,16 @@ analysis: label: COSMO KENDA analysis_zarr: /scratch/mch/fzanetta/data/anemoi/datasets/mch-co2-an-archive-0p02-2015-2020-1h-v3-pl13.zarr +stratification: + regions: + - jura + - mittelland + - voralpen + - alpennordhang + - innerealpentaeler + - alpensuedseite + root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 + locations: output_root: output/ mlflow_uri: diff --git a/pyproject.toml b/pyproject.toml index bfcb9ee9..b294e140 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,9 @@ dependencies = [ "pydantic>=2.11.7", "toml>=0.10.2", "netcdf4>=1.7.2", + "shapely>=2.1.2", + "cartopy>=0.25.0", + "pyproj>=3.7.2", ] [project.optional-dependencies] diff --git a/resources/report/dashboard/script.js b/resources/report/dashboard/script.js index b422a5f0..43740200 100644 --- a/resources/report/dashboard/script.js +++ b/resources/report/dashboard/script.js @@ -12,6 +12,16 @@ document.querySelectorAll(".tab-link").forEach(button => { // Initialize selection widgets const choicesInstances = {}; +choicesInstances["region-select"] = new Choices("#region-select", { + searchEnabled: false, + removeItemButton: true, + shouldSort: false, + itemSelectText: "", + placeholder: false +}); +document.getElementById("region-select").addEventListener("change", updateChart); + + choicesInstances["source-select"] = new Choices("#source-select", { searchEnabled: false, removeItemButton: true, @@ -41,57 +51,74 @@ document.getElementById("param-select").addEventListener("change", updateChart); // Get the data (embedded in the HTML) data = JSON.parse(document.getElementById("verif-data").textContent) +header = document.getElementById("header-text").textContent.trim() // Define base spec var spec = { "data": { "values": data }, - "params": [ - { - "name": "xZoom", - "select": { - "type": "interval", - "encodings": ["x"], - "zoom": "wheel![!event.shiftKey]" - }, - "bind": "scales" - } - ], + "config": { + "scale": { "continuousPadding": 1 } + }, "facet": { - "column": { "field": "param" }, - "row": { "field": "metric" } + "row": { "field": "metric", "type": "nominal", "title": null }, + "column": { "field": "param", "type": "nominal" , "title": null }, + }, + "resolve": { + "scale": { + "x": "shared", + "y": "independent" + }, }, "spec": { - "mark": "line", + "params": [ + { + "name": "xZoom", + "select": { + "type": "interval", + "encodings": ["x"], + "zoom": "wheel![!event.shiftKey]" + }, + "bind": "scales" + } + ], + "mark": {"type": "line", "point": { "size": 50 } }, "width": 300, "height": 200, "encoding": { "x": { "field": "lead_time", - "type": "quantitative", - "axis": { "labels": true, "ticks": true }, + "type": "quantitative" }, "y": { "field": "value", - "type": "quantitative", - "scale": { "zero": false } + "type": "quantitative" , + "scale": { "zero": false } }, "color": { "field": "source", - "legend": { "orient": "top", "labelLimit": 1000, "symbolSize": 1000 } - } + "type": "nominal", + "legend": { "orient": "top", "title": "Data Source", "offset": 0, "padding": 10 } + }, + "shape": { + "field": "region", + "type": "nominal", + "legend": { "orient": "top", "title": "Region", "offset": 0, "padding": 10 } + }, + "strokeDash": { + "field": "region", + "type": "nominal", + "legend": null + }, + "tooltip": [ + { "field": "region", "type": "nominal", "title": "Region" }, + { "field": "source", "type": "nominal", "title": "Source" }, + { "field": "param", "type": "nominal", "title": "Parameter" }, + { "field": "metric", "type": "nominal", "title": "Metric" }, + { "field": "lead_time", "type": "quantitative", "title": "Lead Time (h)" }, + { "field": "value", "type": "quantitative", "title": "Value" } + ] }, - "transform": [ - { - "filter": { "param": "xZoom" } - } - ] }, - "resolve": { - "scale": { - "x": "shared", - "y": "independent" - } - } }; @@ -102,6 +129,7 @@ function getSelectedValues(id) { } function updateChart() { + const selectedRegions = getSelectedValues("region-select"); const selectedSources = getSelectedValues("source-select"); const selectedparams = getSelectedValues("param-select"); const selectedMetrics = getSelectedValues("metric-select"); @@ -109,6 +137,10 @@ function updateChart() { const newSpec = JSON.parse(JSON.stringify(spec)); const filters = []; + newSpec.title = "Verification using " + header; + if (selectedRegions.length > 0) { + filters.push({ field: "region", oneOf: selectedRegions }); + } if (selectedSources.length > 0) { filters.push({ field: "source", oneOf: selectedSources }); } diff --git a/resources/report/dashboard/template.html.jinja2 b/resources/report/dashboard/template.html.jinja2 index 3fe6adbe..36f9fecf 100644 --- a/resources/report/dashboard/template.html.jinja2 +++ b/resources/report/dashboard/template.html.jinja2 @@ -101,12 +101,15 @@
-
-

- {{header_text}} -

-
+
+ + +