From c1375ab0ed4e7c27d40daddde37bb09cde98c350 Mon Sep 17 00:00:00 2001
From: Daniele Nerini <daniele.nerini@gmail.com>
Date: Tue, 7 Oct 2025 14:01:42 +0200
Subject: [PATCH 01/34] Initial draft (pseudo code)

---
 workflow/rules/verif_obs.smk | 51 ++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 workflow/rules/verif_obs.smk

diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk
new file mode 100644
index 00000000..69e89c38
--- /dev/null
+++ b/workflow/rules/verif_obs.smk
@@ -0,0 +1,51 @@
+from pathlib import Path
+
+rule generate_mec_namelist:
+    input:
+        template="resources/mec/namelist.jinja2"
+    output:
+        namelist=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/namelist",
+    run:
+        """
+        import jinja2
+
+        context = {"init_time": wildcards.init_time}
+        env = jinja2.Environment(
+            loader=jinja2.FileSystemLoader({Path(input.template).parent})
+        )
+        template = env.get_template(input.template)
+        namelist = template.render(**context)
+
+        namelist_fn = Path(output.namelist)
+        with namelist_fn.open("w+") as f:
+            f.write(namelist)
+        """
+
+rule run_mec:
+    input:
+        grib_dir=OUT_ROOT / "data/runs/{run_id}/{init_time}/grib",
+        ekf="path/to/ekf/file{init_time}",
+        namelist=generate_mec_namelist.output.namelist
+    output:
+        feedback=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/feedbacks/verSYNOP.nc
+    # module: sarus?
+    resources:
+        cpus_per_task=1,
+        runtime="1h",
+    shell:
+        """
+        # some code to prepare the data
+        # (or use a separate rule)
+        # sarus command from Mary
+        sarus pull ...
+        """
+
+rule rename_feedback:
+    input:
+        feedback=run_mec.output.feedback
+    output:
+        feedback=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/feedback
+    shell:
+
+
+# rule ...
\ No newline at end of file

From 9f608f2f0dd65dade23606bee408e331e8d19757 Mon Sep 17 00:00:00 2001
From: Mary McGlohon <Mary.McGlohon@meteoswiss.ch>
Date: Thu, 13 Nov 2025 16:58:38 +0100
Subject: [PATCH 02/34] add namelist as resource

---
 resources/mec/namelist.jinja2 | 80 +++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 resources/mec/namelist.jinja2

diff --git a/resources/mec/namelist.jinja2 b/resources/mec/namelist.jinja2
new file mode 100644
index 00000000..a9f5c9dc
--- /dev/null
+++ b/resources/mec/namelist.jinja2
@@ -0,0 +1,80 @@
+!==============================================================================
+! namelist for MEC
+!==============================================================================
+
+ !===================
+ ! general parameters
+ !===================
+ &run
+   method         = 'GMESTAT'                     ! Model Equivalent Calculator
+   model          = 'ML'                          ! forecast model. One of "COSMO" "ICON" "ML"
+   input          = './input_mod'                 ! input data path
+   data           = '/oprusers/osm/opr.emme/data/'      ! data path for auxiliary data
+   obsinput       = './input_obs'                 ! observation input data path
+   output         = './output'                    ! output data to working directory
+   time_ana       = 20201028000000                  ! analysis date
+   read_fields    = 'ps u t v q geof t2m td2m u_10m v_10m'
+   grib_edition   = 2
+   grib_library   = 2                             ! GRIB-API used: 1=GRIBEX  2=GRIB2-API
+   cosmo_refatm   = 2                             ! reference atmosphere to be used for COSMO:1or2
+   fc_hours       = 0                                             ! Default is 3h. Has to be set to 0 if one wants to verify +0h leadtime
+   nproc1         = 1
+   nproc2         = 1
+ /
+
+ !===============================
+ ! observation related parameters
+ !===============================
+  &observations
+   !---------------------------------------------------
+   ! read from CDFIN files (if not set use mon/cof/ekf)  
+   !---------------------------------------------------
+   read_cdfin   = F      ! (F): do not read COSMO CDFIN files; get obs from ekf
+   vint_lin_t   = T      ! linear vertical interpolation for temperature
+   vint_lin_z   = T      ! linear vertical interpolation for geopotential
+   vint_lin_uv  = T      ! linear vertical interpolation for wind
+   ptop_lapse   = 850.
+   pbot_lapse   = 950.
+!  int_nn       = T      ! horizontal interpolation: nearest neighbor 
+ /
+
+ !====================
+ ! Ensemble parameters
+ !====================
+ &ENKF
+  k_enkf        = 0       ! ensemble size (0 for det. run)
+  det_run       = 1       ! set to 1 for deterministic run, 0 for ensemble
+ /
+
+ !================================
+ ! Verification related parameters
+ !================================
+ &veri_obs
+  obstypes      = "SYNOP"      ! "SYNOP TEMP"
+  fc_times      = 0000,1200,2400,3600,4800,6000,7200,8400,9600,10800,12000  ! forecast lead time at reference (hhmm)
+  prefix_in     = 'ekf'                                                                                                                                                                                           
+  prefix_out    = 'ver'               
+  rm_old        = 2                          ! overwrite entries in verification file ?
+  fc_file       = '_FCR_TIME_/lfffDDVVMMSS'  ! template for forecast file name
+  !det_suffix    = '.m000'   ! for ensemble forecast must be set in order to differentiate between the input model files.
+  time_range    = 1                                                                                                                                                                                               
+  ekf_concat    = F                                                                                                                                                                                               
+  !ekf_rm_ve     =  -2 -7   ! special (<0) member ids to remove 
+  !eps_offset    = 40   ! skip the first members up to the value set here. The default is 0
+  !ekf_offset    = 40   ! exclude members in ekf-file up to this value => try setting to 40? => no effect => ask Hendrik?
+  ref_runtype   = 'any'  ! accept any runtype for the reference state
+ /
+
+ &report
+  time_b =   -0029                ! (hhmm, inclusive)
+  time_e =    0030                ! (hhmm, exclusive)
+ /
+
+ &cosmo_obs
+   lcd187             = .true.    ! use ground based wind lidar obs
+   verification_start = -29       ! (min, inclusive)
+   verification_end   =  30       ! (min, inclusive)
+ /
+ &synop_obs
+   version = 1   ! Harald: Für version >= 3 werden konsistent die DACE-internen Kontrollvariablen genutzt, mit ggf. Umrechnung tv,rh <-> t,td etc. DACE-Version mind. 2.22 . Random crashes (T=10000 Umrechnungen failen). Need of no undefs for version=3 
+ /
\ No newline at end of file

From e82bd9487e27c792a3c2e173b5f86ae26e3e047a Mon Sep 17 00:00:00 2001
From: Mary McGlohon <Mary.McGlohon@meteoswiss.ch>
Date: Thu, 13 Nov 2025 16:59:04 +0100
Subject: [PATCH 03/34] add verif_obs.smk to Snakefile

---
 workflow/Snakefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index 8b8d99ae..f806f44d 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -15,6 +15,7 @@ include: "rules/data.smk"
 include: "rules/inference.smk"
 include: "rules/verif.smk"
 include: "rules/report.smk"
+include: "rules/verif_obs.smk"
 
 
 # optional messages, log and error handling

From c3ab6516e7c425afe917f5c3d1bb778db83a6725 Mon Sep 17 00:00:00 2001
From: Mary McGlohon <Mary.McGlohon@meteoswiss.ch>
Date: Thu, 13 Nov 2025 18:32:32 +0100
Subject: [PATCH 04/34] Add rules for observation data and namelist generation
 (using fake data)

---
 workflow/Snakefile           |  8 +++++
 workflow/rules/verif_obs.smk | 58 ++++++++++++++++++++----------------
 2 files changed, 40 insertions(+), 26 deletions(-)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index f806f44d..de216ff2 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -85,3 +85,11 @@ rule verif_metrics_plot_all:
             rules.verif_metrics_plot.output,
             experiment=EXPERIMENT_HASH,
         ),
+
+rule verif_obs_all:
+    input:
+        expand(
+            rules.run_mec.output,
+            init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
+            run_id=collect_all_runs(),
+        )
\ No newline at end of file
diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk
index 69e89c38..3f3ac961 100644
--- a/workflow/rules/verif_obs.smk
+++ b/workflow/rules/verif_obs.smk
@@ -1,51 +1,57 @@
 from pathlib import Path
 
+rule generate_observation_data:
+    input:
+        testcase_dir="/scratch/mch/mmcgloho/MEC/2020102800",
+    output:
+        input_obs=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_obs"),
+        input_mod=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_mod"),
+        parent=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec"),
+    shell:
+        """
+        cp -r {input.testcase_dir}/input_obs {output.parent}/
+        cp -r {input.testcase_dir}/input_mod {output.parent}/
+        ls {output.parent}
+        # TODO: Some data still seems to be missing.
+        """
+
 rule generate_mec_namelist:
     input:
         template="resources/mec/namelist.jinja2"
     output:
+        #namelist=OUT_ROOT / "data/runs/mec/namelist",
+        # TODO: get wildcards working.
         namelist=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/namelist",
     run:
-        """
         import jinja2
-
+        # TODO: get wildcards working.
         context = {"init_time": wildcards.init_time}
+        template_path = Path(input.template)
         env = jinja2.Environment(
-            loader=jinja2.FileSystemLoader({Path(input.template).parent})
+            loader=jinja2.FileSystemLoader({template_path.parent})
         )
-        template = env.get_template(input.template)
+        template = env.get_template(template_path.name)
         namelist = template.render(**context)
-
         namelist_fn = Path(output.namelist)
         with namelist_fn.open("w+") as f:
             f.write(namelist)
-        """
 
 rule run_mec:
     input:
-        grib_dir=OUT_ROOT / "data/runs/{run_id}/{init_time}/grib",
-        ekf="path/to/ekf/file{init_time}",
-        namelist=generate_mec_namelist.output.namelist
+        testcase_dir=directory(rules.generate_observation_data.output.parent),
+        namelist=rules.generate_mec_namelist.output.namelist
     output:
-        feedback=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/feedbacks/verSYNOP.nc
-    # module: sarus?
+        OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/output/verSYNOP.nc"
     resources:
         cpus_per_task=1,
         runtime="1h",
     shell:
+        #TODO(mmcglohon): Replace podman with sarus if needed.
         """
-        # some code to prepare the data
-        # (or use a separate rule)
-        # sarus command from Mary
-        sarus pull ...
-        """
-
-rule rename_feedback:
-    input:
-        feedback=run_mec.output.feedback
-    output:
-        feedback=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/feedback
-    shell:
-
-
-# rule ...
\ No newline at end of file
+        echo 'running mec on namelist:'
+        cat {input.namelist}
+        ls {input.testcase_dir}
+        podman pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
+        srun --pty -N1 -c 11 -p postproc -t 2:00:00 podman run --mount=type=bind,source={input.testcase_dir},destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
+        ls -l {output}
+        """
\ No newline at end of file

From 7512d96e52756c4827c44e436943f9da15c1fd4f Mon Sep 17 00:00:00 2001
From: Mary McGlohon <Mary.McGlohon@meteoswiss.ch>
Date: Thu, 13 Nov 2025 18:43:14 +0100
Subject: [PATCH 05/34] add newline to namelist template

---
 resources/mec/namelist.jinja2 | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/resources/mec/namelist.jinja2 b/resources/mec/namelist.jinja2
index a9f5c9dc..6fa71063 100644
--- a/resources/mec/namelist.jinja2
+++ b/resources/mec/namelist.jinja2
@@ -77,4 +77,5 @@
  /
  &synop_obs
    version = 1   ! Harald: Für version >= 3 werden konsistent die DACE-internen Kontrollvariablen genutzt, mit ggf. Umrechnung tv,rh <-> t,td etc. DACE-Version mind. 2.22 . Random crashes (T=10000 Umrechnungen failen). Need of no undefs for version=3 
- /
\ No newline at end of file
+ /
+ 
\ No newline at end of file

From 13301a507664a9f2d9807910c1f2605523d7076b Mon Sep 17 00:00:00 2001
From: Mary McGlohon <Mary.McGlohon@meteoswiss.ch>
Date: Thu, 13 Nov 2025 19:16:01 +0100
Subject: [PATCH 06/34] somewhat working version of run_mec (with fake data)

---
 workflow/Snakefile           | 1 +
 workflow/rules/verif_obs.smk | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index de216ff2..2e27e44f 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -86,6 +86,7 @@ rule verif_metrics_plot_all:
             experiment=EXPERIMENT_HASH,
         ),
 
+# To run:  snakemake --cores 1 --configfile=config/recasters.yaml verif_obs_all
 rule verif_obs_all:
     input:
         expand(
diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk
index 3f3ac961..fffa29c8 100644
--- a/workflow/rules/verif_obs.smk
+++ b/workflow/rules/verif_obs.smk
@@ -51,7 +51,8 @@ rule run_mec:
         echo 'running mec on namelist:'
         cat {input.namelist}
         ls {input.testcase_dir}
-        podman pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
+        # Note: pull command currently redundant; may not be the case with sarus.
+        #podman pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
         srun --pty -N1 -c 11 -p postproc -t 2:00:00 podman run --mount=type=bind,source={input.testcase_dir},destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
         ls -l {output}
         """
\ No newline at end of file

From e722e5f3ac9023305bd64a4fbcba81d6b101fd0f Mon Sep 17 00:00:00 2001
From: Mary McGlohon <Mary.McGlohon@meteoswiss.ch>
Date: Mon, 24 Nov 2025 18:00:28 +0100
Subject: [PATCH 07/34] correct typo and add optional script for generating
 namelist, in case we want to factor it out of the rule

---
 workflow/Snakefile                        |  2 +-
 workflow/scripts/generate_mec_namelist.py | 27 +++++++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 workflow/scripts/generate_mec_namelist.py

diff --git a/workflow/Snakefile b/workflow/Snakefile
index 2e27e44f..cf595cb3 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -86,7 +86,7 @@ rule verif_metrics_plot_all:
             experiment=EXPERIMENT_HASH,
         ),
 
-# To run:  snakemake --cores 1 --configfile=config/recasters.yaml verif_obs_all
+# To run:  snakemake --cores 1 --configfile=config/forecasters.yaml verif_obs_all
 rule verif_obs_all:
     input:
         expand(
diff --git a/workflow/scripts/generate_mec_namelist.py b/workflow/scripts/generate_mec_namelist.py
new file mode 100644
index 00000000..0e938805
--- /dev/null
+++ b/workflow/scripts/generate_mec_namelist.py
@@ -0,0 +1,27 @@
+import logging
+import jinja2
+# snakemake object inherited by default, but this enables code completion.
+from snakemake.script import snakemake
+from pathlib import Path
+
+# Note: not currently in use; optional script in case we want to factor it out
+# of the rules file
+def main(args=None):
+	"""Render the namelist template (empty context) to the `namelist` output; `args` is unused."""
+	# TODO: get wildcards working, then pass {"init_time": snakemake.wildcards.init_time}
+	context = {}
+	template_path = Path(snakemake.input.template)
+	namelist_fn = Path(snakemake.output['namelist'])
+	# pass the path as a %-argument so logging interpolates it into the message
+	logging.info('writing namelist to %s', namelist_fn)
+	env = jinja2.Environment(
+		loader=jinja2.FileSystemLoader([template_path.parent])
+	)
+	template = env.get_template(template_path.name)
+	namelist = template.render(**context)
+	namelist_fn.write_text(namelist)
+	logging.info('finished writing namelist')
+
+
+if __name__ == "__main__":
+	main()
\ No newline at end of file

From 3d9e3c10973b643ca3f7c6fc72404e3b379e9247 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <62377868+frazane@users.noreply.github.com>
Date: Wed, 8 Oct 2025 14:01:59 +0200
Subject: [PATCH 08/34] fix: add localrule to inference_interpolator rule (#57)

---
 workflow/rules/inference.smk | 1 +
 1 file changed, 1 insertion(+)

diff --git a/workflow/rules/inference.smk b/workflow/rules/inference.smk
index 38ebf285..91e75b73 100644
--- a/workflow/rules/inference.smk
+++ b/workflow/rules/inference.smk
@@ -192,6 +192,7 @@ def _get_forecaster_run_id(run_id):
 
 rule inference_interpolator:
     """Run the interpolator for a specific run ID."""
+    localrule: True
     input:
         pyproject=rules.create_inference_pyproject.output.pyproject,
         image=rules.make_squashfs_image.output.image,

From 918913f9188288b9e01b8c2c7ca9c25b0259455b Mon Sep 17 00:00:00 2001
From: omiralles <ophelia.miralles@meteoswiss.ch>
Date: Wed, 8 Oct 2025 16:40:39 +0200
Subject: [PATCH 09/34] Fix for interpolator rule

---
 workflow/rules/inference.smk | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/workflow/rules/inference.smk b/workflow/rules/inference.smk
index 91e75b73..f83f0fd5 100644
--- a/workflow/rules/inference.smk
+++ b/workflow/rules/inference.smk
@@ -143,7 +143,7 @@ rule inference_forecaster:
         slurm_partition=lambda wc: get_resource(wc, "slurm_partition", "short-shared"),
         cpus_per_task=lambda wc: get_resource(wc, "cpus_per_task", 24),
         mem_mb_per_cpu=lambda wc: get_resource(wc, "mem_mb_per_cpu", 8000),
-        runtime=lambda wc: get_resource(wc, "runtime", "20m"),
+        runtime=lambda wc: get_resource(wc, "runtime", "40m"),
         gres=lambda wc: f"gpu:{get_resource(wc, 'gpu',1)}",
         ntasks=lambda wc: get_resource(wc, "tasks", 1),
         slurm_extra=lambda wc, input: f"--uenv={Path(input.image).resolve()}:/user-environment",
@@ -222,13 +222,14 @@ rule inference_interpolator:
             if RUN_CONFIGS[wc.run_id].get("forecaster") is None
             else _get_forecaster_run_id(wc.run_id)
         ),
+        image_path=lambda wc, input: f"{Path(input.image).resolve()}",
     log:
         OUT_ROOT / "logs/inference_interpolator/{run_id}-{init_time}.log",
     resources:
         slurm_partition=lambda wc: get_resource(wc, "slurm_partition", "short-shared"),
         cpus_per_task=lambda wc: get_resource(wc, "cpus_per_task", 24),
         mem_mb_per_cpu=lambda wc: get_resource(wc, "mem_mb_per_cpu", 8000),
-        runtime=lambda wc: get_resource(wc, "runtime", "20m"),
+        runtime=lambda wc: get_resource(wc, "runtime", "40m"),
         gres=lambda wc: f"gpu:{get_resource(wc, 'gpu',1)}",
         ntasks=lambda wc: get_resource(wc, "tasks", 1),
         slurm_extra=lambda wc, input: f"--uenv={Path(input.image).resolve()}:/user-environment",

From 179eb4da3a3eb6ef72ef492cb43048208b67da74 Mon Sep 17 00:00:00 2001
From: Daniele Nerini <daniele.nerini@meteoswiss.ch>
Date: Tue, 14 Oct 2025 09:09:40 +0200
Subject: [PATCH 10/34] Consolidate multi packages into unique src/ dir (#58)

---
 pyproject.toml                                              | 6 ++++++
 .../src/verification.py => src/verification/__init__.py     | 0
 workflow/rules/verif.smk                                    | 3 ++-
 workflow/scripts/src/__init__.py                            | 0
 workflow/scripts/verif_baseline.py                          | 3 ++-
 workflow/scripts/verif_from_grib.py                         | 2 +-
 6 files changed, 11 insertions(+), 3 deletions(-)
 rename workflow/scripts/src/verification.py => src/verification/__init__.py (100%)
 delete mode 100644 workflow/scripts/src/__init__.py

diff --git a/pyproject.toml b/pyproject.toml
index 4c60c9c5..043c1718 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,3 +45,9 @@ dev = [
 markers = [
     "longtest: mark tests that take a long time to run, e.g. integration tests",
 ]
+
+[tool.hatch.build.targets.wheel]
+packages = [
+  "src/evalml",
+  "src/verification"
+]
\ No newline at end of file
diff --git a/workflow/scripts/src/verification.py b/src/verification/__init__.py
similarity index 100%
rename from workflow/scripts/src/verification.py
rename to src/verification/__init__.py
diff --git a/workflow/rules/verif.smk b/workflow/rules/verif.smk
index 677c8d7e..d1032cb2 100644
--- a/workflow/rules/verif.smk
+++ b/workflow/rules/verif.smk
@@ -13,7 +13,7 @@ include: "common.smk"
 rule verif_metrics_baseline:
     input:
         script="workflow/scripts/verif_baseline.py",
-        module="workflow/scripts/src/verification.py",
+        module="src/verification/__init__.py",
         baseline_zarr=lambda wc: expand(
             "{root}/FCST{year}.zarr",
             root=BASELINE_CONFIGS[wc.baseline_id].get("root"),
@@ -55,6 +55,7 @@ def _get_no_none(dict, key, replacement):
 rule verif_metrics:
     input:
         script="workflow/scripts/verif_from_grib.py",
+        module="src/verification/__init__.py",
         inference_okfile=_inference_routing_fn,
         grib_output=rules.inference_routing.output[0],
         analysis_zarr=config["analysis"].get("analysis_zarr"),
diff --git a/workflow/scripts/src/__init__.py b/workflow/scripts/src/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/workflow/scripts/verif_baseline.py b/workflow/scripts/verif_baseline.py
index 88120c57..3f034cc9 100644
--- a/workflow/scripts/verif_baseline.py
+++ b/workflow/scripts/verif_baseline.py
@@ -12,7 +12,8 @@
 
 import numpy as np  # noqa: E402
 import xarray as xr  # noqa: E402
-from src.verification import verify  # noqa: E402
+
+from verification import verify  # noqa: E402
 
 LOG = logging.getLogger(__name__)
 logging.basicConfig(
diff --git a/workflow/scripts/verif_from_grib.py b/workflow/scripts/verif_from_grib.py
index da7c5ece..61d62a2a 100644
--- a/workflow/scripts/verif_from_grib.py
+++ b/workflow/scripts/verif_from_grib.py
@@ -13,7 +13,7 @@
 import numpy as np  # noqa: E402
 import xarray as xr  # noqa: E402
 
-from src.verification import verify  # noqa: E402
+from verification import verify  # noqa: E402
 
 LOG = logging.getLogger(__name__)
 logging.basicConfig(

From e791a3058fb792288510afcbf3b80233ff6e4da7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Oph=C3=A9lia=20Miralles?= <opheliamiralles@gmail.com>
Date: Wed, 15 Oct 2025 14:56:39 +0200
Subject: [PATCH 11/34] Update configs (#63)

---
 resources/inference/configs/forecaster.yaml   |  1 -
 resources/inference/configs/interpolator.yaml |  2 +-
 .../configs/interpolator_from_test_data.yaml  |  4 ----
 ...interpolator_from_test_data_stretched.yaml | 24 +++++++++++++++++++
 .../configs/interpolator_stretched.yaml       |  2 +-
 5 files changed, 26 insertions(+), 7 deletions(-)
 create mode 100644 resources/inference/configs/interpolator_from_test_data_stretched.yaml

diff --git a/resources/inference/configs/forecaster.yaml b/resources/inference/configs/forecaster.yaml
index dac3e496..4f558a3e 100644
--- a/resources/inference/configs/forecaster.yaml
+++ b/resources/inference/configs/forecaster.yaml
@@ -4,7 +4,6 @@ input:
 
 allow_nans: true
 
-
 output:
   tee:
     outputs:
diff --git a/resources/inference/configs/interpolator.yaml b/resources/inference/configs/interpolator.yaml
index 8cbb98fe..41253c0c 100644
--- a/resources/inference/configs/interpolator.yaml
+++ b/resources/inference/configs/interpolator.yaml
@@ -56,7 +56,7 @@ output:
     templates:
       samples: _resources/templates_index_cosmo.yaml
 
-forcings:
+constant_forcings:
   test:
     use_original_paths: true
 
diff --git a/resources/inference/configs/interpolator_from_test_data.yaml b/resources/inference/configs/interpolator_from_test_data.yaml
index aaa938fa..07aea411 100644
--- a/resources/inference/configs/interpolator_from_test_data.yaml
+++ b/resources/inference/configs/interpolator_from_test_data.yaml
@@ -17,10 +17,6 @@ output:
     templates:
       samples: _resources/templates_index_cosmo.yaml
 
-forcings:
-  test:
-    use_original_paths: true
-
 verbosity: 1
 allow_nans: true
 output_frequency: "1h"
diff --git a/resources/inference/configs/interpolator_from_test_data_stretched.yaml b/resources/inference/configs/interpolator_from_test_data_stretched.yaml
new file mode 100644
index 00000000..19cd733e
--- /dev/null
+++ b/resources/inference/configs/interpolator_from_test_data_stretched.yaml
@@ -0,0 +1,24 @@
+runner: time_interpolator
+
+input:
+  test:
+    use_original_paths: true
+
+output:
+  tee:
+    outputs:
+      - extract_lam:
+          output:
+            assign_mask:
+              mask: "source0/trimedge_mask"
+              output:
+                grib:
+                  path: grib/{dateTime}_{step:03}.grib
+                  encoding:
+                    typeOfGeneratingProcess: 2
+                  templates:
+                    samples: _resources/templates_index_cosmo.yaml
+
+verbosity: 1
+allow_nans: true
+output_frequency: "1h"
diff --git a/resources/inference/configs/interpolator_stretched.yaml b/resources/inference/configs/interpolator_stretched.yaml
index 0010ffe8..300d6c65 100644
--- a/resources/inference/configs/interpolator_stretched.yaml
+++ b/resources/inference/configs/interpolator_stretched.yaml
@@ -79,7 +79,7 @@ input:
             - - shortName: TOT_PREC
               - tp
 
-forcings:
+constant_forcings:
   test:
     use_original_paths: true
 

From d1977121fabfd9b3d45854a47c91b8c7eedb0a92 Mon Sep 17 00:00:00 2001
From: Daniele Nerini <daniele.nerini@meteoswiss.ch>
Date: Mon, 20 Oct 2025 11:57:11 +0200
Subject: [PATCH 12/34] Adopt 'steps' instead of 'lead_time' (#62)

---
 README.md                         |  4 +--
 config/forecasters-co1e.yaml      |  3 +--
 config/forecasters.yaml           |  4 +--
 config/interpolators.yaml         |  4 +--
 src/evalml/config.py              | 38 ++++++++++++++++++++++------
 workflow/rules/inference.smk      | 11 +++++++--
 workflow/rules/verif.smk          |  2 +-
 workflow/tools/config.schema.json | 41 ++++++++-----------------------
 8 files changed, 58 insertions(+), 49 deletions(-)

diff --git a/README.md b/README.md
index 2050b312..64fe254f 100644
--- a/README.md
+++ b/README.md
@@ -33,15 +33,15 @@ dates:
   end: 2020-01-10T00:00
   frequency: 54h
 
-lead_time: 120h
-
 runs:
   - forecaster:
       mlflow_id: 2f962c89ff644ca7940072fa9cd088ec
       label: Stage D - N320 global grid with CERRA finetuning
+      steps: 0/126/6
   - forecaster:
       mlflow_id: d0846032fc7248a58b089cbe8fa4c511
       label: M-1 forecaster
+      steps: 0/126/6
 
 
 baselines:
diff --git a/config/forecasters-co1e.yaml b/config/forecasters-co1e.yaml
index 490196cb..6171d62a 100644
--- a/config/forecasters-co1e.yaml
+++ b/config/forecasters-co1e.yaml
@@ -8,13 +8,12 @@ dates:
   end: 2020-01-10T00:00
   frequency: 54h
 
-lead_time: 120h
-
 runs:
   - forecaster:
       mlflow_id: 2174c939c8844555a52843b71219d425
       label: Cosmo 1km + era5 N320, finetuned on cerra checkpoint, lam resolution 11
       config: resources/inference/configs/forecaster_no_trimedge_fromtraining.yaml
+      steps: 0/126/6
       inference_resources:
         gpu: 4
         tasks: 4
diff --git a/config/forecasters.yaml b/config/forecasters.yaml
index 17d98729..413a734f 100644
--- a/config/forecasters.yaml
+++ b/config/forecasters.yaml
@@ -8,15 +8,15 @@ dates:
   # end: 2020-03-30T00:00
   frequency: 36h
 
-lead_time: 120h
-
 runs:
   - forecaster:
       mlflow_id: 2f962c89ff644ca7940072fa9cd088ec
       label: Stage D - N320 global grid with CERRA finetuning
+      steps: 0/126/6
   - forecaster:
       mlflow_id: d0846032fc7248a58b089cbe8fa4c511
       label: M-1 forecaster
+      steps: 0/126/6
 
 baselines:
   - baseline:
diff --git a/config/interpolators.yaml b/config/interpolators.yaml
index 893b579c..c8e796e4 100644
--- a/config/interpolators.yaml
+++ b/config/interpolators.yaml
@@ -7,8 +7,6 @@ dates:
   end: 2020-01-10T00:00
   frequency: 54h
 
-lead_time: 120h
-
 runs:
   - interpolator:
       mlflow_id: 9c18b90074214d769b8b383722fc5a06
@@ -27,6 +25,7 @@ runs:
       forecaster:
         mlflow_id: d0846032fc7248a58b089cbe8fa4c511
         config: resources/inference/configs/forecaster_with_global.yaml
+        steps: 0/126/6
       extra_dependencies:
         - git+https://github.com/ecmwf/anemoi-inference@fix/interp_files
         - torch-geometric==2.6.1
@@ -39,6 +38,7 @@ runs:
       forecaster:
         mlflow_id: d0846032fc7248a58b089cbe8fa4c511
         config: resources/inference/configs/forecaster_with_global.yaml
+        steps: 0/126/6
       extra_dependencies:
         - git+https://github.com/ecmwf/anemoi-inference@fix/interp_files
         - torch-geometric==2.6.1
diff --git a/src/evalml/config.py b/src/evalml/config.py
index c6bcf1e7..c4a97ce2 100644
--- a/src/evalml/config.py
+++ b/src/evalml/config.py
@@ -1,7 +1,7 @@
 from pathlib import Path
 from typing import Dict, List, Any
 
-from pydantic import BaseModel, Field, RootModel, HttpUrl
+from pydantic import BaseModel, Field, RootModel, HttpUrl, field_validator
 
 PROJECT_ROOT = Path(__file__).parents[2]
 
@@ -70,9 +70,13 @@ class RunConfig(BaseModel):
         None,
         description="The label for the run that will be used in experiment results such as reports and figures.",
     )
-    steps: str | None = Field(
-        None,
-        description="Forecast steps to be used from interpolator, e.g. '0/126/6'.",
+    steps: str = Field(
+        ...,
+        description=(
+            "Forecast lead times in hours, formatted as 'start/end/step'. "
+            "The range is half-open [start, end), meaning it includes the start  "
+            "but excludes the end. Example: '0/126/6' for lead times every 6 hours up to 120 hours."
+        ),
     )
     extra_dependencies: List[str] = Field(
         default_factory=list,
@@ -86,6 +90,29 @@ class RunConfig(BaseModel):
 
     config: Dict[str, Any] | str
 
+    @field_validator("steps")
+    @classmethod
+    def validate_steps(cls, v: str) -> str:
+        """Check that ``steps`` is 'start/end/step' with integer parts; return it unchanged."""
+        parts = v.split("/")
+        if len(parts) != 3:
+            raise ValueError(
+                f"Steps must be formatted as 'start/end/step', got '{v}'."
+            )
+        try:
+            start, end, step = map(int, parts)
+        except ValueError:
+            raise ValueError("Start, end, and step must be integers.") from None
+        if start >= end:
+            raise ValueError(f"Start ({start}) must be less than end ({end}).")
+        if step <= 0:
+            raise ValueError(f"Step ({step}) must be a positive integer.")
+        if (end - start) % step != 0:
+            raise ValueError(
+                f"The step ({step}) must evenly divide the range ({end - start})."
+            )
+        return v
+
 
 class ForecasterConfig(RunConfig):
     """Single training run stored in MLflow."""
@@ -240,9 +267,6 @@ class ConfigModel(BaseModel):
         description="Description of the experiment, e.g. 'Hindcast of the 2023 season.'",
     )
     dates: Dates | ExplicitDates
-    lead_time: str = Field(
-        ..., description="Forecast length, e.g. '120h'", pattern=r"^\d+[hmd]$"
-    )
     runs: List[ForecasterItem | InterpolatorItem] = Field(
         ...,
         description="Dictionary of runs to execute, with run IDs as keys and configurations as values.",
diff --git a/workflow/rules/inference.smk b/workflow/rules/inference.smk
index f83f0fd5..ea136b97 100644
--- a/workflow/rules/inference.smk
+++ b/workflow/rules/inference.smk
@@ -118,6 +118,13 @@ def get_resource(wc, field: str, default):
         return getattr(rc["inference_resources"], field) or default
 
 
+def get_leadtime(wc):
+    """Get the lead time from the run config."""
+    start, end, step = RUN_CONFIGS[wc.run_id]["steps"].split("/")
+    end = int(end) - int(step)  # make inclusive
+    return f"{end}h"
+
+
 rule inference_forecaster:
     localrule: True
     input:
@@ -130,7 +137,7 @@ rule inference_forecaster:
         checkpoints_path=parse_input(
             input.pyproject, parse_toml, key="tool.anemoi.checkpoints_path"
         ),
-        lead_time=config["lead_time"],
+        lead_time=lambda wc: get_leadtime(wc),
         output_root=(OUT_ROOT / "data").resolve(),
         resources_root=Path("resources/inference").resolve(),
         reftime_to_iso=lambda wc: datetime.strptime(
@@ -211,7 +218,7 @@ rule inference_interpolator:
         checkpoints_path=parse_input(
             input.pyproject, parse_toml, key="tool.anemoi.checkpoints_path"
         ),
-        lead_time=config["lead_time"],
+        lead_time=lambda wc: get_leadtime(wc),
         output_root=(OUT_ROOT / "data").resolve(),
         resources_root=Path("resources/inference").resolve(),
         reftime_to_iso=lambda wc: datetime.strptime(
diff --git a/workflow/rules/verif.smk b/workflow/rules/verif.smk
index d1032cb2..10acbe13 100644
--- a/workflow/rules/verif.smk
+++ b/workflow/rules/verif.smk
@@ -66,7 +66,7 @@ rule verif_metrics:
     # TODO: implement logic to use experiment name instead of run_id as wildcard
     params:
         fcst_label=lambda wc: RUN_CONFIGS[wc.run_id].get("label"),
-        fcst_steps=lambda wc: _get_no_none(RUN_CONFIGS[wc.run_id], "steps", "0/126/6"),
+        fcst_steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"],
         analysis_label=config["analysis"].get("label"),
     log:
         OUT_ROOT / "logs/verif_metrics/{run_id}-{init_time}.log",
diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json
index 5b3216ce..4bc53068 100644
--- a/workflow/tools/config.schema.json
+++ b/workflow/tools/config.schema.json
@@ -167,17 +167,9 @@
           "title": "Label"
         },
         "steps": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Forecast steps to be used from interpolator, e.g. '0/126/6'.",
-          "title": "Steps"
+          "description": "Forecast lead times in hours, formatted as 'start/end/step'. The range is half-open [start, end), meaning it includes the start  but excludes the end. Example: '0/126/6' for lead times every 6 hours up to 120 hours.",
+          "title": "Steps",
+          "type": "string"
         },
         "extra_dependencies": {
           "description": "List of extra dependencies to install for this model. These will be added to the pyproject.toml file in the run directory.",
@@ -214,7 +206,8 @@
         }
       },
       "required": [
-        "mlflow_id"
+        "mlflow_id",
+        "steps"
       ],
       "title": "ForecasterConfig",
       "type": "object"
@@ -355,17 +348,9 @@
           "title": "Label"
         },
         "steps": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Forecast steps to be used from interpolator, e.g. '0/126/6'.",
-          "title": "Steps"
+          "description": "Forecast lead times in hours, formatted as 'start/end/step'. The range is half-open [start, end), meaning it includes the start  but excludes the end. Example: '0/126/6' for lead times every 6 hours up to 120 hours.",
+          "title": "Steps",
+          "type": "string"
         },
         "extra_dependencies": {
           "description": "List of extra dependencies to install for this model. These will be added to the pyproject.toml file in the run directory.",
@@ -414,7 +399,8 @@
         }
       },
       "required": [
-        "mlflow_id"
+        "mlflow_id",
+        "steps"
       ],
       "title": "InterpolatorConfig",
       "type": "object"
@@ -509,12 +495,6 @@
       ],
       "title": "Dates"
     },
-    "lead_time": {
-      "description": "Forecast length, e.g. '120h'",
-      "pattern": "^\\d+[hmd]$",
-      "title": "Lead Time",
-      "type": "string"
-    },
     "runs": {
       "description": "Dictionary of runs to execute, with run IDs as keys and configurations as values.",
       "items": {
@@ -551,7 +531,6 @@
   "required": [
     "description",
     "dates",
-    "lead_time",
     "runs",
     "baselines",
     "analysis",

From 956898754d8b959d4a90be0441eccfbd34330b2a Mon Sep 17 00:00:00 2001
From: Daniele Nerini <daniele.nerini@meteoswiss.ch>
Date: Mon, 20 Oct 2025 12:14:00 +0200
Subject: [PATCH 13/34] Update example config for experiment with interpolators
 (#70)

---
 config/interpolators.yaml | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/config/interpolators.yaml b/config/interpolators.yaml
index c8e796e4..e04fc39c 100644
--- a/config/interpolators.yaml
+++ b/config/interpolators.yaml
@@ -10,29 +10,17 @@ dates:
 runs:
   - interpolator:
       mlflow_id: 9c18b90074214d769b8b383722fc5a06
-      label: LAM Interpolator (COSMO-E analysis)
+      label: M-2 interpolator (KENDA)
       steps: 0/121/1
       config: resources/inference/configs/interpolator_from_test_data.yaml
       forecaster: null
       extra_dependencies:
-        - git+https://github.com/ecmwf/anemoi-inference@fix/interp_files
+        - git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6
         - torch-geometric==2.6.1
         - anemoi-graphs==0.5.2
   - interpolator:
-      mlflow_id: 9c18b90074214d769b8b383722fc5a06
-      label: LAM Interpolator (M-1 forecaster)
-      steps: 0/121/1
-      forecaster:
-        mlflow_id: d0846032fc7248a58b089cbe8fa4c511
-        config: resources/inference/configs/forecaster_with_global.yaml
-        steps: 0/126/6
-      extra_dependencies:
-        - git+https://github.com/ecmwf/anemoi-inference@fix/interp_files
-        - torch-geometric==2.6.1
-        - anemoi-graphs==0.5.2
-  - interpolator:
-      mlflow_id: 07c3d9698db14d859b78bb712a65bbbf
-      label: SGM Interpolator (M-1 forecaster)
+      mlflow_id: 8d1e0410ca7d4f74b368b3079878259a
+      label: M-2 interpolator (M-1 forecaster)
       steps: 0/121/1
       config: resources/inference/configs/interpolator_stretched.yaml
       forecaster:
@@ -40,9 +28,14 @@ runs:
         config: resources/inference/configs/forecaster_with_global.yaml
         steps: 0/126/6
       extra_dependencies:
-        - git+https://github.com/ecmwf/anemoi-inference@fix/interp_files
+        - git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6
         - torch-geometric==2.6.1
         - anemoi-graphs==0.5.2
+  - forecaster:
+      mlflow_id: d0846032fc7248a58b089cbe8fa4c511
+      label: M-1 forecaster
+      config: resources/inference/configs/forecaster_with_global.yaml
+      steps: 0/126/6
 
 baselines:
   - baseline:

From 128eb91636631103e41dccd68f555e6bc30ab543 Mon Sep 17 00:00:00 2001
From: Daniele Nerini <daniele.nerini@meteoswiss.ch>
Date: Mon, 20 Oct 2025 14:27:18 +0200
Subject: [PATCH 14/34] Distinguish between primary runs ('candidates') and
 secondary runs (#64)

* Distinguish between primary runs ('candidates') and secondary runs

* Docstrings
---
 workflow/Snakefile        |  6 +++---
 workflow/rules/common.smk | 17 +++++++++++++++--
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index cf595cb3..b65cd638 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -55,7 +55,7 @@ rule sandbox_all:
     input:
         expand(
             rules.create_inference_sandbox.output.sandbox,
-            run_id=collect_all_runs(),
+            run_id=collect_all_candidates(),
         ),
 
 
@@ -65,7 +65,7 @@ rule run_inference_all:
         expand(
             OUT_ROOT / "data/runs/{run_id}/{init_time}/raw",
             init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
-            run_id=collect_all_runs(),
+            run_id=collect_all_candidates(),
         ),
 
 
@@ -74,7 +74,7 @@ rule verif_metrics_all:
         expand(
             rules.verif_metrics.output,
             init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
-            run_id=collect_all_runs(),
+            run_id=collect_all_candidates(),
         ),
 
 
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index cea00b23..98283be0 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -66,12 +66,13 @@ REFTIMES = _reftimes()
 
 
 def collect_all_runs():
-    """Collect all runs defined in the configuration."""
+    """Collect all runs defined in the configuration, including secondary runs."""
     runs = {}
     for run_entry in copy.deepcopy(config["runs"]):
         model_type = next(iter(run_entry))
         run_config = run_entry[model_type]
         run_config["model_type"] = model_type
+        run_config["is_candidate"] = True
         run_id = run_config["mlflow_id"][0:9]
 
         if model_type == "interpolator":
@@ -82,6 +83,7 @@ def collect_all_runs():
                 # Ensure a proper 'forecaster' entry exists with model_type
                 fore_cfg = copy.deepcopy(run_config["forecaster"])
                 fore_cfg["model_type"] = "forecaster"
+                fore_cfg["is_candidate"] = False  # exclude from outputs
                 runs[tail_id] = fore_cfg
             run_id = f"{run_id}-{tail_id}"
 
@@ -90,6 +92,16 @@ def collect_all_runs():
     return runs
 
 
+def collect_all_candidates():
+    """Collect participating runs ('candidates') only."""
+    runs = collect_all_runs()
+    candidates = {}
+    for run_id, run_config in runs.items():
+        if run_config.get("is_candidate", False):
+            candidates[run_id] = run_config
+    return candidates
+
+
 def collect_all_baselines():
     """Collect all baselines defined in the configuration."""
     baselines = {}
@@ -106,7 +118,8 @@ def collect_experiment_participants():
     for base in BASELINE_CONFIGS.keys():
         participants[base] = OUT_ROOT / f"data/baselines/{base}/verif_aggregated.nc"
     for exp in RUN_CONFIGS.keys():
-        participants[exp] = OUT_ROOT / f"data/runs/{exp}/verif_aggregated.nc"
+        if RUN_CONFIGS[exp].get("is_candidate", False):
+            participants[exp] = OUT_ROOT / f"data/runs/{exp}/verif_aggregated.nc"
     return participants
 
 

From 6315afc05cd6d1b6c100adfbcba7359bbc65aa73 Mon Sep 17 00:00:00 2001
From: Daniele Nerini <daniele.nerini@meteoswiss.ch>
Date: Tue, 21 Oct 2025 11:44:31 +0200
Subject: [PATCH 15/34] Adopt forecast intervals including the end point (#71)

* Adopt forecast intervals including the end point

* Fix parsing

* Experiments work

* Update config/forecasters.yaml

* Align init times to availability of COE

* run pre-commit

* Change README to COSMO-E availability

---------

Co-authored-by: Jonas Bhend <jonasbhend@users.noreply.github.com>
Co-authored-by: Jonas Bhend <jonas.bhend@meteoswiss.ch>
---
 README.md                            |  8 +++----
 config/forecasters-co1e.yaml         |  4 ++--
 config/forecasters.yaml              | 15 ++++--------
 config/interpolators.yaml            | 20 ++++++++--------
 pyproject.toml                       |  2 +-
 src/evalml/config.py                 | 18 +++++++-------
 workflow/rules/data.smk              |  8 +++----
 workflow/rules/inference.smk         |  1 -
 workflow/rules/verif.smk             |  6 ++---
 workflow/scripts/extract_baseline.py | 34 ++++++++++++--------------
 workflow/scripts/verif_baseline.py   | 32 +++++++++++--------------
 workflow/scripts/verif_from_grib.py  | 36 +++++++++++++---------------
 workflow/tools/config.schema.json    |  4 ++--
 13 files changed, 85 insertions(+), 103 deletions(-)

diff --git a/README.md b/README.md
index 64fe254f..f2fdf5b4 100644
--- a/README.md
+++ b/README.md
@@ -31,17 +31,17 @@ description: |
 dates:
   start: 2020-01-01T12:00
   end: 2020-01-10T00:00
-  frequency: 54h
+  frequency: 60h
 
 runs:
   - forecaster:
       mlflow_id: 2f962c89ff644ca7940072fa9cd088ec
       label: Stage D - N320 global grid with CERRA finetuning
-      steps: 0/126/6
+      steps: 0/120/6
   - forecaster:
       mlflow_id: d0846032fc7248a58b089cbe8fa4c511
       label: M-1 forecaster
-      steps: 0/126/6
+      steps: 0/120/6
 
 
 baselines:
@@ -49,7 +49,7 @@ baselines:
       baseline_id: COSMO-E
       label: COSMO-E
       root: /store_new/mch/msopr/ml/COSMO-E
-      steps: 0/126/6
+      steps: 0/120/6
 
 analysis:
   label: COSMO KENDA
diff --git a/config/forecasters-co1e.yaml b/config/forecasters-co1e.yaml
index 6171d62a..c72ae546 100644
--- a/config/forecasters-co1e.yaml
+++ b/config/forecasters-co1e.yaml
@@ -13,7 +13,7 @@ runs:
       mlflow_id: 2174c939c8844555a52843b71219d425
       label: Cosmo 1km + era5 N320, finetuned on cerra checkpoint, lam resolution 11
       config: resources/inference/configs/forecaster_no_trimedge_fromtraining.yaml
-      steps: 0/126/6
+      steps: 0/120/6
       inference_resources:
         gpu: 4
         tasks: 4
@@ -23,7 +23,7 @@ baselines:
       baseline_id: COSMO-1E
       label: COSMO-1E
       root: /scratch/mch/bhendj/COSMO-1E
-      steps: 0/126/6
+      steps: 0/33/6
 
 analysis:
   label: COSMO KENDA
diff --git a/config/forecasters.yaml b/config/forecasters.yaml
index 413a734f..1dbdad13 100644
--- a/config/forecasters.yaml
+++ b/config/forecasters.yaml
@@ -1,29 +1,24 @@
 # yaml-language-server: $schema=../workflow/tools/config.schema.json
 description: |
-  This is an experiment to do blabla.
+  Evaluate skill of COSMO-E emulator (M-1 forecaster).
 
 dates:
   start: 2020-01-01T12:00
   end: 2020-01-10T00:00
-  # end: 2020-03-30T00:00
-  frequency: 36h
+  frequency: 60h
 
 runs:
-  - forecaster:
-      mlflow_id: 2f962c89ff644ca7940072fa9cd088ec
-      label: Stage D - N320 global grid with CERRA finetuning
-      steps: 0/126/6
   - forecaster:
       mlflow_id: d0846032fc7248a58b089cbe8fa4c511
       label: M-1 forecaster
-      steps: 0/126/6
+      steps: 0/120/6
 
 baselines:
   - baseline:
       baseline_id: COSMO-E
       label: COSMO-E
       root: /store_new/mch/msopr/ml/COSMO-E
-      steps: 0/126/6
+      steps: 0/120/6
 
 analysis:
   label: COSMO KENDA
@@ -38,7 +33,7 @@ locations:
 profile:
   executor: slurm
   global_resources:
-    gpus: 15
+    gpus: 16
   default_resources:
     slurm_partition: "postproc"
     cpus_per_task: 1
diff --git a/config/interpolators.yaml b/config/interpolators.yaml
index e04fc39c..662f0679 100644
--- a/config/interpolators.yaml
+++ b/config/interpolators.yaml
@@ -1,18 +1,18 @@
 # yaml-language-server: $schema=../workflow/tools/config.schema.json
 description: |
-  Stretched interpolator vs LAM interpolator.
+  Evaluate skill of SGM interpolator (M-2 interpolator).
 
 dates:
   start: 2020-01-01T12:00
   end: 2020-01-10T00:00
-  frequency: 54h
+  frequency: 60h
 
 runs:
   - interpolator:
-      mlflow_id: 9c18b90074214d769b8b383722fc5a06
+      mlflow_id: 8d1e0410ca7d4f74b368b3079878259a
       label: M-2 interpolator (KENDA)
-      steps: 0/121/1
-      config: resources/inference/configs/interpolator_from_test_data.yaml
+      steps: 0/120/1
+      config: resources/inference/configs/interpolator_from_test_data_stretched.yaml
       forecaster: null
       extra_dependencies:
         - git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6
@@ -21,12 +21,12 @@ runs:
   - interpolator:
       mlflow_id: 8d1e0410ca7d4f74b368b3079878259a
       label: M-2 interpolator (M-1 forecaster)
-      steps: 0/121/1
+      steps: 0/120/1
       config: resources/inference/configs/interpolator_stretched.yaml
       forecaster:
         mlflow_id: d0846032fc7248a58b089cbe8fa4c511
         config: resources/inference/configs/forecaster_with_global.yaml
-        steps: 0/126/6
+        steps: 0/120/6
       extra_dependencies:
         - git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6
         - torch-geometric==2.6.1
@@ -35,14 +35,14 @@ runs:
       mlflow_id: d0846032fc7248a58b089cbe8fa4c511
       label: M-1 forecaster
       config: resources/inference/configs/forecaster_with_global.yaml
-      steps: 0/126/6
+      steps: 0/120/6
 
 baselines:
   - baseline:
       baseline_id: COSMO-E-1h
       label: COSMO-E
       root: /scratch/mch/bhendj/COSMO-E
-      steps: 0/121/1
+      steps: 0/120/1
 
 analysis:
   label: COSMO KENDA
@@ -58,7 +58,7 @@ locations:
 profile:
   executor: slurm
   global_resources:
-    gpus: 15
+    gpus: 16
   default_resources:
     slurm_partition: "postproc"
     cpus_per_task: 1
diff --git a/pyproject.toml b/pyproject.toml
index 043c1718..bfcb9ee9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,4 +50,4 @@ markers = [
 packages = [
   "src/evalml",
   "src/verification"
-]
\ No newline at end of file
+]
diff --git a/src/evalml/config.py b/src/evalml/config.py
index c4a97ce2..ec3852b4 100644
--- a/src/evalml/config.py
+++ b/src/evalml/config.py
@@ -74,8 +74,10 @@ class RunConfig(BaseModel):
         ...,
         description=(
             "Forecast lead times in hours, formatted as 'start/end/step'. "
-            "The range is half-open [start, end), meaning it includes the start  "
-            "but excludes the end. Example: '0/126/6' for lead times every 6 hours up to 120 hours."
+            "The range includes the start lead time and continues with the given step "
+            "until reaching or exceeding the end lead time. "
+            "Example: '0/120/6' for lead times every 6 hours up to 120 h, "
+            "or '0/33/6' up to 30 h."
         ),
     )
     extra_dependencies: List[str] = Field(
@@ -102,15 +104,13 @@ def validate_steps(cls, v: str) -> str:
         try:
             start, end, step = map(int, parts)
         except ValueError:
-            raise ValueError("Start, end, and interval must be integers.")
-        if start >= end:
-            raise ValueError(f"Start ({start}) must be less than end ({end}).")
-        if step <= 0:
-            raise ValueError(f"Interval ({step}) must be a positive integer.")
-        if (end - start) % step != 0:
+            raise ValueError("Start, end, and step must be integers.")
+        if start > end:
             raise ValueError(
-                f"The step ({step}) must evenly divide the range ({end - start})."
+                f"Start ({start}) must be less than or equal to end ({end})."
             )
+        if step <= 0:
+            raise ValueError(f"Step ({step}) must be a positive integer.")
         return v
 
 
diff --git a/workflow/rules/data.smk b/workflow/rules/data.smk
index 7aa0bb7c..bef818f1 100644
--- a/workflow/rules/data.smk
+++ b/workflow/rules/data.smk
@@ -18,7 +18,7 @@ if "extract_cosmoe" in config.get("include-optional-rules", []):
             runtime="24h",
         params:
             year_postfix=lambda wc: f"FCST{wc.year}",
-            lead_time="0/126/6",
+            steps="0/120/6",
         log:
             OUT_ROOT / "logs/extract-cosmoe-fcts-{year}.log",
         shell:
@@ -26,7 +26,7 @@ if "extract_cosmoe" in config.get("include-optional-rules", []):
             python workflow/scripts/extract_baseline_fct.py \
                 --archive_dir {input.archive}/{params.year_postfix} \
                 --output_store {output.fcts} \
-                --lead_time {params.lead_time} \
+                --steps {params.steps} \
                     > {log} 2>&1
             """
 
@@ -45,7 +45,7 @@ if "extract_cosmo1e" in config.get("include-optional-rules", []):
             runtime="24h",
         params:
             year_postfix=lambda wc: f"FCST{wc.year}",
-            lead_time="0/34/1",
+            steps="0/33/1",
         log:
             OUT_ROOT / "logs/extract-cosmo1e-fcts-{year}.log",
         shell:
@@ -53,6 +53,6 @@ if "extract_cosmo1e" in config.get("include-optional-rules", []):
             python workflow/scripts/extract_baseline_fct.py \
                 --archive_dir {input.archive}/{params.year_postfix} \
                 --output_store {output.fcts} \
-                --lead_time {params.lead_time} \
+                --steps {params.steps} \
                     > {log} 2>&1
             """
diff --git a/workflow/rules/inference.smk b/workflow/rules/inference.smk
index ea136b97..8ec1d98a 100644
--- a/workflow/rules/inference.smk
+++ b/workflow/rules/inference.smk
@@ -121,7 +121,6 @@ def get_resource(wc, field: str, default):
 def get_leadtime(wc):
     """Get the lead time from the run config."""
     start, end, step = RUN_CONFIGS[wc.run_id]["steps"].split("/")
-    end = int(end) - int(step)  # make inclusive
     return f"{end}h"
 
 
diff --git a/workflow/rules/verif.smk b/workflow/rules/verif.smk
index 10acbe13..bef45226 100644
--- a/workflow/rules/verif.smk
+++ b/workflow/rules/verif.smk
@@ -22,7 +22,7 @@ rule verif_metrics_baseline:
         analysis_zarr=config["analysis"].get("analysis_zarr"),
     params:
         baseline_label=lambda wc: BASELINE_CONFIGS[wc.baseline_id].get("label"),
-        baseline_steps=lambda wc: BASELINE_CONFIGS[wc.baseline_id].get("steps"),
+        baseline_steps=lambda wc: BASELINE_CONFIGS[wc.baseline_id]["steps"],
         analysis_label=config["analysis"].get("label"),
     output:
         OUT_ROOT / "data/baselines/{baseline_id}/{init_time}/verif.nc",
@@ -38,7 +38,7 @@ rule verif_metrics_baseline:
             --analysis_zarr {input.analysis_zarr} \
             --baseline_zarr {input.baseline_zarr} \
             --reftime {wildcards.init_time} \
-            --lead_time "{params.baseline_steps}" \
+            --steps "{params.baseline_steps}" \
             --baseline_label "{params.baseline_label}" \
             --analysis_label "{params.analysis_label}" \
             --output {output} > {log} 2>&1
@@ -79,7 +79,7 @@ rule verif_metrics:
         uv run {input.script} \
             --grib_output_dir {input.grib_output} \
             --analysis_zarr {input.analysis_zarr} \
-            --lead_time "{params.fcst_steps}" \
+            --steps "{params.fcst_steps}" \
             --fcst_label "{params.fcst_label}" \
             --analysis_label "{params.analysis_label}" \
             --output {output} > {log} 2>&1
diff --git a/workflow/scripts/extract_baseline.py b/workflow/scripts/extract_baseline.py
index 570151bb..f450b82b 100644
--- a/workflow/scripts/extract_baseline.py
+++ b/workflow/scripts/extract_baseline.py
@@ -42,7 +42,7 @@ def check_reftime_consistency(tarfiles: list[Path], delta_h: int = 12):
 
 
 def extract(
-    tar: Path, lead_time: list[int], run_id: str, params: list[str]
+    tar: Path, lead_times: list[int], run_id: str, params: list[str]
 ) -> xr.Dataset:
     LOG.info(f"Extracting fields from {tar}.")
     reftime = reftime_from_tarfile(tar)
@@ -54,7 +54,7 @@ def extract(
         raise ValueError("Currently only COSMO-E and COSMO-1E are supported.")
     tar_archive = tarfile.open(tar, mode="r:*")
     out = ekd.SimpleFieldList()
-    for lt in lead_time:
+    for lt in lead_times:
         filename = f"{tar.stem}/grib/{gribname}{lt:03}_{run_id}"
         LOG.info(f"Extracting {filename}.")
         stream = tar_archive.extractfile(filename)
@@ -79,23 +79,19 @@ def extract(
 class ScriptConfig(Namespace):
     archive_dir: Path
     output_store: Path
-    lead_time: int
+    steps: list[int]
     run_id: str
     params: list[str]
 
 
-def _parse_lead_time(lead_time: str) -> int:
-    # check that lead_time is in the format "start/stop/step"
-    if "/" not in lead_time:
-        raise ValueError(
-            f"Expected lead_time in format 'start/stop/step', got '{lead_time}'"
-        )
-    if len(lead_time.split("/")) != 3:
-        raise ValueError(
-            f"Expected lead_time in format 'start/stop/step', got '{lead_time}'"
-        )
-
-    return list(range(*map(int, lead_time.split("/"))))
+def _parse_steps(steps: str) -> int:
+    # check that steps is in the format "start/stop/step"
+    if "/" not in steps:
+        raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'")
+    if len(steps.split("/")) != 3:
+        raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'")
+    start, end, step = map(int, steps.split("/"))
+    return list(range(start, end + 1, step))
 
 
 def main(cfg: ScriptConfig):
@@ -135,7 +131,7 @@ def main(cfg: ScriptConfig):
 
     for i in indices:
         file = tarfiles[i]
-        ds = extract(file, cfg.lead_time, cfg.run_id, cfg.params)
+        ds = extract(file, cfg.steps, cfg.run_id, cfg.params)
 
         LOG.info(f"Extracted: {ds}")
 
@@ -167,7 +163,7 @@ def main(cfg: ScriptConfig):
         help="Path to the output zarr store.",
     )
 
-    parser.add_argument("--lead_time", type=_parse_lead_time, default="0/126/6")
+    parser.add_argument("--steps", type=_parse_steps, default="0/120/6")
 
     parser.add_argument("--run_id", type=str, default="000")
 
@@ -193,10 +189,10 @@ def main(cfg: ScriptConfig):
 python workflow/scripts/extract_baseline_fct.py \
     --archive_dir /archive/mch/msopr/osm/COSMO-E/FCST20 \
     --output_store /store_new/mch/msopr/ml/COSMO-E/FCST20.zarr \
-    --lead_time 0/126/6
+    --steps 0/120/6
 
 python workflow/scripts/extract_baseline_fct.py \
     --archive_dir /archive/mch/s83/osm/from_GPFS/COSMO-1E/FCST20 \
     --output_store /store_new/mch/msopr/ml/COSMO-1E/FCST20.zarr \
-    --lead_time 0/34/1
+    --steps 0/33/1
 """
diff --git a/workflow/scripts/verif_baseline.py b/workflow/scripts/verif_baseline.py
index 3f034cc9..f052dbab 100644
--- a/workflow/scripts/verif_baseline.py
+++ b/workflow/scripts/verif_baseline.py
@@ -92,18 +92,14 @@ def load_analysis_data_from_zarr(
     return ds
 
 
-def _parse_lead_time(lead_time: str) -> int:
-    # check that lead_time is in the format "start/stop/step"
-    if "/" not in lead_time:
-        raise ValueError(
-            f"Expected lead_time in format 'start/stop/step', got '{lead_time}'"
-        )
-    if len(lead_time.split("/")) != 3:
-        raise ValueError(
-            f"Expected lead_time in format 'start/stop/step', got '{lead_time}'"
-        )
-
-    return list(range(*map(int, lead_time.split("/"))))
+def _parse_steps(steps: str) -> int:
+    # check that steps is in the format "start/stop/step"
+    if "/" not in steps:
+        raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'")
+    if len(steps.split("/")) != 3:
+        raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'")
+    start, end, step = map(int, steps.split("/"))
+    return list(range(start, end + 1, step))
 
 
 class ScriptConfig(Namespace):
@@ -114,7 +110,7 @@ class ScriptConfig(Namespace):
     baseline_zarr: Path = None
     reftime: datetime = None
     params: list[str] = ["T_2M", "TD_2M", "U_10M", "V_10M"]
-    lead_time: list[int] = _parse_lead_time("0/126/6")
+    steps: list[int] = _parse_steps("0/120/6")
 
 
 def program_summary_log(args):
@@ -158,7 +154,7 @@ def main(args: ScriptConfig):
         )
     baseline = baseline[args.params].sel(
         ref_time=args.reftime,
-        lead_time=np.array(args.lead_time, dtype="timedelta64[h]"),
+        lead_time=np.array(args.steps, dtype="timedelta64[h]"),
         method="nearest",
     )
     baseline = baseline.assign_coords(time=baseline.ref_time + baseline.lead_time)
@@ -226,10 +222,10 @@ def main(args: ScriptConfig):
         default=["T_2M", "TD_2M", "U_10M", "V_10M", "PS", "PMSL", "TOT_PREC"],
     )
     parser.add_argument(
-        "--lead_time",
-        type=_parse_lead_time,
-        default="0/126/6",
-        help="Lead time in the format 'start/stop/step' (default: 0/126/6).",
+        "--steps",
+        type=_parse_steps,
+        default="0/120/6",
+        help="Forecast steps in the format 'start/stop/step' (default: 0/120/6).",
     )
     parser.add_argument(
         "--baseline_label",
diff --git a/workflow/scripts/verif_from_grib.py b/workflow/scripts/verif_from_grib.py
index 61d62a2a..5ed09be7 100644
--- a/workflow/scripts/verif_from_grib.py
+++ b/workflow/scripts/verif_from_grib.py
@@ -112,12 +112,12 @@ def load_analysis_data_from_zarr(
 
 
 def load_fct_data_from_grib(
-    grib_output_dir: Path, params: list[str], step: list[int]
+    grib_output_dir: Path, params: list[str], steps: list[int]
 ) -> xr.Dataset:
     """Load forecast data from GRIB files for a specific valid time."""
     files = sorted(grib_output_dir.glob("20*.grib"))
     fds = data_source.FileDataSource(datafiles=files)
-    ds = grib_decoder.load(fds, {"param": params, "step": step})
+    ds = grib_decoder.load(fds, {"param": params, "step": steps})
     for var, da in ds.items():
         if "z" in da.dims and da.sizes["z"] == 1:
             ds[var] = da.squeeze("z", drop=True)
@@ -143,18 +143,14 @@ def load_fct_data_from_grib(
     return ds
 
 
-def _parse_lead_time(lead_time: str) -> int:
-    # check that lead_time is in the format "start/stop/step"
-    if "/" not in lead_time:
-        raise ValueError(
-            f"Expected lead_time in format 'start/stop/step', got '{lead_time}'"
-        )
-    if len(lead_time.split("/")) != 3:
-        raise ValueError(
-            f"Expected lead_time in format 'start/stop/step', got '{lead_time}'"
-        )
-
-    return list(range(*map(int, lead_time.split("/"))))
+def _parse_steps(steps: str) -> int:
+    # check that steps is in the format "start/stop/step"
+    if "/" not in steps:
+        raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'")
+    if len(steps.split("/")) != 3:
+        raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'")
+    start, end, step = map(int, steps.split("/"))
+    return list(range(start, end + 1, step))
 
 
 class ScriptConfig(Namespace):
@@ -164,7 +160,7 @@ class ScriptConfig(Namespace):
     analysis_zarr: Path = None
     forecast_zarr: Path = None
     params: list[str]
-    lead_time: list[int] = _parse_lead_time("0/126/6")
+    steps: list[int] = _parse_steps("0/120/6")
 
 
 def program_summary_log(args):
@@ -189,7 +185,7 @@ def main(args: ScriptConfig):
     # get forecast data
     start = datetime.now()
     fct = load_fct_data_from_grib(
-        grib_output_dir=args.grib_output_dir, params=args.params, step=args.lead_time
+        grib_output_dir=args.grib_output_dir, params=args.params, steps=args.steps
     )
     LOG.info(
         "Loaded forecast data from GRIB files in %.2f seconds: \n%s",
@@ -254,10 +250,10 @@ def main(args: ScriptConfig):
         help="Comma-separated list of parameters to verify.",
     )
     parser.add_argument(
-        "--lead_time",
-        type=_parse_lead_time,
-        default="0/126/6",
-        help="Lead time in the format 'start/stop/step'.",
+        "--steps",
+        type=_parse_steps,
+        default="0/120/6",
+        help="Forecast steps in the format 'start/stop/step'.",
     )
     parser.add_argument(
         "--fcst_label",
diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json
index 4bc53068..94024013 100644
--- a/workflow/tools/config.schema.json
+++ b/workflow/tools/config.schema.json
@@ -167,7 +167,7 @@
           "title": "Label"
         },
         "steps": {
-          "description": "Forecast lead times in hours, formatted as 'start/end/step'. The range is half-open [start, end), meaning it includes the start  but excludes the end. Example: '0/126/6' for lead times every 6 hours up to 120 hours.",
+          "description": "Forecast lead times in hours, formatted as 'start/end/step'. The range includes the start lead time and continues with the given step until reaching or exceeding the end lead time. Example: '0/120/6' for lead times every 6 hours up to 120 h, or '0/33/6' up to 30 h.",
           "title": "Steps",
           "type": "string"
         },
@@ -348,7 +348,7 @@
           "title": "Label"
         },
         "steps": {
-          "description": "Forecast lead times in hours, formatted as 'start/end/step'. The range is half-open [start, end), meaning it includes the start  but excludes the end. Example: '0/126/6' for lead times every 6 hours up to 120 hours.",
+          "description": "Forecast lead times in hours, formatted as 'start/end/step'. The range starts at the start lead time and advances by the given step; the end lead time is included when it is reached exactly, and no lead time beyond the end is included. Example: '0/120/6' for lead times every 6 hours up to 120 h, or '0/33/6' up to 30 h.",
           "title": "Steps",
           "type": "string"
         },

From e028f59c657cf7bb68ccefa661ccf40f259210f0 Mon Sep 17 00:00:00 2001
From: Jonas Bhend <jonasbhend@users.noreply.github.com>
Date: Tue, 21 Oct 2025 18:52:59 +0200
Subject: [PATCH 16/34] Mrb 550 inconsistent forecast initializations in
 evalml (#72)

---
 config/forecasters-co1e.yaml       | 4 ++--
 workflow/scripts/verif_baseline.py | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/config/forecasters-co1e.yaml b/config/forecasters-co1e.yaml
index c72ae546..84943b76 100644
--- a/config/forecasters-co1e.yaml
+++ b/config/forecasters-co1e.yaml
@@ -4,8 +4,8 @@ description: |
   (KENDA-1) at 1km resolution.
 
 dates:
-  start: 2020-01-01T12:00
-  end: 2020-01-10T00:00
+  start: 2020-08-01T12:00
+  end: 2020-08-10T00:00
   frequency: 54h
 
 runs:
diff --git a/workflow/scripts/verif_baseline.py b/workflow/scripts/verif_baseline.py
index f052dbab..b8f0fac5 100644
--- a/workflow/scripts/verif_baseline.py
+++ b/workflow/scripts/verif_baseline.py
@@ -155,7 +155,6 @@ def main(args: ScriptConfig):
     baseline = baseline[args.params].sel(
         ref_time=args.reftime,
         lead_time=np.array(args.steps, dtype="timedelta64[h]"),
-        method="nearest",
     )
     baseline = baseline.assign_coords(time=baseline.ref_time + baseline.lead_time)
     LOG.info(

From 54067779a2a78f5e934b4efeaa1264904252e729 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <62377868+frazane@users.noreply.github.com>
Date: Wed, 22 Oct 2025 11:32:51 +0200
Subject: [PATCH 17/34] Update vega-lite spec (#69)

---
 resources/report/dashboard/script.js | 122 ++++++++++++++++-----------
 1 file changed, 74 insertions(+), 48 deletions(-)

diff --git a/resources/report/dashboard/script.js b/resources/report/dashboard/script.js
index 54b7933d..b422a5f0 100644
--- a/resources/report/dashboard/script.js
+++ b/resources/report/dashboard/script.js
@@ -1,11 +1,11 @@
 // Tab switching
 document.querySelectorAll(".tab-link").forEach(button => {
-    button.addEventListener("click", () => {
-        document.querySelectorAll(".tab-link").forEach(btn => btn.classList.remove("active"));
-        document.querySelectorAll(".tab-content").forEach(tab => tab.classList.remove("active"));
-        button.classList.add("active");
-        document.getElementById(button.dataset.tab).classList.add("active");
-    });
+  button.addEventListener("click", () => {
+    document.querySelectorAll(".tab-link").forEach(btn => btn.classList.remove("active"));
+    document.querySelectorAll(".tab-content").forEach(tab => tab.classList.remove("active"));
+    button.classList.add("active");
+    document.getElementById(button.dataset.tab).classList.add("active");
+  });
 });
 
 
@@ -13,29 +13,29 @@ document.querySelectorAll(".tab-link").forEach(button => {
 const choicesInstances = {};
 
 choicesInstances["source-select"] = new Choices("#source-select", {
-    searchEnabled: false,
-    removeItemButton: true,
-    shouldSort: false,
-    itemSelectText: "",
-    placeholder: false
+  searchEnabled: false,
+  removeItemButton: true,
+  shouldSort: false,
+  itemSelectText: "",
+  placeholder: false
 });
 document.getElementById("source-select").addEventListener("change", updateChart);
 
 choicesInstances["metric-select"] = new Choices("#metric-select", {
-    searchEnabled: false,
-    removeItemButton: true,
-    shouldSort: false,
-    itemSelectText: "",
-    placeholder: false
+  searchEnabled: false,
+  removeItemButton: true,
+  shouldSort: false,
+  itemSelectText: "",
+  placeholder: false
 });
 document.getElementById("metric-select").addEventListener("change", updateChart);
 
 choicesInstances["param-select"] = new Choices("#param-select", {
-    searchEnabled: false,
-    removeItemButton: true,
-    shouldSort: false,
-    itemSelectText: "",
-    placeholder: false
+  searchEnabled: false,
+  removeItemButton: true,
+  shouldSort: false,
+  itemSelectText: "",
+  placeholder: false
 });
 document.getElementById("param-select").addEventListener("change", updateChart);
 
@@ -44,25 +44,51 @@ data = JSON.parse(document.getElementById("verif-data").textContent)
 
 // Define base spec
 var spec = {
-  "data": {
-    "values": data
-  },
+  "data": { "values": data },
+  "params": [
+    {
+      "name": "xZoom",
+      "select": {
+        "type": "interval",
+        "encodings": ["x"],
+        "zoom": "wheel![!event.shiftKey]"
+      },
+      "bind": "scales"
+    }
+  ],
   "facet": {
     "column": { "field": "param" },
     "row": { "field": "metric" }
   },
   "spec": {
-    "mark": { "type": "line" },
+    "mark": "line",
     "width": 300,
     "height": 200,
     "encoding": {
-      "x": { "field": "lead_time", "type": "ordinal" },
-      "y": { "field": "value", "type": "quantitative" , "scale": { "zero": false }},
-      "color": { "field": "source", "legend": { "orient": "top", "labelLimit": 1000, "symbolSize": 1000 } }
-    }
+      "x": {
+        "field": "lead_time",
+        "type": "quantitative",
+        "axis": { "labels": true, "ticks": true },
+      },
+      "y": {
+        "field": "value",
+        "type": "quantitative",
+        "scale": { "zero": false }
+      },
+      "color": {
+        "field": "source",
+        "legend": { "orient": "top", "labelLimit": 1000, "symbolSize": 1000 }
+      }
+    },
+    "transform": [
+      {
+        "filter": { "param": "xZoom" }
+      }
+    ]
   },
   "resolve": {
     "scale": {
+      "x": "shared",
       "y": "independent"
     }
   }
@@ -72,32 +98,32 @@ var spec = {
 // Define functions
 
 function getSelectedValues(id) {
-    return choicesInstances[id].getValue(true)
+  return choicesInstances[id].getValue(true)
 }
 
 function updateChart() {
-    const selectedSources = getSelectedValues("source-select");
-    const selectedparams = getSelectedValues("param-select");
-    const selectedMetrics = getSelectedValues("metric-select");
+  const selectedSources = getSelectedValues("source-select");
+  const selectedparams = getSelectedValues("param-select");
+  const selectedMetrics = getSelectedValues("metric-select");
 
-    const newSpec = JSON.parse(JSON.stringify(spec));
-    const filters = [];
+  const newSpec = JSON.parse(JSON.stringify(spec));
+  const filters = [];
 
-    if (selectedSources.length > 0) {
-        filters.push({ field: "source", oneOf: selectedSources });
-    }
-    if (selectedparams.length > 0) {
-        filters.push({ field: "param", oneOf: selectedparams });
-    }
-    if (selectedMetrics.length > 0) {
-        filters.push({ field: "metric", oneOf: selectedMetrics });
-    }
+  if (selectedSources.length > 0) {
+    filters.push({ field: "source", oneOf: selectedSources });
+  }
+  if (selectedparams.length > 0) {
+    filters.push({ field: "param", oneOf: selectedparams });
+  }
+  if (selectedMetrics.length > 0) {
+    filters.push({ field: "metric", oneOf: selectedMetrics });
+  }
 
-    if (filters.length > 0) {
-        newSpec.transform = [{ filter: { and: filters } }];
-    }
+  if (filters.length > 0) {
+    newSpec.transform = [{ filter: { and: filters } }];
+  }
 
-    vegaEmbed('#vis', newSpec, { actions: false });
+  vegaEmbed('#vis', newSpec, { actions: false });
 }
 
 // Initial chart

From 8d014907349b12b3621f71ab8797aae64223f553 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <62377868+frazane@users.noreply.github.com>
Date: Wed, 22 Oct 2025 12:04:39 +0200
Subject: [PATCH 18/34] Decouple inference preparation and execution (#68)

* draft changes

* rename workspace resources dir

* working for config/forecasters.yaml

* improve logging

* works for interpolators.yaml

* re-add get_leadtime function

* refactor run directives into script
---
 config/interpolators.yaml                     |   4 +-
 resources/inference/configs/forecaster.yaml   |   2 +-
 .../configs/forecaster_no_trimedge.yaml       |   2 +-
 .../forecaster_no_trimedge_fromtraining.yaml  |   2 +-
 .../configs/forecaster_with_global.yaml       |   6 +-
 resources/inference/configs/interpolator.yaml |  12 +-
 .../configs/interpolator_from_test_data.yaml  |   3 +-
 ...interpolator_from_test_data_stretched.yaml |   2 +-
 .../configs/interpolator_stretched.yaml       |   6 +-
 .../templates/templates_index_cosmo.yaml      |  16 +-
 .../templates/templates_index_ifs.yaml        |   4 +-
 src/evalml/helpers.py                         |  39 ++++
 workflow/envs/anemoi_inference.toml           |   2 +-
 workflow/rules/common.smk                     |   7 +-
 workflow/rules/inference.smk                  | 156 +++++++---------
 workflow/rules/verif.smk                      |   9 +-
 workflow/scripts/inference_prepare.py         | 173 ++++++++++++++++++
 17 files changed, 322 insertions(+), 123 deletions(-)
 create mode 100644 src/evalml/helpers.py
 create mode 100644 workflow/scripts/inference_prepare.py

diff --git a/config/interpolators.yaml b/config/interpolators.yaml
index 662f0679..0f5a042c 100644
--- a/config/interpolators.yaml
+++ b/config/interpolators.yaml
@@ -15,7 +15,7 @@ runs:
       config: resources/inference/configs/interpolator_from_test_data_stretched.yaml
       forecaster: null
       extra_dependencies:
-        - git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6
+        - git+https://github.com/ecmwf/anemoi-inference@fix/cutout-preprocessors
         - torch-geometric==2.6.1
         - anemoi-graphs==0.5.2
   - interpolator:
@@ -28,7 +28,7 @@ runs:
         config: resources/inference/configs/forecaster_with_global.yaml
         steps: 0/120/6
       extra_dependencies:
-        - git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6
+        - git+https://github.com/ecmwf/anemoi-inference@fix/cutout-preprocessors
         - torch-geometric==2.6.1
         - anemoi-graphs==0.5.2
   - forecaster:
diff --git a/resources/inference/configs/forecaster.yaml b/resources/inference/configs/forecaster.yaml
index 4f558a3e..8b318c83 100644
--- a/resources/inference/configs/forecaster.yaml
+++ b/resources/inference/configs/forecaster.yaml
@@ -17,7 +17,7 @@ output:
                 encoding:
                   typeOfGeneratingProcess: 2
                 templates:
-                  samples: _resources/templates_index_cosmo.yaml
+                  samples: resources/templates_index_cosmo.yaml
     - printer
 
 write_initial_state: true
diff --git a/resources/inference/configs/forecaster_no_trimedge.yaml b/resources/inference/configs/forecaster_no_trimedge.yaml
index 2e3417dc..306c62f6 100644
--- a/resources/inference/configs/forecaster_no_trimedge.yaml
+++ b/resources/inference/configs/forecaster_no_trimedge.yaml
@@ -15,7 +15,7 @@ output:
             encoding:
               typeOfGeneratingProcess: 2
             templates:
-              samples: _resources/templates_index_cosmo.yaml
+              samples: resources/templates_index_cosmo.yaml
     - printer
 
 write_initial_state: true
diff --git a/resources/inference/configs/forecaster_no_trimedge_fromtraining.yaml b/resources/inference/configs/forecaster_no_trimedge_fromtraining.yaml
index 11188b9e..b5097f5b 100644
--- a/resources/inference/configs/forecaster_no_trimedge_fromtraining.yaml
+++ b/resources/inference/configs/forecaster_no_trimedge_fromtraining.yaml
@@ -15,7 +15,7 @@ output:
             encoding:
               typeOfGeneratingProcess: 2
             templates:
-              samples: _resources/templates_index_cosmo.yaml
+              samples: resources/templates_index_cosmo.yaml
     - printer
 
 write_initial_state: true
diff --git a/resources/inference/configs/forecaster_with_global.yaml b/resources/inference/configs/forecaster_with_global.yaml
index ae00c3a7..890d3e29 100644
--- a/resources/inference/configs/forecaster_with_global.yaml
+++ b/resources/inference/configs/forecaster_with_global.yaml
@@ -17,18 +17,18 @@ output:
                 encoding:
                   typeOfGeneratingProcess: 2
                 templates:
-                   samples: _resources/templates_index_cosmo.yaml
+                   samples: resources/templates_index_cosmo.yaml
     - grib:
         path: grib/ifs-{dateTime}_{step:03}.grib
         encoding:
           typeOfGeneratingProcess: 2
         templates:
-          samples: _resources/templates_index_ifs.yaml
+          samples: resources/templates_index_ifs.yaml
         post_processors:
           - extract_slice: [189699, -1]
           - assign_mask: "global/cutout_mask"
 
-forcings:
+constant_forcings:
   test:
     use_original_paths: true
 
diff --git a/resources/inference/configs/interpolator.yaml b/resources/inference/configs/interpolator.yaml
index 41253c0c..765b093d 100644
--- a/resources/inference/configs/interpolator.yaml
+++ b/resources/inference/configs/interpolator.yaml
@@ -10,7 +10,7 @@ post_processors:
 
 input:
   grib:
-    path: forecaster_grib/20*.grib # TODO: remove dirty fix to only use local files
+    path: forecaster/20*.grib # TODO: remove dirty fix to only use local files
     namer:
       rules:
         - - shortName: SKT
@@ -54,12 +54,20 @@ output:
     encoding:
       typeOfGeneratingProcess: 2
     templates:
-      samples: _resources/templates_index_cosmo.yaml
+      samples: resources/templates_index_cosmo.yaml
 
 constant_forcings:
   test:
     use_original_paths: true
 
+dynamic_forcings:
+  test:
+    use_original_paths: true
+
+patch_metadata:
+  dataset:
+    constant_fields: [z, lsm]
+
 verbosity: 1
 allow_nans: true
 output_frequency: "1h"
diff --git a/resources/inference/configs/interpolator_from_test_data.yaml b/resources/inference/configs/interpolator_from_test_data.yaml
index 07aea411..2fdb6cda 100644
--- a/resources/inference/configs/interpolator_from_test_data.yaml
+++ b/resources/inference/configs/interpolator_from_test_data.yaml
@@ -1,5 +1,4 @@
 runner: time_interpolator
-include_forcings: true
 
 input:
   test:
@@ -15,7 +14,7 @@ output:
     encoding:
       typeOfGeneratingProcess: 2
     templates:
-      samples: _resources/templates_index_cosmo.yaml
+      samples: resources/templates_index_cosmo.yaml
 
 verbosity: 1
 allow_nans: true
diff --git a/resources/inference/configs/interpolator_from_test_data_stretched.yaml b/resources/inference/configs/interpolator_from_test_data_stretched.yaml
index 19cd733e..21674891 100644
--- a/resources/inference/configs/interpolator_from_test_data_stretched.yaml
+++ b/resources/inference/configs/interpolator_from_test_data_stretched.yaml
@@ -17,7 +17,7 @@ output:
                   encoding:
                     typeOfGeneratingProcess: 2
                   templates:
-                    samples: _resources/templates_index_cosmo.yaml
+                    samples: resources/templates_index_cosmo.yaml
 
 verbosity: 1
 allow_nans: true
diff --git a/resources/inference/configs/interpolator_stretched.yaml b/resources/inference/configs/interpolator_stretched.yaml
index 300d6c65..2928e76e 100644
--- a/resources/inference/configs/interpolator_stretched.yaml
+++ b/resources/inference/configs/interpolator_stretched.yaml
@@ -4,9 +4,9 @@ input:
   cutout:
     lam_0:
       grib:
-        path: forecaster_grib/20*
         pre_processors:
           - extract_mask: "source0/trimedge_mask"
+        path: forecaster/20*
         namer:
           rules:
             - - shortName: T
@@ -43,7 +43,7 @@ input:
               - tp
     global:
       grib:
-        path: forecaster_grib/ifs*
+        path: forecaster/ifs*
         namer:
           rules:
             - - shortName: T
@@ -100,7 +100,7 @@ output:
                   encoding:
                     typeOfGeneratingProcess: 2
                   templates:
-                    samples: _resources/templates_index_cosmo.yaml
+                    samples: resources/templates_index_cosmo.yaml
 
 verbosity: 1
 allow_nans: true
diff --git a/resources/inference/templates/templates_index_cosmo.yaml b/resources/inference/templates/templates_index_cosmo.yaml
index 8f150041..632164ab 100644
--- a/resources/inference/templates/templates_index_cosmo.yaml
+++ b/resources/inference/templates/templates_index_cosmo.yaml
@@ -1,26 +1,26 @@
 
 # COSMO-2 templates
 - - {grid: 0.02, levtype: pl}
-  - _resources/co2-typeOfLevel=isobaricInhPa.grib
+  - resources/co2-typeOfLevel=isobaricInhPa.grib
 
 - - {grid: 0.02, levtype: sfc, param: [T_2M, TD_2M, U_10M, V_10M]}
-  - _resources/co2-typeOfLevel=heightAboveGround.grib
+  - resources/co2-typeOfLevel=heightAboveGround.grib
 
 - - {grid: 0.02, levtype: sfc, param: [FR_LAND, TOC_PREC, PMSL, PS, FIS, T_G]}
-  - _resources/co2-typeOfLevel=surface.grib
+  - resources/co2-typeOfLevel=surface.grib
 
 - - {grid: 0.02, levtype: sfc, param: [TOT_PREC]}
-  - _resources/co2-shortName=TOT_PREC.grib
+  - resources/co2-shortName=TOT_PREC.grib
 
 # COSMO-1E templates
 - - {grid: 0.01, levtype: pl}
-  - _resources/co1e-typeOfLevel=isobaricInhPa.grib
+  - resources/co1e-typeOfLevel=isobaricInhPa.grib
 
 - - {grid: 0.01, levtype: sfc, param: [T_2M, TD_2M, U_10M, V_10M]}
-  - _resources/co1e-typeOfLevel=heightAboveGround.grib
+  - resources/co1e-typeOfLevel=heightAboveGround.grib
 
 - - {grid: 0.01, levtype: sfc, param: [FR_LAND, TOC_PREC, PMSL, PS, FIS, T_G]}
-  - _resources/co1e-typeOfLevel=surface.grib
+  - resources/co1e-typeOfLevel=surface.grib
 
 - - {grid: 0.01, levtype: sfc, param: [TOT_PREC]}
-  - _resources/co1e-shortName=TOT_PREC.grib
+  - resources/co1e-shortName=TOT_PREC.grib
diff --git a/resources/inference/templates/templates_index_ifs.yaml b/resources/inference/templates/templates_index_ifs.yaml
index a399ed92..c0700cfc 100644
--- a/resources/inference/templates/templates_index_ifs.yaml
+++ b/resources/inference/templates/templates_index_ifs.yaml
@@ -1,5 +1,5 @@
 - - {levtype: pl}
-  - _resources/ifs-levtype=pl.grib
+  - resources/ifs-levtype=pl.grib
 
 - - {levtype: sfc}
-  - _resources/ifs-levtype=sfc.grib
+  - resources/ifs-levtype=sfc.grib
diff --git a/src/evalml/helpers.py b/src/evalml/helpers.py
new file mode 100644
index 00000000..bb3e03c1
--- /dev/null
+++ b/src/evalml/helpers.py
@@ -0,0 +1,39 @@
+import logging
+
+
+def setup_logger(logger_name, log_file, level=logging.INFO):
+    """
+    Set up a logger with the specified name and log file path.
+
+    Can be used to set up loggers from Python scripts and `run` directives
+    used in the Snakemake workflow.
+
+    Parameters
+    ----------
+    logger_name : str
+        The name of the logger.
+    log_file : str
+        The file path where the log messages will be written.
+    level : int, optional
+        The logging level (default is logging.INFO).
+
+    Returns
+    -------
+    logging.Logger
+        Configured logger instance.
+    """
+    logger = logging.getLogger(logger_name)
+    logger.setLevel(level)
+
+    if not logger.handlers:
+        file_handler = logging.FileHandler(log_file)
+        file_handler.setLevel(level)
+
+        formatter = logging.Formatter(
+            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        )
+        file_handler.setFormatter(formatter)
+
+        logger.addHandler(file_handler)
+
+    return logger
diff --git a/workflow/envs/anemoi_inference.toml b/workflow/envs/anemoi_inference.toml
index 22618d63..982a673d 100644
--- a/workflow/envs/anemoi_inference.toml
+++ b/workflow/envs/anemoi_inference.toml
@@ -8,7 +8,7 @@ dependencies = [
   "torchaudio",
   "anemoi-datasets>=0.5.23,<0.7.0",
   "anemoi-graphs>=0.5.0,<0.7.0",
-  "anemoi-inference>=0.7.0,<0.8.0",
+  "anemoi-inference>=0.8.0,<0.9.0",
   "anemoi-models>=0.4.20,<0.6.0",
   "anemoi-training>=0.3.3,<0.5.0",
   "anemoi-transform>=0.1.10,<0.3.0",
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 98283be0..e9314cb6 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -1,3 +1,4 @@
+import logging
 import copy
 from datetime import datetime, timedelta
 import yaml
@@ -128,9 +129,11 @@ def _inference_routing_fn(wc):
     run_config = RUN_CONFIGS[wc.run_id]
 
     if run_config["model_type"] == "forecaster":
-        input_path = f"logs/inference_forecaster/{wc.run_id}-{wc.init_time}.ok"
+        input_path = f"logs/prepare_inference_forecaster/{wc.run_id}-{wc.init_time}.ok"
     elif run_config["model_type"] == "interpolator":
-        input_path = f"logs/inference_interpolator/{wc.run_id}-{wc.init_time}.ok"
+        input_path = (
+            f"logs/prepare_inference_interpolator/{wc.run_id}-{wc.init_time}.ok"
+        )
     else:
         raise ValueError(f"Unsupported model type: {run_config['model_type']}")
 
diff --git a/workflow/rules/inference.smk b/workflow/rules/inference.smk
index 8ec1d98a..b490dc2e 100644
--- a/workflow/rules/inference.smk
+++ b/workflow/rules/inference.smk
@@ -8,6 +8,13 @@ from datetime import datetime
 
 
 rule create_inference_pyproject:
+    """
+    Generate a pyproject.toml that contains the information needed
+    to set up a virtual environment for inference of a specific checkpoint.
+    The list of dependencies is taken from the checkpoint's MLflow run metadata,
+    and additional dependencies can be specified under a run entry in the main
+    config file.
+    """
     input:
         toml="workflow/envs/anemoi_inference.toml",
     output:
@@ -25,6 +32,11 @@ rule create_inference_pyproject:
 
 
 rule create_inference_venv:
+    """
+    Create a virtual environment for inference, using the pyproject.toml created above.
+    The virtual environment is managed with uv. The created virtual environment is relocatable,
+    so it can be squashed later. Pre-compilation to bytecode is done to speed up imports.
+    """
     input:
         pyproject=rules.create_inference_pyproject.output.pyproject,
     output:
@@ -56,11 +68,12 @@ rule create_inference_venv:
         """
 
 
-# optionally, precompile to bytecode to reduce the import times
-# find {output.venv} -exec stat --format='%i' {} + | sort -u | wc -l  # optionally, how many files did I create?
-
-
 rule make_squashfs_image:
+    """
+    Create a squashfs image for the inference virtual environment of
+    a specific checkpoint. Find more about this at
+    https://docs.cscs.ch/guides/storage/#python-virtual-environments-with-uenv.
+    """
     input:
         venv=rules.create_inference_venv.output.venv,
     output:
@@ -76,7 +89,11 @@ rule make_squashfs_image:
 
 
 rule create_inference_sandbox:
-    """Generate a zipped directory that can be used as a sandbox for running inference jobs.
+    """
+    Create a zipped directory that, when extracted, can be used as a sandbox
+    for running inference jobs for a specific checkpoint. Its main purpose is
+    to serve as a development environment for anemoi-inference and to facilitate
+    sharing with external collaborators.
 
     TO use this sandbox, unzip it to a target directory.
 
@@ -124,14 +141,18 @@ def get_leadtime(wc):
     return f"{end}h"
 
 
-rule inference_forecaster:
+rule prepare_inference_forecaster:
     localrule: True
     input:
         pyproject=rules.create_inference_pyproject.output.pyproject,
-        image=rules.make_squashfs_image.output.image,
         config=lambda wc: Path(RUN_CONFIGS[wc.run_id]["config"]).resolve(),
     output:
-        okfile=touch(OUT_ROOT / "logs/inference_forecaster/{run_id}-{init_time}.ok"),
+        config=Path(OUT_ROOT / "data/runs/{run_id}/{init_time}/config.yaml"),
+        resources=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/resources"),
+        grib_out_dir=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/grib"),
+        okfile=touch(
+            OUT_ROOT / "logs/prepare_inference_forecaster/{run_id}-{init_time}.ok"
+        ),
     params:
         checkpoints_path=parse_input(
             input.pyproject, parse_toml, key="tool.anemoi.checkpoints_path"
@@ -142,53 +163,10 @@ rule inference_forecaster:
         reftime_to_iso=lambda wc: datetime.strptime(
             wc.init_time, "%Y%m%d%H%M"
         ).strftime("%Y-%m-%dT%H:%M"),
-        image_path=lambda wc, input: f"{Path(input.image).resolve()}",
     log:
-        OUT_ROOT / "logs/inference_forecaster/{run_id}-{init_time}.log",
-    resources:
-        slurm_partition=lambda wc: get_resource(wc, "slurm_partition", "short-shared"),
-        cpus_per_task=lambda wc: get_resource(wc, "cpus_per_task", 24),
-        mem_mb_per_cpu=lambda wc: get_resource(wc, "mem_mb_per_cpu", 8000),
-        runtime=lambda wc: get_resource(wc, "runtime", "40m"),
-        gres=lambda wc: f"gpu:{get_resource(wc, 'gpu',1)}",
-        ntasks=lambda wc: get_resource(wc, "tasks", 1),
-        slurm_extra=lambda wc, input: f"--uenv={Path(input.image).resolve()}:/user-environment",
-        gpus=lambda wc: get_resource(wc, "gpu", 1),
-    shell:
-        r"""
-        (
-        set -euo pipefail
-        squashfs-mount {params.image_path}:/user-environment -- bash -c '
-        export TZ=UTC
-        source /user-environment/bin/activate
-        export ECCODES_DEFINITION_PATH=/user-environment/share/eccodes-cosmo-resources/definitions
-
-        # prepare the working directory
-        WORKDIR={params.output_root}/runs/{wildcards.run_id}/{wildcards.init_time}
-        mkdir -p $WORKDIR && cd $WORKDIR && mkdir -p grib raw _resources
-        cp {input.config} config.yaml && cp -r {params.resources_root}/templates/* _resources/
-        CMD_ARGS=(
-            date={params.reftime_to_iso}
-            checkpoint={params.checkpoints_path}/inference-last.ckpt
-            lead_time={params.lead_time}
-        )
-
-        # is GPU > 1, add runner=parallel to CMD_ARGS
-        if [ {resources.gpus} -gt 1 ]; then
-            CMD_ARGS+=(runner=parallel)
-        fi
-
-        srun \
-            --partition={resources.slurm_partition} \
-            --cpus-per-task={resources.cpus_per_task} \
-            --mem-per-cpu={resources.mem_mb_per_cpu} \
-            --time={resources.runtime} \
-            --gres={resources.gres} \
-            --ntasks={resources.ntasks} \
-            anemoi-inference run config.yaml "${{CMD_ARGS[@]}}"
-        '
-        ) > {log} 2>&1
-        """
+        OUT_ROOT / "logs/prepare_inference_forecaster/{run_id}-{init_time}.log",
+    script:
+        "../scripts/inference_prepare.py"
 
 
 def _get_forecaster_run_id(run_id):
@@ -196,23 +174,28 @@ def _get_forecaster_run_id(run_id):
     return RUN_CONFIGS[run_id]["forecaster"]["mlflow_id"][0:9]
 
 
-rule inference_interpolator:
+rule prepare_inference_interpolator:
     """Run the interpolator for a specific run ID."""
     localrule: True
     input:
         pyproject=rules.create_inference_pyproject.output.pyproject,
-        image=rules.make_squashfs_image.output.image,
         config=lambda wc: Path(RUN_CONFIGS[wc.run_id]["config"]).resolve(),
         forecasts=lambda wc: (
             [
                 OUT_ROOT
-                / f"logs/inference_forecaster/{_get_forecaster_run_id(wc.run_id)}-{wc.init_time}.ok"
+                / f"logs/execute_inference/{_get_forecaster_run_id(wc.run_id)}-{wc.init_time}.ok"
             ]
             if RUN_CONFIGS[wc.run_id].get("forecaster") is not None
             else []
         ),
     output:
-        okfile=touch(OUT_ROOT / "logs/inference_interpolator/{run_id}-{init_time}.ok"),
+        config=Path(OUT_ROOT / "data/runs/{run_id}/{init_time}/config.yaml"),
+        resources=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/resources"),
+        grib_out_dir=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/grib"),
+        forecaster=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/forecaster"),
+        okfile=touch(
+            OUT_ROOT / "logs/prepare_inference_interpolator/{run_id}-{init_time}.ok"
+        ),
     params:
         checkpoints_path=parse_input(
             input.pyproject, parse_toml, key="tool.anemoi.checkpoints_path"
@@ -228,9 +211,26 @@ rule inference_interpolator:
             if RUN_CONFIGS[wc.run_id].get("forecaster") is None
             else _get_forecaster_run_id(wc.run_id)
         ),
-        image_path=lambda wc, input: f"{Path(input.image).resolve()}",
     log:
-        OUT_ROOT / "logs/inference_interpolator/{run_id}-{init_time}.log",
+        OUT_ROOT / "logs/prepare_inference_interpolator/{run_id}-{init_time}.log",
+    script:
+        "../scripts/inference_prepare.py"
+
+
+rule execute_inference:
+    localrule: True
+    input:
+        okfile=_inference_routing_fn,
+        image=rules.make_squashfs_image.output.image,
+    output:
+        okfile=touch(OUT_ROOT / "logs/execute_inference/{run_id}-{init_time}.ok"),
+    log:
+        OUT_ROOT / "logs/execute_inference/{run_id}-{init_time}.log",
+    params:
+        image_path=lambda wc, input: f"{Path(input.image).resolve()}",
+        workdir=lambda wc: (
+            OUT_ROOT / f"data/runs/{wc.run_id}/{wc.init_time}"
+        ).resolve(),
     resources:
         slurm_partition=lambda wc: get_resource(wc, "slurm_partition", "short-shared"),
         cpus_per_task=lambda wc: get_resource(wc, "cpus_per_task", 24),
@@ -238,35 +238,19 @@ rule inference_interpolator:
         runtime=lambda wc: get_resource(wc, "runtime", "40m"),
         gres=lambda wc: f"gpu:{get_resource(wc, 'gpu',1)}",
         ntasks=lambda wc: get_resource(wc, "tasks", 1),
-        slurm_extra=lambda wc, input: f"--uenv={Path(input.image).resolve()}:/user-environment",
         gpus=lambda wc: get_resource(wc, "gpu", 1),
     shell:
-        r"""
+        """
         (
         set -euo pipefail
+
+        cd {params.workdir}
+
         squashfs-mount {params.image_path}:/user-environment -- bash -c '
-        export TZ=UTC
         source /user-environment/bin/activate
         export ECCODES_DEFINITION_PATH=/user-environment/share/eccodes-cosmo-resources/definitions
 
-        # prepare the working directory
-        WORKDIR={params.output_root}/runs/{wildcards.run_id}/{wildcards.init_time}
-        mkdir -p $WORKDIR && cd $WORKDIR && mkdir -p grib raw _resources
-        cp {input.config} config.yaml && cp -r {params.resources_root}/templates/* _resources/
-
-        # if forecaster_run_id is not "null", link the forecaster grib directory; else, run from files.
-        if [ "{params.forecaster_run_id}" != "null" ]; then
-            FORECASTER_WORKDIR={params.output_root}/runs/{params.forecaster_run_id}/{wildcards.init_time}
-            ln -fns $FORECASTER_WORKDIR/grib forecaster_grib
-        else
-            echo "Forecaster configuration is null; proceeding with file-based inputs."
-        fi
-
-        CMD_ARGS=(
-            date={params.reftime_to_iso}
-            checkpoint={params.checkpoints_path}/inference-last.ckpt
-            lead_time={params.lead_time}
-        )
+        CMD_ARGS=()
 
         # is GPU > 1, add runner=parallel to CMD_ARGS
         if [ {resources.gpus} -gt 1 ]; then
@@ -274,6 +258,7 @@ rule inference_interpolator:
         fi
 
         srun \
+            --unbuffered \
             --partition={resources.slurm_partition} \
             --cpus-per-task={resources.cpus_per_task} \
             --mem-per-cpu={resources.mem_mb_per_cpu} \
@@ -284,12 +269,3 @@ rule inference_interpolator:
         '
         ) > {log} 2>&1
         """
-
-
-rule inference_routing:
-    localrule: True
-    input:
-        _inference_routing_fn,
-    output:
-        directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/grib"),
-        directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/raw"),
diff --git a/workflow/rules/verif.smk b/workflow/rules/verif.smk
index bef45226..6c732db8 100644
--- a/workflow/rules/verif.smk
+++ b/workflow/rules/verif.smk
@@ -55,9 +55,7 @@ def _get_no_none(dict, key, replacement):
 rule verif_metrics:
     input:
         script="workflow/scripts/verif_from_grib.py",
-        module="src/verification/__init__.py",
-        inference_okfile=_inference_routing_fn,
-        grib_output=rules.inference_routing.output[0],
+        inference_okfile=rules.execute_inference.output.okfile,
         analysis_zarr=config["analysis"].get("analysis_zarr"),
     output:
         OUT_ROOT / "data/runs/{run_id}/{init_time}/verif.nc",
@@ -68,6 +66,9 @@ rule verif_metrics:
         fcst_label=lambda wc: RUN_CONFIGS[wc.run_id].get("label"),
         fcst_steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"],
         analysis_label=config["analysis"].get("label"),
+        grib_out_dir=lambda wc: (
+            Path(OUT_ROOT) / f"data/runs/{wc.run_id}/{wc.init_time}/grib"
+        ).resolve(),
     log:
         OUT_ROOT / "logs/verif_metrics/{run_id}-{init_time}.log",
     resources:
@@ -77,7 +78,7 @@ rule verif_metrics:
     shell:
         """
         uv run {input.script} \
-            --grib_output_dir {input.grib_output} \
+            --grib_output_dir {params.grib_out_dir} \
             --analysis_zarr {input.analysis_zarr} \
             --steps "{params.fcst_steps}" \
             --fcst_label "{params.fcst_label}" \
diff --git a/workflow/scripts/inference_prepare.py b/workflow/scripts/inference_prepare.py
new file mode 100644
index 00000000..e3178774
--- /dev/null
+++ b/workflow/scripts/inference_prepare.py
@@ -0,0 +1,173 @@
+"""Script to prepare configuration and working directory for inference runs."""
+
+import logging
+import yaml
+import shutil
+from pathlib import Path
+
+from evalml.helpers import setup_logger
+
+
+def prepare_config(default_config_path: str, output_config_path: str, params: dict):
+    """Prepare the configuration file for the inference run.
+
+    Overrides default configuration parameters with those provided in params
+    and writes the updated configuration to output_config_path.
+
+    Parameters
+    ----------
+    default_config_path : str
+        Path to the default configuration file.
+    output_config_path : str
+        Path where the updated configuration file will be written.
+    params : dict
+        Dictionary of parameters to override in the default configuration.
+    """
+
+    with open(default_config_path, "r") as f:
+        config = yaml.safe_load(f)
+
+    config = _override_recursive(config, params)
+
+    with open(output_config_path, "w") as f:
+        yaml.safe_dump(config, f, sort_keys=False)
+
+
+def prepare_workdir(workdir: Path, resources_root: Path):
+    """Prepare the working directory for the inference run.
+
+    Creates subdirectories and copies resource templates; safe to re-run in place.
+
+    Parameters
+    ----------
+    workdir : Path
+        Path to the working directory.
+    resources_root : Path
+        Path to the root directory containing resource files.
+    """
+    (workdir / "grib").mkdir(parents=True, exist_ok=True)  # also creates workdir
+    src, dst = resources_root / "templates", workdir / "resources"
+    shutil.copytree(src, dst, dirs_exist_ok=True)  # tolerate reruns of the rule
+
+
+def prepare_interpolator(smk):
+    """Prepare the interpolator for the inference run.
+
+    Required steps:
+    - prepare working directory
+    - prepare forecaster directory (symlink to forecaster grib, or placeholder)
+    - prepare config
+    """
+    LOG = _setup_logger(smk)
+
+    # prepare working directory
+    workdir = _get_workdir(smk)
+    prepare_workdir(workdir, smk.params.resources_root)
+    LOG.info("Prepared working directory at %s", workdir)
+    res_list = "\n".join([str(fn) for fn in Path(workdir / "resources").rglob("*")])
+    LOG.info("Resources: \n%s", res_list)
+
+    # prepare forecaster directory
+    fct_run_id = smk.params.forecaster_run_id
+    fct_link = workdir / "forecaster"
+    if fct_run_id != "null":
+        fct_workdir = (
+            smk.params.output_root / "runs" / fct_run_id / smk.wildcards.init_time
+        )
+        fct_link.unlink(missing_ok=True)  # replace a stale link on rerun
+        fct_link.symlink_to(fct_workdir / "grib")
+        LOG.info("Created symlink to forecaster grib directory at %s", fct_link)
+    else:
+        fct_link.mkdir(parents=True, exist_ok=True)
+        (fct_link / ".dataset").touch()
+        LOG.info(
+            "No forecaster run ID provided; using dataset placeholder at %s",
+            fct_link / ".dataset",
+        )
+
+    # prepare config
+    overrides = _overrides_from_params(smk)
+    prepare_config(smk.input.config, smk.output.config, overrides)
+    LOG.info("Wrote config file at %s", smk.output.config)
+    with open(smk.output.config, "r") as f:
+        config_content = f.read()
+    LOG.info("Config: \n%s", config_content)
+
+    LOG.info("Interpolator preparation complete.")
+
+
+def prepare_forecaster(smk):
+    """Prepare the forecaster for the inference run.
+
+    Required steps:
+    - prepare working directory
+    - prepare config
+    """
+    LOG = _setup_logger(smk)
+
+    workdir = _get_workdir(smk)
+    prepare_workdir(workdir, smk.params.resources_root)
+    LOG.info("Prepared working directory at %s", workdir)
+    res_list = "\n".join([str(fn) for fn in Path(workdir / "resources").rglob("*")])
+    LOG.info("Resources: \n%s", res_list)
+
+    overrides = _overrides_from_params(smk)
+    prepare_config(smk.input.config, smk.output.config, overrides)
+    LOG.info("Wrote config file at %s", smk.output.config)
+    with open(smk.output.config, "r") as f:
+        config_content = f.read()
+    LOG.info("Config: \n%s", config_content)
+
+    LOG.info("Forecaster preparation complete.")
+
+
+# TODO: just pass a dictionary of config overrides to the rule's params
+def _overrides_from_params(smk) -> dict:
+    return {
+        "checkpoint": f"{smk.params.checkpoints_path}/inference-last.ckpt",
+        "date": smk.params.reftime_to_iso,
+        "lead_time": smk.params.lead_time,
+    }
+
+
+def _get_workdir(smk) -> Path:
+    run_id = smk.wildcards.run_id
+    init_time = smk.wildcards.init_time
+    return smk.params.output_root / "runs" / run_id / init_time
+
+
+def _setup_logger(smk) -> logging.Logger:
+    run_id = smk.wildcards.run_id
+    init_time = smk.wildcards.init_time
+    logger_name = f"{smk.rule}_{run_id}_{init_time}"
+    LOG = setup_logger(logger_name, log_file=smk.log[0])
+    return LOG
+
+
+def _override_recursive(original: dict, updates: dict) -> dict:
+    """Merge updates into original in place (recursing into nested dicts); return original."""
+    for key, value in updates.items():
+        if (
+            isinstance(value, dict)
+            and key in original
+            and isinstance(original[key], dict)
+        ):
+            original[key] = _override_recursive(original[key], value)
+        else:
+            original[key] = value
+    return original
+
+
+def main(smk):
+    """Main function to run the Snakemake workflow."""
+    if smk.rule == "prepare_inference_forecaster":
+        prepare_forecaster(smk)
+    elif smk.rule == "prepare_inference_interpolator":
+        prepare_interpolator(smk)
+    else:
+        raise ValueError(f"Unknown rule: {smk.rule}")
+
+
+if __name__ == "__main__":
+    snakemake = snakemake  # type: ignore # noqa: F821
+    raise SystemExit(main(snakemake))

From b7b131195b2273a1e7426920934a8772c7c64c49 Mon Sep 17 00:00:00 2001
From: Jonas Bhend <jonasbhend@users.noreply.github.com>
Date: Wed, 29 Oct 2025 09:21:32 +0100
Subject: [PATCH 19/34] Scores by Region (#75)

* add region averages

* add regions to config

* Add regions to verification module, scripts, and rules

* add stratification to forecaster config and fix typo

* fix dict indexing

* fix append error

* read lon/lat from obs dataset

* Add inner verification domain

* Add missing dependency

* add plots by region

* Add regions to dashboard

* Fix dashboard

* Add region name and initializations to plot title (and remove header div)

* Add support for multiple regions

* Fix legend
---
 config/forecasters-co1e.yaml                  |  10 +
 config/forecasters.yaml                       |  10 +
 config/interpolators.yaml                     |  10 +
 pyproject.toml                                |   3 +
 resources/report/dashboard/script.js          |  94 ++++++----
 .../report/dashboard/template.html.jinja2     |  17 +-
 src/evalml/config.py                          |  14 ++
 src/verification/__init__.py                  | 134 ++++++++++++--
 uv.lock                                       | 174 +++++++++++++++++-
 workflow/rules/common.smk                     |  11 ++
 workflow/rules/report.smk                     |   5 +-
 workflow/rules/verif.smk                      |   5 +
 .../scripts/report_experiment_dashboard.py    |   2 +
 workflow/scripts/verif_baseline.py            |   9 +-
 workflow/scripts/verif_from_grib.py           |   7 +-
 workflow/scripts/verif_plot_metrics.py        |  10 +-
 workflow/tools/config.schema.json             |  28 +++
 17 files changed, 480 insertions(+), 63 deletions(-)

diff --git a/config/forecasters-co1e.yaml b/config/forecasters-co1e.yaml
index 84943b76..d0685047 100644
--- a/config/forecasters-co1e.yaml
+++ b/config/forecasters-co1e.yaml
@@ -29,6 +29,16 @@ analysis:
   label: COSMO KENDA
   analysis_zarr: /scratch/mch/fzanetta/data/anemoi/datasets/mch-co1e-an-archive-0p01-2019-2024-1h-v1-pl13.zarr
 
+stratification:
+  regions:
+    - jura
+    - mittelland
+    - voralpen
+    - alpennordhang
+    - innerealpentaeler
+    - alpensuedseite
+  root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517
+
 locations:
   output_root: output/
   mlflow_uri:
diff --git a/config/forecasters.yaml b/config/forecasters.yaml
index 1dbdad13..c5445dc5 100644
--- a/config/forecasters.yaml
+++ b/config/forecasters.yaml
@@ -24,6 +24,16 @@ analysis:
   label: COSMO KENDA
   analysis_zarr: /scratch/mch/fzanetta/data/anemoi/datasets/mch-co2-an-archive-0p02-2015-2020-6h-v3-pl13.zarr
 
+stratification:
+  regions:
+    - jura
+    - mittelland
+    - voralpen
+    - alpennordhang
+    - innerealpentaeler
+    - alpensuedseite
+  root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517
+
 locations:
   output_root: output/
   mlflow_uri:
diff --git a/config/interpolators.yaml b/config/interpolators.yaml
index 0f5a042c..dd235440 100644
--- a/config/interpolators.yaml
+++ b/config/interpolators.yaml
@@ -48,6 +48,16 @@ analysis:
   label: COSMO KENDA
   analysis_zarr: /scratch/mch/fzanetta/data/anemoi/datasets/mch-co2-an-archive-0p02-2015-2020-1h-v3-pl13.zarr
 
+stratification:
+  regions:
+    - jura
+    - mittelland
+    - voralpen
+    - alpennordhang
+    - innerealpentaeler
+    - alpensuedseite
+  root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517
+
 locations:
   output_root: output/
   mlflow_uri:
diff --git a/pyproject.toml b/pyproject.toml
index bfcb9ee9..b294e140 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,9 @@ dependencies = [
     "pydantic>=2.11.7",
     "toml>=0.10.2",
     "netcdf4>=1.7.2",
+    "shapely>=2.1.2",
+    "cartopy>=0.25.0",
+    "pyproj>=3.7.2",
 ]
 
 [project.optional-dependencies]
diff --git a/resources/report/dashboard/script.js b/resources/report/dashboard/script.js
index b422a5f0..43740200 100644
--- a/resources/report/dashboard/script.js
+++ b/resources/report/dashboard/script.js
@@ -12,6 +12,16 @@ document.querySelectorAll(".tab-link").forEach(button => {
 // Initialize selection widgets
 const choicesInstances = {};
 
+choicesInstances["region-select"] = new Choices("#region-select", {
+  searchEnabled: false,
+  removeItemButton: true,
+  shouldSort: false,
+  itemSelectText: "",
+  placeholder: false
+});
+document.getElementById("region-select").addEventListener("change", updateChart);
+
+
 choicesInstances["source-select"] = new Choices("#source-select", {
   searchEnabled: false,
   removeItemButton: true,
@@ -41,57 +51,74 @@ document.getElementById("param-select").addEventListener("change", updateChart);
 
 // Get the data (embedded in the HTML)
 data = JSON.parse(document.getElementById("verif-data").textContent)
+header = document.getElementById("header-text").textContent.trim()
 
 // Define base spec
 var spec = {
   "data": { "values": data },
-  "params": [
-    {
-      "name": "xZoom",
-      "select": {
-        "type": "interval",
-        "encodings": ["x"],
-        "zoom": "wheel![!event.shiftKey]"
-      },
-      "bind": "scales"
-    }
-  ],
+  "config": {
+    "scale": { "continuousPadding": 1 }
+  },
   "facet": {
-    "column": { "field": "param" },
-    "row": { "field": "metric" }
+    "row": { "field": "metric", "type": "nominal", "title": null },
+    "column": { "field": "param", "type": "nominal" , "title": null },
+  },
+  "resolve": {
+    "scale": {
+      "x": "shared",
+      "y": "independent"
+    },
   },
   "spec": {
-    "mark": "line",
+    "params": [
+      {
+        "name": "xZoom",
+        "select": {
+          "type": "interval",
+          "encodings": ["x"],
+          "zoom": "wheel![!event.shiftKey]"
+        },
+        "bind": "scales"
+      }
+    ],
+    "mark": {"type": "line", "point": { "size": 50 } },
     "width": 300,
     "height": 200,
     "encoding": {
       "x": {
         "field": "lead_time",
-        "type": "quantitative",
-        "axis": { "labels": true, "ticks": true },
+        "type": "quantitative"
       },
       "y": {
         "field": "value",
-        "type": "quantitative",
-        "scale": { "zero": false }
+        "type": "quantitative" ,
+          "scale": { "zero": false }
       },
       "color": {
         "field": "source",
-        "legend": { "orient": "top", "labelLimit": 1000, "symbolSize": 1000 }
-      }
+        "type": "nominal",
+        "legend": { "orient": "top", "title": "Data Source", "offset": 0, "padding": 10 }
+      },
+      "shape": {
+        "field": "region",
+        "type": "nominal",
+        "legend": { "orient": "top", "title": "Region", "offset": 0, "padding": 10 }
+      },
+      "strokeDash": {
+        "field": "region",
+        "type": "nominal",
+        "legend": null
+      },
+      "tooltip": [
+        { "field": "region", "type": "nominal", "title": "Region" },
+        { "field": "source", "type": "nominal", "title": "Source" },
+        { "field": "param", "type": "nominal", "title": "Parameter" },
+        { "field": "metric", "type": "nominal", "title": "Metric" },
+        { "field": "lead_time", "type": "quantitative", "title": "Lead Time (h)" },
+        { "field": "value", "type": "quantitative", "title": "Value" }
+      ]
     },
-    "transform": [
-      {
-        "filter": { "param": "xZoom" }
-      }
-    ]
   },
-  "resolve": {
-    "scale": {
-      "x": "shared",
-      "y": "independent"
-    }
-  }
 };
 
 
@@ -102,6 +129,7 @@ function getSelectedValues(id) {
 }
 
 function updateChart() {
+  const selectedRegions = getSelectedValues("region-select");
   const selectedSources = getSelectedValues("source-select");
   const selectedparams = getSelectedValues("param-select");
   const selectedMetrics = getSelectedValues("metric-select");
@@ -109,6 +137,10 @@ function updateChart() {
   const newSpec = JSON.parse(JSON.stringify(spec));
   const filters = [];
 
+  newSpec.title = "Verification using " + header;
+  if (selectedRegions.length > 0) {
+    filters.push({ field: "region", oneOf: selectedRegions });
+  }
   if (selectedSources.length > 0) {
     filters.push({ field: "source", oneOf: selectedSources });
   }
diff --git a/resources/report/dashboard/template.html.jinja2 b/resources/report/dashboard/template.html.jinja2
index 3fe6adbe..36f9fecf 100644
--- a/resources/report/dashboard/template.html.jinja2
+++ b/resources/report/dashboard/template.html.jinja2
@@ -101,12 +101,15 @@
 
     <!-- First tab -->
     <div id="tab1" class="tab-content active">
-        <div class="header">
-            <p>
-            {{header_text}}
-            </p>
-        </div>
         <div class="controls">
+            <div class="control-group">
+                <label>Region</label>
+                <select id="region-select" multiple>
+                    {% for region in regions %}
+                    <option value="{{region}}"{% if region == 'all' %} selected{% endif %}>{{region}}</option>
+                    {% endfor %}
+                </select>
+            </div>
             <div class="control-group">
                 <label>Source(s)</label>
                 <select id="source-select" multiple>
@@ -144,6 +147,10 @@
         {{ verif_data | safe | indent(8, false)}}
     </script>
 
+    <script id="header-text" type="text">
+        {{ header_text }}
+    </script>
+
     <script>
         {{ js_src | indent(8, true) }}
     </script>
diff --git a/src/evalml/config.py b/src/evalml/config.py
index ec3852b4..50733d0c 100644
--- a/src/evalml/config.py
+++ b/src/evalml/config.py
@@ -205,6 +205,19 @@ class Locations(BaseModel):
     )
 
 
+class Stratification(BaseModel):
+    """Stratification settings for the analysis."""
+
+    regions: List[str] = Field(
+        ...,
+        description="List of region names for stratification.",
+    )
+    root: str = Field(
+        ...,
+        description="Root directory where the region shapefiles are stored.",
+    )
+
+
 class DefaultResources(BaseModel):
     """Default resource settings for job execution."""
 
@@ -276,6 +289,7 @@ class ConfigModel(BaseModel):
         description="Dictionary of baselines to include in the verification.",
     )
     analysis: AnalysisConfig
+    stratification: Stratification
     locations: Locations
     profile: Profile
 
diff --git a/src/verification/__init__.py b/src/verification/__init__.py
index 7ab5802d..6273f511 100644
--- a/src/verification/__init__.py
+++ b/src/verification/__init__.py
@@ -1,11 +1,76 @@
 import logging
 import time
 
+from pathlib import Path
+
+import cartopy.crs as ccrs
+from cartopy.io.shapereader import Reader
+
+import numpy as np
+from shapely import contains_xy
+from shapely.ops import transform
+import pyproj
 import xarray as xr
 
+import abc
+from shapely.geometry import Polygon
+
 LOG = logging.getLogger(__name__)
 
 
+class AggregationMasks(abc.ABC):
+    @abc.abstractmethod
+    def get_masks(self, *args, **kwargs) -> xr.DataArray:
+        """Return the aggregation masks; subclasses define the arguments."""
+
+
+class SpatialAggregationMasks(AggregationMasks):
+    @abc.abstractmethod
+    def get_masks(self, lat: xr.DataArray, lon: xr.DataArray) -> xr.DataArray:
+        """Return the masks for the given latitude and longitude coordinates."""
+
+
+class ShapefileSpatialAggregationMasks(SpatialAggregationMasks):
+    """Region masks from shapefiles, plus a built-in "all" longitude/latitude box."""
+    regions: dict[str, list[Polygon]]
+
+    def __init__(
+        self,
+        shp: str | list[str] | None,
+        src_crs=ccrs.epsg(2056),
+        dst_crs=ccrs.PlateCarree(),
+    ):
+        proj = pyproj.Transformer.from_crs(
+            src_crs.proj4_init, dst_crs.proj4_init, always_xy=True
+        ).transform
+        # add inner region for ML evaluation
+        box = Polygon(list(zip([1.5, 16, 16, 1.5, 1.5], [43, 43, 49.5, 49.5, 43])))
+        regions = {"all": [box]}
+        shp = [shp] if isinstance(shp, str) else (shp or [])  # None: "all" only
+        for shapefile in shp:
+            reader = Reader(shapefile)
+            regions[Path(shapefile).stem] = [
+                transform(proj, record.geometry) for record in reader.records()
+            ]
+        self.regions = regions
+
+    def get_masks(self, lat: xr.DataArray, lon: xr.DataArray) -> xr.DataArray:
+        masks = []
+        for region_name, polygons in self.regions.items():
+            mask = self._mask_from_polygons(polygons, lat, lon)
+            masks.append(mask.assign_coords(region=region_name))
+        return xr.concat(masks, dim="region")  # one boolean mask per region
+
+    @staticmethod
+    def _mask_from_polygons(
+        polygons: list[Polygon], lat: xr.DataArray, lon: xr.DataArray
+    ) -> xr.DataArray:
+        mask = np.zeros(lon.shape, dtype=bool)
+        for poly in polygons:
+            mask |= contains_xy(poly, lon.values, lat.values)
+        return xr.DataArray(mask, coords=lon.coords, dims=lon.dims)
+
+
 def _compute_scores(
     fcst: xr.DataArray,
     obs: xr.DataArray,
@@ -64,8 +129,22 @@ def _merge_metrics(ds: xr.Dataset) -> xr.Dataset:
     return out
 
 
+def _compute_masks(ds: xr.Dataset) -> xr.DataArray:
+    # take the first data_var of ds, keeping only its x and y dimensions (index 0 elsewhere)
+    darr = ds[list(ds.data_vars)[0]].isel(
+        **{dim: 0 for dim in ds[list(ds.data_vars)[0]].dims if dim not in ["x", "y"]}
+    )
+    # all-True mask on that x/y layout, with a length-1 "region" dimension labelled "all"
+    mask = xr.ones_like(darr, dtype=bool).expand_dims(region=["all"])
+    return mask
+
+
 def verify(
-    fcst: xr.Dataset, obs: xr.Dataset, fcst_label: str, obs_label: str
+    fcst: xr.Dataset,
+    obs: xr.Dataset,
+    fcst_label: str,
+    obs_label: str,
+    regions: list[str] | None = None,
 ) -> xr.Dataset:
     """
     Compare two xarray Datasets (fcst and obs) and return pandas DataFrame with
@@ -77,29 +156,50 @@ def verify(
     # chunk the data to avoid memory issues
     # compute the metrics in parallel
     # return the results as a xarray Dataset
+    fcst_aligned, obs_aligned = xr.align(fcst, obs, join="inner", copy=False)
+    region_polygons = ShapefileSpatialAggregationMasks(shp=regions)
+    masks = region_polygons.get_masks(
+        lon=obs_aligned["longitude"], lat=obs_aligned["latitude"]
+    )
+
     scores = []
     statistics = []
-    for param in fcst.data_vars:
-        if param not in obs.data_vars:
+    for param in fcst_aligned.data_vars:
+        if param not in obs_aligned.data_vars:
             LOG.warning("Parameter %s not in obs, skipping", param)
             continue
-        LOG.info("Verifying parameter %s", param)
-        fcst_param, obs_param = xr.align(fcst[param], obs[param], join="inner")
+        score = []
+        fcst_statistics = []
+        obs_statistics = []
+        for region in masks.region.values:
+            LOG.info("Verifying parameter %s for region %s", param, region)
+            fcst_param = fcst_aligned[param].where(masks.sel(region=region))
+            obs_param = obs_aligned[param].where(masks.sel(region=region))
 
-        # scores vs time (reduce spatially)
-        score = _compute_scores(
-            fcst_param, obs_param, prefix=param + ".", source=fcst_label
-        )
-        scores.append(score)
+            # scores vs time (reduce spatially)
+            score.append(
+                _compute_scores(
+                    fcst_param, obs_param, prefix=param + ".", source=fcst_label
+                ).expand_dims(region=[region])
+            )
 
-        # statistics vs time (reduce spatially)
-        fcst_statistics = _compute_statistics(
-            fcst_param, prefix=param + ".", source=fcst_label
-        )
-        obs_statistics = _compute_statistics(
-            obs_param, prefix=param + ".", source=obs_label
-        )
+            # statistics vs time (reduce spatially)
+            fcst_statistics.append(
+                _compute_statistics(
+                    fcst_param, prefix=param + ".", source=fcst_label
+                ).expand_dims(region=[region])
+            )
+            obs_statistics.append(
+                _compute_statistics(
+                    obs_param, prefix=param + ".", source=obs_label
+                ).expand_dims(region=[region])
+            )
+
+        score = xr.concat(score, dim="region")
+        fcst_statistics = xr.concat(fcst_statistics, dim="region")
+        obs_statistics = xr.concat(obs_statistics, dim="region")
         statistics.append(xr.concat([fcst_statistics, obs_statistics], dim="source"))
+        scores.append(score)
 
     scores = _merge_metrics(scores)
     statistics = _merge_metrics(statistics)
diff --git a/uv.lock b/uv.lock
index d67e6053..303ca4f0 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.11"
 resolution-markers = [
     "python_full_version >= '3.12'",
@@ -211,6 +211,34 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080, upload-time = "2025-02-20T21:01:16.647Z" },
 ]
 
+[[package]]
+name = "cartopy"
+version = "0.25.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "matplotlib" },
+    { name = "numpy" },
+    { name = "packaging" },
+    { name = "pyproj" },
+    { name = "pyshp" },
+    { name = "shapely" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3c/3f/ec3dee34237b696a486d566a6d3ae6550ae821836e0412bafdcbbec2cfd2/cartopy-0.25.0.tar.gz", hash = "sha256:55f1a390e5f3f075b221c7d91fb10258ad978db786c7930eba06eb45d28753fe", size = 10767728, upload-time = "2025-08-01T12:44:16.573Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0d/e1/6a52ee21424da0ed30860f4e94d1657ade8d4436f0718485badf0e63011e/cartopy-0.25.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0e41d52160548a7ab7774423911db3bfb5a8bc0929580958b1945d3a004da872", size = 11006320, upload-time = "2025-08-01T12:43:48.13Z" },
+    { url = "https://files.pythonhosted.org/packages/68/06/38bcfeab9822acffc86474659d33c4dc3c5dec4e61e9927fb8cc8617f651/cartopy-0.25.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:432e2a2688fc58af43b9b6bf1d343bb08e2d6ef298efa91e55445f1d308b5ef3", size = 10995635, upload-time = "2025-08-01T12:43:50.855Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/b6/f39407d27d641a949496a52ab00220fe0635758e3cb7afb4b7328abe17e7/cartopy-0.25.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:999e44021db07dcf895b115934fb0816aef39985fbaca6ded61d2536355531de", size = 11808214, upload-time = "2025-08-01T12:43:53.218Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/c0/b33ac1f586608e80a5e10f3924e16c117da333fcb5e5240839e6681ac3d5/cartopy-0.25.0-cp311-cp311-win_amd64.whl", hash = "sha256:4139e5ca9faaa037e0576cdcf625b9461a0b404d60e9d20ea24c4d8dbe6f689d", size = 10983301, upload-time = "2025-08-01T12:43:55.427Z" },
+    { url = "https://files.pythonhosted.org/packages/63/35/b19901cbe7f1b118dccbb9e655cda7d01a31ee1ecd67e5d2d8afe119f6d3/cartopy-0.25.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:060a7b835c0c4222c1067b6ffb2f9c18458abaa35b6624573a3aa37ecf55f4bf", size = 11006900, upload-time = "2025-08-01T12:43:57.708Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/4f/09e824f86be09152ec0f1fa1fe69affbd34eac7a13b545e2e08b9b6bc8ff/cartopy-0.25.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:57717cb603aecff03ecfee1bc153bb4022c054fcd51a4214a1bb53e5a6f74465", size = 10994813, upload-time = "2025-08-01T12:44:00.069Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/30/7465b650110514fc5c9c3b59935264c35ab56f876322de34efa55367ee4e/cartopy-0.25.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:53c256351433155ef51dde976557212f4e230b8cca4e5d0d9b9a2737ad92959d", size = 11799069, upload-time = "2025-08-01T12:44:02.287Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/52/3a57ecb4598c33ee06b512d3686e46b3983e65abd6ec94c5262d01930ed9/cartopy-0.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:efedb82f38409b72becdfee02231126952816d33a68b1c584bd2136713036bfb", size = 10983127, upload-time = "2025-08-01T12:44:04.441Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/b9/0773ff8f1c755b8a362029e6910db87064d27ca021b060c48ce511ec98b7/cartopy-0.25.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a6fcd2df8039293096f957fc9c76e969b1a9715d12ab8cee1a6bdae0c6773b8b", size = 11007728, upload-time = "2025-08-01T12:44:06.64Z" },
+    { url = "https://files.pythonhosted.org/packages/34/a6/75738630b7f64bca7afc6bc5de08ddf0c61f13563f2a1abf642373d1095e/cartopy-0.25.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e4def451617e6957169447fe6ecdad0f63ef2d2007e7d451dd7b9656ada57382", size = 10996613, upload-time = "2025-08-01T12:44:08.822Z" },
+    { url = "https://files.pythonhosted.org/packages/19/0d/669d4bbeb36b87ba504409d85c68ec297e6f434ea6525424f8aa5f14abac/cartopy-0.25.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c388824cb13e4fa9c2901dc4fbb2dbe9547acd2f4a6a3440983d4e6c6973ae3", size = 11829044, upload-time = "2025-08-01T12:44:11.402Z" },
+    { url = "https://files.pythonhosted.org/packages/01/ff/b46e2120abd99b2ff3d376dc91ed58ae8f0a052d57c242c9b140497573dd/cartopy-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:60bad14c072d16e3c96967638cd66eb5a62cf24bc70087bcbfc6b30a3872ed26", size = 10987060, upload-time = "2025-08-01T12:44:14.222Z" },
+]
+
 [[package]]
 name = "certifi"
 version = "2025.8.3"
@@ -926,11 +954,14 @@ version = "0.1.0b1"
 source = { editable = "." }
 dependencies = [
     { name = "anemoi-datasets" },
+    { name = "cartopy" },
     { name = "click" },
     { name = "meteodata-lab" },
     { name = "mlflow" },
     { name = "netcdf4" },
     { name = "pydantic" },
+    { name = "pyproj" },
+    { name = "shapely" },
     { name = "snakemake" },
     { name = "snakemake-executor-plugin-slurm" },
     { name = "toml" },
@@ -953,6 +984,7 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "anemoi-datasets", specifier = ">=0.5.25" },
+    { name = "cartopy", specifier = ">=0.25.0" },
     { name = "click" },
     { name = "fastparquet", marker = "extra == 'kerchunk'" },
     { name = "kerchunk", marker = "extra == 'kerchunk'" },
@@ -960,6 +992,8 @@ requires-dist = [
     { name = "mlflow", specifier = ">=3.1.1" },
     { name = "netcdf4", specifier = ">=1.7.2" },
     { name = "pydantic", specifier = ">=2.11.7" },
+    { name = "pyproj", specifier = ">=3.7.2" },
+    { name = "shapely", specifier = ">=2.1.2" },
     { name = "snakemake", specifier = "<9.10" },
     { name = "snakemake-executor-plugin-slurm" },
     { name = "toml", specifier = ">=0.10.2" },
@@ -1915,6 +1949,11 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/71/96/d5d8859a6dac29f8ebc815ff8e75770bd513db9f08d7a711e21ae562a948/netCDF4-1.7.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30d20e56b9ba2c48884eb89c91b63e6c0612b4927881707e34402719153ef17f", size = 9378149, upload-time = "2024-10-22T19:01:04.924Z" },
     { url = "https://files.pythonhosted.org/packages/d1/80/b9c19f1bb4ac6c5fa6f94a4f278bc68a778473d1814a86a375d7cffa193a/netCDF4-1.7.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d6bfd38ba0bde04d56f06c1554714a2ea9dab75811c89450dc3ec57a9d36b80", size = 9254471, upload-time = "2024-10-22T19:01:07.041Z" },
     { url = "https://files.pythonhosted.org/packages/66/b5/e04550fd53de57001dbd5a87242da7ff784c80790adc48897977b6ccf891/netCDF4-1.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:5c5fbee6134ee1246c397e1508e5297d825aa19221fdf3fa8dc9727ad824d7a5", size = 6990521, upload-time = "2024-10-23T15:02:27.549Z" },
+    { url = "https://files.pythonhosted.org/packages/84/0a/182bb4fe5639699ba39d558b553b8e6f04fbfea6cf78404c0f21ef149bf7/netcdf4-1.7.2-cp311-abi3-macosx_13_0_x86_64.whl", hash = "sha256:7e81c3c47f2772eab0b93fba8bb05b17b58dce17720e1bed25e9d76551deecd0", size = 2751391, upload-time = "2025-10-13T18:32:22.749Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/1f/54ac27c791360f7452ca27ed1cb2917946bbe1ea4337c590a5abcef6332d/netcdf4-1.7.2-cp311-abi3-macosx_14_0_arm64.whl", hash = "sha256:cb2791dba37fc98fd1ac4e236c97822909f54efbcdf7f1415c9777810e0a28f4", size = 2387513, upload-time = "2025-10-13T18:32:27.499Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/5e/9bf3008a9e45c08f4c9fedce4d6f722ef5d970f56a9c5eb375a200dd2b66/netcdf4-1.7.2-cp311-abi3-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bf11480f6b8a5b246818ffff6b4d90481e51f8b9555b41af0c372eb0aaf8b65f", size = 9621674, upload-time = "2025-10-13T18:32:29.193Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/75/46871e85f2bbfb1efe229623d25d7c9daa17e2e968d5235572b2c8bb53e8/netcdf4-1.7.2-cp311-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1ccc05328a8ff31921b539821791aeb20b054879f3fdf6d1d505bf6422824fec", size = 9453759, upload-time = "2025-10-13T18:32:31.136Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/10/c52f12297965938d9b9be666ea1f9d8340c2aea31d6909d90aa650847248/netcdf4-1.7.2-cp311-abi3-win_amd64.whl", hash = "sha256:999bfc4acebf400ed724d5e7329e2e768accc7ee1fa1d82d505da782f730301b", size = 7148514, upload-time = "2025-10-13T18:32:33.121Z" },
 ]
 
 [[package]]
@@ -2566,6 +2605,71 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120, upload-time = "2025-03-25T05:01:24.908Z" },
 ]
 
+[[package]]
+name = "pyproj"
+version = "3.7.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/04/90/67bd7260b4ea9b8b20b4f58afef6c223ecb3abf368eb4ec5bc2cdef81b49/pyproj-3.7.2.tar.gz", hash = "sha256:39a0cf1ecc7e282d1d30f36594ebd55c9fae1fda8a2622cee5d100430628f88c", size = 226279, upload-time = "2025-08-14T12:05:42.18Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a6/bd/f205552cd1713b08f93b09e39a3ec99edef0b3ebbbca67b486fdf1abe2de/pyproj-3.7.2-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:2514d61f24c4e0bb9913e2c51487ecdaeca5f8748d8313c933693416ca41d4d5", size = 6227022, upload-time = "2025-08-14T12:03:51.474Z" },
+    { url = "https://files.pythonhosted.org/packages/75/4c/9a937e659b8b418ab573c6d340d27e68716928953273e0837e7922fcac34/pyproj-3.7.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:8693ca3892d82e70de077701ee76dd13d7bca4ae1c9d1e739d72004df015923a", size = 4625810, upload-time = "2025-08-14T12:03:53.808Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/7d/a9f41e814dc4d1dc54e95b2ccaf0b3ebe3eb18b1740df05fe334724c3d89/pyproj-3.7.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5e26484d80fea56273ed1555abaea161e9661d81a6c07815d54b8e883d4ceb25", size = 9638694, upload-time = "2025-08-14T12:03:55.669Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/ab/9bdb4a6216b712a1f9aab1c0fcbee5d3726f34a366f29c3e8c08a78d6b70/pyproj-3.7.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:281cb92847814e8018010c48b4069ff858a30236638631c1a91dd7bfa68f8a8a", size = 9493977, upload-time = "2025-08-14T12:03:57.937Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/db/2db75b1b6190f1137b1c4e8ef6a22e1c338e46320f6329bfac819143e063/pyproj-3.7.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9c8577f0b7bb09118ec2e57e3babdc977127dd66326d6c5d755c76b063e6d9dc", size = 10841151, upload-time = "2025-08-14T12:04:00.271Z" },
+    { url = "https://files.pythonhosted.org/packages/89/f7/989643394ba23a286e9b7b3f09981496172f9e0d4512457ffea7dc47ffc7/pyproj-3.7.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a23f59904fac3a5e7364b3aa44d288234af267ca041adb2c2b14a903cd5d3ac5", size = 10751585, upload-time = "2025-08-14T12:04:02.228Z" },
+    { url = "https://files.pythonhosted.org/packages/53/6d/ad928fe975a6c14a093c92e6a319ca18f479f3336bb353a740bdba335681/pyproj-3.7.2-cp311-cp311-win32.whl", hash = "sha256:f2af4ed34b2cf3e031a2d85b067a3ecbd38df073c567e04b52fa7a0202afde8a", size = 5908533, upload-time = "2025-08-14T12:04:04.821Z" },
+    { url = "https://files.pythonhosted.org/packages/79/e0/b95584605cec9ed50b7ebaf7975d1c4ddeec5a86b7a20554ed8b60042bd7/pyproj-3.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:0b7cb633565129677b2a183c4d807c727d1c736fcb0568a12299383056e67433", size = 6320742, upload-time = "2025-08-14T12:04:06.357Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/4d/536e8f93bca808175c2d0a5ac9fdf69b960d8ab6b14f25030dccb07464d7/pyproj-3.7.2-cp311-cp311-win_arm64.whl", hash = "sha256:38b08d85e3a38e455625b80e9eb9f78027c8e2649a21dec4df1f9c3525460c71", size = 6245772, upload-time = "2025-08-14T12:04:08.365Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/ab/9893ea9fb066be70ed9074ae543914a618c131ed8dff2da1e08b3a4df4db/pyproj-3.7.2-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:0a9bb26a6356fb5b033433a6d1b4542158fb71e3c51de49b4c318a1dff3aeaab", size = 6219832, upload-time = "2025-08-14T12:04:10.264Z" },
+    { url = "https://files.pythonhosted.org/packages/53/78/4c64199146eed7184eb0e85bedec60a4aa8853b6ffe1ab1f3a8b962e70a0/pyproj-3.7.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:567caa03021178861fad27fabde87500ec6d2ee173dd32f3e2d9871e40eebd68", size = 4620650, upload-time = "2025-08-14T12:04:11.978Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/ac/14a78d17943898a93ef4f8c6a9d4169911c994e3161e54a7cedeba9d8dde/pyproj-3.7.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c203101d1dc3c038a56cff0447acc515dd29d6e14811406ac539c21eed422b2a", size = 9667087, upload-time = "2025-08-14T12:04:13.964Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/be/212882c450bba74fc8d7d35cbd57e4af84792f0a56194819d98106b075af/pyproj-3.7.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:1edc34266c0c23ced85f95a1ee8b47c9035eae6aca5b6b340327250e8e281630", size = 9552797, upload-time = "2025-08-14T12:04:16.624Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/c0/c0f25c87b5d2a8686341c53c1792a222a480d6c9caf60311fec12c99ec26/pyproj-3.7.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:aa9f26c21bc0e2dc3d224cb1eb4020cf23e76af179a7c66fea49b828611e4260", size = 10837036, upload-time = "2025-08-14T12:04:18.733Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/37/5cbd6772addde2090c91113332623a86e8c7d583eccb2ad02ea634c4a89f/pyproj-3.7.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f9428b318530625cb389b9ddc9c51251e172808a4af79b82809376daaeabe5e9", size = 10775952, upload-time = "2025-08-14T12:04:20.709Z" },
+    { url = "https://files.pythonhosted.org/packages/69/a1/dc250e3cf83eb4b3b9a2cf86fdb5e25288bd40037ae449695550f9e96b2f/pyproj-3.7.2-cp312-cp312-win32.whl", hash = "sha256:b3d99ed57d319da042f175f4554fc7038aa4bcecc4ac89e217e350346b742c9d", size = 5898872, upload-time = "2025-08-14T12:04:22.485Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/a6/6fe724b72b70f2b00152d77282e14964d60ab092ec225e67c196c9b463e5/pyproj-3.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:11614a054cd86a2ed968a657d00987a86eeb91fdcbd9ad3310478685dc14a128", size = 6312176, upload-time = "2025-08-14T12:04:24.736Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/68/915cc32c02a91e76d02c8f55d5a138d6ef9e47a0d96d259df98f4842e558/pyproj-3.7.2-cp312-cp312-win_arm64.whl", hash = "sha256:509a146d1398bafe4f53273398c3bb0b4732535065fa995270e52a9d3676bca3", size = 6233452, upload-time = "2025-08-14T12:04:27.287Z" },
+    { url = "https://files.pythonhosted.org/packages/be/14/faf1b90d267cea68d7e70662e7f88cefdb1bc890bd596c74b959e0517a72/pyproj-3.7.2-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:19466e529b1b15eeefdf8ff26b06fa745856c044f2f77bf0edbae94078c1dfa1", size = 6214580, upload-time = "2025-08-14T12:04:28.804Z" },
+    { url = "https://files.pythonhosted.org/packages/35/48/da9a45b184d375f62667f62eba0ca68569b0bd980a0bb7ffcc1d50440520/pyproj-3.7.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:c79b9b84c4a626c5dc324c0d666be0bfcebd99f7538d66e8898c2444221b3da7", size = 4615388, upload-time = "2025-08-14T12:04:30.553Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/e7/d2b459a4a64bca328b712c1b544e109df88e5c800f7c143cfbc404d39bfb/pyproj-3.7.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ceecf374cacca317bc09e165db38ac548ee3cad07c3609442bd70311c59c21aa", size = 9628455, upload-time = "2025-08-14T12:04:32.435Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/85/c2b1706e51942de19076eff082f8495e57d5151364e78b5bef4af4a1d94a/pyproj-3.7.2-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:5141a538ffdbe4bfd157421828bb2e07123a90a7a2d6f30fa1462abcfb5ce681", size = 9514269, upload-time = "2025-08-14T12:04:34.599Z" },
+    { url = "https://files.pythonhosted.org/packages/34/38/07a9b89ae7467872f9a476883a5bad9e4f4d1219d31060f0f2b282276cbe/pyproj-3.7.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f000841e98ea99acbb7b8ca168d67773b0191de95187228a16110245c5d954d5", size = 10808437, upload-time = "2025-08-14T12:04:36.485Z" },
+    { url = "https://files.pythonhosted.org/packages/12/56/fda1daeabbd39dec5b07f67233d09f31facb762587b498e6fc4572be9837/pyproj-3.7.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8115faf2597f281a42ab608ceac346b4eb1383d3b45ab474fd37341c4bf82a67", size = 10745540, upload-time = "2025-08-14T12:04:38.568Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/90/c793182cbba65a39a11db2ac6b479fe76c59e6509ae75e5744c344a0da9d/pyproj-3.7.2-cp313-cp313-win32.whl", hash = "sha256:f18c0579dd6be00b970cb1a6719197fceecc407515bab37da0066f0184aafdf3", size = 5896506, upload-time = "2025-08-14T12:04:41.059Z" },
+    { url = "https://files.pythonhosted.org/packages/be/0f/747974129cf0d800906f81cd25efd098c96509026e454d4b66868779ab04/pyproj-3.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:bb41c29d5f60854b1075853fe80c58950b398d4ebb404eb532536ac8d2834ed7", size = 6310195, upload-time = "2025-08-14T12:04:42.974Z" },
+    { url = "https://files.pythonhosted.org/packages/82/64/fc7598a53172c4931ec6edf5228280663063150625d3f6423b4c20f9daff/pyproj-3.7.2-cp313-cp313-win_arm64.whl", hash = "sha256:2b617d573be4118c11cd96b8891a0b7f65778fa7733ed8ecdb297a447d439100", size = 6230748, upload-time = "2025-08-14T12:04:44.491Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/f0/611dd5cddb0d277f94b7af12981f56e1441bf8d22695065d4f0df5218498/pyproj-3.7.2-cp313-cp313t-macosx_13_0_x86_64.whl", hash = "sha256:d27b48f0e81beeaa2b4d60c516c3a1cfbb0c7ff6ef71256d8e9c07792f735279", size = 6241729, upload-time = "2025-08-14T12:04:46.274Z" },
+    { url = "https://files.pythonhosted.org/packages/15/93/40bd4a6c523ff9965e480870611aed7eda5aa2c6128c6537345a2b77b542/pyproj-3.7.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:55a3610d75023c7b1c6e583e48ef8f62918e85a2ae81300569d9f104d6684bb6", size = 4652497, upload-time = "2025-08-14T12:04:48.203Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/ae/7150ead53c117880b35e0d37960d3138fe640a235feb9605cb9386f50bb0/pyproj-3.7.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:8d7349182fa622696787cc9e195508d2a41a64765da9b8a6bee846702b9e6220", size = 9942610, upload-time = "2025-08-14T12:04:49.652Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/17/7a4a7eafecf2b46ab64e5c08176c20ceb5844b503eaa551bf12ccac77322/pyproj-3.7.2-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:d230b186eb876ed4f29a7c5ee310144c3a0e44e89e55f65fb3607e13f6db337c", size = 9692390, upload-time = "2025-08-14T12:04:51.731Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/55/ae18f040f6410f0ea547a21ada7ef3e26e6c82befa125b303b02759c0e9d/pyproj-3.7.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:237499c7862c578d0369e2b8ac56eec550e391a025ff70e2af8417139dabb41c", size = 11047596, upload-time = "2025-08-14T12:04:53.748Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/2e/d3fff4d2909473f26ae799f9dda04caa322c417a51ff3b25763f7d03b233/pyproj-3.7.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8c225f5978abd506fd9a78eaaf794435e823c9156091cabaab5374efb29d7f69", size = 10896975, upload-time = "2025-08-14T12:04:55.875Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/bc/8fc7d3963d87057b7b51ebe68c1e7c51c23129eee5072ba6b86558544a46/pyproj-3.7.2-cp313-cp313t-win32.whl", hash = "sha256:2da731876d27639ff9d2d81c151f6ab90a1546455fabd93368e753047be344a2", size = 5953057, upload-time = "2025-08-14T12:04:58.466Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/27/ea9809966cc47d2d51e6d5ae631ea895f7c7c7b9b3c29718f900a8f7d197/pyproj-3.7.2-cp313-cp313t-win_amd64.whl", hash = "sha256:f54d91ae18dd23b6c0ab48126d446820e725419da10617d86a1b69ada6d881d3", size = 6375414, upload-time = "2025-08-14T12:04:59.861Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/f8/1ef0129fba9a555c658e22af68989f35e7ba7b9136f25758809efec0cd6e/pyproj-3.7.2-cp313-cp313t-win_arm64.whl", hash = "sha256:fc52ba896cfc3214dc9f9ca3c0677a623e8fdd096b257c14a31e719d21ff3fdd", size = 6262501, upload-time = "2025-08-14T12:05:01.39Z" },
+    { url = "https://files.pythonhosted.org/packages/42/17/c2b050d3f5b71b6edd0d96ae16c990fdc42a5f1366464a5c2772146de33a/pyproj-3.7.2-cp314-cp314-macosx_13_0_x86_64.whl", hash = "sha256:2aaa328605ace41db050d06bac1adc11f01b71fe95c18661497763116c3a0f02", size = 6214541, upload-time = "2025-08-14T12:05:03.166Z" },
+    { url = "https://files.pythonhosted.org/packages/03/68/68ada9c8aea96ded09a66cfd9bf87aa6db8c2edebe93f5bf9b66b0143fbc/pyproj-3.7.2-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:35dccbce8201313c596a970fde90e33605248b66272595c061b511c8100ccc08", size = 4617456, upload-time = "2025-08-14T12:05:04.563Z" },
+    { url = "https://files.pythonhosted.org/packages/81/e4/4c50ceca7d0e937977866b02cb64e6ccf4df979a5871e521f9e255df6073/pyproj-3.7.2-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:25b0b7cb0042444c29a164b993c45c1b8013d6c48baa61dc1160d834a277e83b", size = 9615590, upload-time = "2025-08-14T12:05:06.094Z" },
+    { url = "https://files.pythonhosted.org/packages/05/1e/ada6fb15a1d75b5bd9b554355a69a798c55a7dcc93b8d41596265c1772e3/pyproj-3.7.2-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:85def3a6388e9ba51f964619aa002a9d2098e77c6454ff47773bb68871024281", size = 9474960, upload-time = "2025-08-14T12:05:07.973Z" },
+    { url = "https://files.pythonhosted.org/packages/51/07/9d48ad0a8db36e16f842f2c8a694c1d9d7dcf9137264846bef77585a71f3/pyproj-3.7.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b1bccefec3875ab81eabf49059e2b2ea77362c178b66fd3528c3e4df242f1516", size = 10799478, upload-time = "2025-08-14T12:05:14.102Z" },
+    { url = "https://files.pythonhosted.org/packages/85/cf/2f812b529079f72f51ff2d6456b7fef06c01735e5cfd62d54ffb2b548028/pyproj-3.7.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d5371ca114d6990b675247355a801925814eca53e6c4b2f1b5c0a956336ee36e", size = 10710030, upload-time = "2025-08-14T12:05:16.317Z" },
+    { url = "https://files.pythonhosted.org/packages/99/9b/4626a19e1f03eba4c0e77b91a6cf0f73aa9cb5d51a22ee385c22812bcc2c/pyproj-3.7.2-cp314-cp314-win32.whl", hash = "sha256:77f066626030f41be543274f5ac79f2a511fe89860ecd0914f22131b40a0ec25", size = 5991181, upload-time = "2025-08-14T12:05:19.492Z" },
+    { url = "https://files.pythonhosted.org/packages/04/b2/5a6610554306a83a563080c2cf2c57565563eadd280e15388efa00fb5b33/pyproj-3.7.2-cp314-cp314-win_amd64.whl", hash = "sha256:5a964da1696b8522806f4276ab04ccfff8f9eb95133a92a25900697609d40112", size = 6434721, upload-time = "2025-08-14T12:05:21.022Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/ce/6c910ea2e1c74ef673c5d48c482564b8a7824a44c4e35cca2e765b68cfcc/pyproj-3.7.2-cp314-cp314-win_arm64.whl", hash = "sha256:e258ab4dbd3cf627809067c0ba8f9884ea76c8e5999d039fb37a1619c6c3e1f6", size = 6363821, upload-time = "2025-08-14T12:05:22.627Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/e4/5532f6f7491812ba782a2177fe9de73fd8e2912b59f46a1d056b84b9b8f2/pyproj-3.7.2-cp314-cp314t-macosx_13_0_x86_64.whl", hash = "sha256:bbbac2f930c6d266f70ec75df35ef851d96fdb3701c674f42fd23a9314573b37", size = 6241773, upload-time = "2025-08-14T12:05:24.577Z" },
+    { url = "https://files.pythonhosted.org/packages/20/1f/0938c3f2bbbef1789132d1726d9b0e662f10cfc22522743937f421ad664e/pyproj-3.7.2-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:b7544e0a3d6339dc9151e9c8f3ea62a936ab7cc446a806ec448bbe86aebb979b", size = 4652537, upload-time = "2025-08-14T12:05:26.391Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/a8/488b1ed47d25972f33874f91f09ca8f2227902f05f63a2b80dc73e7b1c97/pyproj-3.7.2-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:f7f5133dca4c703e8acadf6f30bc567d39a42c6af321e7f81975c2518f3ed357", size = 9940864, upload-time = "2025-08-14T12:05:27.985Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/cc/7f4c895d0cb98e47b6a85a6d79eaca03eb266129eed2f845125c09cf31ff/pyproj-3.7.2-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:5aff3343038d7426aa5076f07feb88065f50e0502d1b0d7c22ddfdd2c75a3f81", size = 9688868, upload-time = "2025-08-14T12:05:30.425Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/b7/c7e306b8bb0f071d9825b753ee4920f066c40fbfcce9372c4f3cfb2fc4ed/pyproj-3.7.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b0552178c61f2ac1c820d087e8ba6e62b29442debddbb09d51c4bf8acc84d888", size = 11045910, upload-time = "2025-08-14T12:05:32.507Z" },
+    { url = "https://files.pythonhosted.org/packages/42/fb/538a4d2df695980e2dde5c04d965fbdd1fe8c20a3194dc4aaa3952a4d1be/pyproj-3.7.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:47d87db2d2c436c5fd0409b34d70bb6cdb875cca2ebe7a9d1c442367b0ab8d59", size = 10895724, upload-time = "2025-08-14T12:05:35.465Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/8b/a3f0618b03957de9db5489a04558a8826f43906628bb0b766033aa3b5548/pyproj-3.7.2-cp314-cp314t-win32.whl", hash = "sha256:c9b6f1d8ad3e80a0ee0903a778b6ece7dca1d1d40f6d114ae01bc8ddbad971aa", size = 6056848, upload-time = "2025-08-14T12:05:37.553Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/56/413240dd5149dd3291eda55aa55a659da4431244a2fd1319d0ae89407cfb/pyproj-3.7.2-cp314-cp314t-win_amd64.whl", hash = "sha256:1914e29e27933ba6f9822663ee0600f169014a2859f851c054c88cf5ea8a333c", size = 6517676, upload-time = "2025-08-14T12:05:39.126Z" },
+    { url = "https://files.pythonhosted.org/packages/15/73/a7141a1a0559bf1a7aa42a11c879ceb19f02f5c6c371c6d57fd86cefd4d1/pyproj-3.7.2-cp314-cp314t-win_arm64.whl", hash = "sha256:d9d25bae416a24397e0d85739f84d323b55f6511e45a522dd7d7eae70d10c7e4", size = 6391844, upload-time = "2025-08-14T12:05:40.745Z" },
+]
+
 [[package]]
 name = "pyreadline3"
 version = "3.5.4"
@@ -2575,6 +2679,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178, upload-time = "2024-09-19T02:40:08.598Z" },
 ]
 
+[[package]]
+name = "pyshp"
+version = "3.0.2.post1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7f/fb/07f057ff01229c575831766b44bd249aefa086146cf5bce52e172d77cf4e/pyshp-3.0.2.post1.tar.gz", hash = "sha256:18e34a66759b6d34a6f535978c76dad518200f23a727d9e22af8e8535c0245b9", size = 2192180, upload-time = "2025-10-10T16:04:58.529Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/51/92/a8ad817864a560b96ac1c817f9c56bb7eacc1a7d522e2d39afe9e9c77d7b/pyshp-3.0.2.post1-py3-none-any.whl", hash = "sha256:b0aec66bc55f7cd3a846f6b02c5a9eec1fc1d2cff16ccfcf6493a6773c7eb602", size = 58298, upload-time = "2025-10-10T16:04:57.151Z" },
+]
+
 [[package]]
 name = "pytest"
 version = "8.4.1"
@@ -2966,6 +3079,65 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
 ]
 
+[[package]]
+name = "shapely"
+version = "2.1.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "numpy" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/4d/bc/0989043118a27cccb4e906a46b7565ce36ca7b57f5a18b78f4f1b0f72d9d/shapely-2.1.2.tar.gz", hash = "sha256:2ed4ecb28320a433db18a5bf029986aa8afcfd740745e78847e330d5d94922a9", size = 315489, upload-time = "2025-09-24T13:51:41.432Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8f/8d/1ff672dea9ec6a7b5d422eb6d095ed886e2e523733329f75fdcb14ee1149/shapely-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:91121757b0a36c9aac3427a651a7e6567110a4a67c97edf04f8d55d4765f6618", size = 1820038, upload-time = "2025-09-24T13:50:15.628Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/ce/28fab8c772ce5db23a0d86bf0adaee0c4c79d5ad1db766055fa3dab442e2/shapely-2.1.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:16a9c722ba774cf50b5d4541242b4cce05aafd44a015290c82ba8a16931ff63d", size = 1626039, upload-time = "2025-09-24T13:50:16.881Z" },
+    { url = "https://files.pythonhosted.org/packages/70/8b/868b7e3f4982f5006e9395c1e12343c66a8155c0374fdc07c0e6a1ab547d/shapely-2.1.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cc4f7397459b12c0b196c9efe1f9d7e92463cbba142632b4cc6d8bbbbd3e2b09", size = 3001519, upload-time = "2025-09-24T13:50:18.606Z" },
+    { url = "https://files.pythonhosted.org/packages/13/02/58b0b8d9c17c93ab6340edd8b7308c0c5a5b81f94ce65705819b7416dba5/shapely-2.1.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:136ab87b17e733e22f0961504d05e77e7be8c9b5a8184f685b4a91a84efe3c26", size = 3110842, upload-time = "2025-09-24T13:50:21.77Z" },
+    { url = "https://files.pythonhosted.org/packages/af/61/8e389c97994d5f331dcffb25e2fa761aeedfb52b3ad9bcdd7b8671f4810a/shapely-2.1.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:16c5d0fc45d3aa0a69074979f4f1928ca2734fb2e0dde8af9611e134e46774e7", size = 4021316, upload-time = "2025-09-24T13:50:23.626Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/d4/9b2a9fe6039f9e42ccf2cb3e84f219fd8364b0c3b8e7bbc857b5fbe9c14c/shapely-2.1.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ddc759f72b5b2b0f54a7e7cde44acef680a55019eb52ac63a7af2cf17cb9cd2", size = 4178586, upload-time = "2025-09-24T13:50:25.443Z" },
+    { url = "https://files.pythonhosted.org/packages/16/f6/9840f6963ed4decf76b08fd6d7fed14f8779fb7a62cb45c5617fa8ac6eab/shapely-2.1.2-cp311-cp311-win32.whl", hash = "sha256:2fa78b49485391224755a856ed3b3bd91c8455f6121fee0db0e71cefb07d0ef6", size = 1543961, upload-time = "2025-09-24T13:50:26.968Z" },
+    { url = "https://files.pythonhosted.org/packages/38/1e/3f8ea46353c2a33c1669eb7327f9665103aa3a8dfe7f2e4ef714c210b2c2/shapely-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:c64d5c97b2f47e3cd9b712eaced3b061f2b71234b3fc263e0fcf7d889c6559dc", size = 1722856, upload-time = "2025-09-24T13:50:28.497Z" },
+    { url = "https://files.pythonhosted.org/packages/24/c0/f3b6453cf2dfa99adc0ba6675f9aaff9e526d2224cbd7ff9c1a879238693/shapely-2.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fe2533caae6a91a543dec62e8360fe86ffcdc42a7c55f9dfd0128a977a896b94", size = 1833550, upload-time = "2025-09-24T13:50:30.019Z" },
+    { url = "https://files.pythonhosted.org/packages/86/07/59dee0bc4b913b7ab59ab1086225baca5b8f19865e6101db9ebb7243e132/shapely-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ba4d1333cc0bc94381d6d4308d2e4e008e0bd128bdcff5573199742ee3634359", size = 1643556, upload-time = "2025-09-24T13:50:32.291Z" },
+    { url = "https://files.pythonhosted.org/packages/26/29/a5397e75b435b9895cd53e165083faed5d12fd9626eadec15a83a2411f0f/shapely-2.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0bd308103340030feef6c111d3eb98d50dc13feea33affc8a6f9fa549e9458a3", size = 2988308, upload-time = "2025-09-24T13:50:33.862Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/37/e781683abac55dde9771e086b790e554811a71ed0b2b8a1e789b7430dd44/shapely-2.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1e7d4d7ad262a48bb44277ca12c7c78cb1b0f56b32c10734ec9a1d30c0b0c54b", size = 3099844, upload-time = "2025-09-24T13:50:35.459Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/f3/9876b64d4a5a321b9dc482c92bb6f061f2fa42131cba643c699f39317cb9/shapely-2.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e9eddfe513096a71896441a7c37db72da0687b34752c4e193577a145c71736fc", size = 3988842, upload-time = "2025-09-24T13:50:37.478Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/a0/704c7292f7014c7e74ec84eddb7b109e1fbae74a16deae9c1504b1d15565/shapely-2.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:980c777c612514c0cf99bc8a9de6d286f5e186dcaf9091252fcd444e5638193d", size = 4152714, upload-time = "2025-09-24T13:50:39.9Z" },
+    { url = "https://files.pythonhosted.org/packages/53/46/319c9dc788884ad0785242543cdffac0e6530e4d0deb6c4862bc4143dcf3/shapely-2.1.2-cp312-cp312-win32.whl", hash = "sha256:9111274b88e4d7b54a95218e243282709b330ef52b7b86bc6aaf4f805306f454", size = 1542745, upload-time = "2025-09-24T13:50:41.414Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/bf/cb6c1c505cb31e818e900b9312d514f381fbfa5c4363edfce0fcc4f8c1a4/shapely-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:743044b4cfb34f9a67205cee9279feaf60ba7d02e69febc2afc609047cb49179", size = 1722861, upload-time = "2025-09-24T13:50:43.35Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/90/98ef257c23c46425dc4d1d31005ad7c8d649fe423a38b917db02c30f1f5a/shapely-2.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b510dda1a3672d6879beb319bc7c5fd302c6c354584690973c838f46ec3e0fa8", size = 1832644, upload-time = "2025-09-24T13:50:44.886Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/ab/0bee5a830d209adcd3a01f2d4b70e587cdd9fd7380d5198c064091005af8/shapely-2.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8cff473e81017594d20ec55d86b54bc635544897e13a7cfc12e36909c5309a2a", size = 1642887, upload-time = "2025-09-24T13:50:46.735Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/5e/7d7f54ba960c13302584c73704d8c4d15404a51024631adb60b126a4ae88/shapely-2.1.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fe7b77dc63d707c09726b7908f575fc04ff1d1ad0f3fb92aec212396bc6cfe5e", size = 2970931, upload-time = "2025-09-24T13:50:48.374Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/a2/83fc37e2a58090e3d2ff79175a95493c664bcd0b653dd75cb9134645a4e5/shapely-2.1.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7ed1a5bbfb386ee8332713bf7508bc24e32d24b74fc9a7b9f8529a55db9f4ee6", size = 3082855, upload-time = "2025-09-24T13:50:50.037Z" },
+    { url = "https://files.pythonhosted.org/packages/44/2b/578faf235a5b09f16b5f02833c53822294d7f21b242f8e2d0cf03fb64321/shapely-2.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a84e0582858d841d54355246ddfcbd1fce3179f185da7470f41ce39d001ee1af", size = 3979960, upload-time = "2025-09-24T13:50:51.74Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/04/167f096386120f692cc4ca02f75a17b961858997a95e67a3cb6a7bbd6b53/shapely-2.1.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc3487447a43d42adcdf52d7ac73804f2312cbfa5d433a7d2c506dcab0033dfd", size = 4142851, upload-time = "2025-09-24T13:50:53.49Z" },
+    { url = "https://files.pythonhosted.org/packages/48/74/fb402c5a6235d1c65a97348b48cdedb75fb19eca2b1d66d04969fc1c6091/shapely-2.1.2-cp313-cp313-win32.whl", hash = "sha256:9c3a3c648aedc9f99c09263b39f2d8252f199cb3ac154fadc173283d7d111350", size = 1541890, upload-time = "2025-09-24T13:50:55.337Z" },
+    { url = "https://files.pythonhosted.org/packages/41/47/3647fe7ad990af60ad98b889657a976042c9988c2807cf322a9d6685f462/shapely-2.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:ca2591bff6645c216695bdf1614fca9c82ea1144d4a7591a466fef64f28f0715", size = 1722151, upload-time = "2025-09-24T13:50:57.153Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/49/63953754faa51ffe7d8189bfbe9ca34def29f8c0e34c67cbe2a2795f269d/shapely-2.1.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2d93d23bdd2ed9dc157b46bc2f19b7da143ca8714464249bef6771c679d5ff40", size = 1834130, upload-time = "2025-09-24T13:50:58.49Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/ee/dce001c1984052970ff60eb4727164892fb2d08052c575042a47f5a9e88f/shapely-2.1.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:01d0d304b25634d60bd7cf291828119ab55a3bab87dc4af1e44b07fb225f188b", size = 1642802, upload-time = "2025-09-24T13:50:59.871Z" },
+    { url = "https://files.pythonhosted.org/packages/da/e7/fc4e9a19929522877fa602f705706b96e78376afb7fad09cad5b9af1553c/shapely-2.1.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8d8382dd120d64b03698b7298b89611a6ea6f55ada9d39942838b79c9bc89801", size = 3018460, upload-time = "2025-09-24T13:51:02.08Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/18/7519a25db21847b525696883ddc8e6a0ecaa36159ea88e0fef11466384d0/shapely-2.1.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:19efa3611eef966e776183e338b2d7ea43569ae99ab34f8d17c2c054d3205cc0", size = 3095223, upload-time = "2025-09-24T13:51:04.472Z" },
+    { url = "https://files.pythonhosted.org/packages/48/de/b59a620b1f3a129c3fecc2737104a0a7e04e79335bd3b0a1f1609744cf17/shapely-2.1.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:346ec0c1a0fcd32f57f00e4134d1200e14bf3f5ae12af87ba83ca275c502498c", size = 4030760, upload-time = "2025-09-24T13:51:06.455Z" },
+    { url = "https://files.pythonhosted.org/packages/96/b3/c6655ee7232b417562bae192ae0d3ceaadb1cc0ffc2088a2ddf415456cc2/shapely-2.1.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6305993a35989391bd3476ee538a5c9a845861462327efe00dd11a5c8c709a99", size = 4170078, upload-time = "2025-09-24T13:51:08.584Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/8e/605c76808d73503c9333af8f6cbe7e1354d2d238bda5f88eea36bfe0f42a/shapely-2.1.2-cp313-cp313t-win32.whl", hash = "sha256:c8876673449f3401f278c86eb33224c5764582f72b653a415d0e6672fde887bf", size = 1559178, upload-time = "2025-09-24T13:51:10.73Z" },
+    { url = "https://files.pythonhosted.org/packages/36/f7/d317eb232352a1f1444d11002d477e54514a4a6045536d49d0c59783c0da/shapely-2.1.2-cp313-cp313t-win_amd64.whl", hash = "sha256:4a44bc62a10d84c11a7a3d7c1c4fe857f7477c3506e24c9062da0db0ae0c449c", size = 1739756, upload-time = "2025-09-24T13:51:12.105Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/c4/3ce4c2d9b6aabd27d26ec988f08cb877ba9e6e96086eff81bfea93e688c7/shapely-2.1.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:9a522f460d28e2bf4e12396240a5fc1518788b2fcd73535166d748399ef0c223", size = 1831290, upload-time = "2025-09-24T13:51:13.56Z" },
+    { url = "https://files.pythonhosted.org/packages/17/b9/f6ab8918fc15429f79cb04afa9f9913546212d7fb5e5196132a2af46676b/shapely-2.1.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1ff629e00818033b8d71139565527ced7d776c269a49bd78c9df84e8f852190c", size = 1641463, upload-time = "2025-09-24T13:51:14.972Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/57/91d59ae525ca641e7ac5551c04c9503aee6f29b92b392f31790fcb1a4358/shapely-2.1.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f67b34271dedc3c653eba4e3d7111aa421d5be9b4c4c7d38d30907f796cb30df", size = 2970145, upload-time = "2025-09-24T13:51:16.961Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/cb/4948be52ee1da6927831ab59e10d4c29baa2a714f599f1f0d1bc747f5777/shapely-2.1.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21952dc00df38a2c28375659b07a3979d22641aeb104751e769c3ee825aadecf", size = 3073806, upload-time = "2025-09-24T13:51:18.712Z" },
+    { url = "https://files.pythonhosted.org/packages/03/83/f768a54af775eb41ef2e7bec8a0a0dbe7d2431c3e78c0a8bdba7ab17e446/shapely-2.1.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1f2f33f486777456586948e333a56ae21f35ae273be99255a191f5c1fa302eb4", size = 3980803, upload-time = "2025-09-24T13:51:20.37Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/cb/559c7c195807c91c79d38a1f6901384a2878a76fbdf3f1048893a9b7534d/shapely-2.1.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cf831a13e0d5a7eb519e96f58ec26e049b1fad411fc6fc23b162a7ce04d9cffc", size = 4133301, upload-time = "2025-09-24T13:51:21.887Z" },
+    { url = "https://files.pythonhosted.org/packages/80/cd/60d5ae203241c53ef3abd2ef27c6800e21afd6c94e39db5315ea0cbafb4a/shapely-2.1.2-cp314-cp314-win32.whl", hash = "sha256:61edcd8d0d17dd99075d320a1dd39c0cb9616f7572f10ef91b4b5b00c4aeb566", size = 1583247, upload-time = "2025-09-24T13:51:23.401Z" },
+    { url = "https://files.pythonhosted.org/packages/74/d4/135684f342e909330e50d31d441ace06bf83c7dc0777e11043f99167b123/shapely-2.1.2-cp314-cp314-win_amd64.whl", hash = "sha256:a444e7afccdb0999e203b976adb37ea633725333e5b119ad40b1ca291ecf311c", size = 1773019, upload-time = "2025-09-24T13:51:24.873Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/05/a44f3f9f695fa3ada22786dc9da33c933da1cbc4bfe876fe3a100bafe263/shapely-2.1.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:5ebe3f84c6112ad3d4632b1fd2290665aa75d4cef5f6c5d77c4c95b324527c6a", size = 1834137, upload-time = "2025-09-24T13:51:26.665Z" },
+    { url = "https://files.pythonhosted.org/packages/52/7e/4d57db45bf314573427b0a70dfca15d912d108e6023f623947fa69f39b72/shapely-2.1.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5860eb9f00a1d49ebb14e881f5caf6c2cf472c7fd38bd7f253bbd34f934eb076", size = 1642884, upload-time = "2025-09-24T13:51:28.029Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/27/4e29c0a55d6d14ad7422bf86995d7ff3f54af0eba59617eb95caf84b9680/shapely-2.1.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b705c99c76695702656327b819c9660768ec33f5ce01fa32b2af62b56ba400a1", size = 3018320, upload-time = "2025-09-24T13:51:29.903Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/bb/992e6a3c463f4d29d4cd6ab8963b75b1b1040199edbd72beada4af46bde5/shapely-2.1.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a1fd0ea855b2cf7c9cddaf25543e914dd75af9de08785f20ca3085f2c9ca60b0", size = 3094931, upload-time = "2025-09-24T13:51:32.699Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/16/82e65e21070e473f0ed6451224ed9fa0be85033d17e0c6e7213a12f59d12/shapely-2.1.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:df90e2db118c3671a0754f38e36802db75fe0920d211a27481daf50a711fdf26", size = 4030406, upload-time = "2025-09-24T13:51:34.189Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/75/c24ed871c576d7e2b64b04b1fe3d075157f6eb54e59670d3f5ffb36e25c7/shapely-2.1.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:361b6d45030b4ac64ddd0a26046906c8202eb60d0f9f53085f5179f1d23021a0", size = 4169511, upload-time = "2025-09-24T13:51:36.297Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/f7/b3d1d6d18ebf55236eec1c681ce5e665742aab3c0b7b232720a7d43df7b6/shapely-2.1.2-cp314-cp314t-win32.whl", hash = "sha256:b54df60f1fbdecc8ebc2c5b11870461a6417b3d617f555e5033f1505d36e5735", size = 1602607, upload-time = "2025-09-24T13:51:37.757Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/f6/f09272a71976dfc138129b8faf435d064a811ae2f708cb147dccdf7aacdb/shapely-2.1.2-cp314-cp314t-win_amd64.whl", hash = "sha256:0036ac886e0923417932c2e6369b6c52e38e0ff5d9120b90eef5cd9a5fc5cae9", size = 1796682, upload-time = "2025-09-24T13:51:39.233Z" },
+]
+
 [[package]]
 name = "six"
 version = "1.17.0"
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index e9314cb6..d52ba0b5 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -140,6 +140,17 @@ def _inference_routing_fn(wc):
     return OUT_ROOT / input_path
 
 
+def _regions():
+    """Return the region shapefile paths as a single comma-separated string."""
+    strat = config["stratification"]
+    # one "<root>/<region>.shp" entry per configured region name
+    shapefiles = (f"{strat['root']}/{name}.shp" for name in strat["regions"])
+    return ",".join(shapefiles)
+
+
+REGION_TXT = _regions()
+
+
 RUN_CONFIGS = collect_all_runs()
 BASELINE_CONFIGS = collect_all_baselines()
 EXPERIMENT_PARTICIPANTS = collect_experiment_participants()
diff --git a/workflow/rules/report.smk b/workflow/rules/report.smk
index 2ab6ce09..bd6ad4bb 100644
--- a/workflow/rules/report.smk
+++ b/workflow/rules/report.smk
@@ -21,13 +21,12 @@ rule report_experiment_dashboard:
         ),
     params:
         sources=",".join(list(EXPERIMENT_PARTICIPANTS.keys())),
-        header_text="Initializations from "
+        header_text="initializations from "
         + config.get("dates").get("start")
         + " to "
         + config.get("dates").get("end")
         + " by "
-        + config.get("dates").get("frequency")
-        + " have been used.",
+        + config.get("dates").get("frequency"),
     log:
         OUT_ROOT / "logs/report_experiment_dashboard/{experiment}.log",
     shell:
diff --git a/workflow/rules/verif.smk b/workflow/rules/verif.smk
index 6c732db8..dd69a966 100644
--- a/workflow/rules/verif.smk
+++ b/workflow/rules/verif.smk
@@ -24,6 +24,7 @@ rule verif_metrics_baseline:
         baseline_label=lambda wc: BASELINE_CONFIGS[wc.baseline_id].get("label"),
         baseline_steps=lambda wc: BASELINE_CONFIGS[wc.baseline_id]["steps"],
         analysis_label=config["analysis"].get("label"),
+        regions=REGION_TXT,
     output:
         OUT_ROOT / "data/baselines/{baseline_id}/{init_time}/verif.nc",
     log:
@@ -41,6 +42,7 @@ rule verif_metrics_baseline:
             --steps "{params.baseline_steps}" \
             --baseline_label "{params.baseline_label}" \
             --analysis_label "{params.analysis_label}" \
+            --regions "{params.regions}" \
             --output {output} > {log} 2>&1
         """
 
@@ -55,6 +57,7 @@ def _get_no_none(dict, key, replacement):
 rule verif_metrics:
     input:
         script="workflow/scripts/verif_from_grib.py",
+        module="src/verification/__init__.py",
         inference_okfile=rules.execute_inference.output.okfile,
         analysis_zarr=config["analysis"].get("analysis_zarr"),
     output:
@@ -66,6 +69,7 @@ rule verif_metrics:
         fcst_label=lambda wc: RUN_CONFIGS[wc.run_id].get("label"),
         fcst_steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"],
         analysis_label=config["analysis"].get("label"),
+        regions=REGION_TXT,
         grib_out_dir=lambda wc: (
             Path(OUT_ROOT) / f"data/runs/{wc.run_id}/{wc.init_time}/grib"
         ).resolve(),
@@ -83,6 +87,7 @@ rule verif_metrics:
             --steps "{params.fcst_steps}" \
             --fcst_label "{params.fcst_label}" \
             --analysis_label "{params.analysis_label}" \
+            --regions "{params.regions}" \
             --output {output} > {log} 2>&1
         """
 
diff --git a/workflow/scripts/report_experiment_dashboard.py b/workflow/scripts/report_experiment_dashboard.py
index cc965c70..c66625b1 100644
--- a/workflow/scripts/report_experiment_dashboard.py
+++ b/workflow/scripts/report_experiment_dashboard.py
@@ -53,6 +53,7 @@ def main(args):
     sources = df["source"].unique()
     params = df["param"].unique()
     metrics = df["metric"].unique()
+    regions = df["region"].unique()
 
     # get json string to embed in the HTML
     df_json = df.to_json(orient="records", lines=False)
@@ -76,6 +77,7 @@ def main(args):
         sources=sources,
         params=params,
         metrics=metrics,
+        regions=regions,
         header_text=args.header_text,
     )
     LOG.info("Size of generated HTML: %d bytes", len(html.encode("utf-8")))
diff --git a/workflow/scripts/verif_baseline.py b/workflow/scripts/verif_baseline.py
index b8f0fac5..4af95ef6 100644
--- a/workflow/scripts/verif_baseline.py
+++ b/workflow/scripts/verif_baseline.py
@@ -185,7 +185,9 @@ def main(args: ScriptConfig):
 
     # compute metrics and statistics
 
-    results = verify(baseline, analysis, args.baseline_label, args.analysis_label)
+    results = verify(
+        baseline, analysis, args.baseline_label, args.analysis_label, args.regions
+    )
 
     # save results to NetCDF
     args.output.parent.mkdir(parents=True, exist_ok=True)
@@ -238,6 +240,11 @@ def main(args: ScriptConfig):
         default="COSMO KENDA",
         help="Label for the analysis data (default: COSMO KENDA).",
     )
+    parser.add_argument(
+        "--regions",
+        type=lambda x: x.split(","),
+        help="Comma-separated list of shapefile paths defining regions for stratification.",
+    )
     parser.add_argument(
         "--output",
         type=Path,
diff --git a/workflow/scripts/verif_from_grib.py b/workflow/scripts/verif_from_grib.py
index 5ed09be7..85c80a7b 100644
--- a/workflow/scripts/verif_from_grib.py
+++ b/workflow/scripts/verif_from_grib.py
@@ -213,7 +213,7 @@ def main(args: ScriptConfig):
     )
 
     # compute metrics and statistics
-    results = verify(fct, analysis, args.fcst_label, args.analysis_label)
+    results = verify(fct, analysis, args.fcst_label, args.analysis_label, args.regions)
 
     # # save results to CSV
     args.output.parent.mkdir(parents=True, exist_ok=True)
@@ -266,6 +266,11 @@ def main(args: ScriptConfig):
         help="Label for the analysis data (default: COSMO KENDA).",
         default="COSMO KENDA",
     )
+    parser.add_argument(
+        "--regions",
+        type=lambda x: x.split(","),
+        help="Comma-separated list of shapefile paths defining regions for stratification.",
+    )
     parser.add_argument(
         "--output",
         type=Path,
diff --git a/workflow/scripts/verif_plot_metrics.py b/workflow/scripts/verif_plot_metrics.py
index 9daebf32..a5a50bba 100644
--- a/workflow/scripts/verif_plot_metrics.py
+++ b/workflow/scripts/verif_plot_metrics.py
@@ -95,20 +95,22 @@ def main(args: Namespace) -> None:
     metrics = all_df["metric"].unique()
     params = all_df["param"].unique()
     seasons = all_df["season"].unique() if args.stratify else ["all"]
+    regions = all_df["region"].unique() if args.stratify else ["all"]
     init_hours = (
         all_df["init_hour"].unique() if args.stratify else [-999]
     )  # numeric code to indicate all init hours
 
-    for metric, param, season, init_hour in itertools.product(
-        metrics, params, seasons, init_hours
+    for region, metric, param, season, init_hour in itertools.product(
+        regions, metrics, params, seasons, init_hours
     ):
         LOG.info(
-            f"Processing metric: {metric}, param: {param}, season: {season}, init_hour: {init_hour}"
+            f"Processing region: {region}, metric: {metric}, param: {param}, season: {season}, init_hour: {init_hour}"
         )
 
         def _subset_df(df):
             return subset_df(
                 df,
+                region=region,
                 metric=metric,
                 param=param,
                 season=season,
@@ -120,7 +122,7 @@ def _subset_df(df):
         # breakpoint()
         fig, ax = plt.subplots(figsize=(10, 6))
 
-        title = f"{metric} - {param}"
+        title = f"{metric} - {param} - {region}"
         title += f"- {season} - {init_hour}" if args.stratify else ""
         for source, df in sub_df.groupby("source"):
             df.plot(
diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json
index 94024013..941234b5 100644
--- a/workflow/tools/config.schema.json
+++ b/workflow/tools/config.schema.json
@@ -474,6 +474,30 @@
       ],
       "title": "Profile",
       "type": "object"
+    },
+    "Stratification": {
+      "description": "Stratification settings for the analysis.",
+      "properties": {
+        "regions": {
+          "description": "List of region names for stratification.",
+          "items": {
+            "type": "string"
+          },
+          "title": "Regions",
+          "type": "array"
+        },
+        "root": {
+          "description": "Root directory where the region shapefiles are stored.",
+          "title": "Root",
+          "type": "string"
+        }
+      },
+      "required": [
+        "regions",
+        "root"
+      ],
+      "title": "Stratification",
+      "type": "object"
     }
   },
   "additionalProperties": false,
@@ -521,6 +545,9 @@
     "analysis": {
       "$ref": "#/$defs/AnalysisConfig"
     },
+    "stratification": {
+      "$ref": "#/$defs/Stratification"
+    },
     "locations": {
       "$ref": "#/$defs/Locations"
     },
@@ -534,6 +561,7 @@
     "runs",
     "baselines",
     "analysis",
+    "stratification",
     "locations",
     "profile"
   ],

From 04c4cf17dcf92bf39e0348e431b0ebf307a9924c Mon Sep 17 00:00:00 2001
From: Andreas Pauling <paa@balfrin-ln002.cscs.ch>
Date: Thu, 11 Dec 2025 09:16:11 +0100
Subject: [PATCH 20/34] input data and namelist for MEC

---
 resources/mec/namelist.jinja2 | 16 +++-----
 workflow/rules/verif_obs.smk  | 77 +++++++++++++++++++++++------------
 2 files changed, 58 insertions(+), 35 deletions(-)

diff --git a/resources/mec/namelist.jinja2 b/resources/mec/namelist.jinja2
index 6fa71063..9fd930d9 100644
--- a/resources/mec/namelist.jinja2
+++ b/resources/mec/namelist.jinja2
@@ -1,5 +1,5 @@
 !==============================================================================
-! namelist for MEC
+! namelist template for MEC
 !==============================================================================
 
  !===================
@@ -12,12 +12,12 @@
    data           = '/oprusers/osm/opr.emme/data/'      ! data path for auxiliary data
    obsinput       = './input_obs'                 ! observation input data path
    output         = './output'                    ! output data to working directory
-   time_ana       = 20201028000000                  ! analysis date
+   time_ana       = {{ init_time }}00             ! analysis date YYYYMMDDHHMMSS
    read_fields    = 'ps u t v q geof t2m td2m u_10m v_10m'
    grib_edition   = 2
    grib_library   = 2                             ! GRIB-API used: 1=GRIBEX  2=GRIB2-API
    cosmo_refatm   = 2                             ! reference atmosphere to be used for COSMO:1or2
-   fc_hours       = 0                                             ! Default is 3h. Has to be set to 0 if one wants to verify +0h leadtime
+   fc_hours       = 0                             ! Default is 3h. Has to be set to 0 if one wants to verify +0h leadtime
    nproc1         = 1
    nproc2         = 1
  /
@@ -51,17 +51,13 @@
  !================================
  &veri_obs
   obstypes      = "SYNOP"      ! "SYNOP TEMP"
-  fc_times      = 0000,1200,2400,3600,4800,6000,7200,8400,9600,10800,12000  ! forecast lead time at reference (hhmm)
+  fc_times      = {{ leadtimes }} ! forecast lead time at reference (hhmm) 0000,1200,2400,...  
   prefix_in     = 'ekf'                                                                                                                                                                                           
   prefix_out    = 'ver'               
   rm_old        = 2                          ! overwrite entries in verification file ?
-  fc_file       = '_FCR_TIME_/lfffDDVVMMSS'  ! template for forecast file name
-  !det_suffix    = '.m000'   ! for ensemble forecast must be set in order to differentiate between the input model files.
+  fc_file       = '_FCR_TIME_/lfffDDVVMMSS'  ! template for forecast file name TODO: adapt to new structure!
   time_range    = 1                                                                                                                                                                                               
   ekf_concat    = F                                                                                                                                                                                               
-  !ekf_rm_ve     =  -2 -7   ! special (<0) member ids to remove 
-  !eps_offset    = 40   ! skip the first members up to this value to which is set. The default is 0
-  !ekf_offset    = 40   ! exclude members in ekf-file up to this value => try setting to 40? => no effect => ask Hendrik?
   ref_runtype   = 'any'  ! accept any runtype for the reference state
  /
 
@@ -76,6 +72,6 @@
    verification_end   =  30       ! (min, inclusive)
  /
  &synop_obs
-   version = 1   ! Harald: Für version >= 3 werden konsistent die DACE-internen Kontrollvariablen genutzt, mit ggf. Umrechnung tv,rh <-> t,td etc. DACE-Version mind. 2.22 . Random crashes (T=10000 Umrechnungen failen). Need of no undefs for version=3 
+   version = 1  
  /
  
\ No newline at end of file
diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk
index fffa29c8..3b36660c 100644
--- a/workflow/rules/verif_obs.smk
+++ b/workflow/rules/verif_obs.smk
@@ -1,58 +1,85 @@
 from pathlib import Path
 
-rule generate_observation_data:
+rule collect_mec_input:
     input:
-        testcase_dir="/scratch/mch/mmcgloho/MEC/2020102800",
+        inference_dir=rules.prepare_inference_forecaster.output.grib_out_dir
     output:
-        input_obs=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_obs"),
-        input_mod=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_mod"),
-        parent=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec"),
+        obs=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_obs"),
+        mod=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_mod"),
     shell:
         """
-        cp -r {input.testcase_dir}/input_obs {output.parent}/
-        cp -r {input.testcase_dir}/input_mod {output.parent}/
-        ls {output.parent}
-        # TODO: Some data still seems to be missing.
+        # create the input_obs and input_mod dirs
+        mkdir -p {output.obs} {output.mod}
+
+        # extract YYYYMM from init_time (which is YYYYMMDDHHMM) and use it in the paths
+        init="{wildcards.init_time}"
+        ym="${{init:0:6}}"
+
+        # collect obs and mod files
+        cp /store_new/mch/msopr/osm/KENDA-1/EKF/${{ym}}/ekfSYNOP_${{init}}00.nc {output.obs}
+        cat {input.inference_dir}/*.grib > {output.mod}/fc_${{init}}
+        ls -l {output.mod}  {output.obs}
         """
 
 rule generate_mec_namelist:
     input:
         template="resources/mec/namelist.jinja2"
     output:
-        #namelist=OUT_ROOT / "data/runs/mec/namelist",
-        # TODO: get wildcards working.
         namelist=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/namelist",
     run:
         import jinja2
-        # TODO: get wildcards working.
-        context = {"init_time": wildcards.init_time}
+        import re
+
+        # Construct the leadtimes list for MEC namelist from config steps
+        steps_str = None
+        cfg_runs = config.get("runs", []) if config else []
+        first = cfg_runs[0] if cfg_runs else {}
+        forecaster = first.get("forecaster") if isinstance(first, dict) else None
+        steps_str = forecaster.get("steps") if isinstance(forecaster, dict) else None
+
+        # Parse steps: start/stop/step (hours). Example: "0/120/6"
+        m = re.match(r"^\s*(\d+)\s*/\s*(\d+)\s*/\s*(\d+)\s*$", str(steps_str))
+        if not m:
+            raise ValueError(f"Invalid steps format: {steps_str}. Expected 'start/stop/step' in hours")
+        start_h, stop_h, step_h = map(int, m.groups())
+
+        # Include stop_h (inclusive). Produce strings like 0000,0600,1200,...,12000
+        lead_hours = range(start_h, stop_h + 1, step_h)
+        leadtimes = ",".join(f"{h:02d}00" for h in lead_hours)
+
+        # Render template with init_time and computed leadtimes
+        context = {"init_time": wildcards.init_time, "leadtimes": leadtimes}
         template_path = Path(input.template)
-        env = jinja2.Environment(
-            loader=jinja2.FileSystemLoader({template_path.parent})
-        )
+        env = jinja2.Environment(loader=jinja2.FileSystemLoader(str(template_path.parent)))
         template = env.get_template(template_path.name)
         namelist = template.render(**context)
-        namelist_fn = Path(output.namelist)
-        with namelist_fn.open("w+") as f:
+        print(f"MEC namelist created: \n{namelist}")
+        
+        out_path = Path(str(output.namelist))
+        out_path.parent.mkdir(parents=True, exist_ok=True)
+        with out_path.open("w", encoding="utf-8") as f:
             f.write(namelist)
 
 rule run_mec:
     input:
-        testcase_dir=directory(rules.generate_observation_data.output.parent),
-        namelist=rules.generate_mec_namelist.output.namelist
+        namelist=rules.generate_mec_namelist.output.namelist,
+        run_dir=directory(rules.collect_mec_input.output.mod),
+        mod_dir=directory(rules.collect_mec_input.output.mod),
+
     output:
-        OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/output/verSYNOP.nc"
+        folder_to_delete=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/folder_to_delete")
     resources:
         cpus_per_task=1,
         runtime="1h",
     shell:
         #TODO(mmcglohon): Replace podman with sarus if needed.
         """
-        echo 'running mec on namelist:'
+        echo 'would run mec on namelist:'
         cat {input.namelist}
-        ls {input.testcase_dir}
+        ls -l {input.run_dir}
+        mkdir -p {output.folder_to_delete}
         # Note: pull command currently redundant; may not be the case with sarus.
         #podman pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
-        srun --pty -N1 -c 11 -p postproc -t 2:00:00 podman run --mount=type=bind,source={input.testcase_dir},destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
-        ls -l {output}
+        #srun --pty -N1 -c 11 -p postproc -t 2:00:00 podman run --mount=type=bind,source={{input.run_dir}},destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
+        #ls -l {{output}}
         """
\ No newline at end of file

From 23c9599500d373a1e91733fcfa2d025687192063 Mon Sep 17 00:00:00 2001
From: Daniele Nerini <daniele.nerini@meteoswiss.ch>
Date: Thu, 11 Dec 2025 15:04:00 +0100
Subject: [PATCH 21/34] Cleanup

---
 ...interpolator_from_test_data_stretched.yaml | 24 -------------------
 1 file changed, 24 deletions(-)
 delete mode 100644 resources/inference/configs/interpolator_from_test_data_stretched.yaml

diff --git a/resources/inference/configs/interpolator_from_test_data_stretched.yaml b/resources/inference/configs/interpolator_from_test_data_stretched.yaml
deleted file mode 100644
index 21674891..00000000
--- a/resources/inference/configs/interpolator_from_test_data_stretched.yaml
+++ /dev/null
@@ -1,24 +0,0 @@
-runner: time_interpolator
-
-input:
-  test:
-    use_original_paths: true
-
-output:
-  tee:
-    outputs:
-      - extract_lam:
-          output:
-            assign_mask:
-              mask: "source0/trimedge_mask"
-              output:
-                grib:
-                  path: grib/{dateTime}_{step:03}.grib
-                  encoding:
-                    typeOfGeneratingProcess: 2
-                  templates:
-                    samples: resources/templates_index_cosmo.yaml
-
-verbosity: 1
-allow_nans: true
-output_frequency: "1h"

From 804455adc57de52b75bcb07bdf1ffc59b6e96b51 Mon Sep 17 00:00:00 2001
From: Andreas Pauling <paa@balfrin-ln002.cscs.ch>
Date: Thu, 11 Dec 2025 16:31:16 +0100
Subject: [PATCH 22/34] Refactor MEC namelist generation

---
 workflow/Snakefile                        |   6 +-
 workflow/rules/verif_obs.smk              | 156 ++++++++++------------
 workflow/scripts/generate_mec_namelist.py |  81 ++++++++---
 3 files changed, 138 insertions(+), 105 deletions(-)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index 9d027bb5..59419d02 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -17,7 +17,7 @@ include: "rules/verif.smk"
 include: "rules/report.smk"
 include: "rules/plot.smk"
 include: "rules/summary.smk"
-
+include: "rules/verif_obs.smk"
 
 # optional messages, log and error handling
 # -----------------------------------------------------
@@ -103,10 +103,12 @@ rule verif_metrics_plot_all:
         ),
 
 # To run:  snakemake --cores 1 --configfile=config/forecasters.yaml verif_obs_all
+# clean: rm -rf ./output/data/runs/d0846032f/202001040000/mec ./output/data/runs/d0846032f/202001040000/folder_to_delete
+#            init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], 
 rule verif_obs_all:
     input:
         expand(
             rules.run_mec.output,
-            init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
+            init_time=202001040000,
             run_id=collect_all_runs(),
         )
\ No newline at end of file
diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk
index 3b36660c..d14b30c9 100644
--- a/workflow/rules/verif_obs.smk
+++ b/workflow/rules/verif_obs.smk
@@ -1,85 +1,71 @@
-from pathlib import Path
-
-rule collect_mec_input:
-    input:
-        inference_dir=rules.prepare_inference_forecaster.output.grib_out_dir
-    output:
-        obs=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_obs"),
-        mod=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_mod"),
-    shell:
-        """
-        # create the input_obs and input_mod dirs
-        mkdir -p {output.obs} {output.mod}
-
-        # extract YYYYMM from init_time (which is YYYYMMDDHHMM) and use it in the paths
-        init="{wildcards.init_time}"
-        ym="${{init:0:6}}"
-
-        # collect obs and mod files
-        cp /store_new/mch/msopr/osm/KENDA-1/EKF/${{ym}}/ekfSYNOP_${{init}}00.nc {output.obs}
-        cat {input.inference_dir}/*.grib > {output.mod}/fc_${{init}}
-        ls -l {output.mod}  {output.obs}
-        """
-
-rule generate_mec_namelist:
-    input:
-        template="resources/mec/namelist.jinja2"
-    output:
-        namelist=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/namelist",
-    run:
-        import jinja2
-        import re
-
-        # Construct the leadtimes list for MEC namelist from config steps
-        steps_str = None
-        cfg_runs = config.get("runs", []) if config else []
-        first = cfg_runs[0] if cfg_runs else {}
-        forecaster = first.get("forecaster") if isinstance(first, dict) else None
-        steps_str = forecaster.get("steps") if isinstance(forecaster, dict) else None
-
-        # Parse steps: start/stop/step (hours). Example: "0/120/6"
-        m = re.match(r"^\s*(\d+)\s*/\s*(\d+)\s*/\s*(\d+)\s*$", str(steps_str))
-        if not m:
-            raise ValueError(f"Invalid steps format: {steps_str}. Expected 'start/stop/step' in hours")
-        start_h, stop_h, step_h = map(int, m.groups())
-
-        # Include stop_h (inclusive). Produce strings like 0000,0600,1200,...,12000
-        lead_hours = range(start_h, stop_h + 1, step_h)
-        leadtimes = ",".join(f"{h:02d}00" for h in lead_hours)
-
-        # Render template with init_time and computed leadtimes
-        context = {"init_time": wildcards.init_time, "leadtimes": leadtimes}
-        template_path = Path(input.template)
-        env = jinja2.Environment(loader=jinja2.FileSystemLoader(str(template_path.parent)))
-        template = env.get_template(template_path.name)
-        namelist = template.render(**context)
-        print(f"MEC namelist created: \n{namelist}")
-        
-        out_path = Path(str(output.namelist))
-        out_path.parent.mkdir(parents=True, exist_ok=True)
-        with out_path.open("w", encoding="utf-8") as f:
-            f.write(namelist)
-
-rule run_mec:
-    input:
-        namelist=rules.generate_mec_namelist.output.namelist,
-        run_dir=directory(rules.collect_mec_input.output.mod),
-        mod_dir=directory(rules.collect_mec_input.output.mod),
-
-    output:
-        folder_to_delete=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/folder_to_delete")
-    resources:
-        cpus_per_task=1,
-        runtime="1h",
-    shell:
-        #TODO(mmcglohon): Replace podman with sarus if needed.
-        """
-        echo 'would run mec on namelist:'
-        cat {input.namelist}
-        ls -l {input.run_dir}
-        mkdir -p {output.folder_to_delete}
-        # Note: pull command currently redundant; may not be the case with sarus.
-        #podman pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
-        #srun --pty -N1 -c 11 -p postproc -t 2:00:00 podman run --mount=type=bind,source={{input.run_dir}},destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
-        #ls -l {{output}}
-        """
\ No newline at end of file
+from pathlib import Path
+
+
+rule collect_mec_input:
+    input:
+        inference_dir=rules.prepare_inference_forecaster.output.grib_out_dir,
+    output:
+        obs=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_obs"),
+        mod=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_mod"),
+    params:
+        steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"],
+    shell:
+        """
+        # create the input_obs and input_mod dirs
+        mkdir -p {output.obs} {output.mod}
+
+        # extract YYYYMM from init_time (which is YYYYMMDDHHMM) and use it in the paths
+        init="{wildcards.init_time}"
+        ym="${{init:0:6}}"
+        lt="{params.lead_time}" 120h start, end, step
+        # collect obs and mod files
+        import config here?  
+        cp /store_new/mch/msopr/osm/KENDA-1/EKF/${{ym}}/ekfSYNOP_${{init}}00.nc {output.obs}
+        cat {input.inference_dir}/*.grib > {output.mod}/fc_${{init}}
+        ls -l {output.mod}  {output.obs}
+        """
+
+
+rule generate_mec_namelist:
+    localrule: True
+    input:
+        script="workflow/scripts/generate_mec_namelist.py",
+        template="resources/mec/namelist.jinja2",
+    output:
+        namelist=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/namelist",
+    params:
+        steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"],
+    shell:
+        """
+        uv run {input.script} \
+            --steps {params.steps} \
+            --init_time {wildcards.init_time} \
+            --template {input.template} \
+            --namelist {output.namelist}
+        """
+
+
+rule run_mec:
+    input:
+        namelist=rules.generate_mec_namelist.output.namelist,
+        run_dir=directory(rules.collect_mec_input.output.mod),
+        mod_dir=directory(rules.collect_mec_input.output.mod),
+    output:
+        folder_to_delete=directory(
+            OUT_ROOT / "data/runs/{run_id}/{init_time}/folder_to_delete"
+        ),
+    resources:
+        cpus_per_task=1,
+        runtime="1h",
+    shell:
+        #TODO(mmcglohon): Replace podman with sarus if needed.
+        """
+        echo 'would run mec on namelist:'
+        cat {input.namelist}
+        ls -l {input.run_dir}
+        mkdir -p {output.folder_to_delete}
+        # Note: pull command currently redundant; may not be the case with sarus.
+        #podman pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
+        #srun --pty -N1 -c 11 -p postproc -t 2:00:00 podman run --mount=type=bind,source={{input.run_dir}},destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
+        #ls -l {{output}}
+        """
diff --git a/workflow/scripts/generate_mec_namelist.py b/workflow/scripts/generate_mec_namelist.py
index 0e938805..f468f81b 100644
--- a/workflow/scripts/generate_mec_namelist.py
+++ b/workflow/scripts/generate_mec_namelist.py
@@ -1,27 +1,72 @@
 import logging
-import jinja2
-# snakemake object inherited by default, but this enables code completion.
-from snakemake.script import snakemake
+from argparse import ArgumentParser
+from datetime import datetime
 from pathlib import Path
 
+import jinja2
+
+LOG = logging.getLogger(__name__)
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+)
+
+
+def _parse_steps(steps: str) -> list[int]:
+    """Expand 'start/stop/step' (hours) into the inclusive list of lead hours."""
+    # exactly three '/'-separated fields; non-integer fields raise ValueError in int()
+    parts = steps.split("/")
+    if len(parts) != 3:
+        raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'")
+    start, end, step = map(int, parts)
+    return list(range(start, end + 1, step))
+
+
 # Note: not currently in use; optional script in case we want to factor it out
 # of the rules file
 def main(args):
-	#TODO: get wildcards working
-	context = {}
-	#context = {"init_time": snakemake.wildcards.init_time}
-	template_path = Path(snakemake.input.template)
-	logging.info('writing namelist to {template_filename}')
-	env = jinja2.Environment(
-		loader=jinja2.FileSystemLoader({template_path.parent})
-	)
-	template = env.get_template(template_path.name)
-	namelist = template.render(**context)
-	namelist_fn = Path(snakemake.output['namelist'])
-	with namelist_fn.open("w+") as f:
-		f.write(namelist)
-	logging.info('finished writing namelist')
+
+    # args.steps holds the lead hours; format them as hhmm: 0000,0600,...,12000
+    lead_hours = args.steps
+    leadtimes = ",".join(f"{h:02d}00" for h in lead_hours)
+
+    # Render template with init_time (YYYYMMDDHHMM) and the computed leadtimes
+    context = {"init_time": f"{args.init_time:%Y%m%d%H%M}", "leadtimes": leadtimes}
+    template_path = Path(args.template)
+    env = jinja2.Environment(loader=jinja2.FileSystemLoader(str(template_path.parent)))
+    template = env.get_template(template_path.name)
+    namelist = template.render(**context)
+    LOG.info(f"MEC namelist created: {namelist}")
+
+    out_path = Path(str(args.namelist))
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    with out_path.open("w", encoding="utf-8") as f:
+        f.write(namelist)
 
 
 if __name__ == "__main__":
-	main()
\ No newline at end of file
+
+    parser = ArgumentParser()
+
+    parser.add_argument("--steps", type=_parse_steps, default="0/120/6")
+
+    parser.add_argument(
+        "--init_time",
+        type=lambda s: datetime.strptime(s, "%Y%m%d%H%M"),
+        default="202010010000",
+        help="Forecast initialization time in the format YYYYMMDDHHMM.",
+    )
+
+    parser.add_argument(
+        "--template",
+        help="Path to the Jinja2 template of the MEC namelist.",
+    )
+
+    parser.add_argument(
+        "--namelist",
+        type=str,
+        help="Path of the MEC namelist file to write.",
+    )
+
+    args = parser.parse_args()
+
+    main(args)

From f793d8596acafd4c344690b5669d85cfaccf07d8 Mon Sep 17 00:00:00 2001
From: Andreas Pauling <paa@balfrin-ln002.cscs.ch>
Date: Thu, 18 Dec 2025 17:14:22 +0100
Subject: [PATCH 23/34] setup MEC case

---
 config/forecasters.yaml       | 10 ++---
 resources/mec/namelist.jinja2 |  2 +-
 workflow/rules/verif_obs.smk  | 73 ++++++++++++++++++++++++++++-------
 3 files changed, 65 insertions(+), 20 deletions(-)

diff --git a/config/forecasters.yaml b/config/forecasters.yaml
index bd114857..c1638f9f 100644
--- a/config/forecasters.yaml
+++ b/config/forecasters.yaml
@@ -3,15 +3,15 @@ description: |
   Evaluate skill of COSMO-E emulator (M-1 forecaster).
 
 dates:
-  - 2020-02-03T00:00 # Storm Petra
-  - 2020-02-07T00:00 # Storm Sabine
-  - 2020-10-01T00:00 # Storm Brigitte
+  start: 2020-01-02T00:00
+  end: 2020-01-04T00:00
+  frequency: 12h
 
 runs:
   - forecaster:
       mlflow_id: d0846032fc7248a58b089cbe8fa4c511
       label: M-1 forecaster
-      steps: 0/120/6
+      steps: 0/48/6
       config: resources/inference/configs/sgm-forecaster-global_trimedge.yaml
       extra_dependencies:
         - git+https://github.com/ecmwf/anemoi-inference.git@main
@@ -21,7 +21,7 @@ baselines:
       baseline_id: COSMO-E
       label: COSMO-E
       root: /store_new/mch/msopr/ml/COSMO-E
-      steps: 0/120/6
+      steps: 0/48/6
 
 analysis:
   label: COSMO KENDA
diff --git a/resources/mec/namelist.jinja2 b/resources/mec/namelist.jinja2
index 9fd930d9..be7ac599 100644
--- a/resources/mec/namelist.jinja2
+++ b/resources/mec/namelist.jinja2
@@ -55,7 +55,7 @@
   prefix_in     = 'ekf'                                                                                                                                                                                           
   prefix_out    = 'ver'               
   rm_old        = 2                          ! overwrite entries in verification file ?
-  fc_file       = '_FCR_TIME_/lfffDDVVMMSS'  ! template for forecast file name TODO: adapt to new structure!
+  fc_file       = 'fc__FCR_TIME'  ! template for forecast file name
   time_range    = 1                                                                                                                                                                                               
   ekf_concat    = F                                                                                                                                                                                               
   ref_runtype   = 'any'  ! accept any runtype for the reference state
diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk
index d14b30c9..b1b34354 100644
--- a/workflow/rules/verif_obs.smk
+++ b/workflow/rules/verif_obs.smk
@@ -1,4 +1,23 @@
 from pathlib import Path
+from datetime import datetime, timedelta
+
+
+def get_init_times(wc):
+    """
+    Return init times (YYYYMMDDHHMM), oldest first: init_time - k * frequency
+    (default 12h) for every k >= 0 with k * frequency <= lead time.
+    """
+    init = wc.init_time
+    lt = get_leadtime(wc)  # expects something like "48h"
+    lead_h = int(str(lt).rstrip("h"))
+    freq_cfg = RUN_CONFIGS[wc.run_id].get("frequency", "12h")
+    freq_h = int(str(freq_cfg).rstrip("h"))
+    base = datetime.strptime(init, "%Y%m%d%H%M")
+    times = []
+    for h in range(lead_h - lead_h % freq_h, -1, -freq_h):  # anchor grid on init
+        t = base - timedelta(hours=h)  # step back h hours from the requested init
+        times.append(t.strftime("%Y%m%d%H%M"))
+    return times
 
 
 rule collect_mec_input:
@@ -9,20 +28,45 @@ rule collect_mec_input:
         mod=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_mod"),
     params:
         steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"],
+        init_list_str=lambda wc: " ".join(get_init_times(wc)),
+        run_root=lambda wc: str(OUT_ROOT / f"data/runs/{wc.run_id}"),
+    log:
+        OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/{run_id}-{init_time}.log",
     shell:
         """
+        (
+        set -euo pipefail
+
         # create the input_obs and input_mod dirs
         mkdir -p {output.obs} {output.mod}
 
         # extract YYYYMM from init_time (which is YYYYMMDDHHMM) and use it in the paths
         init="{wildcards.init_time}"
         ym="${{init:0:6}}"
-        lt="{params.lead_time}" 120h start, end, step
-        # collect obs and mod files
-        import config here?  
-        cp /store_new/mch/msopr/osm/KENDA-1/EKF/${{ym}}/ekfSYNOP_${{init}}00.nc {output.obs}
-        cat {input.inference_dir}/*.grib > {output.mod}/fc_${{init}}
-        ls -l {output.mod}  {output.obs}
+        ymdh="${{init:0:10}}"
+        echo "init time: ${{init}}, ym: ${{ym}}"
+        
+        # collect observations (ekfSYNOP) and/or (monSYNOP from DWD; includes precip) files
+        cp /store_new/mch/msopr/osm/KENDA-1/EKF/${{ym}}/ekfSYNOP_${{init}}00.nc {output.obs}/efkSYNOP.nc
+        cp /scratch/mch/paa/mec/MEC_ML_input/monFiles2020/hpc/uwork/swahl/temp/feedback/monSYNOP.${{init:0:10}} {output.obs}/monSYNOP.nc
+
+        # For each source init (src_init) produce one output file named fc_<src_init>
+        for src_init in {params.init_list_str}; do
+            src_dir="{params.run_root}/$src_init/grib"
+            out_file="{output.mod}/fc_$src_init"
+            echo "creating $out_file from $src_dir"
+            # create/truncate out_file
+            : > "$out_file"
+            # only concat if matching files exist
+            if compgen -G "$src_dir/2*.grib" > /dev/null; then
+                cat "$src_dir"/2*.grib >> "$out_file"
+            else
+                echo "WARNING: no grib files found in $src_dir" >&2
+            fi
+        done
+
+        ls -l {output.mod} {output.obs}
+        ) > {log} 2>&1
         """
 
 
@@ -51,21 +95,22 @@ rule run_mec:
         run_dir=directory(rules.collect_mec_input.output.mod),
         mod_dir=directory(rules.collect_mec_input.output.mod),
     output:
-        folder_to_delete=directory(
-            OUT_ROOT / "data/runs/{run_id}/{init_time}/folder_to_delete"
-        ),
+        fdbk_file=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/verSYNOP.nc",
     resources:
         cpus_per_task=1,
         runtime="1h",
+    log:
+        OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/{run_id}-{init_time}_run_mec.log",
     shell:
         #TODO(mmcglohon): Replace podman with sarus if needed.
         """
-        echo 'would run mec on namelist:'
-        cat {input.namelist}
-        ls -l {input.run_dir}
-        mkdir -p {output.folder_to_delete}
+        (
+        set -euo pipefail
+
         # Note: pull command currently redundant; may not be the case with sarus.
         #podman pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
-        #srun --pty -N1 -c 11 -p postproc -t 2:00:00 podman run --mount=type=bind,source={{input.run_dir}},destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
+        srun --pty -N1 -c 1 -p postproc -t 2:00:00 podman run --mount=type=bind,source={{input.run_dir}},destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
         #ls -l {{output}}
+        #TODO: copy output verSYNOP.nc to appropriate location
+        ) > {log} 2>&1
         """

From 3839476ebce88f4ba6144d24da09ddec273636a5 Mon Sep 17 00:00:00 2001
From: Andreas Pauling <paa@balfrin-ln002.cscs.ch>
Date: Wed, 7 Jan 2026 17:42:06 +0100
Subject: [PATCH 24/34] add use of local MEC executable and cleaning

---
 resources/mec/namelist.jinja2             | 10 ++++----
 workflow/rules/verif_obs.smk              | 28 +++++++++++++++--------
 workflow/scripts/generate_mec_namelist.py |  2 --
 3 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/resources/mec/namelist.jinja2 b/resources/mec/namelist.jinja2
index be7ac599..3dff1651 100644
--- a/resources/mec/namelist.jinja2
+++ b/resources/mec/namelist.jinja2
@@ -11,7 +11,7 @@
    input          = './input_mod'                 ! input data path
    data           = '/oprusers/osm/opr.emme/data/'      ! data path for auxiliary data
    obsinput       = './input_obs'                 ! observation input data path
-   output         = './output'                    ! output data to working directory
+   output         = '.'                           ! output data to working directory
    time_ana       = {{ init_time }}00             ! analysis date YYYYMMDDHHMMSS
    read_fields    = 'ps u t v q geof t2m td2m u_10m v_10m'
    grib_edition   = 2
@@ -50,12 +50,12 @@
  ! Verification related parameters
  !================================
  &veri_obs
-  obstypes      = "SYNOP"      ! "SYNOP TEMP"
+  obstypes      = "SYNOP"         ! "SYNOP TEMP"
   fc_times      = {{ leadtimes }} ! forecast lead time at reference (hhmm) 0000,1200,2400,...  
-  prefix_in     = 'ekf'                                                                                                                                                                                           
+  prefix_in     = 'ekf'           ! prefix for input files. ekf or mon                                                                                                                                                                        
   prefix_out    = 'ver'               
-  rm_old        = 2                          ! overwrite entries in verification file ?
-  fc_file       = 'fc__FCR_TIME'  ! template for forecast file name
+  rm_old        = 2                  ! overwrite entries in verification file ?
+  fc_file       = 'fc__FCR_TIME_00'  ! template for forecast file name
   time_range    = 1                                                                                                                                                                                               
   ekf_concat    = F                                                                                                                                                                                               
   ref_runtype   = 'any'  ! accept any runtype for the reference state
diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk
index b1b34354..72876ca0 100644
--- a/workflow/rules/verif_obs.smk
+++ b/workflow/rules/verif_obs.smk
@@ -31,11 +31,12 @@ rule collect_mec_input:
         init_list_str=lambda wc: " ".join(get_init_times(wc)),
         run_root=lambda wc: str(OUT_ROOT / f"data/runs/{wc.run_id}"),
     log:
-        OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/{run_id}-{init_time}.log",
+        OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/{run_id}-{init_time}_collect_mec_input.log",
     shell:
         """
         (
         set -euo pipefail
+        echo "...time at start of collect_mec_input: $(date)"
 
         # create the input_obs and input_mod dirs
         mkdir -p {output.obs} {output.mod}
@@ -47,7 +48,7 @@ rule collect_mec_input:
         echo "init time: ${{init}}, ym: ${{ym}}"
         
         # collect observations (ekfSYNOP) and/or (monSYNOP from DWD; includes precip) files
-        cp /store_new/mch/msopr/osm/KENDA-1/EKF/${{ym}}/ekfSYNOP_${{init}}00.nc {output.obs}/efkSYNOP.nc
+        cp /store_new/mch/msopr/osm/KENDA-1/EKF/${{ym}}/ekfSYNOP_${{init}}00.nc {output.obs}/ekfSYNOP.nc
         cp /scratch/mch/paa/mec/MEC_ML_input/monFiles2020/hpc/uwork/swahl/temp/feedback/monSYNOP.${{init:0:10}} {output.obs}/monSYNOP.nc
 
         # For each source init (src_init) produce one output file named fc_<src_init>
@@ -58,14 +59,15 @@ rule collect_mec_input:
             # create/truncate out_file
             : > "$out_file"
             # only concat if matching files exist
-            if compgen -G "$src_dir/2*.grib" > /dev/null; then
-                cat "$src_dir"/2*.grib >> "$out_file"
+            if compgen -G "$src_dir/20*.grib" > /dev/null; then
+                cat "$src_dir"/20*.grib >> "$out_file"
             else
                 echo "WARNING: no grib files found in $src_dir" >&2
             fi
         done
 
         ls -l {output.mod} {output.obs}
+        echo "...time at end of collect_mec_input: $(date)"
         ) > {log} 2>&1
         """
 
@@ -93,7 +95,6 @@ rule run_mec:
     input:
         namelist=rules.generate_mec_namelist.output.namelist,
         run_dir=directory(rules.collect_mec_input.output.mod),
-        mod_dir=directory(rules.collect_mec_input.output.mod),
     output:
         fdbk_file=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/verSYNOP.nc",
     resources:
@@ -106,11 +107,20 @@ rule run_mec:
         """
         (
         set -euo pipefail
-
+        echo "...time at start of run_mec: $(date)"
         # Note: pull command currently redundant; may not be the case with sarus.
         #podman pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
-        srun --pty -N1 -c 1 -p postproc -t 2:00:00 podman run --mount=type=bind,source={{input.run_dir}},destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
-        #ls -l {{output}}
-        #TODO: copy output verSYNOP.nc to appropriate location
+        #srun --pty -N1 -c 1 -p postproc -t 2:00:00 podman run --mount=type=bind,source={{input.run_dir}},destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
+
+        # change to the MEC run directory, set env and run MEC
+        cd {input.run_dir}/..
+        export LM_HOST=balfrin-ln002
+        source /oprusers/osm/opr.emme/abs/mec.env
+        ./mec > ./mec_out.log 2>&1
+
+        # move the output file to the expected location
+        mkdir -p ../../fdbk_files
+        cp verSYNOP.nc ../../fdbk_files/verSYNOP_{wildcards.init_time}.nc
+        echo "...time at end of run_mec: $(date)"
         ) > {log} 2>&1
         """
diff --git a/workflow/scripts/generate_mec_namelist.py b/workflow/scripts/generate_mec_namelist.py
index f468f81b..31942f7f 100644
--- a/workflow/scripts/generate_mec_namelist.py
+++ b/workflow/scripts/generate_mec_namelist.py
@@ -21,8 +21,6 @@ def _parse_steps(steps: str) -> int:
     return list(range(start, end + 1, step))
 
 
-# Note: not currently in use; optional script in case we want to factor it out
-# of the rules file
 def main(args):
 
     # Include stop_h (inclusive). Produce strings like 0000,0600,1200,...,12000

From 5b58b7a84da5d35160478b146e210b88e4d0a277 Mon Sep 17 00:00:00 2001
From: Andreas Pauling <paa@balfrin-ln002.cscs.ch>
Date: Thu, 8 Jan 2026 17:16:44 +0100
Subject: [PATCH 25/34] Support of mec in a sarus container

---
 workflow/rules/verif_obs.smk | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk
index 72876ca0..c76101e8 100644
--- a/workflow/rules/verif_obs.smk
+++ b/workflow/rules/verif_obs.smk
@@ -24,6 +24,7 @@ rule collect_mec_input:
     input:
         inference_dir=rules.prepare_inference_forecaster.output.grib_out_dir,
     output:
+        run=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec"),
         obs=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_obs"),
         mod=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_mod"),
     params:
@@ -31,7 +32,8 @@ rule collect_mec_input:
         init_list_str=lambda wc: " ".join(get_init_times(wc)),
         run_root=lambda wc: str(OUT_ROOT / f"data/runs/{wc.run_id}"),
     log:
-        OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/{run_id}-{init_time}_collect_mec_input.log",
+        OUT_ROOT
+        / "data/runs/{run_id}/{init_time}/mec/{run_id}-{init_time}_collect_mec_input.log",
     shell:
         """
         (
@@ -39,7 +41,7 @@ rule collect_mec_input:
         echo "...time at start of collect_mec_input: $(date)"
 
         # create the input_obs and input_mod dirs
-        mkdir -p {output.obs} {output.mod}
+        mkdir -p {output.run} {output.obs} {output.mod}
 
         # extract YYYYMM from init_time (which is YYYYMMDDHHMM) and use it in the paths
         init="{wildcards.init_time}"
@@ -94,7 +96,7 @@ rule generate_mec_namelist:
 rule run_mec:
     input:
         namelist=rules.generate_mec_namelist.output.namelist,
-        run_dir=directory(rules.collect_mec_input.output.mod),
+        run_dir=directory(rules.collect_mec_input.output.run),
     output:
         fdbk_file=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/verSYNOP.nc",
     resources:
@@ -103,24 +105,26 @@ rule run_mec:
     log:
         OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/{run_id}-{init_time}_run_mec.log",
     shell:
-        #TODO(mmcglohon): Replace podman with sarus if needed.
         """
         (
         set -euo pipefail
         echo "...time at start of run_mec: $(date)"
-        # Note: pull command currently redundant; may not be the case with sarus.
-        #podman pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
-        #srun --pty -N1 -c 1 -p postproc -t 2:00:00 podman run --mount=type=bind,source={{input.run_dir}},destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
 
-        # change to the MEC run directory, set env and run MEC
-        cd {input.run_dir}/..
-        export LM_HOST=balfrin-ln002
-        source /oprusers/osm/opr.emme/abs/mec.env
-        ./mec > ./mec_out.log 2>&1
+        # Run MEC inside sarus container
+        # Note: pull command currently needed only once to download the container
+        # sarus pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
+        abs_run_dir=$(realpath {input.run_dir})
+        sarus run --mount=type=bind,source=$abs_run_dir,destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
 
-        # move the output file to the expected location
-        mkdir -p ../../fdbk_files
-        cp verSYNOP.nc ../../fdbk_files/verSYNOP_{wildcards.init_time}.nc
+        # Run MEC using local executable (Alternative to sarus container) 
+        #cd {input.run_dir}
+        #export LM_HOST=balfrin-ln002
+        #source /oprusers/osm/opr.emme/abs/mec.env
+        #./mec > ./mec_out.log 2>&1
+
+        # move the output file to the final location for the Feedback files
+        mkdir -p {input.run_dir}/../../fdbk_files
+        cp {input.run_dir}/verSYNOP.nc {input.run_dir}/../../fdbk_files/verSYNOP_{wildcards.init_time}.nc
         echo "...time at end of run_mec: $(date)"
         ) > {log} 2>&1
         """

From 292878d9431fdf86171623dccd599399fcef31b5 Mon Sep 17 00:00:00 2001
From: Andreas Pauling <paa@balfrin-ln003.cscs.ch>
Date: Thu, 15 Jan 2026 17:18:18 +0100
Subject: [PATCH 26/34] target final feedback files

---
 config/forecasters.yaml      |  6 +++---
 workflow/Snakefile           | 11 ++++++-----
 workflow/rules/verif_obs.smk |  2 +-
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/config/forecasters.yaml b/config/forecasters.yaml
index c1638f9f..847a786a 100644
--- a/config/forecasters.yaml
+++ b/config/forecasters.yaml
@@ -3,9 +3,9 @@ description: |
   Evaluate skill of COSMO-E emulator (M-1 forecaster).
 
 dates:
-  start: 2020-01-02T00:00
-  end: 2020-01-04T00:00
-  frequency: 12h
+  start: 2020-01-04T00:00
+  end: 2020-01-05T00:00
+  frequency: 6h
 
 runs:
   - forecaster:
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 59419d02..fa276e29 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -37,14 +37,15 @@ onerror:
 # -----------------------------------------------------
 EXPERIMENT_HASH = short_hash_config()
 
-
+# TODO: target final scorefile instead of FF
 rule experiment_all:
     """Target rule for experiment workflow."""
     input:
         OUT_ROOT / f"results/{EXPERIMENT_HASH}/metrics/dashboard",
         OUT_ROOT / f"results/{EXPERIMENT_HASH}/metrics/plots",
         expand(
-            OUT_ROOT / "data/runs/{run_id}/summary.md",
+            OUT_ROOT / "data/runs/{run_id}/fdbk_files/verSYNOP_{init_time}.nc",
+            init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
             run_id=collect_all_candidates(),
         ),
 
@@ -104,11 +105,11 @@ rule verif_metrics_plot_all:
 
 # To run:  snakemake --cores 1 --configfile=config/forecasters.yaml verif_obs_all
 # clean: rm -rf ./output/data/runs/d0846032f/202001040000/mec ./output/data/runs/d0846032f/202001040000/folder_to_delete
-#            init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], 
+#             init_time=202001040000,             run_id=collect_all_runs(),
 rule verif_obs_all:
     input:
         expand(
             rules.run_mec.output,
-            init_time=202001040000,
-            run_id=collect_all_runs(),
+            init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
+            run_id=collect_all_candidates(),
         )
\ No newline at end of file
diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk
index c76101e8..d07dd93d 100644
--- a/workflow/rules/verif_obs.smk
+++ b/workflow/rules/verif_obs.smk
@@ -98,7 +98,7 @@ rule run_mec:
         namelist=rules.generate_mec_namelist.output.namelist,
         run_dir=directory(rules.collect_mec_input.output.run),
     output:
-        fdbk_file=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/verSYNOP.nc",
+        fdbk_file=OUT_ROOT / "data/runs/{run_id}/fdbk_files/verSYNOP_{init_time}.nc",
     resources:
         cpus_per_task=1,
         runtime="1h",

From 09f06da7f44de12defcf26c101d06d2dd7713011 Mon Sep 17 00:00:00 2001
From: Andreas Pauling <paa@balfrin-ln003.cscs.ch>
Date: Mon, 19 Jan 2026 16:59:43 +0100
Subject: [PATCH 27/34] Fix linting

---
 resources/mec/namelist.jinja2             | 17 ++++++++---------
 workflow/Snakefile                        |  9 ++++++++-
 workflow/rules/verif_obs.smk              |  4 ++--
 workflow/scripts/generate_mec_namelist.py |  2 --
 4 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/resources/mec/namelist.jinja2 b/resources/mec/namelist.jinja2
index 3dff1651..569d82b6 100644
--- a/resources/mec/namelist.jinja2
+++ b/resources/mec/namelist.jinja2
@@ -27,7 +27,7 @@
  !===============================
   &observations
    !---------------------------------------------------
-   ! read from CDFIN files (if not set use mon/cof/ekf)  
+   ! read from CDFIN files (if not set use mon/cof/ekf)
    !---------------------------------------------------
    read_cdfin   = F      ! (F): dont read COSMO CDFIN files get obs from ekf
    vint_lin_t   = T      ! linear vertical interpolation for temperature
@@ -35,7 +35,7 @@
    vint_lin_uv  = T      ! linear vertical interpolation for wind
    ptop_lapse   = 850.
    pbot_lapse   = 950.
-!  int_nn       = T      ! horizontal interpolation: nearest neighbor 
+!  int_nn       = T      ! horizontal interpolation: nearest neighbor
  /
 
  !====================
@@ -51,13 +51,13 @@
  !================================
  &veri_obs
   obstypes      = "SYNOP"         ! "SYNOP TEMP"
-  fc_times      = {{ leadtimes }} ! forecast lead time at reference (hhmm) 0000,1200,2400,...  
-  prefix_in     = 'ekf'           ! prefix for input files. ekf or mon                                                                                                                                                                        
-  prefix_out    = 'ver'               
+  fc_times      = {{ leadtimes }} ! forecast lead time at reference (hhmm) 0000,1200,2400,...
+  prefix_in     = 'ekf'           ! prefix for input files. ekf or mon
+  prefix_out    = 'ver'
   rm_old        = 2                  ! overwrite entries in verification file ?
   fc_file       = 'fc__FCR_TIME_00'  ! template for forecast file name
-  time_range    = 1                                                                                                                                                                                               
-  ekf_concat    = F                                                                                                                                                                                               
+  time_range    = 1
+  ekf_concat    = F
   ref_runtype   = 'any'  ! accept any runtype for the reference state
  /
 
@@ -72,6 +72,5 @@
    verification_end   =  30       ! (min, inclusive)
  /
  &synop_obs
-   version = 1  
+   version = 1
  /
- 
\ No newline at end of file
diff --git a/workflow/Snakefile b/workflow/Snakefile
index fa276e29..6952eb5d 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -19,6 +19,7 @@ include: "rules/plot.smk"
 include: "rules/summary.smk"
 include: "rules/verif_obs.smk"
 
+
 # optional messages, log and error handling
 # -----------------------------------------------------
 onstart:
@@ -37,6 +38,7 @@ onerror:
 # -----------------------------------------------------
 EXPERIMENT_HASH = short_hash_config()
 
+
 # TODO: target final scorefile instead of FF
 rule experiment_all:
     """Target rule for experiment workflow."""
@@ -48,6 +50,10 @@ rule experiment_all:
             init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
             run_id=collect_all_candidates(),
         ),
+        expand(
+            OUT_ROOT / "data/runs/{run_id}/summary.md",
+            run_id=collect_all_candidates(),
+        ),
 
 
 rule showcase_all:
@@ -103,6 +109,7 @@ rule verif_metrics_plot_all:
             experiment=EXPERIMENT_HASH,
         ),
 
+
 # To run:  snakemake --cores 1 --configfile=config/forecasters.yaml verif_obs_all
 # clean: rm -rf ./output/data/runs/d0846032f/202001040000/mec ./output/data/runs/d0846032f/202001040000/folder_to_delete
 #             init_time=202001040000,             run_id=collect_all_runs(),
@@ -112,4 +119,4 @@ rule verif_obs_all:
             rules.run_mec.output,
             init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
             run_id=collect_all_candidates(),
-        )
\ No newline at end of file
+        ),
diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk
index d07dd93d..adcf5a41 100644
--- a/workflow/rules/verif_obs.smk
+++ b/workflow/rules/verif_obs.smk
@@ -48,7 +48,7 @@ rule collect_mec_input:
         ym="${{init:0:6}}"
         ymdh="${{init:0:10}}"
         echo "init time: ${{init}}, ym: ${{ym}}"
-        
+
         # collect observations (ekfSYNOP) and/or (monSYNOP from DWD; includes precip) files
         cp /store_new/mch/msopr/osm/KENDA-1/EKF/${{ym}}/ekfSYNOP_${{init}}00.nc {output.obs}/ekfSYNOP.nc
         cp /scratch/mch/paa/mec/MEC_ML_input/monFiles2020/hpc/uwork/swahl/temp/feedback/monSYNOP.${{init:0:10}} {output.obs}/monSYNOP.nc
@@ -116,7 +116,7 @@ rule run_mec:
         abs_run_dir=$(realpath {input.run_dir})
         sarus run --mount=type=bind,source=$abs_run_dir,destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
 
-        # Run MEC using local executable (Alternative to sarus container) 
+        # Run MEC using local executable (Alternative to sarus container)
         #cd {input.run_dir}
         #export LM_HOST=balfrin-ln002
         #source /oprusers/osm/opr.emme/abs/mec.env
diff --git a/workflow/scripts/generate_mec_namelist.py b/workflow/scripts/generate_mec_namelist.py
index 31942f7f..44fab72c 100644
--- a/workflow/scripts/generate_mec_namelist.py
+++ b/workflow/scripts/generate_mec_namelist.py
@@ -22,7 +22,6 @@ def _parse_steps(steps: str) -> int:
 
 
 def main(args):
-
     # Include stop_h (inclusive). Produce strings like 0000,0600,1200,...,12000
     lead_hours = args.steps
     leadtimes = ",".join(f"{h:02d}00" for h in lead_hours)
@@ -42,7 +41,6 @@ def main(args):
 
 
 if __name__ == "__main__":
-
     parser = ArgumentParser()
 
     parser.add_argument("--steps", type=_parse_steps, default="0/120/6")

From 6776572ccf50bc55e01a970c9ecbacad105e28a1 Mon Sep 17 00:00:00 2001
From: Andreas Pauling <paa@balfrin-ln002.cscs.ch>
Date: Wed, 21 Jan 2026 17:38:05 +0100
Subject: [PATCH 28/34] Ensure newline at the end of MEC namelist

---
 workflow/scripts/generate_mec_namelist.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/workflow/scripts/generate_mec_namelist.py b/workflow/scripts/generate_mec_namelist.py
index 44fab72c..1f2f252c 100644
--- a/workflow/scripts/generate_mec_namelist.py
+++ b/workflow/scripts/generate_mec_namelist.py
@@ -32,6 +32,9 @@ def main(args):
     env = jinja2.Environment(loader=jinja2.FileSystemLoader(str(template_path.parent)))
     template = env.get_template(template_path.name)
     namelist = template.render(**context)
+    # Ensure file ends with a newline (prevent editors/tools from removing final RETURN)
+    if not namelist.endswith("\n"):
+        namelist += "\n"
     LOG.info(f"MEC namelist created: {namelist}")
 
     out_path = Path(str(args.namelist))

From 5d381f1d6ce7355e0e06210f3a29f99cf5205443 Mon Sep 17 00:00:00 2001
From: Andreas Pauling <paa@balfrin-ln003.cscs.ch>
Date: Thu, 29 Jan 2026 17:45:43 +0100
Subject: [PATCH 29/34] model data preparation for MEC

---
 config/forecasters.yaml                   |  12 +-
 resources/mec/namelist.jinja2             |   2 +-
 workflow/Snakefile                        |  13 ++-
 workflow/rules/verif_obs.smk              | 128 ++++++++++++++--------
 workflow/scripts/generate_mec_namelist.py |   2 +-
 5 files changed, 97 insertions(+), 60 deletions(-)

diff --git a/config/forecasters.yaml b/config/forecasters.yaml
index 847a786a..16e1f197 100644
--- a/config/forecasters.yaml
+++ b/config/forecasters.yaml
@@ -1,27 +1,27 @@
 # yaml-language-server: $schema=../workflow/tools/config.schema.json
-description: |
+description:
   Evaluate skill of COSMO-E emulator (M-1 forecaster).
 
 dates:
-  start: 2020-01-04T00:00
-  end: 2020-01-05T00:00
+  start: 2020-01-08T18:00
+  end: 2020-01-09T00:00
   frequency: 6h
 
 runs:
   - forecaster:
       mlflow_id: d0846032fc7248a58b089cbe8fa4c511
       label: M-1 forecaster
-      steps: 0/48/6
+      steps: 0/12/6
       config: resources/inference/configs/sgm-forecaster-global_trimedge.yaml
       extra_dependencies:
-        - git+https://github.com/ecmwf/anemoi-inference.git@main
+        - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3
 
 baselines:
   - baseline:
       baseline_id: COSMO-E
       label: COSMO-E
       root: /store_new/mch/msopr/ml/COSMO-E
-      steps: 0/48/6
+      steps: 0/12/6
 
 analysis:
   label: COSMO KENDA
diff --git a/resources/mec/namelist.jinja2 b/resources/mec/namelist.jinja2
index 569d82b6..e6fceba5 100644
--- a/resources/mec/namelist.jinja2
+++ b/resources/mec/namelist.jinja2
@@ -52,7 +52,7 @@
  &veri_obs
   obstypes      = "SYNOP"         ! "SYNOP TEMP"
   fc_times      = {{ leadtimes }} ! forecast lead time at reference (hhmm) 0000,1200,2400,...
-  prefix_in     = 'ekf'           ! prefix for input files. ekf or mon
+  prefix_in     = 'mon'           ! prefix for input files. ekf or mon
   prefix_out    = 'ver'
   rm_old        = 2                  ! overwrite entries in verification file ?
   fc_file       = 'fc__FCR_TIME_00'  ! template for forecast file name
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 6952eb5d..d48745fe 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -19,6 +19,8 @@ include: "rules/plot.smk"
 include: "rules/summary.smk"
 include: "rules/verif_obs.smk"
 
+# prefer one rule because snakemake complains about ambiguous rules (same output)
+ruleorder: prepare_inference_forecaster > prepare_inference_interpolator
 
 # optional messages, log and error handling
 # -----------------------------------------------------
@@ -46,16 +48,17 @@ rule experiment_all:
         OUT_ROOT / f"results/{EXPERIMENT_HASH}/metrics/dashboard",
         OUT_ROOT / f"results/{EXPERIMENT_HASH}/metrics/plots",
         expand(
-            OUT_ROOT / "data/runs/{run_id}/fdbk_files/verSYNOP_{init_time}.nc",
-            init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
+            OUT_ROOT / "data/runs/{run_id}/summary.md",
             run_id=collect_all_candidates(),
         ),
         expand(
-            OUT_ROOT / "data/runs/{run_id}/summary.md",
+            OUT_ROOT / "data/runs/{run_id}/fdbk_files/verSYNOP_{init_time}.nc",
+            init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
             run_id=collect_all_candidates(),
         ),
 
 
+
 rule showcase_all:
     """Target rule for showcase workflow."""
     input:
@@ -111,8 +114,8 @@ rule verif_metrics_plot_all:
 
 
 # To run:  snakemake --cores 1 --configfile=config/forecasters.yaml verif_obs_all
-# clean: rm -rf ./output/data/runs/d0846032f/202001040000/mec ./output/data/runs/d0846032f/202001040000/folder_to_delete
-#             init_time=202001040000,             run_id=collect_all_runs(),
+# run_id=collect_all_runs(),      init_time=202001081200,
+
 rule verif_obs_all:
     input:
         expand(
diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk
index adcf5a41..c86e56d5 100644
--- a/workflow/rules/verif_obs.smk
+++ b/workflow/rules/verif_obs.smk
@@ -10,7 +10,7 @@ def get_init_times(wc):
     init = wc.init_time
     lt = get_leadtime(wc)  # expects something like "48h"
     lead_h = int(str(lt).rstrip("h"))
-    freq_cfg = RUN_CONFIGS[wc.run_id].get("frequency", "12h")
+    freq_cfg = RUN_CONFIGS[wc.run_id].get("frequency", "6h")
     freq_h = int(str(freq_cfg).rstrip("h"))
     base = datetime.strptime(init, "%Y%m%d%H%M")
     times = []
@@ -20,56 +20,83 @@ def get_init_times(wc):
     return times
 
 
-rule collect_mec_input:
+# prepare_mec_input: set up the run dir and gather observations and model data in it for the current init time
+rule prepare_mec_input:
     input:
-        inference_dir=rules.prepare_inference_forecaster.output.grib_out_dir,
+        src_dir=OUT_ROOT / "data/runs/{run_id}/{init_time}/grib",
     output:
         run=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec"),
         obs=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_obs"),
-        mod=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_mod"),
-    params:
-        steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"],
-        init_list_str=lambda wc: " ".join(get_init_times(wc)),
-        run_root=lambda wc: str(OUT_ROOT / f"data/runs/{wc.run_id}"),
+        ekf_file=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_obs/ekfSYNOP.nc",
+        # prepare_mec_input no longer claims ownership of input_mod dir;
+        # it should produce the fc file somewhere visible (or create a temp),
+        # but we keep fc as a produced file returned here in the mec dir
+        fc_file=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/fc_{init_time}",
     log:
-        OUT_ROOT
-        / "data/runs/{run_id}/{init_time}/mec/{run_id}-{init_time}_collect_mec_input.log",
+        OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/prepare_mec_input.log",
     shell:
         """
         (
-        set -euo pipefail
-        echo "...time at start of collect_mec_input: $(date)"
+        #set -uo pipefail
 
-        # create the input_obs and input_mod dirs
-        mkdir -p {output.run} {output.obs} {output.mod}
+        mkdir -p {output.run} {output.obs}
+        src_dir="{input.src_dir}"
+        fc_file="{output.fc_file}"
 
-        # extract YYYYMM from init_time (which is YYYYMMDDHHMM) and use it in the paths
+        # extract YYYYMM from init_time (which is YYYYMMDDHHMM)
         init="{wildcards.init_time}"
         ym="${{init:0:6}}"
         ymdh="${{init:0:10}}"
-        echo "init time: ${{init}}, ym: ${{ym}}"
+        echo "init time: ${{init}}"
+
+        # concatenate all grib files in src_dir into a single file fc_file
+        echo "grib files processed:"
+        if ls "$src_dir"/20*.grib >/dev/null 2>&1; then
+            # concatenate all matching files into the target file
+            ls  "$src_dir"/20*.grib
+            cat "$src_dir"/20*.grib > "$fc_file"
+        else
+            echo "WARNING: no grib files found in $src_dir" >&2
+        fi
 
         # collect observations (ekfSYNOP) and/or (monSYNOP from DWD; includes precip) files
-        cp /store_new/mch/msopr/osm/KENDA-1/EKF/${{ym}}/ekfSYNOP_${{init}}00.nc {output.obs}/ekfSYNOP.nc
+        cp /store_new/mch/msopr/osm/KENDA-1/EKF/${{ym}}/ekfSYNOP_${{init}}00.nc {output.ekf_file}
         cp /scratch/mch/paa/mec/MEC_ML_input/monFiles2020/hpc/uwork/swahl/temp/feedback/monSYNOP.${{init:0:10}} {output.obs}/monSYNOP.nc
 
-        # For each source init (src_init) produce one output file named fc_<src_init>
-        for src_init in {params.init_list_str}; do
-            src_dir="{params.run_root}/$src_init/grib"
-            out_file="{output.mod}/fc_$src_init"
-            echo "creating $out_file from $src_dir"
-            # create/truncate out_file
-            : > "$out_file"
-            # only concat if matching files exist
-            if compgen -G "$src_dir/20*.grib" > /dev/null; then
-                cat "$src_dir"/20*.grib >> "$out_file"
-            else
-                echo "WARNING: no grib files found in $src_dir" >&2
-            fi
-        done
+        ) > {log} 2>&1
+        """
+
 
-        ls -l {output.mod} {output.obs}
-        echo "...time at end of collect_mec_input: $(date)"
+# link_mec_input: create the input_mod dir with symlinks to all fc files from all source inits
+rule link_mec_input:
+    input:
+        # list of source fc files produced by prepare_mec_input for each init in the window
+        fc_files=lambda wc: [
+            OUT_ROOT / f"data/runs/{wc.run_id}/{t}/mec/fc_{t}"
+            for t in get_init_times(wc)
+        ],
+    output:
+        # own the final input_mod directory for this init (and its contents)
+        mod=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_mod"),
+    log:
+        OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/link_mec_input.log",
+    shell:
+        """
+        (
+        set -euo pipefail
+        mkdir -p {output.mod}
+        cd {output.mod}/../../..
+
+        # create symlinks for each source init into this init's input_mod
+        for src in {input.fc_files}; do
+            src_basename=$(basename "$src")
+            echo "Processing source fc file: $src_basename"
+            one_init_time="${{src_basename: -12}}"
+            realpath_src=$(realpath -m "$PWD/$one_init_time/mec/")
+
+            echo "Linking $realpath_src/$src_basename to {wildcards.init_time}/mec/input_mod/$src_basename" 
+            ln -s "$realpath_src/$src_basename" {wildcards.init_time}/mec/input_mod/"$src_basename"
+        done
         ) > {log} 2>&1
         """
 
@@ -79,6 +106,7 @@ rule generate_mec_namelist:
     input:
         script="workflow/scripts/generate_mec_namelist.py",
         template="resources/mec/namelist.jinja2",
+        mod_dir=directory(rules.link_mec_input.output.mod),
     output:
         namelist=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/namelist",
     params:
@@ -96,35 +124,41 @@ rule generate_mec_namelist:
 rule run_mec:
     input:
         namelist=rules.generate_mec_namelist.output.namelist,
-        run_dir=directory(rules.collect_mec_input.output.run),
+        run_dir=directory(rules.prepare_mec_input.output.run),
+        mod_dir=directory(rules.link_mec_input.output.mod),
     output:
         fdbk_file=OUT_ROOT / "data/runs/{run_id}/fdbk_files/verSYNOP_{init_time}.nc",
-    resources:
-        cpus_per_task=1,
-        runtime="1h",
     log:
-        OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/{run_id}-{init_time}_run_mec.log",
+        OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/run_mec.log",
     shell:
         """
         (
         set -euo pipefail
-        echo "...time at start of run_mec: $(date)"
-
+ 
         # Run MEC inside sarus container
         # Note: pull command currently needed only once to download the container
-        # sarus pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
+        sarus pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
         abs_run_dir=$(realpath {input.run_dir})
-        sarus run --mount=type=bind,source=$abs_run_dir,destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
-
+        abs_mod_root=$(realpath {input.run_dir}/../..)   # two levels up (so that all links are mounted to the container)
+
+        # build mount options in a variable for readability
+        MOUNTS="\
+          --mount=type=bind,source=$abs_run_dir,destination=/src/bin2 \
+          --mount=type=bind,source=$abs_mod_root,destination=$abs_mod_root,readonly \
+          --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ \
+        "
+
+        # run container (split over multiple lines for readability)
+        sarus run $MOUNTS container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
+ 
         # Run MEC using local executable (Alternative to sarus container)
         #cd {input.run_dir}
-        #export LM_HOST=balfrin-ln002
+        #export LM_HOST=balfrin-ln003
         #source /oprusers/osm/opr.emme/abs/mec.env
         #./mec > ./mec_out.log 2>&1
-
-        # move the output file to the final location for the Feedback files
+ 
+        # copy the output file to the final location for the Feedback files
         mkdir -p {input.run_dir}/../../fdbk_files
         cp {input.run_dir}/verSYNOP.nc {input.run_dir}/../../fdbk_files/verSYNOP_{wildcards.init_time}.nc
-        echo "...time at end of run_mec: $(date)"
         ) > {log} 2>&1
         """
diff --git a/workflow/scripts/generate_mec_namelist.py b/workflow/scripts/generate_mec_namelist.py
index 1f2f252c..0bfe145a 100644
--- a/workflow/scripts/generate_mec_namelist.py
+++ b/workflow/scripts/generate_mec_namelist.py
@@ -35,7 +35,7 @@ def main(args):
     # Ensure file ends with a newline (prevent editors/tools from removing final RETURN)
     if not namelist.endswith("\n"):
         namelist += "\n"
-    LOG.info(f"MEC namelist created: {namelist}")
+    LOG.info("MEC namelist created:\n%s", namelist)
 
     out_path = Path(str(args.namelist))
     out_path.parent.mkdir(parents=True, exist_ok=True)

From f9a588990d46bee6f89307faab8d74d94c994866 Mon Sep 17 00:00:00 2001
From: Andreas Pauling <paa@balfrin-ln003.cscs.ch>
Date: Wed, 4 Feb 2026 17:28:27 +0100
Subject: [PATCH 30/34] fix init_times_for_mec and add touch output/input (MEC
 waits for all inference output)

---
 workflow/Snakefile           |  8 +++++++-
 workflow/rules/verif_obs.smk | 21 ++++++++++++++-------
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index 7aecc89e..96dfdb9f 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -105,6 +105,10 @@ rule run_inference_all:
             init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
             run_id=collect_all_candidates(),
         ),
+    output:
+        touch=OUT_ROOT / f"run_inference_all.{EXPERIMENT_HASH}.ok",
+    shell:
+        "touch {output.touch}"
 
 
 rule verif_metrics_all:
@@ -126,7 +130,7 @@ rule verif_metrics_plot_all:
 
 
 # To run:  snakemake --cores 1 --configfile=config/forecasters.yaml verif_obs_all
-# run_id=collect_all_runs(),      init_time=202001081200,
+# run_id=collect_all_runs(),
 
 rule verif_obs_all:
     input:
@@ -135,3 +139,5 @@ rule verif_obs_all:
             init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
             run_id=collect_all_candidates(),
         ),
+        # wait for the global run_inference_all sentinel
+        OUT_ROOT / f"run_inference_all.{EXPERIMENT_HASH}.ok",
diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk
index c86e56d5..be873b81 100644
--- a/workflow/rules/verif_obs.smk
+++ b/workflow/rules/verif_obs.smk
@@ -2,21 +2,27 @@ from pathlib import Path
 from datetime import datetime, timedelta
 
 
-def get_init_times(wc):
+def init_times_for_mec(wc):
     """
     Return list of init times (YYYYMMDDHHMM) from init_time - lead ... init_time
-    stepping by configured frequency (default 12h).
+    stepping by configured frequency.
     """
     init = wc.init_time
     lt = get_leadtime(wc)  # expects something like "48h"
     lead_h = int(str(lt).rstrip("h"))
-    freq_cfg = RUN_CONFIGS[wc.run_id].get("frequency", "6h")
-    freq_h = int(str(freq_cfg).rstrip("h"))
+    dates_cfg = config["dates"]
+
+    # use the same parsing as in common.smk; support "Xh" and "Xd"
+    freq_td = _parse_timedelta(dates_cfg["frequency"])
+
     base = datetime.strptime(init, "%Y%m%d%H%M")
     times = []
-    for h in range(lead_h, -1, -freq_h):
-        t = base - timedelta(hours=h)
+
+    # iterate from base - lead to base stepping by the parsed timedelta
+    t = base - timedelta(hours=lead_h)
+    while t <= base:
         times.append(t.strftime("%Y%m%d%H%M"))
+        t += freq_td
     return times
 
 
@@ -73,7 +79,7 @@ rule link_mec_input:
         # list of source fc files produced by prepare_mec_input for each init in the window
         fc_files=lambda wc: [
             OUT_ROOT / f"data/runs/{wc.run_id}/{t}/mec/fc_{t}"
-            for t in get_init_times(wc)
+            for t in init_times_for_mec(wc)
         ],
     output:
         # own the final input_mod directory for this init (and its contents)
@@ -84,6 +90,7 @@ rule link_mec_input:
         """
         (
         set -euo pipefail
+
         mkdir -p {output.mod}
         cd {output.mod}/../../..
 

From 99753e5a9deb5a92662723e17f4d57b8fa71eb0b Mon Sep 17 00:00:00 2001
From: Andreas Pauling <paa@balfrin-ln003.cscs.ch>
Date: Thu, 12 Feb 2026 15:17:50 +0100
Subject: [PATCH 31/34] Refactoring, bugfixes

---
 config/forecasters-co2.yaml  | 10 +++---
 workflow/Snakefile           | 19 ++++-------
 workflow/rules/verif_obs.smk | 65 +++++++++++++++++++++++++++---------
 3 files changed, 62 insertions(+), 32 deletions(-)

diff --git a/config/forecasters-co2.yaml b/config/forecasters-co2.yaml
index 16e1f197..df88196e 100644
--- a/config/forecasters-co2.yaml
+++ b/config/forecasters-co2.yaml
@@ -3,15 +3,15 @@ description:
   Evaluate skill of COSMO-E emulator (M-1 forecaster).
 
 dates:
-  start: 2020-01-08T18:00
-  end: 2020-01-09T00:00
-  frequency: 6h
+  start: 2020-07-25T00:00
+  end: 2020-07-27T00:00
+  frequency: 12h
 
 runs:
   - forecaster:
       mlflow_id: d0846032fc7248a58b089cbe8fa4c511
       label: M-1 forecaster
-      steps: 0/12/6
+      steps: 0/24/12
       config: resources/inference/configs/sgm-forecaster-global_trimedge.yaml
       extra_dependencies:
         - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3
@@ -21,7 +21,7 @@ baselines:
       baseline_id: COSMO-E
       label: COSMO-E
       root: /store_new/mch/msopr/ml/COSMO-E
-      steps: 0/12/6
+      steps: 0/24/12
 
 analysis:
   label: COSMO KENDA
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 96dfdb9f..8d16d058 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -46,7 +46,7 @@ EXPERIMENT_LABEL = (
 EXPERIMENT_DIRNAME = f"{EXPERIMENT_DATE}_{EXPERIMENT_LABEL}_{EXPERIMENT_HASH}"
 
 
-# TODO: target final scorefile instead of FF
+
 rule experiment_all:
     """Target rule for experiment workflow."""
     input:
@@ -58,7 +58,7 @@ rule experiment_all:
         ),
         expand(
             OUT_ROOT / "data/runs/{run_id}/fdbk_files/verSYNOP_{init_time}.nc",
-            init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
+            init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES_MEC],
             run_id=collect_all_candidates(),
         ),
 
@@ -101,14 +101,13 @@ rule run_inference_all:
     """Run inference for all reference times as defined in the configuration."""
     input:
         expand(
-            OUT_ROOT / "data/runs/{run_id}/{init_time}/raw",
+            OUT_ROOT / "data/runs/{run_id}/{init_time}/grib",
             init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
             run_id=collect_all_candidates(),
         ),
     output:
-        touch=OUT_ROOT / f"run_inference_all.{EXPERIMENT_HASH}.ok",
-    shell:
-        "touch {output.touch}"
+        inference_ok=touch(OUT_ROOT / f"run_inference_all.{EXPERIMENT_HASH}.ok")
+
 
 
 rule verif_metrics_all:
@@ -129,15 +128,11 @@ rule verif_metrics_plot_all:
         ),
 
 
-# To run:  snakemake --cores 1 --configfile=config/forecasters.yaml verif_obs_all
-# run_id=collect_all_runs(),
-
 rule verif_obs_all:
     input:
         expand(
             rules.run_mec.output,
-            init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
+            init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES_MEC],
             run_id=collect_all_candidates(),
         ),
-        # wait for the global run_inference_all sentinel
-        OUT_ROOT / f"run_inference_all.{EXPERIMENT_HASH}.ok",
+
diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk
index be873b81..f5c98403 100644
--- a/workflow/rules/verif_obs.smk
+++ b/workflow/rules/verif_obs.smk
@@ -1,6 +1,44 @@
 from pathlib import Path
 from datetime import datetime, timedelta
 
+EXPERIMENT_HASH = short_hash_config()
+
+
+# TODO: merge _parse_steps from generate_mec_namelist.py and verif_single_init.py?
+def _parse_steps(steps: str) -> list[int]:
+    # check that steps is in the format "start/stop/step"
+    if "/" not in steps:
+        raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'")
+    if len(steps.split("/")) != 3:
+        raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'")
+    start, end, step = map(int, steps.split("/"))
+    return list(range(start, end + 1, step))
+
+
+# TODO: merge with _ref_times from common.smk?
+def _reftimes_mec():
+    """
+    Construct ref times for MEC. For a start/end/frequency config, the first
+    MEC ref time is the configured start plus the longest lead time of the first run.
+    """
+    cfg = config["dates"]
+    if isinstance(cfg, list):
+        return [datetime.strptime(t, "%Y-%m-%dT%H:%M") for t in cfg]
+    start = datetime.strptime(cfg["start"], "%Y-%m-%dT%H:%M")
+    leads = _parse_steps(config["runs"][0]["forecaster"]["steps"])
+    start_mec = start + timedelta(hours=max(leads))
+    end = datetime.strptime(cfg["end"], "%Y-%m-%dT%H:%M")
+    freq = _parse_timedelta(cfg["frequency"])
+    times = []
+    t = start_mec
+    while t <= end:
+        times.append(t)
+        t += freq
+    return times
+
+
+REFTIMES_MEC = _reftimes_mec()
+
 
 def init_times_for_mec(wc):
     """
@@ -8,18 +46,16 @@ def init_times_for_mec(wc):
     stepping by configured frequency.
     """
     init = wc.init_time
+    base = datetime.strptime(init, "%Y%m%d%H%M")
+
     lt = get_leadtime(wc)  # expects something like "48h"
     lead_h = int(str(lt).rstrip("h"))
-    dates_cfg = config["dates"]
-
-    # use the same parsing as in common.smk; support "Xh" and "Xd"
-    freq_td = _parse_timedelta(dates_cfg["frequency"])
-
-    base = datetime.strptime(init, "%Y%m%d%H%M")
-    times = []
+    freq_td = _parse_timedelta(config["dates"]["frequency"])
 
     # iterate from base - lead to base stepping by the parsed timedelta
     t = base - timedelta(hours=lead_h)
+    times = []
+
     while t <= base:
         times.append(t.strftime("%Y%m%d%H%M"))
         t += freq_td
@@ -30,20 +66,19 @@ def init_times_for_mec(wc):
 rule prepare_mec_input:
     input:
         src_dir=OUT_ROOT / "data/runs/{run_id}/{init_time}/grib",
+        inference_ok=OUT_ROOT / f"run_inference_all.{EXPERIMENT_HASH}.ok",
     output:
         run=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec"),
         obs=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_obs"),
         ekf_file=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_obs/ekfSYNOP.nc",
-        # prepare_mec_input no longer claims ownership of input_mod dir;
-        # it should produce the fc file somewhere visible (or create a temp),
-        # but we keep fc as a produced file returned here in the mec dir
         fc_file=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/fc_{init_time}",
     log:
         OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/prepare_mec_input.log",
     shell:
         """
         (
-        #set -uo pipefail
+        set -euo pipefail
+        shopt -s nullglob
 
         mkdir -p {output.run} {output.obs}
         src_dir="{input.src_dir}"
@@ -57,10 +92,10 @@ rule prepare_mec_input:
 
         # concatenate all grib files in src_dir into a single file fc_file
         echo "grib files processed:"
-        if ls "$src_dir"/20*.grib >/dev/null 2>&1; then
-            # concatenate all matching files into the target file
-            ls  "$src_dir"/20*.grib
-            cat "$src_dir"/20*.grib > "$fc_file"
+        files=( "$src_dir"/20*.grib )
+        if (( ${{#files[@]}} )); then
+            printf '%s\n' "${{files[@]}}"
+            cat "${{files[@]}}" > "$fc_file"
         else
             echo "WARNING: no grib files found in $src_dir" >&2
         fi

From 4d7191b4c1c3c861ba08e3ec6000dc1763282fd3 Mon Sep 17 00:00:00 2001
From: Andreas Pauling <paa@balfrin-ln003.cscs.ch>
Date: Tue, 24 Feb 2026 08:24:48 +0100
Subject: [PATCH 32/34] Formatting requirements

---
 workflow/Snakefile           | 10 +++++-----
 workflow/rules/verif_obs.smk |  8 ++++----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index 3e9e581b..06e83156 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -9,6 +9,7 @@ from evalml.config import ConfigModel
 # validate configuration and add defaults
 update_config(config, ConfigModel.model_validate(dict(config)).model_dump(mode="json"))
 
+
 # load rules
 # -----------------------------------------------------
 include: "rules/common.smk"
@@ -20,6 +21,7 @@ include: "rules/report.smk"
 include: "rules/plot.smk"
 include: "rules/verif_obs.smk"
 
+
 # about workflow
 # -----------------------------------------------------
 
@@ -35,9 +37,11 @@ DATA_DIR = OUT_ROOT / "data"
 LOGS_DIR = OUT_ROOT / "logs"
 RESULTS_DIR = OUT_ROOT / "results" / EXPERIMENT_NAME
 
+
 # prefer one rule because snakemake complains about ambiguous rules (same output)
 ruleorder: prepare_inference_forecaster > prepare_inference_interpolator
 
+
 # optional messages, log and error handling
 # -----------------------------------------------------
 
@@ -116,7 +120,6 @@ onerror:
 # -----------------------------------------------------
 
 
-
 rule experiment_all:
     """Target rule for experiment workflow."""
     input:
@@ -135,7 +138,6 @@ rule experiment_all:
         ),
 
 
-
 rule showcase_all:
     """Target rule for showcase workflow."""
     input:
@@ -175,8 +177,7 @@ rule run_inference_all:
             run_id=CANDIDATES,
         ),
     output:
-        inference_ok=touch(OUT_ROOT / f"run_inference_all.{EXPERIMENT_HASH}.ok")
-
+        inference_ok=touch(OUT_ROOT / f"run_inference_all.{EXPERIMENT_HASH}.ok"),
 
 
 rule verif_metrics_all:
@@ -204,4 +205,3 @@ rule verif_obs_all:
             init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES_MEC],
             run_id=CANDIDATES,
         ),
-
diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk
index f5c98403..182e6feb 100644
--- a/workflow/rules/verif_obs.smk
+++ b/workflow/rules/verif_obs.smk
@@ -136,7 +136,7 @@ rule link_mec_input:
             one_init_time="${{src_basename: -12}}"
             realpath_src=$(realpath -m "$PWD/$one_init_time/mec/")
 
-            echo "Linking $realpath_src/$src_basename to {wildcards.init_time}/mec/input_mod/$src_basename" 
+            echo "Linking $realpath_src/$src_basename to {wildcards.init_time}/mec/input_mod/$src_basename"
             ln -s "$realpath_src/$src_basename" {wildcards.init_time}/mec/input_mod/"$src_basename"
         done
         ) > {log} 2>&1
@@ -176,7 +176,7 @@ rule run_mec:
         """
         (
         set -euo pipefail
- 
+
         # Run MEC inside sarus container
         # Note: pull command currently needed only once to download the container
         sarus pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
@@ -192,13 +192,13 @@ rule run_mec:
 
         # run container (split over multiple lines for readability)
         sarus run $MOUNTS container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
- 
+
         # Run MEC using local executable (Alternative to sarus container)
         #cd {input.run_dir}
         #export LM_HOST=balfrin-ln003
         #source /oprusers/osm/opr.emme/abs/mec.env
         #./mec > ./mec_out.log 2>&1
- 
+
         # copy the output file to the final location for the Feedback files
         mkdir -p {input.run_dir}/../../fdbk_files
         cp {input.run_dir}/verSYNOP.nc {input.run_dir}/../../fdbk_files/verSYNOP_{wildcards.init_time}.nc

From 87a4d07d10db3542eb564e8cea8b652ba5692967 Mon Sep 17 00:00:00 2001
From: Andreas Pauling <paa@balfrin-ln003.cscs.ch>
Date: Wed, 11 Mar 2026 17:20:53 +0100
Subject: [PATCH 33/34] fix rule dependencies and feedback file naming

---
 workflow/Snakefile           | 9 +++++++--
 workflow/rules/verif_obs.smk | 8 ++++++--
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index 06e83156..a26c08a4 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -172,12 +172,17 @@ rule run_inference_all:
     """Run inference for all reference times as defined in the configuration."""
     input:
         expand(
-            OUT_ROOT / "data/runs/{run_id}/{init_time}/grib",
+            rules.execute_inference.output.okfile,
             init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
             run_id=CANDIDATES,
         ),
     output:
-        inference_ok=touch(OUT_ROOT / f"run_inference_all.{EXPERIMENT_HASH}.ok"),
+        run_all_ok=touch(OUT_ROOT / "logs/run_inference_all.ok"),
+    shell:
+        """
+        mkdir -p $(dirname {output.run_all_ok})
+        touch {output.run_all_ok}
+        """
 
 
 rule verif_metrics_all:
diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk
index 182e6feb..0201fc94 100644
--- a/workflow/rules/verif_obs.smk
+++ b/workflow/rules/verif_obs.smk
@@ -66,7 +66,11 @@ def init_times_for_mec(wc):
 rule prepare_mec_input:
     input:
         src_dir=OUT_ROOT / "data/runs/{run_id}/{init_time}/grib",
-        inference_ok=OUT_ROOT / f"run_inference_all.{EXPERIMENT_HASH}.ok",
+        inference_ok=lambda wc: expand(
+            rules.execute_inference.output.okfile,
+            run_id=wc.run_id,
+            init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
+        ),
     output:
         run=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec"),
         obs=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_obs"),
@@ -201,6 +205,6 @@ rule run_mec:
 
         # copy the output file to the final location for the Feedback files
         mkdir -p {input.run_dir}/../../fdbk_files
-        cp {input.run_dir}/verSYNOP.nc {input.run_dir}/../../fdbk_files/verSYNOP_{wildcards.init_time}.nc
+        cp {input.run_dir}/verSYNOP.nc {input.run_dir}/../../fdbk_files/verSYNOP_{wildcards.init_time}00.nc
         ) > {log} 2>&1
         """

From 8e87ea218dabb5ec125cb972f34db82c8bcae11b Mon Sep 17 00:00:00 2001
From: Andreas Pauling <paa@balfrin-ln003.cscs.ch>
Date: Thu, 12 Mar 2026 16:11:53 +0100
Subject: [PATCH 34/34] same feedback file naming as NWP

---
 workflow/Snakefile           | 4 ++--
 workflow/rules/verif_obs.smk | 7 +++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index a26c08a4..140b3c97 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -132,9 +132,9 @@ rule experiment_all:
             experiment=EXPERIMENT_NAME,
         ),
         expand(
-            OUT_ROOT / "data/runs/{run_id}/fdbk_files/verSYNOP_{init_time}.nc",
+            OUT_ROOT / "data/runs/{run_id}/fdbk_files/verSYNOP_{init_time}00.nc",
             init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES_MEC],
-            run_id=collect_all_candidates(),
+            run_id=CANDIDATES,
         ),
 
 
diff --git a/workflow/rules/verif_obs.smk b/workflow/rules/verif_obs.smk
index 0201fc94..1f22fa74 100644
--- a/workflow/rules/verif_obs.smk
+++ b/workflow/rules/verif_obs.smk
@@ -173,7 +173,9 @@ rule run_mec:
         run_dir=directory(rules.prepare_mec_input.output.run),
         mod_dir=directory(rules.link_mec_input.output.mod),
     output:
-        fdbk_file=OUT_ROOT / "data/runs/{run_id}/fdbk_files/verSYNOP_{init_time}.nc",
+        fdbk_file=OUT_ROOT / "data/runs/{run_id}/fdbk_files/verSYNOP_{init_time}00.nc",
+    wildcard_constraints:
+        init_time=r"\d{12}",
     log:
         OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/run_mec.log",
     shell:
@@ -203,7 +205,8 @@ rule run_mec:
         #source /oprusers/osm/opr.emme/abs/mec.env
         #./mec > ./mec_out.log 2>&1
 
-        # copy the output file to the final location for the Feedback files
+        # copy the output file to the final location for the Feedback files plus renaming to
+        # match NWP naming conventions (verSYNOP_YYYYMMDDHHMMSS.nc)
         mkdir -p {input.run_dir}/../../fdbk_files
         cp {input.run_dir}/verSYNOP.nc {input.run_dir}/../../fdbk_files/verSYNOP_{wildcards.init_time}00.nc
         ) > {log} 2>&1