Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
c1375ab
Initial draft (pseudo code)
dnerini Oct 7, 2025
9f608f2
add namelist as resource
Nov 13, 2025
e82bd94
add verif_obs.smk to Snakefile
Nov 13, 2025
c3ab651
Add rules for observation data and namelist generation (using fake data)
Nov 13, 2025
7512d96
add newline to namelist template
Nov 13, 2025
13301a5
somewhat working version of run_mec (with fake data)
Nov 13, 2025
e722e5f
correct typo and add optional script for generating namelist, in case…
Nov 24, 2025
3d9e3c1
fix: add localrule to inference_interpolator rule (#57)
frazane Oct 8, 2025
918913f
Fix for interpolator rule
OpheliaMiralles Oct 8, 2025
179eb4d
Consolidate multi packages into unique src/ dir (#58)
dnerini Oct 14, 2025
e791a30
Update configs (#63)
OpheliaMiralles Oct 15, 2025
d197712
Adopt 'steps' instead of 'lead_time' (#62)
dnerini Oct 20, 2025
9568987
Update example config for experiment with interpolators (#70)
dnerini Oct 20, 2025
128eb91
Distinguish between primary runs ('candidates') and secondary runs (#64)
dnerini Oct 20, 2025
6315afc
Adopt forecast intervals including the end point (#71)
dnerini Oct 21, 2025
e028f59
Mrb 550 inconcsistent forecast initializations in evalml (#72)
jonasbhend Oct 21, 2025
5406777
Update vega-lite spec (#69)
frazane Oct 22, 2025
8d01490
Decouple inference preparation and execution (#68)
frazane Oct 22, 2025
b7b1311
Scores by Region (#75)
jonasbhend Oct 29, 2025
04c4cf1
input data and namelist for MEC
Dec 11, 2025
b1959dc
Merge remote-tracking branch 'origin/main' into MRB-534-Implement-rul…
dnerini Dec 11, 2025
23c9599
Cleanup
dnerini Dec 11, 2025
804455a
Refactor MEC namelist generation
Dec 11, 2025
f793d85
setup MEC case
Dec 18, 2025
3839476
add use of local MEC executable and cleaning
Jan 7, 2026
5b58b7a
Support of mec in a sarus container
Jan 8, 2026
292878d
target final feedback files
Jan 15, 2026
09f06da
Fix linting
Jan 19, 2026
6776572
Ensure newline at the end of MEC namelist
Jan 21, 2026
5d381f1
model data preparation for MEC
Jan 29, 2026
acce2f7
Merge branch 'main' into MRB-534-Implement-rule-to-generate-namelist
Feb 2, 2026
f9a5889
fix init_times_for_mec and add touch output/input (MEC waits for all …
Feb 4, 2026
99753e5
Refactoring, bugfixes
Feb 12, 2026
5fa2a34
Merge branch 'main' into MRB-534-Implement-rule-to-generate-namelist
dnerini Feb 12, 2026
4d7191b
Formatting requirements
Feb 24, 2026
87a4d07
fix rule dependencies and feedback file naming
Mar 11, 2026
8e87ea2
same feedback file naming as NWP
Mar 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions config/forecasters-co2.yaml
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
# yaml-language-server: $schema=../workflow/tools/config.schema.json
description: |
description:
Evaluate skill of COSMO-E emulator (M-1 forecaster).

dates:
- 2020-02-03T00:00 # Storm Petra
- 2020-02-07T00:00 # Storm Sabine
- 2020-10-01T00:00 # Storm Brigitte
start: 2020-07-25T00:00
end: 2020-07-27T00:00
frequency: 12h

runs:
- forecaster:
mlflow_id: d0846032fc7248a58b089cbe8fa4c511
label: M-1 forecaster
steps: 0/120/6
steps: 0/24/12
config: resources/inference/configs/sgm-forecaster-global_trimedge.yaml
extra_dependencies:
- git+https://github.com/ecmwf/anemoi-inference.git@0.8.3
Expand All @@ -21,7 +21,7 @@ baselines:
baseline_id: COSMO-E
label: COSMO-E
root: /store_new/mch/msopr/ml/COSMO-E
steps: 0/120/6
steps: 0/24/12

analysis:
label: COSMO KENDA
Expand Down
76 changes: 76 additions & 0 deletions resources/mec/namelist.jinja2
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
!==============================================================================
! namelist template for MEC
!==============================================================================

!===================
! general parameters
!===================
&run
method = 'GMESTAT' ! Model Equivalent Calculator
model = 'ML' ! forecast model. One of "COSMO" "ICON" "ML"
input = './input_mod' ! input data path
data = '/oprusers/osm/opr.emme/data/' ! data path for auxiliary data
obsinput = './input_obs' ! observation input data path
output = '.' ! output data to working directory
time_ana = {{ init_time }}00 ! analysis date YYYYMMDDHHMMSS
read_fields = 'ps u t v q geof t2m td2m u_10m v_10m'
grib_edition = 2
grib_library = 2 ! GRIB-API used: 1=GRIBEX 2=GRIB2-API
cosmo_refatm = 2 ! reference atmosphere to be used for COSMO:1or2
fc_hours = 0 ! Default is 3h. Has to be set to 0 if one wants to verify +0h leadtime
nproc1 = 1
nproc2 = 1
/

!===============================
! observation related parameters
!===============================
&observations
!---------------------------------------------------
! read from CDFIN files (if not set use mon/cof/ekf)
!---------------------------------------------------
read_cdfin = F ! (F): don't read COSMO CDFIN files; get obs from ekf
vint_lin_t = T ! linear vertical interpolation for temperature
vint_lin_z = T ! linear vertical interpolation for geopotential
vint_lin_uv = T ! linear vertical interpolation for wind
ptop_lapse = 850.
pbot_lapse = 950.
! int_nn = T ! horizontal interpolation: nearest neighbor
/

!====================
! Ensemble parameters
!====================
&ENKF
k_enkf = 0 ! ensemble size (0 for det. run)
det_run = 1 ! set to 1 for deterministic run, 0 for ensemble
/

!================================
! Verification related parameters
!================================
&veri_obs
obstypes = "SYNOP" ! "SYNOP TEMP"
fc_times = {{ leadtimes }} ! forecast lead time at reference (hhmm) 0000,1200,2400,...
prefix_in = 'mon' ! prefix for input files. ekf or mon
prefix_out = 'ver'
rm_old = 2 ! overwrite entries in verification file ?
fc_file = 'fc__FCR_TIME_00' ! template for forecast file name
time_range = 1
ekf_concat = F
ref_runtype = 'any' ! accept any runtype for the reference state
/

&report
time_b = -0029 ! (hhmm, inclusive)
time_e = 0030 ! (hhmm, exclusive)
/

&cosmo_obs
lcd187 = .true. ! use ground based wind lidar obs
verification_start = -29 ! (min, inclusive)
verification_end = 30 ! (min, inclusive)
/
&synop_obs
version = 1
/
28 changes: 27 additions & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ include: "rules/inference.smk"
include: "rules/verif.smk"
include: "rules/report.smk"
include: "rules/plot.smk"
include: "rules/verif_obs.smk"


# about workflow
Expand All @@ -37,6 +38,10 @@ LOGS_DIR = OUT_ROOT / "logs"
RESULTS_DIR = OUT_ROOT / "results" / EXPERIMENT_NAME


# prefer one rule because snakemake complains about ambiguous rules (same output)
ruleorder: prepare_inference_forecaster > prepare_inference_interpolator
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need to have a closer look at this, I don't understand why this problem would appear with your changes

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without it snakemake complains. Thanks for having a closer look at it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this clarified?



# optional messages, log and error handling
# -----------------------------------------------------

Expand Down Expand Up @@ -126,6 +131,11 @@ rule experiment_all:
rules.verif_metrics_plot.output,
experiment=EXPERIMENT_NAME,
),
expand(
OUT_ROOT / "data/runs/{run_id}/fdbk_files/verSYNOP_{init_time}00.nc",
init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES_MEC],
run_id=CANDIDATES,
),


rule showcase_all:
Expand Down Expand Up @@ -162,10 +172,17 @@ rule run_inference_all:
"""Run inference for all reference times as defined in the configuration."""
input:
expand(
OUT_ROOT / "data/runs/{run_id}/{init_time}/raw",
rules.execute_inference.output.okfile,
init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
run_id=CANDIDATES,
),
output:
run_all_ok=touch(OUT_ROOT / "logs/run_inference_all.ok"),
shell:
"""
mkdir -p $(dirname {output.run_all_ok})
touch {output.run_all_ok}
"""


rule verif_metrics_all:
Expand All @@ -184,3 +201,12 @@ rule verif_metrics_plot_all:
rules.verif_metrics_plot.output,
experiment=EXPERIMENT_NAME,
),


rule verif_obs_all:
input:
expand(
rules.run_mec.output,
init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES_MEC],
run_id=CANDIDATES,
),
213 changes: 213 additions & 0 deletions workflow/rules/verif_obs.smk
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we move the mec directory from being inside each forecast run directory (output/data/runs/<run-id>/<init-time>/mec) to output/data/mec/<valid-time> so as to not mix up inittime-based directory structure and validtime-based directory structure? This is also the same approach adopted by osm in the operational archive.

Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
from pathlib import Path
from datetime import datetime, timedelta

EXPERIMENT_HASH = short_hash_config()


# TODO: merge _parse_steps from generate_mec_namelist.py and verif_single_init.py?
def _parse_steps(steps: str) -> list[int]:
# check that steps is in the format "start/stop/step"
if "/" not in steps:
raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'")
if len(steps.split("/")) != 3:
raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'")
start, end, step = map(int, steps.split("/"))
return list(range(start, end + 1, step))


# TODO: merge with _ref_times from common.smk?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not merged but perhaps could be moved to common.smk.

def _reftimes_mec():
    """
    Build the list of MEC reference times from the workflow configuration.

    MEC needs every contributing forecast to have completed, so when the
    config gives a start/end/frequency window the first reference time is
    offset from the configured start by the longest forecast lead time.
    An explicit date list in the config is returned as-is (parsed).
    """
    fmt = "%Y-%m-%dT%H:%M"
    dates_cfg = config["dates"]

    # Explicit list of dates: parse and use verbatim.
    if isinstance(dates_cfg, list):
        return [datetime.strptime(d, fmt) for d in dates_cfg]

    # start/end/frequency form: shift the start by the maximum lead time.
    max_lead = max(_parse_steps(config["runs"][0]["forecaster"]["steps"]))
    current = datetime.strptime(dates_cfg["start"], fmt) + timedelta(hours=max_lead)
    last = datetime.strptime(dates_cfg["end"], fmt)
    step = _parse_timedelta(dates_cfg["frequency"])

    reftimes = []
    while current <= last:
        reftimes.append(current)
        current += step
    return reftimes


REFTIMES_MEC = _reftimes_mec()


def init_times_for_mec(wc):
    """
    Return list of init times (YYYYMMDDHHMM) from init_time - lead ... init_time
    stepping by configured frequency.

    Args:
        wc: Snakemake wildcards object with an ``init_time`` attribute
            formatted as ``YYYYMMDDHHMM``.

    Returns:
        list[str]: Init-time strings (``YYYYMMDDHHMM``), oldest first,
        ending with ``wc.init_time`` itself (the window is inclusive at
        both ends).
    """
    init = wc.init_time
    base = datetime.strptime(init, "%Y%m%d%H%M")

    # NOTE(review): assumes get_leadtime returns something like "48h"
    # (integer hours with a trailing 'h') — TODO confirm against common.smk.
    lt = get_leadtime(wc)
    lead_h = int(str(lt).rstrip("h"))
    freq_td = _parse_timedelta(config["dates"]["frequency"])

    # iterate from base - lead to base stepping by the parsed timedelta
    t = base - timedelta(hours=lead_h)
    times = []

    while t <= base:
        times.append(t.strftime("%Y%m%d%H%M"))
        t += freq_td
    return times


# prepare_mec_input: setup run dir, gather observations and model data in the run dir for the actual init time
rule prepare_mec_input:
input:
src_dir=OUT_ROOT / "data/runs/{run_id}/{init_time}/grib",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no rule giving this an output, so this should to the very list trigger some warnings from snakemake. You could specify it as a parameter instead.

inference_ok=lambda wc: expand(
rules.execute_inference.output.okfile,
run_id=wc.run_id,
init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
),
output:
run=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec"),
obs=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_obs"),
ekf_file=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_obs/ekfSYNOP.nc",
fc_file=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/fc_{init_time}",
log:
OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/prepare_mec_input.log",
shell:
"""
(
set -euo pipefail
shopt -s nullglob

mkdir -p {output.run} {output.obs}
src_dir="{input.src_dir}"
fc_file="{output.fc_file}"

# extract YYYYMM from init_time (which is YYYYMMDDHHMM)
init="{wildcards.init_time}"
ym="${{init:0:6}}"
ymdh="${{init:0:10}}"
echo "init time: ${{init}}"

# concatenate all grib files in src_dir into a single file fc_file
echo "grib files processed:"
files=( "$src_dir"/20*.grib )
if (( ${{#files[@]}} )); then
printf '%s\n' "${{files[@]}}"
cat "${{files[@]}}" > "$fc_file"
else
echo "WARNING: no grib files found in $src_dir" >&2
fi
Comment on lines +97 to +105
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this really necessary? We are effectively duplicating the entire output data.


# collect observations (ekfSYNOP) and/or (monSYNOP from DWD; includes precip) files
cp /store_new/mch/msopr/osm/KENDA-1/EKF/${{ym}}/ekfSYNOP_${{init}}00.nc {output.ekf_file}
cp /scratch/mch/paa/mec/MEC_ML_input/monFiles2020/hpc/uwork/swahl/temp/feedback/monSYNOP.${{init:0:10}} {output.obs}/monSYNOP.nc

) > {log} 2>&1
"""


# link_mec_input: create the input_mod dir with symlinks to all fc files from
# all source inits. MEC reads every forecast valid at {init_time}, so this rule
# collects the fc_* files of all contributing initializations into one
# input_mod directory via symlinks (no data duplication).
rule link_mec_input:
    input:
        # list of source fc files produced by prepare_mec_input for each init
        # in the window [init_time - max lead, init_time]
        fc_files=lambda wc: [
            OUT_ROOT / f"data/runs/{wc.run_id}/{t}/mec/fc_{t}"
            for t in init_times_for_mec(wc)
        ],
    output:
        # own the final input_mod directory for this init (and its contents)
        mod=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_mod"),
    log:
        OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/link_mec_input.log",
    shell:
        """
        (
        set -euo pipefail

        mkdir -p {output.mod}
        cd {output.mod}/../../..

        # create symlinks for each source init into this init's input_mod
        for src in {input.fc_files}; do
            src_basename=$(basename "$src")
            echo "Processing source fc file: $src_basename"
            one_init_time="${{src_basename: -12}}"
            realpath_src=$(realpath -m "$PWD/$one_init_time/mec/")

            echo "Linking $realpath_src/$src_basename to {wildcards.init_time}/mec/input_mod/$src_basename"
            ln -s "$realpath_src/$src_basename" {wildcards.init_time}/mec/input_mod/"$src_basename"
        done
        ) > {log} 2>&1
        """


# generate_mec_namelist: render the MEC Fortran namelist from the Jinja2
# template for one (run_id, init_time) pair.
rule generate_mec_namelist:
    localrule: True
    input:
        script="workflow/scripts/generate_mec_namelist.py",
        template="resources/mec/namelist.jinja2",
        # plain path here: the directory() flag is only valid in the
        # output section of a Snakemake rule, not in input.
        mod_dir=rules.link_mec_input.output.mod,
    output:
        namelist=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/namelist",
    params:
        # forecast steps spec ("start/stop/step") for this candidate run
        steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"],
    shell:
        """
        uv run {input.script} \
            --steps {params.steps} \
            --init_time {wildcards.init_time} \
            --template {input.template} \
            --namelist {output.namelist}
        """


rule run_mec:
input:
namelist=rules.generate_mec_namelist.output.namelist,
run_dir=directory(rules.prepare_mec_input.output.run),
mod_dir=directory(rules.link_mec_input.output.mod),
output:
fdbk_file=OUT_ROOT / "data/runs/{run_id}/fdbk_files/verSYNOP_{init_time}00.nc",
wildcard_constraints:
init_time=r"\d{12}",
log:
OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/run_mec.log",
shell:
"""
(
set -euo pipefail

# Run MEC inside sarus container
# Note: pull command currently needed only once to download the container
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the pull command could then be factored out into a separate rule that is run only once before launching all the parallel MEC jobs

sarus pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
abs_run_dir=$(realpath {input.run_dir})
abs_mod_root=$(realpath {input.run_dir}/../..) # two levels up (so that all links are mounted to the container)

# build mount options in a variable for readability
MOUNTS="\
--mount=type=bind,source=$abs_run_dir,destination=/src/bin2 \
--mount=type=bind,source=$abs_mod_root,destination=$abs_mod_root,readonly \
--mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ \
"

# run container (split over multiple lines for readability)
sarus run $MOUNTS container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main

# Run MEC using local executable (Alternative to sarus container)
#cd {input.run_dir}
#export LM_HOST=balfrin-ln003
#source /oprusers/osm/opr.emme/abs/mec.env
#./mec > ./mec_out.log 2>&1

# copy the output file to the final location for the Feedback files plus renaming to
# match NWP naming conventions (verSYNOP_YYYYMMDDHHMMSS.nc)
mkdir -p {input.run_dir}/../../fdbk_files
cp {input.run_dir}/verSYNOP.nc {input.run_dir}/../../fdbk_files/verSYNOP_{wildcards.init_time}00.nc
) > {log} 2>&1
"""
Loading
Loading