diff --git a/config/hourly-rollout-o96.yaml b/config/hourly-rollout-o96.yaml new file mode 100644 index 0000000..dc8cf4c --- /dev/null +++ b/config/hourly-rollout-o96.yaml @@ -0,0 +1,42 @@ +# yaml-language-server: $schema=../workflow/tools/config.schema.json +description: | + Evaluate skill of COSMO-E emulator (M-1 forecaster). + +dates: + - 2022-02-03T00:00 # Some example + +runs: + - forecaster: + checkpoint: https://mlflow.ecmwf.int/#/experiments/418/runs/2bee8f6b7d0048089a5e25cb60def0e5 + label: Hourly rollout O96 forecaster + steps: 0/120/6 + config: resources/inference/configs/global-forecaster.yaml + disable_local_eccodes_definitions: true + extra_requirements: + - git+https://github.com/ecmwf/anemoi-inference.git@main + - git+https://github.com/MeteoSwiss/anemoi-core.git@06e5533f3e8da37c44d887c42b67440b40286cb3#subdirectory=models + +baselines: [] + +analysis: + label: ERA5 + analysis_zarr: /scratch/mch/rradev/datasets/aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v4.zarr + +stratification: + regions: [] + root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 + +locations: + output_root: output/ + +profile: + executor: slurm + global_resources: + gpus: 16 + default_resources: + slurm_partition: "postproc" + cpus_per_task: 1 + mem_mb_per_cpu: 1800 + runtime: "1h" + gpus: 0 + jobs: 50 diff --git a/resources/inference/configs/global-forecaster.yaml b/resources/inference/configs/global-forecaster.yaml new file mode 100644 index 0000000..4c8d186 --- /dev/null +++ b/resources/inference/configs/global-forecaster.yaml @@ -0,0 +1,12 @@ +input: + test: + use_original_paths: true + +allow_nans: true + +output: + grib: + path: grib/{date}{time}_{step:03}.grib + negative_step_mode: skip + +write_initial_state: true diff --git a/src/data_input/__init__.py b/src/data_input/__init__.py index e67816f..19c06dc 100644 --- a/src/data_input/__init__.py +++ b/src/data_input/__init__.py @@ -1,20 +1,29 @@ +import yaml import logging import os import sys from datetime import datetime, timedelta from pathlib import Path +from functools import lru_cache eccodes_definition_path = Path(sys.prefix) / "share/eccodes-cosmo-resources/definitions" os.environ["ECCODES_DEFINITION_PATH"] = str(eccodes_definition_path) -from meteodatalab import data_source, grib_decoder # noqa: E402 - import numpy as np # noqa: E402 import xarray as xr # noqa: E402 +import earthkit.data as ekd # noqa: E402 LOG = logging.getLogger(__name__) +@lru_cache(maxsize=1) +def earthkit_xarray_engine_profile() -> dict: + fn = Path(__file__).parent / "profile.yaml" + with open(fn) as f: + profile = yaml.safe_load(f) + return profile + + def _select_valid_times(ds, times: np.datetime64): # (handle special case where some valid times are not in the dataset, e.g. at the end) times_np = np.asarray(times, dtype="datetime64[ns]") @@ -108,9 +117,15 @@ def load_fct_data_from_grib( ) -> xr.Dataset: """Load forecast data from GRIB files for a specific valid time.""" files = sorted(root.glob(f"{reftime:%Y%m%d%H%M}*.grib")) - fds = data_source.FileDataSource(datafiles=files) - ds = grib_decoder.load(fds, {"param": params, "step": steps}) - for var, da in ds.items(): + + profile = earthkit_xarray_engine_profile() + ds: xr.Dataset = ( + ekd.from_source("file", files) + .sel(param=params, step=steps) + .to_xarray(profile=profile) + ) + + for var, da in ds.data_vars.items(): if "z" in da.dims and da.sizes["z"] == 1: ds[var] = da.squeeze("z", drop=True) elif "z" in da.dims and da.sizes["z"] > 1: diff --git a/src/data_input/profile.yaml b/src/data_input/profile.yaml new file mode 100644 index 0000000..56d7517 --- /dev/null +++ b/src/data_input/profile.yaml @@ -0,0 +1,20 @@ +ensure_dims: [z,number,step, forecast_reference_time] +rename_dims: { + level: z, + number: eps, + step: lead_time, + forecast_reference_time: ref_time +} +variable_attrs: +- cfName +- name +- units +- typeOfLevel +- levtype +- paramId + +global_attrs: +- Conventions: CF-1.8 +- institution: MeteoSwiss + +add_valid_time_coord: true