From 43550e2a2ded94745da3afad48ff6182bda43287 Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Mon, 2 Dec 2024 10:42:48 -0600 Subject: [PATCH 01/23] Reformat by pre-commit run --- zppy/templates/pcmdi_diags/mean_climate_plot_driver.py | 6 +++--- zppy/templates/pcmdi_diags/mean_climate_plot_parser.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/zppy/templates/pcmdi_diags/mean_climate_plot_driver.py b/zppy/templates/pcmdi_diags/mean_climate_plot_driver.py index 913621a9..a85cf4e0 100755 --- a/zppy/templates/pcmdi_diags/mean_climate_plot_driver.py +++ b/zppy/templates/pcmdi_diags/mean_climate_plot_driver.py @@ -102,9 +102,9 @@ def save_figure_data( outdic = outdic.drop(columns=[var]) else: # replace the variable with the name + units - outdic.columns.values[ - outdic.columns.values.tolist().index(var) - ] = var_units[var_names.index(var)] + outdic.columns.values[outdic.columns.values.tolist().index(var)] = ( + var_units[var_names.index(var)] + ) # save data to .csv file outdic.to_csv(outfile) diff --git a/zppy/templates/pcmdi_diags/mean_climate_plot_parser.py b/zppy/templates/pcmdi_diags/mean_climate_plot_parser.py index 7c66d895..e73e4904 100755 --- a/zppy/templates/pcmdi_diags/mean_climate_plot_parser.py +++ b/zppy/templates/pcmdi_diags/mean_climate_plot_parser.py @@ -270,9 +270,9 @@ def fill_plot_var_and_units(model_lib, cmip_lib): for stat in cmip_lib.df_dict: for season in cmip_lib.df_dict[stat]: for region in cmip_lib.df_dict[stat][season]: - cmip_lib.df_dict[stat][season][region][ - "rtmt" - ] = cmip_lib.df_dict[stat][season][region].pop("rt") + cmip_lib.df_dict[stat][season][region]["rtmt"] = ( + cmip_lib.df_dict[stat][season][region].pop("rt") + ) if var in model_lib.var_list and var in cmip_lib.var_list: varunt = var + "\n" + str(units_all[var]) From a9e870868e033c3525818543d86e2864613c7d4b Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Thu, 26 Dec 2024 17:43:01 -0600 Subject: [PATCH 02/23] Small bug fix in the first line to 
avoid failure --- zppy/templates/pcmdi_diags.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash index e8519d55..0af779d6 100755 --- a/zppy/templates/pcmdi_diags.bash +++ b/zppy/templates/pcmdi_diags.bash @@ -1,5 +1,5 @@ #!/bin/bash -{% include 'slurm_header.sh' %} +{% include 'inclusions/slurm_header.sh' %} {{ environment_commands }} From 561c82c6f89af4fc56b08947e4acd26a74d32545 Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Mon, 30 Dec 2024 16:23:37 -0600 Subject: [PATCH 03/23] update the workflow to be modularized Changes are made in the pcmdi_diags.bash to change the workflow to be more modularized and easier to read --- zppy/defaults/default.ini | 56 +- zppy/pcmdi_diags.py | 10 +- zppy/templates/pcmdi_diags.bash | 2288 +++++++++++++++---------------- 3 files changed, 1109 insertions(+), 1245 deletions(-) diff --git a/zppy/defaults/default.ini b/zppy/defaults/default.ini index a3cf960d..0a583572 100755 --- a/zppy/defaults/default.ini +++ b/zppy/defaults/default.ini @@ -138,20 +138,14 @@ scratch = string(default="") [pcmdi_diags] backend = string(default="mpl") cfg = string(default="") -# File of cmip variable lists (cmip6 convention) -cmip_variables = string(default="pcmdi_diags/cmip_variables.json") # File of specified regions for mean climate calculation -regions_specs = string(default="pcmdi_diags/regions_specs.json") -# File of derived variables -derived_variable = string(default="pcmdi_diags/derived_variable.json") +regions_specs = string(default="inclusions/pcmdi/regions_specs.json") # File of observation data name for mean climate calculation -reference_alias = string(default="pcmdi_diags/reference_alias.json") -# File of fuction to generate land/sea mask -process_sftlf = string(default="pcmdi_diags/process_sftlf.py") +reference_alias = string(default="inclusions/pcmdi/reference_alias.json") # File of fuction to generate mean climate metrics figure -clim_plot_parser = 
string(default="pcmdi_diags/mean_climate_plot_parser.py") +clim_plot_parser = string(default="inclusions/pcmdi/mean_climate_plot_parser.py") # File of module to plot mean climate metrics figure -clim_plot_driver = string(default="pcmdi_diags/mean_climate_plot_driver.py") +clim_plot_driver = string(default="inclusions/pcmdi/mean_climate_plot_driver.py") # Path to observation time-series data # Required for "mean_climate","variability_mode","enso" obs_ts = string(default="") @@ -163,13 +157,19 @@ obs_sets = string(default="default") cmip_name = string(default="e3sm.historical.v3-LR.0051") # required for "model_vs_model" comparison cmip_name_ref = string(default="e3sm.historical.v3-LR.0051") +# required for "model_vs_obs" comparison +cmip_tableID = string(default="Amon") +# variables in the cmip6 table that can be potentially used by pcmdi +# this list depends on the definition of cmip variable +# required for "mean climate" diagnostics +cmip_vars = string(default="pr,prw,psl,rlds,rldscs,rltcre,rstcre,rlut,rlutcs,rsut,rsutcs,rsds,rsdscs,rsdt,rsus,rsuscs,rlus,rsut,rtmt,sfcWind,tas,tauu,tauv,ts,ta,ua,va,zg") # options shared by pcmdi pmp_debug = string(default=False) # flag to process the land/sea mask within pcmdi generate_sftlf = string(default=True) # variables to be used by the pcmdi diagnostics -# needs to setup for each subsections -vars = string(default="") +# needs to setup for each subsections, defalut setup is the mean climate metrics +vars = string(default="pr,prw,psl,rlds,rldscs,rltcre,rstcre,rlut,rlutcs,rsds,rsdscs,rsdt,rsus,rsuscs,rlus,rsut,rtmt,sfcWind,tas,tauu,tauv,ts,ta-200,ta-850,ua-200,ua-850,va-200,va-850,zg-500") # sets of diagnostics from pcmdi package sets = string_list(default=list("mean_climate","variability_mode_atm","variability_mode_cpl","enso")) # options to identify subset of pcmdi drivers ("mean_climate","variability_mode","enso") @@ -220,9 +220,9 @@ swap_test_ref = boolean(default=False) # vars = "ts" for cpl_modes 
######################################################################################### #name of atmospheric modes varibility -atm_modes = string_list(default=list("NAM","NAO","PNA","NPO","SAM","PSA1","PSA2")) +atm_modes = string(default="NAM,NAO,PNA,NPO,SAM,PSA1,PSA2") #name of coupled modes varibility -cpl_modes = string_list(default=list("PDO","NPGO","AMO")) +cpl_modes = string(default="PDO,NPGO,AMO") #keywards for unit conversion in pcmdi (model) ModUnitsAdjust = string(default="") #keywards for unit conversion in pcmdi (observation) @@ -236,7 +236,7 @@ RmDomainMean = string(default=True) EofScaling = string(default=False) ConvEOF = string(default=True) CBF = string(default=True) -cmec = string(default=True) +cmec = string(default=False) update_json = string(default=False) plot_obs = string(default=True) plot = string(default=True) @@ -246,7 +246,7 @@ nc_out = string(default=True) # options for pcmdi enso diagnostics # vars = "psl,pr,prsn,ts,tas,tauu,tauv,hflx,hfss,rlds,rsds,rlus,rlut,rsdt" ########################################################################################## -groups = string_list(default=list("ENSO_perf","ENSO_proc","ENSO_tel")) +enso_groups = string(default="ENSO_perf,ENSO_proc,ENSO_tel") ########################################################################################## # optional for mean climate diagnostics # vars = "pr,prw,psl,rlds,rldscs,rltcre,rstcre,rlut,rlutcs,rsds,rsdscs,rsdt,rsus,rsuscs, @@ -261,15 +261,11 @@ regional = string(default="y") regions = string(default="global,ocean,land,NHEX,SHEX,TROPICS,NHEX_ocean,SHEX_ocean,NHEX_land,SHEX_land,ocean_50S50N") # save derived climatology data save_test_clims = string(default=True) -# method to determine the way to process mean climate data -# default used nco instead of pcmdi built-in function -climatology_process_method = string(default="nco") -# Regridding by pcmdi (default is to regrid data to 2.5x2.5 grid for diagnostic metrics) -# Required for mean climate -# OPTIONS: 
'2.5x2.5' or an actual cdms2 grid object -target_grid = string(default="2.5x2.5") -# OPTIONS: String for description on the selected grid -target_grid_string = string(default="2p5x2p5") +# Regridding by pcmdi (required for mean climate) +# OPTIONS: '1x1' for e3sm, '2.5x2.5' for cmip, or an actual cdms2 grid object, +target_grid = string(default="1x1") +# OPTIONS: "1px1p" for e3sm, "2p5x2p5" for cmip, description on the selected grid +target_grid_string = string(default="1px1p") # OPTIONS: 'regrid2','esmf' regrid_tool = string(default="esmf") # OPTIONS: 'linear','conservative', only if tool is esmf @@ -287,7 +283,6 @@ portrait_vertical_line = string(default=True) vars = string(default=None) grid = string(default=None) cmip_metadata = string(default=None) - cmip_variables = string(default=None) pcmdi_data_set = string(default=None) pcmdi_data_path = string(default=None) derived_variable = string(default=None) @@ -309,7 +304,6 @@ portrait_vertical_line = string(default=True) swap_test_ref = boolean(default=None) tag = string(default=None) ts_num_years_ref = integer(default=None) - climatology_process_method = string(default=None) target_grid = string(default=None) target_grid_string = string(default=None) regrid_tool = string(default=None) @@ -330,13 +324,15 @@ portrait_vertical_line = string(default=True) landmask = string(default=None) frequency = string(default=None) generate_sftlf = string(default=None) - atm_modes = string_list(default=None) - cpl_modes = string_list(default=None) - groups = string_list(default=None) + atm_modes = string(default=None) + cpl_modes = string(default=None) + enso_groups = string(default=None) ModUnitsAdjust = string(default=None) ObsUnitsAdjust = string(default=None) cmip_name = string(default=None) cmip_name_ref = string(default=None) + cmip_tableID = string(default=None) + cmip_vars = string(default=None) pmp_debug = string(default=None) nc_out_obs = string(default=None) nc_out = string(default=None) diff --git 
a/zppy/pcmdi_diags.py b/zppy/pcmdi_diags.py index cc38d5d7..c28d12c5 100644 --- a/zppy/pcmdi_diags.py +++ b/zppy/pcmdi_diags.py @@ -2,14 +2,13 @@ import pprint from typing import List -import jinja2 - from zppy.bundle import handle_bundles from zppy.utils import ( add_dependencies, check_status, get_tasks, get_years, + initialize_template, make_executable, print_url, submit_script, @@ -19,12 +18,7 @@ # ----------------------------------------------------------------------------- def pcmdi_diags(config, script_dir, existing_bundles, job_ids_file): - # Initialize jinja2 template engine - templateLoader = jinja2.FileSystemLoader( - searchpath=config["default"]["templateDir"] - ) - templateEnv = jinja2.Environment(loader=templateLoader) - template = templateEnv.get_template("pcmdi_diags.bash") + template, _ = initialize_template(config, "pcmdi_diags.bash") # --- List of pcmdi_diags tasks --- tasks = get_tasks(config, "pcmdi_diags") diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash index 0af779d6..058f69e0 100755 --- a/zppy/templates/pcmdi_diags.bash +++ b/zppy/templates/pcmdi_diags.bash @@ -43,463 +43,569 @@ results_dir=${tag}_${Y1}-${Y2} ref_name={{ ref_name }} -#info for pcmdi specific data structure +################################################## +#info to construct pcmdi-preferred data convension +################################################## +cmip_name='{{ cmip_name }}' +tableID='{{ cmip_tableID }}' +{% if run_type == "model_vs_obs" %} +cmip_name_ref='obs.historical.%(model).00' +tableID_ref=${tableID} +{% elif run_type == "model_vs_model" %} +cmip_name_ref='{{ cmip_name_ref }}' +tableID_ref='{{ cmip_tableID_ref }}' +{%- endif %} case_id=v$(date '+%Y%m%d') # Create temporary workdir workdir=`mktemp -d tmp.${id}.XXXX` cd ${workdir} -# Create results directory -if [ ! -d ${results_dir} ];then - mkdir -p ${results_dir} -fi -#directory to save land/sea mask generated by pcmdi -fixed_dir="${results_dir}/fixed" -if [ ! 
-d ${fixed_dir} ];then - mkdir -p ${fixed_dir} -fi - -# Prepare data files for pcmdi diagnostics, which is achieved by two steps: -# (a) convert e3sm output to cmip type, which used the "e3sm_to_cmip" function -# available at zppy (modifications are made to process more variables and -# 3D fileds at fixed pressure levels). -# (b) locate observations in e3sm diagnostics and convert them to the pcmdi preferred -# data format -#file to specify reference data used to derive the diagnostic metrics -cat > reference_alias.json << EOF -{% include reference_alias %} -EOF -#regions specified to derive global/regional mean metrics -cat > regions_specs.json << EOF -{% include regions_specs %} -EOF -#file include derived variables -cat > derived_variable.json << EOF -{% include derived_variable %} -EOF -#file to genereate land/sea mask data if not available -cat > process_sftlf.py << EOF -{% include process_sftlf %} -EOF +create_links_acyc_climo() +{ + ts_dir_source=$1 + ts_dir_destination=$2 + begin_year=$3 + end_year=$4 + name_key=$5 + error_num=$6 + # Create netcdf files for time series variables + mkdir -p ${ts_dir_destination} + cd ${ts_dir_destination} + # https://stackoverflow.com/questions/27702452/loop-through-a-comma-separated-shell-variable + variables="{{ cmip_vars }}" + for v in ${variables//,/ } + do + # Go through the time series files for between year1 and year2, using a step size equal to the number of years per time series file + for year in `seq ${begin_year} {{ ts_num_years }} ${end_year}`; + do + YYYY=`printf "%04d" ${year}` + for file in ${ts_dir_source}/${v}_*_${YYYY}*.nc + do + # Add this time series file to the list of files for cdscan to use + echo ${file} >> ${v}_files.txt + done + done + #derive annual cycle climate mean + dofm=(15 46 74 105 135 166 196 227 258 288 319 349) #middle day of month + for month in `seq 1 1 12`; + do + MM=`printf "%02d" ${month}` + MDAY=dofm[${month}-1] + cat ${v}_files.txt | ncra -O -h -F -d "time,${month},,12" 
${v}_clm_${MM}.nc + done + #Concatenate files to form the annual cycle monthly climatology file + combined_name="${name_key}.${v}.${begin_year}01-${end_year}12.AC.${case_id}.nc" + ncrcat -O -d time,0, ${v}_clm_*.nc ${combined_name} + #modify time to avoid issues in pcmdi calculation + ncap2 -O -h -s 'time[time]={15.5, 45, 74.5, 105, 135.5, 166, 196.5, 227.5, 258, 288.5,319, 349.5};time_bnds[time,bnds]={0, 31, 31, 59, 59, 90, 90, 120, 120, 151, 151, 181, 181, 212, 212, 243, 243, 273, 273, 304, 304, 334, 334, 365.};time@units="days since 1850-01-01 00:00:00";time@calendar="noleap";time@bounds="time_bnds"' ${combined_name} ${combined_name} + rm -rvf ${v}_clm_*.nc + if [ $? != 0 ]; then + cd {{ scriptDir }} + echo "ERROR (${error_num})" > {{ prefix }}.status + exit ${error_num} + fi + done + cd .. +} + +create_links_ts() +{ + ts_dir_source=$1 + ts_dir_destination=$2 + begin_year=$3 + end_year=$4 + subname=$5 + error_num=$6 + # Create netcdf files for time series variables + mkdir -p ${ts_dir_destination} + cd ${ts_dir_destination} + # https://stackoverflow.com/questions/27702452/loop-through-a-comma-separated-shell-variable + variables="{{ vars }}" + for v in ${variables//,/ } + do + # Go through the time series files for between year1 and year2, using a step size equal to the number of years per time series file + for year in `seq ${begin_year} {{ ts_num_years }} ${end_year}`; + do + YYYY=`printf "%04d" ${year}` + for file in ${ts_dir_source}/${v}_*_${YYYY}*.nc + do + # Add this time series file to the list of files for cdscan to use + echo ${file} >> ${v}_files.txt + done + done + # netcdf file will be combined to cover the whole period from year1 to year2 + combined_name="${subname}.${v}.${begin_year}01-${end_year}12.nc" + cat ${v}_files.txt | ncrcat -v ${v} -d "time,${begin_year}-01-01,${end_year}-12-31" ${combined_name} + #modify time to avoid issues in pcmdi calculation + ncap2 -O -h -s 
'defdim("bnds",2);time_bnds=make_bounds(time,$bnds,"time_bnds");time_bnds@units=time@units;time_bnds@calendar=time@calendar' ${combined_name} ${combined_name} + if [ $? != 0 ]; then + cd {{ scriptDir }} + echo "ERROR (${error_num})" > {{ prefix }}.status + exit ${error_num} + fi + done + cd .. +} + +create_links_acyc_climo_obs() +{ + ts_dir_source=$1 + ts_dir_destination=$2 + begin_year=$3 + end_year=$4 + error_num=$5 + # Create netcdf files for time series variables + mkdir -p ${ts_dir_destination} + cd ${ts_dir_destination} + for file in ${ts_dir_source}/*.nc + do + fname=`basename $file` + PREFIX=${fname: :-17} + YYYYS=${fname: -16:-10} + YYYYE=${fname: -9:-3} + if [[ ${YYYYS} < ${begin_year} ]];then + YYYYS=${begin_year} + fi + if [[ ${YYYYE} > ${end_year} ]];then + YYYYE=${end_year} + fi + ttag=`printf "%04d" ${YYYYS}`01-`printf "%04d" ${YYYYE}`12 + # select the interest period + tmp_file="tmp_combine_${ttag}.nc" + ncrcat -d time,"${YYYYS}-01-01,${YYYYE}-12-31" ${file} ${tmp_file} + # Go through the time serie file, and derive annual cycle climate mean + dofm=(15 46 74 105 135 166 196 227 258 288 319 349) #middle day of month + for month in `seq 1 1 12`; + do + MM=`printf "%02d" ${month}` + MDAY=dofm[${month}-1] + ncra -O -h -F -d "time,${month},,12" ${tmp_file} tmp_clm_${MM}.nc + done + #Concatenate files to form the annual cycle monthly climatology file + combined_name="${PREFIX}.${ttag}.AC.${case_id}.nc" + ncrcat -O -d time,0, tmp_clm_*.nc ${combined_name} + #modify time to avoid issues in pcmdi calculation + ncap2 -O -h -s 'time[time]={15.5, 45, 74.5, 105, 135.5, 166, 196.5, 227.5, 258, 288.5,319, 349.5};time@units="days since 1850-01-01 00:00:00";time@calendar="noleap";' ${combined_name} ${combined_name} + ncap2 -O -h -s 'defdim("bnds",2);time_bnds=make_bounds(time,$bnds,"time_bnds");time_bnds@units=time@units;time_bnds@calendar=time@calendar' ${combined_name} ${combined_name} + rm -rvf tmp_*.nc + if [ $? 
!= 0 ]; then + cd {{ scriptDir }} + echo "ERROR (${error_num})" > {{ prefix }}.status + exit ${error_num} + fi + done + cd .. +} + +create_links_ts_obs() +{ + ts_dir_source=$1 + ts_dir_destination=$2 + begin_year=$3 + end_year=$4 + error_num=$5 + # Create netcdf files for time series variables + mkdir -p ${ts_dir_destination} + cd ${ts_dir_destination} + for file in ${ts_dir_source}/*.nc + do + fname=`basename $file` + PREFIX=${fname: :-17} + YYYYS=${fname: -16:-10} + YYYYE=${fname: -9:-3} + if [[ ${YYYYS} < ${begin_year} ]];then + YYYYS=${begin_year} + fi + if [[ ${YYYYE} > ${end_year} ]];then + YYYYE=${end_year} + fi + ttag=`printf "%04d" ${YYYYS}`01-`printf "%04d" ${YYYYE}`12 + # Go through the time series files and extract analysis period + combined_name=${PREFIX}.${ttag}.nc + ncrcat -d time,${YYYYS}-01-01,${YYYYE}-12-31 ${file} ${combined_name} + #modify time to avoid issues in pcmdi calculation + ncap2 -O -h -s 'defdim("bnds",2);time_bnds=make_bounds(time,$bnds,"time_bnds");time_bnds@units=time@units;time_bnds@calendar=time@calendar' ${combined_name} ${combined_name} + if [ $? != 0 ]; then + cd {{ scriptDir }} + echo "ERROR (${error_num})" > {{ prefix }}.status + exit ${error_num} + fi + done + cd .. 
+} + +{%- if ("mean_climate" in subset) %} +{% if run_type == "model_vs_obs" %} +climo_dir_primary=climo +{% elif run_type == "model_vs_model" %} +climo_dir_primary=climo_test +{%- endif %} +# Create local links to input climo files +climo_dir_source={{ output }}/post/atm/{{ grid }}/cmip_ts/monthly +create_links_acyc_climo ${climo_dir_source} ${climo_dir_primary} ${Y1} ${Y2} ${cmip_name}.${tableID} 1 +{% if run_type == "model_vs_model" %} +# Create local links to input climo files (ref model) +climo_dir_source={{ reference_data_path }} +climo_dir_ref=climo_ref +create_links_acyc_climo ${climo_dir_source} ${climo_dir_ref} ${ref_Y1} ${ref_Y2} ${cmip_name_ref}.${tableID_ref} 2 +{%- endif %} +{%- endif %} -{%- if ("mean_climate" in sets) %} -#file to genereate figures for mean climate metrics(temporary) -cat > mean_climate_plot_parser.py << EOF -{% include clim_plot_parser %} -EOF -#file to genereate figures for mean climate metrics(temporary) -cat > mean_climate_plot_driver.py << EOF -{% include clim_plot_driver %} -EOF +######################## +#prepare the model data +######################## +{%- if ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) or ("enso" in subset) %} +#all diags will be run with ts data +{% if run_type == "model_vs_obs" %} +ts_dir_primary=ts +{% elif run_type == "model_vs_model" %} +ts_dir_primary=ts_test +{%- endif %} +# Create netcdf files for time series variables +ts_dir_source={{ output }}/post/atm/{{ grid }}/cmip_ts/monthly +create_links_ts ${ts_dir_source} ${ts_dir_primary} ${Y1} ${Y2} ${cmip_name}.${tableID} 3 +{% if run_type == "model_vs_model" %} +ts_dir_source={{ reference_data_path_ts }}/{{ ts_num_years_ref }}yr +ts_dir_ref=ts_ref +create_links_ts ${ts_dir_source} ${ts_dir_ref} ${ref_Y1} ${ref_Y2} ${cmip_name_ref}.${tableID_ref} 4 +{%- endif %} {%- endif %} -# script for pcmdi pre-processing -cat > collect_data.py << EOF +######################################################### +#process the derived 
quantities for pcmdi diagnostics. +#this module is created as variables such as rltcre and +#rstcre were not included as default in cmip6 table +#this part can be removed when all variables converated +#during the 'e3sm_to_cmip' step +######################################################### +cat > process_derived_var.py << EOF import os -import subprocess -import time -import psutil -import json -import sys import glob -import collections -import cdms2 -import gc +import json +import time +import datetime +import xarray as xr +import xcdat as xc import numpy as np -from re import split -from itertools import chain -from shutil import copyfile -from subprocess import Popen, PIPE, call +import shutil -def childCount(): - current_process = psutil.Process() - children = current_process.children() - return(len(children)) +import pcmdi_metrics +from pcmdi_metrics.io import ( + xcdat_open +) -def combine_time_series(variables, start_yr, end_yr, num_years, - cmip_name, dir_source, out_dic_file, outpath, - multiprocessing, num_workers): - #special case treatment (variables not in cmip cmor list) - altmod_dic = {"sst" : "ts", - "taux" : "tauu", - "tauy" : "tauv", - "rstcre" : "SWCF", - "rltcre" : "LWCF"} - # list of model data dictionary - var_list = []; lstcm0 = []; lstcm1 = [] - mod_out = collections.OrderedDict() - for key in variables: - if "_" in key or "-" in key: - var = split("_|-", key)[0] - else: - var = key - varin = var - if var in ["areacella", "sftlf", "orog"]: - fpaths = sorted(glob.glob(os.path.join(dir_source,var+"_*.nc"))) - for fpath in fpaths: - if os.path.exists(fpath): - output = os.path.join(outpath,"{}_fx_{}.nc".format(var,product)) - copyfile(fpath,output) - del(fpaths) - else: - fpaths = sorted(glob.glob(os.path.join(dir_source,varin+"_*.nc"))) - ######################################################################################### - #code below attempts to address special scenarios - if len(fpaths) < 1 and var in altmod_dic.keys(): - varin = 
altmod_dic.get(var,var) - if varin == "SWCF" or varin == "LWCF": - dir_source1 = "/".join(dir_source.split("/")[0:-2])+"/ts/monthly/{{ts_num_years}}yr" - fpaths = sorted(glob.glob(os.path.join(dir_source1,varin+"_*.nc"))) - else: - fpaths = sorted(glob.glob(os.path.join(dir_source,varin+"_*.nc"))) - ######################################################################################### - if len(fpaths) > 0: - tableId = fpaths[0].split("/")[-1].split("_")[1] - if tableId not in [ "Amon", "Lmon", "Omon", "SImon" ]: - tableId = "Amon" - yms = '{:04d}01'.format(start_yr) - yme = '{:04d}12'.format(end_yr) - fname = "{}.{}.{}.{}.{}.{}.{}-{}.nc".format( - cmip_name.split(".")[0], - cmip_name.split(".")[1], - cmip_name.split(".")[2].replace(".","-"), - cmip_name.split(".")[3], - tableId,var,yms,yme) - output = os.path.join(outpath,fname) - if (var not in var_list) or (not os.path.exists(output)): - var_list.append(var) - cmd_list = [] - cmd_list.append("ncrcat -v {} -d time,{}-01-01,{}-12-31".format(varin,yms[0:4],yme[0:4])) - for fpath in fpaths: - cmd_list.append(fpath) - cmd_list.append(output) - cdm0 = (" ".join(cmd_list)) - lstcm0.append(cdm0) - del(cmd_list,cdm0) - if varin != var: - cmd_extra = "ncrename -v {},{} {}".format(varin,var,output) - lstcm1.append(cmd_extra) - del(cmd_extra) - ############################################################ - #record the test model data information - mod_out[var] = { "mip" : cmip_name.split(".")[0], - "exp" : cmip_name.split(".")[1], - "model" : cmip_name.split(".")[2].replace(".","-"), - "realization": cmip_name.split(".")[3], - "tableId" : tableId, - "file_path" : output, - "template" : fname, - "start_yymm" : yms, - "end_yymm" : yme, - "varin" : varin } - del(tableId,yms,yme,fname,output) - del(fpaths) - del(var,varin) - gc.collect() - # Save test model data information required for next step - json.dump(mod_out, - open(out_dic_file, "w"), - sort_keys=True, - indent=4, - separators=(",", ": ")) - 
del(mod_out,variables,altmod_dic) - - #finally process the data in parallel - if not os.path.exists(outpath): - os.makedirs(outpath,mode=0o777) - lstall = list(chain(lstcm0,lstcm1)) - lensub = [len(lstcm0),len(lstcm1)] - lensub = np.cumsum(lensub) - 1 - print("Number of jobs starting is ", str(len(lstall))) - procs = [] - for i,p in enumerate(lstall): - print('running %s' % (str(p))) - proc = Popen(p, stdout=PIPE, shell=True) - if multiprocessing == True: - procs.append(proc) - while (childCount() > num_workers): - time.sleep(0.25) - [pp.communicate() for pp in procs] # this will get the exit code - procs = [] - else: - if (i == len(lstall)-1): - try: - outs, errs = proc.communicate() - if proc.returncode == 0: - print("stdout = {}; stderr = {}".format(str(outs),str(errs))) - else: - exit("ERROR: subprocess {} failed".format(str(lstall[i]))) - except: - break - else: - return_code = proc.communicate() - if return_code != 0: - exit("Failed to run {}".format(str(p))) - del(lstall,lensub,lstcm0,lstcm1) +def derive_var(path,vout,var_dic,fname): + for i,var in enumerate(var_dic.keys()): + fpath = sorted(glob.glob(os.path.join(path,"*."+var+".*.nc"))) + df = xcdat_open(fpath[0]) + if i == 0: + template = fpath[0].split("/")[-1] + #construct a copy of file for derived variable + out = os.path.join(path,template.replace(".{}.".format(var),".{}.".format(vout))) + shutil.copy(fpath[0],out) + ds = xcdat_open(fpath[0]) + ds = ds.rename_vars({var:vout}) + ds[vout].data = ds[vout].data * var_dic[var] + else: + ds[vout].data = ds[vout].data + df[var].data * var_dic[var] + ds.to_netcdf(out) + return - #set a delay to esure all process fully done - time.sleep(1) - print("done submitting") +{% if run_type == "model_vs_obs" %} +cmip_groups = ['${cmip_name}.${tableID}'] +{%- if ("mean_climate" in subset) %} +run_groups=['${climo_dir_primary}'] +variables = '{{ cmip_vars }}'.split(",") +{%- elif ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) or ("enso" in 
subset) %} +run_groups=['${ts_dir_primary}'] +variables = '{{ vars }}'.split(",") +{%- endif %} +{% elif run_type == "model_vs_model" %} +cmip_groups = ['${cmip_name}.${tableID}','${cmip_name_ref}.${tableID_ref}'] +{%- if ("mean_climate" in subset) %} +run_groups=['${climo_dir_primary}','${climo_dir_ref}'] +variables = '{{ cmip_vars }}'.split(",") +{%- elif ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) or ("enso" in subset) %} +run_groups=['${ts_dir_primary}','${ts_dir_ref}'] +variables = '{{ vars }}'.split(",") +{%- endif %} +{%- endif %} - if len(var_list) > 0: - print("# of variables available for diagnostics: ", len(var_list)) - else: - exit("ERROR: can not found model variables to process....") - - return var_list - -def locate_ts_observation (variables, obs_sets, start_yr, end_yr, - input_path, out_dic_file, outpath, - multiprocessing, num_workers): - # fixed observational name convention to be consistent with cmip - mip = "obs"; realization = "00"; tableId = "Amon" - # special case treatment (these obs vars are inconsistent with cmor vars) - altobs_dic = { "pr" : "PRECT", - "sst" : "ts", - "sfcWind" : "si10", - "taux" : "tauu", - "tauy" : "tauv", - "rltcre" : "toa_cre_lw_mon", - "rstcre" : "toa_cre_sw_mon", - "rtmt" : "toa_net_all_mon"} - - # find and process observational data avaiable in e3sm_diags - var_list = []; lstcm0 = []; lstcm1 = [] - obs_dic = json.load(open(os.path.join('.','reference_alias.json'))) - obs_out = collections.OrderedDict() - for i,key in enumerate(variables): - if "_" in key or "-" in key: - var = key.split("_|-", var)[0] - else: - var = key - if len(obs_sets) != len(variables): - option = obs_sets[0] - else: - option = obs_sets[i] - if "default" in obs_sets or "alternate" in obs_sets: - obstag = obs_dic[var][option] - else: - inv_map = {v: k for k, v in obs_dic[var].items()} - if len(obs_sets) != len(variables): - obstag = obs_sets[0] - else: - obstag = obs_sets[i] - option = inv_map[obstag] - del(inv_map) 
- varin = var - if "ceres_ebaf" in obstag: - fpaths = sorted(glob.glob(os.path.join(input_path, - obstag.replace('ceres_ebaf','ceres_ebaf*'), - varin+"_*.nc"))) - if len(fpaths) < 1 and var in altobs_dic.keys(): - varin = altobs_dic.get(var,var) - fpaths = sorted(glob.glob(os.path.join(input_path, - obstag.replace('ceres_ebaf','ceres_ebaf*'), - varin+"_*.nc"))) - else: - fpaths = sorted(glob.glob(os.path.join(input_path,obstag,var+"_*.nc"))) - if len(fpaths) < 1 and var in altobs_dic.keys(): - varin = altobs_dic.get(var,var) - fpaths = sorted(glob.glob(os.path.join(input_path,obstag,varin+"_*.nc"))) - - if len(fpaths) > 0 and os.path.exists(fpaths[0]): - template = fpaths[0].split("/")[-1] - obsname = fpaths[0].split("/")[-2] - fyms = template.split("_")[-2][0:6] - fyme = template.split("_")[-1][0:6] - yms = '{:04d}{:02d}'.format(start_yr,1) - yme = '{:04d}{:02d}'.format(end_yr,12) - if int(yms) < int(fyms): - yms = fyms - if int(yme) > int(fyme): - yme = fyme - - #rename file following cmip-like convention - fname = "{}.{}.{}.{}.{}.{}.{}-{}.nc".format( - mip,option,obsname.replace(".","-"),realization,tableId,var,yms,yme) - output = os.path.join(outpath,fname) - if (var not in var_list) or (not os.path.exists(output)): - var_list.append(var) - cmd = "ncrcat -v {} -d time,{}-01-01,{}-12-31 {} {}".format( - varin,yms[0:4],yme[0:4],fpaths[0],output) - lstcm0.append(cmd); del(cmd) - if var != varin: - cmd_extra = "ncrename -v {},{} {}".format(varin,var,output) - lstcm1.append(cmd_extra) - del(cmd_extra) - - #record the observation information - obs_out[var] = { "mip" : mip, - "exp" : option, - "realization" : realization, - "tableId" : tableId, - "model" : obsname, - "file_path" : output, - "template" : fname, - "start_yymm" : yms, - "end_yymm" : yme, - "varin" : varin} - del(template,obsname,fyms,fyme,yms,yme,fname,output) - else : - print("warning: reference data not found for", var) - del(var,varin,option,obstag) - gc.collect() - - # Save observational information 
required for next step - json.dump(obs_out, - open(out_dic_file,"w"), - sort_keys=True, - indent=4, - separators=(",", ": ")) - del(obs_dic,obs_out,obs_sets,altobs_dic) - - #finally process the data in parallel - if not os.path.exists(outpath): - os.makedirs(outpath,mode=0o777) - lstall = list(chain(lstcm0,lstcm1)) - lensub = [len(lstcm0),len(lstcm1)] - lensub = np.cumsum(lensub) - 1 - print("Number of jobs starting is ", str(len(lstall))) - procs = [] - for i,p in enumerate(lstall): - print('running %s' % (str(p))) - proc = Popen(p, stdout=PIPE, shell=True) - if multiprocessing == True: - procs.append(proc) - while (childCount() > num_workers): - time.sleep(0.25) - [pp.communicate() for pp in procs] # this will get the exit code - procs = [] - else: - if (i == len(lstall)-1): - try: - outs, errs = proc.communicate() - if proc.returncode == 0: - print("stdout = {}; stderr = {}".format(str(outs),str(errs))) - else: - exit("ERROR: subprocess {} failed".format(str(lstall[i]))) - except: - break +############################################################### +#check and process derived quantities, these quantities are not +#included as default in e3sm_to_cmip module +############################################################### +for i,group in enumerate(run_groups): + for j,var in enumerate(variables): + if "_" in var or "-" in var: + varin = var.split("_|-", varin)[0] else: - return_code = proc.communicate() - if return_code != 0: - exit("Failed to run {}".format(str(p))) - del(lstall,lensub,lstcm0,lstcm1) + varin = var + if varin in ['rltcre','rstcre']: + fpaths = sorted(glob.glob(os.path.join(group,"*"+var+"_*.nc"))) + if len(fpaths) < 1: + if varin == 'rstcre': + derive_var(group,varin,{'rsutcs':1,'rsut':-1},cmip_groups[i]) + elif varin == 'rltcre': + derive_var(group,varin,{'rlutcs':1,'rlut':-1},cmip_groups[i]) + +EOF +################### +# run process jobs +################### +command="srun -N 1 python -u process_derived_var.py" +time ${command} +if [ $? 
!= 0 ]; then + cd {{ scriptDir }} + echo 'ERROR (5)' > {{ prefix }}.status + exit 5 +fi - #set a delay to avoid delay in writing process - time.sleep(1) - print("done submitting") +{% if run_type == "model_vs_obs" %} +######################################################### +#prepare the observation data. As observation are often +#depends on the source available for analysis, therefore, +#we use external files to help collect the information +#for pcmdi diagnostics. +######################################################### +# Create netcdf files for time series variables +obstmp_dir="obs_link" +mkdir -p ${obstmp_dir} +#create a python module to link observation data +cat > link_observation.py << EOF +import os +import glob +import json +import time +import datetime +import xarray as xr +import xcdat as xc +import numpy as np +import shutil - if len(var_list) > 0: - print("# of variables in observations: ", len(var_list)) +import pcmdi_metrics +from pcmdi_metrics.io import ( + xcdat_open +) + +def derive_var(path,vout,var_dic,fname): + for i,var in enumerate(var_dic.keys()): + fpath = sorted(glob.glob(os.path.join(path,"*."+var+".*.nc"))) + df = xcdat_open(fpath[0]) + if i == 0: + template = fpath[0].split("/")[-1] + #construct a copy of file for derived variable + out = os.path.join(path,template.replace(".{}.".format(var),".{}.".format(vout))) + shutil.copy(fpath[0],out) + ds = xcdat_open(fpath[0]) + ds = ds.rename_vars({var:vout}) + ds[vout].data = ds[vout].data * var_dic[var] + else: + ds[vout].data = ds[vout].data + df[var].data * var_dic[var] + ds.to_netcdf(out) + return + +cmip_name = '${cmip_name_ref}.${tableID_ref}' + +{%- if ("mean_climate" in subset) %} +variables = '{{ cmip_vars }}'.split(",") +obs_sets = '{{ obs_sets }}'.split(",") +{%- elif ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) or ("enso" in subset) %} +variables = '{{ vars }}'.split(",") +obs_sets = '{{ obs_sets }}'.split(",") +{%- endif %} +ts_dir_ref_source = 
'{{ obs_ts }}' + +# variable map from observation to cmip +altobs_dic = { "pr" : "PRECT", + "sst" : "ts", + "sfcWind" : "si10", + "taux" : "tauu", + "tauy" : "tauv", + "rltcre" : "toa_cre_lw_mon", + "rstcre" : "toa_cre_sw_mon", + "rtmt" : "toa_net_all_mon"} + +obs_dic = json.load(open('{{reference_alias}}')) + +#loop each variable and process the data +for i,var in enumerate(variables): + if "_" in var or "-" in var: + varin = var.replace("-", "_").split("_")[0] else: - exit("ERROR: can not found model variables to process....") + varin = var - return var_list + if len(obs_sets) > 1 and len(obs_sets) == len(variables): + obsid = obs_sets[i] + else: + obsid = obs_sets[0] -def main(): - #basic information - start_yr = int('${Y1}') - end_yr = int('${Y2}') - num_years = end_yr - start_yr + 1 + obsname = obs_dic[var][obsid] + if "ceres_ebaf" in obsname: + obsstr = obsname.replace("_","*").replace("-","*") + else: + obsstr = obsname + + fpaths = sorted(glob.glob(os.path.join(ts_dir_ref_source,obsstr,varin+"_*.nc"))) + if (len(fpaths) < 1) and (varin in altobs_dic.keys()): + #these variables were not included as cmip type + varin = altobs_dic[varin] + fpaths = sorted(glob.glob(os.path.join(ts_dir_ref_source,obsstr,varin+"_*.nc"))) + + if (len(fpaths) > 0) and (os.path.exists(fpaths[0])): + template = fpaths[0].split("/")[-1] + yms = template.split("_")[-2][0:6] + yme = template.split("_")[-1][0:6] + obs = obsname.replace(".","_") + out = os.path.join('${obstmp_dir}', + '{}.{}.{}-{}.nc'.format( + cmip_name.replace('%(model)',obs), + var,yms,yme)) + #rename variable if needed then save file + if varin != var: + ds = xcdat_open(fpaths[0]) + ds = ds.rename(name_dict={varin:var}) + ds.to_netcdf(out) + elif not os.path.exists(out): + os.symlink(fpaths[0],out) - multiprocessing = {{multiprocessing}} - num_workers = {{num_workers}} + ##################################################################### + #check and process derived quantities + #note: these quantities are possibly not
included as default in cmip + if varin in ['rltcre','rstcre']: + fpaths = sorted(glob.glob(os.path.join('${obstmp_dir}',"*"+varin+"_*.nc"))) + if len(fpaths) < 1: + if varin == 'rstcre': + derive_var('${obstmp_dir}',varin,{'rsutcs':1,'rsut':-1},cmip_name) + elif varin == 'rltcre': + derive_var('${obstmp_dir}',varin,{'rlutcs':1,'rlut':-1},cmip_name) - # Model - # Test data directory -{% if run_type == "model_vs_obs" %} - test_data_dir = 'ts' -{% elif run_type == "model_vs_model" %} - test_data_dir = 'ts_test' +EOF +################### +# run process jobs +################### +command="srun -N 1 python -u link_observation.py" +time ${command} +if [ $? != 0 ]; then + cd {{ scriptDir }} + echo 'ERROR (6)' > {{ prefix }}.status + exit 6 +fi +####################################################### +#now create obs climo and timeseries for pcmdi diags +#use same period as test model when possible +####################################################### +ts_dir_ref_source="{{ scriptDir }}/${workdir}/${obstmp_dir}" +{%- if ("mean_climate" in subset) %} +climo_dir_ref=climo_ref +create_links_acyc_climo_obs ${ts_dir_ref_source} ${climo_dir_ref} ${Y1} ${Y2} 7 +{%- elif ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) or ("enso" in subset) %} +ts_dir_ref=ts_ref +create_links_ts_obs ${ts_dir_ref_source} ${ts_dir_ref} ${Y1} ${Y2} 8 {%- endif %} - test_name = '${case}' - test_start_yr = start_yr - test_end_yr = end_yr - test_dir_source='{{ output }}/post/atm/{{ grid }}/cmip_ts/monthly' - #info for pcmdi data structure - test_cmip_name = '{{cmip_name}}' +{%- endif %} - #Ref -{% if run_type == "model_vs_obs" %} - # Obs - reference_dir_source = '{{ obs_ts }}' - ref_data_dir = 'ts_ref' - ref_start_yr = {{ ref_start_yr }} - ref_end_yr = ref_start_yr + num_years - 1 - if (ref_end_yr <= {{ ref_final_yr }}): - ref_end_yr = ref_end_yr - else: - ref_end_yr = {{ ref_final_yr }} -{% elif run_type == "model_vs_model" %} - # Reference - reference_dir_source = '{{ 
reference_data_path_ts }}' - ref_data_dir = 'ts_ref' - ref_name = '${ref_name}' - short_ref_name = '{{ short_ref_name }}' - ref_start_yr = {{ ref_start_yr }} - ref_end_yr = {{ ref_final_yr }} - #info for pcmdi data structure - ref_cmip_name = '{{ cmip_name_ref }}' - - # Optionally, swap test and reference model - if {{ swap_test_ref }}: - test_data_dir, ref_data_dir = ref_data_dir, test_data_dir - test_name, ref_name = ref_name, test_name - short_test_name, short_ref_name = short_ref_name, short_test_name - ref_cmip_name, test_cmip_name = test_cmip_name, ref_cmip_name +################################################## +#collect data description and save in a json file +#for the convenience of later processing +################################################## +mkdir -p ${results_dir} +cat > data_info_collect.py << EOF +import os +import glob +import json +import collections +from collections import OrderedDict + +{%- if ("mean_climate" in subset) %} +test = '${climo_dir_primary}' +refr = '${climo_dir_ref}' +variables = '{{ cmip_vars }}'.split(",") +{%- elif ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) %} +test = '${ts_dir_primary}' +refr = '${ts_dir_ref}' +variables = '{{ vars }}'.split(",") +{%- elif ("enso" in subset) %} +test = '${ts_dir_primary}' +refr = '${ts_dir_ref}' +variables = '{{ vars }}'.split(",") {%- endif %} - ################################################################ - # process test model data for comparision - ################################################################ - # variable list in configuration file # - variables = list("{{ vars }}".split(",")) - print("process test model data for comparision") - test_dic_file = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(test_data_dir)) - cmor_vars = combine_time_series(variables,test_start_yr,test_end_yr, - int({{ts_num_years}}),test_cmip_name, - test_dir_source,test_dic_file,test_data_dir, - multiprocessing,num_workers) -
################################################################ - # process reference data for comparison - ################################################################ - print("process reference obs/model data for comparision") +test_data_set = ['${cmip_name}'.split(".")[1]] {% if run_type == "model_vs_obs" %} - obs_sets = list('{{ obs_sets }}'.split(",")) - refr_dic_file = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(ref_data_dir)) - refr_vars = locate_ts_observation(cmor_vars,obs_sets, - ref_start_yr,ref_end_yr, - reference_dir_source, - refr_dic_file,ref_data_dir, - multiprocessing,num_workers) - - print("# of variables in test model: ", len(cmor_vars)) - print("# of variables in reference model: ", len(refr_vars)) - del(refr_vars,cmor_vars) +refr_data_set = '{{ obs_sets }}'.split(",") {% elif run_type == "model_vs_model" %} - refr_dic_file = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(ref_data_dir)) - refr_vars = combine_time_series(cmor_vars,ref_start_yr,ref_end_yr, - int({{ts_num_years_ref}}),ref_cmip_name, - ref_dir_source,refr_dic_file,ref_data_dir, - multiprocessing,num_workers) - - print("# of variables in test model: ", len(cmor_vars)) - print("# of variables in reference model: ", len(refr_vars)) - del(refr_vars,cmor_vars) +refr_data_set = ['${cmip_name_ref}'.split(".")[1]] {%- endif %} -if __name__ == "__main__": - main() +#collect variables when both model and observations are available +refr_dic,test_dic = OrderedDict(),OrderedDict() +for i,var in enumerate(variables): + if "_" in var or "-" in var: + varin = var.replace("-", "_").split("_")[0] + else: + varin = var + test_path = sorted(glob.glob(os.path.join(test,"*.{}.*.nc".format(varin)))) + refr_path = sorted(glob.glob(os.path.join(refr,"*.{}.*.nc".format(varin)))) + if (len(test_path) > 0) and (len(refr_path) > 0): + if (os.path.exists(test_path[0])) and (os.path.exists(refr_path[0])): + for j,path in enumerate([test_path[0],refr_path[0]]): + fname =
path.split("/")[-1] + model = fname.split(".")[2] + sbdic = { "mip" : fname.split(".")[0], + "exp" : fname.split(".")[1], + "model" : fname.split(".")[2], + "realization" : fname.split(".")[3], + "tableID" : fname.split(".")[4], + "yymms" : fname.split(".")[6].split("-")[0], + "yymme" : fname.split(".")[6].split("-")[1], + "var_in_file" : varin, + "var_name" : var, + "file_path" : path, + "template" : fname } + if j == 0: + if var not in test_dic.keys(): + test_dic[var] = {} + if len(test_data_set) != len(variables): + kset = test_data_set[0] + else: + kset = test_data_set[i] + test_dic[var]['set'] = kset + test_dic[var][kset] = model + test_dic[var][model] = sbdic + else: + if var not in refr_dic.keys(): + refr_dic[var] = {} + if len(refr_data_set) != len(variables): + kset = refr_data_set[0] + else: + kset = refr_data_set[i] + refr_dic[var][kset] = model + refr_dic[var][model] = sbdic + refr_dic[var]['set'] = kset + +# Save test and obs/reference data information for next step +for i,group in enumerate([test,refr]): + if i == 0: + out_dic = test_dic + else: + out_dic = refr_dic + out_file = os.path.join( + '${results_dir}', + '{}_{}_catalogue.json'.format(group,'{{subset}}') + ) + json.dump(out_dic, + open(out_file, "w"), + sort_keys=False, + indent=4, + separators=(",", ": ")) EOF - -################################ -# Pcmdi pre-processing to link -# required data to work directory -command="python -u collect_data.py" +##################### +# run process jobs +command="srun -N 1 python -u data_info_collect.py" time ${command} if [ $? != 0 ]; then cd {{ scriptDir }} @@ -507,64 +613,125 @@ if [ $? 
!= 0 ]; then exit 9 fi -################################################################ -# generate input parameter for pcmdi metrics driver -{%- if ("mean_climate" in sets) or ("variability_mode" in sets) or ("enso" in sets) %} +############################################################################## +# land/sea mask is needed in PCMDI diagnostics, check and generate it here as +# these data are not always available for model or observations +############################################################################## +fixed_dir="fixed" +mkdir -p ${fixed_dir} +cat > create_landsea_mask.py << EOF +import os +import glob +import json +import datetime +import numpy as np +import collections +from collections import OrderedDict + +import pcmdi_metrics +from pcmdi_metrics.io import ( + xcdat_open +) +from pcmdi_metrics.utils import ( + create_land_sea_mask +) + +############################################### +# Flag to turn on/off land/sea mask processing +############################################# +if {{ generate_sftlf }} in ['true', 'y', True]: + generate_sftlf = True +else: + generate_sftlf = False + +if generate_sftlf: + +{%- if ("mean_climate" in subset) %} + test = '${climo_dir_primary}' + refr = '${climo_dir_ref}' +{%- elif ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) or ("enso" in subset) %} + test = '${ts_dir_primary}' + refr = '${ts_dir_ref}' +{%- endif %} + + #loop each group and process land/mask if not exist + for group in [test,refr]: + dic_file = os.path.join( + '${results_dir}', + '{}_{}_catalogue.json'.format(group,'{{subset}}') + ) + data_dic = json.load(open(dic_file)) + for var in data_dic.keys(): + mdset = data_dic[var]['set'] + model = data_dic[var][mdset] + mpath = data_dic[var][model]['file_path'] + mpath_lf = os.path.join( + '${fixed_dir}', + 'sftlf.{}.nc'.format(model) + ) + # generate land/sea mask if not exist + if not os.path.exists(mpath_lf): + ds = xcdat_open(mpath, decode_times=True) + ds = 
ds.bounds.add_missing_bounds() + try: + lf_array = create_land_sea_mask(ds, method="pcmdi") + print("land mask is estimated using pcmdi method.") + except Exception: + lf_array = create_land_sea_mask(ds, method="regionmask") + print("land mask is estimated using regionmask method.") + lf_array = lf_array * 100.0 + lf_array.attrs['long_name']= "land_area_fraction" + lf_array.attrs['units'] = "%" + lf_array.attrs['id'] = "sftlf" # Rename + ds_lf = lf_array.to_dataset().compute() + ds_lf = ds_lf.bounds.add_missing_bounds() + ds_lf = ds_lf.rename_vars({"lsmask": "sftlf"}) + ds_lf = ds_lf.fillna(1.0e20) + ds_lf.attrs['model'] = model + ds_lf.attrs['associated_files'] = mpath + ds_lf.attrs['history'] = "File processed: " + datetime.datetime.now().strftime("%Y%m%d") + comp = dict(_FillValue=1.0e20,zlib=True,complevel=5) + encoding = {var: comp for var in list(ds_lf.data_vars)+list(ds_lf.coords)} + ds_lf.to_netcdf(mpath_lf,encoding=encoding) + del(ds,ds_lf,lf_array) +EOF +##################### +# run process script +command="srun -N 1 python -u create_landsea_mask.py" +time ${command} +if [ $?
!= 0 ]; then + cd {{ scriptDir }} + echo 'ERROR (10)' > {{ prefix }}.status + exit 10 +fi + +######################################################## +# generate basic parameter file for pcmdi metrics driver +######################################################## cat > parameterfile.py << EOF import os import sys import json +import numpy as np +import collections +from collections import OrderedDict +##################### #basic information +##################### start_yr = int('${Y1}') end_yr = int('${Y2}') num_years = end_yr - start_yr + 1 +period = "{:04d}{:02d}-{:04d}{:02d}".format(start_yr,1,end_yr,12) -# Model -# Test data path -{% if run_type == "model_vs_obs" %} -test_data_dir = 'ts' -{% elif run_type == "model_vs_model" %} -test_data_dir = 'ts_test' -{%- endif %} -test_name = '${case}' -test_start_yr = start_yr -test_end_yr = end_yr -test_dir_source='{{ output }}/post/atm/{{ grid }}/cmip_ts/monthly' -test_cmip_name = '{{ cmip_name }}' - -# Ref -{% if run_type == "model_vs_obs" %} -# Obs -reference_dir_source = '{{ obs_ts }}' -ref_data_dir = 'ts_ref' -ref_start_yr = {{ ref_start_yr }} -ref_end_yr = ref_start_yr + num_years - 1 -if (ref_end_yr <= {{ ref_final_yr }}): - ref_end_yr = ref_end_yr -else: - ref_end_yr = {{ ref_final_yr }} -{% elif run_type == "model_vs_model" %} -# Reference -reference_dir_source = '{{ reference_data_path_ts }}' -ref_data_dir = 'ts_ref' -ref_name = '${ref_name}' -short_ref_name = '{{ short_ref_name }}' -ref_start_yr = {{ ref_start_yr }} -ref_end_yr = {{ ref_final_yr }} -ref_cmip_name = '{{ cmip_name_ref }}' - -# Optionally, swap test and reference model -if {{ swap_test_ref }}: - test_data_dir, ref_data_dir = ref_data_dir, test_data_dir - test_name, ref_name = ref_name, test_name - short_test_name, short_ref_name = short_ref_name, short_test_name - ref_cmip_name, test_cmip_name = test_cmip_name, ref_cmip_name -{%- endif %} - -# shared options -case_id = "${case_id}" +mip = '${cmip_name}'.split(".")[0] +exp = 
'${cmip_name}'.split(".")[1] +product = '${cmip_name}'.split(".")[2] +realm = '${cmip_name}'.split(".")[3] +############################################## +#Configuration shared with pcmdi diagnostics +############################################## # Record NetCDF output nc_out_obs = {{ nc_out_obs }} nc_out = {{ nc_out }} @@ -572,9 +739,7 @@ if nc_out: ext = ".nc" else: ext = ".xml" - user_notes = 'Provenance and results' -parallel = False debug = {{ pmp_debug }} # Generate plots @@ -585,174 +750,155 @@ plot_obs = {{ plot_obs }} # optional run_type = '{{ run_type }}' figure_format = '{{ figure_format }}' -{%- if "mean_climate" in subset %} -############################################################# -#parameter setup specific for mean climate metrics -############################################################# -mip = test_cmip_name.split(".")[0] -exp = test_cmip_name.split(".")[1] -product = test_cmip_name.split(".")[2] -realm = test_cmip_name.split(".")[3] -realization = realm +# Save interpolated model climatology ? +save_test_clims = {{ save_test_clims }} -{% if run_type == "model_vs_obs" %} -test_data_set = [ test_cmip_name.split(".")[2] ] -{% elif run_type == "model_vs_model" %} -test_data_set = [ test_cmip_name.split(".")[2], ref_cmip_name.split(".")[2] ] -{%- endif %} +# Save Metrics Results in Single File ? 
+# option: 'y' or 'n', set to 'n' as we +# run pcmdi for each variable separately +metrics_in_single_file = 'n' + +# customize land/sea mask values +regions_values = {"land":100.,"ocean":0.} + +#setup template for land/sea mask (fixed) +modpath_lf = os.path.join( + '${fixed_dir}', + 'sftlf.%(model).nc' +) + +############################################ +#setup specific for mean climate metrics +{%- if ("mean_climate" in subset) %} +#case id modver = "${case_id}" -# Generate CMEC compliant json -cmec = {{ cmec }} +#always turn off +parallel = False -# SIMULATION PARAMETER -period = "{:04d}{:02d}-{:04d}{:02d}".format(test_start_yr,1,test_end_yr,12) +#land/sea mask file (already generated) +generate_sftlf = False +sftlf_filename_template = modpath_lf # INTERPOLATION OPTIONS -target_grid = '{{ target_grid }}' # OPTIONS: '2.5x2.5' or an actual cdms2 grid object +# OPTIONS: '2.5x2.5' or an actual cdms2 grid object +target_grid = '{{ target_grid }}' targetGrid = target_grid target_grid_string = '{{ target_grid_string }}' -regrid_tool = '{{ regrid_tool }}' # OPTIONS: 'regrid2','esmf' -regrid_method = '{{ regrid_method }}' # OPTIONS: 'linear','conservative', only if tool is esmf -regrid_tool_ocn = '{{ regrid_tool_ocn }}' # OPTIONS: "regrid2","esmf" -regrid_method_ocn = ( '{{ regrid_method_ocn }}' ) # OPTIONS: 'linear','conservative', only if tool is esmf - -# SAVE INTERPOLATED MODEL CLIMATOLOGIES ? 
-save_test_clims = {{ save_test_clims }} - -# CUSTOMIZE REGIONS VALUES NAMES -regions_values = {"land":100.,"ocean":0.} - -#defined regions -regions_specs = json.load(open(os.path.join(".",'regions_specs.json'))) -for kk in regions_specs.keys(): - if "domain" in regions_specs[kk].keys(): - if "latitude" in regions_specs[kk]['domain'].keys(): - regions_specs[kk]['domain']['latitude'] = tuple(regions_specs[kk]['domain']['latitude']) - if "longitude" in regions_specs[kk]['domain'].keys(): - regions_specs[kk]['domain']['longitude'] = tuple(regions_specs[kk]['domain']['longitude']) - -#region specified for each variable -regions =json.load(open(os.path.join("${results_dir}",'var_region_{{sub}}_catalogue.json'))) +# OPTIONS: 'regrid2','esmf' +regrid_tool = '{{ regrid_tool }}' +# OPTIONS: 'linear','conservative', only if tool is esmf +regrid_method = '{{ regrid_method }}' +# OPTIONS: "regrid2","esmf" +regrid_tool_ocn = '{{ regrid_tool_ocn }}' +# OPTIONS: 'linear','conservative', only if tool is esmf +regrid_method_ocn = ( '{{ regrid_method_ocn }}' ) ####################################### -# DATA LOCATION: MODELS, OBS AND METRICS OUTPUT +# DATA LOCATION: MODELS # --------------------------------------------- +test_data_set = [ product ] +test_data_path = '${climo_dir_primary}' # Templates for model climatology files -test_data_path = os.path.join( - "${results_dir}", - "climo", - "${case_id}") -test_dic = json.load(open(os.path.join("${results_dir}",'{}_{{sub}}_clim_catalogue.json'.format(test_data_dir)))) -template = test_dic['ts'][product]['template'] -filename_template = template.replace('ts',"%(variable)").replace(product,"%(model)") -del(test_dic) - -####################################### -# ROOT PATH FOR OBSERVATIONS -reference_data_set = list('{{ obs_sets }}'.split(",")) -reference_data_path = os.path.join("${results_dir}","climo","${case_id}") -observation_file = os.path.join("${results_dir}",'{}_{{sub}}_clim_catalogue.json'.format(ref_data_dir)) 
-custom_observations = os.path.abspath(observation_file) -if not os.path.exists(custom_observations): - sys.exit("ERROR: observation climatology file is missing....") +filename_template = '.'.join([ + mip, + exp, + '%(model)', + '*', + '${tableID}', + '%(variable)', + period, + 'AC', + '${case_id}', + 'nc' +]) + +#observation info +reference_data_path = '${climo_dir_ref}' +custom_observations = os.path.join( + '${results_dir}', + '{}_{}_catalogue.json'.format( + '${climo_dir_ref}', + '{{subset}}')) + +#load calculated regions for each variable +regions = json.load(open('regions.json')) + +#load predefined region information +regions_specs = json.load(open('{{regions_specs}}')) +for key in regions_specs.keys(): + if "domain" in regions_specs[key].keys(): + if "latitude" in regions_specs[key]['domain'].keys(): + regions_specs[key]['domain']['latitude'] = tuple( + regions_specs[key]['domain']['latitude'] + ) + if "longitude" in regions_specs[key]['domain'].keys(): + regions_specs[key]['domain']['longitude'] = tuple( + regions_specs[key]['domain']['longitude'] + ) ####################################### -# DIRECTORY AND FILENAME FOR OUTPUTING METRICS RESULTS -metrics_in_single_file = 'n' # 'y' or 'n' +# DATA LOCATION: METRICS OUTPUT metrics_output_path = os.path.join( - "${results_dir}", - "metrics_results", - "mean_climate", - mip, - exp, - "%(case_id)" -) # All SAME FILE + '${results_dir}', + 'metrics_results', + 'mean_climate', + mip, + exp, + '%(case_id)' +) + ############################################################ -# DIRECTORY WHERE TO PUT INTERPOLATED MODELS' CLIMATOLOGIES +# DATA LOCATION: INTERPOLATED MODELS' CLIMATOLOGIES diagnostics_output_path= os.path.join( - "${results_dir}", - "diagnostic_results", - "mean_climate", - mip, - exp, - "%(case_id)" + '${results_dir}', + 'diagnostic_results', + 'mean_climate', + mip, + exp, + '%(case_id)' ) - -########################################### -# Templates for MODEL land/sea mask (sftlf) -# depracated in new
version of pcmdi -############################################# -generate_sftlf = {{ generate_sftlf }} -os.path.join("${fixed_dir}","sftlf_%(model).nc") test_clims_interpolated_output = diagnostics_output_path {%- endif %} {%- if "variability_mode" in subset %} -############################################################ -#parameter setup specific for mode variability metrics -############################################################ -mip = test_cmip_name.split(".")[0] -exp = test_cmip_name.split(".")[1] -product = test_cmip_name.split(".")[2] - -{% if run_type == "model_vs_obs" %} -modnames = [ test_cmip_name.split(".")[2] ] -{% elif run_type == "model_vs_model" %} -modnames = [ test_cmip_name.split(".")[2], ref_cmip_name.split(".")[2] ] -{%- endif %} - -realm = test_cmip_name.split(".")[3] -realization = realm - -msyear = test_start_yr -meyear = test_end_yr -osyear = ref_start_yr -oeyear = ref_end_yr - -seasons = list('{{ seasons }}'.split(",")) +######################################## +#setup for mode variability diagnostics +######################################## +seasons = '{{ seasons }}'.split(",") frequency = '{{ frequency }}' #from configuration file -varOBS = '{{vars}}' varModel = '{{vars}}' -ObsUnitsAdjust = {{ ObsUnitsAdjust }} + +#unit conversion (namelist) ModUnitsAdjust = {{ ModUnitsAdjust }} +ObsUnitsAdjust = {{ ObsUnitsAdjust }} # If True, maskout land region thus consider only over ocean landmask = {{ landmask }} -#open dictional file to locate model and reference files -test_dic = json.load(open(os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(test_data_dir)))) -modpath = test_dic[varModel]['file_path'] -model = test_dic[varModel]['model'] -if model != product: - print("warning: model {} in dataset differ from user setup {}".format(model,product)) - print("warning: use model in datasets to continue....") - modnames = [model] -del (test_dic) - -#setup template for fixed files (e.g. 
land/sea mask) -modpath_lf = os.path.join("${fixed_dir}","sftlf_%(model).nc") - -#open dictional file to locate reference data -ref_dic = json.load(open(os.path.join("${results_dir}", - '{}_{{sub}}_mon_catalogue.json'.format(ref_data_dir)))) -reference_data_name = ref_dic[varOBS]['model'] -reference_data_path = ref_dic[varOBS]['file_path'] - -#update time for observation if different -ref_syear = str(ref_dic[varOBS]['start_yymm'])[0:4] -ref_eyear = str(ref_dic[varOBS]['end_yymm'])[0:4] -if int(ref_syear) > osyear: - osyear = int(ref_syear) -if int(ref_eyear) < oeyear: - oeyear = int(ref_eyear) -del(ref_dic,ref_syear,ref_eyear) - -####################################### +#template for model file +modnames = [ product ] +realization = "*" +modpath = '.'.join([ + mip, + exp, + '%(model)', + '*', + '${tableID}', + '%(variable)', + period, + 'AC', + '${case_id}', + 'nc' +]) + +#start and end year for analysis +msyear = int(start_yr) +meyear = int(end_yr) # If True, remove Domain Mean of each time step RmDomainMean = {{ RmDomainMean }} @@ -772,102 +918,51 @@ cmec = {{ cmec }} # Update diagnostic file if exist update_json = {{ update_json }} -####################################### +#results directory structure. 
results_dir = os.path.join( - "${results_dir}", - "%(output_type)", - "variability_modes", - "%(mip)", - "%(exp)", - "${case_id}", - "%(variability_mode)", - "%(reference_data_name)", + '${results_dir}', + '%(output_type)', + 'variability_modes', + '%(mip)', + '%(exp)', + '${case_id}', + '%(variability_mode)', + '%(reference_data_name)', ) {%- endif %} {%- if "enso" in subset %} -############################################################ +########################################### #parameter setup specific for enso metrics -############################################################ -mip = test_cmip_name.split(".")[0] -exp = test_cmip_name.split(".")[1] - -{% if run_type == "model_vs_obs" %} -modnames = [ test_cmip_name.split(".")[2] ] -{% elif run_type == "model_vs_model" %} -modnames = [ test_cmip_name.split(".")[2], ref_cmip_name.split(".")[2] ] -{%- endif %} - -realm = test_cmip_name.split(".")[3] +########################################### +modnames = [ product ] realization = realm -msyear = test_start_yr -meyear = test_end_yr - -osyear = ref_start_yr -oeyear = ref_end_yr - -####################################### -# Model (test) -# setup template for fixed files (e.g. land/sea mask) -modpath_lf = os.path.join("${fixed_dir}","sftlf_%(model).nc") -# construct model template -test_dic = json.load(open(os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(test_data_dir)))) -vv0 = list(test_dic.keys())[0] -tableId = test_dic[vv0]['tableId'] modpath = os.path.join( - test_data_dir, - "%(mip).%(exp).%(model).%(realization)."+tableId+".%(variable)." 
- + '{:04d}{:02d}-{:04d}{:02d}'.format(msyear,1,meyear,12) - + ".nc") -del(test_dic,vv0) - -# OBSERVATIONS -reference_data_path = {} -reference_data_lf_path = {} -#orgnize obs catalog -ref_dic = json.load(open(os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(ref_data_dir)))) -for var in ref_dic: - refname = ref_dic[var]['model'] - if refname not in reference_data_path.keys(): - reference_data_path[refname] = {} - reference_data_path[refname][var] = {'template': ref_dic[var]['template']} - #land/sea mask - reference_data_lf_path[refname] = os.path.join("${fixed_dir}",'sftlf.{}.nc'.format(refname)) - #update time information(minimum overlap) - ref_syear = str(ref_dic[var]['start_yymm'])[0:4] - ref_eyear = str(ref_dic[var]['end_yymm'])[0:4] - if int(ref_syear) > osyear: - osyear = int(ref_syear) - if int(ref_eyear) < oeyear: - oeyear = int(ref_eyear) - del(refname) -del(ref_dic) - -#document the observation catalogue + '${ts_dir_primary}', + '.'.join([mip,exp,'%(model)','%(realization)', + '${tableID}','%(variable)',period,'nc']) +) +#observation/reference file catalogue obs_cmor = True -obs_cmor_path = ref_data_dir -obs_catalogue = 'obs_info_catalogue.json' -json.dump(reference_data_path, - open(obs_catalogue,"w"), - sort_keys=True, - indent=4, - separators=(",", ": ")) -del(reference_data_path) +obs_cmor_path = '${ts_dir_ref}' +obs_catalogue = 'obs_catalogue.json' + +#land/sea mask for obs/reference model +reference_data_lf_path = json.load(open('obs_landmask.json')) -# METRICS COLLECTION (ENSO_perf, ENSO_tel, ENSO_proc) -# will set in main driver +# METRICS COLLECTION (set in namelist, and main driver) # metricsCollection = ENSO_perf # ENSO_perf, ENSO_tel, ENSO_proc # OUTPUT results_dir = os.path.join( - "${results_dir}", - "%(output_type)", - "enso_metric", - "%(mip)", - "%(exp)", - "${case_id}", - "%(metricsCollection)", + '${results_dir}', + '%(output_type)', + 'enso_metric', + '%(mip)', + '%(exp)', + '${case_id}', + '%(metricsCollection)', ) 
json_name = "%(mip)_%(exp)_%(metricsCollection)_${case_id}_%(model)_%(realization)" @@ -877,517 +972,105 @@ netcdf_name = json_name {%- endif %} EOF -{%- endif %} ################################################################ - # Run PCMDI Diags echo echo ===== RUN PCMDI DIAGS ===== echo - # Prepare configuration file cat > pcmdi.py << EOF import os import glob +import glob import json -import re -import sys -import cdms2 -import psutil +import time +import datetime +import xarray as xr +import xcdat as xc import numpy as np + import collections -import subprocess -import time +from collections import OrderedDict + import pcmdi_metrics -from pcmdi_metrics.utils import StringConstructor -from argparse import RawTextHelpFormatter -from shutil import copyfile -from re import split +from pcmdi_metrics.io import ( + xcdat_open +) + +import psutil +import subprocess from itertools import chain from subprocess import Popen, PIPE, call -{%- if "mean_climate" in subset %} -from mean_climate_plot_parser import ( - create_mean_climate_plot_parser, -) -from mean_climate_plot_driver import ( - mean_climate_metrics_plot, -) -{%- endif %} - def childCount(): current_process = psutil.Process() children = current_process.children() return(len(children)) -def generate_land_sea_mask(data_file,outpath): - data_dic = json.load(open(data_file)) - for var in data_dic: - model = data_dic[var]['model'] - mpath = data_dic[var]['file_path'] - mpath_lf = os.path.join(outpath,"sftlf.{}.nc".format(model)) - # generate land/sea mask if not exist - if not os.path.exists(mpath_lf): - print("generate land/sea mask file....") - return_code = call(['python','process_sftlf.py',var,model,mpath,mpath_lf],text=False) - else: - return_code = 0 - del(model,mpath,mpath_lf) - del(data_dic) +start_yr = int('${Y1}') +end_yr = int('${Y2}') +num_years = end_yr - start_yr + 1 - return return_code +#parallel calculation +num_workers = {{ num_workers }} +multiprocessing = {{multiprocessing}} +# DATA LOCATION: 
Reference {%- if "mean_climate" in subset %} -def calculate_climatology(method,start_yr,end_yr,data_dic,out_dic, - outpath,multiprocessing,num_workers): - - #first check the monthly data dictionary - if not os.path.exists(data_dic): - exit("ERROR: monthly data dictionary file not found...") - else: - data_dic = json.load(open(data_dic)) - - if not os.path.exists(outpath): - os.makedirs(outpath,mode=0o777) - - ##################################### - #calculate annual cycle climatology - ##################################### - clim_dic = collections.OrderedDict() - lstcmd = []; lstcm0 = []; lstcm1 = []; lstcm2 = [] - for var in data_dic.keys(): - cyms = '{:04d}-{:02d}'.format(start_yr,1) - cyme = '{:04d}-{:02d}'.format(end_yr,12) - if int(data_dic[var]['start_yymm']) > (start_yr*100+1): - cyms = '{}-{}'.format(str(data_dic[var]['start_yymm'])[0:4], - str(data_dic[var]['start_yymm'])[4:6]) - if int(data_dic[var]['end_yymm']) < (end_yr*100+12): - cyme = '{}-{}'.format(str(data_dic[var]['end_yymm'])[0:4], - str(data_dic[var]['end_yymm'])[4:6]) - infile = data_dic[var]['file_path'] - if os.path.exists(infile): - if method == "pcmdi": - #reform the output file template - outfile = ".".join(data_dic[var]['template'].split(".")[:-2]) + ".nc" - cmd = (" ".join(["pcmdi_compute_climatologies.py", - "--start", cyms, - "--end", cyme, - "--var", var, - "--infile", infile, - "--outpath", outpath+"/", - "--outfilename", outfile ])) - lstcmd.append(cmd); del(cmd,outfile) - else: - # use nco to process mean climatology - # middle month days from January to February - dofm = [15,46,74,105,135,166,196,227,258,288,319,349] - #create a temporary directory to save temporary files - if not os.path.exists("tmpnco"): - os.mkdir("tmpnco",mode=0o777) - #derive annual cycle climate mean - for imon,mday in enumerate(dofm): - tmpfile = os.path.join('tmpnco',"{}_tmp_{:02d}-clim.nc".format(var,imon+1)) - cmd = (" ".join(['ncra -O -h -F -d', - 'time,{},,12'.format(imon+1), - infile,tmpfile])) - 
lstcmd.append(cmd) - cm0 = (" ".join(['ncatted -O -h -a', - 'units,time,o,c,"days since 0001-01-01 00:00:0.0"', - tmpfile,tmpfile])) - lstcm0.append(cm0) - cm1 = (" ".join(['ncap2 -O -h -s', - "'time=time*0+{};defdim({},{});time_bnds=make_bounds(time,{},{})'".format( - mday,'"bnds"',2,'\$bnds','"time_bnds"'), - tmpfile,tmpfile])) - lstcm1.append(cm1); del(cmd,cm0,cm1,tmpfile) - #derive seasonal and annual mean - for season in ["AC", "DJF", "JJA", "MAM", "SON", "ANN"]: - period = "{}-{}".format(cyms.replace("-",""),cyme.replace("-","")) - outpre = ".".join(data_dic[var]['template'].split(".")[:-2]) - outfile = os.path.join(outpath,".".join([outpre,"{}.{}.{}.nc".format(period,season,"${case_id}")])) - if season == "AC": - cm2 = (" ".join(["ncrcat -O -v {} -d time,0,".format(var), - os.path.join('tmpnco',"{}_*_*-clim.nc".format(var)), - outfile])) - elif season == "DJF": - cm2 = (" ".join(["ncra -O -h", - os.path.join('tmpnco',"{}_*_12-clim.nc".format(var)), - os.path.join('tmpnco',"{}_*_01-clim.nc".format(var)), - os.path.join('tmpnco',"{}_*_02-clim.nc".format(var)), - outfile])) - elif season == "JJA": - cm2 = (" ".join(["ncra -O -h", - os.path.join('tmpnco',"{}_*_06-clim.nc".format(var)), - os.path.join('tmpnco',"{}_*_07-clim.nc".format(var)), - os.path.join('tmpnco',"{}_*_08-clim.nc".format(var)), - outfile])) - elif season == "MAM": - cm2 = (" ".join(["ncra -O -h", - os.path.join('tmpnco',"{}_*_03-clim.nc".format(var)), - os.path.join('tmpnco',"{}_*_04-clim.nc".format(var)), - os.path.join('tmpnco',"{}_*_05-clim.nc".format(var)), - outfile])) - elif season == "SON": - cm2 = (" ".join(["ncra -O -h", - os.path.join('tmpnco',"{}_*_09-clim.nc".format(var)), - os.path.join('tmpnco',"{}_*_10-clim.nc".format(var)), - os.path.join('tmpnco',"{}_*_11-clim.nc".format(var)), - outfile])) - elif season == "ANN": - cm2 = (" ".join(["ncra -O -h", - os.path.join('tmpnco',"{}_*_*-clim.nc".format(var)), - outfile])) - lstcm2.append(cm2); del(cm2,period,outfile,outpre) - #document 
climatology info in dictionary file# - period = "{}-{}".format(cyms.replace("-",""),cyme.replace("-","")) - template = ".".join(data_dic[var]['template'].split(".")[:-2]) + \ - ".{}.AC.{}.nc".format(period,"${case_id}") - clim_dic[var] = {data_dic[var]['exp'] : data_dic[var]['model'], - data_dic[var]['model'] : {'template' : template, - 'period' : period, - 'data_path' : outpath}} - #save climatology dictionary - json.dump(clim_dic, - open(out_dic,"w"), - sort_keys=True, - indent=4, - separators=(",", ": ")) - - #finally process the data in parallela - if method == "pcmdi": - print("Number of jobs starting is ", str(len(lstcmd))) - procs = [] - for i,p in enumerate(lstcmd): - print('running %s' % (str(p))) - proc = Popen(p, stdout=PIPE, shell=True) - if multiprocessing == True: - procs.append(proc) - while (childCount() > num_workers): - time.sleep(0.25) - [pp.communicate() for pp in procs] # this will get the exit code - procs = [] - else: - if (i == len(lstcmd)-1): - try: - outs, errs = proc.communicate() - if proc.returncode == 0: - print("stdout = {}; stderr = {}".format(str(outs),str(errs))) - else: - exit("ERROR: subprocess {} failed".format(str(lstcmd[i]))) - except: - break - else: - return_code = proc.communicate() - if return_code != 0: - exit("Failed to run {}".format(str(p))) - elif method == "nco": - lstall = list(chain(lstcmd,lstcm0,lstcm1,lstcm2)) - lensub = [len(lstcmd),len(lstcm0),len(lstcm1),len(lstcm2)] - lensub = np.cumsum(lensub) - 1 - print("Number of jobs starting is ", str(len(lstall))) - procs = [] - for i,p in enumerate(lstall): - print('running %s' % (str(p))) - proc = Popen(p, stdout=PIPE, shell=True) - if multiprocessing == True: - procs.append(proc) - while (childCount() > num_workers): - time.sleep(0.25) - [pp.communicate() for pp in procs] # this will get the exit code - procs = [] - else: - if (i == len(lstall)-1): - try: - outs, errs = proc.communicate() - if proc.returncode == 0: - print("stdout = {}; stderr = 
{}".format(str(outs),str(errs))) - else: - exit("ERROR: subprocess {} failed".format(str(lstall[i]))) - except: - break - else: - return_code = proc.communicate() - if return_code != 0: - exit("Failed to run {}".format(str(p))) - # clean the temporary files - for tmpfil in glob.glob(os.path.join('tmpnco',"_*_*-clim.nc".format(var))): - if os.path.exists(tmpfil): - os.remove(tmpfil) - - # add a delay to ensure the processing fully done - time.sleep(1) - print("done submitting") - del(lstcmd,lstcm0,lstall,lstcm1,lstcm2,clim_dic,data_dic) - - return - -def calculate_derived_variable(var,data_dic,data_path): - #################################################### - #this function is used to calculate a quantity given - #the data documented in the data_dic passed by user - #derived_variable.json is a file documen the rules to - #calculate the required diagnostic variables - ##################################################### - derive_dic = json.load(open("derived_variable.json")) - vsublist = []; operator = [] - #collect the variable and operation rulse for derivation - for vv in derive_dic[var]: - vsublist.append(vv) - operator.append(derive_dic[var][vv]) - - #now search data file and judge if the derivation is possible - l_derive = True - for i,vv in enumerate(vsublist): - infile = data_dic[vv]['data_path'] - if i == 0: - outfile = infile.replace(vv,var) - if (not os.path.exists(infile)) or (os.path.exists(outfile)): - l_derive = False - - # finally do derivation - if l_derive: - for i,vv in enumerate(derive_dic[var].keys()): - infile = data_dic[vv]['data_path'] - f = cdms2.open(infile) - if i == 0: - d = f(vv) * operator[i] - else: - d = d + f(vv) * operator[i] - f.close() - del(infile) - f = cdms2.open(outfile,'w') - f.write(d) - f.close() - del(d,outfile,f) - outdic = {'template' : outfile.split("/")[-1], - 'data_path' : outfile} - del(derive_dic,vsublist,operator) - - return outdic, outfile - -{%- endif %} - -def main (): - start_yr = int('${Y1}') - end_yr = 
int('${Y2}') - num_years = end_yr - start_yr + 1 - - num_workers = {{ num_workers }} - multiprocessing = {{multiprocessing}} - - # Model - # Test data directory -{% if run_type == "model_vs_obs" %} - test_data_dir = 'ts' -{% elif run_type == "model_vs_model" %} - test_data_dir = 'ts_test' -{%- endif %} - test_name = '${case}' - test_start_yr = start_yr - test_end_yr = end_yr - test_dir_source='{{ output }}/post/atm/{{ grid }}/cmip_ts/monthly' - test_cmip_name = '{{ cmip_name }}' - - # Ref -{% if run_type == "model_vs_obs" %} - # Obs - reference_dir_source = '{{ obs_ts }}' - ref_data_dir = 'ts_ref' - ref_start_yr = {{ ref_start_yr }} - ref_end_yr = ref_start_yr + num_years - 1 - if (ref_end_yr <= {{ ref_final_yr }}): - ref_end_yr = ref_end_yr - else: - ref_end_yr = {{ ref_final_yr }} -{% elif run_type == "model_vs_model" %} - # Reference - reference_dir_source = '{{ reference_data_path_ts }}' - ref_data_dir = 'ts_ref' - ref_name = '${ref_name}' - short_ref_name = '{{ short_ref_name }}' - ref_start_yr = {{ ref_start_yr }} - ref_end_yr = {{ ref_final_yr }} - ref_cmip_name = '{{ cmip_name_ref }}' - - # Optionally, swap test and reference model - if {{ swap_test_ref }}: - test_data_dir, ref_data_dir = ref_data_dir, test_data_dir - test_name, ref_name = ref_name, test_name - short_test_name, short_ref_name = short_ref_name, short_test_name - ref_cmip_name, test_cmip_name = test_cmip_name, ref_cmip_name +reference_data_path = '${climo_dir_ref}' +{%- elif ("variability_mode" in subset) or ("enso" in subset) %} +reference_data_path = '${ts_dir_ref}' {%- endif %} +observation_file = os.path.join( + '${results_dir}', + '{}_{}_catalogue.json'.format( + reference_data_path, + '{{subset}}') +) +obs_dic = json.load(open(observation_file)) - ################################################################################ - # land/sea mask is needed in PCMDI diagnostics, check and generate it here as - # these data are not always available for model or observations - 
################################################################################ - # Model - test_dic = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(test_data_dir)) - return_code = generate_land_sea_mask(test_dic,"${fixed_dir}") - if return_code != 0: - exit("Failed to generate land/sea mask...") - del(test_dic) - # Reference - ref_dic = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(ref_data_dir)) - return_code = generate_land_sea_mask(ref_dic,"${fixed_dir}") - if return_code != 0: - exit("Failed to generate land/sea mask...") - del(ref_dic) - - # Run PCMDI for diagnostics {%- if "mean_climate" in subset %} - ##################################################################### - # calculate test and reference model climatology - ##################################################################### - print("calculate mean climate diagnostics") - outpath = os.path.join("${results_dir}","climo","${case_id}") - method = '{{climatology_process_method}}' - for key in ["test","ref"]: - if key == "test": - data_dir = test_data_dir - start_yr = test_start_yr - end_yr = test_end_yr - elif key == "ref": - data_dir = ref_data_dir - start_yr = ref_start_yr - end_yr = ref_end_yr - data_dic = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(data_dir)) - clim_dic = os.path.join("${results_dir}",'{}_{{sub}}_clim_catalogue.json'.format(data_dir)) - if method in [ "pcmdi", "PCMDI", "default" ]: - #method 1: built in PCMDI package (may have memory issue for highres data) - calculate_climatology("pcmdi",start_yr,end_yr,data_dic,clim_dic,outpath,multiprocessing,num_workers) - elif method in [ "nco", "NCO", "alternate"]: - #method 2: use nco package(default,faster) - calculate_climatology("nco",start_yr,end_yr,data_dic,clim_dic,outpath,multiprocessing,num_workers) - if not os.path.exists(clim_dic): - exist("ERROR: failed to process data climatology....") - del(data_dir,start_yr,end_yr,data_dic,clim_dic) - - 
##################################################################### - # call mean_climate_driver.py to process diagnostics - ##################################################################### - #defined regions - regional = '{{ regional }}' - if regional == "y": - default_regions = list('{{ regions }}'.split(",")) - else: - default_regions = ["global", "NHEX", "SHEX", "TROPICS"] - # create command list for mean climate driver - lstcmd = [] - reg_var_dic = {} - for vv in list("{{vars}}".split(",")): - vkys = vv.split("-")[0] - reg_var_dic[vkys] = default_regions - vars = vv - cmd = (" ".join(["mean_climate_driver.py", - "-p", "parameterfile.py", - "--vars", '{}'.format(vars)])) - lstcmd.append(cmd); del(cmd,vars,vkys) - - #create regions for regional mean of each variable - json.dump(reg_var_dic, - open(os.path.join("${results_dir}",'var_region_{{sub}}_catalogue.json'),"w"), - sort_keys=True, - indent=4, - separators=(",", ": ")) - - #finally process the data in parallel - print("Number of jobs starting is ", str(len(lstcmd))) - procs = [] - if len(lstcmd) > 0: - for i,p in enumerate(lstcmd): - print('running %s' % (str(p))) - proc = Popen(p, stdout=PIPE, shell=True) - if multiprocessing == True: - procs.append(proc) - while (childCount() > num_workers): - time.sleep(0.25) - [pp.communicate() for pp in procs] - procs = [] - else: - if (i == len(lstcmd)-1): - try: - outs, errs = proc.communicate() - if proc.returncode == 0: - print("stdout = {}; stderr = {}".format(str(outs),str(errs))) - else: - exit("ERROR: subprocess {} failed".format(str(lstcmd[i]))) - except: - break - else: - return_code = proc.communicate() - if return_code != 0: - exit("Failed to run {}".format(str(p))) - - #set a delay to avoid delay in writing process - time.sleep(1) - print("done submitting") - del(reg_var_dic,regional,lstcmd) - - #generate diagnostics figures - print("--- prepare for mean climate metrics plot ---") - parser = create_mean_climate_plot_parser() - parameter = 
parser.get_parameter(argparse_vals_only=False) - parameter.regions = default_regions - parameter.run_type = "${run_type}" - parameter.period = "{}-{}".format(test_start_yr,test_end_yr) - parameter.pcmdi_data_set = "{{pcmdi_data_set}}" - parameter.pcmdi_data_path = os.path.join('{{pcmdi_data_path}}',"mean_climate") - parameter.test_data_set = "{}.{}".format(test_cmip_name,"${case_id}") - parameter.test_data_path = os.path.join("${results_dir}","metrics_results","mean_climate") -{% if run_type == "model_vs_obs" %} - parameter.refr_data_set = "" - parameter.refr_period = "" - parameter.refr_data_path = "" -{% elif run_type == "model_vs_model" %} - parameter.refr_data_set = "{}.{}".format(ref_cmip_name,"${case_id}") - parameter.refr_period = "{}-{}".format(ref_start_yr,ref_end_yr) - parameter.refr_data_path = os.path.join("${results_dir}","metrics_results","mean_climate") -{%- endif %} - parameter.output_path = os.path.join("${results_dir}","graphics","mean_climate") - parameter.ftype = '{{ figure_format }}' - parameter.debug = {{ pmp_debug }} - parameter.parcord_show_markers = {{parcord_show_markers}} #False - parameter.add_vertical_line = {{portrait_vertical_line}} #True - - #generate diagnostics figures - print("--- generate mean climate metrics plot ---") - mean_climate_metrics_plot(parameter) - del(parameter) - -{%- endif %} +###################################### +# call pcmdi mean climate diagnostics +##################################### +#customized region, otherwise default +regional = '{{ regional }}' +if regional == "y": + default_regions = '{{ regions }}'.split(",") +else: + default_regions = ["global", "NHEX", "SHEX", "TROPICS"] + +################################################### +# generate the command list for each reference and +# each variable (will execuate in parallel later) +lstcmd = [] +regv_dic = OrderedDict() +for var in "{{vars}}".split(","): + if var in obs_dic.keys(): + vkey = var.split("-")[0] + refset = obs_dic[var]['set'] + 
regv_dic[vkey] = default_regions + lstcmd.append(" ".join([ + 'mean_climate_driver.py', + '-p parameterfile.py' , + '--vars' , '{}'.format(var), + '-r' , '{}'.format(refset), + '--varname_in_test_data', '{}'.format(vkey), + '--case_id' , '{}'.format('${case_id}') + ])) + +#save region info dictionary +json.dump(regv_dic, + open('regions.json', "w"), + sort_keys=False, + indent=4, + separators=(",", ": ")) -{%- if "variability_mode" in subset %} - print("calculate mode variability metrics") -{%- if subset == "variability_mode_atm" %} - modes = list({{ atm_modes }}) -{% elif subset == "variability_mode_cpl" %} - modes = list({{ cpl_modes }}) -{%- endif %} - ##################################################################### - # call variability_modes_driver.py to process diagnostics - ##################################################################### - lstcmd = [] - for variability_mode in modes: - if variability_mode in ["NPO", "NPGO", "PSA1"]: - eofn_obs = "2" - eofn_mod = "2" - elif variability_mode in ["PSA2"]: - eofn_obs = "3" - eofn_mod = "3" - else: - eofn_obs = "1" - eofn_mod = "1" - cmd = (" ".join(['variability_modes_driver.py', - '-p', "parameterfile.py", - '--variability_mode', variability_mode, - '--eofn_mod', eofn_mod, - '--eofn_obs', eofn_obs ])) - lstcmd.append(cmd); del(cmd) - #finally process the data in parallel - print("Number of jobs starting is ", str(len(lstcmd))) - procs = [] +#finally process the data in parallel +print("Number of jobs starting is ", str(len(lstcmd))) +procs = [] +if len(lstcmd) > 0: for i,p in enumerate(lstcmd): print('running %s' % (str(p))) proc = Popen(p, stdout=PIPE, shell=True) @@ -1395,7 +1078,7 @@ def main (): procs.append(proc) while (childCount() > num_workers): time.sleep(0.25) - [pp.communicate() for pp in procs] # this will get the exit code + [pp.communicate() for pp in procs] procs = [] else: if (i == len(lstcmd)-1): @@ -1411,32 +1094,69 @@ def main (): return_code = proc.communicate() if return_code != 0: 
exit("Failed to run {}".format(str(p))) - #set a delay to avoid delay in writing process - time.sleep(1) - print("done submitting") - del(lstcmd) + +#set a delay to avoid delay in writing process +time.sleep(1) +print("done submitting") + {%- endif %} -{%- if "enso" in subset %} - ##################################################################### - # call enso_driver.py to process diagnostics - ##################################################################### - print("calculate enso metrics") - groups = list({{ groups }}) - lstcmd = [] - for metricsCollection in groups: - cmd = (" ".join(['enso_driver.py', - '-p', "parameterfile.py", - '--metricsCollection',metricsCollection])) - lstcmd.append(cmd); del(cmd) - #finally process the data in parallel - print("Number of jobs starting is ", str(len(lstcmd))) - procs = [] - for i,p in enumerate(lstcmd): - print('running %s' % (str(p))) - proc = Popen(p, stdout=PIPE, shell=True) +{%- if "variability_mode" in subset %} +########################################## +# call pcmdi mode variability diagnostics +########################################## +print("calculate mode variability metrics") + +{%- if subset == "variability_mode_atm" %} +var_modes = '{{ atm_modes }}'.split(",") +{% elif subset == "variability_mode_cpl" %} +var_modes = '{{ cpl_modes }}'.split(",") +{%- endif %} + +#from configuration file +varOBS = '{{vars}}' +refset = obs_dic[varOBS]['set'] +refname = obs_dic[varOBS][refset] +refpath = obs_dic[varOBS][refname]['file_path'] +reftyrs = int(str(obs_dic[varOBS][refname]['yymms'])[0:4]) +reftyre = int(str(obs_dic[varOBS][refname]['yymme'])[0:4]) + +lstcmd = [] +for variability_mode in var_modes: + if variability_mode in ["NPO", "NPGO", "PSA1"]: + eofn_obs = "2" + eofn_mod = "2" + elif variability_mode in ["PSA2"]: + eofn_obs = "3" + eofn_mod = "3" + else: + eofn_obs = "1" + eofn_mod = "1" + ############################################## + cmd = (" ".join([ + 'variability_modes_driver.py', + '-p 
parameterfile.py' , + '--variability_mode' , '{}'.format(variability_mode), + '--eofn_mod' , '{}'.format(eofn_mod), + '--eofn_obs' , '{}'.format(eofn_obs), + '--varOBS' , '{}'.format(varOBS), + '--osyear' , '{}'.format(reftyrs), + '--oeyear' , '{}'.format(reftyre), + '--reference_data_name' , '{}'.format(refname), + '--reference_data_path' , '{}'.format(refpath), + '--case_id' , '{}'.format('${case_id}') + ])) + lstcmd.append(cmd); del(cmd) + +#finally process the data in parallel +print("Number of jobs starting is ", str(len(lstcmd))) +procs = [] +for i,p in enumerate(lstcmd): + print('running %s' % (str(p))) + proc = Popen(p, stdout=PIPE, shell=True) + if multiprocessing == True: procs.append(proc) - while (childCount() > {{num_workers}}): + while (childCount() > num_workers): time.sleep(0.25) [pp.communicate() for pp in procs] # this will get the exit code procs = [] @@ -1450,29 +1170,183 @@ def main (): exit("ERROR: subprocess {} failed".format(str(lstcmd[i]))) except: break - #set a delay to avoid delay in writing process - time.sleep(1) - print("done submitting") - del(lstcmd,procs) + else: + return_code = proc.communicate() + if return_code != 0: + exit("Failed to run {}".format(str(p))) +#set a delay to avoid delay in writing process +time.sleep(1) +print("done submitting") +del(lstcmd) {%- endif %} -if __name__ == "__main__": - main() +{%- if "enso" in subset %} +############################################# +# call enso_driver.py to process diagnostics +############################################# +#reorgnize observation needed for enso driver +refr_dic = OrderedDict() +relf_dic = OrderedDict() +for var in list("{{vars}}".split(",")): + vkey = var.split("-")[0] + refset = obs_dic[var]['set'] + refname = obs_dic[var][refset] + #data file in model->var sequence + if refname not in refr_dic.keys(): + refr_dic[refname] = {} + refr_dic[refname][var] = obs_dic[var][refname] + #land/sea mask + if refname not in relf_dic.keys(): + relf_dic[refname] = 
os.path.join( + "${fixed_dir}", + 'sftlf.{}.nc'.format(refname)) + +#save data file dictionary +json.dump(refr_dic, + open('obs_catalogue.json', "w"), + sort_keys=False, + indent=4, + separators=(",", ": ")) + +#save land/sea mask dictionary +json.dump(relf_dic, + open('obs_landmask.json', "w"), + sort_keys=False, + indent=4, + separators=(",", ": ")) + +#now start enso driver +print("calculate enso metrics") +enso_groups = '{{ enso_groups }}'.split(",") +lstcmd = [] +for metricsCollection in enso_groups: + cmd = (" ".join([ + 'enso_driver.py ', + '-p parameterfile.py', + '--metricsCollection', '{}'.format(metricsCollection), + '--case_id' , '{}'.format('${case_id}') + ])) + lstcmd.append(cmd); del(cmd) + +print("Number of jobs starting: ", str(len(lstcmd))) + +#finally process the data in parallel +procs = [] +for i,p in enumerate(lstcmd): + print('running %s' % (str(p))) + proc = Popen(p, stdout=PIPE, shell=True) + procs.append(proc) + while (childCount() > {{num_workers}}): + time.sleep(0.25) + [pp.communicate() for pp in procs] # this will get the exit code + procs = [] + else: + if (i == len(lstcmd)-1): + try: + outs, errs = proc.communicate() + if proc.returncode == 0: + print("stdout = {}; stderr = {}".format(str(outs),str(errs))) + else: + exit("ERROR: subprocess {} failed".format(str(lstcmd[i]))) + except: + break +#set a delay to avoid delay in writing process +time.sleep(1) +print("done submitting") +{%- endif %} +EOF +################################ +# Run diagnostics +command="srun -N 1 python -u pcmdi.py" +# Run diagnostics +time ${command} +if [ $? 
!= 0 ]; then + cd {{ scriptDir }} + echo 'ERROR (11)' > {{ prefix }}.status + exit 11 +fi + +################################################################ +# this post-processing module is to generate sythentic metrics +# for mean-climate diagnostics (compared with cmip model results) +################################################################ +{%- if "mean_climate" in subset %} +echo +echo ===== RUN PCMDI POST-PROCESSING ===== +echo +# Prepare configuration file +cat > post_processing.py << EOF +import os +import glob +import glob +import json +import time +import datetime +import xarray as xr +import xcdat as xc +import numpy as np +import pcmdi_metrics + +# external module for plot +{%- if ("mean_climate" in subset) %} +import {{clim_plot_parser}} +import {{clim_plot_driver}} +{%- endif %} + +#customized region, otherwise default +regional = '{{ regional }}' +if regional == "y": + default_regions = '{{ regions }}'.split(",") +else: + default_regions = ["global", "NHEX", "SHEX", "TROPICS"] + +#generate diagnostics figures +print("--- prepare for mean climate metrics plot ---") +parser = create_mean_climate_plot_parser() +parameter = parser.get_parameter(argparse_vals_only=False) +parameter.regions = default_regions +parameter.run_type = "${run_type}" +parameter.period = "{:04d}-{:04d}".format(${Y1},${Y2}) +parameter.pcmdi_data_set = "{{pcmdi_data_set}}" +parameter.pcmdi_data_path = os.path.join('{{pcmdi_data_path}}',"mean_climate") +parameter.test_data_set = "{}.{}".format(${cmip_name},"${case_id}") +parameter.test_data_path = os.path.join("${results_dir}","metrics_results","mean_climate") + +{% if run_type == "model_vs_obs" %} +parameter.refr_data_set = "" +parameter.refr_period = "" +parameter.refr_data_path = "" +{% elif run_type == "model_vs_model" %} +parameter.refr_data_set = "{}.{}".format(${cmip_name_ref},"${case_id}") +parameter.refr_period = "{}-{}".format(${ref_Y1},${ref_Y2}) +parameter.refr_data_path = 
os.path.join("${results_dir}","metrics_results","mean_climate") +{%- endif %} + +parameter.output_path = os.path.join("${results_dir}","graphics","mean_climate") +parameter.ftype = '{{ figure_format }}' +parameter.debug = {{ pmp_debug }} +parameter.parcord_show_markers = {{parcord_show_markers}} #False +parameter.add_vertical_line = {{portrait_vertical_line}} #True + +#generate diagnostics figures +print("--- generate mean climate metrics plot ---") +mean_climate_metrics_plot(parameter) EOF ################################ # Run diagnostics -#command="srun -n 1 python -u pcmdi.py" -command="python -u pcmdi.py" +command="srun -N 1 python -u post_processing.py" # Run diagnostics time ${command} if [ $? != 0 ]; then cd {{ scriptDir }} - echo 'ERROR (10)' > {{ prefix }}.status - exit 9 + echo 'ERROR (12)' > {{ prefix }}.status + exit 12 fi +{% endif %} +################################# # Copy output to web server echo echo ===== COPY FILES TO WEB SERVER ===== @@ -1483,8 +1357,8 @@ web_dir=${www}/${case}/pcmdi_diags #/{{ sub }} mkdir -p ${web_dir} if [ $? != 0 ]; then cd {{ scriptDir }} - echo 'ERROR (10)' > {{ prefix }}.status - exit 10 + echo 'ERROR (13)' > {{ prefix }}.status + exit 13 fi {% if machine in ['pm-cpu', 'pm-gpu'] %} @@ -1506,8 +1380,8 @@ done rsync -a ${results_dir} ${web_dir}/ if [ $? 
!= 0 ]; then cd {{ scriptDir }} - echo 'ERROR (11)' > {{ prefix }}.status - exit 11 + echo 'ERROR (14)' > {{ prefix }}.status + exit 14 fi {% if machine in ['pm-cpu', 'pm-gpu'] %} From b6c638e0401a5059f1bf9edcb1fd593fe6b83b66 Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Mon, 30 Dec 2024 16:24:56 -0600 Subject: [PATCH 04/23] move the needed modules to inclusions --- .../pcmdi/mean_climate_plot_driver.py | 670 ++++++++++++++++++ .../pcmdi/mean_climate_plot_parser.py | 373 ++++++++++ .../inclusions/pcmdi/reference_alias.json | 341 +++++++++ .../inclusions/pcmdi/regions_specs.json | 263 +++++++ 4 files changed, 1647 insertions(+) create mode 100755 zppy/templates/inclusions/pcmdi/mean_climate_plot_driver.py create mode 100755 zppy/templates/inclusions/pcmdi/mean_climate_plot_parser.py create mode 100755 zppy/templates/inclusions/pcmdi/reference_alias.json create mode 100755 zppy/templates/inclusions/pcmdi/regions_specs.json diff --git a/zppy/templates/inclusions/pcmdi/mean_climate_plot_driver.py b/zppy/templates/inclusions/pcmdi/mean_climate_plot_driver.py new file mode 100755 index 00000000..a85cf4e0 --- /dev/null +++ b/zppy/templates/inclusions/pcmdi/mean_climate_plot_driver.py @@ -0,0 +1,670 @@ +#!/bin/env python +############################################################################## +# This model is used to generate mean climate diagnostic figures +# Author: Shixuan Zhang (shixuan.zhang@pnnl.gov) +############################################################################# +import os +import shutil + +import numpy as np +import pandas as pd +from mean_climate_plot_parser import ( + fill_plot_var_and_units, + find_metrics_data, + metrics_inquire, + shift_row_to_bottom, +) +from pcmdi_metrics.graphics import ( + Metrics, + normalize_by_median, + parallel_coordinate_plot, + portrait_plot, +) + + +def load_test_model_data(test_file, refr_file, mip, run_type): + # load the data and reorganize if needed + pd.set_option("future.no_silent_downcasting", True) 
+ test_lib = Metrics(test_file) + + # model_vs_model, merge the reference model data into test model + if run_type == "model_vs_model": + refr_lib = Metrics(refr_file) + test_lib = test_lib.merge(refr_lib) + del refr_lib + + # collect and reorgnize test model data for plotting: + test_models = [] + for stat in test_lib.df_dict: + for season in test_lib.df_dict[stat]: + for region in test_lib.df_dict[stat][season]: + df = pd.DataFrame(test_lib.df_dict[stat][season][region]) + for i, model in enumerate(df["model"].tolist()): + model_run = df["model_run"].tolist()[i] + new_name = "{}-{}".format(mip.upper(), model_run.upper()) + idxs = df[df.iloc[:, 2] == model_run].index + df.loc[idxs, "model"] = list( + map( + lambda x: x.replace(model, new_name), + df.loc[idxs, "model"], + ) + ) + if new_name not in test_models: + test_models.append(new_name) + test_lib.df_dict[stat][season][region] = df + del df + return test_models, test_lib + + +def load_cmip_metrics_data(cmip_file): + # collect cmip multi-model ensemble data for comparison + pd.set_option("future.no_silent_downcasting", True) + cmip_lib = Metrics(cmip_file) + cmip_models = [] + highlight_models = [] + for stat in cmip_lib.df_dict: + for season in cmip_lib.df_dict[stat]: + for region in cmip_lib.df_dict[stat][season]: + # now find all E3SM models in cmip6 + df = pd.DataFrame(cmip_lib.df_dict[stat][season][region]) + for model in df["model"].tolist(): + if model not in cmip_models: + cmip_models.append(model) + if ("e3sm" in model.lower()) and (model not in highlight_models): + highlight_models.append(model) + # move highlight_models to the end + for model in highlight_models: + idxs = df[df.iloc[:, 0] == model].index + cmip_models.remove(model) + cmip_models.append(model) + for idx in idxs: + df = shift_row_to_bottom(df, idx) + cmip_lib.df_dict[stat][season][region] = df + del df + return cmip_models, highlight_models, cmip_lib + + +def save_figure_data( + stat, region, season, var_names, var_units, data_dict, 
template, outdir +): + # construct output file name + fname = ( + template.replace("%(metric)", stat) + .replace("%(region)", region) + .replace("%(season)", season) + ) + outfile = os.path.join(outdir, fname) + outdic = pd.DataFrame(data_dict) + outdic = outdic.drop(columns=["model_run"]) + for var in list(outdic.columns.values[3:]): + if var not in var_names: + print("{} is excluded from the {}".format(var, fname)) + outdic = outdic.drop(columns=[var]) + else: + # replace the variable with the name + units + outdic.columns.values[outdic.columns.values.tolist().index(var)] = ( + var_units[var_names.index(var)] + ) + + # save data to .csv file + outdic.to_csv(outfile) + del (fname, outfile, outdic) + return + + +def construct_port4sea_axis_lables( + var_names, cmip_models, test_models, highlight_models +): + model_list = cmip_models + test_models + # assign colors for labels of models + lable_colors = [] + for model in model_list: + if model in highlight_models: + lable_colors.append("#5170d7") + elif model in test_models: + lable_colors.append("#FC5A50") + else: + lable_colors.append("#000000") + + if len(model_list) > len(var_names): + xlabels = model_list + ylabels = var_names + landscape = True + else: + xlabels = var_names + ylabels = model_list + landscape = False + del model_list + return xlabels, ylabels, lable_colors, landscape + + +def construct_port4sea_data( + stat, + seasons, + region, + data_dict, + var_names, + var_units, + file_template, + outdir, + landscape, +): + # work array + data_all = dict() + # loop 4 seasons and collect data + for season in seasons: + # save raw metric results as a .csv file for each season + save_figure_data( + stat, + region, + season, + var_names, + var_units, + data_dict[stat][season][region], + file_template, + outdir, + ) + if stat == "cor_xy": + data_nor = data_dict[stat][season][region][var_names].to_numpy() + if landscape: + data_all[season] = data_nor.T + else: + data_all[season] = data_nor + del data_nor + elif 
stat == "bias_xy": + # calculate the relative bias + data_sea = data_dict[stat][season][region][var_names].to_numpy() + data_rfm = data_dict["mean-obs_xy"][season][region][var_names].to_numpy() + data_msk = np.where(np.abs(data_rfm) == 0.0, np.nan, data_rfm) + data_nor = data_sea * 100.0 / data_msk + if landscape: + data_all[season] = data_nor.T + else: + data_all[season] = data_nor + del (data_sea, data_rfm, data_msk, data_nor) + else: + data_sea = data_dict[stat][season][region][var_names].to_numpy() + if landscape: + data_sea = data_sea.T + data_all[season] = normalize_by_median(data_sea, axis=1) + else: + data_all[season] = normalize_by_median(data_sea, axis=0) + del data_sea + + # data for final plot + data_all_nor = np.stack( + [data_all["djf"], data_all["mam"], data_all["jja"], data_all["son"]] + ) + del data_all + return data_all_nor + + +def port4sea_plot( + stat, + region, + seasons, + data_dict, + var_names, + var_units, + cmip_models, + test_models, + highlight_models, + file_template, + figure_template, + outdir, + add_vertical_line, + data_version=None, + watermark=False, +): + + # process figure + fontsize = 20 + var_names = sorted(var_names) + var_units = sorted(var_units) + + # construct the axis labels and colors + ( + xaxis_labels, + yaxis_labels, + lable_colors, + landscape, + ) = construct_port4sea_axis_lables( + var_names, cmip_models, test_models, highlight_models + ) + + # construct data for plotting + data_all_nor = construct_port4sea_data( + stat, + seasons, + region, + data_dict, + var_names, + var_units, + file_template, + outdir, + landscape, + ) + + if stat == "cor_xy": + cbar_label = "Pattern Corr." 
+ var_range = (-1.0, 1.0) + cmap_bounds = [0.1, 0.2, 0.4, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0] + elif stat == "bias_xy": + cbar_label = "{}, relative (%)".format(stat.upper()) + var_range = (-30.0, 30.0) + cmap_bounds = [-30.0, -20.0, -10.0, -5.0, -1, 0.0, 1.0, 5.0, 10.0, 20.0, 30.0] + else: + cbar_label = "{}, normalized by median".format(stat.upper()) + var_range = (-0.5, 0.5) + cmap_bounds = [-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5] + + if landscape: + figsize = (40, 18) + legend_box_xy = (1.08, 1.18) + legend_box_size = 4 + legend_lw = 1.5 + shrink = 0.8 + legend_fontsize = fontsize * 0.8 + else: + figsize = (18, 25) + legend_box_xy = (1.25, 1) + legend_box_size = 3 + legend_lw = 1.5 + shrink = 1.0 + legend_fontsize = fontsize * 0.8 + + # Add Watermark/Logo + if watermark: + logo_rect = [0.85, 0.15, 0.07, 0.07] + logo_off = False + else: + logo_rect = [0, 0, 0, 0] + logo_off = True + + # Using Matplotlib-based PMP Visualization Function to Generate Portrait Plot + fig, ax, cbar = portrait_plot( + data_all_nor, + xaxis_labels=xaxis_labels, + yaxis_labels=yaxis_labels, + cbar_label=cbar_label, + cbar_label_fontsize=fontsize * 1.2, + box_as_square=True, + vrange=var_range, + figsize=figsize, + cmap="RdYlBu_r", + cmap_bounds=cmap_bounds, + cbar_kw={"extend": "both", "shrink": shrink}, + missing_color="white", + legend_on=True, + legend_labels=["DJF", "MAM", "JJA", "SON"], + legend_box_xy=legend_box_xy, + legend_box_size=legend_box_size, + legend_lw=legend_lw, + legend_fontsize=legend_fontsize, + logo_rect=logo_rect, + logo_off=logo_off, + ) + + if add_vertical_line: + ax.axvline( + x=len(xaxis_labels) - len(highlight_models) - len(test_models), + color="k", + linewidth=3, + ) + + if landscape: + ax.set_xticklabels(xaxis_labels, rotation=45, va="bottom", ha="left") + ax.set_yticklabels(yaxis_labels, rotation=0, va="center", ha="right") + for xtick, color in zip(ax.get_xticklabels(), lable_colors): + xtick.set_color(color) + 
ax.yaxis.label.set_color(lable_colors[0]) + else: + ax.set_xticklabels(xaxis_labels, rotation=45, va="bottom", ha="left") + ax.set_yticklabels(yaxis_labels, rotation=0, va="center", ha="right") + ax.xaxis.label.set_color(lable_colors[0]) + for ytick, color in zip(ax.get_yticklabels(), lable_colors): + ytick.set_color(color) + + ax.tick_params(axis="x", labelsize=fontsize) + ax.tick_params(axis="y", labelsize=fontsize) + + cbar.ax.tick_params(labelsize=fontsize) + + # Add title + ax.set_title( + "Model Performance of Seasonal Climatology ({}, {})".format( + stat.upper(), region.upper() + ), + fontsize=fontsize * 1.5, + pad=30, + ) + + # Add Watermark + if watermark: + ax.text( + 0.5, + 0.5, + "E3SM-PCMDI", + transform=ax.transAxes, + fontsize=100, + color="black", + alpha=0.5, + ha="center", + va="center", + rotation=25, + ) + # Add data info + fig.text( + 1.25, + 0.9, + "Data version\n" + data_version, + transform=ax.transAxes, + fontsize=12, + color="black", + alpha=0.6, + ha="left", + va="top", + ) + + # Save figure as an image file + figname = ( + figure_template.replace("%(metric)", stat) + .replace("%(region)", region) + .replace("%(season)", "4season") + ) + figfile = os.path.join(outdir, figname) + fig.savefig(figfile, facecolor="w", bbox_inches="tight") + del ( + data_all_nor, + xaxis_labels, + yaxis_labels, + lable_colors, + ) + + return + + +def paracord_plot( + stat, + region, + season, + data_dict, + var_names, + var_units, + cmip_models, + test_models, + highlight_models, + file_template, + figure_template, + outdir, + identify_all_models, + data_version=None, + watermark=False, +): + + # construct plotting data + var_names = sorted(var_names) + var_units = sorted(var_units) + + # write out the results as a table + save_figure_data( + stat, region, season, var_names, var_units, data_dict, file_template, outdir + ) + + # add ensemble mean + model_data = data_dict[var_names].to_numpy() + + # construct the string for plot + model_list = data_dict[ + 
"model" + ].to_list() # cmip_models + test_models + ["CMIP6 MME"] + model_list_group2 = highlight_models + test_models + models_to_highlight = test_models + [ + data_dict["model"].to_list()[-1] + ] # ["CMIP6 MME"] + figsize = (40, 12) + fontsize = 20 + legend_ncol = int(7 * figsize[0] / 40.0) + legend_posistion = (0.50, -0.14) + # color map for markers + colormap = "tab20_r" + # color map for highlight lines + xcolors = [ + "#000000", + "#e41a1c", + "#ff7f00", + "#4daf4a", + "#f781bf", + "#a65628", + "#984ea3", + "#999999", + "#377eb8", + "#dede00", + ] + lncolors = xcolors[1 : len(test_models) + 1] + [xcolors[0]] + # Add Watermark/Logo + if watermark: + logo_rect = [0.85, 0.15, 0.07, 0.07] + logo_off = False + else: + logo_rect = [0, 0, 0, 0] + logo_off = True + + xlabel = "Metric" + if "rms" in stat: + ylabel = "RMS Error (" + stat.upper() + ")" + elif "std" in stat: + ylabel = "Standard Deviation (" + stat.upper() + ")" + else: + ylabel = "value (" + stat.upper() + ")" + + if not np.isnan(model_data).all(): + print(model_data.min(), model_data.max()) + title = "Model Performance of {} Climatology ({}, {})".format( + season.upper(), stat.upper(), region.upper() + ) + fig, ax = parallel_coordinate_plot( + model_data, + var_units, + model_list, + model_names2=model_list_group2, + group1_name="CMIP6", + group2_name="E3SM", + models_to_highlight=models_to_highlight, + models_to_highlight_colors=lncolors, + models_to_highlight_labels=models_to_highlight, + identify_all_models=identify_all_models, # hide indiviaul model markers for CMIP6 models + vertical_center="median", + vertical_center_line=True, + title=title, + figsize=figsize, + axes_labelsize=fontsize * 1.1, + title_fontsize=fontsize * 1.1, + yaxes_label=ylabel, + xaxes_label=xlabel, + colormap=colormap, + show_boxplot=False, + show_violin=True, + violin_colors=("lightgrey", "pink"), + legend_ncol=legend_ncol, + legend_bbox_to_anchor=legend_posistion, + legend_fontsize=fontsize * 0.85, + 
xtick_labelsize=fontsize * 0.95, + ytick_labelsize=fontsize * 0.95, + logo_rect=logo_rect, + logo_off=logo_off, + ) + + # Add Watermark + if watermark: + ax.text( + 0.5, + 0.5, + "E3SM-PCMDI", + transform=ax.transAxes, + fontsize=100, + color="black", + alpha=0.5, + ha="center", + va="center", + rotation=25, + ) + # Add data info + fig.text( + 1.25, + 0.9, + "Data version\n" + data_version, + transform=ax.transAxes, + fontsize=12, + color="black", + alpha=0.6, + ha="left", + va="top", + ) + + # Save figure as an image file + figname = ( + figure_template.replace("%(metric)", stat) + .replace("%(region)", region) + .replace("%(season)", season) + ) + figfile = os.path.join(outdir, figname) + fig.savefig(figfile, facecolor="w", bbox_inches="tight") + + del (model_data, model_list, model_list_group2, models_to_highlight) + + return + + +def mean_climate_metrics_plot(parameter): + # info for test simulation + test_mip = parameter.test_data_set.split(".")[0] + test_exp = parameter.test_data_set.split(".")[1] + test_product = parameter.test_data_set.split(".")[2] + test_case_id = parameter.test_data_set.split(".")[-1] + # output directory + outdir = os.path.join(parameter.output_path, test_mip, test_exp, test_case_id) + + # construct file template to save the figure data in .csv file + file_template = "%(metric)_%(region)_{}_{}_{}_{}_mean_climate_%(season)_{}.csv" + file_template = file_template.format( + parameter.run_type.upper(), + test_mip.upper(), + test_exp.upper(), + test_product.upper(), + parameter.period, + ) + # construct figure template + figure_template = file_template.replace("csv", parameter.ftype) + + # find the metrics data + test_file, refr_file, cmip_file = find_metrics_data(parameter) + + # load cmip metrics data + cmip_models, highlight_models, cmip_lib = load_cmip_metrics_data(cmip_file) + + # load test model metrics data + test_models, test_lib = load_test_model_data( + test_file, refr_file, test_mip, parameter.run_type + ) + # collect overlap sets 
of variables for plotting: + test_lib, cmip_lib, var_list, var_unit_list = fill_plot_var_and_units( + test_lib, cmip_lib + ) + # search overlap of regions in test and reference + regions = [] + for reg in parameter.regions: + if (reg in test_lib.regions) and (reg in cmip_lib.regions): + regions.append(reg) + + # merge the cmip and model data + merged_lib = cmip_lib.merge(test_lib) + + ################################### + # generate parallel coordinate plot + ################################### + parall_fig_dir = os.path.join(outdir, "paracord_annual") + if os.path.exists(parall_fig_dir): + shutil.rmtree(parall_fig_dir) + os.makedirs(parall_fig_dir) + print("Parallel Coordinate Plots (4 seasons), loop each region and metric....") + # add ensemble mean + for metric in [ + "rms_xyt", + "std-obs_xyt", + "std_xyt", + "rms_y", + "rms_devzm", + "std_xy_devzm", + "std-obs_xy_devzm", + ]: + for region in regions: + for season in ["ann"]: + data_dict = merged_lib.df_dict[metric][season][region] + data_dict.loc["CMIP MMM"] = cmip_lib.df_dict[metric][season][ + region + ].mean(numeric_only=True, skipna=True) + data_dict.at["CMIP MMM", "model"] = "CMIP MMM" + if parameter.parcord_show_markers is not None: + identify_all_models = parameter.parcord_show_markers + else: + identify_all_models = True + paracord_plot( + metric, + region, + season, + data_dict, + var_list, + var_unit_list, + cmip_models, + test_models, + highlight_models, + file_template, + figure_template, + parall_fig_dir, + identify_all_models, + data_version=None, + watermark=False, + ) + del data_dict + + ################################### + # generate portrait plot + ################################### + ptrait_fig_dir = os.path.join(outdir, "portrait_4seasons") + if os.path.exists(ptrait_fig_dir): + shutil.rmtree(ptrait_fig_dir) + os.makedirs(ptrait_fig_dir) + print("Portrait Plots (4 seasons),loop each region and metric....") + ######################################################################### + 
seasons = ["djf", "mam", "jja", "son"] + data_dict = merged_lib.df_dict + for metric in ["rms_xy", "cor_xy", "bias_xy"]: + for region in regions: + print("working on {} in {} region".format(metrics_inquire(metric), region)) + if parameter.add_vertical_line is not None: + add_vertical_line = parameter.add_vertical_line + else: + add_vertical_line = False + port4sea_plot( + metric, + region, + seasons, + data_dict, + var_list, + var_unit_list, + cmip_models, + test_models, + highlight_models, + file_template, + figure_template, + ptrait_fig_dir, + add_vertical_line, + data_version=None, + watermark=False, + ) + + # release the data space + del (merged_lib, cmip_lib, test_lib, var_unit_list, var_list, regions) + + return diff --git a/zppy/templates/inclusions/pcmdi/mean_climate_plot_parser.py b/zppy/templates/inclusions/pcmdi/mean_climate_plot_parser.py new file mode 100755 index 00000000..e73e4904 --- /dev/null +++ b/zppy/templates/inclusions/pcmdi/mean_climate_plot_parser.py @@ -0,0 +1,373 @@ +#!/usr/bin/env python +import ast +import glob +import os + +import numpy as np +import pandas as pd +from pcmdi_metrics.mean_climate.lib import pmp_parser + + +def create_mean_climate_plot_parser(): + parser = pmp_parser.PMPMetricsParser() + parser.add_argument( + "--test_model", + dest="test_model", + help="Defines target model for the metrics plots", + required=False, + ) + + parser.add_argument( + "--test_data_set", + type=str, + nargs="+", + dest="test_data_set", + help="List of observations or models to test " + + "against the reference_data_set", + required=False, + ) + + parser.add_argument( + "--test_data_path", + dest="test_data_path", + help="Path for the test climitologies", + required=False, + ) + + parser.add_argument( + "--period", dest="period", help="A simulation parameter", required=False + ) + + parser.add_argument( + "--run_type", dest="run_type", help="A post-process parameter", required=False + ) + + parser.add_argument( + "--regions", + 
type=ast.literal_eval, + dest="regions", + help="Regions on which to run the metrics", + required=False, + ) + + parser.add_argument( + "--pcmdi_data_set", + type=str, + nargs="+", + dest="pcmdi_data_set", + help="PCMDI CMIP dataset that is used as a " + + "CMIP multi-model ensembles against the test_data_set", + required=False, + ) + + parser.add_argument( + "--pcmdi_data_path", + dest="pcmdi_data_path", + help="Path for the PCMDI CMIP mean climate metrics data", + required=False, + ) + + parser.add_argument( + "--refr_model", + dest="refr_model", + help="A simulation parameter", + required=False, + ) + + parser.add_argument( + "--refr_data_set", + type=str, + nargs="+", + dest="refr_data_set", + help="List of reference models to test " + "against the reference_data_set", + required=False, + ) + + parser.add_argument( + "--refr_data_path", + dest="refr_data_path", + help="Path for the reference model climitologies", + required=False, + ) + + parser.add_argument( + "--output_path", + dest="output_path", + help="Path for the metrics plots", + required=False, + ) + + parser.add_argument( + "--parcord_show_markers", + dest="parcord_show_markers", + help="show markers for individual model in parallel coordinate plots", + required=False, + ) + parser.add_argument( + "--add_vertical_line", + dest="add_vertical_line", + help="draw a vertical line to separate test and reference models for portrait plots", + required=False, + ) + return parser + + +def metrics_inquire(name): + # list of metrics name and long-name + metrics = { + "std-obs_xy": "Spatial Standard Deviation (Reference)", + "std_xy": "Spatial Standard Deviation (Model)", + "std-obs_xyt": "Spatial-temporal Standard Deviation (Reference)", + "std_xyt": "Spatial-temporal Standard Deviation (Model)", + "std-obs_xy_devzm": "Standard Deviation of Deviation from Zonal Mean (Reference)", + "mean_xy": "Area Weighted Spatial Mean (Model)", + "mean-obs_xy": "Area Weighted Spatial Mean (Reference)", + "std_xy_devzm": 
"Standard Deviation of Deviation from Zonal Mean (Model)", + "rms_xyt": "Spatio-Temporal Root Mean Square Error", + "rms_xy": "Spatial Root Mean Square Error", + "rmsc_xy": "Centered Spatial Root Mean Square Error", + "cor_xy": "Spatial Pattern Correlation Coefficient", + "bias_xy": "Mean Bias (Model - Reference)", + "mae_xy": "Mean Absolute Difference (Model - Reference)", + "rms_y": "Root Mean Square Error of Zonal Mean", + "rms_devzm": "Root Mean Square Error of Deviation From Zonal Mean", + } + if name in metrics.keys(): + long_name = metrics[name] + + return long_name + + +def find_latest(pmprdir, mip, exp): + versions = sorted( + [ + r.split("/")[-1] + for r in glob.glob(os.path.join(pmprdir, mip, exp, "v????????")) + ] + ) + latest_version = versions[-1] + return latest_version + + +def shift_row_to_bottom(df, index_to_shift): + idx = [i for i in df.index if i != index_to_shift] + return df.loc[idx + [index_to_shift]] + + +def find_cmip_metric_data(pmprdir, data_set, var): + # cmip data for comparison + mip = data_set.split(".")[0] + exp = data_set.split(".")[1] + case_id = data_set.split(".")[2] + if case_id == "": + case_id = find_latest(pmprdir, mip, exp) + fpath = glob.glob(os.path.join(pmprdir, mip, exp, case_id, "{}.*.json".format(var))) + if len(fpath) < 1 and var == "rtmt": + fpath = glob.glob( + os.path.join(pmprdir, mip, exp, case_id, "{}.*.json".format("rt")) + ) + if len(fpath) > 0 and os.path.exists(fpath[0]): + cmip_list = fpath[0] + return_code = 0 + else: + print("Warning: cmip metrics data not found for {}....".format(var)) + print("Warning: remove {} from the metric list....".format(var)) + cmip_list = None + return_code = -99 + return cmip_list, return_code + + +def select_models(df, selected_models): + # Selected models only + model_names = df["model"].tolist() + for model_name in model_names: + drop_model = True + for keyword in selected_models: + if keyword in model_name: + drop_model = False + break + if drop_model: + 
df.drop(df.loc[df["model"] == model_name].index, inplace=True) + df.reset_index(drop=True, inplace=True) + + return df + + +def exclude_models(df, excluded_models): + # eclude models + model_names = df["model"].tolist() + for model_name in model_names: + drop_model = False + for keyword in excluded_models: + if keyword in model_name: + drop_model = True + break + if drop_model: + df.drop(df.loc[df["model"] == model_name].index, inplace=True) + df.reset_index(drop=True, inplace=True) + return df + + +def fill_plot_var_and_units(model_lib, cmip_lib): + # we define fixed sets of variables used for final plotting. + units_all = { + "prw": "[kg m$^{-2}$]", + "pr": "[mm d$^{-1}$]", + "prsn": "[mm d$^{-1}$]", + "prc": "[mm d$^{-1}$]", + "hfls": "[W m$^{-2}$]", + "hfss": "[W m$^{-2}$]", + "clivi": "[kg $m^{-2}$]", + "clwvi": "[kg $m^{-2}$]", + "psl": "[Pa]", + "evspsbl": "[kg m$^{-2} s^{-1}$]", + "rlds": "[W m$^{-2}$]", + "rldscs": "[W $m^{-2}$]", + "rtmt": "[W m$^{-2}$]", + "rsdt": "[W m$^{-2}$]", + "rlus": "[W m$^{-2}$]", + "rluscs": "[W m$^{-2}$]", + "rlut": "[W m$^{-2}$]", + "rlutcs": "[W m$^{-2}$]", + "rsds": "[W m$^{-2}$]", + "rsdscs": "[W m$^{-2}$]", + "rstcre": "[W m$^{-2}$]", + "rltcre": "[W m$^{-2}$]", + "rsus": "[W m$^{-2}$]", + "rsuscs": "[W m$^{-2}$]", + "rsut": "[W m$^{-2}$]", + "rsutcs": "[W m$^{-2}$]", + "ts": "[K]", + "tas": "[K]", + "tauu": "[Pa]", + "tauv": "[Pa]", + "sfcWind": "[m s$^{-1}$]", + "zg-500": "[m]", + "ta-200": "[K]", + "ta-850": "[K]", + "ua-200": "[m s$^{-1}$]", + "ua-850": "[m s$^{-1}$]", + "va-200": "[m s$^{-1}$]", + "va-850": "[m s$^{-1}$]", + "uas": "[m s$^{-1}$]", + "vas": "[m s$^{-1}$]", + "tasmin": "[K]", + "tasmax": "[K]", + "clt": "[%]", + } + + # loop variable list and find them in cmip and target models + variable_units = [] + variable_names = [] + for var in units_all.keys(): + # reorgnize cmip data + if var == "rtmt": + if ("rt" in cmip_lib.var_list) and ("rtmt" in model_lib.var_list): + # special case (rt is used in pcmdi 
datasets, but rtmt is for cmip) + cmip_lib.var_list = list( + map(lambda x: x.replace("rt", "rtmt"), cmip_lib.var_list) + ) + for stat in cmip_lib.df_dict: + for season in cmip_lib.df_dict[stat]: + for region in cmip_lib.df_dict[stat][season]: + cmip_lib.df_dict[stat][season][region]["rtmt"] = ( + cmip_lib.df_dict[stat][season][region].pop("rt") + ) + + if var in model_lib.var_list and var in cmip_lib.var_list: + varunt = var + "\n" + str(units_all[var]) + indv1 = cmip_lib.var_list.index(var) + indv2 = model_lib.var_list.index(var) + cmip_lib.var_unit_list[indv1] = varunt + model_lib.var_unit_list[indv2] = varunt + variable_units.append(varunt) + variable_names.append(var) + del (indv1, indv2, varunt) + else: + print("Warning: {} is not found in metrics data".format(var)) + print( + "Warning: {} is possibly not included as default in fill_plot_var_and_units()".format( + var + ) + ) + + # sanity check for cmip data + for stat in cmip_lib.df_dict: + for season in cmip_lib.df_dict[stat]: + for region in cmip_lib.df_dict[stat][season]: + df = pd.DataFrame(cmip_lib.df_dict[stat][season][region]) + for i, model in enumerate(df["model"].tolist()): + if model in ["E3SM-1-0", "E3SM-1-1-ECA"]: + idxs = df[df.iloc[:, 0] == model].index + df.loc[idxs, "ta-850"] = np.nan + del idxs + if model in ["CIESM"]: + idxs = df[df.iloc[:, 0] == model].index + df.loc[idxs, "pr"] = np.nan + del idxs + cmip_lib.df_dict[stat][season][region] = df + del df + + return model_lib, cmip_lib, variable_names, variable_units + + +def find_metrics_data(parameter): + pmp_set = parameter.pcmdi_data_set + pmp_path = parameter.pcmdi_data_path + test_set = parameter.test_data_set + test_path = parameter.test_data_path + refr_set = parameter.refr_data_set + refr_path = parameter.refr_data_path + run_type = parameter.run_type + debug = parameter.debug + + test_mip = test_set.split(".")[0] + test_exp = test_set.split(".")[1] + test_case_id = test_set.split(".")[-1] + test_dir = os.path.join(test_path, 
test_mip, test_exp, test_case_id) + if run_type == "model_vs_model": + refr_mip = refr_set.split(".")[0] + refr_exp = refr_set.split(".")[1] + refr_case_id = refr_set.split(".")[-1] + refr_dir = os.path.join(refr_path, refr_mip, refr_exp, refr_case_id) + + variables = [ + s.split("/")[-1].split("_")[0] + for s in glob.glob(os.path.join(test_dir, "*{}.json".format(test_case_id))) + if os.path.exists(s) + ] + variables = list(set(variables)) + + # find list of metrics data files + test_list = [] + refr_list = [] + cmip_list = [] + + for vv in variables: + ftest = glob.glob( + os.path.join(test_dir, "{}_*_{}.json".format(vv, test_case_id)) + ) + fcmip, rcode = find_cmip_metric_data(pmp_path, pmp_set, vv) + if rcode == 0: + if len(ftest) > 0 and len(fcmip) > 0: + for fx in ftest: + test_list.append(fx) + cmip_list.append(fcmip) + if debug: + print(ftest[0].split("/")[-1], fcmip.split("/")[-1]) + if run_type == "model_vs_model": + frefr = glob.glob( + os.path.join(refr_dir, "{}_*_{}.json".format(vv, refr_case_id)) + ) + if len(frefr) > 0: + for fr in frefr: + refr_list.append(fr) + if debug: + print( + ftest[0].split("/")[-1], + frefr[0].split("/")[-1], + fcmip.split("/")[-1], + ) + del frefr + del (ftest, fcmip) + return test_list, refr_list, cmip_list diff --git a/zppy/templates/inclusions/pcmdi/reference_alias.json b/zppy/templates/inclusions/pcmdi/reference_alias.json new file mode 100755 index 00000000..5fee538d --- /dev/null +++ b/zppy/templates/inclusions/pcmdi/reference_alias.json @@ -0,0 +1,341 @@ +{ + "rlds" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rldscs" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, 
+ "rlus" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsds" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsdscs" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + + "rsus" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsuscs": { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rstcre" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rltcre" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rlut" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rlutcs" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", 
+ "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsdt" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsut" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsutcs" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rtmt" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "pr" : { + "default" : "GPCP_v2.3", + "alternate" : "GPCP_v2.2", + "alternate1" : "GPCP_1DD", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C", + "alternate6" : "GPCP_v3.2" + }, + "prc" : { + "default" : "ERA5", + "alternate" : "NOAA-20C" + }, + "prsn" : { + "default" : "ERA5", + "alternate" : "NOAA-20C" + }, + "prw" : { + "default" : "ERA5", + "alternate" : "NOAA-20C", + "alternate1" : "MERRA2", + "alternate2" : "ERA-Interim", + "alternate3" : "NOAA-20C" + }, + "psl" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "ps" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "huss" : { + "default" : "MERRA2", + "alternate" : "NOAA-20C", + "alternate1" : "ERA5", + "alternate2" : "ERA-Interim" + }, + "ta" : { + "default" : "ERA5", +
"alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "ua" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "va" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "hur" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "wap" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "zg" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "o3" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "hus" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "uas" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "vas" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "tauu" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "COREv2-Flux" + }, + "taux" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "COREv2-Flux" + }, + "tauv" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "COREv2-Flux" + }, + "tauy" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "COREv2-Flux" + }, + "tas" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "ts" : { + "default" : "ERA5", 
+ "alternate" : "NOAA-20C", + "alternate1" : "HadISST2" + }, + "sst" : { + "default" : "ERA5", + "alternate" : "NOAA-20C", + "alternate1" : "HadISST2" + }, + "sfcWind" : { + "default" : "NOAA-20C", + "alternate" : "ERA5", + "alternate1" : "MERRA2", + "alternate2" : "ERA-Interim" + }, + "hfls" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "OAFlux" + }, + "hfss" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "OAFlux" + }, + "evspsbl" : { + "default" : "ERA5", + "alternate" : "NOAA-20C" + }, + "clt" : { + "default" : "ERA5", + "alternate3" : "NOAA-20C" + }, + "clwvi" : { + "default" : "ERA5", + "alternate" : "NOAA-20C" + }, + "clivi" : { + "default" : "ERA5", + "alternate" : "NOAA-20C" + }, + "tasmin" : { + "default" : "MERRA2" + }, + "tasmax" : { + "default" : "MERRA2" + }, + "sic" : { + "default" : "HadSST2" + }, + "tos" : { + "default" : "HadSST2" + }, + "zos" : { + "default" : "AVISO", + "alternate" : "HadISST" + }, + "sos" : { + "default" : "Aquarius", + "alternate" : "HadISST" + } +} diff --git a/zppy/templates/inclusions/pcmdi/regions_specs.json b/zppy/templates/inclusions/pcmdi/regions_specs.json new file mode 100755 index 00000000..811eb1e9 --- /dev/null +++ b/zppy/templates/inclusions/pcmdi/regions_specs.json @@ -0,0 +1,263 @@ +{ + "global": { + "domain": { "latitude":[-90.0, 90.0]} + }, + "NH": { + "domain": { "latitude":[0.0, 90.0]} + }, + "SH": { + "domain": { "latitude":[-90.0, 0]} + }, + "NHEX": { + "domain": { "latitude":[30.0, 90.0]} + }, + "SHEX": { + "domain": { "latitude":[-90.0, -30.0]} + }, + "TROPICS": { + "domain": { "latitude":[-30.0, 30.0]} + }, + "90S50S": { + "domain": { "latitude":[-90.0, -50.0]} + }, + "50S20S": { + "domain": { "latitude":[-50.0, -20.0]} + }, + "20S20N": { + "domain": { "latitude":[-20.0, 20.0]} + }, + "20N50N": { + "domain": { "latitude":[20.0, 50.0]} + }, 
+ "50N90N": { + "domain": { "latitude":[50.0, 90.0]} + }, + "ocean_NH": { + "value": 0.0, + "domain": { "latitude":[0.0, 90.0]} + }, + "ocean_SH": { + "value": 0.0, + "domain": { "latitude":[-90.0, 0.0]} + }, + "land_NH": { + "value": 100, + "domain": { "latitude":[0.0, 90.0]} + }, + "land_SH": { + "value": 100, + "domain": { "latitude":[-90.0, 0.0]} + }, + "land_NHEX": { + "value": 100, + "domain": { "latitude":[30.0, 90.0]} + }, + "land_SHEX": { + "value": 100, + "domain": { "latitude":[-90.0, -30.0]} + }, + "land_TROPICS": { + "value": 100, + "domain": { "latitude":[-30.0, 30.0]} + }, + "land": { + "value": 100 + }, + "ocean_NHEX": { + "value": 0, + "domain": { "latitude":[30.0, 90.0]} + }, + "ocean_SHEX": { + "value": 0, + "domain": { "latitude":[-90.0, -30.0]} + }, + "ocean_TROPICS": { + "value": 0, + "domain": { "latitude":[-30.0, 30.0]} + }, + "ocean": { + "value": 0 + }, + "ocean_50S50N": { + "value": 0.0, + "domain": { "latitude":[-50.0, 50.0]} + }, + "ocean_50S20S": { + "value": 0.0, + "domain": { "latitude":[-50.0, -20.0]} + }, + "ocean_20S20N": { + "value": 0.0, + "domain": { "latitude":[-20.0, 20.0]} + }, + "ocean_20N50N": { + "value": 0.0, + "domain": { "latitude":[20.0, 50.0]} + }, + "ocean_50N90N": { + "value": 0.0, + "domain": { "latitude":[50.0, 90.0]} + }, + "ocean_90S50S": { + "value": 0.0, + "domain": { "latitude":[-90.0, -50.0]} + }, + "NAM": { + "domain": { "latitude":[20.0, 90], + "longitude":[-180, 180]} + }, + "NAO": { + "domain": { "latitude":[20.0, 80], + "longitude":[-90, 40]} + }, + "SAM": { + "domain": { "latitude":[-20.0, -90], + "longitude":[0, 360]} + }, + "PSA1": { + "domain": { "latitude":[-20.0, -90], + "longitude":[0, 360]} + }, + "PSA2": { + "domain": { "latitude":[-20.0, -90], + "longitude":[0, 360]} + }, + "PNA": { + "domain": { "latitude":[20.0, 85], + "longitude":[120, 240]} + }, + "PDO": { + "domain": { "latitude":[20.0, 70], + "longitude":[110, 260]} + }, + "AMO": { + "domain": { "latitude":[0.0, 70], + "longitude":[-80,
0]} + }, + "AllMW": { + "domain": { "latitude":[-40.0, 45.0], + "longitude":[0.0, 360.0]} + }, + "AllM": { + "domain": { "latitude":[-45.0, 45.0], + "longitude":[0.0, 360.0]} + }, + "NAMM": { + "domain": { "latitude":[0.0, 45.0], + "longitude":[210.0, 310.0]} + }, + "SAMM": { + "domain": { "latitude":[-45.0, 0.0], + "longitude":[240.0, 330.0]} + }, + "NAFM": { + "domain": { "latitude":[0.0, 45.0], + "longitude":[310.0, 60.0]} + }, + "SAFM": { + "domain": { "latitude":[-45.0, 0.0], + "longitude":[0.0, 90.0]} + }, + "ASM": { + "domain": { "latitude":[0.0, 45.0], + "longitude":[60.0, 180.0]} + }, + "AUSM": { + "domain": { "latitude":[-45.0, 0.0], + "longitude":[90.0, 160.0]} + }, + "AIR": { + "domain": { "latitude":[7.0, 25.0], + "longitude":[65.0, 85.0]} + }, + "AUS": { + "domain": { "latitude":[-20.0, -10.0], + "longitude":[120.0, 150.0]} + }, + "Sahel": { + "domain": { "latitude":[13.0, 18.0], + "longitude":[-10.0, 10.0]} + }, + "GoG": { + "domain": { "latitude":[0.0, 5.0], + "longitude":[-10.0, 10.0]} + }, + "NAmo": { + "domain": { "latitude":[20.0, 37.0], + "longitude":[-112.0, -103.0]} + }, + "SAmo": { + "domain": { "latitude":[-20.0, 2.5], + "longitude":[-65.0, -40.0]} + }, + "Nino34": { + "value": 0.0, + "domain": { "latitude":[-5.0, 5.0], + "longitude":[190.0, 240.0]} + }, + "Nino3": { + "value": 0.0, + "domain": { "latitude":[-5.0, 5.0], + "longitude":[210.0, 270.0]} + }, + "Nino4": { + "value": 0.0, + "domain": { "latitude":[-5.0, 5.0], + "longitude":[160.0, 210.0]} + }, + "ONI": { + "value": 0.0, + "domain": { "latitude":[-5.0, 5.0], + "longitude":[190.0, 240.0]} + }, + "Nino12": { + "value": 0.0, + "domain": { "latitude":[-10.0, 0.0], + "longitude":[270.0, 280.0]} + }, + "AMMS": { + "value": 0.0, + "domain": { "latitude":[-15.0, -5.0], + "longitude":[-20.0, 10.0]} + }, + "AMMN": { + "value": 0.0, + "domain": { "latitude":[5.0, 15.0], + "longitude":[-50.0, -20.0]} + }, + "ATL3": { + "value": 0.0, + "domain": { "latitude":[-3.0, 3.0], + "longitude":[-20.0, 
0.0]} + }, + "TSA": { + "value": 0.0, + "domain": { "latitude":[-20.0, 0.0], + "longitude":[-30.0, 10.0]} + }, + "TNA": { + "value": 0.0, + "domain": { "latitude":[5.5, 23.5], + "longitude":[302.5, 345.0]} + }, + "TIO": { + "value": 0.0, + "domain": { "latitude":[-15.0, 15.0], + "longitude":[40.0, 115.0]} + }, + "IODE": { + "value": 0.0, + "domain": { "latitude":[-10.0, 10.0], + "longitude":[50.0, 70.0]} + }, + "IODW": { + "value": 0.0, + "domain": { "latitude":[-10.0, 0.0], + "longitude":[90.0, 110.0]} + }, + "SOCN": { + "value": 0.0, + "domain": { "latitude":[-70.0, -50.0], + "longitude":[0.0, 360.0]} + } +} From 23b6fa2627102212c5b4fa10ea26debfd0f939bb Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Mon, 30 Dec 2024 16:25:43 -0600 Subject: [PATCH 05/23] clean up the redundant files for zppy-pcmdi --- .../templates/pcmdi_diags/cmip_variables.json | 121 ---- .../pcmdi_diags/derived_variable.json | 26 - .../pcmdi_diags/mean_climate_plot_driver.py | 670 ------------------ .../pcmdi_diags/mean_climate_plot_parser.py | 373 ---------- .../pcmdi_diags/observation_to_cmip.py | 85 --- .../pcmdi_diags/plot_mean_climate.py | 84 --- .../pcmdi_diags/post_merge_clim_jsons.py | 164 ----- zppy/templates/pcmdi_diags/process_sftlf.py | 61 -- .../pcmdi_diags/reference_alias.json | 340 --------- zppy/templates/pcmdi_diags/regions_specs.json | 263 ------- 10 files changed, 2187 deletions(-) delete mode 100755 zppy/templates/pcmdi_diags/cmip_variables.json delete mode 100755 zppy/templates/pcmdi_diags/derived_variable.json delete mode 100755 zppy/templates/pcmdi_diags/mean_climate_plot_driver.py delete mode 100755 zppy/templates/pcmdi_diags/mean_climate_plot_parser.py delete mode 100755 zppy/templates/pcmdi_diags/observation_to_cmip.py delete mode 100755 zppy/templates/pcmdi_diags/plot_mean_climate.py delete mode 100755 zppy/templates/pcmdi_diags/post_merge_clim_jsons.py delete mode 100755 zppy/templates/pcmdi_diags/process_sftlf.py delete mode 100755 zppy/templates/pcmdi_diags/reference_alias.json delete mode 100755
zppy/templates/pcmdi_diags/reference_alias.json delete mode 100755 zppy/templates/pcmdi_diags/regions_specs.json diff --git a/zppy/templates/pcmdi_diags/cmip_variables.json b/zppy/templates/pcmdi_diags/cmip_variables.json deleted file mode 100755 index e5c3336e..00000000 --- a/zppy/templates/pcmdi_diags/cmip_variables.json +++ /dev/null @@ -1,121 +0,0 @@ -{ - "SImon":[ - "siu", - "siv", - "sitemptop", - "sisnmass", - "simass", - "sisnthick", - "sithick", - "sitimefrac", - "siconc" - ], - "Omon": [ - "areacello", - "fsitherm", - "hfds", - "masso", - "mlotst", - "sfdsi", - "sob", - "soga", - "sos", - "tauuo", - "tauvo", - "thetaoga", - "tob", - "tos", - "tosga", - "volo", - "wfo", - "zos", - "thetaoga", - "hfsifrazil", - "masscello", - "so", - "thetao", - "thkcello", - "uo", - "vo", - "volcello", - "wo", - "zhalfo" - ], - "lnd": [ - "mrsos", - "mrso", - "mrfso", - "mrros", - "mrro", - "prveg", - "evspsblveg", - "evspsblsoi", - "tran", - "tsl", - "lai" - ], - "atm": [ - "hur", - "hus", - "ta", - "ua", - "va", - "wap", - "zg", - "o3", - "pfull", - "phalf", - "tas", - "ts", - "psl", - "ps", - "sfcWind", - "huss", - "pr", - "prc", - "prsn", - "evspsbl", - "tauu", - "tauv", - "hfls", - "clt", - "rlds", - "rlus", - "rsds", - "rsdscs", - "rsus", - "rsuscs", - "hfss", - "cl", - "clw", - "cli", - "clivi", - "clwvi", - "prw", - "rldscs", - "rlut", - "rlutcs", - "rsdt", - "rsut", - "rsutcs", - "rtmt", - "abs550aer", - "od550aer", - "tasmax", - "tasmin", - "clisccp", - "cltisccp", - "albisccp", - "pctisccp", - "clcalipso", - "cltcalipso", - "cllcalipso", - "clmcalipso", - "clhcalipso" - ], - "fx": [ - "areacella", - "sftlf", - "orog" - ] -} diff --git a/zppy/templates/pcmdi_diags/derived_variable.json b/zppy/templates/pcmdi_diags/derived_variable.json deleted file mode 100755 index 6ca047ed..00000000 --- a/zppy/templates/pcmdi_diags/derived_variable.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "rltcre":{ - "rlutcs" : 1, - "rlut" : -1 - }, - "rstcre":{ - "rsutcs" : 1, - "rsut" : -1 - 
}, - "netsw":{ - "rsds" : 1, - "rsus" : -1 - }, - "netlw":{ - "rlus" : 1, - "rlds" : -1 - }, - "netflux":{ - "rsds" : 1, - "rsus" : -1, - "rlds" : 1, - "rlus" : -1, - "hfls" : -1, - "hfss" : -1 - } -} diff --git a/zppy/templates/pcmdi_diags/mean_climate_plot_driver.py b/zppy/templates/pcmdi_diags/mean_climate_plot_driver.py deleted file mode 100755 index a85cf4e0..00000000 --- a/zppy/templates/pcmdi_diags/mean_climate_plot_driver.py +++ /dev/null @@ -1,670 +0,0 @@ -#!/bin/env python -############################################################################## -# This model is used to generate mean climate diagnostic figures -# Author: Shixuan Zhang (shixuan.zhang@pnnl.gov) -############################################################################# -import os -import shutil - -import numpy as np -import pandas as pd -from mean_climate_plot_parser import ( - fill_plot_var_and_units, - find_metrics_data, - metrics_inquire, - shift_row_to_bottom, -) -from pcmdi_metrics.graphics import ( - Metrics, - normalize_by_median, - parallel_coordinate_plot, - portrait_plot, -) - - -def load_test_model_data(test_file, refr_file, mip, run_type): - # load the data and reorganize if needed - pd.set_option("future.no_silent_downcasting", True) - test_lib = Metrics(test_file) - - # model_vs_model, merge the reference model data into test model - if run_type == "model_vs_model": - refr_lib = Metrics(refr_file) - test_lib = test_lib.merge(refr_lib) - del refr_lib - - # collect and reorgnize test model data for plotting: - test_models = [] - for stat in test_lib.df_dict: - for season in test_lib.df_dict[stat]: - for region in test_lib.df_dict[stat][season]: - df = pd.DataFrame(test_lib.df_dict[stat][season][region]) - for i, model in enumerate(df["model"].tolist()): - model_run = df["model_run"].tolist()[i] - new_name = "{}-{}".format(mip.upper(), model_run.upper()) - idxs = df[df.iloc[:, 2] == model_run].index - df.loc[idxs, "model"] = list( - map( - lambda x: x.replace(model, 
new_name), - df.loc[idxs, "model"], - ) - ) - if new_name not in test_models: - test_models.append(new_name) - test_lib.df_dict[stat][season][region] = df - del df - return test_models, test_lib - - -def load_cmip_metrics_data(cmip_file): - # collect cmip multi-model ensemble data for comparison - pd.set_option("future.no_silent_downcasting", True) - cmip_lib = Metrics(cmip_file) - cmip_models = [] - highlight_models = [] - for stat in cmip_lib.df_dict: - for season in cmip_lib.df_dict[stat]: - for region in cmip_lib.df_dict[stat][season]: - # now find all E3SM models in cmip6 - df = pd.DataFrame(cmip_lib.df_dict[stat][season][region]) - for model in df["model"].tolist(): - if model not in cmip_models: - cmip_models.append(model) - if ("e3sm" in model.lower()) and (model not in highlight_models): - highlight_models.append(model) - # move highlight_models to the end - for model in highlight_models: - idxs = df[df.iloc[:, 0] == model].index - cmip_models.remove(model) - cmip_models.append(model) - for idx in idxs: - df = shift_row_to_bottom(df, idx) - cmip_lib.df_dict[stat][season][region] = df - del df - return cmip_models, highlight_models, cmip_lib - - -def save_figure_data( - stat, region, season, var_names, var_units, data_dict, template, outdir -): - # construct output file name - fname = ( - template.replace("%(metric)", stat) - .replace("%(region)", region) - .replace("%(season)", season) - ) - outfile = os.path.join(outdir, fname) - outdic = pd.DataFrame(data_dict) - outdic = outdic.drop(columns=["model_run"]) - for var in list(outdic.columns.values[3:]): - if var not in var_names: - print("{} is excluded from the {}".format(var, fname)) - outdic = outdic.drop(columns=[var]) - else: - # replace the variable with the name + units - outdic.columns.values[outdic.columns.values.tolist().index(var)] = ( - var_units[var_names.index(var)] - ) - - # save data to .csv file - outdic.to_csv(outfile) - del (fname, outfile, outdic) - return - - -def 
construct_port4sea_axis_lables( - var_names, cmip_models, test_models, highlight_models -): - model_list = cmip_models + test_models - # assign colors for labels of models - lable_colors = [] - for model in model_list: - if model in highlight_models: - lable_colors.append("#5170d7") - elif model in test_models: - lable_colors.append("#FC5A50") - else: - lable_colors.append("#000000") - - if len(model_list) > len(var_names): - xlabels = model_list - ylabels = var_names - landscape = True - else: - xlabels = var_names - ylabels = model_list - landscape = False - del model_list - return xlabels, ylabels, lable_colors, landscape - - -def construct_port4sea_data( - stat, - seasons, - region, - data_dict, - var_names, - var_units, - file_template, - outdir, - landscape, -): - # work array - data_all = dict() - # loop 4 seasons and collect data - for season in seasons: - # save raw metric results as a .csv file for each season - save_figure_data( - stat, - region, - season, - var_names, - var_units, - data_dict[stat][season][region], - file_template, - outdir, - ) - if stat == "cor_xy": - data_nor = data_dict[stat][season][region][var_names].to_numpy() - if landscape: - data_all[season] = data_nor.T - else: - data_all[season] = data_nor - del data_nor - elif stat == "bias_xy": - # calculate the relative bias - data_sea = data_dict[stat][season][region][var_names].to_numpy() - data_rfm = data_dict["mean-obs_xy"][season][region][var_names].to_numpy() - data_msk = np.where(np.abs(data_rfm) == 0.0, np.nan, data_rfm) - data_nor = data_sea * 100.0 / data_msk - if landscape: - data_all[season] = data_nor.T - else: - data_all[season] = data_nor - del (data_sea, data_rfm, data_msk, data_nor) - else: - data_sea = data_dict[stat][season][region][var_names].to_numpy() - if landscape: - data_sea = data_sea.T - data_all[season] = normalize_by_median(data_sea, axis=1) - else: - data_all[season] = normalize_by_median(data_sea, axis=0) - del data_sea - - # data for final plot - 
data_all_nor = np.stack( - [data_all["djf"], data_all["mam"], data_all["jja"], data_all["son"]] - ) - del data_all - return data_all_nor - - -def port4sea_plot( - stat, - region, - seasons, - data_dict, - var_names, - var_units, - cmip_models, - test_models, - highlight_models, - file_template, - figure_template, - outdir, - add_vertical_line, - data_version=None, - watermark=False, -): - - # process figure - fontsize = 20 - var_names = sorted(var_names) - var_units = sorted(var_units) - - # construct the axis labels and colors - ( - xaxis_labels, - yaxis_labels, - lable_colors, - landscape, - ) = construct_port4sea_axis_lables( - var_names, cmip_models, test_models, highlight_models - ) - - # construct data for plotting - data_all_nor = construct_port4sea_data( - stat, - seasons, - region, - data_dict, - var_names, - var_units, - file_template, - outdir, - landscape, - ) - - if stat == "cor_xy": - cbar_label = "Pattern Corr." - var_range = (-1.0, 1.0) - cmap_bounds = [0.1, 0.2, 0.4, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0] - elif stat == "bias_xy": - cbar_label = "{}, relative (%)".format(stat.upper()) - var_range = (-30.0, 30.0) - cmap_bounds = [-30.0, -20.0, -10.0, -5.0, -1, 0.0, 1.0, 5.0, 10.0, 20.0, 30.0] - else: - cbar_label = "{}, normalized by median".format(stat.upper()) - var_range = (-0.5, 0.5) - cmap_bounds = [-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5] - - if landscape: - figsize = (40, 18) - legend_box_xy = (1.08, 1.18) - legend_box_size = 4 - legend_lw = 1.5 - shrink = 0.8 - legend_fontsize = fontsize * 0.8 - else: - figsize = (18, 25) - legend_box_xy = (1.25, 1) - legend_box_size = 3 - legend_lw = 1.5 - shrink = 1.0 - legend_fontsize = fontsize * 0.8 - - # Add Watermark/Logo - if watermark: - logo_rect = [0.85, 0.15, 0.07, 0.07] - logo_off = False - else: - logo_rect = [0, 0, 0, 0] - logo_off = True - - # Using Matplotlib-based PMP Visualization Function to Generate Portrait Plot - fig, ax, cbar = portrait_plot( - data_all_nor, 
- xaxis_labels=xaxis_labels, - yaxis_labels=yaxis_labels, - cbar_label=cbar_label, - cbar_label_fontsize=fontsize * 1.2, - box_as_square=True, - vrange=var_range, - figsize=figsize, - cmap="RdYlBu_r", - cmap_bounds=cmap_bounds, - cbar_kw={"extend": "both", "shrink": shrink}, - missing_color="white", - legend_on=True, - legend_labels=["DJF", "MAM", "JJA", "SON"], - legend_box_xy=legend_box_xy, - legend_box_size=legend_box_size, - legend_lw=legend_lw, - legend_fontsize=legend_fontsize, - logo_rect=logo_rect, - logo_off=logo_off, - ) - - if add_vertical_line: - ax.axvline( - x=len(xaxis_labels) - len(highlight_models) - len(test_models), - color="k", - linewidth=3, - ) - - if landscape: - ax.set_xticklabels(xaxis_labels, rotation=45, va="bottom", ha="left") - ax.set_yticklabels(yaxis_labels, rotation=0, va="center", ha="right") - for xtick, color in zip(ax.get_xticklabels(), lable_colors): - xtick.set_color(color) - ax.yaxis.label.set_color(lable_colors[0]) - else: - ax.set_xticklabels(xaxis_labels, rotation=45, va="bottom", ha="left") - ax.set_yticklabels(yaxis_labels, rotation=0, va="center", ha="right") - ax.xaxis.label.set_color(lable_colors[0]) - for ytick, color in zip(ax.get_yticklabels(), lable_colors): - ytick.set_color(color) - - ax.tick_params(axis="x", labelsize=fontsize) - ax.tick_params(axis="y", labelsize=fontsize) - - cbar.ax.tick_params(labelsize=fontsize) - - # Add title - ax.set_title( - "Model Performance of Seasonal Climatology ({}, {})".format( - stat.upper(), region.upper() - ), - fontsize=fontsize * 1.5, - pad=30, - ) - - # Add Watermark - if watermark: - ax.text( - 0.5, - 0.5, - "E3SM-PCMDI", - transform=ax.transAxes, - fontsize=100, - color="black", - alpha=0.5, - ha="center", - va="center", - rotation=25, - ) - # Add data info - fig.text( - 1.25, - 0.9, - "Data version\n" + data_version, - transform=ax.transAxes, - fontsize=12, - color="black", - alpha=0.6, - ha="left", - va="top", - ) - - # Save figure as an image file - figname = ( - 
figure_template.replace("%(metric)", stat) - .replace("%(region)", region) - .replace("%(season)", "4season") - ) - figfile = os.path.join(outdir, figname) - fig.savefig(figfile, facecolor="w", bbox_inches="tight") - del ( - data_all_nor, - xaxis_labels, - yaxis_labels, - lable_colors, - ) - - return - - -def paracord_plot( - stat, - region, - season, - data_dict, - var_names, - var_units, - cmip_models, - test_models, - highlight_models, - file_template, - figure_template, - outdir, - identify_all_models, - data_version=None, - watermark=False, -): - - # construct plotting data - var_names = sorted(var_names) - var_units = sorted(var_units) - - # write out the results as a table - save_figure_data( - stat, region, season, var_names, var_units, data_dict, file_template, outdir - ) - - # add ensemble mean - model_data = data_dict[var_names].to_numpy() - - # construct the string for plot - model_list = data_dict[ - "model" - ].to_list() # cmip_models + test_models + ["CMIP6 MME"] - model_list_group2 = highlight_models + test_models - models_to_highlight = test_models + [ - data_dict["model"].to_list()[-1] - ] # ["CMIP6 MME"] - figsize = (40, 12) - fontsize = 20 - legend_ncol = int(7 * figsize[0] / 40.0) - legend_posistion = (0.50, -0.14) - # color map for markers - colormap = "tab20_r" - # color map for highlight lines - xcolors = [ - "#000000", - "#e41a1c", - "#ff7f00", - "#4daf4a", - "#f781bf", - "#a65628", - "#984ea3", - "#999999", - "#377eb8", - "#dede00", - ] - lncolors = xcolors[1 : len(test_models) + 1] + [xcolors[0]] - # Add Watermark/Logo - if watermark: - logo_rect = [0.85, 0.15, 0.07, 0.07] - logo_off = False - else: - logo_rect = [0, 0, 0, 0] - logo_off = True - - xlabel = "Metric" - if "rms" in stat: - ylabel = "RMS Error (" + stat.upper() + ")" - elif "std" in stat: - ylabel = "Standard Deviation (" + stat.upper() + ")" - else: - ylabel = "value (" + stat.upper() + ")" - - if not np.isnan(model_data).all(): - print(model_data.min(), model_data.max()) - 
title = "Model Performance of {} Climatology ({}, {})".format( - season.upper(), stat.upper(), region.upper() - ) - fig, ax = parallel_coordinate_plot( - model_data, - var_units, - model_list, - model_names2=model_list_group2, - group1_name="CMIP6", - group2_name="E3SM", - models_to_highlight=models_to_highlight, - models_to_highlight_colors=lncolors, - models_to_highlight_labels=models_to_highlight, - identify_all_models=identify_all_models, # hide indiviaul model markers for CMIP6 models - vertical_center="median", - vertical_center_line=True, - title=title, - figsize=figsize, - axes_labelsize=fontsize * 1.1, - title_fontsize=fontsize * 1.1, - yaxes_label=ylabel, - xaxes_label=xlabel, - colormap=colormap, - show_boxplot=False, - show_violin=True, - violin_colors=("lightgrey", "pink"), - legend_ncol=legend_ncol, - legend_bbox_to_anchor=legend_posistion, - legend_fontsize=fontsize * 0.85, - xtick_labelsize=fontsize * 0.95, - ytick_labelsize=fontsize * 0.95, - logo_rect=logo_rect, - logo_off=logo_off, - ) - - # Add Watermark - if watermark: - ax.text( - 0.5, - 0.5, - "E3SM-PCMDI", - transform=ax.transAxes, - fontsize=100, - color="black", - alpha=0.5, - ha="center", - va="center", - rotation=25, - ) - # Add data info - fig.text( - 1.25, - 0.9, - "Data version\n" + data_version, - transform=ax.transAxes, - fontsize=12, - color="black", - alpha=0.6, - ha="left", - va="top", - ) - - # Save figure as an image file - figname = ( - figure_template.replace("%(metric)", stat) - .replace("%(region)", region) - .replace("%(season)", season) - ) - figfile = os.path.join(outdir, figname) - fig.savefig(figfile, facecolor="w", bbox_inches="tight") - - del (model_data, model_list, model_list_group2, models_to_highlight) - - return - - -def mean_climate_metrics_plot(parameter): - # info for test simulation - test_mip = parameter.test_data_set.split(".")[0] - test_exp = parameter.test_data_set.split(".")[1] - test_product = parameter.test_data_set.split(".")[2] - test_case_id = 
parameter.test_data_set.split(".")[-1] - # output directory - outdir = os.path.join(parameter.output_path, test_mip, test_exp, test_case_id) - - # construct file template to save the figure data in .csv file - file_template = "%(metric)_%(region)_{}_{}_{}_{}_mean_climate_%(season)_{}.csv" - file_template = file_template.format( - parameter.run_type.upper(), - test_mip.upper(), - test_exp.upper(), - test_product.upper(), - parameter.period, - ) - # construct figure template - figure_template = file_template.replace("csv", parameter.ftype) - - # find the metrics data - test_file, refr_file, cmip_file = find_metrics_data(parameter) - - # load cmip metrics data - cmip_models, highlight_models, cmip_lib = load_cmip_metrics_data(cmip_file) - - # load test model metrics data - test_models, test_lib = load_test_model_data( - test_file, refr_file, test_mip, parameter.run_type - ) - # collect overlap sets of variables for plotting: - test_lib, cmip_lib, var_list, var_unit_list = fill_plot_var_and_units( - test_lib, cmip_lib - ) - # search overlap of regions in test and reference - regions = [] - for reg in parameter.regions: - if (reg in test_lib.regions) and (reg in cmip_lib.regions): - regions.append(reg) - - # merge the cmip and model data - merged_lib = cmip_lib.merge(test_lib) - - ################################### - # generate parallel coordinate plot - ################################### - parall_fig_dir = os.path.join(outdir, "paracord_annual") - if os.path.exists(parall_fig_dir): - shutil.rmtree(parall_fig_dir) - os.makedirs(parall_fig_dir) - print("Parallel Coordinate Plots (4 seasons), loop each region and metric....") - # add ensemble mean - for metric in [ - "rms_xyt", - "std-obs_xyt", - "std_xyt", - "rms_y", - "rms_devzm", - "std_xy_devzm", - "std-obs_xy_devzm", - ]: - for region in regions: - for season in ["ann"]: - data_dict = merged_lib.df_dict[metric][season][region] - data_dict.loc["CMIP MMM"] = cmip_lib.df_dict[metric][season][ - region - 
].mean(numeric_only=True, skipna=True) - data_dict.at["CMIP MMM", "model"] = "CMIP MMM" - if parameter.parcord_show_markers is not None: - identify_all_models = parameter.parcord_show_markers - else: - identify_all_models = True - paracord_plot( - metric, - region, - season, - data_dict, - var_list, - var_unit_list, - cmip_models, - test_models, - highlight_models, - file_template, - figure_template, - parall_fig_dir, - identify_all_models, - data_version=None, - watermark=False, - ) - del data_dict - - ################################### - # generate portrait plot - ################################### - ptrait_fig_dir = os.path.join(outdir, "portrait_4seasons") - if os.path.exists(ptrait_fig_dir): - shutil.rmtree(ptrait_fig_dir) - os.makedirs(ptrait_fig_dir) - print("Portrait Plots (4 seasons),loop each region and metric....") - ######################################################################### - seasons = ["djf", "mam", "jja", "son"] - data_dict = merged_lib.df_dict - for metric in ["rms_xy", "cor_xy", "bias_xy"]: - for region in regions: - print("working on {} in {} region".format(metrics_inquire(metric), region)) - if parameter.add_vertical_line is not None: - add_vertical_line = parameter.add_vertical_line - else: - add_vertical_line = False - port4sea_plot( - metric, - region, - seasons, - data_dict, - var_list, - var_unit_list, - cmip_models, - test_models, - highlight_models, - file_template, - figure_template, - ptrait_fig_dir, - add_vertical_line, - data_version=None, - watermark=False, - ) - - # release the data space - del (merged_lib, cmip_lib, test_lib, var_unit_list, var_list, regions) - - return diff --git a/zppy/templates/pcmdi_diags/mean_climate_plot_parser.py b/zppy/templates/pcmdi_diags/mean_climate_plot_parser.py deleted file mode 100755 index e73e4904..00000000 --- a/zppy/templates/pcmdi_diags/mean_climate_plot_parser.py +++ /dev/null @@ -1,373 +0,0 @@ -#!/usr/bin/env python -import ast -import glob -import os - -import numpy as np 
-import pandas as pd -from pcmdi_metrics.mean_climate.lib import pmp_parser - - -def create_mean_climate_plot_parser(): - parser = pmp_parser.PMPMetricsParser() - parser.add_argument( - "--test_model", - dest="test_model", - help="Defines target model for the metrics plots", - required=False, - ) - - parser.add_argument( - "--test_data_set", - type=str, - nargs="+", - dest="test_data_set", - help="List of observations or models to test " - + "against the reference_data_set", - required=False, - ) - - parser.add_argument( - "--test_data_path", - dest="test_data_path", - help="Path for the test climitologies", - required=False, - ) - - parser.add_argument( - "--period", dest="period", help="A simulation parameter", required=False - ) - - parser.add_argument( - "--run_type", dest="run_type", help="A post-process parameter", required=False - ) - - parser.add_argument( - "--regions", - type=ast.literal_eval, - dest="regions", - help="Regions on which to run the metrics", - required=False, - ) - - parser.add_argument( - "--pcmdi_data_set", - type=str, - nargs="+", - dest="pcmdi_data_set", - help="PCMDI CMIP dataset that is used as a " - + "CMIP multi-model ensembles against the test_data_set", - required=False, - ) - - parser.add_argument( - "--pcmdi_data_path", - dest="pcmdi_data_path", - help="Path for the PCMDI CMIP mean climate metrics data", - required=False, - ) - - parser.add_argument( - "--refr_model", - dest="refr_model", - help="A simulation parameter", - required=False, - ) - - parser.add_argument( - "--refr_data_set", - type=str, - nargs="+", - dest="refr_data_set", - help="List of reference models to test " + "against the reference_data_set", - required=False, - ) - - parser.add_argument( - "--refr_data_path", - dest="refr_data_path", - help="Path for the reference model climitologies", - required=False, - ) - - parser.add_argument( - "--output_path", - dest="output_path", - help="Path for the metrics plots", - required=False, - ) - - parser.add_argument( - 
"--parcord_show_markers", - dest="parcord_show_markers", - help="show markers for individual model in parallel coordinate plots", - required=False, - ) - parser.add_argument( - "--add_vertical_line", - dest="add_vertical_line", - help="draw a vertical line to separate test and reference models for portrait plots", - required=False, - ) - return parser - - -def metrics_inquire(name): - # list of metrics name and long-name - metrics = { - "std-obs_xy": "Spatial Standard Deviation (Reference)", - "std_xy": "Spatial Standard Deviation (Model)", - "std-obs_xyt": "Spatial-temporal Standard Deviation (Reference)", - "std_xyt": "Spatial-temporal Standard Deviation (Model)", - "std-obs_xy_devzm": "Standard Deviation of Deviation from Zonal Mean (Reference)", - "mean_xy": "Area Weighted Spatial Mean (Model)", - "mean-obs_xy": "Area Weighted Spatial Mean (Reference)", - "std_xy_devzm": "Standard Deviation of Deviation from Zonal Mean (Model)", - "rms_xyt": "Spatio-Temporal Root Mean Square Error", - "rms_xy": "Spatial Root Mean Square Error", - "rmsc_xy": "Centered Spatial Root Mean Square Error", - "cor_xy": "Spatial Pattern Correlation Coefficient", - "bias_xy": "Mean Bias (Model - Reference)", - "mae_xy": "Mean Absolute Difference (Model - Reference)", - "rms_y": "Root Mean Square Error of Zonal Mean", - "rms_devzm": "Root Mean Square Error of Deviation From Zonal Mean", - } - if name in metrics.keys(): - long_name = metrics[name] - - return long_name - - -def find_latest(pmprdir, mip, exp): - versions = sorted( - [ - r.split("/")[-1] - for r in glob.glob(os.path.join(pmprdir, mip, exp, "v????????")) - ] - ) - latest_version = versions[-1] - return latest_version - - -def shift_row_to_bottom(df, index_to_shift): - idx = [i for i in df.index if i != index_to_shift] - return df.loc[idx + [index_to_shift]] - - -def find_cmip_metric_data(pmprdir, data_set, var): - # cmip data for comparison - mip = data_set.split(".")[0] - exp = data_set.split(".")[1] - case_id = 
data_set.split(".")[2] - if case_id == "": - case_id = find_latest(pmprdir, mip, exp) - fpath = glob.glob(os.path.join(pmprdir, mip, exp, case_id, "{}.*.json".format(var))) - if len(fpath) < 1 and var == "rtmt": - fpath = glob.glob( - os.path.join(pmprdir, mip, exp, case_id, "{}.*.json".format("rt")) - ) - if len(fpath) > 0 and os.path.exists(fpath[0]): - cmip_list = fpath[0] - return_code = 0 - else: - print("Warning: cmip metrics data not found for {}....".format(var)) - print("Warning: remove {} from the metric list....".format(var)) - cmip_list = None - return_code = -99 - return cmip_list, return_code - - -def select_models(df, selected_models): - # Selected models only - model_names = df["model"].tolist() - for model_name in model_names: - drop_model = True - for keyword in selected_models: - if keyword in model_name: - drop_model = False - break - if drop_model: - df.drop(df.loc[df["model"] == model_name].index, inplace=True) - df.reset_index(drop=True, inplace=True) - - return df - - -def exclude_models(df, excluded_models): - # eclude models - model_names = df["model"].tolist() - for model_name in model_names: - drop_model = False - for keyword in excluded_models: - if keyword in model_name: - drop_model = True - break - if drop_model: - df.drop(df.loc[df["model"] == model_name].index, inplace=True) - df.reset_index(drop=True, inplace=True) - return df - - -def fill_plot_var_and_units(model_lib, cmip_lib): - # we define fixed sets of variables used for final plotting. 
- units_all = { - "prw": "[kg m$^{-2}$]", - "pr": "[mm d$^{-1}$]", - "prsn": "[mm d$^{-1}$]", - "prc": "[mm d$^{-1}$]", - "hfls": "[W m$^{-2}$]", - "hfss": "[W m$^{-2}$]", - "clivi": "[kg $m^{-2}$]", - "clwvi": "[kg $m^{-2}$]", - "psl": "[Pa]", - "evspsbl": "[kg m$^{-2} s^{-1}$]", - "rlds": "[W m$^{-2}$]", - "rldscs": "[W $m^{-2}$]", - "rtmt": "[W m$^{-2}$]", - "rsdt": "[W m$^{-2}$]", - "rlus": "[W m$^{-2}$]", - "rluscs": "[W m$^{-2}$]", - "rlut": "[W m$^{-2}$]", - "rlutcs": "[W m$^{-2}$]", - "rsds": "[W m$^{-2}$]", - "rsdscs": "[W m$^{-2}$]", - "rstcre": "[W m$^{-2}$]", - "rltcre": "[W m$^{-2}$]", - "rsus": "[W m$^{-2}$]", - "rsuscs": "[W m$^{-2}$]", - "rsut": "[W m$^{-2}$]", - "rsutcs": "[W m$^{-2}$]", - "ts": "[K]", - "tas": "[K]", - "tauu": "[Pa]", - "tauv": "[Pa]", - "sfcWind": "[m s$^{-1}$]", - "zg-500": "[m]", - "ta-200": "[K]", - "ta-850": "[K]", - "ua-200": "[m s$^{-1}$]", - "ua-850": "[m s$^{-1}$]", - "va-200": "[m s$^{-1}$]", - "va-850": "[m s$^{-1}$]", - "uas": "[m s$^{-1}$]", - "vas": "[m s$^{-1}$]", - "tasmin": "[K]", - "tasmax": "[K]", - "clt": "[%]", - } - - # loop variable list and find them in cmip and target models - variable_units = [] - variable_names = [] - for var in units_all.keys(): - # reorgnize cmip data - if var == "rtmt": - if ("rt" in cmip_lib.var_list) and ("rtmt" in model_lib.var_list): - # special case (rt is used in pcmdi datasets, but rtmt is for cmip) - cmip_lib.var_list = list( - map(lambda x: x.replace("rt", "rtmt"), cmip_lib.var_list) - ) - for stat in cmip_lib.df_dict: - for season in cmip_lib.df_dict[stat]: - for region in cmip_lib.df_dict[stat][season]: - cmip_lib.df_dict[stat][season][region]["rtmt"] = ( - cmip_lib.df_dict[stat][season][region].pop("rt") - ) - - if var in model_lib.var_list and var in cmip_lib.var_list: - varunt = var + "\n" + str(units_all[var]) - indv1 = cmip_lib.var_list.index(var) - indv2 = model_lib.var_list.index(var) - cmip_lib.var_unit_list[indv1] = varunt - model_lib.var_unit_list[indv2] = varunt 
- variable_units.append(varunt) - variable_names.append(var) - del (indv1, indv2, varunt) - else: - print("Warning: {} is not found in metrics data".format(var)) - print( - "Warning: {} is possibly not included as default in fill_plot_var_and_units()".format( - var - ) - ) - - # sanity check for cmip data - for stat in cmip_lib.df_dict: - for season in cmip_lib.df_dict[stat]: - for region in cmip_lib.df_dict[stat][season]: - df = pd.DataFrame(cmip_lib.df_dict[stat][season][region]) - for i, model in enumerate(df["model"].tolist()): - if model in ["E3SM-1-0", "E3SM-1-1-ECA"]: - idxs = df[df.iloc[:, 0] == model].index - df.loc[idxs, "ta-850"] = np.nan - del idxs - if model in ["CIESM"]: - idxs = df[df.iloc[:, 0] == model].index - df.loc[idxs, "pr"] = np.nan - del idxs - cmip_lib.df_dict[stat][season][region] = df - del df - - return model_lib, cmip_lib, variable_names, variable_units - - -def find_metrics_data(parameter): - pmp_set = parameter.pcmdi_data_set - pmp_path = parameter.pcmdi_data_path - test_set = parameter.test_data_set - test_path = parameter.test_data_path - refr_set = parameter.refr_data_set - refr_path = parameter.refr_data_path - run_type = parameter.run_type - debug = parameter.debug - - test_mip = test_set.split(".")[0] - test_exp = test_set.split(".")[1] - test_case_id = test_set.split(".")[-1] - test_dir = os.path.join(test_path, test_mip, test_exp, test_case_id) - if run_type == "model_vs_model": - refr_mip = refr_set.split(".")[0] - refr_exp = refr_set.split(".")[1] - refr_case_id = refr_set.split(".")[-1] - refr_dir = os.path.join(refr_path, refr_mip, refr_exp, refr_case_id) - - variables = [ - s.split("/")[-1].split("_")[0] - for s in glob.glob(os.path.join(test_dir, "*{}.json".format(test_case_id))) - if os.path.exists(s) - ] - variables = list(set(variables)) - - # find list of metrics data files - test_list = [] - refr_list = [] - cmip_list = [] - - for vv in variables: - ftest = glob.glob( - os.path.join(test_dir, 
"{}_*_{}.json".format(vv, test_case_id)) - ) - fcmip, rcode = find_cmip_metric_data(pmp_path, pmp_set, vv) - if rcode == 0: - if len(ftest) > 0 and len(fcmip) > 0: - for fx in ftest: - test_list.append(fx) - cmip_list.append(fcmip) - if debug: - print(ftest[0].split("/")[-1], fcmip.split("/")[-1]) - if run_type == "model_vs_model": - frefr = glob.glob( - os.path.join(refr_dir, "{}_*_{}.json".format(vv, refr_case_id)) - ) - if len(frefr) > 0: - for fr in frefr: - refr_list.append(fr) - if debug: - print( - ftest[0].split("/")[-1], - frefr[0].split("/")[-1], - fcmip.split("/")[-1], - ) - del frefr - del (ftest, fcmip) - return test_list, refr_list, cmip_list diff --git a/zppy/templates/pcmdi_diags/observation_to_cmip.py b/zppy/templates/pcmdi_diags/observation_to_cmip.py deleted file mode 100755 index 7b6eccee..00000000 --- a/zppy/templates/pcmdi_diags/observation_to_cmip.py +++ /dev/null @@ -1,85 +0,0 @@ -#! /usr/bin/env python -import glob -import json -import os -import shutil -import subprocess - -# command = shlex.split("bash -c 'source init_env && env'") -# proc = subprocess.Popen(command, stdout = subprocess.PIPE) - -srcdir = "/lcrc/group/e3sm/ac.szhang/acme_scratch/e3sm_project/test_zppy_pmp/zppy" -cmip_var = json.load( - open(os.path.join(srcdir, "zppy/templates/pcmdi_diags", "cmip_var.json")) -) -ref_dic = json.load( - open(os.path.join(srcdir, "zppy/templates/pcmdi_diags", "reference_data.json")) -) - -output_path = ( - "/lcrc/soft/climate/e3sm_diags_data/obs_for_e3sm_diags/time-series/NOAA_20C" -) - - -default_metadata = os.path.join( - srcdir, "zppy/templates/pcmdi_diags/default_metadata.json" -) -tables_path = "/lcrc/group/e3sm/diagnostics/cmip6-cmor-tables/Tables" - -input_path = os.path.join(output_path, "input_data") -if not os.path.exists(input_path): - os.makedirs(input_path) - -raw_data_path = "/lcrc/group/acme/ac.szhang/acme_scratch/data/CVDP_RGD/NOAA_20C" -fpaths = sorted(glob.glob(os.path.join(raw_data_path, "{}*.nc".format("NOAA_20C")))) -for 
fpath in fpaths: - fname = fpath.split("/")[-1] - fname = fname.replace("-", ".") - fout = "_".join(fname.split(".")[2:]) - fout = os.path.join(input_path, fout.replace("_nc", ".nc")) - print("input: ", fpath) - print("output: ", fout) - if os.path.islink(fout): - os.remove(fout) - os.symlink(fpath, fout) - else: - os.symlink(fpath, fout) - del (fname, fout) -del (fpaths, raw_data_path) - -for key in cmip_var.keys(): - cmip_var_list = ", ".join(cmip_var[key]) - print(cmip_var_list) - subprocess.call( - [ - "e3sm_to_cmip", - "--output-path", - output_path, - "--var-list", - cmip_var_list, - "--input-path", - input_path, - "--user-metadata", - default_metadata, - "--tables-path", - tables_path, - ] - ) - -# move data to target location -opaths = sorted(glob.glob(os.path.join(output_path, "CMIP6/CMIP/*/*/*/*/*/*/*/*/*.nc"))) -for opath in opaths: - outfile = opath.split("/")[-1] - outname = outfile.replace("-", "_").split("_") - fout = "_".join([outname[0], outname[-2], outname[-1]]) - fout = os.path.join(output_path, fout.replace("_nc", ".nc")) - if os.path.exists(opath): - os.rename(opath, fout) - del (outfile, outname, fout) - -# clean up directory -if os.path.exists(os.path.join(output_path, "CMIP6")): - shutil.rmtree(os.path.join(output_path, "CMIP6")) - -if os.path.exists(input_path): - shutil.rmtree(input_path) diff --git a/zppy/templates/pcmdi_diags/plot_mean_climate.py b/zppy/templates/pcmdi_diags/plot_mean_climate.py deleted file mode 100755 index e4abd119..00000000 --- a/zppy/templates/pcmdi_diags/plot_mean_climate.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/bin/env python -############################################################################## -# This model is used to generate mean climate diagnostic figures -# Author: Shixuan Zhang (shixuan.zhang@pnnl.gov) -############################################################################# -import os - -from mean_climate_plot_driver import mean_climate_metrics_plot -from mean_climate_plot_parser import 
create_mean_climate_plot_parser - - -def main( - run_type, - test_data_set, - test_data_dir, - test_period, - refr_data_set, - refr_data_dir, - refr_period, - cmip_data_set, - pcmdi_data_dir, - results_dir, -): - parser = create_mean_climate_plot_parser() - parameter = parser.get_parameter(argparse_vals_only=False) - - parameter.pcmdi_data_set = cmip_data_set - parameter.pcmdi_data_path = pcmdi_data_dir - - parameter.period = test_period - parameter.test_product = test_data_set.split(".")[2] - parameter.test_data_set = test_data_set - parameter.test_data_path = os.path.join(test_data_dir, "mean_climate") - parameter.run_type = run_type - - if parameter.run_type == "model_vs_model": - parameter.refr_data_set = refr_data_set - parameter.refr_period = refr_period - parameter.refr_data_path = os.path.join(refr_data_dir, "mean_climate") - - parameter.output_path = os.path.join(results_dir, "graphics", "mean_climate") - parameter.ftype = "png" - parameter.debug = False - parameter.regions = ["global", "NHEX", "SHEX", "TROPICS"] - parameter.parcord_show_markers = False - parameter.add_vertical_line = True - - mean_climate_metrics_plot(parameter) - - -if __name__ == "__main__": - cmip_data_set = "cmip6.amip.v20241029" - pcmdi_data_dir = ( - "/lcrc/soft/climate/e3sm_diags_data/obs_for_e3sm_diags/pcmdi_data/mean_climate" - ) - results_dir = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.szhang/e3sm-pcmdi/merged_data/model_vs_obs_1985-2014" - run_type = "model_vs_obs" - - test_data_set = "e3sm.amip.v3-LR.all.v20241030" - test_data_dir = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.szhang/e3sm-pcmdi/merged_data/model_vs_obs_1985-2014" - test_period = "1985-2014" - - if run_type == "model_vs_obs": - refr_data_set = "" - refr_data_dir = "" - refr_period = "" - else: - print("need to provide reference data information ...") - refr_data_set = "e3sm.historical.v3-LR.all.v20241030" - refr_data_dir = 
"/lcrc/group/e3sm/public_html/diagnostic_output/ac.szhang/e3sm-pcmdi/merged_data/model_vs_obs_1985-2014" - refr_period = "1985-2014" - - main( - run_type, - test_data_set, - test_data_dir, - test_period, - refr_data_set, - refr_data_dir, - refr_period, - cmip_data_set, - pcmdi_data_dir, - results_dir, - ) diff --git a/zppy/templates/pcmdi_diags/post_merge_clim_jsons.py b/zppy/templates/pcmdi_diags/post_merge_clim_jsons.py deleted file mode 100755 index 8e265a14..00000000 --- a/zppy/templates/pcmdi_diags/post_merge_clim_jsons.py +++ /dev/null @@ -1,164 +0,0 @@ -#!/usr/bin/env python -import copy -import glob -import json -import os - -from pcmdi_metrics.utils import StringConstructor -from pcmdi_metrics.variability_mode.lib import dict_merge - - -def main(): - mip = "e3sm" - exp = "amip" - case_id = "v20241030" - period = "1985-2014" - metric_collection = "mean_climate" - run_type = "model_vs_obs" - data_path = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.szhang/e3sm-pcmdi" - obs_selection = "default" - - # target here is to merge all product models at all realizations to one-big file - # product = ['v3.LR'] - # realm = ["0101", "0151", "0201"] - - # template for diagnostic directory tree - # construct the directory for specific mpi, exp and case - pmprdir_template = StringConstructor( - "%(product).%(exp)_%(realization)/pcmdi_diags/%(run_type)_%(period)" - ) - pmprdir = os.path.join( - data_path, - pmprdir_template( - mip=mip, - exp=exp, - case_id=case_id, - product="*", - realization="*", - run_type=run_type, - period=period, - ), - ) - print("pmprdir:", pmprdir) - - # template for metrics directory tree - json_file_dir_template = StringConstructor( - "metrics_results/%(metric_collection)/%(mip)/%(exp)/%(case_id)" - ) - json_file_dir = os.path.join( - pmprdir, - json_file_dir_template( - metric_collection=metric_collection, - mip=mip, - exp=exp, - case_id=case_id, - ), - ) - print("json_file_dir:", json_file_dir) - - # template for output directory tree - 
out_file_dir_template = StringConstructor( - "%(run_type)_%(period)/%(metric_collection)/%(mip)/%(exp)/%(case_id)" - ) - out_file_dir = os.path.join( - data_path, - "merged_data", - out_file_dir_template( - metric_collection=metric_collection, - mip=mip, - exp=exp, - case_id=case_id, - run_type=run_type, - period=period, - ), - ) - print("out_file_dir:", out_file_dir) - variables = [ - s.split("/")[-1] - for s in glob.glob( - os.path.join( - json_file_dir, - "*", - ) - ) - if os.path.isdir(s) - ] - variables = list(set(variables)) - print("variables:", variables) - - for var in variables: - # json merge - # try: - if 1: - merge_json( - mip, exp, case_id, var, obs_selection, json_file_dir, out_file_dir - ) - """ - except Exception as err: - print("ERROR: ", mip, exp, var, err) - pass - """ - - -def merge_json(mip, exp, case_id, var, obs, json_file_dir, out_file_dir): - print("json_file_dir:", json_file_dir) - json_file_template = StringConstructor( - "%(var)_%(model)_%(realization)_*_%(obs)_%(case_id).json" - ) - # Search for individual JSONs - json_files = sorted( - glob.glob( - os.path.join( - json_file_dir, - var, - json_file_template( - var=var, - model="*", - realization="*", - obs=obs, - case_id=case_id, - ), - ) - ) - ) - - print("json_files:", json_files) - - # Remove diveDown JSONs and previously generated merged JSONs if included - json_files_revised = copy.copy(json_files) - for j, json_file in enumerate(json_files): - filename_component = json_file.split("/")[-1].split(".")[0].split("_") - if "allModels" in filename_component: - json_files_revised.remove(json_file) - elif "allRuns" in filename_component: - json_files_revised.remove(json_file) - - # Load individual JSON and merge to one big dictionary - for j, json_file in enumerate(json_files_revised): - print(j, json_file) - f = open(json_file) - dict_tmp = json.loads(f.read()) - if j == 0: - dict_final = dict_tmp.copy() - else: - dict_merge(dict_final, dict_tmp) - f.close() - - # Dump final dictionary 
to JSON - if not os.path.exists(out_file_dir): - os.makedirs(out_file_dir) - - final_json_filename = StringConstructor("%(var)_%(mip)_%(exp)_%(case_id).json")( - var=var, mip=mip, exp=exp, case_id=case_id - ) - final_json_file = os.path.join(out_file_dir, final_json_filename) - if os.path.exists(final_json_file): - # previously generated merged JSONs if included - os.remove(final_json_file) - - with open(final_json_file, "w") as fp: - json.dump(dict_final, fp, sort_keys=True, indent=4) - - -if __name__ == "__main__": - main() diff --git a/zppy/templates/pcmdi_diags/process_sftlf.py b/zppy/templates/pcmdi_diags/process_sftlf.py deleted file mode 100755 index 032625b1..00000000 --- a/zppy/templates/pcmdi_diags/process_sftlf.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/env python -################################################################## -# This script attemts to generate land/sea mask for a given input -################################################################## -import datetime -import os -import sys - -import cdms2 as cdm -import cdutil -import numpy as np - -if len(sys.argv) > 4: - modvar = sys.argv[1] - modname = sys.argv[2] - modpath = sys.argv[3] - modpath_lf = sys.argv[4] -else: - print("ERROR: must specify {modname},{modpath},{outpath} info") - exit() - -# Set netcdf file criterion - turned on from default 0s -cdm.setCompressionWarnings(0) # Suppress warnings -cdm.setNetcdfShuffleFlag(0) -cdm.setNetcdfDeflateFlag(1) -cdm.setNetcdfDeflateLevelFlag(9) -cdm.setAutoBounds(1) - -cdm.setNetcdfDeflateLevelFlag(9) -cdm.setAutoBounds(1) -f_h = cdm.open(modpath) -var = f_h(modvar)[0, ...] 
-if var.ndim == 2: - landMask = cdutil.generateLandSeaMask(var) - # Deal with land values - landMask[np.greater(landMask, 1e-15)] = 100 - # Rename - landMask = cdm.createVariable( - landMask, id="sftlf", axes=var.getAxisList(), typecode="float32" - ) - landMask.associated_files = modpath - landMask.long_name = "Land Area Fraction" - landMask.standard_name = "land_area_fraction" - landMask.units = "%" - landMask.setMissing(1.0e20) - landMask.id = "sftlf" # Rename - - # Write variables to file - print("output sftlf:", modpath_lf) - if os.path.isfile(modpath_lf): - os.remove(modpath_lf) - fOut = cdm.open(modpath_lf, "w") - # Use function to write standard global atts - fOut.Conventions = "CF-1.0" - fOut.history = "File processed: " + datetime.datetime.now().strftime("%Y%m%d") - fOut.pcmdi_metrics_version = "0.1-alpha" - fOut.pcmdi_metrics_comment = "PCMDI metrics package" - fOut.write(landMask.astype("float32")) - fOut.close() - f_h.close() - del (f_h, landMask, fOut, var) diff --git a/zppy/templates/pcmdi_diags/reference_alias.json b/zppy/templates/pcmdi_diags/reference_alias.json deleted file mode 100755 index a23f6349..00000000 --- a/zppy/templates/pcmdi_diags/reference_alias.json +++ /dev/null @@ -1,340 +0,0 @@ -{ - "rlds" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rldscs" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rlus" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rsds" : { - "default" : "ceres_ebaf_v4.1", - "alternate" 
: "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rsdscs" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - - "rsus" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rsuscs": { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rstcre" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rltcre" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rlut" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rlutcs" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rsdt" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - 
"alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rsut" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rsutcs" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rtmt" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "pr" : { - "default" : "GPCP_v2.3", - "alternate" : "GPCP_v2.2", - "alternate1" : "GPCP_1DD", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "prc" : { - "default" : "ERA5", - "alternate" : "NOAA-20C" - }, - "prsn" : { - "default" : "ERA5", - "alternate" : "NOAA-20C" - }, - "prw" : { - "default" : "ERA5", - "alternate" : "NOAA-20C", - "alternate1" : "MERRA2", - "alternate2" : "ERA-Interim", - "alternate3" : "NOAA-20C" - }, - "psl" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "ps" : { - "default" : "ERA5", - "alternate " : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "huss" : { - "default" : "MERRA2", - "alternate" : "NOAA-20C", - "alternate1" : "ERA5", - "alternate2" : "ERA-Interim" - }, - "ta" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "ua" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "va" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : 
"NOAA-20C" - }, - "hur" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "wap" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "zg" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "o3" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "hus" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "uas" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "vas" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "tauu" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C", - "alternate3" : "COREv2-Flux" - }, - "taux" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C", - "alternate3" : "COREv2-Flux" - }, - "tauv" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C", - "alternate3" : "COREv2-Flux" - }, - "tauy" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C", - "alternate3" : "COREv2-Flux" - }, - "tas" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "ts" : { - "default" : "ERA5", - "alternate" : "NOAA-20C", - "alternate1" : "HadISST2" - }, - "sst" : { - "default" : "ERA5", - "alternate" : "NOAA-20C", - "alternate1" : "HadISST2" - }, - "sfcWind" : { - "default" : "NOAA-20C", - "alternate" : "ERA5", - "alternate1" : "MERRA2", - "alternate2" : "ERA-Interim" - }, - "hfls" : { - "default" : "ERA5", 
- "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C", - "alternate3" : "OAFlux" - }, - "hfss" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C", - "alternate3" : "OAFlux" - }, - "evspsbl" : { - "default" : "ERA5", - "alternate" : "NOAA-20C" - }, - "clt" : { - "default" : "ERA5", - "alternate3" : "NOAA-20C" - }, - "clwvi" : { - "default" : "ERA5", - "alternate" : "NOAA-20C" - }, - "clivi" : { - "default" : "ERA5", - "alternate" : "NOAA-20C" - }, - "tasmin" : { - "default" : "MERRA2" - }, - "tasmax" : { - "default" : "MERRA2" - }, - "sic" : { - "default" : "HadSST2" - }, - "tos" : { - "default" : "HadSST2" - }, - "zos" : { - "default" : "AVISO", - "alternate" : "HadISST" - }, - "sos" : { - "default" : "Aquarius", - "alternate" : "HadISST" - } -} diff --git a/zppy/templates/pcmdi_diags/regions_specs.json b/zppy/templates/pcmdi_diags/regions_specs.json deleted file mode 100755 index 811eb1e9..00000000 --- a/zppy/templates/pcmdi_diags/regions_specs.json +++ /dev/null @@ -1,263 +0,0 @@ -{ - "global": { - "domain": { "latitude":[-90.0, 90.0]} - }, - "NH": { - "domain": { "latitude":[0.0, 90.0]} - }, - "SH": { - "domain": { "latitude":[-90.0, 0]} - }, - "NHEX": { - "domain": { "latitude":[30.0, 90.0]} - }, - "SHEX": { - "domain": { "latitude":[-90.0, -30.0]} - }, - "TROPICS": { - "domain": { "latitude":[-30.0, 30.0]} - }, - "90S50S": { - "domain": { "latitude":[-90.0, -50.0]} - }, - "50S20S": { - "domain": { "latitude":[-50.0, -20.0]} - }, - "20S20N": { - "domain": { "latitude":[-20.0, 20.0]} - }, - "20N50N": { - "domain": { "latitude":[20.0, 50.0]} - }, - "50N90N": { - "domain": { "latitude":[50.0, 90.0]} - }, - "ocean_NH": { - "value": 0.0, - "domain": { "latitude":[0.0, 90.0]} - }, - "ocean_SH": { - "value": 0.0, - "domain": { "latitude":[-90.0, 0.0]} - }, - "land_NH": { - "value": 100, - "domain": { "latitude":[0.0, 90.0]} - }, - "land_SH": { - "value": 100, - "domain": { 
"latitude":[-90.0, 0.0]} - }, - "land_NHEX": { - "value": 100, - "domain": { "latitude":[30.0, 90.0]} - }, - "land_SHEX": { - "value": 100, - "domain": { "latitude":[-90.0, -30.0]} - }, - "land_TROPICS": { - "value": 100, - "domain": { "latitude":[-30.0, 30.0]} - }, - "land": { - "value": 100 - }, - "ocean_NHEX": { - "value": 0, - "domain": { "latitude":[30.0, 90.0]} - }, - "ocean_SHEX": { - "value": 0, - "domain": { "latitude":[-90.0, -30.0]} - }, - "ocean_TROPICS": { - "value": 0, - "domain": { "latitude":[30.0, 30.0]} - }, - "ocean": { - "value": 0 - }, - "ocean_50S50N": { - "value": 0.0, - "domain": { "latitude":[-50.0, 50.0]} - }, - "ocean_50S20S": { - "value": 0.0, - "domain": { "latitude":[-50.0, -20.0]} - }, - "ocean_20S20N": { - "value": 0.0, - "domain": { "latitude":[-20.0, 20.0]} - }, - "ocean_20N50N": { - "value": 0.0, - "domain": { "latitude":[20.0, 50.0]} - }, - "ocean_50N90N": { - "value": 0.0, - "domain": { "latitude":[50.0, 90.0]} - }, - "ocean_90S50S": { - "value": 0.0, - "domain": { "latitude":[-90.0, -50.0]} - }, - "NAM": { - "domain": { "latitude":[20.0, 90], - "longitude":[-180, 180]} - }, - "NAO": { - "domain": { "latitude":[20.0, 80], - "longitude":[-90, 40]} - }, - "SAM": { - "domain": { "latitude":[-20.0, -90], - "longitude":[0, 360]} - }, - "PSA1": { - "domain": { "latitude":[-20.0, -90], - "longitude":[0, 360]} - }, - "PSA2": { - "domain": { "latitude":[-20.0, -90], - "longitude":[0, 360]} - }, - "PNA": { - "domain": { "latitude":[20.0, 85], - "longitude":[120, 240]} - }, - "PDO": { - "domain": { "latitude":[20.0, 70], - "longitude":[110, 260]} - }, - "AMO": { - "domain": { "latitude":[0.0, 70], - "longitude":[-80, 0]} - }, - "AllMW": { - "domain": { "latitude":[-40.0, 45.0], - "longitude":[0.0, 360.0]} - }, - "AllM": { - "domain": { "latitude":[-45.0, 45.0], - "longitude":[0.0, 360.0]} - }, - "NAMM": { - "domain": { "latitude":[0.0, 45.0], - "longitude":[210.0, 310.0]} - }, - "SAMM": { - "domain": { "latitude":[-45.0, 0.0], - 
"longitude":[240.0, 330.0]} - }, - "NAFM": { - "domain": { "latitude":[0.0, 45.0], - "longitude":[310.0, 60.0]} - }, - "SAFM": { - "domain": { "latitude":[-45.0, 0.0], - "longitude":[0.0, 90.0]} - }, - "ASM": { - "domain": { "latitude":[0.0, 45.0], - "longitude":[60.0, 180.0]} - }, - "AUSM": { - "domain": { "latitude":[-45.0, 0.0], - "longitude":[90.0, 160.0]} - }, - "AIR": { - "domain": { "latitude":[7.0, 25.0], - "longitude":[65.0, 85.0]} - }, - "AUS": { - "domain": { "latitude":[-20.0, -10.0], - "longitude":[120.0, 150.0]} - }, - "Sahel": { - "domain": { "latitude":[13.0, 18.0], - "longitude":[-10.0, 10.0]} - }, - "GoG": { - "domain": { "latitude":[0.0, 5.0], - "longitude":[-10.0, 10.0]} - }, - "NAmo": { - "domain": { "latitude":[20.0, 37.0], - "longitude":[-112.0, -103.0]} - }, - "SAmo": { - "domain": { "latitude":[-20.0, 2.5], - "longitude":[-65.0, -40.0]} - }, - "Nino34": { - "value": 0.0, - "domain": { "latitude":[-5.0, 5.0], - "longitude":[190.0, 240.0]} - }, - "Nino3": { - "value": 0.0, - "domain": { "latitude":[-5.0, 5.0], - "longitude":[210.0, 270.0]} - }, - "Nino4": { - "value": 0.0, - "domain": { "latitude":[-5.0, 5.0], - "longitude":[160.0, 210.0]} - }, - "ONI": { - "value": 0.0, - "domain": { "latitude":[-5.0, 5.0], - "longitude":[190.0, 240.0]} - }, - "Nino12": { - "value": 0.0, - "domain": { "latitude":[-10.0, 0.0], - "longitude":[270.0, 280.0]} - }, - "AMMS": { - "value": 0.0, - "domain": { "latitude":[-15.0, -5.0], - "longitude":[-20.0, 10.0]} - }, - "AMMN": { - "value": 0.0, - "domain": { "latitude":[5.0, 15.0], - "longitude":[-50.0, -20.0]} - }, - "ATL3": { - "value": 0.0, - "domain": { "latitude":[-3.0, 3.0], - "longitude":[-20.0, 0.0]} - }, - "TSA": { - "value": 0.0, - "domain": { "latitude":[-20.0, 0.0], - "longitude":[-30.0, 10.0]} - }, - "TNA": { - "value": 0.0, - "domain": { "latitude":[5.5, 23.5], - "longitude":[302.5, 345.0]} - }, - "TIO": { - "value": 0.0, - "domain": { "latitude":[-15.0, 15.0], - "longitude":[40.0, 115.0]} - }, - 
"IODE": { - "value": 0.0, - "domain": { "latitude":[-10.0, 10.0], - "longitude":[50.0, 70.0]} - }, - "IODW": { - "value": 0.0, - "domain": { "latitude":[-10.0, 0.0], - "longitude":[90.0, 110.0]} - }, - "SOCN": { - "value": 0.0, - "domain": { "latitude":[-70.0, -50.0], - "longitude":[0.0, 360.0]} - } -} From f7fcaaac1f5d49382255957b7ec06bc095f14bab Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Mon, 30 Dec 2024 23:00:29 -0600 Subject: [PATCH 06/23] remove rltcre and rstcre from the default list of cmip_vars as these variables were not included in e3sm_to_cmip module --- zppy/defaults/default.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zppy/defaults/default.ini b/zppy/defaults/default.ini index 0a583572..7a17a8d3 100755 --- a/zppy/defaults/default.ini +++ b/zppy/defaults/default.ini @@ -162,7 +162,7 @@ cmip_tableID = string(default="Amon") # variables in the cmip6 table that can be potentially used by pcmdi # this list depends on the definition of cmip variable # required for "mean climate" diagnostics -cmip_vars = string(default="pr,prw,psl,rlds,rldscs,rltcre,rstcre,rlut,rlutcs,rsut,rsutcs,rsds,rsdscs,rsdt,rsus,rsuscs,rlus,rsut,rtmt,sfcWind,tas,tauu,tauv,ts,ta,ua,va,zg") +cmip_vars = string(default="pr,prw,psl,rlds,rldscs,rlut,rlutcs,rsut,rsutcs,rsds,rsdscs,rsdt,rsus,rsuscs,rlus,rsut,rtmt,sfcWind,tas,tauu,tauv,ts,ta,ua,va,zg") # options shared by pcmdi pmp_debug = string(default=False) # flag to process the land/sea mask within pcmdi From 7a52e329d4177072a7db88dc7dd928b3b59ac9b1 Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Fri, 3 Jan 2025 02:03:54 -0600 Subject: [PATCH 07/23] adjustment on the workflow --- zppy/defaults/default.ini | 30 +- zppy/templates/pcmdi_diags.bash | 635 ++++++++++++++++++++------------ 2 files changed, 419 insertions(+), 246 deletions(-) diff --git a/zppy/defaults/default.ini b/zppy/defaults/default.ini index 7a17a8d3..8ab666e7 100755 --- a/zppy/defaults/default.ini +++ b/zppy/defaults/default.ini @@ -163,8 
+163,6 @@ cmip_tableID = string(default="Amon") # this list depends on the definition of cmip variable # required for "mean climate" diagnostics cmip_vars = string(default="pr,prw,psl,rlds,rldscs,rlut,rlutcs,rsut,rsutcs,rsds,rsdscs,rsdt,rsus,rsuscs,rlus,rsut,rtmt,sfcWind,tas,tauu,tauv,ts,ta,ua,va,zg") -# options shared by pcmdi -pmp_debug = string(default=False) # flag to process the land/sea mask within pcmdi generate_sftlf = string(default=True) # variables to be used by the pcmdi diagnostics @@ -174,11 +172,6 @@ vars = string(default="pr,prw,psl,rlds,rldscs,rltcre,rstcre,rlut,rlutcs,rsds,rsd sets = string_list(default=list("mean_climate","variability_mode_atm","variability_mode_cpl","enso")) # options to identify subset of pcmdi drivers ("mean_climate","variability_mode","enso") subset = string(default="") -#options for cmip model metrics data from pcmdi -#group of pcmdi generated cmip metrics data (mip.exp.version) -pcmdi_data_set=string(default="cmip6.historical.v20220928") -# path to pcmdi generated cmip metrics data -pcmdi_data_path=string(default="") ########################################################################################## # below followed the setup in e3sm_diag but used for PCMDI workflow ########################################################################################## @@ -255,10 +248,9 @@ enso_groups = string(default="ENSO_perf,ENSO_proc,ENSO_tel") ########################################################################################## # model data grid after remapping grid = string(default="180x360_aave") -#flag to turn on regional mean climate metrics -regional = string(default="y") #default regions for mean climate metrics data -regions = string(default="global,ocean,land,NHEX,SHEX,TROPICS,NHEX_ocean,SHEX_ocean,NHEX_land,SHEX_land,ocean_50S50N") +#more options can be found at "regions_specs" +regions = string(default="global,ocean,land,NHEX,SHEX,TROPICS") # save derived climatology data save_test_clims = string(default=True) 
# Regridding by pcmdi (required for mean climate) @@ -272,6 +264,17 @@ regrid_tool = string(default="esmf") regrid_method = string(default="regrid2") # OPTIONS: 'linear','conservative', only if tool is esmf regrid_method_ocn = string(default="conservative") +#options for synthetic plots with cmip model metrics data from pcmdi +########################################################################################## +sythentic_plots = string(default="n") +# path to pcmdi generated cmip metrics data +pcmdi_data_path = string(default="") +#group of pcmdi generated cmip metrics data (mip.exp.version) +pcmdi_cmip_mclm = string(default="cmip6.historical.v20220928") +pcmdi_cmip_mov = string(default="cmip6.historical.v20220825") +pcmdi_cmip_enso = string(default="cmip6.historical.v20210620") +# options shared by pcmdi +pmp_debug = string(default=False) # setup for parallel coordinate plots (hide makers for sigle model) parcord_show_markers = string(default=False) # setup for portrait plots (add vertical line to separate test and reference models) @@ -282,8 +285,10 @@ portrait_vertical_line = string(default=True) cfg = string(default=None) vars = string(default=None) grid = string(default=None) - cmip_metadata = string(default=None) - pcmdi_data_set = string(default=None) + sythentic_plots = string(default=None) + pcmdi_cmip_mclm = string(default=None) + pcmdi_cmip_mov = string(default=None) + pcmdi_cmip_enso = string(default=None) pcmdi_data_path = string(default=None) derived_variable = string(default=None) reference_alias = string(default=None) @@ -311,7 +316,6 @@ portrait_vertical_line = string(default=True) regrid_method_ocn = string(default=None) obs_sets = string(default=None) regions = string(default=None) - regional = string(default=None) save_test_clims = string(default=None) seasons = string(default=None) RmDomainMean = string(default=None) diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash index 058f69e0..3097567c 100755 ---
a/zppy/templates/pcmdi_diags.bash +++ b/zppy/templates/pcmdi_diags.bash @@ -87,7 +87,7 @@ create_links_acyc_climo() done done #derive annual cycle climate mean - dofm=(15 46 74 105 135 166 196 227 258 288 319 349) #middle day of month + dofm=(15 46 74 105 125 166 196 227 258 288 319 349) #middle day of month for month in `seq 1 1 12`; do MM=`printf "%02d" ${month}` @@ -98,7 +98,7 @@ create_links_acyc_climo() combined_name="${name_key}.${v}.${begin_year}01-${end_year}12.AC.${case_id}.nc" ncrcat -O -d time,0, ${v}_clm_*.nc ${combined_name} #modify time to avoid issues in pcmdi calculation - ncap2 -O -h -s 'time[time]={15.5, 45, 74.5, 105, 135.5, 166, 196.5, 227.5, 258, 288.5,319, 349.5};time_bnds[time,bnds]={0, 31, 31, 59, 59, 90, 90, 120, 120, 151, 151, 181, 181, 212, 212, 243, 243, 273, 273, 304, 304, 334, 334, 365.};time@units="days since 1850-01-01 00:00:00";time@calendar="noleap";time@bounds="time_bnds"' ${combined_name} ${combined_name} + ncap2 -O -h -s 'time[time]={15.5, 45, 74.5, 105, 125.5, 166, 196.5, 227.5, 258, 288.5,319, 349.5};time_bnds[time,bnds]={0, 31, 31, 59, 59, 90, 90, 120, 120, 151, 151, 181, 181, 212, 212, 243, 243, 273, 273, 304, 304, 334, 334, 365.};time@units="days since 1850-01-01 00:00:00";time@calendar="noleap";time@bounds="time_bnds"' ${combined_name} ${combined_name} rm -rvf ${v}_clm_*.nc if [ $? 
!= 0 ]; then cd {{ scriptDir }} @@ -175,7 +175,7 @@ create_links_acyc_climo_obs() tmp_file="tmp_combine_${ttag}.nc" ncrcat -d time,"${YYYYS}-01-01,${YYYYE}-12-31" ${file} ${tmp_file} # Go through the time serie file, and derive annual cycle climate mean - dofm=(15 46 74 105 135 166 196 227 258 288 319 349) #middle day of month + dofm=(15 46 74 105 125 166 196 227 258 288 319 349) #middle day of month for month in `seq 1 1 12`; do MM=`printf "%02d" ${month}` @@ -186,7 +186,7 @@ create_links_acyc_climo_obs() combined_name="${PREFIX}.${ttag}.AC.${case_id}.nc" ncrcat -O -d time,0, tmp_clm_*.nc ${combined_name} #modify time to avoid issues in pcmdi calculation - ncap2 -O -h -s 'time[time]={15.5, 45, 74.5, 105, 135.5, 166, 196.5, 227.5, 258, 288.5,319, 349.5};time@units="days since 1850-01-01 00:00:00";time@calendar="noleap";' ${combined_name} ${combined_name} + ncap2 -O -h -s 'time[time]={15.5, 45, 74.5, 105, 125.5, 166, 196.5, 227.5, 258, 288.5,319, 349.5};time@units="days since 1850-01-01 00:00:00";time@calendar="noleap";' ${combined_name} ${combined_name} ncap2 -O -h -s 'defdim("bnds",2);time_bnds=make_bounds(time,$bnds,"time_bnds");time_bnds@units=time@units;time_bnds@calendar=time@calendar' ${combined_name} ${combined_name} rm -rvf tmp_*.nc if [ $? 
!= 0 ]; then @@ -285,7 +285,6 @@ import glob import json import time import datetime -import xarray as xr import xcdat as xc import numpy as np import shutil @@ -379,7 +378,6 @@ import glob import json import time import datetime -import xarray as xr import xcdat as xc import numpy as np import shutil @@ -511,7 +509,7 @@ create_links_ts_obs ${ts_dir_ref_source} ${ts_dir_ref} ${Y1} ${Y2} 8 #collect data description and save in a json file #for the convinience of later-on process ################################################## -mkdir -p ${results_dir} +mkdir -p pcmdi_diags cat > data_info_collect.py << EOF import os import glob @@ -593,7 +591,7 @@ for i,group in enumerate([test,refr]): else: out_dic = refr_dic out_file = os.path.join( - '${results_dir}', + 'pcmdi_diags', '{}_{}_catalogue.json'.format(group,'{{subset}}') ) json.dump(out_dic, @@ -657,7 +655,7 @@ if generate_sftlf: #loop each group and process land/mask if not exist for group in [test,refr]: dic_file = os.path.join( - '${results_dir}', + 'pcmdi_diags', '{}_{}_catalogue.json'.format(group,'{{subset}}') ) data_dic = json.load(open(dic_file)) @@ -674,18 +672,17 @@ if generate_sftlf: ds = xcdat_open(mpath, decode_times=True) ds = ds.bounds.add_missing_bounds() try: - lf_array = create_land_sea_mask(ds, method="pcmdi") - print("land mask is estimated using pcmdi method.") - except Exception: lf_array = create_land_sea_mask(ds, method="regionmask") print("land mask is estimated using regionmask method.") + except Exception: + lf_array = create_land_sea_mask(ds, method="pcmdi") + print("land mask is estimated using pcmdi method.") lf_array = lf_array * 100.0 lf_array.attrs['long_name']= "land_area_fraction" lf_array.attrs['units'] = "%" lf_array.attrs['id'] = "sftlf" # Rename - ds_lf = lf_array.to_dataset().compute() + ds_lf = lf_array.to_dataset(name='sftlf').compute() ds_lf = ds_lf.bounds.add_missing_bounds() - ds_lf = ds_lf.rename_vars({"lsmask": "sftlf"}) ds_lf.fillna(1.0e20) ds_lf.attrs['model'] = 
model ds_lf.attrs['associated_files'] = mpath @@ -705,6 +702,99 @@ if [ $? != 0 ]; then exit 10 fi +{%- if '{{sythentic_plots}}' == "y" %} +################################################################### +# this module is added as an external module to generate sythentic +# metrics plots for mean-climate diagnostics (compared with cmip +################################################################### +# Prepare configuration file +cat > sythentic_plots.py << EOF +import os +import sys +import glob +import json +import time +import datetime +import xcdat as xc +import numpy as np +import pcmdi_metrics + +# external module for plot +sys.path.append('{{clim_plot_parser}}'.split("/")[-1]) +clim_plot_parser = '{{clim_plot_parser}}'.split("/")[-1] +clim_plot_driver = '{{clim_plot_driver}}'.split("/")[-1] +from clim_plot_parser import ( + create_mean_climate_plot_parser, +) +from clim_plot_driver import ( + mean_climate_metrics_plot, +) + +parser = create_mean_climate_plot_parser() +parameter = parser.get_parameter(argparse_vals_only=False) +parameter.run_type = "${run_type}" + +{% if run_type == "model_vs_obs" %} +parameter.refr_data_set = "" +parameter.refr_period = "" +parameter.refr_data_path = "" +{% elif run_type == "model_vs_model" %} +parameter.refr_data_set = '${cmip_name_ref}.${case_id}' +parameter.refr_period = "{}-{}".format(${ref_Y1},${ref_Y2}) +parameter.refr_data_path = ${reference_data_path} +{%- endif %} + +parameter.test_data_set = '${cmip_name}' +parameter.test_period = "{:04d}-{:04d}".format(${Y1},${Y2})" +parameter.test_data_path = os.path.join( + '${cmip_name}'.split(".")[0], + '${cmip_name}'.split(".")[1], + '${case_id}' +) + +{%- if ("mean_climate" in subset) %} +pcmdi_data_set = '{{pcmdi_cmip_mclm}}' +pcmdi_data_key = 'mean_climate' +{%- elif ("variability_mode" in subset) %} +pcmdi_data_set = '{{pcmdi_cmip_mov}}' +pcmdi_data_key = 'variability_modes' +{%- elif ("enso" in subset) %} +pcmdi_data_set = '{{pcmdi_cmip_enso}}' +pcmdi_data_key = 
'enso_metric' +{%- endif %} + +#existing pcmdi cmip diagnostic metrics +parameter.pcmdi_data_set = pcmdi_data_set +parameter.pcmdi_data_path = os.path.join( + "{{pcmdi_data_path}}", + "variability_modes", + pcmdi_data_set.split(".")[0], + pcmdi_data_set.split(".")[1], + pcmdi_data_set.split(".")[2] +) + +parameter.output_path = os.path.join( + "pcmdi_diags", + "graphics", + pcmdi_data_key, +) +parameter.ftype = '{{ figure_format }}' +parameter.debug = {{ pmp_debug }} +parameter.parcord_show_markers = {{parcord_show_markers}} #False +parameter.add_vertical_line = {{portrait_vertical_line}} #True + +#generate diagnostics figures + +print("--- generate mean climate metrics plot ---") +compute_regions = '{{ regions }}'.split(",") +compute_variables ='{{ vars }}'.split(",") + +mean_climate_metrics_plot(parameter) + +EOF + +{%- endif %} + ######################################################## # generate basic parameter file for pcmdi metrics driver ######################################################## @@ -798,6 +888,7 @@ regrid_method_ocn = ( '{{ regrid_method_ocn }}' ) ####################################### # DATA LOCATION: MODELS # --------------------------------------------- +realization = "*" test_data_set = [ product ] test_data_path = '${climo_dir_primary}' # Templates for model climatology files @@ -805,7 +896,7 @@ filename_template = '.'.join([ mip, exp, '%(model)', - '*', + '%(realization)', '${tableID}', '%(variable)', period, @@ -817,10 +908,10 @@ filename_template = '.'.join([ #observation info reference_data_path = '${climo_dir_ref}' custom_observations = os.path.join( - '${results_dir}', + 'pcmdi_diags', '{}_{}_catalogue.json'.format( - '${climo_dir_ref}', - '{{subset}}')) + '${climo_dir_ref}', + '{{subset}}')) #load caclulated regions for each variable regions = json.load(open('regions.json')) @@ -841,7 +932,7 @@ for key in regions_specs.keys(): ####################################### # DATA LOCATION: METRICS OUTPUT metrics_output_path = 
os.path.join( - '${results_dir}', + 'pcmdi_diags', 'metrics_results', 'mean_climate', mip, @@ -852,7 +943,7 @@ metrics_output_path = os.path.join( ############################################################ # DATA LOCATION: INTERPOLATED MODELS' CLIMATOLOGIES diagnostics_output_path= os.path.join( - '${results_dir}', + 'pcmdi_diags', 'diagnostic_results', 'mean_climate', mip, @@ -863,7 +954,7 @@ test_clims_interpolated_output = diagnostics_output_path {%- endif %} -{%- if "variability_mode" in subset %} +{%- if ("variability_mode" in subset) %} ######################################## #setup for mode variability diagnostics ######################################## @@ -887,7 +978,7 @@ modpath = '.'.join([ mip, exp, '%(model)', - '*', + '%(realization)', '${tableID}', '%(variable)', period, @@ -920,7 +1011,7 @@ update_json = {{ update_json }} #results directory structure. results_dir = os.path.join( - '${results_dir}', + 'pcmdi_diags', '%(output_type)', 'variability_modes', '%(mip)', @@ -931,7 +1022,7 @@ results_dir = os.path.join( ) {%- endif %} -{%- if "enso" in subset %} +{%- if ("enso" in subset) %} ########################################### #parameter setup specific for enso metrics ########################################### @@ -956,7 +1047,7 @@ reference_data_lf_path = json.load(open('obs_landmask.json')) # OUTPUT results_dir = os.path.join( - '${results_dir}', + 'pcmdi_diags', '%(output_type)', 'enso_metric', '%(mip)', @@ -982,11 +1073,9 @@ echo cat > pcmdi.py << EOF import os import glob -import glob import json import time import datetime -import xarray as xr import xcdat as xc import numpy as np @@ -1008,6 +1097,89 @@ def childCount(): children = current_process.children() return(len(children)) +def parallel_jobs(cmds,num_workers): + procs = [] + for i,p in enumerate(cmds): + print('running %s' % (str(p))) + proc = Popen(p, stdout=PIPE, shell=True) + procs.append(proc) + if (i == len(cmds)-1): + outs, errs = proc.communicate() + rcode = proc.returncode + 
time.sleep(0.25); break + else: + njobs = childCount() + while (njobs > num_workers): + [pp.communicate() for pp in procs] + time.sleep(0.25) + procs = [] + return outs,errs,rcode + +def serial_jobs(cmds,num_workers): + for i,p in enumerate(cmds): + print('running %s' % (str(p))) + proc = Popen(p, stdout=PIPE, shell=True) + + return outs,errs,rcode + +def variable_region(regions,variables): + regv_dic = OrderedDict() + for var in variables: + vkey = var.split("-")[0] + regv_dic[vkey] = regions + + #save region info dictionary + json.dump(regv_dic, + open('regions.json', "w"), + sort_keys=False, + indent=4, + separators=(",", ": ")) + return + +def enso_obsvar_dict(obs_dic,variables): + #orgnize observation for enso driver + refr_dic = OrderedDict() + for var in variables: + vkey = var.split("-")[0] + refset = obs_dic[var]['set'] + refname = obs_dic[var][refset] + #data file in model->var sequence + if refname not in refr_dic.keys(): + refr_dic[refname] = {} + refr_dic[refname][var] = obs_dic[var][refname] + + #save data file dictionary + json.dump(refr_dic, + open('obs_catalogue.json', "w"), + sort_keys=False, + indent=4, + separators=(",", ": ")) + + return + +def enso_obsvar_lmsk(regions,variables): + #orgnize observation landmask for enso driver + relf_dic = OrderedDict() + for var in variables: + vkey = var.split("-")[0] + refset = obs_dic[var]['set'] + refname = obs_dic[var][refset] + #land/sea mask + if refname not in relf_dic.keys(): + relf_dic[refname] = os.path.join( + "${fixed_dir}", + 'sftlf.{}.nc'.format(refname)) + + #save data file dictionary + json.dump(relf_dic, + open('obs_landmask.json', "w"), + sort_keys=False, + indent=4, + separators=(",", ": ")) + + return + +############################## start_yr = int('${Y1}') end_yr = int('${Y2}') num_years = end_yr - start_yr + 1 @@ -1023,7 +1195,7 @@ reference_data_path = '${climo_dir_ref}' reference_data_path = '${ts_dir_ref}' {%- endif %} observation_file = os.path.join( - '${results_dir}', + 
'pcmdi_diags', '{}_{}_catalogue.json'.format( reference_data_path, '{{subset}}') @@ -1034,70 +1206,52 @@ obs_dic = json.load(open(observation_file)) ###################################### # call pcmdi mean climate diagnostics ##################################### -#customized region, otherwise default -regional = '{{ regional }}' -if regional == "y": - default_regions = '{{ regions }}'.split(",") -else: - default_regions = ["global", "NHEX", "SHEX", "TROPICS"] - +compute_regions = '{{regions}}'.split(",") +compute_variables = '{{vars}}'.split(",") +#assiagn region to each variable +variable_region( + compute_regions, + compute_variables +) ################################################### # generate the command list for each reference and # each variable (will execuate in parallel later) lstcmd = [] -regv_dic = OrderedDict() -for var in "{{vars}}".split(","): +for var in compute_variables: if var in obs_dic.keys(): vkey = var.split("-")[0] refset = obs_dic[var]['set'] - regv_dic[vkey] = default_regions lstcmd.append(" ".join([ 'mean_climate_driver.py', '-p parameterfile.py' , '--vars' , '{}'.format(var), '-r' , '{}'.format(refset), - '--varname_in_test_data', '{}'.format(vkey), '--case_id' , '{}'.format('${case_id}') ])) -#save region info dictionary -json.dump(regv_dic, - open('regions.json', "w"), - sort_keys=False, - indent=4, - separators=(",", ": ")) - -#finally process the data in parallel -print("Number of jobs starting is ", str(len(lstcmd))) -procs = [] -if len(lstcmd) > 0: - for i,p in enumerate(lstcmd): - print('running %s' % (str(p))) - proc = Popen(p, stdout=PIPE, shell=True) - if multiprocessing == True: - procs.append(proc) - while (childCount() > num_workers): - time.sleep(0.25) - [pp.communicate() for pp in procs] - procs = [] - else: - if (i == len(lstcmd)-1): - try: - outs, errs = proc.communicate() - if proc.returncode == 0: - print("stdout = {}; stderr = {}".format(str(outs),str(errs))) - else: - exit("ERROR: subprocess {} 
failed".format(str(lstcmd[i]))) - except: - break - else: - return_code = proc.communicate() - if return_code != 0: - exit("Failed to run {}".format(str(p))) +if (len(lstcmd) > 0 ) and multiprocessing: + print("Parallel computing with {} jobs".format(str(len(lstcmd)))) + stdout,stderr,return_code = parallel_jobs(lstcmd,num_workers) +elif (len(lstcmd) > 0 ): + print("Serial computing with {} jobs".format(str(len(lstcmd)))) + stdout,stderr,return_code = serial_jobs(lstcmd,num_workers) +else: + print("no jobs to run...") + return_code = 0 -#set a delay to avoid delay in writing process -time.sleep(1) -print("done submitting") +if return_code != 0: + exit("ERROR: {} jobs failed".format('{{subset}}')) +else: + print("successfully finish all jobs....") + #time delay to ensure process completely finished + time.sleep(1) + +{%- if '{{sythentic_plots}}' == 'y' %} +#process sythetic metric plot if turned on +return_code = subprocess.call(["python", 'sythentic_plots.py']) +if return_code != 0: + exit("Failed to process {{sythentic_plots}}") +{%- endif %} {%- endif %} @@ -1148,72 +1302,35 @@ for variability_mode in var_modes: ])) lstcmd.append(cmd); del(cmd) -#finally process the data in parallel -print("Number of jobs starting is ", str(len(lstcmd))) -procs = [] -for i,p in enumerate(lstcmd): - print('running %s' % (str(p))) - proc = Popen(p, stdout=PIPE, shell=True) - if multiprocessing == True: - procs.append(proc) - while (childCount() > num_workers): - time.sleep(0.25) - [pp.communicate() for pp in procs] # this will get the exit code - procs = [] - else: - if (i == len(lstcmd)-1): - try: - outs, errs = proc.communicate() - if proc.returncode == 0: - print("stdout = {}; stderr = {}".format(str(outs),str(errs))) - else: - exit("ERROR: subprocess {} failed".format(str(lstcmd[i]))) - except: - break - else: - return_code = proc.communicate() - if return_code != 0: - exit("Failed to run {}".format(str(p))) -#set a delay to avoid delay in writing process -time.sleep(1) 
-print("done submitting") -del(lstcmd) +if (len(lstcmd) > 0 ) and multiprocessing: + print("Parallel computing with {} jobs".format(str(len(lstcmd)))) + stdout,stderr,return_code = parallel_jobs(lstcmd,num_workers) +elif (len(lstcmd) > 0 ): + print("Serial computing with {} jobs".format(str(len(lstcmd)))) + stdout,stderr,return_code = serial_jobs(lstcmd,num_workers) +else: + print("no jobs to run...") + return_code = 0 + +if return_code != 0: + exit("ERROR: {} jobs failed".format('{{subset}}')) +else: + print("successfully finish all jobs....") + #time delay to ensure process completely finished + time.sleep(1) + {%- endif %} {%- if "enso" in subset %} ############################################# # call enso_driver.py to process diagnostics ############################################# -#reorgnize observation needed for enso driver -refr_dic = OrderedDict() -relf_dic = OrderedDict() -for var in list("{{vars}}".split(",")): - vkey = var.split("-")[0] - refset = obs_dic[var]['set'] - refname = obs_dic[var][refset] - #data file in model->var sequence - if refname not in refr_dic.keys(): - refr_dic[refname] = {} - refr_dic[refname][var] = obs_dic[var][refname] - #land/sea mask - if refname not in relf_dic.keys(): - relf_dic[refname] = os.path.join( - "${fixed_dir}", - 'sftlf.{}.nc'.format(refname)) - -#save data file dictionary -json.dump(refr_dic, - open('obs_catalogue.json', "w"), - sort_keys=False, - indent=4, - separators=(",", ": ")) - -#save land/sea mask dictionary -json.dump(relf_dic, - open('obs_landmask.json', "w"), - sort_keys=False, - indent=4, - separators=(",", ": ")) + +#orgnize observation var list +enso_obsvar_dict(obs_dic,"{{vars}}".split(",")) + +#orgnize observation landmask +enso_obsvar_lmsk(obs_dic,"{{vars}}".split(",")) #now start enso driver print("calculate enso metrics") @@ -1228,31 +1345,22 @@ for metricsCollection in enso_groups: ])) lstcmd.append(cmd); del(cmd) -print("Number of jobs starting: ", str(len(lstcmd))) - -#finally process the 
data in parallel -procs = [] -for i,p in enumerate(lstcmd): - print('running %s' % (str(p))) - proc = Popen(p, stdout=PIPE, shell=True) - procs.append(proc) - while (childCount() > {{num_workers}}): - time.sleep(0.25) - [pp.communicate() for pp in procs] # this will get the exit code - procs = [] - else: - if (i == len(lstcmd)-1): - try: - outs, errs = proc.communicate() - if proc.returncode == 0: - print("stdout = {}; stderr = {}".format(str(outs),str(errs))) - else: - exit("ERROR: subprocess {} failed".format(str(lstcmd[i]))) - except: - break -#set a delay to avoid delay in writing process -time.sleep(1) -print("done submitting") +if (len(lstcmd) > 0 ) and multiprocessing: + print("Parallel computing with {} jobs".format(str(len(lstcmd)))) + stdout,stderr,return_code = parallel_jobs(lstcmd,num_workers) +elif (len(lstcmd) > 0 ): + print("Serial computing with {} jobs".format(str(len(lstcmd)))) + stdout,stderr,return_code = serial_jobs(lstcmd,num_workers) +else: + print("no jobs to run...") + return_code = 0 + +if return_code != 0: + exit("ERROR: {} jobs failed".format('{{subset}}')) +else: + print("successfully finish all jobs....") + #time delay to ensure process completely finished + time.sleep(1) {%- endif %} EOF ################################ @@ -1266,86 +1374,147 @@ if [ $? 
!= 0 ]; then exit 11 fi -################################################################ -# this post-processing module is to generate sythentic metrics -# for mean-climate diagnostics (compared with cmip model results) -################################################################ -{%- if "mean_climate" in subset %} -echo -echo ===== RUN PCMDI POST-PROCESSING ===== -echo +########################################### +# reorgnize pcmdi diagnostics output +########################################### # Prepare configuration file -cat > post_processing.py << EOF +cat > graphic_viewer.py << EOF import os import glob -import glob import json import time import datetime -import xarray as xr -import xcdat as xc -import numpy as np -import pcmdi_metrics +import collections +from collections import OrderedDict -# external module for plot -{%- if ("mean_climate" in subset) %} -import {{clim_plot_parser}} -import {{clim_plot_driver}} -{%- endif %} +def get_mean_climate_graphics(regions,variables,fig_format,input_dir,output_dir): + diag_metric = "mean_climate" + seasons = ['DJF','MAM','JJA','SON','AC'] + input_dir = input_dir.replace("%(metric_type)",diag_metric) + + fig_sets = OrderedDict() + fig_sets['CLIM_patttern'] = ['graphics','*'] + fig_sets['ERROR_metric'] = ['graphics','*'] + + for fset in fig_sets.keys(): + fdir = input_dir.replace('%(output_type)',fig_sets[fset][0] ) + output = output_dir.replace("%(group_type)",fset) + for region in regions: + for sea in seasons: + outpath = os.path.join(output,region,sea) + if not os.path.exists(outpath): + os.makedirs(outpath) + for var in variables: + fpaths = sorted(glob.glob(os.path.join(fdir,var, + '{}{}_{}*.{}'.format(fig_sets[fset][1],region,sea,fig_format)))) + for fpath in fpaths: + refname = fpath.split("/")[-2] + filname = fpath.split("/")[-1] + outfile = os.path.join(outpath,filname) + os.rename(fpath,outfile) + + return + +def get_variability_graphics(modes,fig_format,input_dir,output_dir): + diag_metric = 
"variability_modes" + input_dir = input_dir.replace("%(metric_type)",diag_metric) + + fig_sets = OrderedDict() + fig_sets['MOV_eofvar'] = ['diagnostic_results','EG_Spec*'] + fig_sets['MOV_telecon'] = ['graphics','*teleconnection'] + fig_sets['MOV_pattern'] = ['graphics','*'] + + for mode in modes: + for fset in fig_sets.keys(): + fdir = input_dir.replace('%(output_type)',fig_sets[fset][0] ) + output = output_dir.replace("%(group_type)",fset) + fpaths = sorted(glob.glob(os.path.join(fdir,mode,'*', + '{}.{}'.format(fig_sets[fset][1],fig_format)))) + for fpath in fpaths: + refname = fpath.split("/")[-2] + filname = fpath.split("/")[-1] + outpath = os.path.join(output,'{}_model_vs_{}'.format(mode,refname)) + if not os.path.exists(outpath): + os.makedirs(outpath) + outfile = os.path.join(outpath,filname) + os.rename(fpath,outfile) + return + +def get_enso_graphics(groups,fig_format,refname,input_dir,output_dir): + diag_metric = "enso_metric" + input_dir = input_dir.replace("%(metric_type)",diag_metric) + + fig_sets = OrderedDict() + fig_sets['ENSO_metric'] = ['graphics','*'] + + for fset in fig_sets.keys(): + for group in groups: + fdir = input_dir.replace('%(output_type)',fig_sets[fset][0] ) + output = output_dir.replace("%(group_type)",fset) + fpaths = sorted(glob.glob(os.path.join(fdir,group, + '{}.{}'.format(fig_sets[fset][1],fig_format)))) + for fpath in fpaths: + filname = fpath.split("/")[-1] + outpath = os.path.join(output,'{}_model_vs_{}'.format(group,refname)) + if not os.path.exists(outpath): + os.makedirs(outpath) + outfile = os.path.join(outpath,filname) + os.rename(fpath,outfile) + + return + +############# +fig_format = '{{ figure_format }}' + +diag_types = ['metrics_results','diagnostic_result','graphics'] + +input_template = os.path.join( + 'pcmdi_diags', + '%(output_type)', + '%(metric_type)', + '${cmip_name}'.split(".")[0], + '${cmip_name}'.split(".")[1], + '${case_id}', +) -#customized region, otherwise default -regional = '{{ regional }}' -if 
regional == "y": - default_regions = '{{ regions }}'.split(",") -else: - default_regions = ["global", "NHEX", "SHEX", "TROPICS"] +out_path = os.path.join( + '${results_dir}', + '%(group_type)' +) -#generate diagnostics figures -print("--- prepare for mean climate metrics plot ---") -parser = create_mean_climate_plot_parser() -parameter = parser.get_parameter(argparse_vals_only=False) -parameter.regions = default_regions -parameter.run_type = "${run_type}" -parameter.period = "{:04d}-{:04d}".format(${Y1},${Y2}) -parameter.pcmdi_data_set = "{{pcmdi_data_set}}" -parameter.pcmdi_data_path = os.path.join('{{pcmdi_data_path}}',"mean_climate") -parameter.test_data_set = "{}.{}".format(${cmip_name},"${case_id}") -parameter.test_data_path = os.path.join("${results_dir}","metrics_results","mean_climate") +{%- if ("mean_climate" in subset) %} +compute_regions = '{{ regions }}'.split(",") +compute_variables = '{{ vars }}'.split(",") +get_mean_climate_graphics( + compute_regions,compute_variables, + fig_format,input_template,out_path +) +{% endif %} -{% if run_type == "model_vs_obs" %} -parameter.refr_data_set = "" -parameter.refr_period = "" -parameter.refr_data_path = "" -{% elif run_type == "model_vs_model" %} -parameter.refr_data_set = "{}.{}".format(${cmip_name_ref},"${case_id}") -parameter.refr_period = "{}-{}".format(${ref_Y1},${ref_Y2}) -parameter.refr_data_path = os.path.join("${results_dir}","metrics_results","mean_climate") +{%- if ("variability_mode" in subset) %} +{%- if ("variability_mode_atm" in subset) %} +compute_modes = '{{ atm_modes }}'.split(",") +{% elif ("variability_mode_cpl" in subset) %} +compute_modes = '{{ cpl_modes }}'.split(",") +{%- endif %} +get_variability_graphics( + compute_modes,fig_format, + input_template,out_path +) {%- endif %} -parameter.output_path = os.path.join("${results_dir}","graphics","mean_climate") -parameter.ftype = '{{ figure_format }}' -parameter.debug = {{ pmp_debug }} -parameter.parcord_show_markers = 
{{parcord_show_markers}} #False -parameter.add_vertical_line = {{portrait_vertical_line}} #True - -#generate diagnostics figures -print("--- generate mean climate metrics plot ---") -mean_climate_metrics_plot(parameter) +{%- if ("enso" in subset) %} +compute_groups = '{{ enso_groups }}'.split(",") +obs_dict = json.load(open('obs_catalogue.json')) +obs_name = list(obs_dict.keys())[0] +get_enso_graphics( + compute_groups,fig_format, + obs_name,input_template,out_path +) +{% endif %} EOF -################################ -# Run diagnostics -command="srun -N 1 python -u post_processing.py" -# Run diagnostics -time ${command} -if [ $? != 0 ]; then - cd {{ scriptDir }} - echo 'ERROR (12)' > {{ prefix }}.status - exit 12 -fi -{% endif %} - ################################# # Copy output to web server echo @@ -1353,12 +1522,12 @@ echo ===== COPY FILES TO WEB SERVER ===== echo # Create top-level directory -web_dir=${www}/${case}/pcmdi_diags #/{{ sub }} +web_dir=${www}/${case}/pcmdi_diags mkdir -p ${web_dir} if [ $? != 0 ]; then cd {{ scriptDir }} - echo 'ERROR (13)' > {{ prefix }}.status - exit 13 + echo 'ERROR (12)' > {{ prefix }}.status + exit 12 fi {% if machine in ['pm-cpu', 'pm-gpu'] %} @@ -1375,13 +1544,13 @@ do done {% endif %} +############################################ # Copy files -#rsync -a --delete ${results_dir} ${web_dir}/ -rsync -a ${results_dir} ${web_dir}/ +rsync -a --delete ${results_dir} ${web_dir}/ if [ $? 
!= 0 ]; then cd {{ scriptDir }} - echo 'ERROR (14)' > {{ prefix }}.status - exit 14 + echo 'ERROR (13)' > {{ prefix }}.status + exit 13 fi {% if machine in ['pm-cpu', 'pm-gpu'] %} From 2d38e9b2e59f4123acb69007260ecc1733897678 Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Mon, 6 Jan 2025 14:19:46 -0600 Subject: [PATCH 08/23] Add revisions on the sythentic plot --- zppy/defaults/default.ini | 23 +- .../pcmdi/mean_climate_plot_driver.py | 670 ------------------ .../pcmdi/mean_climate_plot_parser.py | 373 ---------- .../inclusions/pcmdi/reference_alias.json | 2 +- zppy/templates/pcmdi_diags.bash | 651 +++++++++++++---- 5 files changed, 510 insertions(+), 1209 deletions(-) delete mode 100755 zppy/templates/inclusions/pcmdi/mean_climate_plot_driver.py delete mode 100755 zppy/templates/inclusions/pcmdi/mean_climate_plot_parser.py diff --git a/zppy/defaults/default.ini b/zppy/defaults/default.ini index 8ab666e7..7f2a8d7b 100755 --- a/zppy/defaults/default.ini +++ b/zppy/defaults/default.ini @@ -154,11 +154,11 @@ obs_ts = string(default="") obs_sets = string(default="default") # options specific for constructing pcmdi preferred file name conventions # required for "model_vs_obs" comparison -cmip_name = string(default="e3sm.historical.v3-LR.0051") +model_name = string(default="e3sm.historical.v3-LR.0051") # required for "model_vs_model" comparison -cmip_name_ref = string(default="e3sm.historical.v3-LR.0051") +model_name_ref = string(default="e3sm.historical.v3-LR.0051") # required for "model_vs_obs" comparison -cmip_tableID = string(default="Amon") +model_tableID = string(default="Amon") # variables in the cmip6 table that can be potentially used by pcmdi # this list depends on the definition of cmip variable # required for "mean climate" diagnostics @@ -250,6 +250,7 @@ enso_groups = string(default="ENSO_perf,ENSO_proc,ENSO_tel") grid = string(default="180x360_aave") #default regions for mean climate metrics data #more options can be found at "regions_specs" +#regions = 
string(default="global,ocean,land,NHEX,SHEX,TROPICS,NHEX_ocean,SHEX_ocean,NHEX_land,SHEX_land,ocean_50S50N") regions = string(default="global,ocean,land,NHEX,SHEX,TROPICS") # save derived climatology data save_test_clims = string(default=True) @@ -270,8 +271,8 @@ sythentic_plots = string(default="n") # path to pcmdi generated cmip metrics data pcmdi_data_path = string(default="") #group of pcmdi generated cmip metrics data (mip.exp.version) -pcmdi_cmip_mclm = string(default="cmip6.historical.v20220928") -pcmdi_cmip_mov = string(default="cmip6.historical.v20220825") +pcmdi_cmip_clim = string(default="cmip6.historical.v20220928") +pcmdi_cmip_movs = string(default="cmip6.historical.v20220825") pcmdi_cmip_enso = string(default="cmip6.historical.v20210620") # options shared by pcmdi pmp_debug = string(default=False) @@ -286,8 +287,8 @@ portrait_vertical_line = string(default=True) vars = string(default=None) grid = string(default=None) sythentic_plots = string(default=None) - pcmdi_cmip_mclm = string(default=None) - pcmdi_cmip_mov = string(default=None) + pcmdi_cmip_clim = string(default=None) + pcmdi_cmip_movs = string(default=None) pcmdi_cmip_enso = string(default=None) pcmdi_data_path = string(default=None) derived_variable = string(default=None) @@ -333,10 +334,10 @@ portrait_vertical_line = string(default=True) enso_groups = string(default=None) ModUnitsAdjust = string(default=None) ObsUnitsAdjust = string(default=None) - cmip_name = string(default=None) - cmip_name_ref = string(default=None) - cmip_tableID = string(default=None) - cmip_vars = string(default=None) + model_name = string(default=None) + model_name_ref = string(default=None) + model_tableID = string(default=None) + model_vars = string(default=None) pmp_debug = string(default=None) nc_out_obs = string(default=None) nc_out = string(default=None) diff --git a/zppy/templates/inclusions/pcmdi/mean_climate_plot_driver.py b/zppy/templates/inclusions/pcmdi/mean_climate_plot_driver.py deleted file mode 100755 
index a85cf4e0..00000000 --- a/zppy/templates/inclusions/pcmdi/mean_climate_plot_driver.py +++ /dev/null @@ -1,670 +0,0 @@ -#!/bin/env python -############################################################################## -# This model is used to generate mean climate diagnostic figures -# Author: Shixuan Zhang (shixuan.zhang@pnnl.gov) -############################################################################# -import os -import shutil - -import numpy as np -import pandas as pd -from mean_climate_plot_parser import ( - fill_plot_var_and_units, - find_metrics_data, - metrics_inquire, - shift_row_to_bottom, -) -from pcmdi_metrics.graphics import ( - Metrics, - normalize_by_median, - parallel_coordinate_plot, - portrait_plot, -) - - -def load_test_model_data(test_file, refr_file, mip, run_type): - # load the data and reorganize if needed - pd.set_option("future.no_silent_downcasting", True) - test_lib = Metrics(test_file) - - # model_vs_model, merge the reference model data into test model - if run_type == "model_vs_model": - refr_lib = Metrics(refr_file) - test_lib = test_lib.merge(refr_lib) - del refr_lib - - # collect and reorgnize test model data for plotting: - test_models = [] - for stat in test_lib.df_dict: - for season in test_lib.df_dict[stat]: - for region in test_lib.df_dict[stat][season]: - df = pd.DataFrame(test_lib.df_dict[stat][season][region]) - for i, model in enumerate(df["model"].tolist()): - model_run = df["model_run"].tolist()[i] - new_name = "{}-{}".format(mip.upper(), model_run.upper()) - idxs = df[df.iloc[:, 2] == model_run].index - df.loc[idxs, "model"] = list( - map( - lambda x: x.replace(model, new_name), - df.loc[idxs, "model"], - ) - ) - if new_name not in test_models: - test_models.append(new_name) - test_lib.df_dict[stat][season][region] = df - del df - return test_models, test_lib - - -def load_cmip_metrics_data(cmip_file): - # collect cmip multi-model ensemble data for comparison - pd.set_option("future.no_silent_downcasting", True) 
- cmip_lib = Metrics(cmip_file) - cmip_models = [] - highlight_models = [] - for stat in cmip_lib.df_dict: - for season in cmip_lib.df_dict[stat]: - for region in cmip_lib.df_dict[stat][season]: - # now find all E3SM models in cmip6 - df = pd.DataFrame(cmip_lib.df_dict[stat][season][region]) - for model in df["model"].tolist(): - if model not in cmip_models: - cmip_models.append(model) - if ("e3sm" in model.lower()) and (model not in highlight_models): - highlight_models.append(model) - # move highlight_models to the end - for model in highlight_models: - idxs = df[df.iloc[:, 0] == model].index - cmip_models.remove(model) - cmip_models.append(model) - for idx in idxs: - df = shift_row_to_bottom(df, idx) - cmip_lib.df_dict[stat][season][region] = df - del df - return cmip_models, highlight_models, cmip_lib - - -def save_figure_data( - stat, region, season, var_names, var_units, data_dict, template, outdir -): - # construct output file name - fname = ( - template.replace("%(metric)", stat) - .replace("%(region)", region) - .replace("%(season)", season) - ) - outfile = os.path.join(outdir, fname) - outdic = pd.DataFrame(data_dict) - outdic = outdic.drop(columns=["model_run"]) - for var in list(outdic.columns.values[3:]): - if var not in var_names: - print("{} is excluded from the {}".format(var, fname)) - outdic = outdic.drop(columns=[var]) - else: - # replace the variable with the name + units - outdic.columns.values[outdic.columns.values.tolist().index(var)] = ( - var_units[var_names.index(var)] - ) - - # save data to .csv file - outdic.to_csv(outfile) - del (fname, outfile, outdic) - return - - -def construct_port4sea_axis_lables( - var_names, cmip_models, test_models, highlight_models -): - model_list = cmip_models + test_models - # assign colors for labels of models - lable_colors = [] - for model in model_list: - if model in highlight_models: - lable_colors.append("#5170d7") - elif model in test_models: - lable_colors.append("#FC5A50") - else: - 
lable_colors.append("#000000") - - if len(model_list) > len(var_names): - xlabels = model_list - ylabels = var_names - landscape = True - else: - xlabels = var_names - ylabels = model_list - landscape = False - del model_list - return xlabels, ylabels, lable_colors, landscape - - -def construct_port4sea_data( - stat, - seasons, - region, - data_dict, - var_names, - var_units, - file_template, - outdir, - landscape, -): - # work array - data_all = dict() - # loop 4 seasons and collect data - for season in seasons: - # save raw metric results as a .csv file for each season - save_figure_data( - stat, - region, - season, - var_names, - var_units, - data_dict[stat][season][region], - file_template, - outdir, - ) - if stat == "cor_xy": - data_nor = data_dict[stat][season][region][var_names].to_numpy() - if landscape: - data_all[season] = data_nor.T - else: - data_all[season] = data_nor - del data_nor - elif stat == "bias_xy": - # calculate the relative bias - data_sea = data_dict[stat][season][region][var_names].to_numpy() - data_rfm = data_dict["mean-obs_xy"][season][region][var_names].to_numpy() - data_msk = np.where(np.abs(data_rfm) == 0.0, np.nan, data_rfm) - data_nor = data_sea * 100.0 / data_msk - if landscape: - data_all[season] = data_nor.T - else: - data_all[season] = data_nor - del (data_sea, data_rfm, data_msk, data_nor) - else: - data_sea = data_dict[stat][season][region][var_names].to_numpy() - if landscape: - data_sea = data_sea.T - data_all[season] = normalize_by_median(data_sea, axis=1) - else: - data_all[season] = normalize_by_median(data_sea, axis=0) - del data_sea - - # data for final plot - data_all_nor = np.stack( - [data_all["djf"], data_all["mam"], data_all["jja"], data_all["son"]] - ) - del data_all - return data_all_nor - - -def port4sea_plot( - stat, - region, - seasons, - data_dict, - var_names, - var_units, - cmip_models, - test_models, - highlight_models, - file_template, - figure_template, - outdir, - add_vertical_line, - data_version=None, 
- watermark=False, -): - - # process figure - fontsize = 20 - var_names = sorted(var_names) - var_units = sorted(var_units) - - # construct the axis labels and colors - ( - xaxis_labels, - yaxis_labels, - lable_colors, - landscape, - ) = construct_port4sea_axis_lables( - var_names, cmip_models, test_models, highlight_models - ) - - # construct data for plotting - data_all_nor = construct_port4sea_data( - stat, - seasons, - region, - data_dict, - var_names, - var_units, - file_template, - outdir, - landscape, - ) - - if stat == "cor_xy": - cbar_label = "Pattern Corr." - var_range = (-1.0, 1.0) - cmap_bounds = [0.1, 0.2, 0.4, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0] - elif stat == "bias_xy": - cbar_label = "{}, relative (%)".format(stat.upper()) - var_range = (-30.0, 30.0) - cmap_bounds = [-30.0, -20.0, -10.0, -5.0, -1, 0.0, 1.0, 5.0, 10.0, 20.0, 30.0] - else: - cbar_label = "{}, normalized by median".format(stat.upper()) - var_range = (-0.5, 0.5) - cmap_bounds = [-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5] - - if landscape: - figsize = (40, 18) - legend_box_xy = (1.08, 1.18) - legend_box_size = 4 - legend_lw = 1.5 - shrink = 0.8 - legend_fontsize = fontsize * 0.8 - else: - figsize = (18, 25) - legend_box_xy = (1.25, 1) - legend_box_size = 3 - legend_lw = 1.5 - shrink = 1.0 - legend_fontsize = fontsize * 0.8 - - # Add Watermark/Logo - if watermark: - logo_rect = [0.85, 0.15, 0.07, 0.07] - logo_off = False - else: - logo_rect = [0, 0, 0, 0] - logo_off = True - - # Using Matplotlib-based PMP Visualization Function to Generate Portrait Plot - fig, ax, cbar = portrait_plot( - data_all_nor, - xaxis_labels=xaxis_labels, - yaxis_labels=yaxis_labels, - cbar_label=cbar_label, - cbar_label_fontsize=fontsize * 1.2, - box_as_square=True, - vrange=var_range, - figsize=figsize, - cmap="RdYlBu_r", - cmap_bounds=cmap_bounds, - cbar_kw={"extend": "both", "shrink": shrink}, - missing_color="white", - legend_on=True, - legend_labels=["DJF", "MAM", "JJA", "SON"], - 
legend_box_xy=legend_box_xy, - legend_box_size=legend_box_size, - legend_lw=legend_lw, - legend_fontsize=legend_fontsize, - logo_rect=logo_rect, - logo_off=logo_off, - ) - - if add_vertical_line: - ax.axvline( - x=len(xaxis_labels) - len(highlight_models) - len(test_models), - color="k", - linewidth=3, - ) - - if landscape: - ax.set_xticklabels(xaxis_labels, rotation=45, va="bottom", ha="left") - ax.set_yticklabels(yaxis_labels, rotation=0, va="center", ha="right") - for xtick, color in zip(ax.get_xticklabels(), lable_colors): - xtick.set_color(color) - ax.yaxis.label.set_color(lable_colors[0]) - else: - ax.set_xticklabels(xaxis_labels, rotation=45, va="bottom", ha="left") - ax.set_yticklabels(yaxis_labels, rotation=0, va="center", ha="right") - ax.xaxis.label.set_color(lable_colors[0]) - for ytick, color in zip(ax.get_yticklabels(), lable_colors): - ytick.set_color(color) - - ax.tick_params(axis="x", labelsize=fontsize) - ax.tick_params(axis="y", labelsize=fontsize) - - cbar.ax.tick_params(labelsize=fontsize) - - # Add title - ax.set_title( - "Model Performance of Seasonal Climatology ({}, {})".format( - stat.upper(), region.upper() - ), - fontsize=fontsize * 1.5, - pad=30, - ) - - # Add Watermark - if watermark: - ax.text( - 0.5, - 0.5, - "E3SM-PCMDI", - transform=ax.transAxes, - fontsize=100, - color="black", - alpha=0.5, - ha="center", - va="center", - rotation=25, - ) - # Add data info - fig.text( - 1.25, - 0.9, - "Data version\n" + data_version, - transform=ax.transAxes, - fontsize=12, - color="black", - alpha=0.6, - ha="left", - va="top", - ) - - # Save figure as an image file - figname = ( - figure_template.replace("%(metric)", stat) - .replace("%(region)", region) - .replace("%(season)", "4season") - ) - figfile = os.path.join(outdir, figname) - fig.savefig(figfile, facecolor="w", bbox_inches="tight") - del ( - data_all_nor, - xaxis_labels, - yaxis_labels, - lable_colors, - ) - - return - - -def paracord_plot( - stat, - region, - season, - data_dict, - 
var_names, - var_units, - cmip_models, - test_models, - highlight_models, - file_template, - figure_template, - outdir, - identify_all_models, - data_version=None, - watermark=False, -): - - # construct plotting data - var_names = sorted(var_names) - var_units = sorted(var_units) - - # write out the results as a table - save_figure_data( - stat, region, season, var_names, var_units, data_dict, file_template, outdir - ) - - # add ensemble mean - model_data = data_dict[var_names].to_numpy() - - # construct the string for plot - model_list = data_dict[ - "model" - ].to_list() # cmip_models + test_models + ["CMIP6 MME"] - model_list_group2 = highlight_models + test_models - models_to_highlight = test_models + [ - data_dict["model"].to_list()[-1] - ] # ["CMIP6 MME"] - figsize = (40, 12) - fontsize = 20 - legend_ncol = int(7 * figsize[0] / 40.0) - legend_posistion = (0.50, -0.14) - # color map for markers - colormap = "tab20_r" - # color map for highlight lines - xcolors = [ - "#000000", - "#e41a1c", - "#ff7f00", - "#4daf4a", - "#f781bf", - "#a65628", - "#984ea3", - "#999999", - "#377eb8", - "#dede00", - ] - lncolors = xcolors[1 : len(test_models) + 1] + [xcolors[0]] - # Add Watermark/Logo - if watermark: - logo_rect = [0.85, 0.15, 0.07, 0.07] - logo_off = False - else: - logo_rect = [0, 0, 0, 0] - logo_off = True - - xlabel = "Metric" - if "rms" in stat: - ylabel = "RMS Error (" + stat.upper() + ")" - elif "std" in stat: - ylabel = "Standard Deviation (" + stat.upper() + ")" - else: - ylabel = "value (" + stat.upper() + ")" - - if not np.isnan(model_data).all(): - print(model_data.min(), model_data.max()) - title = "Model Performance of {} Climatology ({}, {})".format( - season.upper(), stat.upper(), region.upper() - ) - fig, ax = parallel_coordinate_plot( - model_data, - var_units, - model_list, - model_names2=model_list_group2, - group1_name="CMIP6", - group2_name="E3SM", - models_to_highlight=models_to_highlight, - models_to_highlight_colors=lncolors, - 
models_to_highlight_labels=models_to_highlight, - identify_all_models=identify_all_models, # hide indiviaul model markers for CMIP6 models - vertical_center="median", - vertical_center_line=True, - title=title, - figsize=figsize, - axes_labelsize=fontsize * 1.1, - title_fontsize=fontsize * 1.1, - yaxes_label=ylabel, - xaxes_label=xlabel, - colormap=colormap, - show_boxplot=False, - show_violin=True, - violin_colors=("lightgrey", "pink"), - legend_ncol=legend_ncol, - legend_bbox_to_anchor=legend_posistion, - legend_fontsize=fontsize * 0.85, - xtick_labelsize=fontsize * 0.95, - ytick_labelsize=fontsize * 0.95, - logo_rect=logo_rect, - logo_off=logo_off, - ) - - # Add Watermark - if watermark: - ax.text( - 0.5, - 0.5, - "E3SM-PCMDI", - transform=ax.transAxes, - fontsize=100, - color="black", - alpha=0.5, - ha="center", - va="center", - rotation=25, - ) - # Add data info - fig.text( - 1.25, - 0.9, - "Data version\n" + data_version, - transform=ax.transAxes, - fontsize=12, - color="black", - alpha=0.6, - ha="left", - va="top", - ) - - # Save figure as an image file - figname = ( - figure_template.replace("%(metric)", stat) - .replace("%(region)", region) - .replace("%(season)", season) - ) - figfile = os.path.join(outdir, figname) - fig.savefig(figfile, facecolor="w", bbox_inches="tight") - - del (model_data, model_list, model_list_group2, models_to_highlight) - - return - - -def mean_climate_metrics_plot(parameter): - # info for test simulation - test_mip = parameter.test_data_set.split(".")[0] - test_exp = parameter.test_data_set.split(".")[1] - test_product = parameter.test_data_set.split(".")[2] - test_case_id = parameter.test_data_set.split(".")[-1] - # output directory - outdir = os.path.join(parameter.output_path, test_mip, test_exp, test_case_id) - - # construct file template to save the figure data in .csv file - file_template = "%(metric)_%(region)_{}_{}_{}_{}_mean_climate_%(season)_{}.csv" - file_template = file_template.format( - parameter.run_type.upper(), 
- test_mip.upper(), - test_exp.upper(), - test_product.upper(), - parameter.period, - ) - # construct figure template - figure_template = file_template.replace("csv", parameter.ftype) - - # find the metrics data - test_file, refr_file, cmip_file = find_metrics_data(parameter) - - # load cmip metrics data - cmip_models, highlight_models, cmip_lib = load_cmip_metrics_data(cmip_file) - - # load test model metrics data - test_models, test_lib = load_test_model_data( - test_file, refr_file, test_mip, parameter.run_type - ) - # collect overlap sets of variables for plotting: - test_lib, cmip_lib, var_list, var_unit_list = fill_plot_var_and_units( - test_lib, cmip_lib - ) - # search overlap of regions in test and reference - regions = [] - for reg in parameter.regions: - if (reg in test_lib.regions) and (reg in cmip_lib.regions): - regions.append(reg) - - # merge the cmip and model data - merged_lib = cmip_lib.merge(test_lib) - - ################################### - # generate parallel coordinate plot - ################################### - parall_fig_dir = os.path.join(outdir, "paracord_annual") - if os.path.exists(parall_fig_dir): - shutil.rmtree(parall_fig_dir) - os.makedirs(parall_fig_dir) - print("Parallel Coordinate Plots (4 seasons), loop each region and metric....") - # add ensemble mean - for metric in [ - "rms_xyt", - "std-obs_xyt", - "std_xyt", - "rms_y", - "rms_devzm", - "std_xy_devzm", - "std-obs_xy_devzm", - ]: - for region in regions: - for season in ["ann"]: - data_dict = merged_lib.df_dict[metric][season][region] - data_dict.loc["CMIP MMM"] = cmip_lib.df_dict[metric][season][ - region - ].mean(numeric_only=True, skipna=True) - data_dict.at["CMIP MMM", "model"] = "CMIP MMM" - if parameter.parcord_show_markers is not None: - identify_all_models = parameter.parcord_show_markers - else: - identify_all_models = True - paracord_plot( - metric, - region, - season, - data_dict, - var_list, - var_unit_list, - cmip_models, - test_models, - highlight_models, - 
file_template, - figure_template, - parall_fig_dir, - identify_all_models, - data_version=None, - watermark=False, - ) - del data_dict - - ################################### - # generate portrait plot - ################################### - ptrait_fig_dir = os.path.join(outdir, "portrait_4seasons") - if os.path.exists(ptrait_fig_dir): - shutil.rmtree(ptrait_fig_dir) - os.makedirs(ptrait_fig_dir) - print("Portrait Plots (4 seasons),loop each region and metric....") - ######################################################################### - seasons = ["djf", "mam", "jja", "son"] - data_dict = merged_lib.df_dict - for metric in ["rms_xy", "cor_xy", "bias_xy"]: - for region in regions: - print("working on {} in {} region".format(metrics_inquire(metric), region)) - if parameter.add_vertical_line is not None: - add_vertical_line = parameter.add_vertical_line - else: - add_vertical_line = False - port4sea_plot( - metric, - region, - seasons, - data_dict, - var_list, - var_unit_list, - cmip_models, - test_models, - highlight_models, - file_template, - figure_template, - ptrait_fig_dir, - add_vertical_line, - data_version=None, - watermark=False, - ) - - # release the data space - del (merged_lib, cmip_lib, test_lib, var_unit_list, var_list, regions) - - return diff --git a/zppy/templates/inclusions/pcmdi/mean_climate_plot_parser.py b/zppy/templates/inclusions/pcmdi/mean_climate_plot_parser.py deleted file mode 100755 index e73e4904..00000000 --- a/zppy/templates/inclusions/pcmdi/mean_climate_plot_parser.py +++ /dev/null @@ -1,373 +0,0 @@ -#!/usr/bin/env python -import ast -import glob -import os - -import numpy as np -import pandas as pd -from pcmdi_metrics.mean_climate.lib import pmp_parser - - -def create_mean_climate_plot_parser(): - parser = pmp_parser.PMPMetricsParser() - parser.add_argument( - "--test_model", - dest="test_model", - help="Defines target model for the metrics plots", - required=False, - ) - - parser.add_argument( - "--test_data_set", - type=str, - 
nargs="+", - dest="test_data_set", - help="List of observations or models to test " - + "against the reference_data_set", - required=False, - ) - - parser.add_argument( - "--test_data_path", - dest="test_data_path", - help="Path for the test climitologies", - required=False, - ) - - parser.add_argument( - "--period", dest="period", help="A simulation parameter", required=False - ) - - parser.add_argument( - "--run_type", dest="run_type", help="A post-process parameter", required=False - ) - - parser.add_argument( - "--regions", - type=ast.literal_eval, - dest="regions", - help="Regions on which to run the metrics", - required=False, - ) - - parser.add_argument( - "--pcmdi_data_set", - type=str, - nargs="+", - dest="pcmdi_data_set", - help="PCMDI CMIP dataset that is used as a " - + "CMIP multi-model ensembles against the test_data_set", - required=False, - ) - - parser.add_argument( - "--pcmdi_data_path", - dest="pcmdi_data_path", - help="Path for the PCMDI CMIP mean climate metrics data", - required=False, - ) - - parser.add_argument( - "--refr_model", - dest="refr_model", - help="A simulation parameter", - required=False, - ) - - parser.add_argument( - "--refr_data_set", - type=str, - nargs="+", - dest="refr_data_set", - help="List of reference models to test " + "against the reference_data_set", - required=False, - ) - - parser.add_argument( - "--refr_data_path", - dest="refr_data_path", - help="Path for the reference model climitologies", - required=False, - ) - - parser.add_argument( - "--output_path", - dest="output_path", - help="Path for the metrics plots", - required=False, - ) - - parser.add_argument( - "--parcord_show_markers", - dest="parcord_show_markers", - help="show markers for individual model in parallel coordinate plots", - required=False, - ) - parser.add_argument( - "--add_vertical_line", - dest="add_vertical_line", - help="draw a vertical line to separate test and reference models for portrait plots", - required=False, - ) - return parser - - 
-def metrics_inquire(name): - # list of metrics name and long-name - metrics = { - "std-obs_xy": "Spatial Standard Deviation (Reference)", - "std_xy": "Spatial Standard Deviation (Model)", - "std-obs_xyt": "Spatial-temporal Standard Deviation (Reference)", - "std_xyt": "Spatial-temporal Standard Deviation (Model)", - "std-obs_xy_devzm": "Standard Deviation of Deviation from Zonal Mean (Reference)", - "mean_xy": "Area Weighted Spatial Mean (Model)", - "mean-obs_xy": "Area Weighted Spatial Mean (Reference)", - "std_xy_devzm": "Standard Deviation of Deviation from Zonal Mean (Model)", - "rms_xyt": "Spatio-Temporal Root Mean Square Error", - "rms_xy": "Spatial Root Mean Square Error", - "rmsc_xy": "Centered Spatial Root Mean Square Error", - "cor_xy": "Spatial Pattern Correlation Coefficient", - "bias_xy": "Mean Bias (Model - Reference)", - "mae_xy": "Mean Absolute Difference (Model - Reference)", - "rms_y": "Root Mean Square Error of Zonal Mean", - "rms_devzm": "Root Mean Square Error of Deviation From Zonal Mean", - } - if name in metrics.keys(): - long_name = metrics[name] - - return long_name - - -def find_latest(pmprdir, mip, exp): - versions = sorted( - [ - r.split("/")[-1] - for r in glob.glob(os.path.join(pmprdir, mip, exp, "v????????")) - ] - ) - latest_version = versions[-1] - return latest_version - - -def shift_row_to_bottom(df, index_to_shift): - idx = [i for i in df.index if i != index_to_shift] - return df.loc[idx + [index_to_shift]] - - -def find_cmip_metric_data(pmprdir, data_set, var): - # cmip data for comparison - mip = data_set.split(".")[0] - exp = data_set.split(".")[1] - case_id = data_set.split(".")[2] - if case_id == "": - case_id = find_latest(pmprdir, mip, exp) - fpath = glob.glob(os.path.join(pmprdir, mip, exp, case_id, "{}.*.json".format(var))) - if len(fpath) < 1 and var == "rtmt": - fpath = glob.glob( - os.path.join(pmprdir, mip, exp, case_id, "{}.*.json".format("rt")) - ) - if len(fpath) > 0 and os.path.exists(fpath[0]): - cmip_list = 
fpath[0] - return_code = 0 - else: - print("Warning: cmip metrics data not found for {}....".format(var)) - print("Warning: remove {} from the metric list....".format(var)) - cmip_list = None - return_code = -99 - return cmip_list, return_code - - -def select_models(df, selected_models): - # Selected models only - model_names = df["model"].tolist() - for model_name in model_names: - drop_model = True - for keyword in selected_models: - if keyword in model_name: - drop_model = False - break - if drop_model: - df.drop(df.loc[df["model"] == model_name].index, inplace=True) - df.reset_index(drop=True, inplace=True) - - return df - - -def exclude_models(df, excluded_models): - # eclude models - model_names = df["model"].tolist() - for model_name in model_names: - drop_model = False - for keyword in excluded_models: - if keyword in model_name: - drop_model = True - break - if drop_model: - df.drop(df.loc[df["model"] == model_name].index, inplace=True) - df.reset_index(drop=True, inplace=True) - return df - - -def fill_plot_var_and_units(model_lib, cmip_lib): - # we define fixed sets of variables used for final plotting. 
- units_all = { - "prw": "[kg m$^{-2}$]", - "pr": "[mm d$^{-1}$]", - "prsn": "[mm d$^{-1}$]", - "prc": "[mm d$^{-1}$]", - "hfls": "[W m$^{-2}$]", - "hfss": "[W m$^{-2}$]", - "clivi": "[kg $m^{-2}$]", - "clwvi": "[kg $m^{-2}$]", - "psl": "[Pa]", - "evspsbl": "[kg m$^{-2} s^{-1}$]", - "rlds": "[W m$^{-2}$]", - "rldscs": "[W $m^{-2}$]", - "rtmt": "[W m$^{-2}$]", - "rsdt": "[W m$^{-2}$]", - "rlus": "[W m$^{-2}$]", - "rluscs": "[W m$^{-2}$]", - "rlut": "[W m$^{-2}$]", - "rlutcs": "[W m$^{-2}$]", - "rsds": "[W m$^{-2}$]", - "rsdscs": "[W m$^{-2}$]", - "rstcre": "[W m$^{-2}$]", - "rltcre": "[W m$^{-2}$]", - "rsus": "[W m$^{-2}$]", - "rsuscs": "[W m$^{-2}$]", - "rsut": "[W m$^{-2}$]", - "rsutcs": "[W m$^{-2}$]", - "ts": "[K]", - "tas": "[K]", - "tauu": "[Pa]", - "tauv": "[Pa]", - "sfcWind": "[m s$^{-1}$]", - "zg-500": "[m]", - "ta-200": "[K]", - "ta-850": "[K]", - "ua-200": "[m s$^{-1}$]", - "ua-850": "[m s$^{-1}$]", - "va-200": "[m s$^{-1}$]", - "va-850": "[m s$^{-1}$]", - "uas": "[m s$^{-1}$]", - "vas": "[m s$^{-1}$]", - "tasmin": "[K]", - "tasmax": "[K]", - "clt": "[%]", - } - - # loop variable list and find them in cmip and target models - variable_units = [] - variable_names = [] - for var in units_all.keys(): - # reorgnize cmip data - if var == "rtmt": - if ("rt" in cmip_lib.var_list) and ("rtmt" in model_lib.var_list): - # special case (rt is used in pcmdi datasets, but rtmt is for cmip) - cmip_lib.var_list = list( - map(lambda x: x.replace("rt", "rtmt"), cmip_lib.var_list) - ) - for stat in cmip_lib.df_dict: - for season in cmip_lib.df_dict[stat]: - for region in cmip_lib.df_dict[stat][season]: - cmip_lib.df_dict[stat][season][region]["rtmt"] = ( - cmip_lib.df_dict[stat][season][region].pop("rt") - ) - - if var in model_lib.var_list and var in cmip_lib.var_list: - varunt = var + "\n" + str(units_all[var]) - indv1 = cmip_lib.var_list.index(var) - indv2 = model_lib.var_list.index(var) - cmip_lib.var_unit_list[indv1] = varunt - model_lib.var_unit_list[indv2] = varunt 
- variable_units.append(varunt) - variable_names.append(var) - del (indv1, indv2, varunt) - else: - print("Warning: {} is not found in metrics data".format(var)) - print( - "Warning: {} is possibly not included as default in fill_plot_var_and_units()".format( - var - ) - ) - - # sanity check for cmip data - for stat in cmip_lib.df_dict: - for season in cmip_lib.df_dict[stat]: - for region in cmip_lib.df_dict[stat][season]: - df = pd.DataFrame(cmip_lib.df_dict[stat][season][region]) - for i, model in enumerate(df["model"].tolist()): - if model in ["E3SM-1-0", "E3SM-1-1-ECA"]: - idxs = df[df.iloc[:, 0] == model].index - df.loc[idxs, "ta-850"] = np.nan - del idxs - if model in ["CIESM"]: - idxs = df[df.iloc[:, 0] == model].index - df.loc[idxs, "pr"] = np.nan - del idxs - cmip_lib.df_dict[stat][season][region] = df - del df - - return model_lib, cmip_lib, variable_names, variable_units - - -def find_metrics_data(parameter): - pmp_set = parameter.pcmdi_data_set - pmp_path = parameter.pcmdi_data_path - test_set = parameter.test_data_set - test_path = parameter.test_data_path - refr_set = parameter.refr_data_set - refr_path = parameter.refr_data_path - run_type = parameter.run_type - debug = parameter.debug - - test_mip = test_set.split(".")[0] - test_exp = test_set.split(".")[1] - test_case_id = test_set.split(".")[-1] - test_dir = os.path.join(test_path, test_mip, test_exp, test_case_id) - if run_type == "model_vs_model": - refr_mip = refr_set.split(".")[0] - refr_exp = refr_set.split(".")[1] - refr_case_id = refr_set.split(".")[-1] - refr_dir = os.path.join(refr_path, refr_mip, refr_exp, refr_case_id) - - variables = [ - s.split("/")[-1].split("_")[0] - for s in glob.glob(os.path.join(test_dir, "*{}.json".format(test_case_id))) - if os.path.exists(s) - ] - variables = list(set(variables)) - - # find list of metrics data files - test_list = [] - refr_list = [] - cmip_list = [] - - for vv in variables: - ftest = glob.glob( - os.path.join(test_dir, 
"{}_*_{}.json".format(vv, test_case_id)) - ) - fcmip, rcode = find_cmip_metric_data(pmp_path, pmp_set, vv) - if rcode == 0: - if len(ftest) > 0 and len(fcmip) > 0: - for fx in ftest: - test_list.append(fx) - cmip_list.append(fcmip) - if debug: - print(ftest[0].split("/")[-1], fcmip.split("/")[-1]) - if run_type == "model_vs_model": - frefr = glob.glob( - os.path.join(refr_dir, "{}_*_{}.json".format(vv, refr_case_id)) - ) - if len(frefr) > 0: - for fr in frefr: - refr_list.append(fr) - if debug: - print( - ftest[0].split("/")[-1], - frefr[0].split("/")[-1], - fcmip.split("/")[-1], - ) - del frefr - del (ftest, fcmip) - return test_list, refr_list, cmip_list diff --git a/zppy/templates/inclusions/pcmdi/reference_alias.json b/zppy/templates/inclusions/pcmdi/reference_alias.json index 5fee538d..1b1ebc92 100755 --- a/zppy/templates/inclusions/pcmdi/reference_alias.json +++ b/zppy/templates/inclusions/pcmdi/reference_alias.json @@ -204,7 +204,7 @@ }, "wap" : { "default" : "ERA5", - "alternate" : "MERRA2", + "alternate" : "MERRA2", "alternate1" : "ERA-Interim", "alternate2" : "NOAA-20C" }, diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash index 3097567c..563d9f08 100755 --- a/zppy/templates/pcmdi_diags.bash +++ b/zppy/templates/pcmdi_diags.bash @@ -46,14 +46,14 @@ ref_name={{ ref_name }} ################################################## #info to construct pcmdi-preferred data convension ################################################## -cmip_name='{{ cmip_name }}' -tableID='{{ cmip_tableID }}' +model_name='{{ model_name }}' +tableID='{{ model_tableID }}' {% if run_type == "model_vs_obs" %} -cmip_name_ref='obs.historical.%(model).00' +model_name_ref='obs.historical.%(model).00' tableID_ref=${tableID} {% elif run_type == "model_vs_model" %} -cmip_name_ref='{{ cmip_name_ref }}' -tableID_ref='{{ cmip_tableID_ref }}' +model_name_ref='{{ model_name_ref }}' +tableID_ref='{{ model_tableID_ref }}' {%- endif %} case_id=v$(date '+%Y%m%d') @@ -243,12 
+243,12 @@ climo_dir_primary=climo_test {%- endif %} # Create local links to input climo files climo_dir_source={{ output }}/post/atm/{{ grid }}/cmip_ts/monthly -create_links_acyc_climo ${climo_dir_source} ${climo_dir_primary} ${Y1} ${Y2} ${cmip_name}.${tableID} 1 +create_links_acyc_climo ${climo_dir_source} ${climo_dir_primary} ${Y1} ${Y2} ${model_name}.${tableID} 1 {% if run_type == "model_vs_model" %} # Create local links to input climo files (ref model) climo_dir_source={{ reference_data_path }} climo_dir_ref=climo_ref -create_links_acyc_climo ${climo_dir_source} ${climo_dir_ref} ${ref_Y1} ${ref_Y2} ${cmip_name_ref}.${tableID_ref} 2 +create_links_acyc_climo ${climo_dir_source} ${climo_dir_ref} ${ref_Y1} ${ref_Y2} ${model_name_ref}.${tableID_ref} 2 {%- endif %} {%- endif %} @@ -264,11 +264,11 @@ ts_dir_primary=ts_test {%- endif %} # Create netcdf files for time series variables ts_dir_source={{ output }}/post/atm/{{ grid }}/cmip_ts/monthly -create_links_ts ${ts_dir_source} ${ts_dir_primary} ${Y1} ${Y2} ${cmip_name}.${tableID} 3 +create_links_ts ${ts_dir_source} ${ts_dir_primary} ${Y1} ${Y2} ${model_name}.${tableID} 3 {% if run_type == "model_vs_model" %} ts_dir_source={{ reference_data_path_ts }}/{{ ts_num_years_ref }}yr ts_dir_ref=ts_ref -create_links_ts ${ts_dir_source} ${ts_dir_ref} ${ref_Y1} ${ref_Y2} ${cmip_name_ref}.${tableID_ref} 4 +create_links_ts ${ts_dir_source} ${ts_dir_ref} ${ref_Y1} ${ref_Y2} ${model_name_ref}.${tableID_ref} 4 {%- endif %} {%- endif %} @@ -281,6 +281,7 @@ create_links_ts ${ts_dir_source} ${ts_dir_ref} ${ref_Y1} ${ref_Y2} ${cmip_name_r ######################################################### cat > process_derived_var.py << EOF import os +import re import glob import json import time @@ -311,23 +312,23 @@ def derive_var(path,vout,var_dic,fname): ds.to_netcdf(out) return -{% if run_type == "model_vs_obs" %} -cmip_groups = ['${cmip_name}.${tableID}'] +variables = '{{ vars }}'.split(",") + {%- if ("mean_climate" in subset) %} +{% if 
run_type == "model_vs_obs" %} +model_groups = ['${model_name}.${tableID}'] run_groups=['${climo_dir_primary}'] -variables = '{{ cmip_vars }}'.split(",") -{%- elif ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) or ("enso" in subset) %} -run_groups=['${ts_dir_primary}'] -variables = '{{ vars }}'.split(",") -{%- endif %} {% elif run_type == "model_vs_model" %} -cmip_groups = ['${cmip_name}.${tableID}','${cmip_name_ref}.${tableID_ref}'] -{%- if ("mean_climate" in subset) %} +model_groups = ['${model_name}.${tableID}','${model_name_ref}.${tableID_ref}'] run_groups=['${climo_dir_primary}','${climo_dir_ref}'] -variables = '{{ cmip_vars }}'.split(",") +{%- endif %} {%- elif ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) or ("enso" in subset) %} -run_groups=['${ts_dir_primary}','${ts_dir_ref}'] -variables = '{{ vars }}'.split(",") +{% if run_type == "model_vs_obs" %} +model_groups = ['${model_name}.${tableID}'] +run_groups = ['${ts_dir_primary}'] +{% elif run_type == "model_vs_model" %} +model_groups = ['${model_name}.${tableID}','${model_name_ref}.${tableID_ref}'] +run_groups = ['${ts_dir_primary}','${ts_dir_ref}'] {%- endif %} {%- endif %} @@ -338,16 +339,16 @@ variables = '{{ vars }}'.split(",") for i,group in enumerate(run_groups): for j,var in enumerate(variables): if "_" in var or "-" in var: - varin = var.split("_|-", varin)[0] + varin = re.split("_|-", var)[0] else: varin = var if varin in ['rltcre','rstcre']: fpaths = sorted(glob.glob(os.path.join(group,"*"+var+"_*.nc"))) if len(fpaths) < 1: if varin == 'rstcre': - derive_var(group,varin,{'rsutcs':1,'rsut':-1},cmip_groups[i]) + derive_var(group,varin,{'rsutcs':1,'rsut':-1},model_groups[i]) elif varin == 'rltcre': - derive_var(group,varin,{'rlutcs':1,'rlut':-1},cmip_groups[i]) + derive_var(group,varin,{'rlutcs':1,'rlut':-1},model_groups[i]) EOF ################### @@ -374,6 +375,7 @@ mkdir -p ${obstmp_dir} #create a python module to link observation data cat > 
link_observation.py << EOF import os +import re import glob import json import time @@ -404,10 +406,10 @@ def derive_var(path,vout,var_dic,fname): ds.to_netcdf(out) return -cmip_name = '${cmip_name_ref}.${tableID_ref}' +model_name = '${model_name_ref}.${tableID_ref}' {%- if ("mean_climate" in subset) %} -variables = '{{ cmip_vars }}'.split(",") +variables = '{{ vars }}'.split(",") obs_sets = '{{ obs_sets }}'.split(",") {%- elif ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) or ("enso" in subset) %} variables = '{{ vars }}'.split(",") @@ -430,7 +432,7 @@ obs_dic = json.load(open('{{reference_alias}}')) #loop each variable and process the data for i,var in enumerate(variables): if "_" in var or "-" in var: - varin = var.split("_|-", varin)[0] + varin = re.split("_|-", var)[0] else: varin = var @@ -439,7 +441,7 @@ for i,var in enumerate(variables): else: obsid = obs_sets[0] - obsname = obs_dic[var][obsid] + obsname = obs_dic[varin][obsid] if "ceres_ebaf" in obsname: obsstr = obsname.replace("_","*").replace("-","*") else: @@ -458,7 +460,7 @@ for i,var in enumerate(variables): obs = obsname.replace(".","_") out = os.path.join('${obstmp_dir}', '{}.{}.{}-{}.nc'.format( - cmip_name.replace('%(model)',obs), + model_name.replace('%(model)',obs), var,yms,yme)) #rename variable if needed then save file if varin != var: @@ -475,9 +477,9 @@ for i,var in enumerate(variables): fpaths = sorted(glob.glob(os.path.join('${obstmp_dir}',"*"+varin+"_*.nc"))) if len(fpaths) < 1: if varin == 'rstcre': - derive_var('${obstmp_dir}',varin,{'rsutcs':1,'rsut':-1},cmip_name) + derive_var('${obstmp_dir}',varin,{'rsutcs':1,'rsut':-1},model_name) elif varin == 'rltcre': - derive_var('${obstmp_dir}',varin,{'rlutcs':1,'rlut':-1},cmip_name) + derive_var('${obstmp_dir}',varin,{'rlutcs':1,'rlut':-1},model_name) EOF ################### @@ -512,6 +514,7 @@ create_links_ts_obs ${ts_dir_ref_source} ${ts_dir_ref} ${Y1} ${Y2} 8 mkdir -p pcmdi_diags cat > data_info_collect.py << EOF 
import os +import re import glob import json import collections @@ -520,7 +523,7 @@ from collections import OrderedDict {%- if ("mean_climate" in subset) %} test = '${climo_dir_primary}' refr = '${climo_dir_ref}' -variables = '{{ cmip_vars }}'.split(",") +variables = '{{ vars }}'.split(",") {%- elif ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) %} test = '${ts_dir_primary}' refr = '${ts_dir_ref}' @@ -531,18 +534,18 @@ refr = '${ts_dir_ref}' variables = '{{ vars }}'.split(",") {%- endif %} -test_data_set = ['${cmip_name}'.split(".")[1]] +test_data_set = ['${model_name}'.split(".")[1]] {% if run_type == "model_vs_obs" %} refr_data_set = '{{ obs_sets }}'.split(",") {% elif run_type == "model_vs_model" %} -refr_data_set = ['${cmip_name_ref}'.split(".")[1]] +refr_data_set = ['${model_name_ref}'.split(".")[1]] {%- endif %} #collect variables when both model and observations are available refr_dic,test_dic = OrderedDict(),OrderedDict() for i,var in enumerate(variables): if "_" in var or "-" in var: - varin = var.split("_|-", varin)[0] + varin = re.split("_|-", var)[0] else: varin = var test_path = sorted(glob.glob(os.path.join(test,"*.{}.*.nc".format(varin)))) @@ -702,99 +705,6 @@ if [ $? 
!= 0 ]; then exit 10 fi -{%- if '{{sythentic_plots}}' == "y" %} -################################################################### -# this module is added as an external module to generate sythentic -# metrics plots for mean-climate diagnostics (compared with cmip -################################################################### -# Prepare configuration file -cat > sythentic_plots.py << EOF -import os -import sys -import glob -import json -import time -import datetime -import xcdat as xc -import numpy as np -import pcmdi_metrics - -# external module for plot -sys.path.append('{{clim_plot_parser}}'.split("/")[-1]) -clim_plot_parser = '{{clim_plot_parser}}'.split("/")[-1] -clim_plot_driver = '{{clim_plot_driver}}'.split("/")[-1] -from clim_plot_parser import ( - create_mean_climate_plot_parser, -) -from clim_plot_driver import ( - mean_climate_metrics_plot, -) - -parser = create_mean_climate_plot_parser() -parameter = parser.get_parameter(argparse_vals_only=False) -parameter.run_type = "${run_type}" - -{% if run_type == "model_vs_obs" %} -parameter.refr_data_set = "" -parameter.refr_period = "" -parameter.refr_data_path = "" -{% elif run_type == "model_vs_model" %} -parameter.refr_data_set = '${cmip_name_ref}.${case_id}' -parameter.refr_period = "{}-{}".format(${ref_Y1},${ref_Y2}) -parameter.refr_data_path = ${reference_data_path} -{%- endif %} - -parameter.test_data_set = '${cmip_name}' -parameter.test_period = "{:04d}-{:04d}".format(${Y1},${Y2})" -parameter.test_data_path = os.path.join( - '${cmip_name}'.split(".")[0], - '${cmip_name}'.split(".")[1], - '${case_id}' -) - -{%- if ("mean_climate" in subset) %} -pcmdi_data_set = '{{pcmdi_cmip_mclm}}' -pcmdi_data_key = 'mean_climate' -{%- elif ("variability_mode" in subset) %} -pcmdi_data_set = '{{pcmdi_cmip_mov}}' -pcmdi_data_key = 'variability_modes' -{%- elif ("enso" in subset) %} -pcmdi_data_set = '{{pcmdi_cmip_enso}}' -pcmdi_data_key = 'enso_metric' -{%- endif %} - -#existing pcmdi cmip diagnostic metrics 
-parameter.pcmdi_data_set = pcmdi_data_set -parameter.pcmdi_data_path = os.path.join( - "{{pcmdi_data_path}}", - "variability_modes", - pcmdi_data_set.split(".")[0], - pcmdi_data_set.split(".")[1], - pcmdi_data_set.split(".")[2] -) - -parameter.output_path = os.path.join( - "pcmdi_diags", - "graphics", - pcmdi_data_key, -) -parameter.ftype = '{{ figure_format }}' -parameter.debug = {{ pmp_debug }} -parameter.parcord_show_markers = {{parcord_show_markers}} #False -parameter.add_vertical_line = {{portrait_vertical_line}} #True - -#generate diagnostics figures - -print("--- generate mean climate metrics plot ---") -compute_regions = '{{ regions }}'.split(",") -compute_variables ='{{ vars }}'.split(",") - -mean_climate_metrics_plot(parameter) - -EOF - -{%- endif %} - ######################################################## # generate basic parameter file for pcmdi metrics driver ######################################################## @@ -814,10 +724,10 @@ end_yr = int('${Y2}') num_years = end_yr - start_yr + 1 period = "{:04d}{:02d}-{:04d}{:02d}".format(start_yr,1,end_yr,12) -mip = '${cmip_name}'.split(".")[0] -exp = '${cmip_name}'.split(".")[1] -product = '${cmip_name}'.split(".")[2] -realm = '${cmip_name}'.split(".")[3] +mip = '${model_name}'.split(".")[0] +exp = '${model_name}'.split(".")[1] +product = '${model_name}'.split(".")[2] +realm = '${model_name}'.split(".")[3] ############################################## #Configuration shared with pcmdi diagnostics @@ -1073,11 +983,13 @@ echo cat > pcmdi.py << EOF import os import glob +import re import json import time import datetime import xcdat as xc import numpy as np +import pandas as pd import collections from collections import OrderedDict @@ -1087,6 +999,13 @@ from pcmdi_metrics.io import ( xcdat_open ) +from pcmdi_metrics.graphics import ( + Metrics, + normalize_by_median, + parallel_coordinate_plot, + portrait_plot, +) + import psutil import subprocess from itertools import chain @@ -1141,12 +1060,12 @@ def 
enso_obsvar_dict(obs_dic,variables): refr_dic = OrderedDict() for var in variables: vkey = var.split("-")[0] - refset = obs_dic[var]['set'] - refname = obs_dic[var][refset] + refset = obs_dic[vkey]['set'] + refname = obs_dic[vkey][refset] #data file in model->var sequence if refname not in refr_dic.keys(): refr_dic[refname] = {} - refr_dic[refname][var] = obs_dic[var][refname] + refr_dic[refname][vkey] = obs_dic[vkey][refname] #save data file dictionary json.dump(refr_dic, @@ -1162,8 +1081,8 @@ def enso_obsvar_lmsk(regions,variables): relf_dic = OrderedDict() for var in variables: vkey = var.split("-")[0] - refset = obs_dic[var]['set'] - refname = obs_dic[var][refset] + refset = obs_dic[vkey]['set'] + refname = obs_dic[vkey][refset] #land/sea mask if refname not in relf_dic.keys(): relf_dic[refname] = os.path.join( @@ -1179,6 +1098,409 @@ def enso_obsvar_lmsk(regions,variables): return + +def shift_row_to_bottom(df, index_to_shift): + idx = [i for i in df.index if i != index_to_shift] + return df.loc[idx + [index_to_shift]] + +def merge_data(model_lib,cmip_lib,model_name): + model_lib,cmip_lib = check_regions(model_lib,cmip_lib) + merge_lib = cmip_lib.merge(model_lib) + merge_lib = check_units(merge_lib) + for stat in merge_lib.df_dict: + for season in merge_lib.df_dict[stat]: + for region in merge_lib.df_dict[stat][season]: + highlight_models = [] + df = merge_lib.df_dict[stat][season][region] + for model in df["model"].tolist(): + if "e3sm" in model.lower(): + highlight_models.append(model) + if model in model_name: + idxs = df[df.iloc[:, 0] == model].index + df.loc[idxs, "model"] = model_name + highlight_models.append(model_name) + for model in highlight_models: + for idx in df[df.iloc[:, 0] == model].index: + df = shift_row_to_bottom(df, idx) + merge_lib.df_dict[stat][season][region] = df.fillna(value=np.nan) + del(df) + return merge_lib + +def check_regions(data_lib,ref_lib): + regions = [x for x in data_lib.regions if x in ref_lib.regions] + for stat in 
ref_lib.df_dict: + for season in ref_lib.df_dict[stat]: + subset_dict = dict((k, ref_lib.df_dict[stat][season][k]) for k in regions) + ref_lib.df_dict[stat][season] = subset_dict + del(subset_dict) + ref_lib.regions = regions + + for stat in data_lib.df_dict: + for season in data_lib.df_dict[stat]: + subset_dict = dict((k, data_lib.df_dict[stat][season][k]) for k in regions) + data_lib.df_dict[stat][season] = subset_dict + del(subset_dict) + data_lib.regions = regions + + return data_lib,ref_lib + +def check_references(data_dict): + reference_alias = {'CERES-EBAF-4-1': 'ceres_ebaf_v4_1', + 'CERES-EBAF-4-0': 'ceres_ebaf_v4_0', + 'CERES-EBAF-2-8': 'ceres_ebaf_v2_8', + 'GPCP-2-3' : 'GPCP_v2_3', + 'GPCP-2-2' : 'GPCP_v2_2', + 'GPCP-3-2' : 'GPCP_v3_2', + 'NOAA_20C' : 'NOAA-20C', + 'ERA-INT' : 'ERA-Interim', + 'ERA-5' : 'ERA5'} + for key,values in data_dict.items(): + for i,value in enumerate(values): + if value in reference_alias.keys(): + values[i] = reference_alias[value] + data_dict[key] = values + return data_dict + +def check_units(data_lib): + # we define fixed sets of variables used for final plotting. 
+ units_all = { + "prw" : "[kg m$^{-2}$]", "pr" : "[mm d$^{-1}$]", "prsn" : "[mm d$^{-1}$]", + "prc" : "[mm d$^{-1}$]", "hfls" : "[W m$^{-2}$]", "hfss" : "[W m$^{-2}$]", + "clivi" : "[kg $m^{-2}$]", "clwvi" : "[kg $m^{-2}$]", "psl" : "[Pa]", + "rlds" : "[W m$^{-2}$]", "rldscs": "[W $m^{-2}$]", "evspsbl": "[kg m$^{-2} s^{-1}$]", + "rtmt" : "[W m$^{-2}$]", "rsdt" : "[W m$^{-2}$]", "rlus" : "[W m$^{-2}$]", + "rluscs": "[W m$^{-2}$]", "rlut" : "[W m$^{-2}$]", "rlutcs" : "[W m$^{-2}$]", + "rsds" : "[W m$^{-2}$]", "rsdscs": "[W m$^{-2}$]", "rstcre" : "[W m$^{-2}$]", + "rltcre": "[W m$^{-2}$]", "rsus" : "[W m$^{-2}$]", "rsuscs" : "[W m$^{-2}$]", + "rsut" : "[W m$^{-2}$]", "rsutcs": "[W m$^{-2}$]", "ts" : "[K]", + "tas" : "[K]", "tauu" : "[Pa]", "tauv" : "[Pa]", + "zg-500": "[m]", "ta-200": "[K]", "sfcWind": "[m s$^{-1}$]", + "ta-850": "[K]", "ua-200": "[m s$^{-1}$]", "ua-850" : "[m s$^{-1}$]", + "va-200": "[m s$^{-1}$]", "va-850": "[m s$^{-1}$]", "uas" : "[m s$^{-1}$]", + "vas" : "[m s$^{-1}$]", "tasmin": "[K]", "tasmax" : "[K]", + "clt" : "[%]"} + + common_vars = [x for x in data_lib.var_list if x in units_all.keys()] + #special case + if 'rtmt' not in common_vars: + if ('rt' in data_lib.var_list) or ('rmt' in data_lib.var_list): + common_vars.append('rtmt') + + #collect unit list + common_unts = [units_all[x] for x in common_vars] + + #collect reference list + reflist = data_lib.var_ref_dict.copy() + for var in reflist: + if var not in common_vars: + if var in ['rt','rmt']: + data_lib.var_ref_dict['rtmt'] = data_lib.var_ref_dict.pop(var) + else: + data_lib.var_ref_dict.pop(var) + data_lib.var_ref_dict = check_references(data_lib.var_ref_dict) + #now clean up data to exclude vars not in common lists + for stat in data_lib.df_dict: + for season in data_lib.df_dict[stat]: + for region in data_lib.df_dict[stat][season]: + df = data_lib.df_dict[stat][season][region] + if 'rt' in df.columns: + df['rtmt'] = df['rt'] + elif 'rmt' in df.columns: + df['rtmt'] = df['rmt'] + for 
var in df.columns[3:]: + if var not in common_vars: + df = df.drop(var,axis=1) + data_lib.df_dict[stat][season][region] = df + del(df) + + data_lib.var_list = common_vars + data_lib.var_unit_list = common_unts + + return data_lib + +def collect_metrics_data(parameter,group): + #merge data to an exisiting cmip base + cmip_files = glob.glob(os.path.join( + parameter['cmip_path'], + group, + parameter['cmip_name'].split(".")[0], + parameter['cmip_name'].split(".")[1], + parameter['cmip_name'].split(".")[2], + "*.json")) + if len(cmip_files) > 0 and os.path.exists(cmip_files[0]): + print('CMIP PCMDI DIAGs for Sythetic Metrics Found, Read data...') + cmip_lib = Metrics(cmip_files) + cmip_lib = check_units(cmip_lib) + else: + exit("Warning: CMIP PCMDI DIAGs for Sythetic Metrics Not Found,....") + + model_name = '.'.join([ + parameter['test_name'].split(".")[2], + parameter['test_name'].split(".")[3]]) + model_files = glob.glob(os.path.join( + parameter['test_path'], + group, + parameter['test_name'].split(".")[0], + parameter['test_name'].split(".")[1], + parameter['case_id'], + "*.json")) + if len(model_files) > 0 and os.path.exists(model_files[0]): + print('{} PCMDI DIAGs for Sythetic Metrics Found, Read data...'.format(model_name)) + model_lib = Metrics(model_files) + model_lib = check_units(model_lib) + else: + exit("Warning: Model PCMDI DIAGs for Sythetic Metrics Not Found,....") + + #merge model data with reference cmip data + merge_lib = merge_data(model_lib,cmip_lib,model_name) + + return merge_lib + +def archive_data(parameter,stat,region,season,data_dict, + model_name,var_names,var_units,outdir): + outdic = pd.DataFrame(data_dict) + outdic = outdic.drop(columns=["model_run"]) + for var in list(outdic.columns.values[3:]): + if var not in var_names: + outdic = outdic.drop(columns=[var]) + else: + # replace the variable with the name + units + outdic.columns.values[outdic.columns.values.tolist().index(var)] = ( + var_units[var_names.index(var)] + ) + # save data 
to .csv file + if not os.path.exists(outdir): + os.makedirs(outdir) + outfile = "{}_{}_{}_{}.csv".format(stat,region,season,model_name) + outdic.to_csv(os.path.join(outdir,outfile)) + return + +def parcord_metric_plot(parameter,group,data_lib): + metric_dict = {"rms_xyt" : "RMSE", + "std_xyt" : "Stddev Model", + "std-obs_xyt" : "Stddev Obs."} + season = "ann" + model_name = '.'.join([ + parameter['test_name'].split(".")[2], + parameter['test_name'].split(".")[3]]) + + # process figure + fontsize = 20 + figsize = (40, 18) + legend_box_xy = (1.08, 1.18) + legend_box_size = 4 + legend_lw = 1.5 + shrink = 0.8 + legend_fontsize = fontsize * 0.8 + # hide markers for CMIP models + identify_all_models = False + + for stat in metric_dict.keys(): + for region in data_lib.regions: + # data for final plot + data_dict = data_lib.df_dict[stat][season][region] + data_dict = data_dict.reset_index(drop=True) + #drop data if all is NaNs + var_names = data_lib.var_list.copy() + var_units = data_lib.var_unit_list.copy() + for column in data_dict.columns[3:]: + if np.all(np.isnan(data_dict[column].to_numpy())): + data_dict = data_dict.drop(column, axis=1) + index = var_names.index(column) + var_names.remove(var_names[index]) + var_units.remove(var_units[index]) + + highlight_model1 = [] + for model in data_dict['model'].to_list(): + if "e3sm" in model.lower(): + highlight_model1.append(model) + elif model in model_name: + highlight_model1.append(model_name) + + # ensemble mean for CMIP group + irow_sub = data_dict[data_dict['model'] == highlight_model1[0]].index[0] + data_dict.loc["CMIP MMM"] = data_dict[:irow_sub].mean( + numeric_only=True, skipna=True) + data_dict.at["CMIP MMM", "model"] = "CMIP MMM" + data_dict.loc["E3SM MMM"] = data_dict[irow_sub:].mean( + numeric_only=True, skipna=True) + data_dict.at["E3SM MMM", "model"] = "E3SM MMM" + + if parameter['save_data']: + outdir = os.path.join(parameter['out_dir'],region) + archive_data(parameter,stat,region,season,data_dict, + 
model_name,var_names,var_units,outdir) + + #label information + var_labels = [] + for i,var in enumerate(var_names): + var_labels.append(var + "\n" + var_units[i]) + model_list = data_dict['model'].to_list() + highlight_model2 = data_dict['model'].to_list()[-3:] + + #final plot data + data_var = data_dict[var_names].to_numpy() + + figsize = (40, 12) + fontsize = 20 + legend_ncol = int(7 * figsize[0] / 40.0) + legend_posistion = (0.50, -0.14) + # colors for highlight lines + xcolors = ["#000000","#e41a1c","#ff7f00","#4daf4a","#f781bf", + "#a65628","#984ea3","#999999","#377eb8","#dede00"] + lncolors = xcolors[1 : len(highlight_model2)] + [xcolors[0]] + + xlabel = "Metric" + ylabel = '{} ({})'.format(metric_dict[stat],stat.upper()) + fig, ax = parallel_coordinate_plot( + data_var, + var_labels, + model_list, + model_names2=highlight_model1, + group1_name="CMIP6", + group2_name="E3SM", + models_to_highlight=highlight_model2, + models_to_highlight_colors=lncolors, + models_to_highlight_labels=highlight_model2, + identify_all_models=identify_all_models, + vertical_center="median", + vertical_center_line=True, + title="Model Performance of {} Climatology ({}, {})".format( + season.upper(), stat.upper(), region.upper()), + figsize=figsize, + colormap="tab20_r", + show_boxplot=False, + show_violin=True, + violin_colors=("lightgrey", "pink"), + legend_ncol=legend_ncol, + legend_bbox_to_anchor=legend_posistion, + legend_fontsize=fontsize * 0.85, + xtick_labelsize=fontsize * 0.95, + ytick_labelsize=fontsize * 0.95, + logo_rect=[0, 0, 0, 0], + logo_off=True) + + # Save figure as an image file + outdir = os.path.join(parameter['out_dir'],region) + if not os.path.exists(outdir): + os.makedirs(outdir) + outfile = "{}_{}_{}_parcord_{}.png".format(stat,region,season,model_name) + fig.savefig(os.path.join(outdir,outfile),facecolor="w", bbox_inches="tight") + + +def portrait_metric_plot(parameter,group,data_lib): + metric_dict = {"cor_xy" : "Pattern Corr.", + "rms_xy" : "Normalized 
RMSE by Median"} + seasons = ["djf", "mam", "jja", "son"] + var_names = data_lib.var_list + var_units = data_lib.var_unit_list + model_name = '.'.join([ + parameter['test_name'].split(".")[2], + parameter['test_name'].split(".")[3]]) + # process figure + fontsize = 20 + add_vertical_line = True + figsize = (40, 18) + legend_box_xy = (1.08, 1.18) + legend_box_size = 4 + legend_lw = 1.5 + shrink = 0.8 + legend_fontsize = fontsize * 0.8 + var_label_unit_on = False + + for stat in metric_dict.keys(): + for region in data_lib.regions: + data_nor = dict() + for season in seasons: + data_dict = data_lib.df_dict[stat][season][region] + if stat == "cor_xy": + data_nor[season] = data_dict[var_names].to_numpy().T + else: + data_nor[season] = normalize_by_median( + data_dict[var_names].to_numpy().T, axis=1) + if parameter['save_data']: + outdir = os.path.join(parameter['out_dir'],region) + archive_data(parameter,stat,region,season,data_dict, + model_name,var_names,var_units,outdir) + # data for final plot + data_all_nor = np.stack([data_nor["djf"], data_nor["mam"], + data_nor["jja"], data_nor["son"]]) + + model_list = data_dict['model'] + highlight_models = [] + for model in model_list: + if "e3sm" in model.lower(): + highlight_models.append(model) + highlight_models.append(model_name) + + lable_colors = [] + for model in model_list: + if model in highlight_models: + if model in model_name: + lable_colors.append("#FC5A50") + else: + lable_colors.append("#5170d7") + else: + lable_colors.append("#000000") + if stat == "cor_xy": + var_range = (-1.0, 1.0) + cmap_bounds = [0.1, 0.2, 0.4, 0.6, 0.65, 0.7, 0.75, + 0.8, 0.85, 0.9, 0.95, 1.0] + else: + var_range = (-0.5, 0.5) + cmap_bounds = [-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, + 0.2, 0.3, 0.4, 0.5] + + x_labels = model_list + y_labels = [] + if var_label_unit_on: + for i,var in enumerate(var_names): + y_labels.append(var + "\n" + var_units[i]) + else: + y_labels = var_names + + fig, ax, cbar = portrait_plot( + data_all_nor, + 
xaxis_labels=x_labels, + yaxis_labels=y_labels, + cbar_label=metric_dict[stat], + cbar_label_fontsize=fontsize * 1.0, + cbar_tick_fontsize=fontsize, + box_as_square=True, + vrange=var_range, + figsize=figsize, + cmap="RdYlBu_r", + cmap_bounds=cmap_bounds, + cbar_kw={"extend": "both", "shrink": shrink}, + missing_color="white", + legend_on=True, + legend_labels=["DJF", "MAM", "JJA", "SON"], + legend_box_xy=legend_box_xy, + legend_box_size=legend_box_size, + legend_lw=legend_lw, + legend_fontsize=legend_fontsize, + logo_rect=[0, 0, 0, 0], + logo_off=True) + + ax.axvline(x = len(x_labels) - len(highlight_models), color="k",linewidth=3,) + ax.set_xticklabels(model_list, + rotation=45, va="bottom", ha="left") + ax.set_yticklabels(y_labels, + rotation=0, va="center", ha="right") + + for xtick, color in zip(ax.get_xticklabels(), lable_colors): + xtick.set_color(color) + ax.yaxis.label.set_color(lable_colors[0]) + + # Save figure as an image file + outdir = os.path.join(parameter['out_dir'],region) + if not os.path.exists(outdir): + os.makedirs(outdir) + outfile = "{}_{}_4season_{}.png".format(stat,region,model_name) + fig.savefig(os.path.join(outdir,outfile),facecolor="w", bbox_inches="tight") + + return + ############################## start_yr = int('${Y1}') end_yr = int('${Y2}') @@ -1218,9 +1540,9 @@ variable_region( # each variable (will execuate in parallel later) lstcmd = [] for var in compute_variables: - if var in obs_dic.keys(): - vkey = var.split("-")[0] - refset = obs_dic[var]['set'] + vkey = var.split("-")[0] + if vkey in obs_dic.keys(): + refset = obs_dic[vkey]['set'] lstcmd.append(" ".join([ 'mean_climate_driver.py', '-p parameterfile.py' , @@ -1246,11 +1568,23 @@ else: #time delay to ensure process completely finished time.sleep(1) -{%- if '{{sythentic_plots}}' == 'y' %} -#process sythetic metric plot if turned on -return_code = subprocess.call(["python", 'sythentic_plots.py']) -if return_code != 0: - exit("Failed to process {{sythentic_plots}}") +{% if 
run_type == "model_vs_obs" %} +synthetic_plot = '{{sythentic_plots}}' +if synthetic_plot == "y": + print("generate sythentic metrics plot ...") + parameter = OrderedDict() + parameter['save_data'] = True + parameter['cmip_path'] = '{{pcmdi_data_path}}' + parameter['cmip_name'] = '{{pcmdi_cmip_clim}}' + parameter['test_name'] = '{{model_name}}' + parameter['test_path'] = os.path.join('pcmdi_diags','metrics_results') + parameter['case_id'] = '${case_id}' + parameter['out_dir'] = os.path.join('${results_dir}','ERROR_metric') + merge_lib = collect_metrics_data(parameter,'mean_climate') + print("Processing Portrait Plots (4 seasons)....") + portrait_metric_plot(parameter,'mean_climate',merge_lib) + print("Processing Parallel Coordinate Plots (Annual Cycle)....") + parcord_metric_plot(parameter,'mean_climate',merge_lib) {%- endif %} {%- endif %} @@ -1394,7 +1728,6 @@ def get_mean_climate_graphics(regions,variables,fig_format,input_dir,output_dir) fig_sets = OrderedDict() fig_sets['CLIM_patttern'] = ['graphics','*'] - fig_sets['ERROR_metric'] = ['graphics','*'] for fset in fig_sets.keys(): fdir = input_dir.replace('%(output_type)',fig_sets[fset][0] ) @@ -1465,15 +1798,14 @@ def get_enso_graphics(groups,fig_format,refname,input_dir,output_dir): ############# fig_format = '{{ figure_format }}' - diag_types = ['metrics_results','diagnostic_result','graphics'] input_template = os.path.join( 'pcmdi_diags', '%(output_type)', '%(metric_type)', - '${cmip_name}'.split(".")[0], - '${cmip_name}'.split(".")[1], + '${model_name}'.split(".")[0], + '${model_name}'.split(".")[1], '${case_id}', ) @@ -1514,6 +1846,16 @@ get_enso_graphics( {% endif %} EOF +################################ +# Run diagnostics +command="srun -N 1 python -u graphic_viewer.py" +# Run diagnostics +time ${command} +if [ $? 
!= 0 ]; then + cd {{ scriptDir }} + echo 'ERROR (12)' > {{ prefix }}.status + exit 12 +fi ################################# # Copy output to web server @@ -1526,8 +1868,8 @@ web_dir=${www}/${case}/pcmdi_diags mkdir -p ${web_dir} if [ $? != 0 ]; then cd {{ scriptDir }} - echo 'ERROR (12)' > {{ prefix }}.status - exit 12 + echo 'ERROR (13)' > {{ prefix }}.status + exit 13 fi {% if machine in ['pm-cpu', 'pm-gpu'] %} @@ -1546,11 +1888,12 @@ done ############################################ # Copy files -rsync -a --delete ${results_dir} ${web_dir}/ +#rsync -a --delete ${results_dir} ${web_dir}/ +rsync -a ${results_dir} ${web_dir}/ if [ $? != 0 ]; then cd {{ scriptDir }} - echo 'ERROR (13)' > {{ prefix }}.status - exit 13 + echo 'ERROR (14)' > {{ prefix }}.status + exit 14 fi {% if machine in ['pm-cpu', 'pm-gpu'] %} From 67ca00db34fba2e86c133cc9883060836dae5ed3 Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Tue, 7 Jan 2025 03:43:25 -0600 Subject: [PATCH 09/23] bug fix --- zppy/defaults/default.ini | 30 ++---- zppy/templates/pcmdi_diags.bash | 161 +++++++++++++------------------- 2 files changed, 77 insertions(+), 114 deletions(-) diff --git a/zppy/defaults/default.ini b/zppy/defaults/default.ini index 7f2a8d7b..bca053d7 100755 --- a/zppy/defaults/default.ini +++ b/zppy/defaults/default.ini @@ -229,12 +229,10 @@ RmDomainMean = string(default=True) EofScaling = string(default=False) ConvEOF = string(default=True) CBF = string(default=True) -cmec = string(default=False) -update_json = string(default=False) -plot_obs = string(default=True) -plot = string(default=True) -nc_out_obs = string(default=True) -nc_out = string(default=True) +mov_plot_obs = string(default=True) +mov_plot_model = string(default=True) +mov_nc_out_obs = string(default=True) +mov_nc_out_model = string(default=True) ########################################################################################## # options for pcmdi enso diagnostics # vars = 
"psl,pr,prsn,ts,tas,tauu,tauv,hflx,hfss,rlds,rsds,rlus,rlut,rsdt" @@ -275,11 +273,7 @@ pcmdi_cmip_clim = string(default="cmip6.historical.v20220928") pcmdi_cmip_movs = string(default="cmip6.historical.v20220825") pcmdi_cmip_enso = string(default="cmip6.historical.v20210620") # options shared by pcmdi -pmp_debug = string(default=False) -# setup for parallel coordinate plots (hide makers for sigle model) -parcord_show_markers = string(default=False) -# setup for portrait plots (add vertical line to separate test and reference models) -portrait_vertical_line = string(default=True) +pcmdi_debug = string(default=False) [[__many__]] backend = string(default=None) @@ -323,8 +317,6 @@ portrait_vertical_line = string(default=True) EofScaling = string(default=None) ConvEOF = string(default=None) CBF = string(default=None) - cmec = string(default=None) - update_json = string(default=None) subset = string(default=None) landmask = string(default=None) frequency = string(default=None) @@ -338,13 +330,11 @@ portrait_vertical_line = string(default=True) model_name_ref = string(default=None) model_tableID = string(default=None) model_vars = string(default=None) - pmp_debug = string(default=None) - nc_out_obs = string(default=None) - nc_out = string(default=None) - plot_obs = string(default=None) - plot = string(default=None) - parcord_show_markers = string(default=None) - portrait_vertical_line = string(default=None) + mov_nc_out_obs = string(default=None) + mov_nc_out_model = string(default=None) + mov_plot_obs = string(default=None) + mov_plot_model = string(default=None) + pcmdi_debug = string(default=None) [e3sm_diags] # See https://e3sm-project.github.io/e3sm_diags/_build/html/master/available-parameters.html diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash index 563d9f08..855f2a5b 100755 --- a/zppy/templates/pcmdi_diags.bash +++ b/zppy/templates/pcmdi_diags.bash @@ -408,13 +408,8 @@ def derive_var(path,vout,var_dic,fname): model_name = 
'${model_name_ref}.${tableID_ref}' -{%- if ("mean_climate" in subset) %} -variables = '{{ vars }}'.split(",") -obs_sets = '{{ obs_sets }}'.split(",") -{%- elif ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) or ("enso" in subset) %} variables = '{{ vars }}'.split(",") obs_sets = '{{ obs_sets }}'.split(",") -{%- endif %} ts_dir_ref_source = '{{ obs_ts }}' # variable map from observation to cmip @@ -733,18 +728,18 @@ realm = '${model_name}'.split(".")[3] #Configuration shared with pcmdi diagnostics ############################################## # Record NetCDF output -nc_out_obs = {{ nc_out_obs }} -nc_out = {{ nc_out }} -if nc_out: +nc_out_obs = {{ mov_nc_out_obs }} +nc_out_model = {{ mov_nc_out_model }} +if nc_out_model or nc_out_obs: ext = ".nc" else: ext = ".xml" user_notes = 'Provenance and results' -debug = {{ pmp_debug }} +debug = {{ pcmdi_debug }} # Generate plots -plot = {{ plot }} -plot_obs = {{ plot_obs }} # optional +plot_model = {{ mov_plot_model }} +plot_obs = {{ mov_plot_obs }} # optional # Additional settings run_type = '{{ run_type }}' @@ -884,18 +879,10 @@ landmask = {{ landmask }} #template for model file modnames = [ product ] realization = "*" -modpath = '.'.join([ - mip, - exp, - '%(model)', - '%(realization)', - '${tableID}', - '%(variable)', - period, - 'AC', - '${case_id}', - 'nc' -]) +modpath = os.path.join( + '${ts_dir_primary}', + '{}.{}.%(model).%(realization).{}.%(variable).{}.nc'.format(mip,exp,${tableID},period) +) #start and end year for analysis msyear = int(start_yr) @@ -914,10 +901,10 @@ CBF = {{ CBF }} ConvEOF = {{ ConvEOF }} # Generate CMEC compliant json -cmec = {{ cmec }} +cmec = False # Update diagnostic file if exist -update_json = {{ update_json }} +update_json = False #results directory structure. 
results_dir = os.path.join( @@ -938,12 +925,11 @@ results_dir = os.path.join( ########################################### modnames = [ product ] realization = realm - modpath = os.path.join( '${ts_dir_primary}', - '.'.join([mip,exp,'%(model)','%(realization)', - '${tableID}','%(variable)',period,'nc']) + '{}.{}.%(model).%(realization).{}.%(variable).{}.nc'.format(mip,exp,${tableID},period) ) + #observation/reference file catalogue obs_cmor = True obs_cmor_path = '${ts_dir_ref}' @@ -1276,10 +1262,8 @@ def archive_data(parameter,stat,region,season,data_dict, return def parcord_metric_plot(parameter,group,data_lib): - metric_dict = {"rms_xyt" : "RMSE", - "std_xyt" : "Stddev Model", - "std-obs_xyt" : "Stddev Obs."} season = "ann" + metric_dict = {"rms_xyt" : "RMSE"} model_name = '.'.join([ parameter['test_name'].split(".")[2], parameter['test_name'].split(".")[3]]) @@ -1287,22 +1271,26 @@ def parcord_metric_plot(parameter,group,data_lib): # process figure fontsize = 20 figsize = (40, 18) + shrink = 0.8 legend_box_xy = (1.08, 1.18) legend_box_size = 4 legend_lw = 1.5 - shrink = 0.8 legend_fontsize = fontsize * 0.8 + legend_ncol = int(7 * figsize[0] / 40.0) + legend_posistion = (0.50, -0.14) # hide markers for CMIP models identify_all_models = False + # colors for highlight lines + xcolors = ["#000000","#e41a1c","#ff7f00","#4daf4a","#f781bf", + "#a65628","#984ea3","#999999","#377eb8","#dede00"] for stat in metric_dict.keys(): for region in data_lib.regions: - # data for final plot - data_dict = data_lib.df_dict[stat][season][region] - data_dict = data_dict.reset_index(drop=True) - #drop data if all is NaNs var_names = data_lib.var_list.copy() var_units = data_lib.var_unit_list.copy() + # data for final plot + data_dict = data_lib.df_dict[stat][season][region].reset_index(drop=True) + #drop data if all is NaNs for column in data_dict.columns[3:]: if np.all(np.isnan(data_dict[column].to_numpy())): data_dict = data_dict.drop(column, axis=1) @@ -1331,28 +1319,22 @@ def 
parcord_metric_plot(parameter,group,data_lib): archive_data(parameter,stat,region,season,data_dict, model_name,var_names,var_units,outdir) - #label information - var_labels = [] - for i,var in enumerate(var_names): - var_labels.append(var + "\n" + var_units[i]) model_list = data_dict['model'].to_list() highlight_model2 = data_dict['model'].to_list()[-3:] - #final plot data + #final plot data data_var = data_dict[var_names].to_numpy() - figsize = (40, 12) - fontsize = 20 - legend_ncol = int(7 * figsize[0] / 40.0) - legend_posistion = (0.50, -0.14) - # colors for highlight lines - xcolors = ["#000000","#e41a1c","#ff7f00","#4daf4a","#f781bf", - "#a65628","#984ea3","#999999","#377eb8","#dede00"] - lncolors = xcolors[1 : len(highlight_model2)] + [xcolors[0]] + #label information + var_labels = [] + for i,var in enumerate(var_names): + var_labels.append(var + "\n" + var_units[i]) xlabel = "Metric" ylabel = '{} ({})'.format(metric_dict[stat],stat.upper()) - fig, ax = parallel_coordinate_plot( + # colors for highlight lines + lncolors = xcolors[1 : len(highlight_model2)] + [xcolors[0]] + fig,ax = parallel_coordinate_plot( data_var, var_labels, model_list, @@ -1366,7 +1348,7 @@ def parcord_metric_plot(parameter,group,data_lib): vertical_center="median", vertical_center_line=True, title="Model Performance of {} Climatology ({}, {})".format( - season.upper(), stat.upper(), region.upper()), + season.upper(),stat.upper(), region.upper()), figsize=figsize, colormap="tab20_r", show_boxplot=False, @@ -1389,14 +1371,13 @@ def parcord_metric_plot(parameter,group,data_lib): def portrait_metric_plot(parameter,group,data_lib): + seasons = ["djf", "mam", "jja", "son"] metric_dict = {"cor_xy" : "Pattern Corr.", "rms_xy" : "Normalized RMSE by Median"} - seasons = ["djf", "mam", "jja", "son"] - var_names = data_lib.var_list - var_units = data_lib.var_unit_list model_name = '.'.join([ parameter['test_name'].split(".")[2], parameter['test_name'].split(".")[3]]) + # process figure fontsize = 
20 add_vertical_line = True @@ -1406,70 +1387,65 @@ def portrait_metric_plot(parameter,group,data_lib): legend_lw = 1.5 shrink = 0.8 legend_fontsize = fontsize * 0.8 - var_label_unit_on = False + + var_names = data_lib.var_list + var_units = data_lib.var_unit_list for stat in metric_dict.keys(): for region in data_lib.regions: data_nor = dict() for season in seasons: - data_dict = data_lib.df_dict[stat][season][region] + data_dict = data_lib.df_dict[stat][season][region].copy() if stat == "cor_xy": data_nor[season] = data_dict[var_names].to_numpy().T else: data_nor[season] = normalize_by_median( - data_dict[var_names].to_numpy().T, axis=1) + data_dict[var_names].to_numpy().T, axis=1) if parameter['save_data']: + data_dict[var_names] = data_nor[season] outdir = os.path.join(parameter['out_dir'],region) archive_data(parameter,stat,region,season,data_dict, model_name,var_names,var_units,outdir) - # data for final plot - data_all_nor = np.stack([data_nor["djf"], data_nor["mam"], - data_nor["jja"], data_nor["son"]]) + # data for final plot + data_all_nor = np.stack( + [data_nor["djf"], data_nor["mam"], data_nor["jja"], data_nor["son"]] + ) - model_list = data_dict['model'] + lable_colors = [] highlight_models = [] + model_list = data_dict['model'].to_list() for model in model_list: if "e3sm" in model.lower(): - highlight_models.append(model) - highlight_models.append(model_name) - - lable_colors = [] - for model in model_list: - if model in highlight_models: - if model in model_name: - lable_colors.append("#FC5A50") - else: - lable_colors.append("#5170d7") + highlight_models.append(model) + lable_colors.append("#5170d7") + elif model in model_name: + highlight_models.append(model_name) + lable_colors.append("#FC5A50") else: lable_colors.append("#000000") + if stat == "cor_xy": - var_range = (-1.0, 1.0) - cmap_bounds = [0.1, 0.2, 0.4, 0.6, 0.65, 0.7, 0.75, - 0.8, 0.85, 0.9, 0.95, 1.0] + var_range = (0, 1.0) + cmap_color = "YlOrBr" + cmap_bounds = [0.0, 0.1, 0.2, 0.3, 
0.4, 0.5, 0.6, 0.65, + 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0] else: var_range = (-0.5, 0.5) - cmap_bounds = [-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, - 0.2, 0.3, 0.4, 0.5] - - x_labels = model_list - y_labels = [] - if var_label_unit_on: - for i,var in enumerate(var_names): - y_labels.append(var + "\n" + var_units[i]) - else: - y_labels = var_names + cmap_color = "RdYlBu_r" + cmap_bounds = [-0.5, -0.4, -0.3, -0.2, -0.1, + 0, 0.1,0.2, 0.3, 0.4, 0.5] fig, ax, cbar = portrait_plot( data_all_nor, - xaxis_labels=x_labels, - yaxis_labels=y_labels, + xaxis_labels=model_list, + yaxis_labels=var_names, cbar_label=metric_dict[stat], cbar_label_fontsize=fontsize * 1.0, cbar_tick_fontsize=fontsize, box_as_square=True, vrange=var_range, figsize=figsize, - cmap="RdYlBu_r", + cmap=cmap_color, cmap_bounds=cmap_bounds, cbar_kw={"extend": "both", "shrink": shrink}, missing_color="white", @@ -1482,13 +1458,10 @@ def portrait_metric_plot(parameter,group,data_lib): logo_rect=[0, 0, 0, 0], logo_off=True) - ax.axvline(x = len(x_labels) - len(highlight_models), color="k",linewidth=3,) - ax.set_xticklabels(model_list, - rotation=45, va="bottom", ha="left") - ax.set_yticklabels(y_labels, - rotation=0, va="center", ha="right") - - for xtick, color in zip(ax.get_xticklabels(), lable_colors): + ax.axvline(x=len(x_labels)-len(highlight_models),color="k",linewidth=3) + ax.set_xticklabels(model_list,rotation=45,va="bottom",ha="left") + ax.set_yticklabels(y_labels,rotation=0,va="center",ha="right") + for xtick,color in zip(ax.get_xticklabels(),lable_colors): xtick.set_color(color) ax.yaxis.label.set_color(lable_colors[0]) From 205a479b7c234f49d81d4408382a81ccd334141b Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Wed, 8 Jan 2025 14:48:54 -0600 Subject: [PATCH 10/23] reorgnize the including files --- .../inclusions/pcmdi/reference_alias.json | 341 ------------------ .../inclusions/pcmdi/regions_specs.json | 263 -------------- 2 files changed, 604 deletions(-) delete mode 100755 
zppy/templates/inclusions/pcmdi/reference_alias.json delete mode 100755 zppy/templates/inclusions/pcmdi/regions_specs.json diff --git a/zppy/templates/inclusions/pcmdi/reference_alias.json b/zppy/templates/inclusions/pcmdi/reference_alias.json deleted file mode 100755 index 1b1ebc92..00000000 --- a/zppy/templates/inclusions/pcmdi/reference_alias.json +++ /dev/null @@ -1,341 +0,0 @@ -{ - "rlds" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rldscs" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rlus" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rsds" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rsdscs" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - - "rsus" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rsuscs": { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", 
- "alternate5" : "NOAA-20C" - }, - "rstcre" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rltcre" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rlut" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rlutcs" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rsdt" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rsut" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rsutcs" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "rtmt" : { - "default" : "ceres_ebaf_v4.1", - "alternate" : "ceres_ebaf_v4.0", - "alternate1" : "ceres_ebaf_v2.8", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C" - }, - "pr" : { - "default" : "GPCP_v2.3", - "alternate" : "GPCP_v2.2", - "alternate1" : 
"GPCP_1DD", - "alternate2" : "ERA5", - "alternate3" : "MERRA2", - "alternate4" : "ERA-Interim", - "alternate5" : "NOAA-20C", - "alternate6" : "GPCP_v3.2" - }, - "prc" : { - "default" : "ERA5", - "alternate" : "NOAA-20C" - }, - "prsn" : { - "default" : "ERA5", - "alternate" : "NOAA-20C" - }, - "prw" : { - "default" : "ERA5", - "alternate" : "NOAA-20C", - "alternate1" : "MERRA2", - "alternate2" : "ERA-Interim", - "alternate3" : "NOAA-20C" - }, - "psl" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "ps" : { - "default" : "ERA5", - "alternate " : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "huss" : { - "default" : "MERRA2", - "alternate" : "NOAA-20C", - "alternate1" : "ERA5", - "alternate2" : "ERA-Interim" - }, - "ta" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "ua" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "va" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "hur" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "wap" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "zg" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "o3" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "hus" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "uas" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "vas" : { - "default" : "ERA5", - "alternate" : 
"MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "tauu" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C", - "alternate3" : "COREv2-Flux" - }, - "taux" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C", - "alternate3" : "COREv2-Flux" - }, - "tauv" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C", - "alternate3" : "COREv2-Flux" - }, - "tauy" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C", - "alternate3" : "COREv2-Flux" - }, - "tas" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C" - }, - "ts" : { - "default" : "ERA5", - "alternate" : "NOAA-20C", - "alternate1" : "HadISST2" - }, - "sst" : { - "default" : "ERA5", - "alternate" : "NOAA-20C", - "alternate1" : "HadISST2" - }, - "sfcWind" : { - "default" : "NOAA-20C", - "alternate" : "ERA5", - "alternate1" : "MERRA2", - "alternate2" : "ERA-Interim" - }, - "hfls" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C", - "alternate3" : "OAFlux" - }, - "hfss" : { - "default" : "ERA5", - "alternate" : "MERRA2", - "alternate1" : "ERA-Interim", - "alternate2" : "NOAA-20C", - "alternate3" : "OAFlux" - }, - "evspsbl" : { - "default" : "ERA5", - "alternate" : "NOAA-20C" - }, - "clt" : { - "default" : "ERA5", - "alternate3" : "NOAA-20C" - }, - "clwvi" : { - "default" : "ERA5", - "alternate" : "NOAA-20C" - }, - "clivi" : { - "default" : "ERA5", - "alternate" : "NOAA-20C" - }, - "tasmin" : { - "default" : "MERRA2" - }, - "tasmax" : { - "default" : "MERRA2" - }, - "sic" : { - "default" : "HadSST2" - }, - "tos" : { - "default" : "HadSST2" - }, - "zos" : { - "default" : "AVISO", - "alternate" : "HadISST" - }, - "sos" : { - "default" : 
"Aquarius", - "alternate" : "HadISST" - } -} diff --git a/zppy/templates/inclusions/pcmdi/regions_specs.json b/zppy/templates/inclusions/pcmdi/regions_specs.json deleted file mode 100755 index 811eb1e9..00000000 --- a/zppy/templates/inclusions/pcmdi/regions_specs.json +++ /dev/null @@ -1,263 +0,0 @@ -{ - "global": { - "domain": { "latitude":[-90.0, 90.0]} - }, - "NH": { - "domain": { "latitude":[0.0, 90.0]} - }, - "SH": { - "domain": { "latitude":[-90.0, 0]} - }, - "NHEX": { - "domain": { "latitude":[30.0, 90.0]} - }, - "SHEX": { - "domain": { "latitude":[-90.0, -30.0]} - }, - "TROPICS": { - "domain": { "latitude":[-30.0, 30.0]} - }, - "90S50S": { - "domain": { "latitude":[-90.0, -50.0]} - }, - "50S20S": { - "domain": { "latitude":[-50.0, -20.0]} - }, - "20S20N": { - "domain": { "latitude":[-20.0, 20.0]} - }, - "20N50N": { - "domain": { "latitude":[20.0, 50.0]} - }, - "50N90N": { - "domain": { "latitude":[50.0, 90.0]} - }, - "ocean_NH": { - "value": 0.0, - "domain": { "latitude":[0.0, 90.0]} - }, - "ocean_SH": { - "value": 0.0, - "domain": { "latitude":[-90.0, 0.0]} - }, - "land_NH": { - "value": 100, - "domain": { "latitude":[0.0, 90.0]} - }, - "land_SH": { - "value": 100, - "domain": { "latitude":[-90.0, 0.0]} - }, - "land_NHEX": { - "value": 100, - "domain": { "latitude":[30.0, 90.0]} - }, - "land_SHEX": { - "value": 100, - "domain": { "latitude":[-90.0, -30.0]} - }, - "land_TROPICS": { - "value": 100, - "domain": { "latitude":[-30.0, 30.0]} - }, - "land": { - "value": 100 - }, - "ocean_NHEX": { - "value": 0, - "domain": { "latitude":[30.0, 90.0]} - }, - "ocean_SHEX": { - "value": 0, - "domain": { "latitude":[-90.0, -30.0]} - }, - "ocean_TROPICS": { - "value": 0, - "domain": { "latitude":[30.0, 30.0]} - }, - "ocean": { - "value": 0 - }, - "ocean_50S50N": { - "value": 0.0, - "domain": { "latitude":[-50.0, 50.0]} - }, - "ocean_50S20S": { - "value": 0.0, - "domain": { "latitude":[-50.0, -20.0]} - }, - "ocean_20S20N": { - "value": 0.0, - "domain": { 
"latitude":[-20.0, 20.0]} - }, - "ocean_20N50N": { - "value": 0.0, - "domain": { "latitude":[20.0, 50.0]} - }, - "ocean_50N90N": { - "value": 0.0, - "domain": { "latitude":[50.0, 90.0]} - }, - "ocean_90S50S": { - "value": 0.0, - "domain": { "latitude":[-90.0, -50.0]} - }, - "NAM": { - "domain": { "latitude":[20.0, 90], - "longitude":[-180, 180]} - }, - "NAO": { - "domain": { "latitude":[20.0, 80], - "longitude":[-90, 40]} - }, - "SAM": { - "domain": { "latitude":[-20.0, -90], - "longitude":[0, 360]} - }, - "PSA1": { - "domain": { "latitude":[-20.0, -90], - "longitude":[0, 360]} - }, - "PSA2": { - "domain": { "latitude":[-20.0, -90], - "longitude":[0, 360]} - }, - "PNA": { - "domain": { "latitude":[20.0, 85], - "longitude":[120, 240]} - }, - "PDO": { - "domain": { "latitude":[20.0, 70], - "longitude":[110, 260]} - }, - "AMO": { - "domain": { "latitude":[0.0, 70], - "longitude":[-80, 0]} - }, - "AllMW": { - "domain": { "latitude":[-40.0, 45.0], - "longitude":[0.0, 360.0]} - }, - "AllM": { - "domain": { "latitude":[-45.0, 45.0], - "longitude":[0.0, 360.0]} - }, - "NAMM": { - "domain": { "latitude":[0.0, 45.0], - "longitude":[210.0, 310.0]} - }, - "SAMM": { - "domain": { "latitude":[-45.0, 0.0], - "longitude":[240.0, 330.0]} - }, - "NAFM": { - "domain": { "latitude":[0.0, 45.0], - "longitude":[310.0, 60.0]} - }, - "SAFM": { - "domain": { "latitude":[-45.0, 0.0], - "longitude":[0.0, 90.0]} - }, - "ASM": { - "domain": { "latitude":[0.0, 45.0], - "longitude":[60.0, 180.0]} - }, - "AUSM": { - "domain": { "latitude":[-45.0, 0.0], - "longitude":[90.0, 160.0]} - }, - "AIR": { - "domain": { "latitude":[7.0, 25.0], - "longitude":[65.0, 85.0]} - }, - "AUS": { - "domain": { "latitude":[-20.0, -10.0], - "longitude":[120.0, 150.0]} - }, - "Sahel": { - "domain": { "latitude":[13.0, 18.0], - "longitude":[-10.0, 10.0]} - }, - "GoG": { - "domain": { "latitude":[0.0, 5.0], - "longitude":[-10.0, 10.0]} - }, - "NAmo": { - "domain": { "latitude":[20.0, 37.0], - "longitude":[-112.0, 
-103.0]} - }, - "SAmo": { - "domain": { "latitude":[-20.0, 2.5], - "longitude":[-65.0, -40.0]} - }, - "Nino34": { - "value": 0.0, - "domain": { "latitude":[-5.0, 5.0], - "longitude":[190.0, 240.0]} - }, - "Nino3": { - "value": 0.0, - "domain": { "latitude":[-5.0, 5.0], - "longitude":[210.0, 270.0]} - }, - "Nino4": { - "value": 0.0, - "domain": { "latitude":[-5.0, 5.0], - "longitude":[160.0, 210.0]} - }, - "ONI": { - "value": 0.0, - "domain": { "latitude":[-5.0, 5.0], - "longitude":[190.0, 240.0]} - }, - "Nino12": { - "value": 0.0, - "domain": { "latitude":[-10.0, 0.0], - "longitude":[270.0, 280.0]} - }, - "AMMS": { - "value": 0.0, - "domain": { "latitude":[-15.0, -5.0], - "longitude":[-20.0, 10.0]} - }, - "AMMN": { - "value": 0.0, - "domain": { "latitude":[5.0, 15.0], - "longitude":[-50.0, -20.0]} - }, - "ATL3": { - "value": 0.0, - "domain": { "latitude":[-3.0, 3.0], - "longitude":[-20.0, 0.0]} - }, - "TSA": { - "value": 0.0, - "domain": { "latitude":[-20.0, 0.0], - "longitude":[-30.0, 10.0]} - }, - "TNA": { - "value": 0.0, - "domain": { "latitude":[5.5, 23.5], - "longitude":[302.5, 345.0]} - }, - "TIO": { - "value": 0.0, - "domain": { "latitude":[-15.0, 15.0], - "longitude":[40.0, 115.0]} - }, - "IODE": { - "value": 0.0, - "domain": { "latitude":[-10.0, 10.0], - "longitude":[50.0, 70.0]} - }, - "IODW": { - "value": 0.0, - "domain": { "latitude":[-10.0, 0.0], - "longitude":[90.0, 110.0]} - }, - "SOCN": { - "value": 0.0, - "domain": { "latitude":[-70.0, -50.0], - "longitude":[0.0, 360.0]} - } -} From 2a811c8a374dd603f6098619dd190421130a9f57 Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Fri, 10 Jan 2025 03:49:05 -0600 Subject: [PATCH 11/23] Revise the workflow related to zppy-pcmdi --- zppy/defaults/default.ini | 106 ++- zppy/pcmdi_diags.py | 265 ++++-- zppy/templates/pcmdi_diags.bash | 1474 ++++++++----------------------- 3 files changed, 607 insertions(+), 1238 deletions(-) diff --git a/zppy/defaults/default.ini b/zppy/defaults/default.ini index 
bca053d7..5048d10f 100755 --- a/zppy/defaults/default.ini +++ b/zppy/defaults/default.ini @@ -139,13 +139,13 @@ scratch = string(default="") backend = string(default="mpl") cfg = string(default="") # File of specified regions for mean climate calculation -regions_specs = string(default="inclusions/pcmdi/regions_specs.json") +regions_specs = string(default="pcmdi_data/region/regions_specs.json") # File of observation data name for mean climate calculation -reference_alias = string(default="inclusions/pcmdi/reference_alias.json") -# File of fuction to generate mean climate metrics figure -clim_plot_parser = string(default="inclusions/pcmdi/mean_climate_plot_parser.py") -# File of module to plot mean climate metrics figure -clim_plot_driver = string(default="inclusions/pcmdi/mean_climate_plot_driver.py") +reference_alias = string(default="pcmdi_data/reference/reference_alias.json") +# Utility file with functions for zppy-pcmdi data processing +pcmdi_zppy_util = string(default="pcmdi_data/utility/pcmdi_zppy_util.py") +# File of variable list to generate synthetic metrics plot +synthetic_metrics = string(default="pcmdi_data/metrics/synthetic_metrics_list.json") # Path to observation time-series data # Required for "mean_climate","variability_mode","enso" obs_ts = string(default="") @@ -155,59 +155,62 @@ obs_sets = string(default="default") # options specific for constructing pcmdi preferred file name conventions # required for "model_vs_obs" comparison model_name = string(default="e3sm.historical.v3-LR.0051") -# required for "model_vs_model" comparison -model_name_ref = string(default="e3sm.historical.v3-LR.0051") -# required for "model_vs_obs" comparison model_tableID = string(default="Amon") +# required for "model_vs_model" comparison +model_name_ref = string(default="") +model_tableID_ref = string(default="") # variables in the cmip6 table that can be potentially used by pcmdi # this list depends on the definition of cmip variable # required for "mean climate" 
diagnostics -cmip_vars = string(default="pr,prw,psl,rlds,rldscs,rlut,rlutcs,rsut,rsutcs,rsds,rsdscs,rsdt,rsus,rsuscs,rlus,rsut,rtmt,sfcWind,tas,tauu,tauv,ts,ta,ua,va,zg") +cmip_vars = string(default="pr,prw,psl,rlds,rldscs,rlut,rlutcs,rsut,rsutcs,rsds,rsdscs,rsdt,rsus,rsuscs,rlus,rtmt,sfcWind,tas,tauu,tauv,ts,ta,ua,va,zg") # flag to process the land/sea mask within pcmdi generate_sftlf = string(default=True) # variables to be used by the pcmdi diagnostics # needs to setup for each subsections, defalut setup is the mean climate metrics vars = string(default="pr,prw,psl,rlds,rldscs,rltcre,rstcre,rlut,rlutcs,rsds,rsdscs,rsdt,rsus,rsuscs,rlus,rsut,rtmt,sfcWind,tas,tauu,tauv,ts,ta-200,ta-850,ua-200,ua-850,va-200,va-850,zg-500") # sets of diagnostics from pcmdi package -sets = string_list(default=list("mean_climate","variability_mode_atm","variability_mode_cpl","enso")) +sets = string_list(default=list("mean_climate","variability_modes_atm","variability_modes_cpl","enso","synthetic_plots")) # options to identify subset of pcmdi drivers ("mean_climate","variability_mode","enso") -subset = string(default="") -########################################################################################## -# below followed the setup in e3sm_diag but used for PCMDI workflow -########################################################################################## -# See url -multiprocessing = boolean(default=True) -# See url -num_workers = integer(default=24) -# See url -figure_format = string(default="png") -# comparision type (same as e3sm_diag) -run_type = string(default="model_vs_obs") -# Used to label the results directory -# Options are "model_vs_obs" and "model_vs_model" -tag = string(default="model_vs_obs") +subsets = string(default="mean_climate,variability_modes,enso") ########################################################################################### # Required for run_type="model_vs_model" runs, different from e3sm_diag, # model_vs_model in pcmdi referred to 
the comparision of two model simulations # with observations and cmip models. ########################################################################################### #path for reference model data (time series) +reference_data_path = string(default="") reference_data_path_ts = string(default="") # pcmdi_diags.py will set to match `years` if not specified ref_years = string_list(default=list("")) -# End year (i.e., the last year to use) for the reference data -ref_end_yr = string(default="") +# The years increment for reference data +ts_num_years_ref = integer(default=5) # Final year (i.e., the last available year) for the reference data ref_final_yr = string(default="") # Start year for the reference data ref_start_yr = string(default="") # reference model name ref_name = string(default="") -# The years increment for reference data -ts_num_years_ref = integer(default=5) +# The years range for test model data +ts_years = string_list(default=list("")) +# The years increment for test model data +ts_num_years = integer(default=5) # Set to true to swap test and ref when run_type="model_vs_model" swap_test_ref = boolean(default=False) ########################################################################################## +# below followed the setup in e3sm_diag but used for PCMDI workflow +########################################################################################## +# See url +multiprocessing = boolean(default=True) +# See url +num_workers = integer(default=24) +# See url +figure_format = string(default="png") +# comparision type (same as e3sm_diag) +run_type = string(default="model_vs_obs") +# Used to label the results directory +# Options are "model_vs_obs" and "model_vs_model" +tag = string(default="model_vs_obs") +########################################################################################## # options for pcmdi mode varibility diagnostics # vars = "psl" for atm_modes # vars = "ts" for cpl_modes @@ -263,15 +266,18 @@ regrid_tool = 
string(default="esmf") regrid_method = string(default="regrid2") # OPTIONS: 'linear','conservative', only if tool is esmf regrid_method_ocn = string(default="conservative") -#options for sythetic plots with cmip model metrics data from pcmdi -########################################################################################## -sythentic_plots = string(default="n") +################################################################### +#options for synthetic plots with exisiting pcmdi cmip model metrics +################################################################### +synthetic_sets = string(default="portrait,parcoord") # path to pcmdi generated cmip metrics data -pcmdi_data_path = string(default="") +cmip_enso_dir = string(default="") +cmip_clim_dir = string(default="") +cmip_movs_dir = string(default="") #group of pcmdi generated cmip metrics data (mip.exp.version) -pcmdi_cmip_clim = string(default="cmip6.historical.v20220928") -pcmdi_cmip_movs = string(default="cmip6.historical.v20220825") -pcmdi_cmip_enso = string(default="cmip6.historical.v20210620") +cmip_clim_set = string(default="cmip6.historical.v20220928") +cmip_movs_set = string(default="cmip6.historical.v20220825") +cmip_enso_set = string(default="cmip6.historical.v20210620") # options shared by pcmdi pcmdi_debug = string(default=False) @@ -280,29 +286,33 @@ pcmdi_debug = string(default=False) cfg = string(default=None) vars = string(default=None) grid = string(default=None) - sythentic_plots = string(default=None) - pcmdi_cmip_clim = string(default=None) - pcmdi_cmip_movs = string(default=None) - pcmdi_cmip_enso = string(default=None) - pcmdi_data_path = string(default=None) - derived_variable = string(default=None) + cmip_enso_set = string(default=None) + cmip_clim__set = string(default=None) + cmip_movs_set = string(default=None) + cmip_enso_dir = string(default=None) + cmip_clim_dir = string(default=None) + cmip_movs_dir = string(default=None) + synthetic_sets = string(default=None) 
reference_alias = string(default=None) regions_specs = string(default=None) - process_sftlf = string(default=None) + pcmdi_zppy_util = string(default=None) + synthetic_metrics = string(default=None) multiprocessing = boolean(default=None) num_workers = integer(default=None) obs_ts = string(default=None) figure_format = string(default=None) - ref_end_yr = string(default=None) - ref_final_yr = string(default=None) ref_name = string(default=None) - ref_start_yr = string(default=None) ref_years = string_list(default=None) + ref_start_yr = string(default=None) + ref_final_yr = string(default=None) + reference_data_path = string(default=None) reference_data_path_ts = string(default=None) run_type = string(default=None) sets = string_list(default=None) swap_test_ref = boolean(default=None) tag = string(default=None) + ts_years = string_list(default=None) + ts_num_years = integer(default=None) ts_num_years_ref = integer(default=None) target_grid = string(default=None) target_grid_string = string(default=None) @@ -317,7 +327,7 @@ pcmdi_debug = string(default=False) EofScaling = string(default=None) ConvEOF = string(default=None) CBF = string(default=None) - subset = string(default=None) + subsets = string(default=None) landmask = string(default=None) frequency = string(default=None) generate_sftlf = string(default=None) diff --git a/zppy/pcmdi_diags.py b/zppy/pcmdi_diags.py index c28d12c5..ee074b7b 100644 --- a/zppy/pcmdi_diags.py +++ b/zppy/pcmdi_diags.py @@ -1,17 +1,23 @@ import os -import pprint -from typing import List +from typing import Any, Dict, List, Set, Tuple from zppy.bundle import handle_bundles from zppy.utils import ( + ParameterGuessType, add_dependencies, + check_parameter_defined, + check_required_parameters, check_status, + define_or_guess, + define_or_guess2, + get_file_names, get_tasks, get_years, initialize_template, make_executable, print_url, submit_script, + write_settings_file, ) @@ -21,25 +27,28 @@ def pcmdi_diags(config, script_dir, 
existing_bundles, job_ids_file): template, _ = initialize_template(config, "pcmdi_diags.bash") # --- List of pcmdi_diags tasks --- - tasks = get_tasks(config, "pcmdi_diags") + tasks: List[Dict[str, Any]] = get_tasks(config, "pcmdi_diags") if len(tasks) == 0: return existing_bundles # --- Generate and submit pcmdi_diags scripts --- - dependencies: List[str] = [] - for c in tasks: - + dependencies: List[str] = [] + check_parameters_for_bash(c) c["scriptDir"] = script_dir - if "ts_num_years" in c.keys(): c["ts_num_years"] = int(c["ts_num_years"]) # procedure type for e3sm_to_cmip c["cmor_tables_prefix"] = c["diagnostics_base_path"] + # check and set parameter for pcmdi + c["pcmdi_external_prefix"] = c["diagnostics_base_path"] + check_parameters_for_pcmdi(c) + # Loop over year sets - year_sets = get_years(c["ts_years"]) + year_sets: List[Tuple[int, int]] = get_years(c["ts_years"]) + ref_year_sets: List[Tuple[int, int]] if ("ref_years" in c.keys()) and (c["ref_years"] != [""]): ref_year_sets = get_years(c["ref_years"]) else: @@ -49,85 +58,37 @@ def pcmdi_diags(config, script_dir, existing_bundles, job_ids_file): c["year2"] = s[1] if ("last_year" in c.keys()) and (c["year2"] > c["last_year"]): continue # Skip this year set + c["ref_year1"] = rs[0] c["ref_year2"] = rs[1] - if c["subsection"]: - c["sub"] = c["subsection"] - else: - c["sub"] = c["grid"] - # Make a guess for observation paths, if need be - if ("ts_num_years" in c.keys()) and (c["obs_ts"] == ""): - c["obs_ts"] = ( - f"{c['diagnostics_base_path']}/observations/Atm/time-series/" - ) - if c["run_type"] == "model_vs_obs": - prefix = "pcmdi_diags_%s_%s_%04d-%04d" % ( - c["sub"], - c["tag"], - c["year1"], - c["year2"], - ) - elif c["run_type"] == "model_vs_model": - prefix = "pcmdi_diags_%s_%s_%04d-%04d_vs_%04d-%04d" % ( - c["sub"], - c["tag"], - c["year1"], - c["year2"], - c["ref_year1"], - c["ref_year2"], - ) - reference_data_path = ( - c["reference_data_path"].split("/post")[0] + "/post" - ) - if ("ts_num_years" 
in c.keys()) and (c["reference_data_path_ts"] == ""): - c["reference_data_path_ts"] = ( - f"{reference_data_path}/atm/{c['grid']}/cmip_ts/monthly" - ) - else: - raise ValueError("Invalid run_type={}".format(c["run_type"])) - print(prefix) - c["prefix"] = prefix - scriptFile = os.path.join(script_dir, "%s.bash" % (prefix)) - statusFile = os.path.join(script_dir, "%s.status" % (prefix)) - settingsFile = os.path.join(script_dir, "%s.settings" % (prefix)) - skip = check_status(statusFile) + + check_and_define_parameters(c) + bash_file, settings_file, status_file = get_file_names( + script_dir, c["prefix"] + ) + skip: bool = check_status(status_file) if skip: continue # Create script - with open(scriptFile, "w") as f: + with open(bash_file, "w") as f: f.write(template.render(**c)) - make_executable(scriptFile) + make_executable(bash_file) + # List of dependencies # Iterate from year1 to year2 incrementing by the number of years per time series file. if "ts_num_years" in c.keys(): for yr in range(c["year1"], c["year2"], c["ts_num_years"]): - start_yr = yr - end_yr = yr + c["ts_num_years"] - 1 - if ( - ("mean_climate" in c["sets"]) - or ("variability_mode_atm" in c["sets"]) - or ("variability_mode_cpl" in c["sets"]) - or ("enso" in c["sets"]) - ): - add_dependencies( - dependencies, - script_dir, - "ts", - "atm_monthly_180x360_aave", - start_yr, - end_yr, - c["ts_num_years"], - ) - with open(settingsFile, "w") as sf: - p = pprint.PrettyPrinter(indent=2, stream=sf) - p.pprint(c) - p.pprint(s) + add_ts_dependencies(c, dependencies, script_dir, yr) + + add_pcmdi_dependencies(c, dependencies, script_dir) + c["dependencies"] = dependencies + write_settings_file(settings_file, c, s) export = "ALL" existing_bundles = handle_bundles( c, - scriptFile, + bash_file, export, dependFiles=dependencies, existing_bundles=existing_bundles, @@ -136,17 +97,169 @@ def pcmdi_diags(config, script_dir, existing_bundles, job_ids_file): if c["bundle"] == "": # Submit job submit_script( - 
scriptFile, - statusFile, + bash_file, + status_file, export, job_ids_file, dependFiles=dependencies, + fail_on_dependency_skip=c["fail_on_dependency_skip"], ) - else: - print("...adding to bundle '%s'" % (c["bundle"])) + print(f"...adding to bundle {c['bundle']}") print(f" environment_commands={c['environment_commands']}") print_url(c, "pcmdi_diags") return existing_bundles + + +def check_parameters_for_bash(c: Dict[str, Any]) -> None: + check_required_parameters( + c, + set(["mean_climate", "variability_mode_cpl", "variability_mode_atm", "enso"]), + "ref_final_yr", + ) + check_required_parameters( + c, set(["variability_mode_cpl", "variability_mode_atm", "enso"]), "ref_start_yr" + ) + check_required_parameters( + c, set(["variability_mode_cpl", "variability_mode_atm", "enso"]), "ref_end_yr" + ) + + +def check_parameters_for_pcmdi(c: Dict[str, Any]) -> None: + # check and set up the external data needed by pcmdi + if set(["synthetic_plots"]) & set(c["sets"]): + define_or_guess2( + c, + "cmip_enso_dir", + f"{c['diagnostics_base_path']}/pcmdi_data/metrics_data/enso_metric", + ParameterGuessType.PATH_GUESS, + ) + define_or_guess2( + c, + "cmip_clim_dir", + f"{c['diagnostics_base_path']}/pcmdi_data/metrics_data/mean_climate", + ParameterGuessType.PATH_GUESS, + ) + define_or_guess2( + c, + "cmip_movs_dir", + f"{c['diagnostics_base_path']}/pcmdi_data/metrics_data/variability_modes", + ParameterGuessType.PATH_GUESS, + ) + + +def check_mvm_only_parameters_for_bash(c: Dict[str, Any]) -> None: + check_parameter_defined(c, "diff_title") + check_parameter_defined(c, "ref_name") + check_parameter_defined(c, "short_ref_name") + + check_required_parameters( + c, + set(["mean_climate", "variability_mode_cpl", "variability_mode_atm", "enso"]), + "ref_start_yr", + ) + ts_sets = set( + [ + "mean_climate", + "variability_mode_cpl", + "variability_mode_atm", + "enso", + ] + ) + check_required_parameters(c, ts_sets, "ts_num_years_ref") + check_required_parameters(c, ts_sets, 
"ts_subsection") + + +def check_and_define_parameters(c: Dict[str, Any]) -> None: + c["sub"] = define_or_guess( + c, "subsection", "grid", ParameterGuessType.SECTION_GUESS + ) + # TODO: do this based on sets, rather than by relying on the user setting ts_num_years + if "ts_num_years" in c.keys(): + define_or_guess2( + c, + "obs_ts", + f"{c['diagnostics_base_path']}/observations/Atm/time-series/", + ParameterGuessType.PATH_GUESS, + ) + prefix: str + if c["run_type"] == "model_vs_obs": + prefix = f"pcmdi_diags_{c['sub']}_{c['tag']}_{c['year1']:04d}-{c['year2']:04d}" + + elif c["run_type"] == "model_vs_model": + check_mvm_only_parameters_for_bash(c) + prefix = f"pcmdi_diags_{c['sub']}_{c['tag']}_{c['year1']:04d}-{c['year2']:04d}_vs_{c['ref_year1']:04d}-{c['ref_year2']:04d}" + reference_data_path = c["reference_data_path"].split("/post")[0] + "/post" + if set( + ["mean_climate", "variability_mode_cpl", "variability_mode_atm", "enso"] + ) & set(c["sets"]): + define_or_guess2( + c, + "reference_data_path_ts", + f"{reference_data_path}/atm/{c['grid']}/cmip_ts/monthly", + ParameterGuessType.PATH_GUESS, + ) + else: + raise ValueError(f"Invalid run_type={c['run_type']}") + print(prefix) + c["prefix"] = prefix + + +def add_ts_dependencies( + c: Dict[str, Any], dependencies: List[str], script_dir: str, yr: int +): + start_yr = yr + end_yr = yr + c["ts_num_years"] - 1 + depend_on_ts: Set[str] = set( + ["mean_climate", "variability_mode_atm", "variability_mode_cpl", "enso"] + ) + if depend_on_ts & set(c["sets"]): + add_dependencies( + dependencies, + script_dir, + "ts", + "atm_monthly_180x360_aave", + start_yr, + end_yr, + c["ts_num_years"], + ) + + +def add_pcmdi_dependencies( + c: Dict[str, Any], dependencies: List[str], script_dir: str +) -> None: + pcmdi_sub = define_or_guess( + c, "pcmdi_diags", "sub", ParameterGuessType.SECTION_GUESS + ) + status_suffix: str = f"_{c['year1']:04d}-{c['year2']:04d}.status" + if "synthetic_plots" in pcmdi_sub: + check_parameter_defined(c, 
"run_type") + if "mean_climate" in c["sets"]: + dependencies.append( + os.path.join( + script_dir, + f"pcmdi_diags_mean_climate_{c['run_type']}{status_suffix}", + ) + ) + if "variability_mode_cpl" in c["sets"]: + dependencies.append( + os.path.join( + script_dir, + f"pcmdi_diags_variability_mode_cpl_{c['run_type']}{status_suffix}", + ) + ) + if "variability_mode_atm" in c["sets"]: + dependencies.append( + os.path.join( + script_dir, + f"pcmdi_diags_variability_mode_atm_{c['run_type']}{status_suffix}", + ) + ) + if "enso" in c["sets"]: + dependencies.append( + os.path.join( + script_dir, f"pcmdi_diags_enso_{c['run_type']}{status_suffix}" + ) + ) diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash index 855f2a5b..117fb447 100755 --- a/zppy/templates/pcmdi_diags.bash +++ b/zppy/templates/pcmdi_diags.bash @@ -61,6 +61,20 @@ case_id=v$(date '+%Y%m%d') workdir=`mktemp -d tmp.${id}.XXXX` cd ${workdir} +# utility file for pcmdi-zppy workflow +cp -r '{{pcmdi_external_prefix}}/{{pcmdi_zppy_util}}' . + +# files for definition of regions for regional mean +cp -r '{{pcmdi_external_prefix}}/{{regions_specs}}' . + +# file for aliases of observation datasets +cp -r '{{pcmdi_external_prefix}}/{{reference_alias}}' . + +# file for list of variables for synthetic_metrics metric plots +cp -r '{{pcmdi_external_prefix}}/{{synthetic_metrics}}' . + +{%- if ("mean_climate" in subsection) %} +#further simplification could be done in future create_links_acyc_climo() { ts_dir_source=$1 @@ -109,45 +123,7 @@ create_links_acyc_climo() cd .. 
} -create_links_ts() -{ - ts_dir_source=$1 - ts_dir_destination=$2 - begin_year=$3 - end_year=$4 - subname=$5 - error_num=$6 - # Create netcdf files for time series variables - mkdir -p ${ts_dir_destination} - cd ${ts_dir_destination} - # https://stackoverflow.com/questions/27702452/loop-through-a-comma-separated-shell-variable - variables="{{ vars }}" - for v in ${variables//,/ } - do - # Go through the time series files for between year1 and year2, using a step size equal to the number of years per time series file - for year in `seq ${begin_year} {{ ts_num_years }} ${end_year}`; - do - YYYY=`printf "%04d" ${year}` - for file in ${ts_dir_source}/${v}_*_${YYYY}*.nc - do - # Add this time series file to the list of files for cdscan to use - echo ${file} >> ${v}_files.txt - done - done - # netcdf file will be combined to cover the whole period from year1 to year2 - combined_name="${subname}.${v}.${begin_year}01-${end_year}12.nc" - cat ${v}_files.txt | ncrcat -v ${v} -d "time,${begin_year}-01-01,${end_year}-12-31" ${combined_name} - #modify time to avoid issues in pcmdi calculation - ncap2 -O -h -s 'defdim("bnds",2);time_bnds=make_bounds(time,$bnds,"time_bnds");time_bnds@units=time@units;time_bnds@calendar=time@calendar' ${combined_name} ${combined_name} - if [ $? != 0 ]; then - cd {{ scriptDir }} - echo "ERROR (${error_num})" > {{ prefix }}.status - exit ${error_num} - fi - done - cd .. -} - +{% if run_type == "model_vs_obs" %} create_links_acyc_climo_obs() { ts_dir_source=$1 @@ -197,7 +173,50 @@ create_links_acyc_climo_obs() done cd .. 
} +{%- endif %} +{%- endif %} + +{%- if ("variability_modes_cpl" in subsection) or ("variability_modes_atm" in subsection) or ("enso" in subsection) %} +create_links_ts() +{ + ts_dir_source=$1 + ts_dir_destination=$2 + begin_year=$3 + end_year=$4 + subname=$5 + error_num=$6 + # Create netcdf files for time series variables + mkdir -p ${ts_dir_destination} + cd ${ts_dir_destination} + # https://stackoverflow.com/questions/27702452/loop-through-a-comma-separated-shell-variable + variables="{{ vars }}" + for v in ${variables//,/ } + do + # Go through the time series files for between year1 and year2, using a step size equal to the number of years per time series file + for year in `seq ${begin_year} {{ ts_num_years }} ${end_year}`; + do + YYYY=`printf "%04d" ${year}` + for file in ${ts_dir_source}/${v}_*_${YYYY}*.nc + do + # Add this time series file to the list of files for cdscan to use + echo ${file} >> ${v}_files.txt + done +done + # netcdf file will be combined to cover the whole period from year1 to year2 + combined_name="${subname}.${v}.${begin_year}01-${end_year}12.nc" + cat ${v}_files.txt | ncrcat -v ${v} -d "time,${begin_year}-01-01,${end_year}-12-31" ${combined_name} + #modify time to avoid issues in pcmdi calculation + ncap2 -O -h -s 'defdim("bnds",2);time_bnds=make_bounds(time,$bnds,"time_bnds");time_bnds@units=time@units;time_bnds@calendar=time@calendar' ${combined_name} ${combined_name} + if [ $? != 0 ]; then + cd {{ scriptDir }} + echo "ERROR (${error_num})" > {{ prefix }}.status + exit ${error_num} + fi + done + cd .. +} +{% if run_type == "model_vs_obs" %} create_links_ts_obs() { ts_dir_source=$1 @@ -234,135 +253,39 @@ create_links_ts_obs() done cd .. 
} +{%- endif %} +{%- endif %} -{%- if ("mean_climate" in subset) %} -{% if run_type == "model_vs_obs" %} +######################## +#prepare the model data +######################## +{%- if ("mean_climate" in subsection) %} climo_dir_primary=climo -{% elif run_type == "model_vs_model" %} -climo_dir_primary=climo_test -{%- endif %} # Create local links to input climo files climo_dir_source={{ output }}/post/atm/{{ grid }}/cmip_ts/monthly create_links_acyc_climo ${climo_dir_source} ${climo_dir_primary} ${Y1} ${Y2} ${model_name}.${tableID} 1 {% if run_type == "model_vs_model" %} # Create local links to input climo files (ref model) -climo_dir_source={{ reference_data_path }} +climo_dir_source_ref={{ reference_data_path }} climo_dir_ref=climo_ref -create_links_acyc_climo ${climo_dir_source} ${climo_dir_ref} ${ref_Y1} ${ref_Y2} ${model_name_ref}.${tableID_ref} 2 +create_links_acyc_climo ${climo_dir_source_ref} ${climo_dir_ref} ${ref_Y1} ${ref_Y2} ${model_name_ref}.${tableID_ref} 2 {%- endif %} {%- endif %} -######################## -#prepare the model data -######################## -{%- if ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) or ("enso" in subset) %} +{%- if ("variability_modes_cpl" in subsection) or ("variability_modes_atm" in subsection) or ("enso" in subsection) %} #all diags will be run with ts data -{% if run_type == "model_vs_obs" %} ts_dir_primary=ts -{% elif run_type == "model_vs_model" %} -ts_dir_primary=ts_test -{%- endif %} # Create netcdf files for time series variables ts_dir_source={{ output }}/post/atm/{{ grid }}/cmip_ts/monthly create_links_ts ${ts_dir_source} ${ts_dir_primary} ${Y1} ${Y2} ${model_name}.${tableID} 3 {% if run_type == "model_vs_model" %} -ts_dir_source={{ reference_data_path_ts }}/{{ ts_num_years_ref }}yr +ts_dir_source_ref={{ reference_data_path_ts }}/{{ ts_num_years_ref }}yr ts_dir_ref=ts_ref -create_links_ts ${ts_dir_source} ${ts_dir_ref} ${ref_Y1} ${ref_Y2} ${model_name_ref}.${tableID_ref} 4 -{%- 
endif %} -{%- endif %} - -######################################################### -#process the derived quantities for pcmdi diagnostics. -#this module is created as variables such as rltcre and -#rstcre were not included as default in cmip6 table -#this part can be removed when all variables converated -#during the 'e3sm_to_cmip' step -######################################################### -cat > process_derived_var.py << EOF -import os -import re -import glob -import json -import time -import datetime -import xcdat as xc -import numpy as np -import shutil - -import pcmdi_metrics -from pcmdi_metrics.io import ( - xcdat_open -) - -def derive_var(path,vout,var_dic,fname): - for i,var in enumerate(var_dic.keys()): - fpath = sorted(glob.glob(os.path.join(path,"*."+var+".*.nc"))) - df = xcdat_open(fpath[0]) - if i == 0: - template = fpath[0].split("/")[-1] - #construct a copy of file for derived variable - out = os.path.join(path,template.replace(".{}.".format(var),".{}.".format(vout))) - shutil.copy(fpath[0],out) - ds = xcdat_open(fpath[0]) - ds = ds.rename_vars({var:vout}) - ds[vout].data = ds[vout].data * var_dic[var] - else: - ds[vout].data = ds[vout].data + df[var].data * var_dic[var] - ds.to_netcdf(out) - return - -variables = '{{ vars }}'.split(",") - -{%- if ("mean_climate" in subset) %} -{% if run_type == "model_vs_obs" %} -model_groups = ['${model_name}.${tableID}'] -run_groups=['${climo_dir_primary}'] -{% elif run_type == "model_vs_model" %} -model_groups = ['${model_name}.${tableID}','${model_name_ref}.${tableID_ref}'] -run_groups=['${climo_dir_primary}','${climo_dir_ref}'] -{%- endif %} -{%- elif ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) or ("enso" in subset) %} -{% if run_type == "model_vs_obs" %} -model_groups = ['${model_name}.${tableID}'] -run_groups = ['${ts_dir_primary}'] -{% elif run_type == "model_vs_model" %} -model_groups = ['${model_name}.${tableID}','${model_name_ref}.${tableID_ref}'] -run_groups = 
['${ts_dir_primary}','${ts_dir_ref}'] +create_links_ts ${ts_dir_source_ref} ${ts_dir_ref} ${ref_Y1} ${ref_Y2} ${model_name_ref}.${tableID_ref} 4 {%- endif %} {%- endif %} -############################################################### -#check and process derived quantities, these quantities are not -#included as default in e3sm_to_cmip module -############################################################### -for i,group in enumerate(run_groups): - for j,var in enumerate(variables): - if "_" in var or "-" in var: - varin = re.split("_|-", var)[0] - else: - varin = var - if varin in ['rltcre','rstcre']: - fpaths = sorted(glob.glob(os.path.join(group,"*"+var+"_*.nc"))) - if len(fpaths) < 1: - if varin == 'rstcre': - derive_var(group,varin,{'rsutcs':1,'rsut':-1},model_groups[i]) - elif varin == 'rltcre': - derive_var(group,varin,{'rlutcs':1,'rlut':-1},model_groups[i]) - -EOF -################### -# run process jobs -################### -command="srun -N 1 python -u process_derived_var.py" -time ${command} -if [ $? != 0 ]; then - cd {{ scriptDir }} - echo 'ERROR (5)' > {{ prefix }}.status - exit 5 -fi - -{% if run_type == "model_vs_obs" %} +{% if (run_type == "model_vs_obs") and ("synthetic_plots" not in subsection) %} ######################################################### #prepare the observation data. 
As observation are often #depends on the source available for analysis, therefore, @@ -389,26 +312,13 @@ from pcmdi_metrics.io import ( xcdat_open ) -def derive_var(path,vout,var_dic,fname): - for i,var in enumerate(var_dic.keys()): - fpath = sorted(glob.glob(os.path.join(path,"*."+var+".*.nc"))) - df = xcdat_open(fpath[0]) - if i == 0: - template = fpath[0].split("/")[-1] - #construct a copy of file for derived variable - out = os.path.join(path,template.replace(".{}.".format(var),".{}.".format(vout))) - shutil.copy(fpath[0],out) - ds = xcdat_open(fpath[0]) - ds = ds.rename_vars({var:vout}) - ds[vout].data = ds[vout].data * var_dic[var] - else: - ds[vout].data = ds[vout].data + df[var].data * var_dic[var] - ds.to_netcdf(out) - return +from pcmdi_zppy_util import( + derive_var, +) model_name = '${model_name_ref}.${tableID_ref}' -variables = '{{ vars }}'.split(",") +variables = '{{ cmip_vars }}'.split(",") obs_sets = '{{ obs_sets }}'.split(",") ts_dir_ref_source = '{{ obs_ts }}' @@ -422,7 +332,7 @@ altobs_dic = { "pr" : "PRECT", "rstcre" : "toa_cre_sw_mon", "rtmt" : "toa_net_all_mon"} -obs_dic = json.load(open('{{reference_alias}}')) +obs_dic = json.load(open('reference_alias.json')) #loop each variable and process the data for i,var in enumerate(variables): @@ -492,214 +402,16 @@ fi #use same period as test model when possible ####################################################### ts_dir_ref_source="{{ scriptDir }}/${workdir}/${obstmp_dir}" -{%- if ("mean_climate" in subset) %} +{%- if ("mean_climate" in subsection) %} climo_dir_ref=climo_ref create_links_acyc_climo_obs ${ts_dir_ref_source} ${climo_dir_ref} ${Y1} ${Y2} 7 -{%- elif ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) or ("enso" in subset) %} +{%- elif ("variability_modes_cpl" in subsection) or ("variability_modes_atm" in subsection) or ("enso" in subsection) %} ts_dir_ref=ts_ref create_links_ts_obs ${ts_dir_ref_source} ${ts_dir_ref} ${Y1} ${Y2} 8 {%- endif %} - -{%- endif %} - 
-################################################## -#collect data description and save in a json file -#for the convinience of later-on process -################################################## -mkdir -p pcmdi_diags -cat > data_info_collect.py << EOF -import os -import re -import glob -import json -import collections -from collections import OrderedDict - -{%- if ("mean_climate" in subset) %} -test = '${climo_dir_primary}' -refr = '${climo_dir_ref}' -variables = '{{ vars }}'.split(",") -{%- elif ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) %} -test = '${ts_dir_primary}' -refr = '${ts_dir_ref}' -variables = '{{ vars }}'.split(",") -{%- elif ("enso" in subset) %} -test = '${ts_dir_primary}' -refr = '${ts_dir_ref}' -variables = '{{ vars }}'.split(",") -{%- endif %} - -test_data_set = ['${model_name}'.split(".")[1]] -{% if run_type == "model_vs_obs" %} -refr_data_set = '{{ obs_sets }}'.split(",") -{% elif run_type == "model_vs_model" %} -refr_data_set = ['${model_name_ref}'.split(".")[1]] -{%- endif %} - -#collect variables when both model and observations are available -refr_dic,test_dic = OrderedDict(),OrderedDict() -for i,var in enumerate(variables): - if "_" in var or "-" in var: - varin = re.split("_|-", var)[0] - else: - varin = var - test_path = sorted(glob.glob(os.path.join(test,"*.{}.*.nc".format(varin)))) - refr_path = sorted(glob.glob(os.path.join(refr,"*.{}.*.nc".format(varin)))) - if (len(test_path) > 0) and (len(refr_path) > 0): - if (os.path.exists(test_path[0])) and (os.path.exists(refr_path[0])): - for j,path in enumerate([test_path[0],refr_path[0]]): - fname = path.split("/")[-1] - model = fname.split(".")[2] - sbdic = { "mip" : fname.split(".")[0], - "exp" : fname.split(".")[1], - "model" : fname.split(".")[2], - "realization" : fname.split(".")[3], - "tableID" : fname.split(".")[4], - "yymms" : fname.split(".")[6].split("-")[0], - "yymme" : fname.split(".")[6].split("-")[1], - "var_in_file" : varin, - "var_name" : var, 
- "file_path" : path, - "template" : fname } - if j == 0: - if var not in test_dic.keys(): - test_dic[var] = {} - if len(test_data_set) != len(variables): - kset = test_data_set[0] - else: - kset = test_data_set[i] - test_dic[var]['set'] = kset - test_dic[var][kset] = model - test_dic[var][model] = sbdic - else: - if var not in refr_dic.keys(): - refr_dic[var] = {} - if len(refr_data_set) != len(variables): - kset = refr_data_set[0] - else: - kset = refr_data_set[i] - refr_dic[var][kset] = model - refr_dic[var][model] = sbdic - refr_dic[var]['set'] = kset - -# Save test and obs/reference data information for next step -for i,group in enumerate([test,refr]): - if i == 0: - out_dic = test_dic - else: - out_dic = refr_dic - out_file = os.path.join( - 'pcmdi_diags', - '{}_{}_catalogue.json'.format(group,'{{subset}}') - ) - json.dump(out_dic, - open(out_file, "w"), - sort_keys=False, - indent=4, - separators=(",", ": ")) - -EOF -##################### -# run process jobs -command="srun -N 1 python -u data_info_collect.py" -time ${command} -if [ $? 
!= 0 ]; then - cd {{ scriptDir }} - echo 'ERROR (9)' > {{ prefix }}.status - exit 9 -fi - -############################################################################## -# land/sea mask is needed in PCMDI diagnostics, check and generate it here as -# these data are not always available for model or observations -############################################################################## -fixed_dir="fixed" -mkdir -p ${fixed_dir} -cat > create_landsea_mask.py << EOF -import os -import glob -import json -import datetime -import numpy as np -import collections -from collections import OrderedDict - -import pcmdi_metrics -from pcmdi_metrics.io import ( - xcdat_open -) -from pcmdi_metrics.utils import ( - create_land_sea_mask -) - -############################################### -# Flag to turn on/off land/sea mask processing -############################################# -if {{ generate_sftlf }} in ['true', 'y', True]: - generate_sftlf = True -else: - generate_sftlf = False - -if generate_sftlf: - -{%- if ("mean_climate" in subset) %} - test = '${climo_dir_primary}' - refr = '${climo_dir_ref}' -{%- elif ("variability_mode_cpl" in subset) or ("variability_mode_atm" in subset) or ("enso" in subset) %} - test = '${ts_dir_primary}' - refr = '${ts_dir_ref}' {%- endif %} - #loop each group and process land/mask if not exist - for group in [test,refr]: - dic_file = os.path.join( - 'pcmdi_diags', - '{}_{}_catalogue.json'.format(group,'{{subset}}') - ) - data_dic = json.load(open(dic_file)) - for var in data_dic.keys(): - mdset = data_dic[var]['set'] - model = data_dic[var][mdset] - mpath = data_dic[var][model]['file_path'] - mpath_lf = os.path.join( - '${fixed_dir}', - 'sftlf.{}.nc'.format(model) - ) - # generate land/sea mask if not exist - if not os.path.exists(mpath_lf): - ds = xcdat_open(mpath, decode_times=True) - ds = ds.bounds.add_missing_bounds() - try: - lf_array = create_land_sea_mask(ds, method="regionmask") - print("land mask is estimated using regionmask 
method.") - except Exception: - lf_array = create_land_sea_mask(ds, method="pcmdi") - print("land mask is estimated using pcmdi method.") - lf_array = lf_array * 100.0 - lf_array.attrs['long_name']= "land_area_fraction" - lf_array.attrs['units'] = "%" - lf_array.attrs['id'] = "sftlf" # Rename - ds_lf = lf_array.to_dataset(name='sftlf').compute() - ds_lf = ds_lf.bounds.add_missing_bounds() - ds_lf.fillna(1.0e20) - ds_lf.attrs['model'] = model - ds_lf.attrs['associated_files'] = mpath - ds_lf.attrs['history'] = "File processed: " + datetime.datetime.now().strftime("%Y%m%d") - comp = dict(_FillValue=1.0e20,zlib=True,complevel=5) - encoding = {var: comp for var in list(ds_lf.data_vars)+list(ds_lf.coords)} - ds_lf.to_netcdf(mpath_lf,encoding=encoding) - del(ds,ds_lf,lf_array) -EOF -##################### -# run process script -command="srun -N 1 python -u create_landsea_mask.py" -time ${command} -if [ $? != 0 ]; then - cd {{ scriptDir }} - echo 'ERROR (10)' > {{ prefix }}.status - exit 10 -fi - +{% if "synthetic_plots" not in subsection %} ######################################################## # generate basic parameter file for pcmdi metrics driver ######################################################## @@ -707,9 +419,6 @@ cat > parameterfile.py << EOF import os import sys import json -import numpy as np -import collections -from collections import OrderedDict ##################### #basic information @@ -758,13 +467,13 @@ regions_values = {"land":100.,"ocean":0.} #setup template for land/sea mask (fixed) modpath_lf = os.path.join( - '${fixed_dir}', + 'fixed', 'sftlf.%(model).nc' ) ############################################ #setup specific for mean climate metrics -{%- if ("mean_climate" in subset) %} +{%- if ("mean_climate" in subsection) %} #case id modver = "${case_id}" @@ -816,13 +525,13 @@ custom_observations = os.path.join( 'pcmdi_diags', '{}_{}_catalogue.json'.format( '${climo_dir_ref}', - '{{subset}}')) + '{{subsection}}')) #load caclulated regions for each 
variable regions = json.load(open('regions.json')) #load predefined region information -regions_specs = json.load(open('{{regions_specs}}')) +regions_specs = json.load(open('regions_specs.json')) for key in regions_specs.keys(): if "domain" in regions_specs[key].keys(): if "latitude" in regions_specs[key]['domain'].keys(): @@ -859,7 +568,7 @@ test_clims_interpolated_output = diagnostics_output_path {%- endif %} -{%- if ("variability_mode" in subset) %} +{%- if ("variability_modes" in subsection) %} ######################################## #setup for mode variability diagnostics ######################################## @@ -881,7 +590,7 @@ modnames = [ product ] realization = "*" modpath = os.path.join( '${ts_dir_primary}', - '{}.{}.%(model).%(realization).{}.%(variable).{}.nc'.format(mip,exp,${tableID},period) + '{}.{}.%(model).%(realization).{}.%(variable).{}.nc'.format(mip,exp,'${tableID}',period) ) #start and end year for analysis @@ -919,7 +628,7 @@ results_dir = os.path.join( ) {%- endif %} -{%- if ("enso" in subset) %} +{%- if ("enso" in subsection) %} ########################################### #parameter setup specific for enso metrics ########################################### @@ -927,7 +636,7 @@ modnames = [ product ] realization = realm modpath = os.path.join( '${ts_dir_primary}', - '{}.{}.%(model).%(realization).{}.%(variable).{}.nc'.format(mip,exp,${tableID},period) + '{}.{}.%(model).%(realization).{}.%(variable).{}.nc'.format(mip,exp,'${tableID}',period) ) #observation/reference file catalogue @@ -959,6 +668,7 @@ netcdf_name = json_name {%- endif %} EOF +{%- endif %} ################################################################ # Run PCMDI Diags @@ -980,499 +690,43 @@ import pandas as pd import collections from collections import OrderedDict +import psutil +import subprocess +from itertools import chain +from subprocess import Popen, PIPE, call + import pcmdi_metrics -from pcmdi_metrics.io import ( - xcdat_open -) from pcmdi_metrics.graphics import 
( - Metrics, normalize_by_median, - parallel_coordinate_plot, - portrait_plot, ) -import psutil -import subprocess -from itertools import chain -from subprocess import Popen, PIPE, call - -def childCount(): - current_process = psutil.Process() - children = current_process.children() - return(len(children)) - -def parallel_jobs(cmds,num_workers): - procs = [] - for i,p in enumerate(cmds): - print('running %s' % (str(p))) - proc = Popen(p, stdout=PIPE, shell=True) - procs.append(proc) - if (i == len(cmds)-1): - outs, errs = proc.communicate() - rcode = proc.returncode - time.sleep(0.25); break - else: - njobs = childCount() - while (njobs > num_workers): - [pp.communicate() for pp in procs] - time.sleep(0.25) - procs = [] - return outs,errs,rcode - -def serial_jobs(cmds,num_workers): - for i,p in enumerate(cmds): - print('running %s' % (str(p))) - proc = Popen(p, stdout=PIPE, shell=True) - - return outs,errs,rcode - -def variable_region(regions,variables): - regv_dic = OrderedDict() - for var in variables: - vkey = var.split("-")[0] - regv_dic[vkey] = regions - - #save region info dictionary - json.dump(regv_dic, - open('regions.json', "w"), - sort_keys=False, - indent=4, - separators=(",", ": ")) - return - -def enso_obsvar_dict(obs_dic,variables): - #orgnize observation for enso driver - refr_dic = OrderedDict() - for var in variables: - vkey = var.split("-")[0] - refset = obs_dic[vkey]['set'] - refname = obs_dic[vkey][refset] - #data file in model->var sequence - if refname not in refr_dic.keys(): - refr_dic[refname] = {} - refr_dic[refname][vkey] = obs_dic[vkey][refname] - - #save data file dictionary - json.dump(refr_dic, - open('obs_catalogue.json', "w"), - sort_keys=False, - indent=4, - separators=(",", ": ")) - - return - -def enso_obsvar_lmsk(regions,variables): - #orgnize observation landmask for enso driver - relf_dic = OrderedDict() - for var in variables: - vkey = var.split("-")[0] - refset = obs_dic[vkey]['set'] - refname = obs_dic[vkey][refset] - 
#land/sea mask - if refname not in relf_dic.keys(): - relf_dic[refname] = os.path.join( - "${fixed_dir}", - 'sftlf.{}.nc'.format(refname)) - - #save data file dictionary - json.dump(relf_dic, - open('obs_landmask.json', "w"), - sort_keys=False, - indent=4, - separators=(",", ": ")) - - return - - -def shift_row_to_bottom(df, index_to_shift): - idx = [i for i in df.index if i != index_to_shift] - return df.loc[idx + [index_to_shift]] - -def merge_data(model_lib,cmip_lib,model_name): - model_lib,cmip_lib = check_regions(model_lib,cmip_lib) - merge_lib = cmip_lib.merge(model_lib) - merge_lib = check_units(merge_lib) - for stat in merge_lib.df_dict: - for season in merge_lib.df_dict[stat]: - for region in merge_lib.df_dict[stat][season]: - highlight_models = [] - df = merge_lib.df_dict[stat][season][region] - for model in df["model"].tolist(): - if "e3sm" in model.lower(): - highlight_models.append(model) - if model in model_name: - idxs = df[df.iloc[:, 0] == model].index - df.loc[idxs, "model"] = model_name - highlight_models.append(model_name) - for model in highlight_models: - for idx in df[df.iloc[:, 0] == model].index: - df = shift_row_to_bottom(df, idx) - merge_lib.df_dict[stat][season][region] = df.fillna(value=np.nan) - del(df) - return merge_lib - -def check_regions(data_lib,ref_lib): - regions = [x for x in data_lib.regions if x in ref_lib.regions] - for stat in ref_lib.df_dict: - for season in ref_lib.df_dict[stat]: - subset_dict = dict((k, ref_lib.df_dict[stat][season][k]) for k in regions) - ref_lib.df_dict[stat][season] = subset_dict - del(subset_dict) - ref_lib.regions = regions - - for stat in data_lib.df_dict: - for season in data_lib.df_dict[stat]: - subset_dict = dict((k, data_lib.df_dict[stat][season][k]) for k in regions) - data_lib.df_dict[stat][season] = subset_dict - del(subset_dict) - data_lib.regions = regions - - return data_lib,ref_lib - -def check_references(data_dict): - reference_alias = {'CERES-EBAF-4-1': 'ceres_ebaf_v4_1', - 
'CERES-EBAF-4-0': 'ceres_ebaf_v4_0', - 'CERES-EBAF-2-8': 'ceres_ebaf_v2_8', - 'GPCP-2-3' : 'GPCP_v2_3', - 'GPCP-2-2' : 'GPCP_v2_2', - 'GPCP-3-2' : 'GPCP_v3_2', - 'NOAA_20C' : 'NOAA-20C', - 'ERA-INT' : 'ERA-Interim', - 'ERA-5' : 'ERA5'} - for key,values in data_dict.items(): - for i,value in enumerate(values): - if value in reference_alias.keys(): - values[i] = reference_alias[value] - data_dict[key] = values - return data_dict - -def check_units(data_lib): - # we define fixed sets of variables used for final plotting. - units_all = { - "prw" : "[kg m$^{-2}$]", "pr" : "[mm d$^{-1}$]", "prsn" : "[mm d$^{-1}$]", - "prc" : "[mm d$^{-1}$]", "hfls" : "[W m$^{-2}$]", "hfss" : "[W m$^{-2}$]", - "clivi" : "[kg $m^{-2}$]", "clwvi" : "[kg $m^{-2}$]", "psl" : "[Pa]", - "rlds" : "[W m$^{-2}$]", "rldscs": "[W $m^{-2}$]", "evspsbl": "[kg m$^{-2} s^{-1}$]", - "rtmt" : "[W m$^{-2}$]", "rsdt" : "[W m$^{-2}$]", "rlus" : "[W m$^{-2}$]", - "rluscs": "[W m$^{-2}$]", "rlut" : "[W m$^{-2}$]", "rlutcs" : "[W m$^{-2}$]", - "rsds" : "[W m$^{-2}$]", "rsdscs": "[W m$^{-2}$]", "rstcre" : "[W m$^{-2}$]", - "rltcre": "[W m$^{-2}$]", "rsus" : "[W m$^{-2}$]", "rsuscs" : "[W m$^{-2}$]", - "rsut" : "[W m$^{-2}$]", "rsutcs": "[W m$^{-2}$]", "ts" : "[K]", - "tas" : "[K]", "tauu" : "[Pa]", "tauv" : "[Pa]", - "zg-500": "[m]", "ta-200": "[K]", "sfcWind": "[m s$^{-1}$]", - "ta-850": "[K]", "ua-200": "[m s$^{-1}$]", "ua-850" : "[m s$^{-1}$]", - "va-200": "[m s$^{-1}$]", "va-850": "[m s$^{-1}$]", "uas" : "[m s$^{-1}$]", - "vas" : "[m s$^{-1}$]", "tasmin": "[K]", "tasmax" : "[K]", - "clt" : "[%]"} - - common_vars = [x for x in data_lib.var_list if x in units_all.keys()] - #special case - if 'rtmt' not in common_vars: - if ('rt' in data_lib.var_list) or ('rmt' in data_lib.var_list): - common_vars.append('rtmt') - - #collect unit list - common_unts = [units_all[x] for x in common_vars] - - #collect reference list - reflist = data_lib.var_ref_dict.copy() - for var in reflist: - if var not in common_vars: - if 
var in ['rt','rmt']: - data_lib.var_ref_dict['rtmt'] = data_lib.var_ref_dict.pop(var) - else: - data_lib.var_ref_dict.pop(var) - data_lib.var_ref_dict = check_references(data_lib.var_ref_dict) - #now clean up data to exclude vars not in common lists - for stat in data_lib.df_dict: - for season in data_lib.df_dict[stat]: - for region in data_lib.df_dict[stat][season]: - df = data_lib.df_dict[stat][season][region] - if 'rt' in df.columns: - df['rtmt'] = df['rt'] - elif 'rmt' in df.columns: - df['rtmt'] = df['rmt'] - for var in df.columns[3:]: - if var not in common_vars: - df = df.drop(var,axis=1) - data_lib.df_dict[stat][season][region] = df - del(df) - - data_lib.var_list = common_vars - data_lib.var_unit_list = common_unts - - return data_lib - -def collect_metrics_data(parameter,group): - #merge data to an exisiting cmip base - cmip_files = glob.glob(os.path.join( - parameter['cmip_path'], - group, - parameter['cmip_name'].split(".")[0], - parameter['cmip_name'].split(".")[1], - parameter['cmip_name'].split(".")[2], - "*.json")) - if len(cmip_files) > 0 and os.path.exists(cmip_files[0]): - print('CMIP PCMDI DIAGs for Sythetic Metrics Found, Read data...') - cmip_lib = Metrics(cmip_files) - cmip_lib = check_units(cmip_lib) - else: - exit("Warning: CMIP PCMDI DIAGs for Sythetic Metrics Not Found,....") - - model_name = '.'.join([ - parameter['test_name'].split(".")[2], - parameter['test_name'].split(".")[3]]) - model_files = glob.glob(os.path.join( - parameter['test_path'], - group, - parameter['test_name'].split(".")[0], - parameter['test_name'].split(".")[1], - parameter['case_id'], - "*.json")) - if len(model_files) > 0 and os.path.exists(model_files[0]): - print('{} PCMDI DIAGs for Sythetic Metrics Found, Read data...'.format(model_name)) - model_lib = Metrics(model_files) - model_lib = check_units(model_lib) - else: - exit("Warning: Model PCMDI DIAGs for Sythetic Metrics Not Found,....") - - #merge model data with reference cmip data - merge_lib = 
merge_data(model_lib,cmip_lib,model_name) - - return merge_lib - -def archive_data(parameter,stat,region,season,data_dict, - model_name,var_names,var_units,outdir): - outdic = pd.DataFrame(data_dict) - outdic = outdic.drop(columns=["model_run"]) - for var in list(outdic.columns.values[3:]): - if var not in var_names: - outdic = outdic.drop(columns=[var]) - else: - # replace the variable with the name + units - outdic.columns.values[outdic.columns.values.tolist().index(var)] = ( - var_units[var_names.index(var)] - ) - # save data to .csv file - if not os.path.exists(outdir): - os.makedirs(outdir) - outfile = "{}_{}_{}_{}.csv".format(stat,region,season,model_name) - outdic.to_csv(os.path.join(outdir,outfile)) - return - -def parcord_metric_plot(parameter,group,data_lib): - season = "ann" - metric_dict = {"rms_xyt" : "RMSE"} - model_name = '.'.join([ - parameter['test_name'].split(".")[2], - parameter['test_name'].split(".")[3]]) - - # process figure - fontsize = 20 - figsize = (40, 18) - shrink = 0.8 - legend_box_xy = (1.08, 1.18) - legend_box_size = 4 - legend_lw = 1.5 - legend_fontsize = fontsize * 0.8 - legend_ncol = int(7 * figsize[0] / 40.0) - legend_posistion = (0.50, -0.14) - # hide markers for CMIP models - identify_all_models = False - # colors for highlight lines - xcolors = ["#000000","#e41a1c","#ff7f00","#4daf4a","#f781bf", - "#a65628","#984ea3","#999999","#377eb8","#dede00"] - - for stat in metric_dict.keys(): - for region in data_lib.regions: - var_names = data_lib.var_list.copy() - var_units = data_lib.var_unit_list.copy() - # data for final plot - data_dict = data_lib.df_dict[stat][season][region].reset_index(drop=True) - #drop data if all is NaNs - for column in data_dict.columns[3:]: - if np.all(np.isnan(data_dict[column].to_numpy())): - data_dict = data_dict.drop(column, axis=1) - index = var_names.index(column) - var_names.remove(var_names[index]) - var_units.remove(var_units[index]) - - highlight_model1 = [] - for model in 
data_dict['model'].to_list(): - if "e3sm" in model.lower(): - highlight_model1.append(model) - elif model in model_name: - highlight_model1.append(model_name) - - # ensemble mean for CMIP group - irow_sub = data_dict[data_dict['model'] == highlight_model1[0]].index[0] - data_dict.loc["CMIP MMM"] = data_dict[:irow_sub].mean( - numeric_only=True, skipna=True) - data_dict.at["CMIP MMM", "model"] = "CMIP MMM" - data_dict.loc["E3SM MMM"] = data_dict[irow_sub:].mean( - numeric_only=True, skipna=True) - data_dict.at["E3SM MMM", "model"] = "E3SM MMM" - - if parameter['save_data']: - outdir = os.path.join(parameter['out_dir'],region) - archive_data(parameter,stat,region,season,data_dict, - model_name,var_names,var_units,outdir) - - model_list = data_dict['model'].to_list() - highlight_model2 = data_dict['model'].to_list()[-3:] - - #final plot data - data_var = data_dict[var_names].to_numpy() - - #label information - var_labels = [] - for i,var in enumerate(var_names): - var_labels.append(var + "\n" + var_units[i]) - - xlabel = "Metric" - ylabel = '{} ({})'.format(metric_dict[stat],stat.upper()) - # colors for highlight lines - lncolors = xcolors[1 : len(highlight_model2)] + [xcolors[0]] - fig,ax = parallel_coordinate_plot( - data_var, - var_labels, - model_list, - model_names2=highlight_model1, - group1_name="CMIP6", - group2_name="E3SM", - models_to_highlight=highlight_model2, - models_to_highlight_colors=lncolors, - models_to_highlight_labels=highlight_model2, - identify_all_models=identify_all_models, - vertical_center="median", - vertical_center_line=True, - title="Model Performance of {} Climatology ({}, {})".format( - season.upper(),stat.upper(), region.upper()), - figsize=figsize, - colormap="tab20_r", - show_boxplot=False, - show_violin=True, - violin_colors=("lightgrey", "pink"), - legend_ncol=legend_ncol, - legend_bbox_to_anchor=legend_posistion, - legend_fontsize=fontsize * 0.85, - xtick_labelsize=fontsize * 0.95, - ytick_labelsize=fontsize * 0.95, - 
logo_rect=[0, 0, 0, 0], - logo_off=True) - - # Save figure as an image file - outdir = os.path.join(parameter['out_dir'],region) - if not os.path.exists(outdir): - os.makedirs(outdir) - outfile = "{}_{}_{}_parcord_{}.png".format(stat,region,season,model_name) - fig.savefig(os.path.join(outdir,outfile),facecolor="w", bbox_inches="tight") - - -def portrait_metric_plot(parameter,group,data_lib): - seasons = ["djf", "mam", "jja", "son"] - metric_dict = {"cor_xy" : "Pattern Corr.", - "rms_xy" : "Normalized RMSE by Median"} - model_name = '.'.join([ - parameter['test_name'].split(".")[2], - parameter['test_name'].split(".")[3]]) - - # process figure - fontsize = 20 - add_vertical_line = True - figsize = (40, 18) - legend_box_xy = (1.08, 1.18) - legend_box_size = 4 - legend_lw = 1.5 - shrink = 0.8 - legend_fontsize = fontsize * 0.8 - - var_names = data_lib.var_list - var_units = data_lib.var_unit_list - - for stat in metric_dict.keys(): - for region in data_lib.regions: - data_nor = dict() - for season in seasons: - data_dict = data_lib.df_dict[stat][season][region].copy() - if stat == "cor_xy": - data_nor[season] = data_dict[var_names].to_numpy().T - else: - data_nor[season] = normalize_by_median( - data_dict[var_names].to_numpy().T, axis=1) - if parameter['save_data']: - data_dict[var_names] = data_nor[season] - outdir = os.path.join(parameter['out_dir'],region) - archive_data(parameter,stat,region,season,data_dict, - model_name,var_names,var_units,outdir) - # data for final plot - data_all_nor = np.stack( - [data_nor["djf"], data_nor["mam"], data_nor["jja"], data_nor["son"]] - ) - - lable_colors = [] - highlight_models = [] - model_list = data_dict['model'].to_list() - for model in model_list: - if "e3sm" in model.lower(): - highlight_models.append(model) - lable_colors.append("#5170d7") - elif model in model_name: - highlight_models.append(model_name) - lable_colors.append("#FC5A50") - else: - lable_colors.append("#000000") - - if stat == "cor_xy": - var_range = (0, 
1.0) - cmap_color = "YlOrBr" - cmap_bounds = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.65, - 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0] - else: - var_range = (-0.5, 0.5) - cmap_color = "RdYlBu_r" - cmap_bounds = [-0.5, -0.4, -0.3, -0.2, -0.1, - 0, 0.1,0.2, 0.3, 0.4, 0.5] - - fig, ax, cbar = portrait_plot( - data_all_nor, - xaxis_labels=model_list, - yaxis_labels=var_names, - cbar_label=metric_dict[stat], - cbar_label_fontsize=fontsize * 1.0, - cbar_tick_fontsize=fontsize, - box_as_square=True, - vrange=var_range, - figsize=figsize, - cmap=cmap_color, - cmap_bounds=cmap_bounds, - cbar_kw={"extend": "both", "shrink": shrink}, - missing_color="white", - legend_on=True, - legend_labels=["DJF", "MAM", "JJA", "SON"], - legend_box_xy=legend_box_xy, - legend_box_size=legend_box_size, - legend_lw=legend_lw, - legend_fontsize=legend_fontsize, - logo_rect=[0, 0, 0, 0], - logo_off=True) - - ax.axvline(x=len(x_labels)-len(highlight_models),color="k",linewidth=3) - ax.set_xticklabels(model_list,rotation=45,va="bottom",ha="left") - ax.set_yticklabels(y_labels,rotation=0,va="center",ha="right") - for xtick,color in zip(ax.get_xticklabels(),lable_colors): - xtick.set_color(color) - ax.yaxis.label.set_color(lable_colors[0]) - - # Save figure as an image file - outdir = os.path.join(parameter['out_dir'],region) - if not os.path.exists(outdir): - os.makedirs(outdir) - outfile = "{}_{}_4season_{}.png".format(stat,region,model_name) - fig.savefig(os.path.join(outdir,outfile),facecolor="w", bbox_inches="tight") - - return +from pcmdi_zppy_util import( + archive_data, + check_regions, + check_references, + check_units, + childCount, + collect_data_info, + collect_clim_diags, + collect_movs_diags, + collect_enso_diags, + collect_clim_metrics, + collect_movs_metrics, + create_data_lmask, + derive_var, + enso_obsvar_dict, + enso_obsvar_lmsk, + shift_row_to_bottom, + merge_data, + parallel_jobs, + parcoord_metric_plot, + portrait_metric_plot, + serial_jobs, + variable_region, + 
mean_climate_plot_driver, + variability_modes_plot_driver, +) ############################## start_yr = int('${Y1}') @@ -1481,96 +735,161 @@ num_years = end_yr - start_yr + 1 #parallel calculation num_workers = {{ num_workers }} -multiprocessing = {{multiprocessing}} +if num_workers < 2: + multiprocessing = False +else: + multiprocessing = {{multiprocessing}} # DATA LOCATION: Reference -{%- if "mean_climate" in subset %} +{%- if "mean_climate" in subsection %} +test_data_path = '${climo_dir_primary}' reference_data_path = '${climo_dir_ref}' -{%- elif ("variability_mode" in subset) or ("enso" in subset) %} +{%- elif ("variability_modes" in subsection) or ("enso" in subsection) %} +test_data_path = '${ts_dir_primary}' reference_data_path = '${ts_dir_ref}' {%- endif %} -observation_file = os.path.join( - 'pcmdi_diags', - '{}_{}_catalogue.json'.format( - reference_data_path, - '{{subset}}') -) -obs_dic = json.load(open(observation_file)) -{%- if "mean_climate" in subset %} -###################################### +test_data_set = ['${model_name}'.split(".")[1]] +{% if run_type == "model_vs_obs" %} +reference_data_set = '{{ obs_sets }}'.split(",") +{% elif run_type == "model_vs_model" %} +reference_data_set = ['${model_name_ref}'.split(".")[1]] +{%- endif %} + +{%- if "synthetic_plots" not in subsection %} +variables = '{{ vars }}'.split(",") +############################################################### +#check and process derived quantities, these quantities are +#likely not included as default in e3sm_to_cmip module +############################################################### +for i,var in enumerate(variables): + if "_" in var or "-" in var: + varin = re.split("_|-", var)[0] + else: + varin = var + fpaths = sorted(glob.glob(os.path.join(test_data_path,"*."+var+".*.nc"))) + if len(fpaths) < 1 and varin == 'rstcre': + derive_var(test_data_path, + varin,{'rsutcs':1,'rsut':-1}, + '${model_name}.${tableID}') +{% if run_type == "model_vs_model" %} + 
derive_var(reference_data_path, + varin,{'rsutcs':1,'rsut':-1}, + '${model_name_ref}.${tableID_ref}') +{%- endif %} + elif len(fpaths) < 1 and varin == 'rltcre': + derive_var(test_data_path, + varin,{'rlutcs':1,'rlut':-1}, + '${model_name}.${tableID}') +{% if run_type == "model_vs_model" %} + derive_var(reference_data_path, + varin,{'rlutcs':1,'rlut':-1}, + '${model_name_ref}.${tableID_ref}') +{%- endif %} + +####################################################### +#collect and document data info in a dictionary +# for convenience of pcmdi processing +####################################################### +test_dic, obs_dic = collect_data_info( + test_data_path,test_data_set, + reference_data_path,reference_data_set, + variables,'{{subsection}}','pcmdi_diags') + +########################################################## +# land/sea mask is needed in PCMDI diagnostics, check and +# generate it here as these data are not always available +# for model or observations +########################################################## +if {{ generate_sftlf }} in ['true', 'y', True]: + generate_sftlf = True +else: + generate_sftlf = False + +if generate_sftlf: + create_data_lmask( + test_data_path, + reference_data_path, + '{{subsection}}', + 'fixed') + +#info to collect diagnostic output +input_template = os.path.join( + 'pcmdi_diags', + '%(output_type)', + '%(metric_type)', + '${model_name}'.split(".")[0], + '${model_name}'.split(".")[1], + '${case_id}') + +out_path = os.path.join( + '${results_dir}', + '%(group_type)') + +{%- endif %} + +{%- if "mean_climate" in subsection %} +#################################################### # call pcmdi mean climate diagnostics -##################################### -compute_regions = '{{regions}}'.split(",") -compute_variables = '{{vars}}'.split(",") +#################################################### +regions = '{{regions}}'.split(",") + #assiagn region to each variable -variable_region( - compute_regions, - compute_variables -) 
+variable_region(regions,variables) + ################################################### # generate the command list for each reference and # each variable (will execuate in parallel later) lstcmd = [] -for var in compute_variables: - vkey = var.split("-")[0] - if vkey in obs_dic.keys(): - refset = obs_dic[vkey]['set'] - lstcmd.append(" ".join([ - 'mean_climate_driver.py', - '-p parameterfile.py' , - '--vars' , '{}'.format(var), - '-r' , '{}'.format(refset), - '--case_id' , '{}'.format('${case_id}') - ])) +for var in variables: + if "_" in var or "-" in var: + varin = re.split("_|-", var)[0] + else: + varin = var + if varin in obs_dic.keys(): + refset = obs_dic[varin]['set'] + lstcmd.append( + " ".join(['mean_climate_driver.py','-p parameterfile.py', + '--vars' , '{}'.format(var), + '-r' , '{}'.format(refset), + '--case_id' , '{}'.format('${case_id}') + ]) + ) if (len(lstcmd) > 0 ) and multiprocessing: - print("Parallel computing with {} jobs".format(str(len(lstcmd)))) - stdout,stderr,return_code = parallel_jobs(lstcmd,num_workers) + print("Parallel computing with {} jobs".format(str(len(lstcmd)))) + stdout,stderr,return_code = parallel_jobs(lstcmd,num_workers) elif (len(lstcmd) > 0 ): - print("Serial computing with {} jobs".format(str(len(lstcmd)))) - stdout,stderr,return_code = serial_jobs(lstcmd,num_workers) + print("Serial computing with {} jobs".format(str(len(lstcmd)))) + stdout,stderr,return_code = serial_jobs(lstcmd,num_workers) else: - print("no jobs to run...") - return_code = 0 + print("no jobs to run...") + return_code = 0 if return_code != 0: - exit("ERROR: {} jobs failed".format('{{subset}}')) + exit("ERROR: {} jobs failed".format('{{subsection}}')) else: - print("successfully finish all jobs....") - #time delay to ensure process completely finished - time.sleep(1) + print("successfully finish all jobs....") + #time delay to ensure process completely finished + time.sleep(5) -{% if run_type == "model_vs_obs" %} -synthetic_plot = '{{sythentic_plots}}' 
-if synthetic_plot == "y": - print("generate sythentic metrics plot ...") - parameter = OrderedDict() - parameter['save_data'] = True - parameter['cmip_path'] = '{{pcmdi_data_path}}' - parameter['cmip_name'] = '{{pcmdi_cmip_clim}}' - parameter['test_name'] = '{{model_name}}' - parameter['test_path'] = os.path.join('pcmdi_diags','metrics_results') - parameter['case_id'] = '${case_id}' - parameter['out_dir'] = os.path.join('${results_dir}','ERROR_metric') - merge_lib = collect_metrics_data(parameter,'mean_climate') - print("Processing Portrait Plots (4 seasons)....") - portrait_metric_plot(parameter,'mean_climate',merge_lib) - print("Processing Parallel Coordinate Plots (Annual Cycle)....") - parcord_metric_plot(parameter,'mean_climate',merge_lib) -{%- endif %} +#orgnize diagnostic output +collect_clim_diags( + regions,variables,'{{figure_format}}', + input_template,out_path +) {%- endif %} -{%- if "variability_mode" in subset %} +{%- if "variability_modes" in subsection %} ########################################## # call pcmdi mode variability diagnostics ########################################## print("calculate mode variability metrics") -{%- if subset == "variability_mode_atm" %} +{%- if subsection == "variability_modes_atm" %} var_modes = '{{ atm_modes }}'.split(",") -{% elif subset == "variability_mode_cpl" %} +{% elif subsection == "variability_modes_cpl" %} var_modes = '{{ cpl_modes }}'.split(",") {%- endif %} @@ -1583,21 +902,21 @@ reftyrs = int(str(obs_dic[varOBS][refname]['yymms'])[0:4]) reftyre = int(str(obs_dic[varOBS][refname]['yymme'])[0:4]) lstcmd = [] -for variability_mode in var_modes: - if variability_mode in ["NPO", "NPGO", "PSA1"]: - eofn_obs = "2" - eofn_mod = "2" - elif variability_mode in ["PSA2"]: - eofn_obs = "3" - eofn_mod = "3" - else: - eofn_obs = "1" - eofn_mod = "1" - ############################################## - cmd = (" ".join([ +for var_mode in var_modes: + if var_mode in ["NPO", "NPGO", "PSA1"]: + eofn_obs = "2" + eofn_mod = 
"2" + elif var_mode in ["PSA2"]: + eofn_obs = "3" + eofn_mod = "3" + else: + eofn_obs = "1" + eofn_mod = "1" + ############################################## + cmd = (" ".join([ 'variability_modes_driver.py', '-p parameterfile.py' , - '--variability_mode' , '{}'.format(variability_mode), + '--variability_mode' , '{}'.format(var_mode), '--eofn_mod' , '{}'.format(eofn_mod), '--eofn_obs' , '{}'.format(eofn_obs), '--varOBS' , '{}'.format(varOBS), @@ -1607,227 +926,154 @@ for variability_mode in var_modes: '--reference_data_path' , '{}'.format(refpath), '--case_id' , '{}'.format('${case_id}') ])) - lstcmd.append(cmd); del(cmd) + lstcmd.append(cmd); del(cmd) if (len(lstcmd) > 0 ) and multiprocessing: - print("Parallel computing with {} jobs".format(str(len(lstcmd)))) - stdout,stderr,return_code = parallel_jobs(lstcmd,num_workers) + print("Parallel computing with {} jobs".format(str(len(lstcmd)))) + stdout,stderr,return_code = parallel_jobs(lstcmd,num_workers) elif (len(lstcmd) > 0 ): - print("Serial computing with {} jobs".format(str(len(lstcmd)))) - stdout,stderr,return_code = serial_jobs(lstcmd,num_workers) + print("Serial computing with {} jobs".format(str(len(lstcmd)))) + stdout,stderr,return_code = serial_jobs(lstcmd,num_workers) else: - print("no jobs to run...") - return_code = 0 + print("no jobs to run...") + return_code = 0 if return_code != 0: - exit("ERROR: {} jobs failed".format('{{subset}}')) + exit("ERROR: {} jobs failed".format('{{subsection}}')) else: - print("successfully finish all jobs....") - #time delay to ensure process completely finished - time.sleep(1) + print("successfully finish all jobs....") + #time delay to ensure process completely finished + time.sleep(5) + +#orgnize diagnostic output +collect_movs_diags(var_modes,'{{figure_format}}',input_template,out_path) {%- endif %} -{%- if "enso" in subset %} +{%- if "enso" in subsection %} ############################################# # call enso_driver.py to process diagnostics 
############################################# #orgnize observation var list -enso_obsvar_dict(obs_dic,"{{vars}}".split(",")) +enso_obsvar_dict(obs_dic,variables) #orgnize observation landmask -enso_obsvar_lmsk(obs_dic,"{{vars}}".split(",")) +enso_obsvar_lmsk(obs_dic,variables) #now start enso driver print("calculate enso metrics") enso_groups = '{{ enso_groups }}'.split(",") lstcmd = [] for metricsCollection in enso_groups: - cmd = (" ".join([ - 'enso_driver.py ', - '-p parameterfile.py', - '--metricsCollection', '{}'.format(metricsCollection), - '--case_id' , '{}'.format('${case_id}') - ])) - lstcmd.append(cmd); del(cmd) + cmd = (" ".join([ + 'enso_driver.py ', + '-p parameterfile.py', + '--metricsCollection', '{}'.format(metricsCollection), + '--case_id' , '{}'.format('${case_id}') + ])) + lstcmd.append(cmd); del(cmd) if (len(lstcmd) > 0 ) and multiprocessing: - print("Parallel computing with {} jobs".format(str(len(lstcmd)))) - stdout,stderr,return_code = parallel_jobs(lstcmd,num_workers) + print("Parallel computing with {} jobs".format(str(len(lstcmd)))) + stdout,stderr,return_code = parallel_jobs(lstcmd,num_workers) elif (len(lstcmd) > 0 ): - print("Serial computing with {} jobs".format(str(len(lstcmd)))) - stdout,stderr,return_code = serial_jobs(lstcmd,num_workers) + print("Serial computing with {} jobs".format(str(len(lstcmd)))) + stdout,stderr,return_code = serial_jobs(lstcmd,num_workers) else: - print("no jobs to run...") - return_code = 0 + print("no jobs to run...") + return_code = 0 if return_code != 0: - exit("ERROR: {} jobs failed".format('{{subset}}')) + exit("ERROR: {} jobs failed".format('{{subsection}}')) else: - print("successfully finish all jobs....") - #time delay to ensure process completely finished - time.sleep(1) -{%- endif %} -EOF -################################ -# Run diagnostics -command="srun -N 1 python -u pcmdi.py" -# Run diagnostics -time ${command} -if [ $? 
!= 0 ]; then - cd {{ scriptDir }} - echo 'ERROR (11)' > {{ prefix }}.status - exit 11 -fi + print("successfully finish all jobs....") + #time delay to ensure process completely finished + time.sleep(5) -########################################### -# reorgnize pcmdi diagnostics output -########################################### -# Prepare configuration file -cat > graphic_viewer.py << EOF -import os -import glob -import json -import time -import datetime -import collections -from collections import OrderedDict - -def get_mean_climate_graphics(regions,variables,fig_format,input_dir,output_dir): - diag_metric = "mean_climate" - seasons = ['DJF','MAM','JJA','SON','AC'] - input_dir = input_dir.replace("%(metric_type)",diag_metric) - - fig_sets = OrderedDict() - fig_sets['CLIM_patttern'] = ['graphics','*'] - - for fset in fig_sets.keys(): - fdir = input_dir.replace('%(output_type)',fig_sets[fset][0] ) - output = output_dir.replace("%(group_type)",fset) - for region in regions: - for sea in seasons: - outpath = os.path.join(output,region,sea) - if not os.path.exists(outpath): - os.makedirs(outpath) - for var in variables: - fpaths = sorted(glob.glob(os.path.join(fdir,var, - '{}{}_{}*.{}'.format(fig_sets[fset][1],region,sea,fig_format)))) - for fpath in fpaths: - refname = fpath.split("/")[-2] - filname = fpath.split("/")[-1] - outfile = os.path.join(outpath,filname) - os.rename(fpath,outfile) - - return - -def get_variability_graphics(modes,fig_format,input_dir,output_dir): - diag_metric = "variability_modes" - input_dir = input_dir.replace("%(metric_type)",diag_metric) - - fig_sets = OrderedDict() - fig_sets['MOV_eofvar'] = ['diagnostic_results','EG_Spec*'] - fig_sets['MOV_telecon'] = ['graphics','*teleconnection'] - fig_sets['MOV_pattern'] = ['graphics','*'] - - for mode in modes: - for fset in fig_sets.keys(): - fdir = input_dir.replace('%(output_type)',fig_sets[fset][0] ) - output = output_dir.replace("%(group_type)",fset) - fpaths = 
sorted(glob.glob(os.path.join(fdir,mode,'*', - '{}.{}'.format(fig_sets[fset][1],fig_format)))) - for fpath in fpaths: - refname = fpath.split("/")[-2] - filname = fpath.split("/")[-1] - outpath = os.path.join(output,'{}_model_vs_{}'.format(mode,refname)) - if not os.path.exists(outpath): - os.makedirs(outpath) - outfile = os.path.join(outpath,filname) - os.rename(fpath,outfile) - return - -def get_enso_graphics(groups,fig_format,refname,input_dir,output_dir): - diag_metric = "enso_metric" - input_dir = input_dir.replace("%(metric_type)",diag_metric) - - fig_sets = OrderedDict() - fig_sets['ENSO_metric'] = ['graphics','*'] - - for fset in fig_sets.keys(): - for group in groups: - fdir = input_dir.replace('%(output_type)',fig_sets[fset][0] ) - output = output_dir.replace("%(group_type)",fset) - fpaths = sorted(glob.glob(os.path.join(fdir,group, - '{}.{}'.format(fig_sets[fset][1],fig_format)))) - for fpath in fpaths: - filname = fpath.split("/")[-1] - outpath = os.path.join(output,'{}_model_vs_{}'.format(group,refname)) - if not os.path.exists(outpath): - os.makedirs(outpath) - outfile = os.path.join(outpath,filname) - os.rename(fpath,outfile) - - return - -############# -fig_format = '{{ figure_format }}' -diag_types = ['metrics_results','diagnostic_result','graphics'] +#organize diagnostic output +obs_dict = json.load(open('obs_catalogue.json')) +obs_name = list(obs_dict.keys())[0] +collect_enso_diags(enso_groups,'{{figure_format}}', + obs_name,input_template,out_path) -input_template = os.path.join( - 'pcmdi_diags', - '%(output_type)', - '%(metric_type)', - '${model_name}'.split(".")[0], - '${model_name}'.split(".")[1], - '${case_id}', -) +{%- endif %} -out_path = os.path.join( - '${results_dir}', - '%(group_type)' +{%- if "synthetic_plots" in subsection %} +######################################### +#plot synthetic figures for pcmdi metrics +######################################### +print("generate synthetic metrics plot ...") +metric_sets = 
'{{sub_sets}}'.split(",") +figure_sets = '{{synthetic_sets}}'.split(",") +figure_format = '{{figure_format}}' +test_input_path = os.path.join( + '${www}','${case}','pcmdi_diags','${results_dir}', + 'metrics_data','%(group_type)' ) -{%- if ("mean_climate" in subset) %} -compute_regions = '{{ regions }}'.split(",") -compute_variables = '{{ vars }}'.split(",") -get_mean_climate_graphics( - compute_regions,compute_variables, - fig_format,input_template,out_path -) -{% endif %} +metric_dict = json.load(open('synthetic_metrics_list.json')) + +parameter = OrderedDict() +parameter['save_data'] = True +parameter['case_id'] = '${case_id}' +parameter['out_dir'] = os.path.join('${results_dir}','ERROR_metric') +parameter['test_name'] = '{{model_name}}' +parameter['tableID'] = '{{model_tableID}}' +parameter['model_name'] = '-'.join('{{model_name}}'.split(".")[2:]) + +for metric in metric_sets: + parameter['test_path'] = test_input_path.replace('%(group_type)',metric) + parameter['diag_vars'] = metric_dict[metric] + if metric == "mean_climate": + parameter['cmip_path'] = '{{cmip_clim_dir}}' + parameter['cmip_name'] = '{{cmip_clim_set}}' + merge_lib = collect_clim_metrics(parameter) + elif metric == "variability_modes": + parameter['cmip_path'] = '{{cmip_movs_dir}}' + parameter['cmip_name'] = '{{cmip_movs_set}}' + parameter['movs_mode'] = '{{ atm_modes }}'.split(",") + '{{ cpl_modes }}'.split(",") + merge_lib,mode_season_list = collect_movs_metrics(parameter) + elif metric == 'enso': + parameter['cmip_path'] = '{{cmip_enso_dir}}' + parameter['cmip_name'] = '{{cmip_enso_set}}' + merge_lib = collect_enso_metrics(parameter) + + for stat in metric_dict[metric].keys(): + if metric == "mean_climate": + mean_climate_plot_driver( + metric, stat, + merge_lib.regions, + parameter['model_name'], + parameter['diag_vars'][stat], + merge_lib.df_dict[stat], + merge_lib.var_list, + merge_lib.var_unit_list, + parameter['save_data'], + parameter['out_dir']) + elif metric == "variability_modes": + 
variability_modes_plot_driver( + metric, stat, + parameter['model_name'], + parameter['diag_vars'][stat], + merge_lib[stat], + mode_season_list, + parameter['save_data'], + parameter['out_dir']) -{%- if ("variability_mode" in subset) %} -{%- if ("variability_mode_atm" in subset) %} -compute_modes = '{{ atm_modes }}'.split(",") -{% elif ("variability_mode_cpl" in subset) %} -compute_modes = '{{ cpl_modes }}'.split(",") {%- endif %} -get_variability_graphics( - compute_modes,fig_format, - input_template,out_path -) -{%- endif %} - -{%- if ("enso" in subset) %} -compute_groups = '{{ enso_groups }}'.split(",") -obs_dict = json.load(open('obs_catalogue.json')) -obs_name = list(obs_dict.keys())[0] -get_enso_graphics( - compute_groups,fig_format, - obs_name,input_template,out_path -) -{% endif %} EOF ################################ # Run diagnostics -command="srun -N 1 python -u graphic_viewer.py" +mkdir -p pcmdi_diags +command="srun -N 1 python -u pcmdi.py" # Run diagnostics time ${command} if [ $? 
!= 0 ]; then cd {{ scriptDir }} - echo 'ERROR (12)' > {{ prefix }}.status - exit 12 + echo 'ERROR (11)' > {{ prefix }}.status + exit 11 fi ################################# From 0d3c387a89516ac35e6116b671bdb1ae17e4d36c Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Fri, 10 Jan 2025 12:39:04 -0600 Subject: [PATCH 12/23] Revise mpi to ensure the writing process finished --- zppy/templates/pcmdi_diags.bash | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash index 117fb447..d682f3d4 100755 --- a/zppy/templates/pcmdi_diags.bash +++ b/zppy/templates/pcmdi_diags.bash @@ -318,7 +318,11 @@ from pcmdi_zppy_util import( model_name = '${model_name_ref}.${tableID_ref}' +{%- if ("mean_climate" in subsection) %} variables = '{{ cmip_vars }}'.split(",") +{%- elif ("variability_modes_cpl" in subsection) or ("variability_modes_atm" in subsection) or ("enso" in subsection) %} +variables = '{{ vars }}'.split(",") +{%- endif %} obs_sets = '{{ obs_sets }}'.split(",") ts_dir_ref_source = '{{ obs_ts }}' @@ -1051,7 +1055,7 @@ for metric in metric_sets: merge_lib.var_unit_list, parameter['save_data'], parameter['out_dir']) - elif metric == "variability_modes": + elif metric == "variability_modes": variability_modes_plot_driver( metric, stat, parameter['model_name'], From 9ff8c189f5b9929a364b3a197f35d33c5e5a1b28 Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Tue, 14 Jan 2025 13:38:31 -0600 Subject: [PATCH 13/23] Bug fix --- zppy/templates/pcmdi_diags.bash | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash index d682f3d4..fac21296 100755 --- a/zppy/templates/pcmdi_diags.bash +++ b/zppy/templates/pcmdi_diags.bash @@ -317,12 +317,7 @@ from pcmdi_zppy_util import( ) model_name = '${model_name_ref}.${tableID_ref}' - -{%- if ("mean_climate" in subsection) %} -variables = '{{ cmip_vars }}'.split(",") -{%- elif 
("variability_modes_cpl" in subsection) or ("variability_modes_atm" in subsection) or ("enso" in subsection) %} variables = '{{ vars }}'.split(",") -{%- endif %} obs_sets = '{{ obs_sets }}'.split(",") ts_dir_ref_source = '{{ obs_ts }}' @@ -370,9 +365,9 @@ for i,var in enumerate(variables): out = os.path.join('${obstmp_dir}', '{}.{}.{}-{}.nc'.format( model_name.replace('%(model)',obs), - var,yms,yme)) + varin,yms,yme)) #rename variable if needed then save file - if varin != var: + if (varin != var) and ("_" not in var ) and ("-" not in var): ds = xcdat_open(fpaths[0]) ds = ds.rename(name_dict={varin:var}) ds.to_netcdf(out) @@ -853,7 +848,7 @@ for var in variables: if varin in obs_dic.keys(): refset = obs_dic[varin]['set'] lstcmd.append( - " ".join(['mean_climate_driver.py','-p parameterfile.py', + " ".join(['mean_climate_driver.py', ' -p parameterfile.py', '--vars' , '{}'.format(var), '-r' , '{}'.format(refset), '--case_id' , '{}'.format('${case_id}') @@ -875,7 +870,7 @@ if return_code != 0: else: print("successfully finish all jobs....") #time delay to ensure process completely finished - time.sleep(5) + time.sleep(60) #orgnize diagnostic output collect_clim_diags( From b2b156dfb23ec3b9da1f10ebdaf71db49746bfc7 Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Tue, 14 Jan 2025 23:38:20 -0600 Subject: [PATCH 14/23] Bug fix --- zppy/templates/pcmdi_diags.bash | 147 ++++++++++++++++++-------------- 1 file changed, 82 insertions(+), 65 deletions(-) diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash index fac21296..3b0ab53e 100755 --- a/zppy/templates/pcmdi_diags.bash +++ b/zppy/templates/pcmdi_diags.bash @@ -333,13 +333,14 @@ altobs_dic = { "pr" : "PRECT", obs_dic = json.load(open('reference_alias.json')) -#loop each variable and process the data -for i,var in enumerate(variables): - if "_" in var or "-" in var: - varin = re.split("_|-", var)[0] +######################################## +#first loop: link data to work directory 
+######################################## +for i,vv in enumerate(variables): + if "_" in vv or "-" in vv: + varin = re.split("_|-", vv)[0] else: - varin = var - + varin = vv if len(obs_sets) > 1 and len(obs_sets) == len(variables): obsid = obs_sets[i] else: @@ -352,38 +353,52 @@ for i,var in enumerate(variables): obsstr = obsname fpaths = sorted(glob.glob(os.path.join(ts_dir_ref_source,obsstr,varin+"_*.nc"))) - if (len(fpaths) < 1) and (varin in altobs_dic.keys()): - #these variables were not included as cmip type - varin = altobs_dic[varin] - fpaths = sorted(glob.glob(os.path.join(ts_dir_ref_source,obsstr,varin+"_*.nc"))) - if (len(fpaths) > 0) and (os.path.exists(fpaths[0])): - template = fpaths[0].split("/")[-1] - yms = template.split("_")[-2][0:6] - yme = template.split("_")[-1][0:6] - obs = obsname.replace(".","_") - out = os.path.join('${obstmp_dir}', - '{}.{}.{}-{}.nc'.format( - model_name.replace('%(model)',obs), - varin,yms,yme)) - #rename variable if needed then save file - if (varin != var) and ("_" not in var ) and ("-" not in var): - ds = xcdat_open(fpaths[0]) - ds = ds.rename(name_dict={varin:var}) - ds.to_netcdf(out) - elif not os.path.exists(out): - os.symlink(fpaths[0],out) - - ##################################################################### - #check and process derived quantities - #note: these quantities are possibly not included as default in cmip - if varin in ['rltcre','rstcre']: - fpaths = sorted(glob.glob(os.path.join('${obstmp_dir}',"*"+varin+"_*.nc"))) - if len(fpaths) < 1: - if varin == 'rstcre': - derive_var('${obstmp_dir}',varin,{'rsutcs':1,'rsut':-1},model_name) - elif varin == 'rltcre': - derive_var('${obstmp_dir}',varin,{'rlutcs':1,'rlut':-1},model_name) + template = fpaths[0].split("/")[-1] + yms = template.split("_")[-2][0:6] + yme = template.split("_")[-1][0:6] + obs = obsname.replace(".","_") + out = os.path.join( + '${obstmp_dir}', + '{}.{}.{}-{}.nc'.format( + model_name.replace('%(model)',obs), + varin,yms,yme) + ) + if 
not os.path.exists(out): + os.symlink(fpaths[0],out) + elif varin in altobs_dic.keys(): + varin1 = altobs_dic[varin] + fpaths = sorted(glob.glob( + os.path.join(ts_dir_ref_source,obsstr,varin1+"_*.nc")) + ) + if (len(fpaths) > 0) and (os.path.exists(fpaths[0])): + template = fpaths[0].split("/")[-1] + yms = template.split("_")[-2][0:6] + yme = template.split("_")[-1][0:6] + obs = obsname.replace(".","_") + out = os.path.join( + '${obstmp_dir}', + '{}.{}.{}-{}.nc'.format( + model_name.replace('%(model)',obs), + varin,yms,yme) + ) + ds = xcdat_open(fpaths[0]) + ds = ds.rename(name_dict={varin1:varin}) + ds.to_netcdf(out) + +##################################################################### +#second loop: check and process derived quantities +#note: these quantities are possibly not included as default in cmip +##################################################################### +for vv in enumerate(variables): + if vv in ['rltcre','rstcre']: + fpaths = sorted(glob.glob( + os.path.join('${obstmp_dir}',"*"+vv+"_*.nc")) + ) + if (len(fpaths) < 1) and (vv == 'rstcre'): + derive_var('${obstmp_dir}',vv,{'rsutcs':1,'rsut':-1},model_name) + elif (len(fpaths) < 1) and (vv == 'rltcre'): + derive_var('${obstmp_dir}',vv,{'rlutcs':1,'rlut':-1},model_name) EOF ################### @@ -870,7 +885,7 @@ if return_code != 0: else: print("successfully finish all jobs....") #time delay to ensure process completely finished - time.sleep(60) + time.sleep(5) #orgnize diagnostic output collect_clim_diags( @@ -902,30 +917,30 @@ reftyre = int(str(obs_dic[varOBS][refname]['yymme'])[0:4]) lstcmd = [] for var_mode in var_modes: - if var_mode in ["NPO", "NPGO", "PSA1"]: + if var_mode in ["NPO", "NPGO", "PSA1"]: eofn_obs = "2" eofn_mod = "2" - elif var_mode in ["PSA2"]: + elif var_mode in ["PSA2"]: eofn_obs = "3" eofn_mod = "3" - else: + else: eofn_obs = "1" eofn_mod = "1" - ############################################## - cmd = (" ".join([ - 'variability_modes_driver.py', - '-p 
parameterfile.py' , - '--variability_mode' , '{}'.format(var_mode), - '--eofn_mod' , '{}'.format(eofn_mod), - '--eofn_obs' , '{}'.format(eofn_obs), - '--varOBS' , '{}'.format(varOBS), - '--osyear' , '{}'.format(reftyrs), - '--oeyear' , '{}'.format(reftyre), - '--reference_data_name' , '{}'.format(refname), - '--reference_data_path' , '{}'.format(refpath), - '--case_id' , '{}'.format('${case_id}') - ])) - lstcmd.append(cmd); del(cmd) + ############################################## + lstcmd.append( + " ".join([ + 'variability_modes_driver.py', ' -p parameterfile.py', + '--variability_mode' , '{}'.format(var_mode), + '--eofn_mod' , '{}'.format(eofn_mod), + '--eofn_obs' , '{}'.format(eofn_obs), + '--varOBS' , '{}'.format(varOBS), + '--osyear' , '{}'.format(reftyrs), + '--oeyear' , '{}'.format(reftyre), + '--reference_data_name' , '{}'.format(refname), + '--reference_data_path' , '{}'.format(refpath), + '--case_id' , '{}'.format('${case_id}') + ]) + ) if (len(lstcmd) > 0 ) and multiprocessing: print("Parallel computing with {} jobs".format(str(len(lstcmd)))) @@ -965,13 +980,13 @@ print("calculate enso metrics") enso_groups = '{{ enso_groups }}'.split(",") lstcmd = [] for metricsCollection in enso_groups: - cmd = (" ".join([ - 'enso_driver.py ', - '-p parameterfile.py', - '--metricsCollection', '{}'.format(metricsCollection), - '--case_id' , '{}'.format('${case_id}') - ])) - lstcmd.append(cmd); del(cmd) + lstcmd.append( + " ".join([ + 'enso_driver.py ', ' -p parameterfile.py', + '--metricsCollection', '{}'.format(metricsCollection), + '--case_id' , '{}'.format('${case_id}') + ]) + ) if (len(lstcmd) > 0 ) and multiprocessing: print("Parallel computing with {} jobs".format(str(len(lstcmd)))) @@ -993,8 +1008,10 @@ else: #organize diagnostic output obs_dict = json.load(open('obs_catalogue.json')) obs_name = list(obs_dict.keys())[0] -collect_enso_diags(enso_groups,'{{figure_format}}', - obs_name,input_template,out_path) +collect_enso_diags( + enso_groups,'{{figure_format}}', 
+ obs_name,input_template,out_path +) {%- endif %} From ae24608e108fbaddffb8e2004b25bc9427fd599f Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Wed, 15 Jan 2025 00:31:41 -0600 Subject: [PATCH 15/23] bug fix --- zppy/templates/pcmdi_diags.bash | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash index 3b0ab53e..b3000638 100755 --- a/zppy/templates/pcmdi_diags.bash +++ b/zppy/templates/pcmdi_diags.bash @@ -960,7 +960,12 @@ else: time.sleep(5) #orgnize diagnostic output -collect_movs_diags(var_modes,'{{figure_format}}',input_template,out_path) +collect_movs_diags( + var_modes, + '{{figure_format}}', + input_template, + out_path +) {%- endif %} From 902b2c56e5e1bbecafe749e1b5b1f360084d7775 Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Sun, 19 Jan 2025 02:37:52 -0600 Subject: [PATCH 16/23] Rename fixed directory to be clearer --- zppy/defaults/default.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zppy/defaults/default.ini b/zppy/defaults/default.ini index 5048d10f..dcab0752 100755 --- a/zppy/defaults/default.ini +++ b/zppy/defaults/default.ini @@ -145,7 +145,7 @@ reference_alias = string(default="pcmdi_data/reference/reference_alias.json") # Utility file with functions for zppy-pcmdi data processing pcmdi_zppy_util = string(default="pcmdi_data/utility/pcmdi_zppy_util.py") # File of variable list to generate synthetic metrics plot -synthetic_metrics = string(default="pcmdi_data/metrics/synthetic_metrics_list.json") +synthetic_metrics = string(default="pcmdi_data/synthetic_metrics/synthetic_metrics_list.json") # Path to observation time-series data # Required for "mean_climate","variability_mode","enso" obs_ts = string(default="") From c2819860a162dad35aed7543f0ae6779c87e6f5d Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Sun, 19 Jan 2025 02:41:16 -0600 Subject: [PATCH 17/23] Change the default set of cmip6 data for synthetic plots --- 
zppy/defaults/default.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zppy/defaults/default.ini b/zppy/defaults/default.ini index dcab0752..79d5374a 100755 --- a/zppy/defaults/default.ini +++ b/zppy/defaults/default.ini @@ -275,7 +275,7 @@ cmip_enso_dir = string(default="") cmip_clim_dir = string(default="") cmip_movs_dir = string(default="") #group of pcmdi generated cmip metrics data (mip.exp.version) -cmip_clim_set = string(default="cmip6.historical.v20220928") +cmip_clim_set = string(default="cmip6.historical.v20250108") cmip_movs_set = string(default="cmip6.historical.v20220825") cmip_enso_set = string(default="cmip6.historical.v20210620") # options shared by pcmdi From 43dafdbbd62f7eecd91c9ffbff49af93663b5c6f Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Tue, 4 Feb 2025 17:06:46 -0600 Subject: [PATCH 18/23] Refine the workflow --- zppy/defaults/default.ini | 4 +- zppy/pcmdi_diags.py | 158 ++++++++++++++++++-------------- zppy/templates/pcmdi_diags.bash | 51 +++++++---- 3 files changed, 124 insertions(+), 89 deletions(-) diff --git a/zppy/defaults/default.ini b/zppy/defaults/default.ini index 79d5374a..1d27ada2 100755 --- a/zppy/defaults/default.ini +++ b/zppy/defaults/default.ini @@ -171,7 +171,7 @@ vars = string(default="pr,prw,psl,rlds,rldscs,rltcre,rstcre,rlut,rlutcs,rsds,rsd # sets of diagnostics from pcmdi package sets = string_list(default=list("mean_climate","variability_modes_atm","variability_modes_cpl","enso","synthetic_plots")) # options to identify subset of pcmdi drivers ("mean_climate","variability_mode","enso") -subsets = string(default="mean_climate,variability_modes,enso") +subsets = string_list(default=list("mean_climate","variability_modes","enso")) ########################################################################################### # Required for run_type="model_vs_model" runs, different from e3sm_diag, # model_vs_model in pcmdi referred to the comparision of two model simulations @@ -327,7 +327,7 @@ 
pcmdi_debug = string(default=False) EofScaling = string(default=None) ConvEOF = string(default=None) CBF = string(default=None) - subsets = string(default=None) + subsets = string_list(default=None) landmask = string(default=None) frequency = string(default=None) generate_sftlf = string(default=None) diff --git a/zppy/pcmdi_diags.py b/zppy/pcmdi_diags.py index ee074b7b..b1a0dcae 100644 --- a/zppy/pcmdi_diags.py +++ b/zppy/pcmdi_diags.py @@ -34,7 +34,11 @@ def pcmdi_diags(config, script_dir, existing_bundles, job_ids_file): # --- Generate and submit pcmdi_diags scripts --- for c in tasks: dependencies: List[str] = [] + c["sub"] = define_or_guess( + c, "subsection", "sub", ParameterGuessType.SECTION_GUESS + ) check_parameters_for_bash(c) + c["scriptDir"] = script_dir if "ts_num_years" in c.keys(): c["ts_num_years"] = int(c["ts_num_years"]) @@ -53,7 +57,8 @@ def pcmdi_diags(config, script_dir, existing_bundles, job_ids_file): ref_year_sets = get_years(c["ref_years"]) else: ref_year_sets = year_sets - for s, rs in zip(year_sets, ref_year_sets): + + for i, (s, rs) in enumerate(zip(year_sets, ref_year_sets)): c["year1"] = s[0] c["year2"] = s[1] if ("last_year" in c.keys()) and (c["year2"] > c["last_year"]): @@ -62,7 +67,11 @@ def pcmdi_diags(config, script_dir, existing_bundles, job_ids_file): c["ref_year1"] = rs[0] c["ref_year2"] = rs[1] - check_and_define_parameters(c) + if c["sub"] != "synthetic_plots": + check_and_define_parameters(c) + else: + c["prefix"] = f"pcmdi_diags_{c['sub']}_{c['tag']}" + bash_file, settings_file, status_file = get_file_names( script_dir, c["prefix"] ) @@ -81,10 +90,14 @@ def pcmdi_diags(config, script_dir, existing_bundles, job_ids_file): for yr in range(c["year1"], c["year2"], c["ts_num_years"]): add_ts_dependencies(c, dependencies, script_dir, yr) - add_pcmdi_dependencies(c, dependencies, script_dir) + if c["sub"] == "synthetic_plots": + add_pcmdi_dependencies(c, dependencies, script_dir) + if i < len(year_sets) - 1: + continue 
c["dependencies"] = dependencies write_settings_file(settings_file, c, s) + export = "ALL" existing_bundles = handle_bundles( c, @@ -106,7 +119,6 @@ def pcmdi_diags(config, script_dir, existing_bundles, job_ids_file): ) else: print(f"...adding to bundle {c['bundle']}") - print(f" environment_commands={c['environment_commands']}") print_url(c, "pcmdi_diags") @@ -114,22 +126,33 @@ def pcmdi_diags(config, script_dir, existing_bundles, job_ids_file): def check_parameters_for_bash(c: Dict[str, Any]) -> None: - check_required_parameters( - c, - set(["mean_climate", "variability_mode_cpl", "variability_mode_atm", "enso"]), - "ref_final_yr", - ) - check_required_parameters( - c, set(["variability_mode_cpl", "variability_mode_atm", "enso"]), "ref_start_yr" - ) - check_required_parameters( - c, set(["variability_mode_cpl", "variability_mode_atm", "enso"]), "ref_end_yr" - ) + if c["sub"] != "synthetic_plots": + check_required_parameters( + c, + set( + ["mean_climate", "variability_mode_cpl", "variability_mode_atm", "enso"] + ), + "ref_final_yr", + ) + check_required_parameters( + c, + set( + ["mean_climate", "variability_mode_cpl", "variability_mode_atm", "enso"] + ), + "ref_start_yr", + ) + check_required_parameters( + c, + set( + ["mean_climate", "variability_mode_cpl", "variability_mode_atm", "enso"] + ), + "ref_end_yr", + ) def check_parameters_for_pcmdi(c: Dict[str, Any]) -> None: # check and set up the external data needed by pcmdi - if set(["synthetic_plots"]) & set(c["sets"]): + if c["sub"] == "synthetic_plots": define_or_guess2( c, "cmip_enso_dir", @@ -154,28 +177,27 @@ def check_mvm_only_parameters_for_bash(c: Dict[str, Any]) -> None: check_parameter_defined(c, "diff_title") check_parameter_defined(c, "ref_name") check_parameter_defined(c, "short_ref_name") - - check_required_parameters( - c, - set(["mean_climate", "variability_mode_cpl", "variability_mode_atm", "enso"]), - "ref_start_yr", - ) - ts_sets = set( - [ - "mean_climate", - "variability_mode_cpl", - 
"variability_mode_atm", - "enso", - ] - ) - check_required_parameters(c, ts_sets, "ts_num_years_ref") - check_required_parameters(c, ts_sets, "ts_subsection") + if c["sub"] != "synthetic_plots": + check_required_parameters( + c, + set( + ["mean_climate", "variability_mode_cpl", "variability_mode_atm", "enso"] + ), + "ref_start_yr", + ) + ts_sets = set( + [ + "mean_climate", + "variability_mode_cpl", + "variability_mode_atm", + "enso", + ] + ) + check_required_parameters(c, ts_sets, "ts_num_years_ref") + check_required_parameters(c, ts_sets, "ts_subsection") def check_and_define_parameters(c: Dict[str, Any]) -> None: - c["sub"] = define_or_guess( - c, "subsection", "grid", ParameterGuessType.SECTION_GUESS - ) # TODO: do this based on sets, rather than by relying on the user setting ts_num_years if "ts_num_years" in c.keys(): define_or_guess2( @@ -187,7 +209,6 @@ def check_and_define_parameters(c: Dict[str, Any]) -> None: prefix: str if c["run_type"] == "model_vs_obs": prefix = f"pcmdi_diags_{c['sub']}_{c['tag']}_{c['year1']:04d}-{c['year2']:04d}" - elif c["run_type"] == "model_vs_model": check_mvm_only_parameters_for_bash(c) prefix = f"pcmdi_diags_{c['sub']}_{c['tag']}_{c['year1']:04d}-{c['year2']:04d}_vs_{c['ref_year1']:04d}-{c['ref_year2']:04d}" @@ -230,36 +251,35 @@ def add_ts_dependencies( def add_pcmdi_dependencies( c: Dict[str, Any], dependencies: List[str], script_dir: str ) -> None: - pcmdi_sub = define_or_guess( - c, "pcmdi_diags", "sub", ParameterGuessType.SECTION_GUESS - ) - status_suffix: str = f"_{c['year1']:04d}-{c['year2']:04d}.status" - if "synthetic_plots" in pcmdi_sub: - check_parameter_defined(c, "run_type") - if "mean_climate" in c["sets"]: - dependencies.append( - os.path.join( - script_dir, - f"pcmdi_diags_mean_climate_{c['run_type']}{status_suffix}", - ) - ) - if "variability_mode_cpl" in c["sets"]: - dependencies.append( - os.path.join( - script_dir, - f"pcmdi_diags_variability_mode_cpl_{c['run_type']}{status_suffix}", - ) - ) - if 
"variability_mode_atm" in c["sets"]: - dependencies.append( - os.path.join( - script_dir, - f"pcmdi_diags_variability_mode_atm_{c['run_type']}{status_suffix}", - ) - ) - if "enso" in c["sets"]: - dependencies.append( - os.path.join( - script_dir, f"pcmdi_diags_enso_{c['run_type']}{status_suffix}" - ) - ) + check_parameter_defined(c, "run_type") + if c["run_type"] == "model_vs_obs": + status_suffix = f"_{c['year1']:04d}-{c['year2']:04d}" + elif c["run_type"] == "model_vs_model": + status_suffix = f"_{c['year1']:04d}-{c['year2']:04d}_vs_{c['ref_year1']:04d}-{c['ref_year2']:04d}" + if "mean_climate" in c["sets"]: + status_file = os.path.join( + script_dir, + f"pcmdi_diags_mean_climate_{c['run_type']}{status_suffix}.status", + ) + if os.path.exists(status_file): + dependencies.append(status_file) + if "variability_modes_cpl" in c["sets"]: + status_file = os.path.join( + script_dir, + f"pcmdi_diags_variability_modes_cpl_{c['run_type']}{status_suffix}.status", + ) + if os.path.exists(status_file): + dependencies.append(status_file) + if "variability_modes_atm" in c["sets"]: + status_file = os.path.join( + script_dir, + f"pcmdi_diags_variability_modes_atm_{c['run_type']}{status_suffix}.status", + ) + if os.path.exists(status_file): + dependencies.append(status_file) + if "enso" in c["sets"]: + status_file = os.path.join( + script_dir, f"pcmdi_diags_enso_{c['run_type']}{status_suffix}.status" + ) + if os.path.exists(status_file): + dependencies.append(status_file) diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash index b3000638..9b401751 100755 --- a/zppy/templates/pcmdi_diags.bash +++ b/zppy/templates/pcmdi_diags.bash @@ -28,6 +28,7 @@ echo "RUNNING ${id}" > {{ prefix }}.status # Basic definitions case="{{ case }}" www="{{ www }}" +{% if "synthetic_plots" not in subsection %} y1={{ year1 }} y2={{ year2 }} Y1="{{ '%04d' % (year1) }}" @@ -36,10 +37,11 @@ Y2="{{ '%04d' % (year2) }}" ref_Y1="{{ '%04d' % (ref_year1) }}" ref_Y2="{{ '%04d' % 
(ref_year2) }}" {%- endif %} +{%- endif %} run_type="{{ run_type }}" tag="{{ tag }}" -results_dir=${tag}_${Y1}-${Y2} +results_dir=${tag} #_${Y1}-${Y2} ref_name={{ ref_name }} @@ -231,8 +233,8 @@ create_links_ts_obs() do fname=`basename $file` PREFIX=${fname: :-17} - YYYYS=${fname: -16:-10} - YYYYE=${fname: -9:-3} + YYYYS=${fname: -16:-12} + YYYYE=${fname: -9:-5} if [[ ${YYYYS} < ${begin_year} ]];then YYYYS=${begin_year} fi @@ -742,11 +744,6 @@ from pcmdi_zppy_util import( variability_modes_plot_driver, ) -############################## -start_yr = int('${Y1}') -end_yr = int('${Y2}') -num_years = end_yr - start_yr + 1 - #parallel calculation num_workers = {{ num_workers }} if num_workers < 2: @@ -754,6 +751,12 @@ if num_workers < 2: else: multiprocessing = {{multiprocessing}} +{%- if "synthetic_plots" not in subsection %} +############################## +start_yr = int('${Y1}') +end_yr = int('${Y2}') +num_years = end_yr - start_yr + 1 + # DATA LOCATION: Reference {%- if "mean_climate" in subsection %} test_data_path = '${climo_dir_primary}' @@ -770,7 +773,6 @@ reference_data_set = '{{ obs_sets }}'.split(",") reference_data_set = ['${model_name_ref}'.split(".")[1]] {%- endif %} -{%- if "synthetic_plots" not in subsection %} variables = '{{ vars }}'.split(",") ############################################################### #check and process derived quantities, these quantities are @@ -834,18 +836,17 @@ input_template = os.path.join( '%(metric_type)', '${model_name}'.split(".")[0], '${model_name}'.split(".")[1], - '${case_id}') + '${case_id}' +) out_path = os.path.join( '${results_dir}', - '%(group_type)') + '%(group_type)' +) {%- endif %} {%- if "mean_climate" in subsection %} -#################################################### -# call pcmdi mean climate diagnostics -#################################################### regions = '{{regions}}'.split(",") #assiagn region to each variable @@ -870,6 +871,9 @@ for var in variables: ]) ) 
+#################################################### +# call pcmdi mean climate diagnostics +#################################################### if (len(lstcmd) > 0 ) and multiprocessing: print("Parallel computing with {} jobs".format(str(len(lstcmd)))) stdout,stderr,return_code = parallel_jobs(lstcmd,num_workers) @@ -889,8 +893,12 @@ else: #orgnize diagnostic output collect_clim_diags( - regions,variables,'{{figure_format}}', - input_template,out_path + regions,variables, + '{{figure_format}}', + '${model_name}', + '${case_id}', + input_template, + out_path ) {%- endif %} @@ -963,6 +971,8 @@ else: collect_movs_diags( var_modes, '{{figure_format}}', + '${model_name}', + '${case_id}', input_template, out_path ) @@ -1014,8 +1024,13 @@ else: obs_dict = json.load(open('obs_catalogue.json')) obs_name = list(obs_dict.keys())[0] collect_enso_diags( - enso_groups,'{{figure_format}}', - obs_name,input_template,out_path + enso_groups, + '{{figure_format}}', + obs_name, + '${model_name}', + '${case_id}', + input_template, + out_path ) {%- endif %} From d30ade22de9604708b7f924bf72dae5e5f380563 Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Wed, 5 Feb 2025 11:41:49 -0600 Subject: [PATCH 19/23] Refine workflow --- zppy/pcmdi_diags.py | 4 +++- zppy/templates/pcmdi_diags.bash | 27 ++++++++++++++++++--------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/zppy/pcmdi_diags.py b/zppy/pcmdi_diags.py index b1a0dcae..90627627 100644 --- a/zppy/pcmdi_diags.py +++ b/zppy/pcmdi_diags.py @@ -70,7 +70,9 @@ def pcmdi_diags(config, script_dir, existing_bundles, job_ids_file): if c["sub"] != "synthetic_plots": check_and_define_parameters(c) else: - c["prefix"] = f"pcmdi_diags_{c['sub']}_{c['tag']}" + prefix = f"pcmdi_diags_{c['sub']}_{c['tag']}" + print(prefix) + c["prefix"] = prefix bash_file, settings_file, status_file = get_file_names( script_dir, c["prefix"] diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash index 9b401751..1f114abf 100755 
--- a/zppy/templates/pcmdi_diags.bash +++ b/zppy/templates/pcmdi_diags.bash @@ -895,7 +895,10 @@ else: collect_clim_diags( regions,variables, '{{figure_format}}', - '${model_name}', + '${model_name}'.split(".")[0], + '${model_name}'.split(".")[1], + '${model_name}'.split(".")[2], + '${model_name}'.split(".")[3], '${case_id}', input_template, out_path @@ -971,7 +974,10 @@ else: collect_movs_diags( var_modes, '{{figure_format}}', - '${model_name}', + '${model_name}'.split(".")[0], + '${model_name}'.split(".")[1], + '${model_name}'.split(".")[2], + '${model_name}'.split(".")[3], '${case_id}', input_template, out_path @@ -1024,13 +1030,16 @@ else: obs_dict = json.load(open('obs_catalogue.json')) obs_name = list(obs_dict.keys())[0] collect_enso_diags( - enso_groups, - '{{figure_format}}', - obs_name, - '${model_name}', - '${case_id}', - input_template, - out_path + enso_groups, + '{{figure_format}}', + obs_name, + '${model_name}'.split(".")[0], + '${model_name}'.split(".")[1], + '${model_name}'.split(".")[2], + '${model_name}'.split(".")[3], + '${case_id}', + input_template, + out_path ) {%- endif %} From bdf224f7d97fc6d467fe27ffd098be080d355f34 Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Tue, 18 Feb 2025 00:47:54 -0600 Subject: [PATCH 20/23] Add Enso metrics summary plot --- zppy/templates/pcmdi_diags.bash | 44 ++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash index 1f114abf..6d7d890c 100755 --- a/zppy/templates/pcmdi_diags.bash +++ b/zppy/templates/pcmdi_diags.bash @@ -664,7 +664,7 @@ obs_catalogue = 'obs_catalogue.json' reference_data_lf_path = json.load(open('obs_landmask.json')) # METRICS COLLECTION (set in namelist, and main driver) -# metricsCollection = ENSO_perf # ENSO_perf, ENSO_tel, ENSO_proc +# metricsCollection = ENSO_perf, ENSO_tel, ENSO_proc # OUTPUT results_dir = os.path.join( @@ -742,6 +742,7 @@ from pcmdi_zppy_util import( 
variable_region, mean_climate_plot_driver, variability_modes_plot_driver, + enso_plot_driver ) #parallel calculation @@ -1053,19 +1054,24 @@ metric_sets = '{{sub_sets}}'.split(",") figure_sets = '{{synthetic_sets}}'.split(",") figure_format = '{{figure_format}}' test_input_path = os.path.join( - '${www}','${case}','pcmdi_diags','${results_dir}', - 'metrics_data','%(group_type)' + '${www}', + '${case}', + 'pcmdi_diags', + '${results_dir}', + 'metrics_data', + '%(group_type)' ) metric_dict = json.load(open('synthetic_metrics_list.json')) parameter = OrderedDict() parameter['save_data'] = True -parameter['case_id'] = '${case_id}' parameter['out_dir'] = os.path.join('${results_dir}','ERROR_metric') parameter['test_name'] = '{{model_name}}' -parameter['tableID'] = '{{model_tableID}}' -parameter['model_name'] = '-'.join('{{model_name}}'.split(".")[2:]) + +parameter['model_name'] = [ '-'.join('{{model_name}}'.split(".")[2:]) ] +parameter['tableID'] = [ '{{model_tableID}}' ] +parameter['case_id'] = [ '${case_id}' ] for metric in metric_sets: parameter['test_path'] = test_input_path.replace('%(group_type)',metric) @@ -1079,14 +1085,13 @@ for metric in metric_sets: parameter['cmip_name'] = '{{cmip_movs_set}}' parameter['movs_mode'] = '{{ atm_modes }}'.split(",") + '{{ cpl_modes }}'.split(",") merge_lib,mode_season_list = collect_movs_metrics(parameter) - elif metric == 'enso': + elif metric == 'enso_metric': parameter['cmip_path'] = '{{cmip_enso_dir}}' parameter['cmip_name'] = '{{cmip_enso_set}}' - merge_lib = collect_enso_metrics(parameter) - for stat in metric_dict[metric].keys(): - if metric == "mean_climate": - mean_climate_plot_driver( + if metric == "mean_climate": + for stat in metric_dict[metric].keys(): + mean_climate_plot_driver( metric, stat, merge_lib.regions, parameter['model_name'], @@ -1095,16 +1100,25 @@ for metric in metric_sets: merge_lib.var_list, merge_lib.var_unit_list, parameter['save_data'], - parameter['out_dir']) - elif metric == "variability_modes": 
- variability_modes_plot_driver( + parameter['out_dir'], + figure_format) + elif metric == "variability_modes": + for stat in metric_dict[metric].keys(): + variability_modes_plot_driver( metric, stat, parameter['model_name'], parameter['diag_vars'][stat], merge_lib[stat], mode_season_list, parameter['save_data'], - parameter['out_dir']) + parameter['out_dir'], + figure_format) + elif metric == "enso_metric": + for stat in metric_dict[metric].keys(): + enso_plot_driver( + metric,stat, + parameter, + figure_format) {%- endif %} From 0c1618891855fc08a4256df5282366dd54c83a67 Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Fri, 25 Apr 2025 20:11:58 -0500 Subject: [PATCH 21/23] Refinement on the code and add viewer page --- zppy/defaults/default.ini | 12 + zppy/templates/pcmdi_diags.bash | 1475 ++++++++++++++++--------------- 2 files changed, 785 insertions(+), 702 deletions(-) diff --git a/zppy/defaults/default.ini b/zppy/defaults/default.ini index 1d27ada2..79c415f6 100755 --- a/zppy/defaults/default.ini +++ b/zppy/defaults/default.ini @@ -138,12 +138,20 @@ scratch = string(default="") [pcmdi_diags] backend = string(default="mpl") cfg = string(default="") +# Title of zppy-pcmdi diagnostics +pcmdi_webtitle = string(default="E3SM-PMP Diagnostics") +# Version of zppy-pcmdi code +pcmdi_version = string(default="v3.8.2") # File of specified regions for mean climate calculation regions_specs = string(default="pcmdi_data/region/regions_specs.json") # File of observation data name for mean climate calculation reference_alias = string(default="pcmdi_data/reference/reference_alias.json") # Utility file with functions for zppy-pcmdi data processing pcmdi_zppy_util = string(default="pcmdi_data/utility/pcmdi_zppy_util.py") +# Utility file with functions for zppy-pcmdi viewer page processing +pcmdi_viewer_util = string(default="pcmdi_data/utility/pcmdi_viewer_util.py") +# Template files for zppy-pcmdi viewer page processing +pcmdi_viewer_template = 
string(default="pcmdi_data/viewer") # File of variable list to generate synthetic metrics plot synthetic_metrics = string(default="pcmdi_data/synthetic_metrics/synthetic_metrics_list.json") # Path to observation time-series data @@ -295,7 +303,11 @@ pcmdi_debug = string(default=False) synthetic_sets = string(default=None) reference_alias = string(default=None) regions_specs = string(default=None) + pcmdi_webtitle = string(default=None) + pcmdi_version = string(default=None) pcmdi_zppy_util = string(default=None) + pcmdi_viewer_util = string(default=None) + pcmdi_viewer_template = string(default=None) synthetic_metrics = string(default=None) multiprocessing = boolean(default=None) num_workers = integer(default=None) diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash index 6d7d890c..50857887 100755 --- a/zppy/templates/pcmdi_diags.bash +++ b/zppy/templates/pcmdi_diags.bash @@ -36,17 +36,20 @@ Y2="{{ '%04d' % (year2) }}" {% if run_type == "model_vs_model" %} ref_Y1="{{ '%04d' % (ref_year1) }}" ref_Y2="{{ '%04d' % (ref_year2) }}" -{%- endif %} -{%- endif %} +{% endif %} +{% endif %} + run_type="{{ run_type }}" -tag="{{ tag }}" -results_dir=${tag} #_${Y1}-${Y2} +results_dir="{{ tag }}" ref_name={{ ref_name }} +# Top-level directory +web_dir=${www}/${case}/pcmdi_diags + ################################################## -#info to construct pcmdi-preferred data convension +# info to construct pcmdi-preferred data convention ################################################## model_name='{{ model_name }}' tableID='{{ model_tableID }}' @@ -75,357 +78,404 @@ cp -r '{{pcmdi_external_prefix}}/{{reference_alias}}' . # file for list of variables for synthetic_metrics metric plots cp -r '{{pcmdi_external_prefix}}/{{synthetic_metrics}}' . 
-{%- if ("mean_climate" in subsection) %} -#further simplification could be done in future -create_links_acyc_climo() -{ - ts_dir_source=$1 - ts_dir_destination=$2 - begin_year=$3 - end_year=$4 - name_key=$5 - error_num=$6 - # Create netcdf files for time series variables - mkdir -p ${ts_dir_destination} - cd ${ts_dir_destination} - # https://stackoverflow.com/questions/27702452/loop-through-a-comma-separated-shell-variable - variables="{{ cmip_vars }}" - for v in ${variables//,/ } - do - # Go through the time series files for between year1 and year2, using a step size equal to the number of years per time series file - for year in `seq ${begin_year} {{ ts_num_years }} ${end_year}`; - do - YYYY=`printf "%04d" ${year}` - for file in ${ts_dir_source}/${v}_*_${YYYY}*.nc - do - # Add this time series file to the list of files for cdscan to use - echo ${file} >> ${v}_files.txt +# utility file for pcmdi-zppy viewer +cp -r '{{pcmdi_external_prefix}}/{{pcmdi_viewer_util}}' . + +{% if "mean_climate" in subsection %} +create_links_acyc_climo() { + local ts_dir_source="$1" + local ts_dir_destination="$2" + local begin_year="$3" + local end_year="$4" + local name_key="$5" + local error_num="$6" + + mkdir -p "${ts_dir_destination}" + cd "${ts_dir_destination}" || exit + + local variables="{{ cmip_vars }}" + local script_dir="{{ scriptDir }}" + local prefix="{{ prefix }}" + local ts_step="{{ ts_num_years }}" + local dofm=(15.5 45 74.5 105 125.5 166 196.5 227.5 258 288.5 319 349.5) + + for v in ${variables//,/ }; do + > "${v}_files.txt" # Start fresh + + shopt -s nullglob + for year in $(seq "${begin_year}" "${ts_step}" "${end_year}"); do + local YYYY + YYYY=$(printf "%04d" "${year}") + for file in ${ts_dir_source}/${v}_*_${YYYY}*.nc; do + [[ -f "${file}" ]] && echo "${file}" >> "${v}_files.txt" done done - #derive annual cycle climate mean - dofm=(15 46 74 105 125 166 196 227 258 288 319 349) #middle day of month - for month in `seq 1 1 12`; - do - MM=`printf "%02d" ${month}` - 
MDAY=dofm[${month}-1] - cat ${v}_files.txt | ncra -O -h -F -d "time,${month},,12" ${v}_clm_${MM}.nc + shopt -u nullglob + + # Derive monthly climatology files + for month in $(seq 1 12); do + local MM + MM=$(printf "%02d" "${month}") + ncra -O -h -F -d time,"${month}",,12 $(< "${v}_files.txt") "${v}_clm_${MM}.nc" done - #Concatenate files to form the annual cycle monthly climatology file - combined_name="${name_key}.${v}.${begin_year}01-${end_year}12.AC.${case_id}.nc" - ncrcat -O -d time,0, ${v}_clm_*.nc ${combined_name} - #modify time to avoid issues in pcmdi calculation - ncap2 -O -h -s 'time[time]={15.5, 45, 74.5, 105, 125.5, 166, 196.5, 227.5, 258, 288.5,319, 349.5};time_bnds[time,bnds]={0, 31, 31, 59, 59, 90, 90, 120, 120, 151, 151, 181, 181, 212, 212, 243, 243, 273, 273, 304, 304, 334, 334, 365.};time@units="days since 1850-01-01 00:00:00";time@calendar="noleap";time@bounds="time_bnds"' ${combined_name} ${combined_name} - rm -rvf ${v}_clm_*.nc - if [ $? != 0 ]; then - cd {{ scriptDir }} - echo "ERROR (${error_num})" > {{ prefix }}.status - exit ${error_num} + + # Combine to form full annual cycle file + local combined_name="${name_key}.${v}.${begin_year}01-${end_year}12.AC.${case_id}.nc" + ncrcat -O -d time,0, "${v}_clm_"*.nc "${combined_name}" + + # Adjust time metadata for PCMDI diagnostics + local cmdfix1='time[time]={15.5, 45, 74.5, 105, 125.5, 166, 196.5, 227.5, 258, 288.5,319, 349.5}' + local cmdfix2='time_bnds[time,bnds]={0,31,31,59,59,90,90,120,120,151,151,181,181,212,212,243,243,273,273,304,304,334,334,365.}' + local cmdfix3='time@units="days since 1850-01-01 00:00:00"' + local cmdfix4='time@calendar="noleap"' + local cmdfix5='time@bounds="time_bnds"' + ncap2 -O -h -s "${cmdfix1};${cmdfix2};${cmdfix3};${cmdfix4};${cmdfix5}" "${combined_name}" "${combined_name}" + + rm -vf "${v}_clm_"*.nc + + if [[ $? -ne 0 ]]; then + cd "${script_dir}" || exit + echo "ERROR (${error_num})" > "${prefix}.status" + exit "${error_num}" fi done + cd .. 
} {% if run_type == "model_vs_obs" %} -create_links_acyc_climo_obs() -{ - ts_dir_source=$1 - ts_dir_destination=$2 - begin_year=$3 - end_year=$4 - error_num=$5 - # Create netcdf files for time series variables - mkdir -p ${ts_dir_destination} - cd ${ts_dir_destination} - for file in ${ts_dir_source}/*.nc - do - fname=`basename $file` - PREFIX=${fname: :-17} - YYYYS=${fname: -16:-10} - YYYYE=${fname: -9:-3} - if [[ ${YYYYS} < ${begin_year} ]];then - YYYYS=${begin_year} +create_links_acyc_climo_obs() { + local ts_dir_source="$1" + local ts_dir_destination="$2" + local begin_year="$3" + local end_year="$4" + local error_num="$5" + + local script_dir="{{ scriptDir }}" + local prefix="{{ prefix }}" + local dofm=(15.5 45 74.5 105 125.5 166 196.5 227.5 258 288.5 319 349.5) + + mkdir -p "${ts_dir_destination}" + cd "${ts_dir_destination}" || exit + + for file in ${ts_dir_source}/*.nc; do + local fname + local YYYYS YYYYE + local PREFIX + local ttag tmp_file MM combined_name + + fname=$(basename "${file}") + + # Match two date patterns (YYYYMM or YYYYMMDD) separated by _ or - + if [[ ${fname} =~ ([0-9]{6,8})[_-]([0-9]{6,8}) ]]; then + YYYYS="${BASH_REMATCH[1]}" + YYYYE="${BASH_REMATCH[2]}" + else + echo "Warning: Could not extract dates from ${fname}" + continue fi - if [[ ${YYYYE} > ${end_year} ]];then - YYYYE=${end_year} - fi - ttag=`printf "%04d" ${YYYYS}`01-`printf "%04d" ${YYYYE}`12 - # select the interest period + + # Clip to specified year range + if [[ ${YYYYS} -lt ${begin_year} ]]; then YYYYS=${begin_year}; fi + if [[ ${YYYYE} -gt ${end_year} ]]; then YYYYE=${end_year}; fi + + # Extract prefix before the date range (removes from .${YYYYS} or -${YYYYS}) + PREFIX="${fname%%[._-]${YYYYS}*}" + + ttag="$(printf "%04d" "${YYYYS}")01-$(printf "%04d" "${YYYYE}")12" tmp_file="tmp_combine_${ttag}.nc" - ncrcat -d time,"${YYYYS}-01-01,${YYYYE}-12-31" ${file} ${tmp_file} - # Go through the time serie file, and derive annual cycle climate mean - dofm=(15 46 74 105 125 166 196 
227 258 288 319 349) #middle day of month - for month in `seq 1 1 12`; - do - MM=`printf "%02d" ${month}` - MDAY=dofm[${month}-1] - ncra -O -h -F -d "time,${month},,12" ${tmp_file} tmp_clm_${MM}.nc + + ncrcat -O -d time,"${YYYYS}-01-01","${YYYYE}-12-31" "${file}" "${tmp_file}" + + # Derive monthly climatology + for month in $(seq 1 12); do + MM=$(printf "%02d" ${month}) + ncra -O -h -F -d time,"${month}",,12 "${tmp_file}" "tmp_clm_${MM}.nc" done - #Concatenate files to form the annual cycle monthly climatology file + combined_name="${PREFIX}.${ttag}.AC.${case_id}.nc" - ncrcat -O -d time,0, tmp_clm_*.nc ${combined_name} - #modify time to avoid issues in pcmdi calculation - ncap2 -O -h -s 'time[time]={15.5, 45, 74.5, 105, 125.5, 166, 196.5, 227.5, 258, 288.5,319, 349.5};time@units="days since 1850-01-01 00:00:00";time@calendar="noleap";' ${combined_name} ${combined_name} - ncap2 -O -h -s 'defdim("bnds",2);time_bnds=make_bounds(time,$bnds,"time_bnds");time_bnds@units=time@units;time_bnds@calendar=time@calendar' ${combined_name} ${combined_name} - rm -rvf tmp_*.nc - if [ $? != 0 ]; then - cd {{ scriptDir }} - echo "ERROR (${error_num})" > {{ prefix }}.status - exit ${error_num} + ncrcat -O tmp_clm_*.nc "${combined_name}" + + # Adjust time metadata + local cmdfix1='time[time]={15.5, 45, 74.5, 105, 125.5, 166, 196.5, 227.5, 258, 288.5,319, 349.5}' + local cmdfix2='time@units="days since 1850-01-01 00:00:00"' + local cmdfix3='time@calendar="noleap"' + ncap2 -O -h -s "${cmdfix1};${cmdfix2};${cmdfix3}" "${combined_name}" "${combined_name}" + + local cmdfix4='defdim("bnds",2)' + local cmdfix5='time_bnds=make_bounds(time,$bnds,"time_bnds")' + local cmdfix6='time_bnds@units=time@units' + local cmdfix7='time_bnds@calendar=time@calendar' + ncap2 -O -h -s "${cmdfix4};${cmdfix5};${cmdfix6};${cmdfix7}" "${combined_name}" "${combined_name}" + + rm -vf tmp_*.nc + + if [[ $? 
-ne 0 ]]; then + cd "${script_dir}" || exit + echo "ERROR (${error_num})" > "${prefix}.status" + exit "${error_num}" fi done + cd .. } -{%- endif %} -{%- endif %} +{% endif %} +{% endif %} -{%- if ("variability_modes_cpl" in subsection) or ("variability_modes_atm" in subsection) or ("enso" in subsection) %} -create_links_ts() -{ - ts_dir_source=$1 - ts_dir_destination=$2 - begin_year=$3 - end_year=$4 - subname=$5 - error_num=$6 - # Create netcdf files for time series variables - mkdir -p ${ts_dir_destination} - cd ${ts_dir_destination} - # https://stackoverflow.com/questions/27702452/loop-through-a-comma-separated-shell-variable - variables="{{ vars }}" - for v in ${variables//,/ } - do - # Go through the time series files for between year1 and year2, using a step size equal to the number of years per time series file - for year in `seq ${begin_year} {{ ts_num_years }} ${end_year}`; - do - YYYY=`printf "%04d" ${year}` - for file in ${ts_dir_source}/${v}_*_${YYYY}*.nc - do - # Add this time series file to the list of files for cdscan to use - echo ${file} >> ${v}_files.txt +{% if "variability_modes_cpl" in subsection + or "variability_modes_atm" in subsection + or "enso" in subsection %} +create_links_ts() { + local ts_dir_source="$1" + local ts_dir_destination="$2" + local begin_year="$3" + local end_year="$4" + local subname="$5" + local error_num="$6" + + local script_dir="{{ scriptDir }}" + local prefix="{{ prefix }}" + local vars="{{ vars }}" + local ts_step="{{ ts_num_years }}" + + mkdir -p "${ts_dir_destination}" + cd "${ts_dir_destination}" || exit + + local v file YYYY combined_name + + # Convert comma-separated list to array + IFS=',' read -ra var_array <<< "${vars}" + for v in "${var_array[@]}"; do + > "${v}_files.txt" # Reset file list + + shopt -s nullglob + for year in $(seq "${begin_year}" "${ts_step}" "${end_year}"); do + YYYY=$(printf "%04d" "${year}") + for file in ${ts_dir_source}/${v}_*_${YYYY}*.nc; do + [[ -f "${file}" ]] && echo "${file}" >> 
"${v}_files.txt" done -done - # netcdf file will be combined to cover the whole period from year1 to year2 + done + shopt -u nullglob + combined_name="${subname}.${v}.${begin_year}01-${end_year}12.nc" - cat ${v}_files.txt | ncrcat -v ${v} -d "time,${begin_year}-01-01,${end_year}-12-31" ${combined_name} - #modify time to avoid issues in pcmdi calculation - ncap2 -O -h -s 'defdim("bnds",2);time_bnds=make_bounds(time,$bnds,"time_bnds");time_bnds@units=time@units;time_bnds@calendar=time@calendar' ${combined_name} ${combined_name} - if [ $? != 0 ]; then - cd {{ scriptDir }} - echo "ERROR (${error_num})" > {{ prefix }}.status - exit ${error_num} + if [[ -s "${v}_files.txt" ]]; then + ncrcat -O -v "${v}" -d time,"${begin_year}-01-01","${end_year}-12-31" $(< "${v}_files.txt") "${combined_name}" + + # Add calendar attribute if missing + if ! ncks -m "${combined_name}" | grep -q "calendar"; then + echo "Adding missing calendar attribute to time..." + ncatted -a calendar,time,o,c,"standard" "${combined_name}" + fi + + # Add time bounds + local cmdfix1='defdim("bnds",2)' + local cmdfix2='time_bnds=make_bounds(time,$bnds,"time_bnds")' + local cmdfix3='time_bnds@units=time@units' + local cmdfix4='time_bnds@calendar=time@calendar' + ncap2 -O -h -s "${cmdfix1};${cmdfix2};${cmdfix3};${cmdfix4}" "${combined_name}" "${combined_name}" + + if [[ $? -ne 0 ]]; then + cd "${script_dir}" || exit + echo "ERROR (${error_num})" > "${prefix}.status" + exit "${error_num}" + fi + else + echo "Warning: No input files found for variable ${v}. Skipping." fi done + cd .. 
} {% if run_type == "model_vs_obs" %} -create_links_ts_obs() -{ - ts_dir_source=$1 - ts_dir_destination=$2 - begin_year=$3 - end_year=$4 - error_num=$5 - # Create netcdf files for time series variables - mkdir -p ${ts_dir_destination} - cd ${ts_dir_destination} - for file in ${ts_dir_source}/*.nc - do - fname=`basename $file` - PREFIX=${fname: :-17} - YYYYS=${fname: -16:-12} - YYYYE=${fname: -9:-5} - if [[ ${YYYYS} < ${begin_year} ]];then - YYYYS=${begin_year} +create_links_ts_obs() { + local ts_dir_source="$1" + local ts_dir_destination="$2" + local begin_year="$3" + local end_year="$4" + local error_num="$5" + + local script_dir="{{ scriptDir }}" + local prefix="{{ prefix }}" + + mkdir -p "${ts_dir_destination}" + cd "${ts_dir_destination}" || exit + + local file fname PREFIX YYYYS YYYYE ttag combined_name + + for file in ${ts_dir_source}/*.nc; do + fname=$(basename "$file") + # Match two time patterns (YYYYMM or YYYYMMDD) separated by _ or - + if [[ ${fname} =~ ([0-9]{6,8})[_-]([0-9]{6,8}) ]]; then + YYYYS="${BASH_REMATCH[1]}" + YYYYE="${BASH_REMATCH[2]}" + else + echo "Warning: Could not extract dates from ${fname}" + continue fi - if [[ ${YYYYE} > ${end_year} ]];then - YYYYE=${end_year} + + # Optional: clip years if needed + if [[ ${YYYYS} -lt ${begin_year} ]]; then + YYYYS="${begin_year}" fi - ttag=`printf "%04d" ${YYYYS}`01-`printf "%04d" ${YYYYE}`12 - # Go through the time series files and extract analysis period - combined_name=${PREFIX}.${ttag}.nc - ncrcat -d time,${YYYYS}-01-01,${YYYYE}-12-31 ${file} ${combined_name} - #modify time to avoid issues in pcmdi calculation - ncap2 -O -h -s 'defdim("bnds",2);time_bnds=make_bounds(time,$bnds,"time_bnds");time_bnds@units=time@units;time_bnds@calendar=time@calendar' ${combined_name} ${combined_name} - if [ $? 
!= 0 ]; then - cd {{ scriptDir }} - echo "ERROR (${error_num})" > {{ prefix }}.status - exit ${error_num} + + if [[ ${YYYYE} -gt ${end_year} ]]; then + YYYYE="${end_year}" + fi + + # Extract prefix (before the date range, ignoring separator) + PREFIX="${fname%%[._-]${YYYYS}*}" + + ttag="$(printf "%04d" "${YYYYS}")01-$(printf "%04d" "${YYYYE}")12" + combined_name="${PREFIX}.${ttag}.nc" + + # Extract subset of time series + ncrcat -O -d time,"${YYYYS}-01-01","${YYYYE}-12-31" "${file}" "${combined_name}" + + # Ensure time has calendar attribute + if ! ncks -m "${combined_name}" | grep -q "calendar"; then + echo "Adding missing calendar attribute to time..." + ncatted -a calendar,time,o,c,"standard" "${combined_name}" + fi + + # Add time bounds + local cmdfix1='defdim("bnds",2)' + local cmdfix2='time_bnds=make_bounds(time,$bnds,"time_bnds")' + local cmdfix3='time_bnds@units=time@units' + local cmdfix4='time_bnds@calendar=time@calendar' + ncap2 -O -h -s "${cmdfix1};${cmdfix2};${cmdfix3};${cmdfix4}" "${combined_name}" "${combined_name}" + + if [[ $? -ne 0 ]]; then + cd "${script_dir}" || exit + echo "ERROR (${error_num})" > "${prefix}.status" + exit "${error_num}" fi done + cd .. 
} -{%- endif %} -{%- endif %} +{% endif %} +{% endif %} ######################## -#prepare the model data +# Prepare the model data ######################## -{%- if ("mean_climate" in subsection) %} -climo_dir_primary=climo -# Create local links to input climo files -climo_dir_source={{ output }}/post/atm/{{ grid }}/cmip_ts/monthly -create_links_acyc_climo ${climo_dir_source} ${climo_dir_primary} ${Y1} ${Y2} ${model_name}.${tableID} 1 +{% if "mean_climate" in subsection %} +# Define output directory for climatology files +climo_dir_primary="climo" +# Path to model's monthly climatology files +climo_dir_source="{{ output }}/post/atm/{{ grid }}/cmip_ts/monthly" +# Link and process primary model climo data +create_links_acyc_climo "${climo_dir_source}" "${climo_dir_primary}" "${Y1}" "${Y2}" "${model_name}.${tableID}" 1 {% if run_type == "model_vs_model" %} -# Create local links to input climo files (ref model) -climo_dir_source_ref={{ reference_data_path }} -climo_dir_ref=climo_ref -create_links_acyc_climo ${climo_dir_source_ref} ${climo_dir_ref} ${ref_Y1} ${ref_Y2} ${model_name_ref}.${tableID_ref} 2 -{%- endif %} -{%- endif %} +# Path to reference model's climatology files +climo_dir_source_ref="{{ reference_data_path }}" +climo_dir_ref="climo_ref" +# Link and process reference model climo data +create_links_acyc_climo "${climo_dir_source_ref}" "${climo_dir_ref}" "${ref_Y1}" "${ref_Y2}" "${model_name_ref}.${tableID_ref}" 2 +{% endif %} +{% endif %} -{%- if ("variability_modes_cpl" in subsection) or ("variability_modes_atm" in subsection) or ("enso" in subsection) %} -#all diags will be run with ts data -ts_dir_primary=ts -# Create netcdf files for time series variables -ts_dir_source={{ output }}/post/atm/{{ grid }}/cmip_ts/monthly -create_links_ts ${ts_dir_source} ${ts_dir_primary} ${Y1} ${Y2} ${model_name}.${tableID} 3 +{% if "variability_modes_cpl" in subsection or "variability_modes_atm" in subsection or "enso" in subsection %} +# All diagnostics in this 
subsection use time series (ts) data +# Define output directory for primary model time series +ts_dir_primary="ts" +ts_dir_source="{{ output }}/post/atm/{{ grid }}/cmip_ts/monthly" +# Create local links and combine time series NetCDF files for the primary model +create_links_ts "${ts_dir_source}" "${ts_dir_primary}" "${Y1}" "${Y2}" "${model_name}.${tableID}" 3 {% if run_type == "model_vs_model" %} -ts_dir_source_ref={{ reference_data_path_ts }}/{{ ts_num_years_ref }}yr -ts_dir_ref=ts_ref -create_links_ts ${ts_dir_source_ref} ${ts_dir_ref} ${ref_Y1} ${ref_Y2} ${model_name_ref}.${tableID_ref} 4 -{%- endif %} -{%- endif %} +# Define time series path for reference model (adjust for different year spans) +ts_dir_source_ref="{{ reference_data_path_ts }}/{{ ts_num_years_ref }}yr" +ts_dir_ref="ts_ref" +# Create local links and combine ts files for the reference model +create_links_ts "${ts_dir_source_ref}" "${ts_dir_ref}" "${ref_Y1}" "${ref_Y2}" "${model_name_ref}.${tableID_ref}" 4 +{% endif %} +{% endif %} -{% if (run_type == "model_vs_obs") and ("synthetic_plots" not in subsection) %} -######################################################### -#prepare the observation data. As observation are often -#depends on the source available for analysis, therefore, -#we use external files to help collect the information -#for pcmdi diagnostics. -######################################################### -# Create netcdf files for time series variables +{% if run_type == "model_vs_obs" and "synthetic_plots" not in subsection %} +########################################################################### +# Prepare the observation data +# Observation datasets vary by diagnostic, so we use an external +# Python utility to handle linking and remapping to standard names. 
+########################################################################### obstmp_dir="obs_link" -mkdir -p ${obstmp_dir} -#create a python module to link observation data +mkdir -p "${obstmp_dir}" +echo "Linking observational data into ${obstmp_dir}..." + cat > link_observation.py << EOF import os import re import glob import json -import time -import datetime -import xcdat as xc -import numpy as np -import shutil -import pcmdi_metrics -from pcmdi_metrics.io import ( - xcdat_open -) +try: + from pcmdi_zppy_util import ObservationLinker +except ImportError as e: + raise ImportError("Module 'pcmdi_zppy_util' not found. Make sure it's installed and accessible.") from e + +# Inputs populated via templating +MODEL_NAME = '${model_name_ref}.${tableID_ref}' +VARIABLES = '{{ vars }}'.split(",") +OBS_SETS = '{{ obs_sets }}'.split(",") +OBS_TS_DIR = '{{ obs_ts }}' +OBS_TMP_DIR = '${obstmp_dir}' +OBS_ALIAS_FILE = "reference_alias.json" + +# Mapping from observational variable names to CMIP-standard +ALT_OBS_MAP = { + "pr": "PRECT", + "sst": "ts", + "sfcWind": "si10", + "taux": "tauu", + "tauy": "tauv", + "rltcre": "toa_cre_lw_mon", + "rstcre": "toa_cre_sw_mon", + "rtmt": "toa_net_all_mon" +} -from pcmdi_zppy_util import( - derive_var, +linker = ObservationLinker( + model_name=MODEL_NAME, + variables=VARIABLES, + obs_sets=OBS_SETS, + ts_dir_ref_source=OBS_TS_DIR, + obstmp_dir=OBS_TMP_DIR, + altobs_dic=ALT_OBS_MAP, + obs_alias_file=OBS_ALIAS_FILE ) -model_name = '${model_name_ref}.${tableID_ref}' -variables = '{{ vars }}'.split(",") -obs_sets = '{{ obs_sets }}'.split(",") -ts_dir_ref_source = '{{ obs_ts }}' - -# variable map from observation to cmip -altobs_dic = { "pr" : "PRECT", - "sst" : "ts", - "sfcWind" : "si10", - "taux" : "tauu", - "tauy" : "tauv", - "rltcre" : "toa_cre_lw_mon", - "rstcre" : "toa_cre_sw_mon", - "rtmt" : "toa_net_all_mon"} - -obs_dic = json.load(open('reference_alias.json')) - -######################################## -#first loop: link data to work 
directory -######################################## -for i,vv in enumerate(variables): - if "_" in vv or "-" in vv: - varin = re.split("_|-", vv)[0] - else: - varin = vv - if len(obs_sets) > 1 and len(obs_sets) == len(variables): - obsid = obs_sets[i] - else: - obsid = obs_sets[0] - - obsname = obs_dic[varin][obsid] - if "ceres_ebaf" in obsname: - obsstr = obsname.replace("_","*").replace("-","*") - else: - obsstr = obsname - - fpaths = sorted(glob.glob(os.path.join(ts_dir_ref_source,obsstr,varin+"_*.nc"))) - if (len(fpaths) > 0) and (os.path.exists(fpaths[0])): - template = fpaths[0].split("/")[-1] - yms = template.split("_")[-2][0:6] - yme = template.split("_")[-1][0:6] - obs = obsname.replace(".","_") - out = os.path.join( - '${obstmp_dir}', - '{}.{}.{}-{}.nc'.format( - model_name.replace('%(model)',obs), - varin,yms,yme) - ) - if not os.path.exists(out): - os.symlink(fpaths[0],out) - elif varin in altobs_dic.keys(): - varin1 = altobs_dic[varin] - fpaths = sorted(glob.glob( - os.path.join(ts_dir_ref_source,obsstr,varin1+"_*.nc")) - ) - if (len(fpaths) > 0) and (os.path.exists(fpaths[0])): - template = fpaths[0].split("/")[-1] - yms = template.split("_")[-2][0:6] - yme = template.split("_")[-1][0:6] - obs = obsname.replace(".","_") - out = os.path.join( - '${obstmp_dir}', - '{}.{}.{}-{}.nc'.format( - model_name.replace('%(model)',obs), - varin,yms,yme) - ) - ds = xcdat_open(fpaths[0]) - ds = ds.rename(name_dict={varin1:varin}) - ds.to_netcdf(out) - -##################################################################### -#second loop: check and process derived quantities -#note: these quantities are possibly not included as default in cmip -##################################################################### -for vv in enumerate(variables): - if vv in ['rltcre','rstcre']: - fpaths = sorted(glob.glob( - os.path.join('${obstmp_dir}',"*"+vv+"_*.nc")) - ) - if (len(fpaths) < 1) and (vv == 'rstcre'): - derive_var('${obstmp_dir}',vv,{'rsutcs':1,'rsut':-1},model_name) - 
elif (len(fpaths) < 1) and (vv == 'rltcre'): - derive_var('${obstmp_dir}',vv,{'rlutcs':1,'rlut':-1},model_name) - +linker.link_obs_data() +linker.process_derived_variables() EOF + ################### -# run process jobs +# Run process job ################### +echo "Linking observational data using SLURM..." + command="srun -N 1 python -u link_observation.py" -time ${command} -if [ $? != 0 ]; then +echo "Running: ${command}" +time eval "${command}" + +if [ $? -ne 0 ]; then cd {{ scriptDir }} - echo 'ERROR (6)' > {{ prefix }}.status + echo "ERROR (6)" > {{ prefix }}.status exit 6 fi + ####################################################### -#now create obs climo and timeseries for pcmdi diags -#use same period as test model when possible +# Now create obs climo and time series for PCMDI diags +# Use same period as test model when possible ####################################################### ts_dir_ref_source="{{ scriptDir }}/${workdir}/${obstmp_dir}" -{%- if ("mean_climate" in subsection) %} +{% if "mean_climate" in subsection %} climo_dir_ref=climo_ref -create_links_acyc_climo_obs ${ts_dir_ref_source} ${climo_dir_ref} ${Y1} ${Y2} 7 -{%- elif ("variability_modes_cpl" in subsection) or ("variability_modes_atm" in subsection) or ("enso" in subsection) %} +create_links_acyc_climo_obs "${ts_dir_ref_source}" "${climo_dir_ref}" ${Y1} ${Y2} 7 +{% elif "variability_modes_cpl" in subsection or "variability_modes_atm" in subsection or "enso" in subsection %} ts_dir_ref=ts_ref -create_links_ts_obs ${ts_dir_ref_source} ${ts_dir_ref} ${Y1} ${Y2} 8 -{%- endif %} -{%- endif %} +create_links_ts_obs "${ts_dir_ref_source}" "${ts_dir_ref}" ${Y1} ${Y2} 8 +{% endif %} + +{% endif %} {% if "synthetic_plots" not in subsection %} ######################################################## @@ -435,203 +485,209 @@ cat > parameterfile.py << EOF import os import sys import json - ##################### -#basic information +# Basic Information ##################### + start_yr = int('${Y1}') 
end_yr = int('${Y2}') num_years = end_yr - start_yr + 1 -period = "{:04d}{:02d}-{:04d}{:02d}".format(start_yr,1,end_yr,12) +period = f"{start_yr:04d}01-{end_yr:04d}12" -mip = '${model_name}'.split(".")[0] -exp = '${model_name}'.split(".")[1] -product = '${model_name}'.split(".")[2] -realm = '${model_name}'.split(".")[3] +model_parts = '${model_name}'.split('.') +mip, exp, product, realm = model_parts[:4] ############################################## -#Configuration shared with pcmdi diagnostics +# Configuration Shared with PCMDI Diagnostics ############################################## -# Record NetCDF output + +# Whether to generate NetCDF outputs for observations and model results nc_out_obs = {{ mov_nc_out_obs }} nc_out_model = {{ mov_nc_out_model }} -if nc_out_model or nc_out_obs: - ext = ".nc" -else: - ext = ".xml" + +# Output file extension: use .nc if either output is enabled, +# otherwise default to .xml +ext = ".nc" if nc_out_model or nc_out_obs else ".xml" + +# User annotation and debug flag user_notes = 'Provenance and results' debug = {{ pcmdi_debug }} -# Generate plots +# Enable plot generation for model and observation plot_model = {{ mov_plot_model }} -plot_obs = {{ mov_plot_obs }} # optional +plot_obs = {{ mov_plot_obs }} # optional -# Additional settings +# Execution mode and output format run_type = '{{ run_type }}' figure_format = '{{ figure_format }}' -# Save interpolated model climatology ? +# Save interpolated model climatologies? save_test_clims = {{ save_test_clims }} -# Save Metrics Results in Single File ? -# option: 'y' or 'n', set to 'n' as we -# run pcmdi for each variable separately +# Save all metrics results in a single file? 
+# Set to 'n' as metrics are computed per variable metrics_in_single_file = 'n' -# customize land/sea mask values -regions_values = {"land":100.,"ocean":0.} +# Custom values for land/sea masking +regions_values = { + "land": 100.0, + "ocean": 0.0 +} -#setup template for land/sea mask (fixed) +# Template path for land/sea mask file (fixed input) modpath_lf = os.path.join( 'fixed', 'sftlf.%(model).nc' ) -############################################ -#setup specific for mean climate metrics -{%- if ("mean_climate" in subsection) %} +{% if "mean_climate" in subsection %} -#case id +############################################ +# Setup Specific for Mean Climate Metrics +############################################ modver = "${case_id}" - -#always turn off parallel = False - -#land/sea mask file (already generated) generate_sftlf = False sftlf_filename_template = modpath_lf -# INTERPOLATION OPTIONS -# OPTIONS: '2.5x2.5' or an actual cdms2 grid object +# Target grid: can be '2.5x2.5' or a CDMS2 grid object string target_grid = '{{ target_grid }}' -targetGrid = target_grid +targetGrid = target_grid # for backward compatibility target_grid_string = '{{ target_grid_string }}' -# OPTIONS: 'regrid2','esmf' + +# Regridding tool and method (general use) +# OPTIONS: 'regrid2' or 'esmf' regrid_tool = '{{ regrid_tool }}' -# OPTIONS: 'linear','conservative', only if tool is esmf +# OPTIONS: 'linear' or 'conservative' (only for 'esmf') regrid_method = '{{ regrid_method }}' -# OPTIONS: "regrid2","esmf" -regrid_tool_ocn = '{{ regrid_tool_ocn }}' -# OPTIONS: 'linear','conservative', only if tool is esmf -regrid_method_ocn = ( '{{ regrid_method_ocn }}' ) - -####################################### -# DATA LOCATION: MODELS -# --------------------------------------------- + +# Regridding tool and method for ocean diagnostics +regrid_tool_ocn = '{{ regrid_tool_ocn }}' # 'regrid2' or 'esmf' +regrid_method_ocn = ('{{ regrid_method_ocn }}') # 'linear' or 'conservative' + +# Model realization(s) 
to consider realization = "*" -test_data_set = [ product ] + +# Model product name from input +test_data_set = [product] + +# Path to model climatology files test_data_path = '${climo_dir_primary}' -# Templates for model climatology files + +# Template for model climatology filenames filename_template = '.'.join([ - mip, - exp, - '%(model)', - '%(realization)', - '${tableID}', - '%(variable)', - period, - 'AC', - '${case_id}', - 'nc' + mip, + exp, + '%(model)', + '%(realization)', + '${tableID}', + '%(variable)', + period, + 'AC', + '${case_id}', + 'nc' ]) -#observation info +# Path to reference climatology files reference_data_path = '${climo_dir_ref}' + +# Observation catalogue file (dynamic by subsection) custom_observations = os.path.join( - 'pcmdi_diags', - '{}_{}_catalogue.json'.format( - '${climo_dir_ref}', - '{{subsection}}')) + 'pcmdi_diags', + '{}_{}_catalogue.json'.format( + '${climo_dir_ref}', + '{{subsection}}' + ) +) -#load caclulated regions for each variable +# Load variable-specific region definitions regions = json.load(open('regions.json')) -#load predefined region information +# Load predefined region specifications and normalize domain lat/lon as tuples regions_specs = json.load(open('regions_specs.json')) -for key in regions_specs.keys(): - if "domain" in regions_specs[key].keys(): - if "latitude" in regions_specs[key]['domain'].keys(): - regions_specs[key]['domain']['latitude'] = tuple( - regions_specs[key]['domain']['latitude'] - ) - if "longitude" in regions_specs[key]['domain'].keys(): - regions_specs[key]['domain']['longitude'] = tuple( - regions_specs[key]['domain']['longitude'] - ) - -####################################### -# DATA LOCATION: METRICS OUTPUT +for key in regions_specs: + domain = regions_specs[key].get('domain', {}) + if 'latitude' in domain: + domain['latitude'] = tuple(domain['latitude']) + regions_specs[key]['domain']['latitude'] = domain['latitude'] + if 'longitude' in domain: + domain['longitude'] = 
tuple(domain['longitude']) + regions_specs[key]['domain']['longitude'] = domain['longitude'] + +# METRICS OUTPUT metrics_output_path = os.path.join( 'pcmdi_diags', 'metrics_results', 'mean_climate', - mip, - exp, + mip, + exp, '%(case_id)' ) -############################################################ -# DATA LOCATION: INTERPOLATED MODELS' CLIMATOLOGIES -diagnostics_output_path= os.path.join( +#INTERPOLATED MODELS' CLIMATOLOGIES +diagnostics_output_path = os.path.join( 'pcmdi_diags', 'diagnostic_results', 'mean_climate', - mip, - exp, + mip, + exp, '%(case_id)' ) + test_clims_interpolated_output = diagnostics_output_path -{%- endif %} +{% endif %} + +{% if "variability_modes" in subsection %} +# Setup for Mode Variability Diagnostics +msyear = int(start_yr) +meyear = int(end_yr) -{%- if ("variability_modes" in subsection) %} -######################################## -#setup for mode variability diagnostics -######################################## -seasons = '{{ seasons }}'.split(",") +# Seasons to analyze (comma-separated string to list) +seasons = '{{ seasons }}'.split(",") + +# Data frequency (e.g., monthly, seasonal) frequency = '{{ frequency }}' -#from configuration file -varModel = '{{vars}}' +# Variables to analyze (comma-separated string or space-separated) +varModel = '{{ vars }}' -#unit conversion (namelist) +# Unit conversion flags for model and observations ModUnitsAdjust = {{ ModUnitsAdjust }} ObsUnitsAdjust = {{ ObsUnitsAdjust }} -# If True, maskout land region thus consider only over ocean +# Mask out land regions (consider ocean-only if True) landmask = {{ landmask }} -#template for model file -modnames = [ product ] -realization = "*" -modpath = os.path.join( - '${ts_dir_primary}', - '{}.{}.%(model).%(realization).{}.%(variable).{}.nc'.format(mip,exp,'${tableID}',period) -) - -#start and end year for analysis -msyear = int(start_yr) -meyear = int(end_yr) - -# If True, remove Domain Mean of each time step +# If True, remove domain mean from each 
time step RmDomainMean = {{ RmDomainMean }} -# If True, consider EOF with unit variance +# If True, normalize EOFs to unit variance EofScaling = {{ EofScaling }} -# Conduct CBF analysis +# Conduct Combined EOF/CBF analysis (if True) CBF = {{ CBF }} -# Conduct conventional EOF analysis +# Conduct Conventional EOF analysis (if True) ConvEOF = {{ ConvEOF }} -# Generate CMEC compliant json +# Skip CMEC output (hardcoded for now) cmec = False -# Update diagnostic file if exist +# Whether to overwrite existing diagnostic output update_json = False -#results directory structure. +# Template for model input file paths +modnames = [product] +realization = "*" +modpath = os.path.join( + '${ts_dir_primary}', + '{}.{}.%(model).%(realization).{}.%(variable).{}.nc'.format( + mip, exp, '${tableID}', period + ) +) + +# Output results directory results_dir = os.path.join( 'pcmdi_diags', '%(output_type)', @@ -642,31 +698,32 @@ results_dir = os.path.join( '%(variability_mode)', '%(reference_data_name)', ) -{%- endif %} +{% endif %} -{%- if ("enso" in subsection) %} -########################################### -#parameter setup specific for enso metrics -########################################### -modnames = [ product ] +{% if "enso" in subsection %} +# Parameter Setup for ENSO Metrics +modnames = [product] realization = realm + modpath = os.path.join( - '${ts_dir_primary}', - '{}.{}.%(model).%(realization).{}.%(variable).{}.nc'.format(mip,exp,'${tableID}',period) + '${ts_dir_primary}', + '{}.{}.%(model).%(realization).{}.%(variable).{}.nc'.format( + mip, exp, '${tableID}', period + ) ) -#observation/reference file catalogue +# Observation/Reference settings obs_cmor = True obs_cmor_path = '${ts_dir_ref}' obs_catalogue = 'obs_catalogue.json' -#land/sea mask for obs/reference model +# Land/Sea mask for reference data reference_data_lf_path = json.load(open('obs_landmask.json')) -# METRICS COLLECTION (set in namelist, and main driver) -# metricsCollection = ENSO_perf, ENSO_tel, 
ENSO_proc +# Metrics collection type (e.g., ENSO_perf, ENSO_tel, ENSO_proc) +# Defined externally via metricsCollection -# OUTPUT +# Output directory structure results_dir = os.path.join( 'pcmdi_diags', '%(output_type)', @@ -677,14 +734,15 @@ results_dir = os.path.join( '%(metricsCollection)', ) +# Output filenames for JSON and NetCDF json_name = "%(mip)_%(exp)_%(metricsCollection)_${case_id}_%(model)_%(realization)" netcdf_name = json_name -{%- endif %} +{% endif %} EOF -{%- endif %} +{% endif %} ################################################################ # Run PCMDI Diags @@ -702,6 +760,7 @@ import datetime import xcdat as xc import numpy as np import pandas as pd +import shutil import collections from collections import OrderedDict @@ -718,119 +777,121 @@ from pcmdi_metrics.graphics import ( ) from pcmdi_zppy_util import( - archive_data, - check_regions, - check_references, - check_units, - childCount, - collect_data_info, - collect_clim_diags, - collect_movs_diags, - collect_enso_diags, - collect_clim_metrics, - collect_movs_metrics, - create_data_lmask, - derive_var, - enso_obsvar_dict, - enso_obsvar_lmsk, + count_child_processes, + run_serial_jobs, + run_parallel_jobs, + derive_missing_variable, + save_variable_regions, + generate_mean_clim_cmds, + generate_varmode_cmds, + build_enso_obsvar_catalog, + build_enso_obsvar_landmask, + generate_enso_cmds, shift_row_to_bottom, - merge_data, - parallel_jobs, + check_badvals, + archive_data, + drop_vars, + enso_plot_driver, + variability_modes_plot_driver, + mean_climate_plot_driver, parcoord_metric_plot, portrait_metric_plot, - serial_jobs, - variable_region, - mean_climate_plot_driver, - variability_modes_plot_driver, - enso_plot_driver + ObservationLinker, + DataCatalogueBuilder, + LandSeaMaskGenerator, + ClimMetricsReader, + ClimMetricsMerger, + MeanClimateMetricsCollector, + VariabilityMetricsCollector, + EnsoDiagnosticsCollector, + SyntheticMetricsPlotter ) -#parallel calculation +from pcmdi_viewer_util 
import( + collect_config, + setup_jinja_env, + create_section, + add_section, + generate_methodology_html, + generate_data_html, + generate_viewer_html +) + +# Determine multiprocessing usage num_workers = {{ num_workers }} -if num_workers < 2: - multiprocessing = False -else: - multiprocessing = {{multiprocessing}} +multiprocessing = {{ multiprocessing }} if num_workers >= 2 else False -{%- if "synthetic_plots" not in subsection %} -############################## +{% if "synthetic_plots" not in subsection %} + +# Time range start_yr = int('${Y1}') end_yr = int('${Y2}') num_years = end_yr - start_yr + 1 -# DATA LOCATION: Reference -{%- if "mean_climate" in subsection %} +# Set data paths based on diagnostic type +{% if "mean_climate" in subsection %} test_data_path = '${climo_dir_primary}' reference_data_path = '${climo_dir_ref}' -{%- elif ("variability_modes" in subsection) or ("enso" in subsection) %} +{% elif "variability_modes" in subsection or "enso" in subsection %} test_data_path = '${ts_dir_primary}' reference_data_path = '${ts_dir_ref}' -{%- endif %} +{% endif %} +# Dataset identifiers test_data_set = ['${model_name}'.split(".")[1]] {% if run_type == "model_vs_obs" %} reference_data_set = '{{ obs_sets }}'.split(",") {% elif run_type == "model_vs_model" %} reference_data_set = ['${model_name_ref}'.split(".")[1]] -{%- endif %} +{% endif %} variables = '{{ vars }}'.split(",") + ############################################################### #check and process derived quantities, these quantities are #likely not included as default in e3sm_to_cmip module ############################################################### -for i,var in enumerate(variables): - if "_" in var or "-" in var: - varin = re.split("_|-", var)[0] - else: - varin = var - fpaths = sorted(glob.glob(os.path.join(test_data_path,"*."+var+".*.nc"))) - if len(fpaths) < 1 and varin == 'rstcre': - derive_var(test_data_path, - varin,{'rsutcs':1,'rsut':-1}, - '${model_name}.${tableID}') -{% if run_type 
== "model_vs_model" %} - derive_var(reference_data_path, - varin,{'rsutcs':1,'rsut':-1}, - '${model_name_ref}.${tableID_ref}') -{%- endif %} - elif len(fpaths) < 1 and varin == 'rltcre': - derive_var(test_data_path, - varin,{'rlutcs':1,'rlut':-1}, - '${model_name}.${tableID}') +for var in variables: + varin = re.split(r"[_-]", var)[0] if "_" in var or "-" in var else var + + test_fpaths = sorted(glob.glob(os.path.join(test_data_path, f"*.{var}.*.nc"))) + if not test_fpaths: + derive_missing_variable(varin, test_data_path, '${model_name}.${tableID}') + {% if run_type == "model_vs_model" %} - derive_var(reference_data_path, - varin,{'rlutcs':1,'rlut':-1}, - '${model_name_ref}.${tableID_ref}') -{%- endif %} + ref_fpaths = sorted(glob.glob(os.path.join(reference_data_path, f"*.{var}.*.nc"))) + if not ref_fpaths: + derive_missing_variable(varin, reference_data_path, '${model_name_ref}.${tableID_ref}') +{% endif %} ####################################################### #collect and document data info in a dictionary # for convenience of pcmdi processing ####################################################### -test_dic, obs_dic = collect_data_info( - test_data_path,test_data_set, - reference_data_path,reference_data_set, - variables,'{{subsection}}','pcmdi_diags') +builder = DataCatalogueBuilder( + test_data_path, test_data_set, + reference_data_path, reference_data_set, + variables, '{{subsection}}', 'pcmdi_diags' +) +test_dic, obs_dic = builder.build_catalogues() ########################################################## # land/sea mask is needed in PCMDI diagnostics, check and # generate it here as these data are not always available # for model or observations ########################################################## -if {{ generate_sftlf }} in ['true', 'y', True]: - generate_sftlf = True -else: - generate_sftlf = False - -if generate_sftlf: - create_data_lmask( - test_data_path, - reference_data_path, - '{{subsection}}', - 'fixed') +# Whether to generate the 
land/sea mask +generate_flag = {{ generate_sftlf }} +# Instantiate and run +mask_generator = LandSeaMaskGenerator( + test_path=test_data_path, + ref_path=reference_data_path, + subsection='{{subsection}}', + fixed_dir='fixed' +) +mask_generator.run(generate_flag) -#info to collect diagnostic output +# Diagnostic input file templates input_template = os.path.join( 'pcmdi_diags', '%(output_type)', @@ -840,84 +901,80 @@ input_template = os.path.join( '${case_id}' ) -out_path = os.path.join( - '${results_dir}', - '%(group_type)' -) -{%- endif %} +# Diagnostic output path templates +out_path = os.path.join('${results_dir}', '%(group_type)') -{%- if "mean_climate" in subsection %} +{% endif %} + +{% if "mean_climate" in subsection %} regions = '{{regions}}'.split(",") #assiagn region to each variable -variable_region(regions,variables) +save_variable_regions(variables, regions) -################################################### -# generate the command list for each reference and -# each variable (will execuate in parallel later) -lstcmd = [] -for var in variables: - if "_" in var or "-" in var: - varin = re.split("_|-", var)[0] - else: - varin = var - if varin in obs_dic.keys(): - refset = obs_dic[varin]['set'] - lstcmd.append( - " ".join(['mean_climate_driver.py', ' -p parameterfile.py', - '--vars' , '{}'.format(var), - '-r' , '{}'.format(refset), - '--case_id' , '{}'.format('${case_id}') - ]) - ) +# generate the command list +lstcmd = generate_mean_clim_cmds( + variables=variables, + obs_dic=obs_dic, + case_id='${case_id}' +) #################################################### # call pcmdi mean climate diagnostics #################################################### if (len(lstcmd) > 0 ) and multiprocessing: - print("Parallel computing with {} jobs".format(str(len(lstcmd)))) - stdout,stderr,return_code = parallel_jobs(lstcmd,num_workers) + try: + results = run_parallel_jobs(lstcmd, num_workers) + for i, (stdout, stderr, return_code) in enumerate(results): + 
print(f"\nCommand {i+1} finished:") + print(f"STDOUT: {stdout}") + print(f"STDERR: {stderr}") + print(f"Return code: {return_code}") + except RuntimeError as e: + print(f"Execution failed: {e}") elif (len(lstcmd) > 0 ): - print("Serial computing with {} jobs".format(str(len(lstcmd)))) - stdout,stderr,return_code = serial_jobs(lstcmd,num_workers) + try: + results = run_serial_jobs(lstcmd) + for i, (stdout, stderr, return_code) in enumerate(results): + print(f"\nCommand {i+1} finished:") + print(f"STDOUT: {stdout}") + print(f"STDERR: {stderr}") + print(f"Return code: {return_code}") + except RuntimeError as e: + print(f"Execution failed: {e}") else: - print("no jobs to run...") - return_code = 0 + print("no jobs to run,continue....") -if return_code != 0: - exit("ERROR: {} jobs failed".format('{{subsection}}')) -else: - print("successfully finish all jobs....") - #time delay to ensure process completely finished - time.sleep(5) +print("successfully finish all jobs....") +#time delay to ensure process completely finished +time.sleep(5) #orgnize diagnostic output -collect_clim_diags( - regions,variables, - '{{figure_format}}', - '${model_name}'.split(".")[0], - '${model_name}'.split(".")[1], - '${model_name}'.split(".")[2], - '${model_name}'.split(".")[3], - '${case_id}', - input_template, - out_path +collector = MeanClimateMetricsCollector( + regions=regions, + variables=variables, + fig_format='{{figure_format}}', + model_info=tuple('${model_name}'.split(".")), # (mip, exp, model, relm) + case_id='${case_id}', + input_template=input_template, + output_dir=out_path ) +collector.collect() -{%- endif %} +{% endif %} -{%- if "variability_modes" in subsection %} +{% if "variability_modes" in subsection %} ########################################## # call pcmdi mode variability diagnostics ########################################## print("calculate mode variability metrics") -{%- if subsection == "variability_modes_atm" %} +{% if subsection == "variability_modes_atm" %} 
var_modes = '{{ atm_modes }}'.split(",") {% elif subsection == "variability_modes_cpl" %} var_modes = '{{ cpl_modes }}'.split(",") -{%- endif %} +{% endif %} #from configuration file varOBS = '{{vars}}' @@ -927,125 +984,116 @@ refpath = obs_dic[varOBS][refname]['file_path'] reftyrs = int(str(obs_dic[varOBS][refname]['yymms'])[0:4]) reftyre = int(str(obs_dic[varOBS][refname]['yymme'])[0:4]) -lstcmd = [] -for var_mode in var_modes: - if var_mode in ["NPO", "NPGO", "PSA1"]: - eofn_obs = "2" - eofn_mod = "2" - elif var_mode in ["PSA2"]: - eofn_obs = "3" - eofn_mod = "3" - else: - eofn_obs = "1" - eofn_mod = "1" - ############################################## - lstcmd.append( - " ".join([ - 'variability_modes_driver.py', ' -p parameterfile.py', - '--variability_mode' , '{}'.format(var_mode), - '--eofn_mod' , '{}'.format(eofn_mod), - '--eofn_obs' , '{}'.format(eofn_obs), - '--varOBS' , '{}'.format(varOBS), - '--osyear' , '{}'.format(reftyrs), - '--oeyear' , '{}'.format(reftyre), - '--reference_data_name' , '{}'.format(refname), - '--reference_data_path' , '{}'.format(refpath), - '--case_id' , '{}'.format('${case_id}') - ]) - ) +# Call the function +lstcmd = generate_varmode_cmds( + modes=var_modes, + varOBS=varOBS, + reftyrs=reftyrs, + reftyre=reftyre, + refname=refname, + refpath=refpath, + case_id='${case_id}' +) if (len(lstcmd) > 0 ) and multiprocessing: - print("Parallel computing with {} jobs".format(str(len(lstcmd)))) - stdout,stderr,return_code = parallel_jobs(lstcmd,num_workers) + try: + results = run_parallel_jobs(lstcmd, num_workers) + for i, (stdout, stderr, return_code) in enumerate(results): + print(f"\nCommand {i+1} finished:") + print(f"STDOUT: {stdout}") + print(f"STDERR: {stderr}") + print(f"Return code: {return_code}") + except RuntimeError as e: + print(f"Execution failed: {e}") elif (len(lstcmd) > 0 ): - print("Serial computing with {} jobs".format(str(len(lstcmd)))) - stdout,stderr,return_code = serial_jobs(lstcmd,num_workers) -else: - print("no 
jobs to run...") - return_code = 0 - -if return_code != 0: - exit("ERROR: {} jobs failed".format('{{subsection}}')) + try: + results = run_serial_jobs(lstcmd) + for i, (stdout, stderr, return_code) in enumerate(results): + print(f"\nCommand {i+1} finished:") + print(f"STDOUT: {stdout}") + print(f"STDERR: {stderr}") + print(f"Return code: {return_code}") + except RuntimeError as e: + print(f"Execution failed: {e}") else: - print("successfully finish all jobs....") - #time delay to ensure process completely finished - time.sleep(5) - -#orgnize diagnostic output -collect_movs_diags( - var_modes, - '{{figure_format}}', - '${model_name}'.split(".")[0], - '${model_name}'.split(".")[1], - '${model_name}'.split(".")[2], - '${model_name}'.split(".")[3], - '${case_id}', - input_template, - out_path + print("no jobs to run,continue...") + +print("successfully finish all jobs....") +#time delay to ensure process completely finished +time.sleep(5) + +# Create the collector instance +collector = VariabilityMetricsCollector( + modes=var_modes, + fig_format='{{figure_format}}', + mip='${model_name}'.split(".")[0], + exp='${model_name}'.split(".")[1], + model='${model_name}'.split(".")[2], + relm='${model_name}'.split(".")[3], + case_id='${case_id}', + input_dir=input_template, + output_dir=out_path ) -{%- endif %} +# Run the collection process +collector.collect() -{%- if "enso" in subsection %} +{% endif %} + +{% if "enso" in subsection %} ############################################# # call enso_driver.py to process diagnostics ############################################# - -#orgnize observation var list -enso_obsvar_dict(obs_dic,variables) - -#orgnize observation landmask -enso_obsvar_lmsk(obs_dic,variables) +build_enso_obsvar_catalog(obs_dic, variables) +build_enso_obsvar_landmask(obs_dic, variables) #now start enso driver -print("calculate enso metrics") -enso_groups = '{{ enso_groups }}'.split(",") -lstcmd = [] -for metricsCollection in enso_groups: - lstcmd.append( - " 
".join([ - 'enso_driver.py ', ' -p parameterfile.py', - '--metricsCollection', '{}'.format(metricsCollection), - '--case_id' , '{}'.format('${case_id}') - ]) - ) - +lstcmd = generate_enso_cmds('{{ enso_groups }}', '${case_id}') if (len(lstcmd) > 0 ) and multiprocessing: - print("Parallel computing with {} jobs".format(str(len(lstcmd)))) - stdout,stderr,return_code = parallel_jobs(lstcmd,num_workers) -elif (len(lstcmd) > 0 ): - print("Serial computing with {} jobs".format(str(len(lstcmd)))) - stdout,stderr,return_code = serial_jobs(lstcmd,num_workers) + try: + results = run_parallel_jobs(lstcmd, num_workers) + for i, (stdout, stderr, return_code) in enumerate(results): + print(f"\nCommand {i+1} finished:") + print(f"STDOUT: {stdout}") + print(f"STDERR: {stderr}") + print(f"Return code: {return_code}") + except RuntimeError as e: + print(f"Execution failed: {e}") +elif (len(lstcmd) > 0 ) and not multiprocessing: + try: + results = run_serial_jobs(lstcmd) + for i, (stdout, stderr, return_code) in enumerate(results): + print(f"\nCommand {i+1} finished:") + print(f"STDOUT: {stdout}") + print(f"STDERR: {stderr}") + print(f"Return code: {return_code}") + except RuntimeError as e: + print(f"Execution failed: {e}") else: - print("no jobs to run...") - return_code = 0 + print("no jobs to run...") -if return_code != 0: - exit("ERROR: {} jobs failed".format('{{subsection}}')) -else: - print("successfully finish all jobs....") - #time delay to ensure process completely finished - time.sleep(5) +print("successfully finish all jobs....") +#time delay to ensure process completely finished +time.sleep(5) -#organize diagnostic output +# Initialize and run collector obs_dict = json.load(open('obs_catalogue.json')) obs_name = list(obs_dict.keys())[0] -collect_enso_diags( - enso_groups, - '{{figure_format}}', - obs_name, - '${model_name}'.split(".")[0], - '${model_name}'.split(".")[1], - '${model_name}'.split(".")[2], - '${model_name}'.split(".")[3], - '${case_id}', - input_template, - 
out_path +collector = EnsoDiagnosticsCollector( + fig_format='{{figure_format}}', + refname=obs_name, + model_name_parts='${model_name}'.split("."), + case_id='${case_id}', + input_dir=input_template, + output_dir=out_path ) -{%- endif %} +enso_groups = '{{ enso_groups }}'.split(",") +collector.run(enso_groups) + +{% endif %} -{%- if "synthetic_plots" in subsection %} +{% if "synthetic_plots" in subsection %} ######################################### #plot synthetic figures for pcmdi metrics ######################################### @@ -1055,7 +1103,7 @@ figure_sets = '{{synthetic_sets}}'.split(",") figure_format = '{{figure_format}}' test_input_path = os.path.join( '${www}', - '${case}', + '%(model_name)', 'pcmdi_diags', '${results_dir}', 'metrics_data', @@ -1064,63 +1112,87 @@ test_input_path = os.path.join( metric_dict = json.load(open('synthetic_metrics_list.json')) -parameter = OrderedDict() -parameter['save_data'] = True -parameter['out_dir'] = os.path.join('${results_dir}','ERROR_metric') -parameter['test_name'] = '{{model_name}}' - -parameter['model_name'] = [ '-'.join('{{model_name}}'.split(".")[2:]) ] -parameter['tableID'] = [ '{{model_tableID}}' ] -parameter['case_id'] = [ '${case_id}' ] - -for metric in metric_sets: - parameter['test_path'] = test_input_path.replace('%(group_type)',metric) - parameter['diag_vars'] = metric_dict[metric] - if metric == "mean_climate": - parameter['cmip_path'] = '{{cmip_clim_dir}}' - parameter['cmip_name'] = '{{cmip_clim_set}}' - merge_lib = collect_clim_metrics(parameter) - elif metric == "variability_modes": - parameter['cmip_path'] = '{{cmip_movs_dir}}' - parameter['cmip_name'] = '{{cmip_movs_set}}' - parameter['movs_mode'] = '{{ atm_modes }}'.split(",") + '{{ cpl_modes }}'.split(",") - merge_lib,mode_season_list = collect_movs_metrics(parameter) - elif metric == 'enso_metric': - parameter['cmip_path'] = '{{cmip_enso_dir}}' - parameter['cmip_name'] = '{{cmip_enso_set}}' - - if metric == "mean_climate": - for stat in 
metric_dict[metric].keys(): - mean_climate_plot_driver( - metric, stat, - merge_lib.regions, - parameter['model_name'], - parameter['diag_vars'][stat], - merge_lib.df_dict[stat], - merge_lib.var_list, - merge_lib.var_unit_list, - parameter['save_data'], - parameter['out_dir'], - figure_format) - elif metric == "variability_modes": - for stat in metric_dict[metric].keys(): - variability_modes_plot_driver( - metric, stat, - parameter['model_name'], - parameter['diag_vars'][stat], - merge_lib[stat], - mode_season_list, - parameter['save_data'], - parameter['out_dir'], - figure_format) - elif metric == "enso_metric": - for stat in metric_dict[metric].keys(): - enso_plot_driver( - metric,stat, - parameter, - figure_format) +plotter = SyntheticMetricsPlotter( + test_name='{{model_name}}', + table_id='{{model_tableID}}', + figure_format=figure_format, + figure_sets=figure_sets, + metric_dict=metric_dict, + save_data=True, + base_test_input_path=test_input_path, + results_dir='${web_dir}/${results_dir}', + cmip_clim_dir='{{cmip_clim_dir}}', + cmip_clim_set='{{cmip_clim_set}}', + cmip_movs_dir='{{cmip_movs_dir}}', + cmip_movs_set='{{cmip_movs_set}}', + atm_modes='{{ atm_modes }}', + cpl_modes='{{ cpl_modes }}', + cmip_enso_dir='{{cmip_enso_dir}}', + cmip_enso_set='{{cmip_enso_set}}' +) -{%- endif %} +# Generate Summary Metrics plots +# e.g., "climatology,enso,variability" +groups = '{{sub_sets}}'.split(',') +plotter.generate(groups) + +print("Generating viewer page for diagnostics...") + +# Extract template values (assumes substitution happens before execution) +title = "{{pcmdi_webtitle}}" +version = "{{pcmdi_version}}" +subtitle = "${run_type}".replace('_', ' ').capitalize() +case_id = "${case}" +model_name = "{{model_name}}" +table_id = "{{model_tableID}}" + +# ts_years is assumed to be a list via string_list(default=list("")) +ts_periods = ts_years if isinstance(ts_years, list) else [] + +# Validate and unpack periods +if len(ts_periods) == 3: + clim_period, 
emov_period, enso_period = [p.strip() for p in ts_periods] +else: + raise ValueError( + f"Expected 3 periods (climatology, EMoV, ENSO), " + f"but got {len(ts_periods)}: {ts_periods}" + ) + +# Set up paths +obs_dir = os.path.join('{{pcmdi_external_prefix}}', 'observations', 'Atm', 'time-series') +pmp_dir = os.path.join('{{pcmdi_external_prefix}}', 'pcmdi_data') +web_dir = os.path.join("${web_dir}", "viewer") +os.makedirs(web_dir, exist_ok=True) + +# Copy logo +web_logo_src = os.path.join( + '{{pcmdi_external_prefix}}', + '{{pcmdi_viewer_template}}', + 'e3sm_pmp_logo.png' +) +web_logo_dst = os.path.join(web_dir, 'e3sm_pmp_logo.png') +shutil.copy(web_logo_src, web_logo_dst) + +# Build config +config = collect_config( + title=title, + subtitle=subtitle, + version=version, + case_id=case_id, + diag_dir="${web_dir}", + obs_dir=obs_dir, + pmp_dir=pmp_dir, + clim_period=clim_period, + emov_period=emov_period, + enso_period=enso_period +) + +# Render viewer +generate_methodology_html(config) +generate_data_html(config) +generate_viewer_html(config) + +{% endif %} EOF ################################ @@ -1142,7 +1214,6 @@ echo ===== COPY FILES TO WEB SERVER ===== echo # Create top-level directory -web_dir=${www}/${case}/pcmdi_diags mkdir -p ${web_dir} if [ $? 
!= 0 ]; then cd {{ scriptDir }} From ecaa510db9eba72d39d9fae3c200ab7e815a7ffe Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Sat, 26 Apr 2025 15:30:49 -0500 Subject: [PATCH 22/23] bug fix --- zppy/defaults/default.ini | 4 ++-- zppy/templates/pcmdi_diags.bash | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/zppy/defaults/default.ini b/zppy/defaults/default.ini index 79c415f6..a67bac5c 100755 --- a/zppy/defaults/default.ini +++ b/zppy/defaults/default.ini @@ -165,8 +165,8 @@ obs_sets = string(default="default") model_name = string(default="e3sm.historical.v3-LR.0051") model_tableID = string(default="Amon") # required for "model_vs_model" comparison -model_name_ref = string(default="") -model_tableID_ref = string(default="") +model_name_ref = string(default="ERA5") +model_tableID_ref = string(default="Amon") # variables in the cmip6 table that can be potentially used by pcmdi # this list depends on the definition of cmip variable # required for "mean climate" diagnostics diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash index 50857887..8f5935a0 100755 --- a/zppy/templates/pcmdi_diags.bash +++ b/zppy/templates/pcmdi_diags.bash @@ -1147,6 +1147,7 @@ model_name = "{{model_name}}" table_id = "{{model_tableID}}" # ts_years is assumed to be a list via string_list(default=list("")) +ts_years = {{ts_years}} ts_periods = ts_years if isinstance(ts_years, list) else [] # Validate and unpack periods @@ -1238,12 +1239,14 @@ done ############################################ # Copy files #rsync -a --delete ${results_dir} ${web_dir}/ +{% if "synthetic_plots" not in subsection %} rsync -a ${results_dir} ${web_dir}/ if [ $? 
!= 0 ]; then cd {{ scriptDir }} echo 'ERROR (14)' > {{ prefix }}.status exit 14 fi +{% endif %} {% if machine in ['pm-cpu', 'pm-gpu'] %} # For NERSC, change permissions of new files From ae3e19f4722a4e5b3c84db4f60444eb9954c1870 Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Tue, 29 Apr 2025 17:23:44 -0500 Subject: [PATCH 23/23] Refine the code to move "viewer" into the results directory Bug fix to allow the diagnostic workflow work for piControl simulation --- zppy/pcmdi_diags.py | 14 ++++---- zppy/templates/pcmdi_diags.bash | 60 +++++++++++++++++++++++---------- 2 files changed, 51 insertions(+), 23 deletions(-) diff --git a/zppy/pcmdi_diags.py b/zppy/pcmdi_diags.py index 90627627..6cbe9c13 100644 --- a/zppy/pcmdi_diags.py +++ b/zppy/pcmdi_diags.py @@ -70,7 +70,7 @@ def pcmdi_diags(config, script_dir, existing_bundles, job_ids_file): if c["sub"] != "synthetic_plots": check_and_define_parameters(c) else: - prefix = f"pcmdi_diags_{c['sub']}_{c['tag']}" + prefix = f"pcmdi_diags_{c['sub']}_{c['run_type']}" print(prefix) c["prefix"] = prefix @@ -176,9 +176,9 @@ def check_parameters_for_pcmdi(c: Dict[str, Any]) -> None: def check_mvm_only_parameters_for_bash(c: Dict[str, Any]) -> None: - check_parameter_defined(c, "diff_title") - check_parameter_defined(c, "ref_name") - check_parameter_defined(c, "short_ref_name") + check_parameter_defined(c, "reference_data_path_ts") + check_parameter_defined(c, "model_name_ref") + check_parameter_defined(c, "model_tableID_ref") if c["sub"] != "synthetic_plots": check_required_parameters( c, @@ -210,10 +210,12 @@ def check_and_define_parameters(c: Dict[str, Any]) -> None: ) prefix: str if c["run_type"] == "model_vs_obs": - prefix = f"pcmdi_diags_{c['sub']}_{c['tag']}_{c['year1']:04d}-{c['year2']:04d}" + prefix = ( + f"pcmdi_diags_{c['sub']}_{c['run_type']}_{c['year1']:04d}-{c['year2']:04d}" + ) elif c["run_type"] == "model_vs_model": check_mvm_only_parameters_for_bash(c) - prefix = 
f"pcmdi_diags_{c['sub']}_{c['tag']}_{c['year1']:04d}-{c['year2']:04d}_vs_{c['ref_year1']:04d}-{c['ref_year2']:04d}" + prefix = f"pcmdi_diags_{c['sub']}_{c['run_type']}_{c['year1']:04d}-{c['year2']:04d}_vs_{c['ref_year1']:04d}-{c['ref_year2']:04d}" reference_data_path = c["reference_data_path"].split("/post")[0] + "/post" if set( ["mean_climate", "variability_mode_cpl", "variability_mode_atm", "enso"] diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash index 8f5935a0..ee06aae6 100755 --- a/zppy/templates/pcmdi_diags.bash +++ b/zppy/templates/pcmdi_diags.bash @@ -28,22 +28,45 @@ echo "RUNNING ${id}" > {{ prefix }}.status # Basic definitions case="{{ case }}" www="{{ www }}" +run_type="{{ run_type }}" +results_dir="{{ run_type }}" +ref_name={{ ref_name }} {% if "synthetic_plots" not in subsection %} + +# Input variables y1={{ year1 }} y2={{ year2 }} -Y1="{{ '%04d' % (year1) }}" -Y2="{{ '%04d' % (year2) }}" -{% if run_type == "model_vs_model" %} -ref_Y1="{{ '%04d' % (ref_year1) }}" -ref_Y2="{{ '%04d' % (ref_year2) }}" -{% endif %} -{% endif %} +ref_y1={{ ref_year1 }} +ref_y2={{ ref_year2 }} +ref_start_yr={{ ref_start_yr }} +ref_final_yr={{ ref_final_yr }} + +# Formatted versions +Y1="$(printf "%04d" ${y1})" +Y2="$(printf "%04d" ${y2})" +ref_Y1="$(printf "%04d" ${ref_y1})" +ref_Y2="$(printf "%04d" ${ref_y2})" + +# Refine reference range +if [[ ${ref_y1} -lt ${ref_start_yr} ]]; then + ref_y1=${ref_start_yr} + ref_Y1="$(printf "%04d" ${ref_y1})" +fi -run_type="{{ run_type }}" +num_years=$((y2 - y1 + 1)) +ref_end_yr=$((ref_y1 + num_years - 1)) -results_dir="{{ tag }}" +if [[ ${ref_y2} -gt ${ref_end_yr} ]]; then + ref_y2=${ref_end_yr} + ref_Y2="$(printf "%04d" ${ref_y2})" +fi -ref_name={{ ref_name }} +if [[ ${ref_y2} -gt ${ref_final_yr} ]]; then + ref_y2=${ref_final_yr} + ref_Y2="$(printf "%04d" ${ref_y2})" +fi + +{% endif %} # Top-level directory web_dir=${www}/${case}/pcmdi_diags @@ -369,7 +392,7 @@ climo_dir_source="{{ output }}/post/atm/{{ 
grid }}/cmip_ts/monthly" create_links_acyc_climo "${climo_dir_source}" "${climo_dir_primary}" "${Y1}" "${Y2}" "${model_name}.${tableID}" 1 {% if run_type == "model_vs_model" %} # Path to reference model's climatology files -climo_dir_source_ref="{{ reference_data_path }}" +climo_dir_source_ref="{{ reference_data_path_ts }}" climo_dir_ref="climo_ref" # Link and process reference model climo data create_links_acyc_climo "${climo_dir_source_ref}" "${climo_dir_ref}" "${ref_Y1}" "${ref_Y2}" "${model_name_ref}.${tableID_ref}" 2 @@ -385,7 +408,7 @@ ts_dir_source="{{ output }}/post/atm/{{ grid }}/cmip_ts/monthly" create_links_ts "${ts_dir_source}" "${ts_dir_primary}" "${Y1}" "${Y2}" "${model_name}.${tableID}" 3 {% if run_type == "model_vs_model" %} # Define time series path for reference model (adjust for different year spans) -ts_dir_source_ref="{{ reference_data_path_ts }}/{{ ts_num_years_ref }}yr" +ts_dir_source_ref="{{ reference_data_path_ts }}" ts_dir_ref="ts_ref" # Create local links and combine ts files for the reference model create_links_ts "${ts_dir_source_ref}" "${ts_dir_ref}" "${ref_Y1}" "${ref_Y2}" "${model_name_ref}.${tableID_ref}" 4 @@ -467,12 +490,13 @@ fi # Use same period as test model when possible ####################################################### ts_dir_ref_source="{{ scriptDir }}/${workdir}/${obstmp_dir}" + {% if "mean_climate" in subsection %} climo_dir_ref=climo_ref -create_links_acyc_climo_obs "${ts_dir_ref_source}" "${climo_dir_ref}" ${Y1} ${Y2} 7 +create_links_acyc_climo_obs "${ts_dir_ref_source}" "${climo_dir_ref}" ${ref_Y1} ${ref_Y2} 7 {% elif "variability_modes_cpl" in subsection or "variability_modes_atm" in subsection or "enso" in subsection %} ts_dir_ref=ts_ref -create_links_ts_obs "${ts_dir_ref_source}" "${ts_dir_ref}" ${Y1} ${Y2} 8 +create_links_ts_obs "${ts_dir_ref_source}" "${ts_dir_ref}" ${ref_Y1} ${ref_Y2} 8 {% endif %} {% endif %} @@ -1113,6 +1137,7 @@ test_input_path = os.path.join( metric_dict = 
json.load(open('synthetic_metrics_list.json')) plotter = SyntheticMetricsPlotter( + case_name='{{case}}', test_name='{{model_name}}', table_id='{{model_tableID}}', figure_format=figure_format, @@ -1162,8 +1187,8 @@ else: # Set up paths obs_dir = os.path.join('{{pcmdi_external_prefix}}', 'observations', 'Atm', 'time-series') pmp_dir = os.path.join('{{pcmdi_external_prefix}}', 'pcmdi_data') -web_dir = os.path.join("${web_dir}", "viewer") -os.makedirs(web_dir, exist_ok=True) +out_dir = os.path.join("${web_dir}", "${results_dir}", "viewer") +os.makedirs(out_dir, exist_ok=True) # Copy logo web_logo_src = os.path.join( @@ -1171,7 +1196,7 @@ web_logo_src = os.path.join( '{{pcmdi_viewer_template}}', 'e3sm_pmp_logo.png' ) -web_logo_dst = os.path.join(web_dir, 'e3sm_pmp_logo.png') +web_logo_dst = os.path.join(out_dir, 'e3sm_pmp_logo.png') shutil.copy(web_logo_src, web_logo_dst) # Build config @@ -1183,6 +1208,7 @@ config = collect_config( diag_dir="${web_dir}", obs_dir=obs_dir, pmp_dir=pmp_dir, + out_dir=out_dir, clim_period=clim_period, emov_period=emov_period, enso_period=enso_period