diff --git a/config_noresm_default.yaml b/config_noresm_default.yaml new file mode 100644 index 000000000..dbd805b86 --- /dev/null +++ b/config_noresm_default.yaml @@ -0,0 +1,523 @@ +#============================== +#config_noresm_default.yaml +# modified from config_amwg_default_plots.yaml + +# This config file contains the standard set of variables and plots used for +# evaluating CAM simulations in the AMWG working group. + +#Currently, if one is on NCAR's Casper or +#Cheyenne machine, then only the diagnostic output +#paths are needed, at least to perform a quick test +#run (these are indicated with "MUST EDIT" comments). +#Running these diagnostics on a different machine, +#or with a different, non-example simulation, will +#require additional modifications. +# +# On sigma2 NIRD, please see the discussion on github +# https://github.com/NorESMhub/noresm3_dev_simulations/discussions/17 +# for details +# +#Config file Keywords: +#-------------------- +# +#1. Using ${xxx} will substitute that text with the +# variable referenced by xxx. For example: +# +# cam_case_name: cool_run +# cam_climo_loc: /some/where/${cam_case_name} +# +# will set "cam_climo_loc" in the diagnostics package to: +# /some/where/cool_run +# +# Please note that currently this will only work if the +# variable only exists in one location in the file. +# +#2. Using ${.xxx} will do the same as +# keyword 1 above, but specifies which sub-section the +# variable is coming from, which is necessary for variables +# that are repeated in different subsections. For example: +# +# diag_basic_info: +# cam_climo_loc: /some/where/${diag_cam_climo.start_year} +# +# diag_cam_climo: +# start_year: 1850 +# +# will set "cam_climo_loc" in the diagnostics package to: +# /some/where/1850 +# +#Finally, please note that for both 1 and 2 the keywords must be lowercase. +#This is because future developments will hopefully use other keywords +#that are uppercase. Also please avoid using periods (".") in variable +#names, as this will likely cause issues with the current file parsing +#system. +#-------------------- +user: 'USER-NAME-NOT-SET' +# +##============================== +# +# This file doesn't (yet) read environment variables, so the user must +# set this themselves. It is also a good idea to search the doc for 'user' +# to see what default paths are being set for output/working files. +# +# Note that the string 'USER-NAME-NOT-SET' is used in the jupyter script +# to check for a failure to customize +# + +#------------------------------------------------------------------------------------- +#This first set of variables specify basic info used by all diagnostic runs: +#------------------------------------------------------------------------------------- +diag_basic_info: + + #History file string to match (eg. cam.h0 or ocn.pop.h.ecosys.nday1) + # Only affects timeseries as everything else uses timeseries + # Leave off trailing '.' + #Default: cam.h0a + hist_str: cam.h0a + + #Is this a model vs observations comparison? + #If "false" or missing, then a model-model comparison is assumed: + compare_obs: false + + #Generate HTML website (assumed false if missing): + #Note: The website files themselves will be located in the path + #specified by "cam_diag_plot_loc", under the "/website" subdirectory, + #where "" is the subdirectory created for this particular diagnostics run + #(usually "case_vs_obs_XXX" or "case_vs_baseline_XXX"). + create_html: true + + #Location of observational datasets: + #Note: this only matters if "compare_obs" is true and the path + #isn't specified in the variable defaults file. + obs_data_loc: /nird/datalake/NS16000B/ADF-obs + + #Location where re-gridded and interpolated CAM climatology files are stored: + cam_regrid_loc: /scratch/${user}/noresm3/${diag_cam_climo.cam_case_name}/atm/proc/tseries/regrid + + #Overwrite CAM re-gridded files? + #If false, or missing, then regridding will be skipped for regridded variables + #that already exist in "cam_regrid_loc": + cam_overwrite_regrid: false + + #Location where diagnostic plots are stored: + cam_diag_plot_loc: /nird/datalake/NS2345K/www/diagnostics/ADF/${user} + + #Location of ADF variable plotting defaults YAML file: + #If left blank or missing, ADF/lib/adf_variable_defaults.yaml will be used + #Uncomment and change path for custom variable defaults file + #defaults_file: /some/path/to/defaults/file.yaml + + #Vertical pressure levels (in hPa) on which to plot 3-D variables + #when using horizontal (e.g. lat/lon) map projections. + #If this config option is missing, then no 3-D variables will be plotted on + #horizontal maps. Please note too that pressure levels must currently match + #what is available in the observations file in order to be plotted in a + #model vs obs run: + plot_press_levels: [200,500,850] + + #Longitude line on which to center all lat/lon maps. + #If this config option is missing then the central + #longitude will default to 180 degrees E. + central_longitude: 180 + + #Number of processors on which to run the ADF. + #If this config variable isn't present then + #the ADF defaults to one processor. Also, if + #you set it to "*" then it will default + #to all of the processors available on a + #single node/machine: + num_procs: 8 + + #If set to true, then redo all plots even if they already exist. + #If set to false, then if a plot is found it will be skipped: + redo_plot: true + +#------------------------------------------------------------------------------------- +#This second set of variables provides info for the CAM simulation(s) being diagnosed: +#------------------------------------------------------------------------------------- +diag_cam_climo: + + # History file list of strings to match + # eg. cam.h0 or ocn.pop.h.ecosys.nday1 or hist_str: [cam.h2,cam.h0] + # Only affects timeseries as everything else uses the created timeseries + # Default: + hist_str: cam.h0a + + #Calculate climatologies? + #If false, the climatology files will not be created: + calc_cam_climo: true + + #Overwrite CAM climatology files? + #If false, or not prsent, then already existing climatology files will be skipped: + cam_overwrite_climo: false + + #Name of CAM case (or CAM run name): + cam_case_name: n1850.ne30_tn14.hybrid_fatessp.20241219 + + #Case nickname + #NOTE: if nickname starts with '0' - nickname must be in quotes! + # ie '026a' as opposed to 026a + #If missing or left blank, will default to cam_case_name + case_nickname: #cool nickname + + #Location of CAM history (h0a) files: + #Example test files + cam_hist_loc: /nird/datalake/NS9560K/noresm3/cases/${diag_cam_climo.cam_case_name}/atm/hist + + #Location of CAM climatologies (to be created and then used by this script) + cam_climo_loc: /scratch/${user}/noresm3/${diag_cam_climo.cam_case_name}/atm/proc/climo + + #model year when time series files should start: + #Note: Leaving this entry blank will make time series + # start at earliest available year. + start_year: 100 + + #model year when time series files should end: + #Note: Leaving this entry blank will make time series + # end at latest available year. + end_year: 110 + + #Do time series files exist? + #If True, then diagnostics assumes that model files are already time series. + #If False, or if simply not present, then diagnostics will attempt to create + #time series files from history (time-slice) files: + cam_ts_done: false + + #Save interim time series files? + #WARNING: This can take up a significant amount of space, + # but will save processing time the next time + cam_ts_save: true + + #Overwrite time series files, if found? + #If set to false, then time series creation will be skipped if files are found: + cam_overwrite_ts: false + + #Location where time series files are (or will be) stored: + cam_ts_loc: /scratch/${user}/noresm3/${diag_cam_climo.cam_case_name}/atm/proc/tseries + +#------------------------------------------------------------------------------------- + #TEM diagnostics + #--------------- + #TEM history file number + #If missing or blank, ADF will default to h4 + tem_hist_str: cam.h4 + + #Location where TEM files are stored: + #NOTE: If path not specified or commented out, TEM calculation/plots will be skipped! + cam_tem_loc: /scratch/${user}/noresm3/ADF/${diag_cam_climo.cam_case_name}/tem/ + + #Overwrite TEM files, if found? + #If set to false, then TEM creation will be skipped if files are found: + overwrite_tem: false + + #---------------------- + +#This third set of variables provide info for the CAM baseline climatologies. +#------------------------------------------------------------------------------------- +#This only matters if "compare_obs" is false: +diag_cam_baseline_climo: + + # History file list of strings to match + # eg. cam.h0 or ocn.pop.h.ecosys.nday1 or hist_str: [cam.h2,cam.h0] + # Only affects timeseries as everything else uses the created timeseries + # Default: + hist_str: cam.h0a + + #Calculate cam baseline climatologies? + #If false, the climatology files will not be created: + calc_cam_climo: true + + #Overwrite CAM climatology files? + #If false, or not present, then already existing climatology files will be skipped: + cam_overwrite_climo: false + + #Name of CAM baseline case: + cam_case_name: n1850.ne30_tn14.hybrid_fatessp.20241204 + + #Baseline case nickname + #NOTE: if nickname starts with '0' - nickname must be in quotes! + # ie '026a' as opposed to 026a + #If missing or left blank, will default to cam_case_name + case_nickname: + + #Location of CAM baseline history (h0a) files: + #Example test files + cam_hist_loc: /nird/datalake/NS9560K/noresm3/cases/${diag_cam_baseline_climo.cam_case_name}/atm/hist + + #Location of baseline CAM climatologies: + cam_climo_loc: /scratch/${user}/noresm3/${diag_cam_baseline_climo.cam_case_name}/atm/proc/climo + + #model year when time series files should start: + #Note: Leaving this entry blank will make time series + # start at earliest available year. + start_year: 52 + + #model year when time series files should end: + #Note: Leaving this entry blank will make time series + # end at latest available year. + end_year: 71 + + #Do time series files need to be generated? + #If True, then diagnostics assumes that model files are already time series. + #If False, or if simply not present, then diagnostics will attempt to create + #time series files from history (time-slice) files: + cam_ts_done: false + + #Save interim time series files for baseline run? + #WARNING: This can take up a significant amount of space: + cam_ts_save: true + + #Overwrite baseline time series files, if found? + #If set to false, then time series creation will be skipped if files are found: + cam_overwrite_ts: false + + #Location where time series files are (or will be) stored: + cam_ts_loc: /scratch/${user}/diagnostics/ADF/${diag_cam_baseline_climo.cam_case_name}/atm/tseries + + #TEM diagnostics + #--------------- + #TEM history file number + #If missing or blank, ADF will default to h4 + tem_hist_str: cam.h4 + + #Location where TEM files are stored: + #NOTE: If path not specified or commented out, TEM calculation/plots will be skipped! + cam_tem_loc: /scratch/${user}/${diag_cam_baseline_climo.cam_case_name}/tem/ + + #Overwrite TEM files, if found? + #If set to false, then TEM creation will be skipped if files are found: + overwrite_tem: false + +#------------------------------------------------------------------------------------- +#This fourth set of variables provides settings for calling the Climate Variability +#------------------------------------------------------------------------------------- +# Diagnostics Package (CVDP). If cvdp_run is set to true the CVDP will be set up and +# run in background mode, likely completing after the ADF has completed. +# If CVDP is to be run PSL, TREFHT, TS and PRECT (or PRECC and PRECL) should be listed +# in the diag_var_list variable listing. +# For more CVDP information: https://www.cesm.ucar.edu/working_groups/CVC/cvdp/ +diag_cvdp_info: + + # Run the CVDP on the listed run(s)? + cvdp_run: false + + # CVDP code path, sets the location of the CVDP codebase + # CGD systems path = /home/asphilli/CESM-diagnostics/CVDP/Release/v5.2.0/ + # CISL systems path = /glade/u/home/asphilli/CESM-diagnostics/CVDP/Release/v5.2.0/ + # github location = https://github.com/NCAR/CVDP-ncl + cvdp_codebase_loc: /glade/u/home/asphilli/CESM-diagnostics/CVDP/Release/v5.2.0/ + + # Location where cvdp codebase will be copied to and diagnostic plots will be stored + cvdp_loc: /glade/scratch/asphilli/ADF-Sandbox/cvdp/ #MUST EDIT! + + # tar up CVDP results? + cvdp_tar: false + +# This set of variables provides settings for calling NOAA's +# Model Diagnostic Task Force (MDTF) diagnostic package. +# https://github.com/NOAA-GFDL/MDTF-diagnostics +# +# If mdtf_run: true, the MDTF will be set up and +# run in background mode, likely completing after the ADF has completed. +# +# WARNING: This currently only runs on CASPER (not derecho) +# +# The variables required depend on the diagnostics (PODs) selected. +# AMWG-developed PODS and their required variables: +# (Note that PRECT can be computed from PRECC & PRECL) +# - MJO_suite: daily PRECT, FLUT, U850, U200, V200 (all required) +# - Wheeler-Kiladis Wavenumber Frequency Spectra: daily PRECT, FLUT, U200, U850, OMEGA500 +# (will use what is available) +# - Blocking (Rich Neale): daily OMEGA500 +# - Precip Diurnal Cycle (Rich Neale): 3-hrly PRECT +# +# Many other diagnostics are available; see +# https://mdtf-diagnostics.readthedocs.io/en/main/sphinx/start_overview.html + +# +diag_mdtf_info: + # Run the MDTF on the model cases + mdtf_run: false + + # The file that will be written by ADF to input to MDTF. Call this whatever you want. + mdtf_input_settings_filename : mdtf_input.json + + ## MDTF code path, sets the location of the MDTF codebase and pre-compiled conda envs + # CHANGE if you have any: your own MDTF code, installed conda envs and/or obs_data + + mdtf_codebase_path : /glade/campaign/cgd/amp/amwg/mdtf + mdtf_codebase_loc : ${mdtf_codebase_path}/MDTF-diagnostics.v3.1.20230817.ADF + conda_root : /glade/u/apps/opt/conda + conda_env_root : ${mdtf_codebase_path}/miniconda2/envs.MDTFv3.1.20230412/ + OBS_DATA_ROOT : ${mdtf_codebase_path}/obs_data + + # SET this to a writable dir. The ADF will place ts files here for the MDTF to read (adds the casename) + MODEL_DATA_ROOT : ${diag_cam_climo.cam_ts_loc}/mdtf/inputdata/model + + # Choose diagnostics (PODs). Full list of available PODs: https://github.com/NOAA-GFDL/MDTF-diagnostics + pod_list : [ "MJO_suite" ] + + # Intermediate/output file settings + make_variab_tar: false # tar up MDTF results + save_ps : false # save postscript figures in addition to bitmaps + save_nc : false # save netCDF files of processed data (recommend true when starting with new model data) + overwrite: true # overwrite results in OUTPUT_DIR; otherwise results will be saved under a unique name + + # Settings used in debugging: + verbose : 3 # Log verbosity level. + test_mode: false # Set to true for framework test. Data is fetched but PODs are not run. + dry_run : false # Framework test. No external commands are run and no remote data is copied. Implies test_mode. + + # Settings that shouldn't change in ADF implementation for now + data_type : single_run # single_run or multi_run (only works with single right now) + data_manager : Local_File # Fetch data or it is local? + environment_manager : Conda # Manage dependencies + + + +#+++++++++++++++++++++++++++++++++++++++++++++++++++ +#These variables below only matter if you are using +#a non-standard method, or are adding your own +#diagnostic scripts. +#+++++++++++++++++++++++++++++++++++++++++++++++++++ + +#Note: If you want to pass arguments to a particular script, you can +#do it like so (using the "averaging_example" script in this case): +# - {create_climo_files: {kwargs: {clobber: true}}} + +#Name of time-averaging scripts being used to generate climatologies. +#These scripts must be located in "scripts/averaging": +time_averaging_scripts: + - {create_climo_files: {kwargs: {clobber: false}}} + #- create_TEM_files #To generate TEM files, please un-comment + +#Name of regridding scripts being used. +#These scripts must be located in "scripts/regridding": +regridding_scripts: + - regrid_and_vert_interp + +#List of analysis scripts being used. +#These scripts must be located in "scripts/analysis": +analysis_scripts: + - amwg_table + +#List of plotting scripts being used. +#These scripts must be located in "scripts/plotting": +plotting_scripts: + - global_mean_timeseries + - global_latlon_map + - global_latlon_vect_map + - zonal_mean + - meridional_mean + - polar_map + - cam_taylor_diagram + - ozone_diagnostics + - qbo + #- tape_recorder + #- tem #To plot TEM, please un-comment fill-out the "tem_info" section below + +#List of CAM variables that will be processesd: +#If CVDP is to be run PSL, TREFHT, TS and PRECT (or PRECC and PRECL) should be listed +diag_var_list: + - AODDUST + - AODVIS + - cb_SULFATE + - cb_isoprene + - cb_monoterp + - cb_DUST + - cb_DMS + - cb_BC + - cb_OM + - cb_H2O2 + - cb_H2SO4 + - cb_SALT + - SFmonoterp + - SFisoprene + - SFSS + - SFDUST + - SFSOA + - SFSO4 + - SFSO2_net + - SFOM + - SFBC + - SFDMS + - SFH2O2 + - SFH2SO4 + - cb_SO2 + - D550_BC + - D550_DU + - D550_POM + - D550_SO4 + - D550_SS + - CLDHGH + - CLDICE + - CLDLIQ + - CLDLOW + - CLDMED + - CLDTOT + - CLOUD + - RESTOM + - FLNS + - FLNT + - FLNTC + - FSNS + - FSNT + - FSNTC + - LHFLX + - LWCF + - OMEGA500 + - PBLH + - PRECT + - PS + - PSL + - QFLX + - RELHUM + - SHFLX + - SST + - SWCF + - T + - TAUX + - TAUY + - TGCLDIWP + - TGCLDLWP + - TMQ + - TREFHT + - TS + - U + - U10 + - ICEFRAC + - OCNFRAC + - LANDFRAC + - O3 + # 2d fields + # - SFSO2 0 => 1.4e-10 kg/m2/s (SO2 surface flux) + # - WD_DMS -4.5e-15 => 1.7e-22 kg/m2/s (vertical integrated wet deposition flux) + # - WD_SO2 -1.5e-10 => 3.8e-16 kg/m2/s (vertical integrated wet deposition flux) + # - DF_DMS -4.5e-21 => 5.1e-13 kg/m2/s (vertical integrated dry deposition flux) + # - DF_H2O2 6.6e-26 => 2.7e-11 kg/m2/s (vertical integrated dry deposition flux) + # - DF_SO2 1.5e-27 => 7.0e-10 kg/m2/s (vertical integrated dry deposition flux) + # 3d fields + # - sum_BC 1.8e-14 => 1.2e-8 kg/kg (sum of BC concentrations) + # - sum_DST 6.3e-20 => 7.9e-6 kg/kg + # - sum_OM 6.3e-15 => 7.2e-8 kt/kg + +# + +# Options for TEM diagnostics (./averaging/create_TEM_files.py and ./plotting/temp.py) +#tem_info: + #Location where TEM files are stored: + #If path not specified or commented out, TEM calculation/plots will be skipped + #tem_loc: /glade/scratch/richling/adf-output/ADF-data/TEM/ + + #TEM history file number + #If missing or blank, ADF will default to h4 + #hist_num: h4 + + #Overwrite TEM files, if found? + #If set to false, then TEM creation will be skipped if files are found: + #overwrite_tem_case: false + + #overwrite_tem_case: + #overwrite_tem_base: false + +#END OF FILE diff --git a/env/conda_environment_nird.yaml b/env/conda_environment_nird.yaml new file mode 100644 index 000000000..ebd1d60c0 --- /dev/null +++ b/env/conda_environment_nird.yaml @@ -0,0 +1,20 @@ +name: adf_v1.0.0 +channels: + - conda-forge + - defaults +dependencies: + - pyyaml=6.0.2 + - scipy=1.12.0 + - cartopy=0.23.0 + - netcdf4=1.6.5 + - xarray=2024.1.1 + - uxarray=2025.03.0 + - matplotlib=3.9.4 + - pandas=2.2.0 + - pint=0.23 + - xskillscore=0.0.24 + - geocat-comp=2024.04.0 + - python=3.12 + - xesmf>=0.8.8 +prefix: /nird/datalake/NS16000B/ADF-env/adf_v1.0.0 + diff --git a/lib/adf_diag.py b/lib/adf_diag.py index ddf452ecc..b75fe3a77 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -1159,11 +1159,25 @@ def derive_variables(self, res=None, hist_str=None, vars_to_derive=None, ts_dir= whether to overwrite the file (true) or exit with a warning message. """ - + start_years = self.climo_yrs["syears"] + start_years = str(start_years[0]).zfill(4) + end_years = self.climo_yrs["eyears"] + end_years = str(end_years[0]).zfill(4) + date_range_string_case = f"{start_years}01-{end_years}12" # Loop through derived variables for var in vars_to_derive: print(f"\t - deriving time series for {var}") - + filename = f'{self.get_cam_info("cam_case_name")[0]}.{hist_str}*.{var}.{date_range_string_case}.nc' + if glob.glob(os.path.join(ts_dir, filename)): + expname = f'{self.get_baseline_info("cam_case_name")}' + start_years = self.climo_yrs["syear_baseline"] + start_years = str(start_years).zfill(4) + end_years = self.climo_yrs["eyear_baseline"] + end_years = str(end_years).zfill(4) + date_range_string = f"{start_years}01-{end_years}12" + else: + expname = f'{self.get_cam_info("cam_case_name")[0]}' + date_range_string = date_range_string_case # Grab list of constituents for this variable constit_list = constit_dict[var] @@ -1172,9 +1186,9 @@ def derive_variables(self, res=None, hist_str=None, vars_to_derive=None, ts_dir= for constit in constit_list: # Check if the constituent file is present, if so add it to list if hist_str: - const_glob_str = f"*{hist_str}*.{constit}.*.nc" + const_glob_str = f"{expname}.{hist_str}*.{constit}.{date_range_string}.nc" else: - const_glob_str = f"*.{constit}.*.nc" + const_glob_str = f"{expname}.*.{constit}.{date_range_string}.nc" # end if if glob.glob(os.path.join(ts_dir, const_glob_str)): constit_files.append(glob.glob(os.path.join(ts_dir, const_glob_str ))[0]) @@ -1275,6 +1289,8 @@ def derive_variables(self, res=None, hist_str=None, vars_to_derive=None, ts_dir= # Drop all constituents from final saved dataset # These are not necessary because they have their own time series files ds_final = ds.drop_vars(constit_list) + if "time_bnds" in list(ds_final.keys()): + ds_final = ds_final.drop_vars("time_bnds") # Copy attributes from constituent file to derived variable ds_final[var].attrs = attrs ds_final.to_netcdf(derived_file, unlimited_dims='time', mode='w') diff --git a/lib/adf_info.py b/lib/adf_info.py index fb4612a9b..683b395c8 100644 --- a/lib/adf_info.py +++ b/lib/adf_info.py @@ -254,6 +254,7 @@ def __init__(self, config_file, debug=False): #Grab first possible hist string, just looking for years of run base_hist_str = baseline_hist_str[0] + print(f"AVAILABLE BASE_HIST_STR: {base_hist_str}") starting_location = Path(baseline_hist_locs) print(f"\tChecking history files in '{starting_location}'") file_list = sorted(starting_location.glob("*" + base_hist_str + ".*.nc")) @@ -294,7 +295,7 @@ def __init__(self, config_file, debug=False): raise AdfError(msg) base_climo_yrs = sorted(np.unique(base_climo_yrs)) - + print(f"AVAILABLE YEARS IN BASE RUN: {base_climo_yrs}") base_found_syr = int(base_climo_yrs[0]) base_found_eyr = int(base_climo_yrs[-1]) @@ -488,7 +489,7 @@ def __init__(self, config_file, debug=False): msg = f"\t ERROR: No climo years found in {cam_hist_locs[case_idx]}, " raise AdfError(msg) case_climo_yrs = sorted(np.unique(case_climo_yrs)) - + print(f'Case climo years: {case_climo_yrs}') case_found_syr = int(case_climo_yrs[0]) case_found_eyr = int(case_climo_yrs[-1]) @@ -895,4 +896,4 @@ def get_climo_yrs_from_ts(self, input_ts_loc, case_name): #++++++++++++++++++++ #End Class definition -#++++++++++++++++++++ \ No newline at end of file +#++++++++++++++++++++ diff --git a/lib/adf_variable_defaults.yaml b/lib/adf_variable_defaults.yaml index ed5be6da0..434ae5fc7 100644 --- a/lib/adf_variable_defaults.yaml +++ b/lib/adf_variable_defaults.yaml @@ -157,6 +157,56 @@ AODDUST: pct_diff_colormap: "PuOr_r" AODVIS: + category: "Aerosols" + colormap: "Oranges" + contour_levels_range: [0.00, 1, 0.1] + diff_colormap: "PuOr_r" + diff_contour_range: [-0.5, 0.5, 0.05] + scale_factor: 1 + add_offset: 0 + new_unit: "" + +D550_SO4: + category: "Aerosols" + colormap: "Oranges" + contour_levels_range: [0.05, 0.6, 0.05] + diff_colormap: "PuOr_r" + diff_contour_range: [-0.1, 0.1, 0.01] + scale_factor: 1 + add_offset: 0 + new_unit: "" + +D550_SS: + category: "Aerosols" + colormap: "Oranges" + contour_levels_range: [0.05, 0.6, 0.05] + diff_colormap: "PuOr_r" + diff_contour_range: [-0.1, 0.1, 0.01] + scale_factor: 1 + add_offset: 0 + new_unit: "" + +D550_BC: + category: "Aerosols" + colormap: "Oranges" + contour_levels_range: [0.05, 0.6, 0.05] + diff_colormap: "PuOr_r" + diff_contour_range: [-0.1, 0.1, 0.01] + scale_factor: 1 + add_offset: 0 + new_unit: "" + +D550_DU: + category: "Aerosols" + colormap: "Oranges" + contour_levels_range: [0.05, 0.6, 0.05] + diff_colormap: "PuOr_r" + diff_contour_range: [-0.1, 0.1, 0.01] + scale_factor: 1 + add_offset: 0 + new_unit: "" + +D550_POM: category: "Aerosols" colormap: "Oranges" contour_levels_range: [0.05, 0.6, 0.05] @@ -183,7 +233,14 @@ AODVISdn: pct_diff_contour_levels: [-100,-75,-50,-40,-30,-20,-10,-8,-6,-4,-2,0,2,4,6,8,10,20,30,40,50,75,100] pct_diff_colormap: "PuOr_r" -BURDENBC: +cb_BC: + colormap: "Oranges" + contour_levels_range: [0, 5.5, .5 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-1, 1.1, 0.1] + scale_factor: 1000000 + add_offset: 0 + new_unit: "1e-6 kg/m2" category: "Aerosols" colormap: "plasma_r" scale_factor: 1000000 @@ -198,7 +255,14 @@ BURDENBC: obs_scale_factor: 1000000 obs_add_offset: 0 -BURDENDUST: +cb_SULFATE: + colormap: "Oranges" + contour_levels_range: [0, 11, 1 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-2.25, 2.5, 0.25] + scale_factor: 1000000 + add_offset: 0 + new_unit: "1e-6 kg/m2" category: "Aerosols" colormap: "plasma_r" scale_factor: 1000000 @@ -214,7 +278,14 @@ BURDENDUST: obs_scale_factor: 1000000 obs_add_offset: 0 -BURDENPOM: +cb_isoprene: + colormap: "Oranges" + contour_levels_range: [0, 105, 5 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-50, 52.5, 2.5] + scale_factor: 1000000 + add_offset: 0 + new_unit: "1e-6 kg/m2" category: "Aerosols" colormap: "plasma_r" scale_factor: 1000000 @@ -225,7 +296,14 @@ BURDENPOM: diff_colormap: "RdBu_r" -BURDENSEASALT: +cb_monoterp: + colormap: "Oranges" + contour_levels_range: [0, 55, 5 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-20, 22.5, 2.5] + scale_factor: 1000000 + add_offset: 0 + new_unit: "1e-6 kg/m2" category: "Aerosols" colormap: "plasma_r" scale_factor: 1000000 @@ -241,7 +319,14 @@ BURDENSEASALT: obs_scale_factor: 1000000 obs_add_offset: 0 -BURDENSO4: +cb_DMS: + colormap: "Oranges" + contour_levels_range: [0, 2.25, .25 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-0.1, 0.11, 0.01] + scale_factor: 1000000 + add_offset: 0 + new_unit: "1e-6 kg/m2" category: "Aerosols" colormap: "plasma_r" scale_factor: 1000000 @@ -256,7 +341,65 @@ BURDENSO4: obs_scale_factor: 1000000 obs_add_offset: 0 -BURDENSOA: +cb_DUST: + colormap: "Oranges" + contour_levels: [1, 10, 20, 30, 40, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 999, 1000] + diff_colormap: "PuOr_r" + log_normal: true + diff_contour_range: [-600, 650, 50] + scale_factor: 1000000 + add_offset: 0 + new_unit: "1e-6 kg/m2" + category: "Aerosols" + +cb_OM: + colormap: "Oranges" + contour_levels_range: [0, 105, 5 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-50, 52.5, 2.5] + scale_factor: 1000000 + add_offset: 0 + new_unit: "1e-6 kg/m2" + category: "Aerosols" + +cb_H2O2: + colormap: "Oranges" + contour_levels_range: [0, 11, 1 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-2.25, 2.5, 0.25] + scale_factor: 1000000 + add_offset: 0 + new_unit: "1e-6 kg/m2" + category: "Aerosols" + +cb_H2SO4: + colormap: "Oranges" + contour_levels_range: [0, 1.1, .1 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-0.1, 0.11, 0.01] + scale_factor: 1000000 + add_offset: 0 + new_unit: "1e-6 kg/m2" + category: "Aerosols" + +cb_SALT: + colormap: "Oranges" + contour_levels_range: [0, 105, 5 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-50, 55, 5] + scale_factor: 1000000 + add_offset: 0 + new_unit: "1e-6 kg/m2" + category: "Aerosols" + +cb_SO2: + colormap: "Oranges" + contour_levels_range: [0, 22, 2 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-5, 6, 1] + scale_factor: 1000000 + add_offset: 0 + new_unit: "1e-6 kg/m2" category: "Aerosols" colormap: "plasma_r" scale_factor: 1000000 @@ -572,6 +715,150 @@ SAD_SULFC: pct_diff_contour_levels: [-100,-75,-50,-40,-30,-20,-10,-8,-6,-4,-2,0,2,4,6,8,10,20,30,40,50,75,100] pct_diff_colormap: "PuOr_r" +#+++++++++++++++++ +# Category: Surface emissions +#+++++++++++++++++ + +SFSOA: + colormap: "Oranges" + contour_levels_range: [0, 105, 5 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-50, 55, 5] + scale_factor: 1000000000000 + add_offset: 0 + new_unit: "1e-12 kg/m2/s" + category: "Surface emissions" + derivable_from: ["SFSOA_A1", "SFSOA_LV", "SFSOA_NA", "SFSOA_SV"] + +SFSS: + colormap: "Oranges" + contour_levels_range: [0, 2100, 100] + diff_colormap: "PuOr_r" + diff_contour_range: [-500, 550, 50] + scale_factor: 1000000000000 + add_offset: 0 + new_unit: "1e-12 kg/m2/s" + category: "Surface emissions" + derivable_from: ["SFSS_A1", "SFSS_A2", "SFSS_A3"] + +SFBC: + colormap: "Oranges" + contour_levels_range: [0, 11, 1 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-1, 1.1, .1] + scale_factor: 1000000000000 + add_offset: 0 + new_unit: "1e-12 kg/m2/s" + category: "Surface emissions" + derivable_from: ["SFBC_A","SFBC_AC","SFBC_AI","SFBC_AX","SFBC_N","SFBC_NI","BC_AX_CMXF","BC_NI_CMXF","BC_N_CMXF"] + +SFH2O2: + colormap: "Oranges" + contour_levels_range: [0, 105, 5 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-50, 55, 5] + scale_factor: 1000000000000 + add_offset: 0 + new_unit: "1e-12 kg/m2/s" + category: "Surface emissions" + +SFH2SO4: + colormap: "Oranges" + contour_levels_range: [0, 105, 5 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-50, 55, 5] + scale_factor: 1000000000000 + add_offset: 0 + new_unit: "1e-12 kg/m2/s" + category: "Surface emissions" + +SFDMS: + colormap: "Oranges" + contour_levels_range: [0, 11, 1 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-10, 11, 1] + scale_factor: 1000000000000 + add_offset: 0 + new_unit: "1e-12 kg/m2/s" + category: "Surface emissions" + +SFOM: + colormap: "Oranges" + contour_levels_range: [0, 45, 5 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-20, 22.5, 2.5] + scale_factor: 1000000000000 + add_offset: 0 + new_unit: "1e-12 kg/m2/s" + category: "Surface emissions" + derivable_from: ["SFOM_AC", "SFOM_AI", "SFOM_NI", "OM_NI_CMXF"] + +SFDUST: + colormap: "Oranges" + contour_levels: [1, 250, 500, 1000, 1500, 2000, 2500, 3000, 4000, 5000] + diff_colormap: "PuOr_r" + #log_normal: true + diff_contour_range: [-600, 650, 50] + scale_factor: 1000000000000 + add_offset: 0 + new_unit: "1e-12 kg/m2/s" + category: "Surface emissions" + derivable_from: [ "SFDST_A2", "SFDST_A3"] + +SFSO2_net: + colormap: "Oranges" + contour_levels_range: [0, 105, 5 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-50, 55, 5] + scale_factor: 1000000000000 + add_offset: 0 + new_unit: "1e-12 kg/m2/s" + category: "Surface emissions" + derivable_from: ["SFSO2", "SO2_CMXF"] + +SFSO4: + colormap: "Oranges" + contour_levels: [0, .5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20] + diff_colormap: "PuOr_r" + diff_contour_range: [-1, 1.1, .1] + scale_factor: 100000000000000 + add_offset: 0 + new_unit: "1e-14 kg/m2/s" + category: "Surface emissions" + derivable_from: [ "SFSO4_PR", "SO4_PR_CMXF"] + #["SFSO4_A1", "SFSO4_A2", "SFSO4_AC", "SFSO4_NA", "SFSO4_PR"] + +SFmonoterp: + colormap: "Oranges" + contour_levels: [-4, -2, 0, 50, 100, 150, 200, 250, 300, 350, 400, 450, 550, 600 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-100, 105, 5] + scale_factor: 1000000000000 + add_offset: 0 + new_unit: "1e-12 kg/m2/s" + category: "Surface emissions" + +SFisoprene: + colormap: "Oranges" + contour_levels_range: [0, 2100, 100 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-500, 550, 50] + scale_factor: 1000000000000 + add_offset: 0 + new_unit: "1e-12 kg/m2/s" + category: "Surface emissions" + +SFVOC: + colormap: "Oranges" + contour_levels_range: [0, 2100, 100 ] + diff_colormap: "PuOr_r" + diff_contour_range: [-500, 550, 50] + scale_factor: 1000000000000 + add_offset: 0 + new_unit: "1e-12 kg/m2/s" + category: "Surface emissions" + derivable_from: [ "SFisoprene", "SFisoprene"] + #+++++++++++++++++ # Category: Budget #+++++++++++++++++ @@ -1745,6 +2032,11 @@ TREFHT: obs_file: "TREFHT_ERA5_monthly_climo_197901-202112.nc" obs_name: "ERA5" obs_var_name: "TREFHT" + contour_levels_range: [220,320, 5] + diff_contour_range: [-10, 10, 1] + scale_factor: 1 + add_offset: 0 + new_unit: "K" TS: colormap: "Blues" @@ -2565,7 +2857,6 @@ budget_tables: #----------- - # Plot Specific formatting ########################## diff --git a/lib/adf_web.py b/lib/adf_web.py index 265778624..923450a08 100644 --- a/lib/adf_web.py +++ b/lib/adf_web.py @@ -303,8 +303,8 @@ def _write_run_info_to_log(self, config_file, active_env): msg = f"{git_msg}\n{'-' * (len(git_msg)-1)}\n" log_msg += f"\n {msg}" - for key,val in git_info.items(): - log_msg += f" {key}: {val}\n" + for key,val in git_info.items(): + log_msg += f" {key}: {val}\n" self.debug_log(log_msg) @@ -851,9 +851,8 @@ def jinja_enumerate(arg): #If not, add it so the index.html file can include it for ptype in plot_types.keys(): if ptype not in avail_plot_types: - avail_plot_types.append(plot_types) - - + avail_plot_types.append(ptype) + # External packages that can be run through ADF avail_external_packages = {'MDTF':'mdtf_html_path', 'CVDP':'cvdp_html_path'} diff --git a/lib/plotting_functions.py b/lib/plotting_functions.py index 86911759e..6609033e7 100644 --- a/lib/plotting_functions.py +++ b/lib/plotting_functions.py @@ -75,6 +75,468 @@ #HELPER FUNCTIONS ################# +def load_dataset(fils): + """ + Parameters + ---------- + fils : list + strings or paths to input file(s) + + Returns + ------- + xr.Dataset + + Notes + ----- + When just one entry is provided, use `open_dataset`, otherwise `open_mfdataset` + """ + if len(fils) == 0: + warnings.warn("\t WARNING: Input file list is empty.") + return None + elif len(fils) > 1: + return xr.open_mfdataset(fils, combine="by_coords") + else: + return xr.open_dataset(fils[0]) + +def use_this_norm(): + """Just use the right normalization; avoids a deprecation warning.""" + + mplversion = [int(x) for x in mpl.__version__.split('.')] + if mplversion[0] < 3: + return mpl.colors.Normalize, mplversion[0] + else: + if mplversion[1] < 2: + return mpl.colors.DivergingNorm, mplversion[0] + else: + return mpl.colors.TwoSlopeNorm, mplversion[0] + + +def get_difference_colors(values): + """Provide a color norm and colormap assuming this is a difference field. + Parameters + ---------- + values : array-like + can be either the data field or a set of specified contour levels. + + Returns + ------- + dnorm + Matplotlib color nomalization + cmap + Matplotlib colormap + + Notes + ----- + Uses 'OrRd' colormap for positive definite, 'BuPu_r' for negative definite, + and 'RdBu_r' centered on zero if there are values of both signs. + """ + normfunc, mplv = use_this_norm() + dmin = np.min(values) + dmax = np.max(values) + # color normalization for difference + if ((dmin < 0) and (0 < dmax)): + dnorm = normfunc(vmin=np.min(values), vmax=np.max(values), vcenter=0.0) + cmap = mpl.cm.RdBu_r + else: + dnorm = mpl.colors.Normalize(vmin=np.min(values), vmax=np.max(values)) + if dmin >= 0: + cmap = mpl.cm.OrRd + elif dmax <= 0: + cmap = mpl.cm.BuPu_r + else: + dnorm = mpl.colors.TwoSlopeNorm(vmin=dmin, vcenter=0, vmax=dmax) + return dnorm, cmap + + +def mask_land_or_ocean(arr, msk, use_nan=False): + """Apply a land or ocean mask to provided variable. + + Parameters + ---------- + arr : xarray.DataArray + the xarray variable to apply the mask to. + msk : xarray.DataArray + the xarray variable that contains the land or ocean mask, + assumed to be the same shape as "arr". + use_nan : bool, optional + argument for whether to set the missing values + to np.nan values instead of the defaul "-999." values. + + Returns + ------- + arr : xarray.DataArray + Same as input `arr` but masked as specified. + """ + + if use_nan: + missing_value = np.nan + else: + missing_value = -999. + #End if + + arr = xr.where(msk>=0.9,arr,missing_value) + arr.attrs["missing_value"] = missing_value + return(arr) + + +def get_central_longitude(*args): + """Determine central longitude for maps. + + Allows an arbitrary number of arguments. + If any of the arguments is an instance of `AdfDiag`, then check + whether it has a `central_longitude` in `diag_basic_info`. + _This case takes precedence._ + _Else_, if any of the arguments are scalars in [-180, 360], + assumes the FIRST ONE is the central longitude. + There are no other possible conditions, so if none of those are met, + returns the default value of 180. + + Parameters + ---------- + *args : tuple + Any number of objects to check for `central_longitude`. + After that, looks for the first number between -180 and 360 in the args. + + Notes + ----- + This allows a script to, for example, allow a config file to specify, but also have a preference: + `get_central_longitude(AdfObj, 30.0)` + """ + chk_for_adf = [isinstance(arg, AdfDiag) for arg in args] + # preference is to get value from AdfDiag: + if any(chk_for_adf): + for arg in args: + if isinstance(arg, AdfDiag): + result = arg.get_basic_info('central_longitude', required=False) + if (isinstance(result, int) or isinstance(result, float)) and \ + (result >= -180) and (result <= 360): + return result + else: + #If result exists, then write info to debug log: + if result: + msg = f"central_lngitude of type '{type(result).__name__}'" + msg += f" and value '{result}', which is not a valid longitude" + msg += " for the ADF." + arg.debug_log(msg) + #End if + + #There is only one ADF object per ADF run, so if its + #not present or configured correctly then no + #reason to keep looking: + break + #End if + #End if + #End for + #End if + + # 2nd pass through arguments, look for numbers: + for arg in args: + if (isinstance(arg, float) or isinstance(arg, int)) and ((arg >= -180) and (arg <= 360)): + return arg + #End if + else: + # this is the `else` on the for loop --> if non of the arguments meet the criteria, do this. + print("No valid central longitude specified. Defaults to 180.") + return 180 + #End if + +####### + +def global_average(fld, wgt, verbose=False): + """A simple, pure numpy global average. + + Parameters + ---------- + fld : np.ndarray + an input ndarray + wgt : np.ndarray + a 1-dimensional array of weights, should be same size as one dimension of `fld` + verbose : bool, optional + prints information when `True` + + Returns + ------- + weighted average of `fld` + """ + + s = fld.shape + for i in range(len(s)): + if np.size(fld, i) == len(wgt): + a = i + break + fld2 = np.ma.masked_invalid(fld) + if verbose: + print("(global_average)-- fraction of mask that is True: {}".format(np.count_nonzero(fld2.mask) / np.size(fld2))) + print("(global_average)-- apply ma.average along axis = {} // validate: {}".format(a, fld2.shape)) + avg1, sofw = np.ma.average(fld2, axis=a, weights=wgt, returned=True) # sofw is sum of weights + + return np.ma.average(avg1) + + +def spatial_average(indata, weights=None, spatial_dims=None): + """Compute spatial average. + + Parameters + ---------- + indata : xr.DataArray + input data + weights : np.ndarray or xr.DataArray, optional + the weights to apply, see Notes for default behavior + spatial_dims : list, optional + list of dimensions to average, see Notes for default behavior + + Returns + ------- + xr.DataArray + weighted average of `indata` + + Notes + ----- + When `weights` is not provided, tries to find sensible values. + If there is a 'lat' dimension, use `cos(lat)`. + If there is a 'ncol' dimension, looks for `area` in `indata`. + Otherwise, set to equal weights. + + Makes an attempt to identify the spatial variables when `spatial_dims` is None. + Will average over `ncol` if present, and then will check for `lat` and `lon`. + When none of those three are found, raise an AdfError. + """ + import warnings + + if weights is None: + #Calculate spatial weights: + if 'lat' in indata.coords: + weights = np.cos(np.deg2rad(indata.lat)) + weights.name = "weights" + elif 'ncol' in indata.dims: + if 'area' in indata: + warnings.warn("area variable being used to generated normalized weights.") + weights = indata['area'] / indata['area'].sum() + else: + warnings.warn("\t We need a way to get area variable. Using equal weights.") + weights = xr.DataArray(1.) + weights.name = "weights" + else: + weights = xr.DataArray(1.) + weights.name = "weights" + warnings.warn("Un-recognized spatial dimensions: using equal weights for all grid points.") + #End if + #End if + + #Apply weights to input data: + weighted = indata.weighted(weights) + + # we want to average over all non-time dimensions + if spatial_dims is None: + if 'ncol' in indata.dims: + spatial_dims = ['ncol'] + else: + spatial_dims = [dimname for dimname in indata.dims if (('lat' in dimname.lower()) or ('lon' in dimname.lower()))] + + if not spatial_dims: + #Scripts using this function likely expect the horizontal dimensions + #to be removed via the application of the mean. So in order to avoid + #possibly unexpected behavior due to arrays being incorrectly dimensioned + #(which could be difficult to debug) the ADF should die here: + emsg = "spatial_average: No spatial dimensions were identified," + emsg += " so can not perform average." + raise AdfError(emsg) + + return weighted.mean(dim=spatial_dims, keep_attrs=True) + + +def wgt_rmse(fld1, fld2, wgt): + """Calculate the area-weighted RMSE. + + Parameters + ---------- + fld1, fld2 : array-like + 2-dimensional spatial fields with the same shape. + They can be xarray DataArray or numpy arrays. + wgt : array-like + the weight vector, expected to be 1-dimensional, + matching length of one dimension of the data. + + Returns + ------- + float + root mean squared error + + Notes: + ```rmse = sqrt( mean( (fld1 - fld2)**2 ) )``` + """ + assert len(fld1.shape) == 2, "Input fields must have exactly two dimensions." + assert fld1.shape == fld2.shape, "Input fields must have the same array shape." + # in case these fields are in dask arrays, compute them now. + if hasattr(fld1, "compute"): + fld1 = fld1.compute() + if hasattr(fld2, "compute"): + fld2 = fld2.compute() + if isinstance(fld1, xr.DataArray) and isinstance(fld2, xr.DataArray): + return (np.sqrt(((fld1 - fld2)**2).weighted(wgt).mean())).values.item() + else: + check = [len(wgt) == s for s in fld1.shape] + if ~np.any(check): + raise IOError(f"Sorry, weight array has shape {wgt.shape} which is not compatible with data of shape {fld1.shape}") + check = [len(wgt) != s for s in fld1.shape] + dimsize = fld1.shape[np.argwhere(check).item()] # want to get the dimension length for the dim that does not match the size of wgt + warray = np.tile(wgt, (dimsize, 1)).transpose() # May need more logic to ensure shape is correct. + warray = warray / np.sum(warray) # normalize + wmse = np.sum(warray * (fld1 - fld2)**2) + return np.sqrt( wmse ).item() + + +####### +# Time-weighted averaging + +def annual_mean(data, whole_years=False, time_name='time'): + """Calculate annual averages from monthly time series data. + + Parameters + ---------- + data : xr.DataArray or xr.Dataset + monthly data values with temporal dimension + whole_years : bool, optional + whether to restrict endpoints of the average to + start at first January and end at last December + time_name : str, optional + name of the time dimension, defaults to `time` + + Returns + ------- + result : xr.DataArray or xr.Dataset + `data` reduced to annual averages + + Notes + ----- + This function assumes monthly data, and weights the average by the + number of days in each month. + + `result` includes an attribute that reports the date range used for the average. + """ + assert time_name in data.coords, f"Did not find the expected time coordinate '{time_name}' in the data" + if whole_years: + first_january = np.argwhere((data.time.dt.month == 1).values)[0].item() + last_december = np.argwhere((data.time.dt.month == 12).values)[-1].item() + data_to_avg = data.isel(time=slice(first_january,last_december+1)) # PLUS 1 BECAUSE SLICE DOES NOT INCLUDE END POINT + else: + data_to_avg = data + date_range_string = f"{data_to_avg['time'][0]} -- {data_to_avg['time'][-1]}" + + # this provides the normalized monthly weights in each year + # -- do it for each year to allow for non-standard calendars (360-day) + # -- and also to provision for data with leap years + days_gb = data_to_avg.time.dt.daysinmonth.groupby('time.year').map(lambda x: x / x.sum()) + # weighted average with normalized weights: = SUM x_i * w_i (implied division by SUM w_i) + result = (data_to_avg * days_gb).groupby('time.year').sum(dim='time') + result.attrs['averaging_period'] = date_range_string + result.attrs['units'] = data.attrs.get("units",None) + return result + + +def seasonal_mean(data, season=None, is_climo=None): + """Calculates the time-weighted seasonal average (or average over all time). + + Parameters + ---------- + data : xarray.DataArray or xarray.Dataset + data to be averaged + season : str, optional + the season to extract from `data` + If season is `ANN` or None, average all available time. + is_climo : bool, optional + If True, expects data to have time or month dimenion of size 12. + If False, then 'time' must be a coordinate, + and the `time.dt.days_in_month` attribute must be available. + + Returns + ------- + xarray.DataArray or xarray.Dataset + the average of `data` in season `season` + + Notes + ----- + If the data is a climatology, the code will make an attempt to understand the time or month + dimension, but will assume that it is ordered from January to December. + If the data is a climatology and is just a numpy array with one dimension that is size 12, + it will assume that dimension is time running from January to December. + """ + if season is not None: + assert season in ["ANN", "DJF", "JJA", "MAM", "SON"], f"Unrecognized season string provided: '{season}'" + elif season is None: + season = "ANN" + + try: + month_length = data.time.dt.days_in_month + except (AttributeError, TypeError): + # do our best to determine the temporal dimension and assign weights + if not is_climo: + raise ValueError("Non-climo file provided, but without a decoded time dimension.") + else: + # CLIMO file: try to determine which dimension is month + has_time = False + if isinstance(data, xr.DataArray): + has_time = 'time' in data.dims + if not has_time: + if "month" in data.dims: + data = data.rename({"month":"time"}) + has_time = True + if not has_time: + # this might happen if a pure numpy array gets passed in + # --> assumes ordered January to December. + assert ((12 in data.shape) and (data.shape.count(12) == 1)), f"Sorry, {data.shape.count(12)} dimensions have size 12, making determination of which dimension is month ambiguous. Please provide a `time` or `month` dimension." + time_dim_num = data.shape.index(12) + fakedims = [f"dim{n}" for n in range(len(data.shape))] + data = xr.DataArray(data, dims=fakedims, attrs=data.attrs) + timefix = pd.date_range(start='1/1/1999', end='12/1/1999', freq='MS') # generic time coordinate from a non-leap-year + data = data.assign_coords({"time":timefix}) + month_length = data.time.dt.days_in_month + #End try/except + + data = data.sel(time=data.time.dt.month.isin(seasons[season])) # directly take the months we want based on season kwarg + return data.weighted(data.time.dt.daysinmonth).mean(dim='time', keep_attrs=True) + + + +####### + +#Polar Plot functions + +def domain_stats(data, domain): + """Provides statistics in specified region. + + Parameters + ---------- + data : xarray.DataArray + data values + domain : list or tuple or numpy.ndarray + the domain specification as: + [west_longitude, east_longitude, south_latitude, north_latitude] + + Returns + ------- + x_region_mean : float + the regional area-weighted average + x_region_max : float + the maximum value in the region + x_region_min : float + the minimum value in the region + + Notes + ----- + Currently assumes 'lat' is a dimension and uses `cos(lat)` as weight. + Should use `spatial_average` + + See Also + -------- + spatial_average + + """ + x_region = data.sel(lat=slice(domain[2],domain[3]), lon=slice(domain[0],domain[1])) + x_region_mean = x_region.weighted(np.cos(np.deg2rad(x_region['lat']))).mean().item() + x_region_min = x_region.min().item() + x_region_max = x_region.max().item() + return x_region_mean, x_region_max, x_region_min def make_polar_plot(wks, case_nickname, base_nickname, @@ -189,11 +651,17 @@ def make_polar_plot(wks, case_nickname, if 'contour_levels' in kwargs: levels1 = kwargs['contour_levels'] - norm1 = mpl.colors.Normalize(vmin=min(levels1), vmax=max(levels1)) + if 'log_normal' in kwargs: + norm1 = mpl.colors.LogNorm(vmin=min(levels1), vmax=max(levels1)) ##ADA + else: + norm1 = mpl.colors.Normalize(vmin=min(levels1), vmax=max(levels1)) elif 'contour_levels_range' in kwargs: assert len(kwargs['contour_levels_range']) == 3, "contour_levels_range must have exactly three entries: min, max, step" levels1 = np.arange(*kwargs['contour_levels_range']) - norm1 = mpl.colors.Normalize(vmin=min(levels1), vmax=max(levels1)) + if 'log_normal' in kwargs: + norm1 = mpl.colors.LogNorm(vmin=min(levels1), vmax=max(levels1)) ## ADA + else: + norm1 = mpl.colors.Normalize(vmin=min(levels1), vmax=max(levels1)) else: levels1 = np.linspace(minval, maxval, 12) norm1 = mpl.colors.Normalize(vmin=minval, vmax=maxval) @@ -302,16 +770,16 @@ def make_polar_plot(wks, case_nickname, st.set_y(0.95) #Set plot titles - case_title = "$\mathbf{Test}:$"+f"{case_nickname}\nyears: {case_climo_yrs[0]}-{case_climo_yrs[-1]}" + case_title = r"$\mathbf{Test}:$"+f"{case_nickname}\nyears: {case_climo_yrs[0]}-{case_climo_yrs[-1]}" ax1.set_title(case_title, loc='left', fontsize=6) #fontsize=tiFontSize if obs: obs_var = kwargs["obs_var_name"] obs_title = kwargs["obs_file"][:-3] - base_title = "$\mathbf{Baseline}:$"+obs_title+"\n"+"$\mathbf{Variable}:$"+f"{obs_var}" + base_title = r"$\mathbf{Baseline}:$"+obs_title+"\n"+r"$\mathbf{Variable}:$"+f"{obs_var}" ax2.set_title(base_title, loc='left', fontsize=6) #fontsize=tiFontSize else: - base_title = "$\mathbf{Baseline}:$"+f"{base_nickname}\nyears: {baseline_climo_yrs[0]}-{baseline_climo_yrs[-1]}" + base_title = r"$\mathbf{Baseline}:$"+f"{base_nickname}\nyears: {baseline_climo_yrs[0]}-{baseline_climo_yrs[-1]}" ax2.set_title(base_title, loc='left', fontsize=6) ax1.text(-0.2, -0.10, f"Mean: {d1_region_mean:5.2f}\nMax: {d1_region_max:5.2f}\nMin: {d1_region_min:5.2f}", transform=ax1.transAxes) @@ -322,7 +790,7 @@ def make_polar_plot(wks, case_nickname, ax3.set_title("Test % diff Baseline", loc='left', fontsize=8) ax4.text(-0.2, -0.10, f"Mean: {dif_region_mean:5.2f}\nMax: {dif_region_max:5.2f}\nMin: {dif_region_min:5.2f}", transform=ax4.transAxes) - ax4.set_title("$\mathbf{Test} - \mathbf{Baseline}$", loc='left', fontsize=8) + ax4.set_title(r"$\mathbf{Test} - \mathbf{Baseline}$", loc='left', fontsize=8) if "units" in kwargs: ax2.set_ylabel(kwargs["units"]) @@ -549,16 +1017,16 @@ def plot_map_vect_and_save(wks, case_nickname, base_nickname, st.set_y(0.85) #Set plot titles - case_title = "$\mathbf{Test}:$"+f"{case_nickname}\nyears: {case_climo_yrs[0]}-{case_climo_yrs[-1]}" + case_title = r"$\mathbf{Test}:$"+f"{case_nickname}\nyears: {case_climo_yrs[0]}-{case_climo_yrs[-1]}" ax[0].set_title(case_title, loc='left', fontsize=tiFontSize) if obs: obs_var = kwargs["obs_var_name"] obs_title = kwargs["obs_file"][:-3] - base_title = "$\mathbf{Baseline}:$"+obs_title+"\n"+"$\mathbf{Variable}:$"+f"{obs_var}" + base_title = r"$\mathbf{Baseline}:$"+obs_title+"\n"+r"$\mathbf{Variable}:$"+f"{obs_var}" ax[1].set_title(base_title, loc='left', fontsize=tiFontSize) else: - base_title = "$\mathbf{Baseline}:$"+f"{base_nickname}\nyears: {baseline_climo_yrs[0]}-{baseline_climo_yrs[-1]}" + base_title = r"$\mathbf{Baseline}:$"+f"{base_nickname}\nyears: {baseline_climo_yrs[0]}-{baseline_climo_yrs[-1]}" ax[1].set_title(base_title, loc='left', fontsize=tiFontSize) #Set stats: area_avg @@ -571,7 +1039,7 @@ def plot_map_vect_and_save(wks, case_nickname, base_nickname, # set rmse title: ax[-1].set_title(f"RMSE: ", fontsize=tiFontSize) - ax[-1].set_title("$\mathbf{Test} - \mathbf{Baseline}$", loc='left', fontsize=tiFontSize) + ax[-1].set_title(r"$\mathbf{Test} - \mathbf{Baseline}$", loc='left', fontsize=tiFontSize) if "units" in kwargs: ax[1].set_ylabel(f"[{kwargs['units']}]") @@ -746,6 +1214,19 @@ def plot_map_and_save(wks, case_nickname, base_nickname, dateline_direction_label=False) lat_formatter = LatitudeFormatter(number_format='0.0f', degree_symbol='') + + ## Just adding this to allow for LogNormal plotting: + if 'contour_levels' in kwargs: + levels1 = kwargs['contour_levels'] + if 'log_normal' in kwargs: + norm1 = mpl.colors.LogNorm(vmin=min(levels1), vmax=max(levels1)) ##ADA + cp_info['norm1'] = norm1 + elif 'contour_levels_range' in kwargs: + assert len(kwargs['contour_levels_range']) == 3, "contour_levels_range must have exactly three entries: min, max, step" + levels1 = np.arange(*kwargs['contour_levels_range']) + if 'log_normal' in kwargs: + norm1 = mpl.colors.LogNorm(vmin=min(levels1), vmax=max(levels1)) ## ADA + cp_info['norm1'] = norm1 for i, a in enumerate(wrap_fields): @@ -767,7 +1248,8 @@ def plot_map_and_save(wks, case_nickname, base_nickname, img.append(ax[i].contourf(lons,lats,a,colors="w",transform=ccrs.PlateCarree(),transform_first=True)) ax[i].text(0.4, 0.4, empty_message, transform=ax[i].transAxes, bbox=props) else: - img.append(ax[i].contourf(lons, lats, a, levels=levels, cmap=cmap, norm=norm, transform=ccrs.PlateCarree(), transform_first=True, **cp_info['contourf_opt'])) + img.append(ax[i].contourf(lons, lats, a, levels=levels, cmap=cmap, norm=norm, transform=ccrs.PlateCarree(), + transform_first=True, extend = "both", **cp_info['contourf_opt'])) #End if ax[i].set_title("AVG: {0:.3f}".format(area_avg[i]), loc='right', fontsize=11) @@ -782,16 +1264,16 @@ def plot_map_and_save(wks, case_nickname, base_nickname, st.set_y(0.85) #Set plot titles - case_title = "$\mathbf{Test}:$"+f"{case_nickname}\nyears: {case_climo_yrs[0]}-{case_climo_yrs[-1]}" + case_title = r"$\mathbf{Test}:$"+f"{case_nickname}\nyears: {case_climo_yrs[0]}-{case_climo_yrs[-1]}" ax[0].set_title(case_title, loc='left', fontsize=tiFontSize) if obs: obs_var = kwargs["obs_var_name"] obs_title = kwargs["obs_file"][:-3] - base_title = "$\mathbf{Baseline}:$"+obs_title+"\n"+"$\mathbf{Variable}:$"+f"{obs_var}" + base_title = r"$\mathbf{Baseline}:$"+obs_title+"\n"+r"$\mathbf{Variable}:$"+f"{obs_var}" ax[1].set_title(base_title, loc='left', fontsize=tiFontSize) else: - base_title = "$\mathbf{Baseline}:$"+f"{base_nickname}\nyears: {baseline_climo_yrs[0]}-{baseline_climo_yrs[-1]}" + base_title = r"$\mathbf{Baseline}:$"+f"{base_nickname}\nyears: {baseline_climo_yrs[0]}-{baseline_climo_yrs[-1]}" ax[1].set_title(base_title, loc='left', fontsize=tiFontSize) #Set stats: area_avg @@ -806,7 +1288,7 @@ def plot_map_and_save(wks, case_nickname, base_nickname, # set rmse title: ax[3].set_title(f"RMSE: {d_rmse:.3f}", fontsize=tiFontSize) - ax[3].set_title("$\mathbf{Test} - \mathbf{Baseline}$", loc='left', fontsize=tiFontSize) + ax[3].set_title(r"$\mathbf{Test} - \mathbf{Baseline}$", loc='left', fontsize=tiFontSize) ax[2].set_title("Test % Diff Baseline", loc='left', fontsize=tiFontSize,fontweight="bold") for a in ax: @@ -981,14 +1463,14 @@ def plot_zonal_mean_and_save(wks, case_nickname, base_nickname, #End if #Set plot titles - case_title = "$\mathbf{Test}:$"+f"{case_nickname}\nyears: {case_climo_yrs[0]}-{case_climo_yrs[-1]}" + case_title = r"$\mathbf{Test}:$"+f"{case_nickname}\nyears: {case_climo_yrs[0]}-{case_climo_yrs[-1]}" if obs: obs_var = kwargs["obs_var_name"] obs_title = kwargs["obs_file"][:-3] - base_title = "$\mathbf{Baseline}:$"+obs_title+"\n"+"$\mathbf{Variable}:$"+f"{obs_var}" + base_title = r"$\mathbf{Baseline}:$"+obs_title+"\n"+r"$\mathbf{Variable}:$"+f"{obs_var}" else: - base_title = "$\mathbf{Baseline}:$"+f"{base_nickname}\nyears: {baseline_climo_yrs[0]}-{baseline_climo_yrs[-1]}" + base_title = r"$\mathbf{Baseline}:$"+f"{base_nickname}\nyears: {baseline_climo_yrs[0]}-{baseline_climo_yrs[-1]}" if has_lev: # calculate zonal average: @@ -1042,7 +1524,7 @@ def plot_zonal_mean_and_save(wks, case_nickname, base_nickname, ax[0].set_title(case_title, loc='left', fontsize=tiFontSize) ax[1].set_title(base_title, loc='left', fontsize=tiFontSize) - ax[2].set_title("$\mathbf{Test} - \mathbf{Baseline}$", loc='left', fontsize=tiFontSize) + ax[2].set_title(r"$\mathbf{Test} - \mathbf{Baseline}$", loc='left', fontsize=tiFontSize) ax[3].set_title("Test % Diff Baseline", loc='left', fontsize=tiFontSize,fontweight="bold") @@ -1057,7 +1539,7 @@ def plot_zonal_mean_and_save(wks, case_nickname, base_nickname, fig.text(-0.03, 0.5, 'PRESSURE [hPa]', va='center', rotation='vertical') else: - line = Line2D([0], [0], label="$\mathbf{Test}:$"+f"{case_nickname} - years: {case_climo_yrs[0]}-{case_climo_yrs[-1]}", + line = Line2D([0], [0], label=r"$\mathbf{Test}:$"+f"{case_nickname} - years: {case_climo_yrs[0]}-{case_climo_yrs[-1]}", color="#1f77b4") # #1f77b4 -> matplotlib standard blue line2 = Line2D([0], [0], label=base_title, @@ -1087,7 +1569,7 @@ def plot_zonal_mean_and_save(wks, case_nickname, base_nickname, borderaxespad=0.0,fontsize=6,frameon=False) zonal_plot(adata['lat'], diff, ax=ax[1], color="k") - ax[1].set_title("$\mathbf{Test} - \mathbf{Baseline}$", loc='left', fontsize=10) + ax[1].set_title(r"$\mathbf{Test} - \mathbf{Baseline}$", loc='left', fontsize=10) zonal_plot(adata['lat'], pct, ax=ax[2], color="k") ax[2].set_title("Test % Diff Baseline", loc='left', fontsize=10,fontweight="bold") @@ -1231,14 +1713,14 @@ def plot_meridional_mean_and_save(wks, case_nickname, base_nickname, xdim = 'lon' # the name used for the x-axis dimension pltfunc = meridional_plot # the plotting function ... maybe we can generalize to get zonal/meridional into one function (?) - case_title = "$\mathbf{Test}:$"+f"{case_nickname}\nyears: {case_climo_yrs[0]}-{case_climo_yrs[-1]}" + case_title = r"$\mathbf{Test}:$"+f"{case_nickname}\nyears: {case_climo_yrs[0]}-{case_climo_yrs[-1]}" if obs: obs_var = kwargs["obs_var_name"] obs_title = kwargs["obs_file"][:-3] - base_title = "$\mathbf{Baseline}:$"+obs_title+"\n"+"$\mathbf{Variable}:$"+f"{obs_var}" + base_title = r"$\mathbf{Baseline}:$"+obs_title+"\n"+r"$\mathbf{Variable}:$"+f"{obs_var}" else: - base_title = "$\mathbf{Baseline}:$"+f"{base_nickname}\nyears: {baseline_climo_yrs[0]}-{baseline_climo_yrs[-1]}" + base_title = r"$\mathbf{Baseline}:$"+f"{base_nickname}\nyears: {baseline_climo_yrs[0]}-{baseline_climo_yrs[-1]}" if has_lev: # generate dictionary of contour plot settings: @@ -1279,7 +1761,7 @@ def plot_meridional_mean_and_save(wks, case_nickname, base_nickname, #Set plot titles ax[0].set_title(case_title, loc='left', fontsize=tiFontSize) ax[1].set_title(base_title, loc='left', fontsize=tiFontSize) - ax[2].set_title("$\mathbf{Test} - \mathbf{Baseline}$", loc='left', fontsize=tiFontSize) + ax[2].set_title(r"$\mathbf{Test} - \mathbf{Baseline}$", loc='left', fontsize=tiFontSize) ax[3].set_title("Test % Diff Baseline", loc='left', fontsize=tiFontSize, fontweight = "bold") # style the plot: @@ -1292,7 +1774,7 @@ def plot_meridional_mean_and_save(wks, case_nickname, base_nickname, fig.text(-0.03, 0.5, 'PRESSURE [hPa]', va='center', rotation='vertical') else: - line = Line2D([0], [0], label="$\mathbf{Test}:$"+f"{case_nickname} - years: {case_climo_yrs[0]}-{case_climo_yrs[-1]}", + line = Line2D([0], [0], label=r"$\mathbf{Test}:$"+f"{case_nickname} - years: {case_climo_yrs[0]}-{case_climo_yrs[-1]}", color="#1f77b4") # #1f77b4 -> matplotlib standard blue line2 = Line2D([0], [0], label=base_title, @@ -1308,7 +1790,7 @@ def plot_meridional_mean_and_save(wks, case_nickname, base_nickname, pltfunc(adata[xdim], diff, ax=ax[1], color="k") pltfunc(adata[xdim], pct, ax=ax[2], color="k") - ax[1].set_title("$\mathbf{Test} - \mathbf{Baseline}$", loc='left', fontsize=10) + ax[1].set_title(r"$\mathbf{Test} - \mathbf{Baseline}$", loc='left', fontsize=10) ax[2].set_title("Test % Diff Baseline", loc='left', fontsize=10, fontweight = "bold") #Set Main title for subplots: @@ -1511,4 +1993,4 @@ def square_contour_difference(fld1, fld2, **kwargs): return fig ##################### -#END HELPER FUNCTIONS \ No newline at end of file +#END HELPER FUNCTIONS diff --git a/scripts/analysis/amwg_table.py b/scripts/analysis/amwg_table.py index 25a836ba2..79137a2a8 100644 --- a/scripts/analysis/amwg_table.py +++ b/scripts/analysis/amwg_table.py @@ -108,7 +108,9 @@ def amwg_table(adf): #----------------------------------------- var_list = adf.diag_var_list var_defaults = adf.variable_defaults - + emislist = ["SFmonoterp","SFisoprene","SFSS","SFDUST", "SFSOA", "SFSO4", "SFSO2_net", "SFOM", "SFBC", "SFDMS", "SFH2O2","SFH2SO4"] + cblist=["cb_SULFATE","cb_isoprene","cb_monoterp","cb_DUST","cb_DMS","cb_BC","cb_OM","cb_H2O2","cb_H2SO4","cb_SALT", "cb_SO2"] + emicblist = emislist + cblist #Check if ocean or land fraction exist #in the variable list: for var in ["OCNFRAC", "LANDFRAC"]: @@ -224,13 +226,6 @@ def amwg_table(adf): #Load model variable data from file: ds = utils.load_dataset(ts_files) data = ds[var] - - #Extract units string, if available: - if hasattr(data, 'units'): - unit_str = data.units - else: - unit_str = '--' - #Check if variable has a vertical coordinate: if 'lev' in data.coords or 'ilev' in data.coords: print(f"\t WARNING: Variable '{var}' has a vertical dimension, "+\ @@ -272,15 +267,46 @@ def amwg_table(adf): ocn_frc_da = data #End if + if var in emislist: + area = _get_area(data) + data = (data*area).sum(dim={"lon","lat"}) + first_january = np.argwhere((data.time.dt.month == 1).values)[0].item() + last_december = np.argwhere((data.time.dt.month == 12).values)[-1].item() + data = data.isel(time=slice(first_january,last_december+1)) # PLUS 1 BECAUSE SLICE DOES NOT INCLUDE END POINT + + date_range_string = f"{data['time'][0]} -- {data['time'][-1]}" + + # this provides the seconds in months in each year + # -- do it for each year to allow for non-standard calendars (360-day) + # -- and also to provision for data with leap years + days_gb = data.time.dt.daysinmonth + # weighted average with normalized weights: = SUM x_i * w_i (implied division by SUM w_i) + data= (data * days_gb).groupby('time.year').sum(dim='time') + data = 1e-9*86400 * data + data.attrs['averaging_period'] = date_range_string + data.attrs['units'] = " Tg/yr" + + if var in cblist: + area = _get_area(data) + data = (data*area).sum(dim={"lon","lat"}) + data = 1e-9* data + data.attrs['units'] = " Tg" # we should check if we need to do area averaging: - if len(data.dims) > 1: + if len(data.dims) > 1 and var not in emicblist: # flags that we have spatial dimensions # Note: that could be 'lev' which should trigger different behavior # Note: we should be able to handle (lat, lon) or (ncol,) cases, at least data = utils.spatial_average(data) # changes data "in place" # In order to get correct statistics, average to annual or seasonal - data = utils.annual_mean(data, whole_years=True, time_name='time') + if var not in emislist: + data = utils.annual_mean(data, whole_years=True, time_name='time') + + #Extract units string, if available: + if hasattr(data, 'units'): + unit_str = data.units + else: + unit_str = '--' # Set values for columns cols = ['variable', 'unit', 'mean', 'sample size', 'standard dev.', @@ -352,6 +378,31 @@ def amwg_table(adf): ################## # Helper functions ################## +def _get_area(tmp_file): + """ + This function retrieves the files, latitude, and longitude information + in all the directories within the chosen dates. + """ + Earth_rad=6.371e6 # Earth Radius in meters + + lon = tmp_file['lon'].data + lon[lon > 180.] -= 360 # shift longitude from 0-360˚ to -180-180˚ + lat = tmp_file['lat'].data + + + dlon = np.abs(lon[1]-lon[0]) + dlat = np.abs(lat[1]-lat[0]) + + lon2d,lat2d = np.meshgrid(lon,lat) + + dy = Earth_rad*dlat*np.pi/180 + dx = Earth_rad*np.cos(lat2d*np.pi/180)*dlon*np.pi/180 + + _area = dx*dy + # End if + + # Variables to return + return _area def _get_row_vals(data): # Now that data is (time,), we can do our simple stats: @@ -414,4 +465,4 @@ def _df_comp_table(adf, output_location, case_names): adf.add_website_data(df_comp, "Case Comparison", case_names[0], plot_type="Tables") ############## -#END OF SCRIPT \ No newline at end of file +#END OF SCRIPT diff --git a/scripts/plotting/cam_taylor_diagram.py b/scripts/plotting/cam_taylor_diagram.py index e05a33f72..0d32ab7ef 100644 --- a/scripts/plotting/cam_taylor_diagram.py +++ b/scripts/plotting/cam_taylor_diagram.py @@ -162,7 +162,11 @@ def cam_taylor_diagram(adfobj): # LOOP OVER VARIABLES # for v in var_list: - base_x = _retrieve(adfobj, v, data_name, data_loc) # get the baseline field + try: + base_x = _retrieve(adfobj, v, data_name, data_loc) # get the baseline field + except Exception as e: + print(f"[WARN] Skipping variable '{v}' due to error: {e}") + continue for casenumber, case in enumerate(case_names): # LOOP THROUGH CASES case_x = _retrieve(adfobj, v, case, case_climo_loc[casenumber]) # ASSUMING `time` is 1-12, get the current season: @@ -588,4 +592,4 @@ def taylor_plot_finalize(wks, test_nicknames, casecolors, syear_cases, eyear_cas bias_legend_labels = ["> 20%", "10-20%", "5-10%", "1-5%", "< 1%"] wks.legend(handles=bias_legend_elements, labels=bias_legend_labels, loc='upper left', handler_map={tuple: HandlerTuple(ndivide=None, pad=2.)}, labelspacing=2, handletextpad=2, frameon=False, title=" - / + Bias", title_fontsize=18) - return wks \ No newline at end of file + return wks diff --git a/scripts/plotting/global_mean_timeseries.py b/scripts/plotting/global_mean_timeseries.py index a166a9b66..067d3ca66 100644 --- a/scripts/plotting/global_mean_timeseries.py +++ b/scripts/plotting/global_mean_timeseries.py @@ -6,14 +6,10 @@ """ from pathlib import Path -from types import NoneType import xarray as xr import numpy as np import matplotlib.pyplot as plt -import matplotlib.ticker as ticker - - import adf_utils as utils import warnings # use to warn user about missing files. warnings.formatwarning = utils.my_formatwarning @@ -32,61 +28,65 @@ def global_mean_timeseries(adfobj): print(f"{msg}\n {'-' * (len(msg)-3)}") # Gather ADF configurations + emislist = ["SFmonoterp","SFisoprene","SFSS","SFDUST", "SFSOA", "SFSO4", "SFSO2_net", "SFOM", "SFBC", "SFDMS", "SFH2O2","SFH2SO4"] + cblist=["cb_SULFATE","cb_isoprene","cb_monoterp","cb_DUST","cb_DMS","cb_BC","cb_OM","cb_H2O2","cb_H2SO4","cb_SALT", "cb_SO2"] + plot_loc = get_plot_loc(adfobj) plot_type = adfobj.read_config_var("diag_basic_info").get("plot_type", "png") res = adfobj.variable_defaults # will be dict of variable-specific plot preferences # or an empty dictionary if use_defaults was not specified in YAML. - # Loop over variables for field in adfobj.diag_var_list: - #Notify user of variable being plotted: - print(f"\t - time series plot for {field}") - - # Check res for any variable specific options that need to be used BEFORE going to the plot: - if field in res: - vres = res[field] - #If found then notify user, assuming debug log is enabled: - adfobj.debug_log(f"global_mean_timeseries: Found variable defaults for {field}") - else: - vres = {} + ts_files = adfobj.data.get_ref_timeseries_file(field) + # If no files exist, try to move to next variable. --> Means we can not proceed with this variable, and it'll be problematic later. + if not ts_files: + errmsg = f"Time series files for variable '{field}' not found. Script will continue to next variable." + warnings.warn(errmsg) + continue #End if - # reference time series (DataArray) - ref_ts_da = adfobj.data.load_reference_timeseries_da(field) - - base_name = adfobj.data.ref_case_label - - # Check to see if this field is available - if ref_ts_da is None: - if not adfobj.compare_obs: - print( - f"\t WARNING: Variable {field} for case '{base_name}' provides Nonetype. Skipping this variable" - ) + #TEMPORARY: For now, make sure only one file exists: + if len(ts_files) != 1: + errmsg = "Currently the AMWG table script can only handle one time series file per variable." + errmsg += f" Multiple files were found for the variable '{field}', so it will be skipped." + warnings.warn(errmsg) continue + #End if + + #Load model variable data from file: + ds = utils.load_dataset(ts_files) + ref_ts_da = ds[field] + + if field in emislist: + ref_ts_da = surface_emission(ref_ts_da) + elif field in cblist: + ref_ts_da_ga = column_burden(ref_ts_da) + # reference time series global average else: # check data dimensions: has_lat_ref, has_lev_ref = utils.zm_validate_dims(ref_ts_da) # check if this is a "2-d" varaible: if has_lev_ref: - print( - f"\t WARNING: Variable {field} has a lev dimension for '{base_name}', which does not work with this script." + warnings.warn( + f"\t Variable named {field} has a lev dimension, which does not work with this script." ) continue # End if # check if there is a lat dimension: if not has_lat_ref: - print( - f"\t WARNING: Variable {field} is missing a lat dimension for '{base_name}', cannot continue to plot." - ) + warnings.warn( + f"\t Variable named {field} is missing a lat dimension, cannot continue to plot." + ) continue # End if # reference time series global average ref_ts_da_ga = utils.spatial_average(ref_ts_da, weights=None, spatial_dims=None) - # annually averaged + # annually averaged + if field not in emislist: ref_ts_da = utils.annual_mean(ref_ts_da_ga, whole_years=True, time_name="time") # End if @@ -103,21 +103,27 @@ def global_mean_timeseries(adfobj): ref_label = ( adfobj.data.ref_nickname if adfobj.data.ref_nickname - else base_name + else adfobj.data.ref_case_label ) - - skip_var = False for case_name in adfobj.data.case_names: + c_ts_files = adfobj.data.get_timeseries_file(case_name, field) + # If no files exist, try to move to next variable. --> Means we can not proceed with this variable, and it'll be problematic later. + if not c_ts_files: + errmsg = f"Time series files for case: {case_name} and variable '{field}' not found. Script will continue to next variable." + warnings.warn(errmsg) + continue + #End if - c_ts_da = adfobj.data.load_timeseries_da(case_name, field) - - if c_ts_da is None: - print( - f"\t WARNING: Variable {field} for case '{case_name}' provides Nonetype. Skipping this variable" - ) - skip_var = True + #TEMPORARY: For now, make sure only one file exists: + if len(c_ts_files) != 1: + errmsg = "Currently the AMWG table script can only handle one time series file per variable." + errmsg += f" Multiple files were found for case: {case_name} and the variable '{field}', so it will be skipped." + print(errmsg) continue - # End if + + # Load model variable data from file: + _ds = utils.load_dataset(c_ts_files) + c_ts_da = _ds[field] # If no reference, we still need to check if this is a "2-d" varaible: # check data dimensions: @@ -125,62 +131,86 @@ def global_mean_timeseries(adfobj): # If 3-d variable, notify user, flag and move to next test case if has_lev_case: - print( + warnings.warn( f"\t WARNING: Variable {field} has a lev dimension for '{case_name}', which does not work with this script." ) - skip_var = True continue - # End if + #End if # check if there is a lat dimension: if not has_lat_case: - print( + warnings.warn( f"\t WARNING: Variable {field} is missing a lat dimension for '{case_name}', cannot continue to plot." ) skip_var = True continue # End if - # Gather spatial avg for test case - c_ts_da_ga = utils.spatial_average(c_ts_da) - case_ts[labels[case_name]] = utils.annual_mean(c_ts_da_ga) + # Case global average (keep noresm behavior) + if field in emislist: + c_ts_da_ga = surface_emission(c_ts_da) + elif field in cblist: + c_ts_da_ga = column_burden(c_ts_da) + else: + c_ts_da_ga = utils.spatial_average(c_ts_da, weights=None, spatial_dims=None) - # If this case is 3-d or missing variable, then break the loop and go to next variable - if skip_var: - continue + # annually averaged (keep noresm behavior) + if field not in emislist: + c_ts_da_ga = utils.annual_mean(c_ts_da_ga, whole_years=True, time_name="time") + + case_ts[labels[case_name]] = c_ts_da_ga - lens2_data = Lens2Data( - field - ) # Provides access to LENS2 dataset when available (class defined below) - ## SPECIAL SECTION -- CESM2 LENS DATA: - # Plot the timeseries fig, ax = make_plot( - case_ts, lens2_data, label=adfobj.data.ref_nickname, ref_ts_da=ref_ts_da + ref_ts_da, case_ts, field, label=ref_label ) - - unit = vres.get("new_unit","[-]") - ax.set_ylabel(getattr(ref_ts_da,"unit", unit)) # add units + ax.set_ylabel(getattr(ref_ts_da,"units", "[-]")) # add units plot_name = plot_loc / f"{field}_GlobalMean_ANN_TimeSeries_Mean.{plot_type}" conditional_save(adfobj, plot_name, fig) + + if field in res: + vres = res[field] + #If found then notify user, assuming debug log is enabled: + adfobj.debug_log(f"global_latlon_map: Found variable defaults for {field}") + + #Extract category (if available): + web_category = vres.get("category", None) + + else: + vres = {} + web_category = None adfobj.add_website_data( plot_name, f"{field}_GlobalMean", None, + category=web_category, season="ANN", multi_case=True, plot_type="TimeSeries", ) - #Notify user that script has ended: - print(" ... global mean time series plots have been generated successfully.") - - -# Helper/plotting functions -########################### +def column_burden(_data): + _area = _get_area(_data) + _data = (_data*_area).sum(dim={"lon","lat"}) + _data = 1e-9* _data + _data.attrs['units'] = " Tg" + return _data + +def surface_emission(_data): + _area = _get_area(_data) + _data = (_data*_area).sum(dim={"lon","lat"}) + # this provides the seconds in months in each year + # -- do it for each year to allow for non-standard calendars (360-day) + # -- and also to provision for data with leap years + _days_gb = _data.time.dt.daysinmonth + # weighted average with normalized weights: = SUM x_i * w_i (implied division by SUM w_i) + _data= (_data * _days_gb).groupby('time.year').sum(dim='time') + _data = 1e-9*86400 * _data + _data.attrs['units'] = " Tg/yr" + return _data def conditional_save(adfobj, plot_name, fig, verbose=None): """Determines whether to save figure""" @@ -260,57 +290,56 @@ def _include_lens(self): return has_lens, lens2 ###### - -def make_plot(case_ts, lens2, label=None, ref_ts_da=None): +def make_plot(ref_ts_da, case_ts, field, label=None): """plot yearly values of ref_ts_da""" fig, ax = plt.subplots() - - # Plot reference/baseline if available - if type(ref_ts_da) != NoneType: - ax.plot(ref_ts_da.year, ref_ts_da, label=label) - else: - return fig, ax - for idx, (c, cdata) in enumerate(case_ts.items()): + ax.plot(ref_ts_da.year, ref_ts_da, label=label) + for c, cdata in case_ts.items(): ax.plot(cdata.year, cdata, label=c) - # Force the plot axis to always plot the test case years - if idx == 0: - syr = min(cdata.year) - eyr = max(cdata.year) - - field = lens2.field # this will be defined even if no LENS2 data - if lens2.has_lens: - lensmin = lens2.lens2[field].min("M") # note: "M" is the member dimension - lensmax = lens2.lens2[field].max("M") - ax.fill_between(lensmin.year, lensmin, lensmax, color="lightgray", alpha=0.5) - ax.plot( - lens2.lens2[field].year, - lens2.lens2[field].mean("M"), - color="darkgray", - linewidth=2, - label="LENS2", - ) # Get the current y-axis limits ymin, ymax = ax.get_ylim() # Check if the y-axis crosses zero if ymin < 0 < ymax: ax.axhline(y=0, color="lightgray", linestyle="-", linewidth=1) ax.set_title(field, loc="left") - - # Set the x-axis limits to the first test case climo years - ax.set_xlim(syr, eyr) - # Force x-axis to use only integer labels - ax.xaxis.set_major_locator(ticker.MaxNLocator(integer=True)) - ax.set_xlabel("YEAR") # Place the legend - ax.legend( - bbox_to_anchor=(0.5, -0.15), loc="upper center", ncol=min(len(case_ts), 3) - ) + handles, labels = ax.get_legend_handles_labels() + if handles and labels: + ax.legend( + bbox_to_anchor=(0.5, -0.15),loc="upper center", ncol=min(len(handles), 3), + ) plt.tight_layout(pad=2, w_pad=1.0, h_pad=1.0) return fig, ax -###### +################## +# Helper functions +################## +def _get_area(tmp_file): + """ + This function retrieves the files, latitude, and longitude information + in all the directories within the chosen dates. + """ + Earth_rad=6.371e6 # Earth Radius in meters + + lon = tmp_file['lon'].data + lon[lon > 180.] -= 360 # shift longitude from 0-360˚ to -180-180˚ + lat = tmp_file['lat'].data + + dlon = np.abs(lon[1]-lon[0]) + dlat = np.abs(lat[1]-lat[0]) + + lon2d,lat2d = np.meshgrid(lon,lat) + + dy = Earth_rad*dlat*np.pi/180 + dx = Earth_rad*np.cos(lat2d*np.pi/180)*dlon*np.pi/180 + + _area = dx*dy + + # Variables to return + return _area +#END OF SCRIPT ############## -#END OF SCRIPT \ No newline at end of file +