From a629ef27af546743e736d7cae157603ac43b322b Mon Sep 17 00:00:00 2001 From: Rolf Reichle Date: Thu, 12 Jun 2025 08:27:33 -0400 Subject: [PATCH 01/45] cleaned up "domain" variables in ObsFcstAna postprocessing package --- .../util/postproc/ObsFcstAna_stats/Plot_stats_maps.py | 2 +- .../util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py | 4 +--- GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py | 7 ++++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py index ed44a8db..b6297ca4 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py @@ -141,7 +141,7 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'): grid_data, uy, ux = remap_1d_to_2d(tile_data, lat_1d = tc['com_lat'], lon_1d = tc['com_lon']) lon_2d,lat_2d = np.meshgrid(ux, uy) - # Aear weighted mean and mean(abs) + # Area weighted mean and mean(abs) wmean = np.nansum( tile_data * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area']) wabsmean = np.nansum(np.abs(tile_data) * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area']) if 'normalized' in title_txt: diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py index 41b86df9..8594050f 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py @@ -82,7 +82,6 @@ def compute_monthly_sums(self, date_time): expdir_list = self.expdir_list expid_list = self.expid_list - domain = self.domain tc = self.tilecoord obsparam_list = self.obsparam_list var_list = self.var_list @@ -112,7 +111,7 @@ def compute_monthly_sums(self, date_time): # read the list of experiments at each time step (OFA="ObsFcstAna") OFA_list = [] for i in range(len(expdir_list)): - fname = expdir_list[i]+expid_list[i]+'/output/'+domain+'/ana/ens_avg/Y'+ \ + fname = expdir_list[i]+expid_list[i]+'/output/'+self.domain+'/ana/ens_avg/Y'+ \ date_time.strftime('%Y') + '/M' + \ date_time.strftime('%m') + '/' + \ expid_list[i]+'.ens_avg.ldas_ObsFcstAna.' + \ @@ -190,7 +189,6 @@ def compute_monthly_sums(self, date_time): def save_monthly_sums(self): expdir_list = self.expdir_list expid_list = self.expid_list - domain = self.domain var_list = self.var_list tc = self.tilecoord obsparam_list = self.obsparam_list diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py index 27e484fc..cc3f8d13 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py @@ -39,7 +39,8 @@ def get_config(): # Optional experiment(s) can be added for cross-masking or extracting obs from a different experiment. # - # All optional experiments and the main experiment must have identical tilecoords. + # All optional experiments and the main experiment must have identical tile space (BCs resolution) and domain. + # # If the default "species" number/order do not match, set "species_list" accordingly to force a match. # Output will be cross-masked between all specified experiments. # @@ -52,7 +53,6 @@ def get_config(): exp_sup1 = { 'expdir' : '/discover/nobackup/projects/gmao/merra/iau/merra_land/SMAP_runs/SMAP_Nature_v11/', 'expid' : 'DAv8_M36', - 'domain' : 'SMAP_EASEv2_M36_GLOBAL', 'use_obs' : True, # if True, use obs data from this exp 'species_list' : [1,2,3,4], # indices of species to be processed, # must identify same species as selected in main exp @@ -90,11 +90,12 @@ def get_config(): end_time = datetime( end_year, end_month, 1) # Get tilecoord and obsparam information for each experiment + + domain = exp_list[0]['domain'] for exp in exp_list: expdir = exp['expdir'] expid = exp['expid'] - domain = exp['domain'] YYYY = exp['obsparam_time'][0:4] MM = exp['obsparam_time'][4:6] From 9267abb48dc9adeae5e21c6efc49592bef0afeec Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Mon, 16 Jun 2025 10:16:49 -0400 Subject: [PATCH 02/45] add read_tilegrids() function --- .../util/shared/python/read_GEOSldas.py | 58 +++++++++++++++++++ .../util/shared/python/remap_1d_to_2d.py | 29 ---------- 2 files changed, 58 insertions(+), 29 deletions(-) delete mode 100644 GEOSldas_App/util/shared/python/remap_1d_to_2d.py diff --git a/GEOSldas_App/util/shared/python/read_GEOSldas.py b/GEOSldas_App/util/shared/python/read_GEOSldas.py index 96ba4173..1e728584 100644 --- a/GEOSldas_App/util/shared/python/read_GEOSldas.py +++ b/GEOSldas_App/util/shared/python/read_GEOSldas.py @@ -99,6 +99,64 @@ def read_tilecoord(fname): print("done reading file") return tile_coord +import struct +import numpy as np + +# ---------------------------------------------------------------------------- +# +# reader for GEOSldas tilecoord file (binary) + +def read_tilegrids(fname): + """ + Read tile grid information from file and return global and domain grid structures. + + Parameters: + ---------- + fname : str + Path to the input file (either .txt or .bin) + + Returns: + ------- + tile_grid_g : dict + Global tile grid information + tile_grid_d : dict + Domain tile grid information + """ + + # Set endian format + endian = '<' # '>' for big-endian, '<' for little-endian + + # Read binary file + print(f'reading from {fname}') + + with open(fname, 'rb') as ifp: + # Read first record: "global" grid (tile_grid_g) + for grid in ['global','domain']: + tile_grid = {} + fortran_tag = struct.unpack(f'{endian}i', ifp.read(4))[0] + tile_grid['gridtype'] = ifp.read(40).decode('ascii').strip('\x00') + tile_grid['ind_base'] = struct.unpack(f'{endian}i', ifp.read(4))[0] + tile_grid['i_dir'] = struct.unpack(f'{endian}i', ifp.read(4))[0] + tile_grid['j_dir'] = struct.unpack(f'{endian}i', ifp.read(4))[0] + tile_grid['N_lon'] = struct.unpack(f'{endian}i', ifp.read(4))[0] + tile_grid['N_lat'] = struct.unpack(f'{endian}i', ifp.read(4))[0] + tile_grid['i_offg'] = struct.unpack(f'{endian}i', ifp.read(4))[0] + tile_grid['j_offg'] = struct.unpack(f'{endian}i', ifp.read(4))[0] + tile_grid['ll_lon'] = struct.unpack(f'{endian}f', ifp.read(4))[0] + tile_grid['ll_lat'] = struct.unpack(f'{endian}f', ifp.read(4))[0] + tile_grid['ur_lon'] = struct.unpack(f'{endian}f', ifp.read(4))[0] + tile_grid['ur_lat'] = struct.unpack(f'{endian}f', ifp.read(4))[0] + tile_grid['dlon'] = struct.unpack(f'{endian}f', ifp.read(4))[0] + tile_grid['dlat'] = struct.unpack(f'{endian}f', ifp.read(4))[0] + fortran_tag = struct.unpack(f'{endian}i', ifp.read(4))[0] + + if 'global' in grid: + tile_grid_g = tile_grid + else: + tile_grid_d = tile_grid + + return tile_grid_g, tile_grid_d + # ---------------------------------------------------------------------------- # # reader for GEOSldas ObsFcstAna file (binary) diff --git a/GEOSldas_App/util/shared/python/remap_1d_to_2d.py b/GEOSldas_App/util/shared/python/remap_1d_to_2d.py deleted file mode 100644 index d0a1d9fc..00000000 --- a/GEOSldas_App/util/shared/python/remap_1d_to_2d.py +++ /dev/null @@ -1,29 +0,0 @@ -import os -import numpy as np - -# convert data from 1-d array ("compressed") format to 2-d array (grid) format based on lat/lon coordinates -# -# 1-d input data can have one additional dimension (e.g., time) -# -# 2-d output data is lat-by-lon[-by-time] - -def remap_1d_to_2d( data_1d, *, lat_1d, lon_1d): - - # Extract unique coordinates - unique_lats, indY = np.unique(lat_1d, return_inverse=True) - unique_lons, indX = np.unique(lon_1d, return_inverse=True) - - ny = len(unique_lats) - nx = len(unique_lons) - - if data_1d.ndim == 2: #[n_1d, ntime] - ntime = data_1d.shape[1] - data_2d = np.full([ny, nx, ntime], np.nan) - data_2d[indY, indX, :] = data_1d - elif data_1d.ndim == 1: #[n_1d] - data_2d = np.full([ny, nx], np.nan) - data_2d[indY, indX ] = data_1d - - return data_2d, unique_lats, unique_lons - - From 60c30193ea62cf7ee4977538d9c4450418608cb0 Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Mon, 16 Jun 2025 10:17:44 -0400 Subject: [PATCH 03/45] update tile2grid options --- GEOSldas_App/util/shared/python/tile2grid.py | 140 +++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 GEOSldas_App/util/shared/python/tile2grid.py diff --git a/GEOSldas_App/util/shared/python/tile2grid.py b/GEOSldas_App/util/shared/python/tile2grid.py new file mode 100644 index 00000000..e99b2f05 --- /dev/null +++ b/GEOSldas_App/util/shared/python/tile2grid.py @@ -0,0 +1,140 @@ +import os +import numpy as np + +# convert data from 1-d array ("compressed") format to 2-d array (grid) format based on lat/lon coordinates +# +# 1-d input data can have one additional dimension (e.g., time) +# +# 2-d output data is lat-by-lon[-by-time] +def interpolate_to_latlon_grid(data_1d, lat, lon, resolution=1.0): + """ + For cubedsphere grid: use + + """ + from scipy.interpolate import griddata + + lat_grid = np.arange(-90+resolution/2, 90, resolution) + lon_grid = np.arange(-180+resolution/2, 180, resolution) + lon_2d, lat_2d = np.meshgrid(lon_grid, lat_grid) + + ny = len(lat_grid) + nx = len(lon_grid) + + nf = data_1d.shape[-1] + data_2d = np.full([ny, nx, nf], np.nan) + for i in range(nf): + data = data_1d[:,i] + data_2d[:,:,i] = griddata((lat.flatten(), lon.flatten()), data.flatten(), (lat_2d, lon_2d), method='linear') + + return data_2d, lat_2d, lon_2d + +def remap_1d_to_2d( data_1d, lat_1d, lon_1d): + + """ + For EASE grid: use remapping + + """ + + # Extract unique coordinates + unique_lats, indY = np.unique(lat_1d, return_inverse=True) + unique_lons, indX = np.unique(lon_1d, return_inverse=True) + + ny = len(unique_lats) + nx = len(unique_lons) + + nf = data_1d.shape[1] + data_2d = np.full([ny, nx, nf], np.nan) + data_2d[indY, indX, :] = data_1d + + lon_2d,lat_2d = np.meshgrid(unique_lons, unique_lats) + + return data_2d, lat_2d, lon_2d + +def tile2grid( tile_data, tile_coord, tile_grid, nodata=1.e15, nodata_tol=1.e-4): + + """ + Convert tile-space data to grid-space data. + + Parameters: + ---------- + tile_data : Array in tile space, shape (N_tile, N_fields) + tile_coord : Dictionary containing tile coordinate information + tile_grid : Dictionary containing grid information + nodata : Value for missing data + nodata_tol : Tolerance for comparing values to nodata + + Returns: + ------- + grid_data : Array in grid space, shape (N_lat, N_lon, N_fields) + lat_2d: Lat array in grid space, shape (N_lat, N_lon) + lon_2d: Lon array in grid space, shape (N_lat, N_lon) + + """ + # Verifity input input datasize + if tile_data.shape[0] != tile_coord['N_tile']: + print(f'Error: tile2grid input data size don''t match N_tile') + sys.exit() + + # if tile_data is 1-D [N_tile], expand into 2-D [N_tile,1] + if tile_data.ndim == 1: + tile_data = np.expand_dims(tile_data, axis=1) + + N_fields = tile_data.shape[-1] + + # Determine grid_type (cubedsphere, EASE or regular latlon) + is_CS = int(tile_grid['N_lat']/tile_grid['N_lon']) == 6 + is_EASE = 'EASE' in tile_grid['gridtype'] + + if is_CS: + + grid_data, lat_2d,lon_2d = interpolate_to_latlon_grid( tile_data, tile_coord['com_lat'], tile_coord['com_lon']) + + elif is_EASE: + + grid_data, lat_2d,lon_2d = remap_1d_to_2d( tile_data, tile_coord['com_lat'], tile_coord['com_lon']) + + else: + + # Initialize grid data array + grid_data = np.zeros((tile_grid['N_lat'], tile_grid['N_lon'], N_fields)) + + for k in range(N_fields): + # Initialize weight grid for current field + wgrid = np.zeros((tile_grid['N_lat'], tile_grid['N_lon'])) + + # Loop through tile space + for n in range(tile_coord['N_tile']): + # Calculate grid indices (adjust for Python's 0-based indexing) + i = tile_coord['i_indg'][n] - (tile_grid['i_offg'] - (1-tile_grid['ind_base'])) - 1 + j = tile_coord['j_indg'][n] - (tile_grid['j_offg'] - (1-tile_grid['ind_base'])) - 1 + + # Get weight + w = tile_coord['frac_cell'][n] + + # Check if current tile data is valid (not no-data) + if abs(tile_data[k, n] - nodata) > nodata_tol: + # Accumulate weighted data + grid_data[j, i, k] += w * tile_data[n,k] + wgrid[j, i] += w + + # Normalize and set no-data values + for i in range(tile_grid['N_lon']): + for j in range(tile_grid['N_lat']): + if wgrid[j, i] > 0.0: + # Normalize by total weight + grid_data[j, i, k] = grid_data[j, i, k] / wgrid[j, i] + else: + # Set no-data value + grid_data[j, i, k] = nodata + + # Replace nodata values with NaN + grid_data[grid_data == nodata] = np.nan + + lat_2d = np.arange(tile_grid['ll_lat']+0.5*tile_grid['dlat'], tile_grid['ur_lat'], tile_grid['dlat']) + lon_2d = np.arange(tile_grid['ll_lon']+0.5*tile_grid['dlon'], tile_grid['ur_lon'], tile_grid['dlon']) + + if grid_data.shape[-1] == 1: + grid_data = grid_data[:,:,0] + + return grid_data, lat_2d, lon_2d + From 0b816644054ab879cc2e181263780fa56f1c1705 Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Mon, 16 Jun 2025 10:20:22 -0400 Subject: [PATCH 04/45] add tilegrid info. in config --- .../util/postproc/ObsFcstAna_stats/user_config.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py index cc3f8d13..6384e410 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py @@ -6,7 +6,7 @@ import numpy as np from datetime import datetime, timedelta -from read_GEOSldas import read_tilecoord, read_obs_param +from read_GEOSldas import read_tilecoord, read_tilegrids, read_obs_param from postproc_ObsFcstAna import postproc_ObsFcstAna def get_config(): @@ -20,7 +20,7 @@ def get_config(): start_year = 2015 start_month = 4 last_year = 2016 - last_month = 4 + last_month = 3 # Sums or stats will be processed for exp_main: @@ -66,7 +66,7 @@ def get_config(): # Top level directory for all output from this package: - out_path = '/discover/nobackup/[USERNAME]/SMAP_test/' + out_path = '/discover/nobackup/qliu/SMAP_test/' # Directory for monthly sum files: # - Can use the experiment directory or a different path. @@ -115,8 +115,11 @@ def get_config(): ftc = expdir+expid+'/output/'+ domain+'/rc_out/'+ expid+'.ldas_tilecoord.bin' tc = read_tilecoord(ftc) + ftg = expdir+expid+'/output/'+ domain+'/rc_out/'+ expid+'.ldas_tilegrids.bin' + tg_global, tg_domain = read_tilegrids(ftg) + # add tilecoord and obsparam into to exp - exp.update({'tilecoord':tc, 'obsparam':obsparam}) + exp.update({'tilecoord':tc, 'obsparam':obsparam, 'tilegrid_global':tg_global,'tilegrid_domain': tg_domain}) # verify that obs species match across experiments for exp_idx, exp in enumerate(exp_list) : From 33b9123393de1bd2511ed82cab552e3370f8cf8b Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Mon, 16 Jun 2025 10:21:24 -0400 Subject: [PATCH 05/45] add tilegrids info --- .../util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py index 8594050f..5748d633 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py @@ -32,6 +32,8 @@ def __init__(self, exp_list, start_time, end_time, sum_path='./'): self.da_dt = exp_list[0]['da_dt'] self.var_list = ['obs_obs','obs_obsvar','obs_fcst','obs_fcstvar','obs_ana','obs_anavar'] self.tilecoord = exp_list[0]['tilecoord'] + self.tilegrid_global = exp_list[0]['tilegrid_global'] + self.tilegrid_domain = exp_list[0]['tilegrid_domain'] self.obsparam_list = [item['obsparam'] for item in exp_list] self.sum_path = sum_path From 79ff6a16f3760687c9cae4870e58ae406d8dc5f2 Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Mon, 16 Jun 2025 10:24:06 -0400 Subject: [PATCH 06/45] change remap_1d_to_2d() to tile2grid() --- .../postproc/ObsFcstAna_stats/Plot_stats_maps.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py index b6297ca4..7247b547 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py @@ -15,7 +15,7 @@ from datetime import timedelta -from remap_1d_to_2d import remap_1d_to_2d +from remap_1d_to_2d import tile2grid from plot import plotMap from EASEv2 import EASEv2_ind2latlon @@ -25,6 +25,7 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'): end_time = postproc_obj.end_time domain = postproc_obj.domain tc = postproc_obj.tilecoord + tg = postproc_obj.tilegrid_global # Sample of final compuation of selected diagnostic metrics @@ -138,9 +139,8 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'): lat_M36, lon_M36 = EASEv2_ind2latlon(np.arange(406), np.arange(964),'M36') lon_2d,lat_2d = np.meshgrid(lon_M36,lat_M36) else: - grid_data, uy, ux = remap_1d_to_2d(tile_data, lat_1d = tc['com_lat'], lon_1d = tc['com_lon']) - lon_2d,lat_2d = np.meshgrid(ux, uy) - + grid_data, lat_2d, lon_2d = tile2grid(tile_data, tc, tg) + # Area weighted mean and mean(abs) wmean = np.nansum( tile_data * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area']) wabsmean = np.nansum(np.abs(tile_data) * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area']) @@ -157,10 +157,10 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'): if 'normalized' in title_txt: grid_data = np.log10(grid_data) crange = [-0.6, 0.45] - + mm, cs = plotMap(grid_data, ax =axes[i,j], lat=lat_2d, lon=lon_2d, cRange=crange, \ title=title_txt, cmap=colormap, bounding=[-60, 80, -180,180]) - + plt.tight_layout() # Save figure to file fig.savefig(fig_path+'Map_OmF_'+ exptag +'_'+start_time.strftime('%Y%m')+'_'+\ From b706b90eeca8f571427746ba2dbca152ee29e9ab Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Mon, 16 Jun 2025 10:27:15 -0400 Subject: [PATCH 07/45] correct module name for tile2grid --- GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py index 7247b547..800a4230 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py @@ -15,7 +15,7 @@ from datetime import timedelta -from remap_1d_to_2d import tile2grid +from tile2grid import tile2grid from plot import plotMap from EASEv2 import EASEv2_ind2latlon From 861923bfdbeabb9670704e8998a62fad77fa884a Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Mon, 16 Jun 2025 15:14:29 -0400 Subject: [PATCH 08/45] change to save stats of individual species --- .../ObsFcstAna_stats/Plot_stats_timeseries.py | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py index db72e9f0..63ca9a3a 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py @@ -28,8 +28,8 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'): if os.path.isfile(stats_file): with open(stats_file,'rb') as file: - stats_dict = pickle.load(file) - date_vec = stats_dict['date_vec'] + stats = pickle.load(file) + date_vec = stats['date_vec'] else: @@ -47,17 +47,7 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'): print('compute monthly spatial mean stats for '+ current_month.strftime('%Y%m')) OmFm,OmFs,OmAm,OmAs,Nobsm = postproc_obj.calc_spatial_stats_from_sums(current_month) - - # Compute Nobs-weighted avg of each metric across all species. - # Typically used for SMAP Tb_h/h from asc and desc overpasses, - # or ASCAT soil moisture from Metop-A/B/C. - # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS! - Nobsm = np.nansum( Nobsm) - OmFm = np.nansum(OmFm*Nobsm)/Nobsm - OmFs = np.nansum(OmFs*Nobsm)/Nobsm - OmAm = np.nansum(OmAm*Nobsm)/Nobsm - OmAs = np.nansum(OmAs*Nobsm)/Nobsm - + Ndata.append(Nobsm) OmF_mean.append(OmFm) @@ -69,18 +59,30 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'): current_month = current_month + relativedelta(months=1) # Store stats in a dictionary for easier saving and referencing - stats_dict = {"OmF_mean":OmF_mean, "OmF_stdv":OmF_stdv, - "OmA_mean":OmA_mean, "OmA_stdv":OmA_stdv, - "Ndata": Ndata, "date_vec":date_vec} + stats = {"OmF_mean":np.array(OmF_mean), "OmF_stdv":np.array(OmF_stdv), + "OmA_mean":np.array(OmA_mean), "OmA_stdv":np.array(OmA_stdv), + "Ndata": np.array(Ndata), "date_vec":date_vec} if stats_file is not None: with open(stats_file,'wb') as file: - pickle.dump(stats_dict,file) + pickle.dump(stats,file) + + # Compute Nobs-weighted avg of each metric across all species. + # Typically used for SMAP Tb_h/h from asc and desc overpasses, + # or ASCAT soil moisture from Metop-A/B/C. + # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS! + stats_plot = {} + Ndata = np.nansum(stats['Ndata'], axis=1) + stats_plot['Ndata'] = Ndata + stats_plot['OmF_mean'] = np.nansum(stats['OmF_mean']*stats['Ndata'], axis=1)/Ndata + stats_plot['OmF_stdv'] = np.nansum(stats['OmF_stdv']*stats['Ndata'], axis=1)/Ndata + stats_plot['OmA_mean'] = np.nansum(stats['OmA_mean']*stats['Ndata'], axis=1)/Ndata + stats_plot['OmA_stdv'] = np.nansum(stats['OmA_stdv']*stats['Ndata'], axis=1)/Ndata plot_var = 'OmF_mean' fig, ax = plt.subplots(figsize=(10,4)) - bars = ax.bar(date_vec, stats_dict[plot_var]) + bars = ax.bar(date_vec, stats_plot[plot_var]) plt.grid(True, linestyle='--', alpha=0.5) From 57bae3c360757fbeffec8aa49e9c70ec649ee6ff Mon Sep 17 00:00:00 2001 From: Rolf Reichle Date: Tue, 17 Jun 2025 09:42:46 -0400 Subject: [PATCH 09/45] cleanup: corrected comments, removed redundant import statements, consistent variable name for endianness, vertical alignment (read_GEOSldas.py) --- .../util/shared/python/read_GEOSldas.py | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/GEOSldas_App/util/shared/python/read_GEOSldas.py b/GEOSldas_App/util/shared/python/read_GEOSldas.py index 1e728584..ccc1aa18 100644 --- a/GEOSldas_App/util/shared/python/read_GEOSldas.py +++ b/GEOSldas_App/util/shared/python/read_GEOSldas.py @@ -1,7 +1,6 @@ # collection of readers for GEOSldas output files -import numpy as np import struct import os import numpy as np @@ -99,9 +98,6 @@ def read_tilecoord(fname): print("done reading file") return tile_coord -import struct -import numpy as np - # ---------------------------------------------------------------------------- # # reader for GEOSldas tilecoord file (binary) @@ -124,31 +120,33 @@ def read_tilegrids(fname): """ # Set endian format - endian = '<' # '>' for big-endian, '<' for little-endian + machfmt = '<' # '>' for big-endian, '<' for little-endian # Read binary file print(f'reading from {fname}') with open(fname, 'rb') as ifp: - # Read first record: "global" grid (tile_grid_g) + # Read "global" and "domain" records for grid in ['global','domain']: + tile_grid = {} - fortran_tag = struct.unpack(f'{endian}i', ifp.read(4))[0] + + fortran_tag = struct.unpack(f'{machfmt}i', ifp.read(4))[0] tile_grid['gridtype'] = ifp.read(40).decode('ascii').strip('\x00') - tile_grid['ind_base'] = struct.unpack(f'{endian}i', ifp.read(4))[0] - tile_grid['i_dir'] = struct.unpack(f'{endian}i', ifp.read(4))[0] - tile_grid['j_dir'] = struct.unpack(f'{endian}i', ifp.read(4))[0] - tile_grid['N_lon'] = struct.unpack(f'{endian}i', ifp.read(4))[0] - tile_grid['N_lat'] = struct.unpack(f'{endian}i', ifp.read(4))[0] - tile_grid['i_offg'] = struct.unpack(f'{endian}i', ifp.read(4))[0] - tile_grid['j_offg'] = struct.unpack(f'{endian}i', ifp.read(4))[0] - tile_grid['ll_lon'] = struct.unpack(f'{endian}f', ifp.read(4))[0] - tile_grid['ll_lat'] = struct.unpack(f'{endian}f', ifp.read(4))[0] - tile_grid['ur_lon'] = struct.unpack(f'{endian}f', ifp.read(4))[0] - tile_grid['ur_lat'] = struct.unpack(f'{endian}f', ifp.read(4))[0] - tile_grid['dlon'] = struct.unpack(f'{endian}f', ifp.read(4))[0] - tile_grid['dlat'] = struct.unpack(f'{endian}f', ifp.read(4))[0] - fortran_tag = struct.unpack(f'{endian}i', ifp.read(4))[0] + tile_grid['ind_base'] = struct.unpack(f'{machfmt}i', ifp.read(4))[0] + tile_grid['i_dir'] = struct.unpack(f'{machfmt}i', ifp.read(4))[0] + tile_grid['j_dir'] = struct.unpack(f'{machfmt}i', ifp.read(4))[0] + tile_grid['N_lon'] = struct.unpack(f'{machfmt}i', ifp.read(4))[0] + tile_grid['N_lat'] = struct.unpack(f'{machfmt}i', ifp.read(4))[0] + tile_grid['i_offg'] = struct.unpack(f'{machfmt}i', ifp.read(4))[0] + tile_grid['j_offg'] = struct.unpack(f'{machfmt}i', ifp.read(4))[0] + tile_grid['ll_lon'] = struct.unpack(f'{machfmt}f', ifp.read(4))[0] + tile_grid['ll_lat'] = struct.unpack(f'{machfmt}f', ifp.read(4))[0] + tile_grid['ur_lon'] = struct.unpack(f'{machfmt}f', ifp.read(4))[0] + tile_grid['ur_lat'] = struct.unpack(f'{machfmt}f', ifp.read(4))[0] + tile_grid['dlon'] = struct.unpack(f'{machfmt}f', ifp.read(4))[0] + tile_grid['dlat'] = struct.unpack(f'{machfmt}f', ifp.read(4))[0] + fortran_tag = struct.unpack(f'{machfmt}i', ifp.read(4))[0] if 'global' in grid: tile_grid_g = tile_grid From 25270fe749fa8499b9642c4fe557cf7d309cfe00 Mon Sep 17 00:00:00 2001 From: Rolf Reichle Date: Tue, 17 Jun 2025 09:49:14 -0400 Subject: [PATCH 10/45] removed "qliu" from sample output path (user_config.py) --- GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py index 6384e410..169bbcf3 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py @@ -66,7 +66,7 @@ def get_config(): # Top level directory for all output from this package: - out_path = '/discover/nobackup/qliu/SMAP_test/' + out_path = '/discover/nobackup/[USERNAME]/' # Directory for monthly sum files: # - Can use the experiment directory or a different path. @@ -119,7 +119,7 @@ def get_config(): tg_global, tg_domain = read_tilegrids(ftg) # add tilecoord and obsparam into to exp - exp.update({'tilecoord':tc, 'obsparam':obsparam, 'tilegrid_global':tg_global,'tilegrid_domain': tg_domain}) + exp.update({'tilecoord':tc, 'obsparam':obsparam, 'tilegrid_global':tg_global, 'tilegrid_domain':tg_domain}) # verify that obs species match across experiments for exp_idx, exp in enumerate(exp_list) : From 2a3d6e7deb479c94af29fc63e2b9e5e7bc622284 Mon Sep 17 00:00:00 2001 From: Rolf Reichle Date: Tue, 17 Jun 2025 09:52:33 -0400 Subject: [PATCH 11/45] fixed vertical alignment (postproc_ObsFcstAna.py) --- .../ObsFcstAna_stats/postproc_ObsFcstAna.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py index 5748d633..e191a154 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py @@ -21,21 +21,21 @@ class postproc_ObsFcstAna: def __init__(self, exp_list, start_time, end_time, sum_path='./'): - self.exp_list = exp_list - self.expdir_list = [item['expdir'] for item in exp_list] - self.expid_list = [item['expid'] for item in exp_list] - self.exptag = exp_list[0]['exptag'] - self.domain = exp_list[0]['domain'] - self.start_time = start_time - self.end_time = end_time - self.da_t0 = exp_list[0]['da_t0'] - self.da_dt = exp_list[0]['da_dt'] - self.var_list = ['obs_obs','obs_obsvar','obs_fcst','obs_fcstvar','obs_ana','obs_anavar'] - self.tilecoord = exp_list[0]['tilecoord'] - self.tilegrid_global = exp_list[0]['tilegrid_global'] - self.tilegrid_domain = exp_list[0]['tilegrid_domain'] - self.obsparam_list = [item['obsparam'] for item in exp_list] - self.sum_path = sum_path + self.exp_list = exp_list + self.expdir_list = [item['expdir'] for item in exp_list] + self.expid_list = [item['expid'] for item in exp_list] + self.exptag = exp_list[0]['exptag'] + self.domain = exp_list[0]['domain'] + self.start_time = start_time + self.end_time = end_time + self.da_t0 = exp_list[0]['da_t0'] + self.da_dt = exp_list[0]['da_dt'] + self.var_list = ['obs_obs','obs_obsvar','obs_fcst','obs_fcstvar','obs_ana','obs_anavar'] + self.tilecoord = exp_list[0]['tilecoord'] + self.tilegrid_global = exp_list[0]['tilegrid_global'] + self.tilegrid_domain = exp_list[0]['tilegrid_domain'] + self.obsparam_list = [item['obsparam'] for item in exp_list] + self.sum_path = sum_path # Determine experiment that supplies obs data self.obs_from = -1 From 2423fb3dee30330c81f321ea647384fc3e273590 Mon Sep 17 00:00:00 2001 From: Rolf Reichle Date: Tue, 17 Jun 2025 10:35:47 -0400 Subject: [PATCH 12/45] replaced variable name start/end/current_month with start/end/current_time for clarity and consistency; vertical alignment (Plot_stats_timeseries.py, Plot_stats_maps.py) --- .../ObsFcstAna_stats/Plot_stats_maps.py | 2 +- .../ObsFcstAna_stats/Plot_stats_timeseries.py | 32 +++++++++---------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py index 800a4230..9bfc88ee 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py @@ -25,7 +25,7 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'): end_time = postproc_obj.end_time domain = postproc_obj.domain tc = postproc_obj.tilecoord - tg = postproc_obj.tilegrid_global + tg = postproc_obj.tilegrid_global # Sample of final compuation of selected diagnostic metrics diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py index 63ca9a3a..bd763165 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py @@ -18,12 +18,12 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'): import pickle - exptag = postproc_obj.exptag - start_month = postproc_obj.start_time - end_month = postproc_obj.end_time + exptag = postproc_obj.exptag + start_time = postproc_obj.start_time + end_time = postproc_obj.end_time - stats_file = fig_path + 'spatial_stats_'+exptag+'_'+start_month.strftime('%Y%m')+ \ - '_'+(end_month+timedelta(days=-1)).strftime('%Y%m')+'.pkl' + stats_file = fig_path + 'spatial_stats_'+exptag+'_'+start_time.strftime('%Y%m')+ \ + '_'+(end_time+timedelta(days=-1)).strftime('%Y%m')+'.pkl' if os.path.isfile(stats_file): @@ -42,11 +42,11 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'): date_vec =[] # Time loop - current_month = start_month - while current_month < end_month: - print('compute monthly spatial mean stats for '+ current_month.strftime('%Y%m')) + current_time = start_time + while current_time < end_time: + print('compute monthly spatial mean stats for '+ current_time.strftime('%Y%m')) - OmFm,OmFs,OmAm,OmAs,Nobsm = postproc_obj.calc_spatial_stats_from_sums(current_month) + OmFm,OmFs,OmAm,OmAs,Nobsm = postproc_obj.calc_spatial_stats_from_sums(current_time) Ndata.append(Nobsm) @@ -55,13 +55,13 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'): OmA_mean.append(OmAm) OmA_stdv.append(OmAs) - date_vec.append(current_month.strftime('%Y%m')) - current_month = current_month + relativedelta(months=1) + date_vec.append(current_time.strftime('%Y%m')) + current_time = current_time + relativedelta(months=1) # Store stats in a dictionary for easier saving and referencing stats = {"OmF_mean":np.array(OmF_mean), "OmF_stdv":np.array(OmF_stdv), - "OmA_mean":np.array(OmA_mean), "OmA_stdv":np.array(OmA_stdv), - "Ndata": np.array(Ndata), "date_vec":date_vec} + "OmA_mean":np.array(OmA_mean), "OmA_stdv":np.array(OmA_stdv), + "Ndata":np.array(Ndata), "date_vec":date_vec} if stats_file is not None: with open(stats_file,'wb') as file: @@ -73,11 +73,11 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'): # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS! stats_plot = {} Ndata = np.nansum(stats['Ndata'], axis=1) - stats_plot['Ndata'] = Ndata + stats_plot['Ndata'] = Ndata stats_plot['OmF_mean'] = np.nansum(stats['OmF_mean']*stats['Ndata'], axis=1)/Ndata - stats_plot['OmF_stdv'] = np.nansum(stats['OmF_stdv']*stats['Ndata'], axis=1)/Ndata + stats_plot['OmF_stdv'] = np.nansum(stats['OmF_stdv']*stats['Ndata'], axis=1)/Ndata stats_plot['OmA_mean'] = np.nansum(stats['OmA_mean']*stats['Ndata'], axis=1)/Ndata - stats_plot['OmA_stdv'] = np.nansum(stats['OmA_stdv']*stats['Ndata'], axis=1)/Ndata + stats_plot['OmA_stdv'] = np.nansum(stats['OmA_stdv']*stats['Ndata'], axis=1)/Ndata plot_var = 'OmF_mean' From 80a68ebca01efc312be3061bbc6e0c48e7b20a88 Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Tue, 17 Jun 2025 14:14:58 -0400 Subject: [PATCH 13/45] clarify saved stats variables --- .../ObsFcstAna_stats/Plot_stats_maps.py | 45 ++--------- .../ObsFcstAna_stats/Plot_stats_timeseries.py | 10 ++- .../ObsFcstAna_stats/postproc_ObsFcstAna.py | 80 ++++++++++++++++--- 3 files changed, 84 insertions(+), 51 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py index 9bfc88ee..297af9be 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py @@ -27,46 +27,13 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'): tc = postproc_obj.tilecoord tg = postproc_obj.tilegrid_global - # Sample of final compuation of selected diagnostic metrics - - Nmin = 20 - - # Then compute metrics of O-F, O-A, etc. based on above computed N_data = stats['N_data'] - O_mean = stats['obs_mean'] - # mean(x-y) = E[x] - E[y] - OmF_mean = stats['obs_mean'] - stats['fcst_mean'] - OmA_mean = stats['obs_mean'] - stats['ana_mean'] - # var(x-y) = var(x) + var(y) - 2cov(x,y) - # cov(x,y) = E[xy] - E[x]E[y] - OmF_stdv = np.sqrt(stats['obs_variance'] + stats['fcst_variance'] - \ - 2 * (stats['oxf_mean'] - stats['obs_mean']*stats['fcst_mean'])) - - OmA_stdv = np.sqrt(stats['obs_variance'] + stats['ana_variance'] - \ - 2 * (stats['oxa_mean'] - stats['obs_mean']*stats['ana_mean'])) - - # ***************************************************************************************** - # The time series mean and std-dev of the *normalized* OmF computed here are APPROXIMATED! - # ***************************************************************************************** - # Here, we first compute the stats of the OmF time series and then normalize using - # the time-avg "obsvar" and "fcstvar" values. - # Since "fcstvar" changes with time, the OmF values should be normalized at each time - # step (as in the older matlab scripts), and then the time series stats can be computed. - # To compute the exact stats with this python package, the sum and sum-of-squares of - # the normalized OmF values would need to be added into the sums files. - # - OmF_norm_mean = OmF_mean / np.sqrt(stats['obsvar_mean'] + stats['fcstvar_mean']) # APPROXIMATED stat! - OmF_norm_stdv = np.sqrt(OmF_stdv**2 / (stats['obsvar_mean'] + stats['fcstvar_mean']) ) # APPROXIMATED stat! - - # Mask out data points with insufficent observations using the Nmin threshold - # Do NOT apply to N_data - OmF_mean[ N_data < Nmin] = np.nan - OmF_stdv[ N_data < Nmin] = np.nan - OmF_norm_mean[N_data < Nmin] = np.nan - OmF_norm_stdv[N_data < Nmin] = np.nan - OmA_mean[ N_data < Nmin] = np.nan - OmA_stdv[ N_data < Nmin] = np.nan - N_data[ N_data < Nmin] = 0 + OmF_mean = stats['OmF_mean'] + OmF_stdv = stats['OmF_stdv'] + OmA_mean = stats['OmA_mean'] + OmA_stdv = stats['OmA_stdv'] + OmF_norm_mean = stats['OmF_norm_mean'] + OmF_norm_stdv = stats['OmF_norm_stdv'] # Compute Nobs-weighted avg of each metric across all species. # Typically used for SMAP Tb_h/h from asc and desc overpasses, diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py index bd763165..fbb4d35f 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py @@ -46,7 +46,13 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'): while current_time < end_time: print('compute monthly spatial mean stats for '+ current_time.strftime('%Y%m')) - OmFm,OmFs,OmAm,OmAs,Nobsm = postproc_obj.calc_spatial_stats_from_sums(current_time) + stats_mo = postproc_obj.calc_spatial_stats_from_sums(current_time) + + OmFm = stats_mo['OmF_mean'] + OmFs = stats_mo['OmF_stdv'] + OmAm = stats_mo['OmA_mean'] + OmAs = stats_mo['OmA_stdv'] + Nobsm = stats_mo['N_data'] Ndata.append(Nobsm) @@ -87,7 +93,7 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'): plt.grid(True, linestyle='--', alpha=0.5) plt.xticks(ticks=date_vec[::2], labels=date_vec[::2]) - plt.title(exptag+ 'monthly '+plot_var) + plt.title(exptag+ ' monthly '+plot_var) plt.xlim(-1, len(date_vec)+1) plt.ylim(-.1, 2.) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py index e191a154..98809d23 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py @@ -227,7 +227,7 @@ def save_monthly_sums(self): # # Assumes that monthly sums files have been saved [see save_monthly_sums()]. - def calc_temporal_stats_from_sums(self, write_to_nc=True, fout_stats='./stats.nc4'): + def calc_temporal_stats_from_sums(self, write_to_nc=True, fout_stats='./stats.nc4', Nmin=20): if os.path.isfile(fout_stats): @@ -320,14 +320,65 @@ def calc_temporal_stats_from_sums(self, write_to_nc=True, fout_stats='./stats.nc oxa_mean = oxa_sum / N_data fxa_mean = fxa_sum / N_data - stats = {} - for var in var_list: - stats[var[4:]+'_mean'] = data_mean[var] - stats[var[4:]+'_variance'] = data_var[ var] - stats['oxf_mean'] = oxf_mean - stats['oxa_mean'] = oxa_mean - stats['fxa_mean'] = fxa_mean - stats['N_data'] = N_data + # Then compute metrics of O-F, O-A, etc. based on above computed + + O_mean = data_mean['obs_obs'] + F_mean = data_mean['obs_fcst'] + A_mean = data_mean['obs_ana'] + + O_stdv = np.sqrt(data_var['obs_obs']) + F_stdv = np.sqrt(data_var['obs_fcst']) + A_stdv = np.sqrt(data_var['obs_ana']) + + # mean(x-y) = E[x] - E[y] + OmF_mean = O_mean - F_mean + OmA_mean = O_mean - A_mean + + # var(x-y) = var(x) + var(y) - 2cov(x,y) + # cov(x,y) = E[xy] - E[x]E[y] + OmF_stdv = np.sqrt(O_stdv**2 + F_stdv**2 - 2 * (oxf_mean - O_mean*F_mean)) + OmA_stdv = np.sqrt(O_stdv**2 + A_stdv**2 - 2 * (oxa_mean - O_mean*A_mean)) + + # ***************************************************************************************** + # The time series mean and std-dev of the *normalized* OmF computed here are APPROXIMATED! + # ***************************************************************************************** + # Here, we first compute the stats of the OmF time series and then normalize using + # the time-avg "obsvar" and "fcstvar" values. + # Since "fcstvar" changes with time, the OmF values should be normalized at each time + # step (as in the older matlab scripts), and then the time series stats can be computed. + # To compute the exact stats with this python package, the sum and sum-of-squares of + # the normalized OmF values would need to be added into the sums files. + # + OmF_norm_mean = OmF_mean / np.sqrt(data_mean['obs_obsvar'] + data_mean['obs_fcstvar']) # APPROXIMATED stat! + OmF_norm_stdv = np.sqrt(OmF_stdv**2 / (data_mean['obs_obsvar'] + data_mean['obs_fcstvar'])) # APPROXIMATED stat! + + # Mask out data points with insufficent observations using the Nmin threshold + # Do NOT apply to N_data + O_mean[ N_data < Nmin] = np.nan + O_stdv[ N_data < Nmin] = np.nan + F_mean[ N_data < Nmin] = np.nan + F_stdv[ N_data < Nmin] = np.nan + A_mean[ N_data < Nmin] = np.nan + A_stdv[ N_data < Nmin] = np.nan + + OmF_mean[ N_data < Nmin] = np.nan + OmF_stdv[ N_data < Nmin] = np.nan + OmF_norm_mean[N_data < Nmin] = np.nan + OmF_norm_stdv[N_data < Nmin] = np.nan + OmA_mean[ N_data < Nmin] = np.nan + OmA_stdv[ N_data < Nmin] = np.nan + N_data[ N_data < Nmin] = 0 + + stats = { + 'O_mean' : O_mean, 'O_stdv' : O_stdv, + 'F_mean' : F_mean, 'F_stdv' : F_stdv, + 'A_mean' : A_mean, 'A_stdv' : A_stdv, + 'OmF_mean': OmF_mean, 'OmF_stdv': OmF_stdv, + 'OmA_mean': OmA_mean, 'OmA_stdv': OmA_stdv, + 'OmF_norm_mean': OmF_norm_mean, + 'OmF_norm_stdv': OmF_norm_stdv, + 'N_data': N_data, + } if write_to_nc: print('writing stats nc4 file: '+fout_stats) @@ -418,6 +469,15 @@ def calc_spatial_stats_from_sums(self, date_time): OmF_stdv = np.sqrt(O_var + F_var - 2 * (OxF_mean - O_mean*F_mean)) OmA_stdv = np.sqrt(O_var + A_var - 2 * (OxA_mean - O_mean*A_mean)) - return OmF_mean, OmF_stdv, OmA_mean, OmA_stdv, N_data + stats = { + 'O_mean' : O_mean, 'O_stdv' : np.sqrt(O_var), + 'F_mean' : F_mean, 'F_stdv' : np.sqrt(F_var), + 'A_mean' : A_mean, 'A_stdv' : np.sqrt(A_var), + 'OmF_mean': OmF_mean, 'OmF_stdv': OmF_stdv, + 'OmA_mean': OmA_mean, 'OmA_stdv': OmA_stdv, + 'N_data': N_data, + } + + return stats # ============== EOF ==================================================================== From 6237a890fb076e020af4f3be9129d48c6a17aa1e Mon Sep 17 00:00:00 2001 From: Rolf Reichle Date: Tue, 17 Jun 2025 15:59:39 -0400 Subject: [PATCH 14/45] minor changes in processing of temporal stats (postproc_ObsFcstAna.py, Plot_stats_maps.py): - removed Nmin from calc_temporal_stats_from_sums() - moved read of stats file to Plot_stats_maps.py - edited/added comments - fixed vertical alignment --- .../ObsFcstAna_stats/Plot_stats_maps.py | 45 ++- .../ObsFcstAna_stats/postproc_ObsFcstAna.py | 282 +++++++++--------- 2 files changed, 168 insertions(+), 159 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py index 297af9be..91e776bb 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py @@ -2,7 +2,7 @@ """ Sample script for plotting maps of long-term data assimilation diagnostics. -Computes Nobs-weighted avg of each metric across all species. +Plots Nobs-weighted avg of each metric across all species. Requires saved files with monthly sums (see Get_ObsFcstAna_stat.py). Stats of *normalized* OmFs are approximated! """ @@ -21,17 +21,17 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'): - start_time = postproc_obj.start_time - end_time = postproc_obj.end_time - domain = postproc_obj.domain - tc = postproc_obj.tilecoord - tg = postproc_obj.tilegrid_global + start_time = postproc_obj.start_time + end_time = postproc_obj.end_time + domain = postproc_obj.domain + tc = postproc_obj.tilecoord + tg = postproc_obj.tilegrid_global - N_data = stats['N_data'] - OmF_mean = stats['OmF_mean'] - OmF_stdv = stats['OmF_stdv'] - OmA_mean = stats['OmA_mean'] - OmA_stdv = stats['OmA_stdv'] + N_data = stats['N_data'] + OmF_mean = stats['OmF_mean'] + OmF_stdv = stats['OmF_stdv'] + OmA_mean = stats['OmA_mean'] + OmA_stdv = stats['OmA_stdv'] OmF_norm_mean = stats['OmF_norm_mean'] OmF_norm_stdv = stats['OmF_norm_stdv'] @@ -61,7 +61,7 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'): # crange is [cmin, cmax] crange =[0, np.ceil((end_time-start_time).days/150)*300] colormap = plt.get_cmap('jet',20) - title_txt = exptag + ' Tb Nobs '+ start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m') + title_txt = exptag + ' Tb Nobs ' + start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m') units = '[-]' if i == 0 and j ==1: tile_data = OmF_mean @@ -138,6 +138,8 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'): # ----------------------------------------------------------------------------------------------- if __name__ == '__main__': + + # Plot maps of long-term temporal stats from postproc_ObsFcstAna import postproc_ObsFcstAna from user_config import get_config @@ -151,18 +153,29 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'): out_path = config['out_path'] # ------------------------------------------------------------------------------------ + # Initialize the postprocessing object postproc = postproc_ObsFcstAna(exp_list, start_time, end_time, sum_path=sum_path) - # Compute long-term temporal stats and plot maps + # File name for long-term temporal stats stats_file = out_path + 'temporal_stats_'+exp_list[0]['exptag']+'_'+ start_time.strftime('%Y%m%d')+'_'+ \ (end_time+timedelta(days=-1)).strftime('%Y%m%d')+'.nc4' - # temporal_stats is a dictionary that contains all mean/variances fields for computing long-term O-F/O-A stats - # each field has the dimension [N_tile, N_species] - temporal_stats = postproc.calc_temporal_stats_from_sums(write_to_nc=True, fout_stats=stats_file) + # Read or compute long-term temporal stats. Each field has the dimension [N_tile, N_species]. + if os.path.isfile(stats_file): + + print('reading stats nc4 file '+ stats_file) + temporal_stats = {} + with Dataset(stats_file,'r') as nc: + for key, value in nc.variables.items(): + temporal_stats[key] = value[:].filled(np.nan) + + else: + temporal_stats = postproc.calc_temporal_stats_from_sums(write_to_nc=True, fout_stats=stats_file) + # Plot stats + plot_OmF_maps(postproc, temporal_stats, fig_path=out_path ) # ====================== EOF ========================================================= diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py index 98809d23..03f95380 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py @@ -227,162 +227,158 @@ def save_monthly_sums(self): # # Assumes that monthly sums files have been saved [see save_monthly_sums()]. - def calc_temporal_stats_from_sums(self, write_to_nc=True, fout_stats='./stats.nc4', Nmin=20): + def calc_temporal_stats_from_sums(self, write_to_nc=True, fout_stats='./stats.nc4'): - if os.path.isfile(fout_stats): + # Variable list for computing sum and sum-of-squares + var_list = self.var_list - print('reading stats nc4 file '+ fout_stats) - stats = {} - with Dataset(fout_stats,'r') as nc: - for key, value in nc.variables.items(): - stats[key] = value[:].filled(np.nan) + # Read tilecoord and obsparam for tile and obs species information + n_tile = self.tilecoord['N_tile'] + n_spec = len(self.obsparam_list[0]) - else: - # Variable list for computing sum and sum-of-squares - var_list = self.var_list - - # Read tilecoord and obsparam for tile and obs species information - n_tile = self.tilecoord['N_tile'] - n_spec = len(self.obsparam_list[0]) + # --------------------------------------------------------------- + # + # compute accumulated sums for period (start_time, end_time) + + # Initialize statistical metrics + data_sum = {} + data2_sum = {} + N_data = np.zeros((n_tile, n_spec)) + oxf_sum = np.zeros((n_tile, n_spec)) + oxa_sum = np.zeros((n_tile, n_spec)) + fxa_sum = np.zeros((n_tile, n_spec)) - # Initialize statistical metrics - data_sum = {} - data2_sum = {} - N_data = np.zeros((n_tile, n_spec)) - oxf_sum = np.zeros((n_tile, n_spec)) - oxa_sum = np.zeros((n_tile, n_spec)) - fxa_sum = np.zeros((n_tile, n_spec)) + for var in var_list: + data_sum[ var] = np.zeros((n_tile, n_spec)) + data2_sum[var] = np.zeros((n_tile, n_spec)) - for var in var_list: - data_sum[ var] = np.zeros((n_tile, n_spec)) - data2_sum[var] = np.zeros((n_tile, n_spec)) + # Time loop: processing data at monthly time step + + date_time = self.start_time - # Time loop: processing data at monthly time step + while date_time < self.end_time: # loop through months - date_time = self.start_time - - while date_time < self.end_time: # loop through months + fpath = self.sum_path + '/Y'+ date_time.strftime('%Y') + '/M' + date_time.strftime('%m') + '/' - fpath = self.sum_path + '/Y'+ date_time.strftime('%Y') + '/M' + date_time.strftime('%m') + '/' - - fout = self.exptag + '.ens_avg.ldas_ObsFcstAna_sums.' + date_time.strftime('%Y%m') +'.nc4' - - fout = fpath + fout - - # Read monthly data - if os.path.isfile(fout): - print('read sums from monthly file: '+fout) - mdata_sum = {} - mdata2_sum = {} - with Dataset(fout,'r') as nc: - mN_data = nc.variables['N_data' ][:] - moxf_sum = nc.variables['obsxfcst_sum'][:] - moxa_sum = nc.variables['obsxana_sum' ][:] - mfxa_sum = nc.variables['fcstxana_sum'][:] - for var in var_list: - mdata_sum[ var] = nc.variables[var+'_sum' ][:] - mdata2_sum[var] = nc.variables[var+'2_sum'][:] - - # Aggregate monthly data - N_data += mN_data - oxf_sum += moxf_sum - oxa_sum += moxa_sum - fxa_sum += mfxa_sum - + fout = self.exptag + '.ens_avg.ldas_ObsFcstAna_sums.' + date_time.strftime('%Y%m') +'.nc4' + + fout = fpath + fout + + # Read and accumulate monthly sums data + if os.path.isfile(fout): + print('read sums from monthly file: '+fout) + mdata_sum = {} + mdata2_sum = {} + with Dataset(fout,'r') as nc: + mN_data = nc.variables['N_data' ][:] + moxf_sum = nc.variables['obsxfcst_sum'][:] + moxa_sum = nc.variables['obsxana_sum' ][:] + mfxa_sum = nc.variables['fcstxana_sum'][:] for var in var_list: - data_sum[ var] += mdata_sum[ var] - data2_sum[var] += mdata2_sum[var] - else: - raise FileNotFoundError(f"File {fout} does not exist, run save_monthly_sums() first") - - date_time = date_time + relativedelta(months=1) - - # Compute stats (DA diagnostics) from accumulated sums data. - data_mean = {} - data2_mean = {} - data_var = {} + mdata_sum[ var] = nc.variables[var+'_sum' ][:] + mdata2_sum[var] = nc.variables[var+'2_sum' ][:] + + # Aggregate monthly data + N_data += mN_data + oxf_sum += moxf_sum + oxa_sum += moxa_sum + fxa_sum += mfxa_sum + + for var in var_list: + data_sum[ var] += mdata_sum[ var] + data2_sum[var] += mdata2_sum[var] + else: + raise FileNotFoundError(f"File {fout} does not exist, run save_monthly_sums() first") + + date_time = date_time + relativedelta(months=1) - # calculation - for var in var_list: - data_sum[var][ N_data == 0] = np.nan - data2_sum[var][N_data == 0] = np.nan - - data_mean[ var] = data_sum[ var] / N_data - data2_mean[var] = data2_sum[var] / N_data - # var(x) = E[x2] - (E[x])^2 - data_var[var] = data2_mean[var] - data_mean[var]**2 - - oxf_sum[N_data == 0] = np.nan - oxa_sum[N_data == 0] = np.nan - fxa_sum[N_data == 0] = np.nan - # E[xy] - oxf_mean = oxf_sum / N_data - oxa_mean = oxa_sum / N_data - fxa_mean = fxa_sum / N_data - - # Then compute metrics of O-F, O-A, etc. based on above computed - - O_mean = data_mean['obs_obs'] - F_mean = data_mean['obs_fcst'] - A_mean = data_mean['obs_ana'] + # -------------------------------------------------------------------- + # + # Compute stats (DA diagnostics) from accumulated sums data. + + data_mean = {} + data2_mean = {} + data_var = {} - O_stdv = np.sqrt(data_var['obs_obs']) - F_stdv = np.sqrt(data_var['obs_fcst']) - A_stdv = np.sqrt(data_var['obs_ana']) - - # mean(x-y) = E[x] - E[y] - OmF_mean = O_mean - F_mean - OmA_mean = O_mean - A_mean + # calculation + for var in var_list: + data_sum[var][ N_data == 0] = np.nan + data2_sum[var][N_data == 0] = np.nan - # var(x-y) = var(x) + var(y) - 2cov(x,y) - # cov(x,y) = E[xy] - E[x]E[y] - OmF_stdv = np.sqrt(O_stdv**2 + F_stdv**2 - 2 * (oxf_mean - O_mean*F_mean)) - OmA_stdv = np.sqrt(O_stdv**2 + A_stdv**2 - 2 * (oxa_mean - O_mean*A_mean)) - - # ***************************************************************************************** - # The time series mean and std-dev of the *normalized* OmF computed here are APPROXIMATED! - # ***************************************************************************************** - # Here, we first compute the stats of the OmF time series and then normalize using - # the time-avg "obsvar" and "fcstvar" values. - # Since "fcstvar" changes with time, the OmF values should be normalized at each time - # step (as in the older matlab scripts), and then the time series stats can be computed. - # To compute the exact stats with this python package, the sum and sum-of-squares of - # the normalized OmF values would need to be added into the sums files. - # - OmF_norm_mean = OmF_mean / np.sqrt(data_mean['obs_obsvar'] + data_mean['obs_fcstvar']) # APPROXIMATED stat! - OmF_norm_stdv = np.sqrt(OmF_stdv**2 / (data_mean['obs_obsvar'] + data_mean['obs_fcstvar'])) # APPROXIMATED stat! - - # Mask out data points with insufficent observations using the Nmin threshold - # Do NOT apply to N_data - O_mean[ N_data < Nmin] = np.nan - O_stdv[ N_data < Nmin] = np.nan - F_mean[ N_data < Nmin] = np.nan - F_stdv[ N_data < Nmin] = np.nan - A_mean[ N_data < Nmin] = np.nan - A_stdv[ N_data < Nmin] = np.nan + data_mean[ var] = data_sum[ var] / N_data + data2_mean[var] = data2_sum[var] / N_data + # var(x) = E[x2] - (E[x])^2 + data_var[var] = data2_mean[var] - data_mean[var]**2 - OmF_mean[ N_data < Nmin] = np.nan - OmF_stdv[ N_data < Nmin] = np.nan - OmF_norm_mean[N_data < Nmin] = np.nan - OmF_norm_stdv[N_data < Nmin] = np.nan - OmA_mean[ N_data < Nmin] = np.nan - OmA_stdv[ N_data < Nmin] = np.nan - N_data[ N_data < Nmin] = 0 - - stats = { - 'O_mean' : O_mean, 'O_stdv' : O_stdv, - 'F_mean' : F_mean, 'F_stdv' : F_stdv, - 'A_mean' : A_mean, 'A_stdv' : A_stdv, - 'OmF_mean': OmF_mean, 'OmF_stdv': OmF_stdv, - 'OmA_mean': OmA_mean, 'OmA_stdv': OmA_stdv, - 'OmF_norm_mean': OmF_norm_mean, - 'OmF_norm_stdv': OmF_norm_stdv, - 'N_data': N_data, - } - - if write_to_nc: - print('writing stats nc4 file: '+fout_stats) - write_stats_nc4(fout_stats, stats) + oxf_sum[N_data == 0] = np.nan + oxa_sum[N_data == 0] = np.nan + fxa_sum[N_data == 0] = np.nan + # E[xy] + oxf_mean = oxf_sum / N_data + oxa_mean = oxa_sum / N_data + fxa_mean = fxa_sum / N_data + + # Then compute metrics of O-F, O-A, etc. based on above computed + + O_mean = data_mean['obs_obs'] + F_mean = data_mean['obs_fcst'] + A_mean = data_mean['obs_ana'] + + O_stdv = np.sqrt(data_var['obs_obs']) + F_stdv = np.sqrt(data_var['obs_fcst']) + A_stdv = np.sqrt(data_var['obs_ana']) + + # mean(x-y) = E[x] - E[y] + OmF_mean = O_mean - F_mean + OmA_mean = O_mean - A_mean + + # var(x-y) = var(x) + var(y) - 2cov(x,y) + # cov(x,y) = E[xy] - E[x]E[y] + OmF_stdv = np.sqrt(O_stdv**2 + F_stdv**2 - 2 * (oxf_mean - O_mean*F_mean)) + OmA_stdv = np.sqrt(O_stdv**2 + A_stdv**2 - 2 * (oxa_mean - O_mean*A_mean)) + + # ***************************************************************************************** + # The time series mean and std-dev of the *normalized* OmF computed here are APPROXIMATED! + # ***************************************************************************************** + # Here, we first compute the stats of the OmF time series and then normalize using + # the time-avg "obsvar" and "fcstvar" values. + # Since "fcstvar" changes with time, the OmF values should be normalized at each time + # step (as in the older matlab scripts), and then the time series stats can be computed. + # To compute the exact stats with this python package, the sum and sum-of-squares of + # the normalized OmF values would need to be added into the sums files. + # + OmF_norm_mean = OmF_mean / np.sqrt(data_mean['obs_obsvar'] + data_mean['obs_fcstvar']) # APPROXIMATED stat! + OmF_norm_stdv = np.sqrt(OmF_stdv**2 / (data_mean['obs_obsvar'] + data_mean['obs_fcstvar'])) # APPROXIMATED stat! + + # Mask out data points without any obs (NOTE: apply Nmin threshold when plotting or computing map avg) + # Do NOT apply to N_data + O_mean[ N_data < 1] = np.nan + O_stdv[ N_data < 1] = np.nan + F_mean[ N_data < 1] = np.nan + F_stdv[ N_data < 1] = np.nan + A_mean[ N_data < 1] = np.nan + A_stdv[ N_data < 1] = np.nan + + OmF_mean[ N_data < 1] = np.nan + OmF_stdv[ N_data < 1] = np.nan + OmF_norm_mean[N_data < 1] = np.nan + OmF_norm_stdv[N_data < 1] = np.nan + OmA_mean[ N_data < 1] = np.nan + OmA_stdv[ N_data < 1] = np.nan + + stats = { + 'O_mean' : O_mean, 'O_stdv' : O_stdv, + 'F_mean' : F_mean, 'F_stdv' : F_stdv, + 'A_mean' : A_mean, 'A_stdv' : A_stdv, + 'OmF_mean' : OmF_mean, 'OmF_stdv' : OmF_stdv, + 'OmA_mean' : OmA_mean, 'OmA_stdv' : OmA_stdv, + 'OmF_norm_mean': OmF_norm_mean, 'OmF_norm_stdv': OmF_norm_stdv, + 'N_data' : N_data, + } + + if write_to_nc: + print('writing stats nc4 file: '+fout_stats) + write_stats_nc4(fout_stats, stats) return stats From a285d6a2fb0be6c096c756916ddb1a775dbaa02c Mon Sep 17 00:00:00 2001 From: Rolf Reichle Date: Tue, 17 Jun 2025 16:05:26 -0400 Subject: [PATCH 15/45] fixed vertical alignment (Plot_stats_timeseries.py) --- .../postproc/ObsFcstAna_stats/Plot_stats_timeseries.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py index fbb4d35f..f7a40e88 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py @@ -48,10 +48,10 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'): stats_mo = postproc_obj.calc_spatial_stats_from_sums(current_time) - OmFm = stats_mo['OmF_mean'] - OmFs = stats_mo['OmF_stdv'] - OmAm = stats_mo['OmA_mean'] - OmAs = stats_mo['OmA_stdv'] + OmFm = stats_mo['OmF_mean'] + OmFs = stats_mo['OmF_stdv'] + OmAm = stats_mo['OmA_mean'] + OmAs = stats_mo['OmA_stdv'] Nobsm = stats_mo['N_data'] Ndata.append(Nobsm) From 8df81963d626f9070933f8ad252ca98846c97eaa Mon Sep 17 00:00:00 2001 From: Rolf Reichle Date: Tue, 17 Jun 2025 17:29:20 -0400 Subject: [PATCH 16/45] dummy white-space change to facilitate Github comments (Get_ObsFcstAna_stats.py) --- .../util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py index cf7e61a0..c4cf6620 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py @@ -69,7 +69,7 @@ def main(): # The plotting scripts can also run standalone using the individual Plot_stats_*.py scripts, # as long as the monthly sum files are available. - plot_maps = False + plot_maps = False plot_timeseries = False if plot_maps: # Compute long-term temporal stats and plot maps From 4c9755d0be04fdf867ec947703607e57920de48b Mon Sep 17 00:00:00 2001 From: Rolf Reichle <54944691+gmao-rreichle@users.noreply.github.com> Date: Wed, 18 Jun 2025 14:45:27 -0400 Subject: [PATCH 17/45] Updated CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5703e81f..255c8540 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added optional SLURM "constraint". - Added functionality to run on tile space of stretched cube-sphere grids. -- Added python package for post-processing ObsFcstAna output. +- Added (and later revised) python package for post-processing ObsFcstAna output. ### Changed From 133eef94a2abacd0844506f947ea905cb1e90a23 Mon Sep 17 00:00:00 2001 From: Weiyuan Jiang Date: Mon, 23 Jun 2025 14:39:45 -0400 Subject: [PATCH 18/45] added more 2D outputs to sample HISTORY.rc --- CHANGELOG.md | 1 + GEOSldas_App/GEOSldas_HIST.rc | 111 ++++++++++++++++++++++++++++------ GEOSldas_App/process_hist.csh | 6 +- 3 files changed, 96 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5703e81f..6fe41d67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Added EASE to Latlon and Cubed-Sphere to EASE history outputs - Added optional SLURM "constraint". - Added functionality to run on tile space of stretched cube-sphere grids. - Added python package for post-processing ObsFcstAna output. diff --git a/GEOSldas_App/GEOSldas_HIST.rc b/GEOSldas_App/GEOSldas_HIST.rc index d9d09dd8..1b6a14c1 100644 --- a/GEOSldas_App/GEOSldas_HIST.rc +++ b/GEOSldas_App/GEOSldas_HIST.rc @@ -6,7 +6,7 @@ # "#CUBE 'tavg24_2d_lnd_Nx'" # does *not* mean that the 'lnd' output will be on a cube-sphere grid. -#CUBE VERSION: 1 +VERSION: 1 # Must edit 'EXPID' manually if HISTORY file is re-used without going # through "ldas_setup". @@ -15,9 +15,10 @@ EXPID: GEOSldas_expid COLLECTIONS: #EASE 'tavg24_1d_lfs_Nt' -#CUBE 'tavg24_2d_lfs_Nx' -#EASE 'tavg24_1d_lnd_Nt' -#CUBE 'tavg24_2d_lnd_Nx' + 'tavg24_2d_lfs_Nx' +#EASE 'tavg24_1d_lnd_Nt' + 'tavg24_2d_lnd_Nx' +#CUBE 'tavg24_2d_lnd_EASE' #ASSIM 'SMAP_L4_SM_gph' # 'inst1_1d_lnr_Nt' # 'catch_progn_incr' @@ -29,24 +30,28 @@ COLLECTIONS: # 'tavg24_1d_glc_Nt' :: -#CUBE GRID_LABELS: PC720x361-DC -#CUBE PC1440x721-DC +GRID_LABELS: PC720x361-DC + PC1440x721-DC + EASEv2_M36 + :: -#CUBE :: +PC720x361-DC.GRID_TYPE: LatLon +PC720x361-DC.IM_WORLD: 720 +PC720x361-DC.JM_WORLD: 361 +PC720x361-DC.POLE: PC +PC720x361-DC.DATELINE: DC +PC720x361-DC.LM: 1 -#CUBE PC720x361-DC.GRID_TYPE: LatLon -#CUBE PC720x361-DC.IM_WORLD: 720 -#CUBE PC720x361-DC.JM_WORLD: 361 -#CUBE PC720x361-DC.POLE: PC -#CUBE PC720x361-DC.DATELINE: DC -#CUBE PC720x361-DC.LM: 1 +PC1440x721-DC.GRID_TYPE: LatLon +PC1440x721-DC.IM_WORLD: 1440 +PC1440x721-DC.JM_WORLD: 721 +PC1440x721-DC.POLE: PC +PC1440x721-DC.DATELINE: DC +PC1440x721-DC.LM: 1 -#CUBE PC1440x721-DC.GRID_TYPE: LatLon -#CUBE PC1440x721-DC.IM_WORLD: 1440 -#CUBE PC1440x721-DC.JM_WORLD: 721 -#CUBE PC1440x721-DC.POLE: PC -#CUBE PC1440x721-DC.DATELINE: DC -#CUBE PC1440x721-DC.LM: 1 +EASEv2_M36.GRID_TYPE: EASE +EASEv2_M36.GRIDNAME: EASEv2_M36 +EASEv2_M36.LM: 1 # Detailed definition of the collections listed above # @@ -323,6 +328,74 @@ COLLECTIONS: :: + tavg24_2d_lnd_EASE.format: 'CFIO', + tavg24_2d_lnd_EASE.descr: '2d,Daily,Time-Averaged,Single-Level,Assimilation,Land Surface Diagnostics', + tavg24_2d_lnd_EASE.nbits: 12, + tavg24_2d_lnd_EASE.template: '%y4%m2%d2_%h2%n2z.nc4', + tavg24_2d_lnd_EASE.mode: 'time-averaged', + tavg24_2d_lnd_EASE.frequency: 240000, + tavg24_2d_lnd_EASE.ref_time: 000000, + tavg24_2d_lnd_EASE.regrid_exch: '../input/tile.data' + tavg24_2d_lnd_EASE.regrid_name: 'GRIDNAME' +# tavg24_2d_lnd_EASE.regrid_method: 'BILINEAR_MONOTONIC' , + tavg24_2d_lnd_EASE.grid_label: EASEv2_M36 + tavg24_2d_lnd_EASE.deflate: 2, + tavg24_2d_lnd_EASE.fields: 'GRN' , 'VEGDYN' , + 'LAI' , 'VEGDYN' , + 'WET3' , 'GridComp' , 'GWETPROF' , + 'WET2' , 'GridComp' , 'GWETROOT' , + 'WET1' , 'GridComp' , 'GWETTOP' , + 'WCPR' , 'GridComp' , 'PRMC' , + 'WCRZ' , 'GridComp' , 'RZMC' , + 'WCSF' , 'GridComp' , 'SFMC' , + 'CAPAC' , 'GridComp' , 'INTRWATR' , + 'TPSNOW' , 'GridComp' , 'TPSNOWLAND' , + 'TPUNST' , 'GridComp' , 'TUNSTLAND' , + 'TPSAT' , 'GridComp' , 'TSATLAND' , + 'TPWLT' , 'GridComp' , 'TWLTLAND' , + 'TPSURF' , 'GridComp' , 'TSURFLAND' , + 'TP1' , 'GridComp' , 'TSOIL1' , # CATCH GC: TP1, ENSAVG GC: TSOIL1TILE + 'TP2' , 'GridComp' , 'TSOIL2' , # ... + 'TP3' , 'GridComp' , 'TSOIL3' , # ... + 'TP4' , 'GridComp' , 'TSOIL4' , # ... + 'TP5' , 'GridComp' , 'TSOIL5' , # ... + 'TP6' , 'GridComp' , 'TSOIL6' , # ... + 'PRLAND' , 'GridComp' , 'PRECTOTCORRLAND' , # assume "corrected" precip + 'SNOLAND' , 'GridComp' , 'PRECSNOCORRLAND' , # assume "corrected" precip + 'TSLAND' , 'GridComp' , 'SNOMASLAND' , + 'SNOWDP' , 'GridComp' , 'SNODPLAND' , + 'EVPSOI' , 'GridComp' , 'LHLANDSOIL' , + 'EVPVEG' , 'GridComp' , 'LHLANDTRNS' , + 'EVPINT' , 'GridComp' , 'LHLANDINTR' , + 'EVPICE' , 'GridComp' , 'LHLANDSBLN' , + 'RUNSURF' , 'GridComp' , 'RUNSURFLAND' , + 'BASEFLOW' , 'GridComp' , 'BASEFLOWLAND' , + 'SMLAND' , 'GridComp' , + 'QINFIL' , 'GridComp' , 'QINFILLAND' , + 'FRUST' , 'GridComp' , 'FRLANDUNST' , + 'FRSAT' , 'GridComp' , 'FRLANDSAT' , + 'ASNOW' , 'GridComp' , 'FRLANDSNO' , + 'FRWLT' , 'GridComp' , 'FRLANDWLT' , + 'DFPARLAND' , 'GridComp' , 'PARDFLAND' , + 'DRPARLAND' , 'GridComp' , 'PARDRLAND' , + 'SHLAND' , 'GridComp' , + 'LHLAND' , 'GridComp' , + 'EVLAND' , 'GridComp' , + 'LWLAND' , 'GridComp' , + 'SWLAND' , 'GridComp' , + 'GHLAND' , 'GridComp' , + 'TWLAND' , 'GridComp' , + 'TELAND' , 'GridComp' , + 'DWLAND' , 'GridComp' , 'WCHANGELAND' , + 'DHLAND' , 'GridComp' , 'ECHANGELAND' , + 'SPLAND' , 'GridComp' , 'SPSHLAND' , +# 'SPLH' , 'GridComp' , 'SPLHLAND' , # works for Catch only, not yet for CatchCN + 'SPWATR' , 'GridComp' , 'SPEVLAND' , + 'SPSNOW' , 'GridComp' , 'SPSNLAND' , + 'PEATCLSM_WATERLEVEL', 'GridComp' , + 'PEATCLSM_FSWCHANGE' , 'GridComp' , + :: + const_1d_lnd_Nt.descr: 'Tile-space,Constant,Time-invariant,Single-Level,Assimilation,Land Surface Model Parameters', const_1d_lnd_Nt.template: '%y4%m2%d2_%h2%n2z.bin', const_1d_lnd_Nt.mode: 'instantaneous', diff --git a/GEOSldas_App/process_hist.csh b/GEOSldas_App/process_hist.csh index 4c6516ff..deaba8df 100644 --- a/GEOSldas_App/process_hist.csh +++ b/GEOSldas_App/process_hist.csh @@ -7,7 +7,7 @@ setenv LSM_CHOICE $1 setenv AEROSOL_DEPOSITION $2 setenv GRID $3 -setenv GRIDNAME $4 +setenv GRIDNAME "'$4'" setenv HISTRC $5 setenv RUN_IRRIG $6 setenv ASSIM $7 @@ -26,11 +26,11 @@ endif if($GRID == CUBE) then sed -i '/^\#EASE/d' $HISTRC sed -i 's|\#CUBE|''|g' $HISTRC - sed -i 's|GRIDNAME|'"$GRIDNAME"'|g' $HISTRC + sed -i -e s/\'GRIDNAME\'/$GRIDNAME/g $HISTRC else sed -i '/^\#CUBE/d' $HISTRC sed -i 's|\#EASE|''|g' $HISTRC - sed -i 's|GRIDNAME|'"$GRIDNAME"'|g' $HISTRC + sed -i -e s/\'GRIDNAME\'/$GRIDNAME/g $HISTRC endif if($LSM_CHOICE == 1) then From 5160c6cffc6112031c0f924df1781c628bd8a6ab Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Mon, 23 Jun 2025 15:41:00 -0400 Subject: [PATCH 19/45] rename file and remove plotting options to avoid confusion --- .../ObsFcstAna_stats/Get_ObsFcstAna_sums.py | 70 +++++++++ GEOSldas_App/util/shared/python/tile2grid.py | 140 ------------------ 2 files changed, 70 insertions(+), 140 deletions(-) create mode 100644 GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py delete mode 100644 GEOSldas_App/util/shared/python/tile2grid.py diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py new file mode 100644 index 00000000..1c29ca94 --- /dev/null +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 + +""" +Sample script for post-processing GEOSldas ObsFcstAna output into data assimilation diagnostics. +First, compute and store monthly sums and sums of squares and cross-products of raw ObsFcstAna output. +Data assimilation diagnostics ("stats") such as the mean and std-dev of the observation-minus-forecast +residuals can then be diagnosed quickly from these intermediate "sums" files. +Sample script optionally computes and plots: +- Maps of long-term data assimilation diagnostics (see also Plot_stats_maps.py). +- Monthly time series of spatially averaged data assimilation diagnostics (see also Plot_stats_timeseries.py). + +Usage: + 1. Edit "user_config.py" with experiments information. + 2. Run this script as follows (on Discover): + $ module load python/GEOSpyD + $ ./Get_ObsFcstAna_sums.py + + # Background execution: + $ nohup ./Get_ObsFcstAna_sums.py > out.log & + +Authors: Q. Liu, R. Reichle, A. Fox +Last Modified: May 2025 +""" + +import sys; sys.path.append('../../shared/python/') +import warnings; warnings.filterwarnings("ignore") +import os + +import numpy as np + +from datetime import timedelta +from postproc_ObsFcstAna import postproc_ObsFcstAna +from user_config import get_config # user-defined config info + +# --- +# +# If the script is run in the background, uncomment the following lines to see the redirected +# standard output in the out.log file immediately. When the lines are commented out, the redirected +# standard output will not appear in the out.log file until the job has completed. +# If the script is run in the foreground, the lines can be commented out to monitor the output log. +# +#import io +#sys.stdout = io.TextIOWrapper(open(sys.stdout.fileno(), 'wb', 0), write_through=True) +#sys.stderr = io.TextIOWrapper(open(sys.stderr.fileno(), 'wb', 0), write_through=True) +# +# --- + +def main(): + + config = get_config() # edit user-defined inputs in user_config.py + + exp_list = config['exp_list'] + start_time = config['start_time'] + end_time = config['end_time'] + sum_path = config['sum_path'] + out_path = config['out_path'] + + # ------------------------------------------------------------------------------------ + # Postprocess raw ObsFcstAna output data into monthly sums + + # Initialize the postprocessing object + postproc = postproc_ObsFcstAna(exp_list, start_time, end_time, sum_path=sum_path) + + # Compute and save monthly sums + postproc.save_monthly_sums() + +if __name__ == '__main__': + main() + +# ====================== EOF ========================================================= diff --git a/GEOSldas_App/util/shared/python/tile2grid.py b/GEOSldas_App/util/shared/python/tile2grid.py deleted file mode 100644 index e99b2f05..00000000 --- a/GEOSldas_App/util/shared/python/tile2grid.py +++ /dev/null @@ -1,140 +0,0 @@ -import os -import numpy as np - -# convert data from 1-d array ("compressed") format to 2-d array (grid) format based on lat/lon coordinates -# -# 1-d input data can have one additional dimension (e.g., time) -# -# 2-d output data is lat-by-lon[-by-time] -def interpolate_to_latlon_grid(data_1d, lat, lon, resolution=1.0): - """ - For cubedsphere grid: use - - """ - from scipy.interpolate import griddata - - lat_grid = np.arange(-90+resolution/2, 90, resolution) - lon_grid = np.arange(-180+resolution/2, 180, resolution) - lon_2d, lat_2d = np.meshgrid(lon_grid, lat_grid) - - ny = len(lat_grid) - nx = len(lon_grid) - - nf = data_1d.shape[-1] - data_2d = np.full([ny, nx, nf], np.nan) - for i in range(nf): - data = data_1d[:,i] - data_2d[:,:,i] = griddata((lat.flatten(), lon.flatten()), data.flatten(), (lat_2d, lon_2d), method='linear') - - return data_2d, lat_2d, lon_2d - -def remap_1d_to_2d( data_1d, lat_1d, lon_1d): - - """ - For EASE grid: use remapping - - """ - - # Extract unique coordinates - unique_lats, indY = np.unique(lat_1d, return_inverse=True) - unique_lons, indX = np.unique(lon_1d, return_inverse=True) - - ny = len(unique_lats) - nx = len(unique_lons) - - nf = data_1d.shape[1] - data_2d = np.full([ny, nx, nf], np.nan) - data_2d[indY, indX, :] = data_1d - - lon_2d,lat_2d = np.meshgrid(unique_lons, unique_lats) - - return data_2d, lat_2d, lon_2d - -def tile2grid( tile_data, tile_coord, tile_grid, nodata=1.e15, nodata_tol=1.e-4): - - """ - Convert tile-space data to grid-space data. - - Parameters: - ---------- - tile_data : Array in tile space, shape (N_tile, N_fields) - tile_coord : Dictionary containing tile coordinate information - tile_grid : Dictionary containing grid information - nodata : Value for missing data - nodata_tol : Tolerance for comparing values to nodata - - Returns: - ------- - grid_data : Array in grid space, shape (N_lat, N_lon, N_fields) - lat_2d: Lat array in grid space, shape (N_lat, N_lon) - lon_2d: Lon array in grid space, shape (N_lat, N_lon) - - """ - # Verifity input input datasize - if tile_data.shape[0] != tile_coord['N_tile']: - print(f'Error: tile2grid input data size don''t match N_tile') - sys.exit() - - # if tile_data is 1-D [N_tile], expand into 2-D [N_tile,1] - if tile_data.ndim == 1: - tile_data = np.expand_dims(tile_data, axis=1) - - N_fields = tile_data.shape[-1] - - # Determine grid_type (cubedsphere, EASE or regular latlon) - is_CS = int(tile_grid['N_lat']/tile_grid['N_lon']) == 6 - is_EASE = 'EASE' in tile_grid['gridtype'] - - if is_CS: - - grid_data, lat_2d,lon_2d = interpolate_to_latlon_grid( tile_data, tile_coord['com_lat'], tile_coord['com_lon']) - - elif is_EASE: - - grid_data, lat_2d,lon_2d = remap_1d_to_2d( tile_data, tile_coord['com_lat'], tile_coord['com_lon']) - - else: - - # Initialize grid data array - grid_data = np.zeros((tile_grid['N_lat'], tile_grid['N_lon'], N_fields)) - - for k in range(N_fields): - # Initialize weight grid for current field - wgrid = np.zeros((tile_grid['N_lat'], tile_grid['N_lon'])) - - # Loop through tile space - for n in range(tile_coord['N_tile']): - # Calculate grid indices (adjust for Python's 0-based indexing) - i = tile_coord['i_indg'][n] - (tile_grid['i_offg'] - (1-tile_grid['ind_base'])) - 1 - j = tile_coord['j_indg'][n] - (tile_grid['j_offg'] - (1-tile_grid['ind_base'])) - 1 - - # Get weight - w = tile_coord['frac_cell'][n] - - # Check if current tile data is valid (not no-data) - if abs(tile_data[k, n] - nodata) > nodata_tol: - # Accumulate weighted data - grid_data[j, i, k] += w * tile_data[n,k] - wgrid[j, i] += w - - # Normalize and set no-data values - for i in range(tile_grid['N_lon']): - for j in range(tile_grid['N_lat']): - if wgrid[j, i] > 0.0: - # Normalize by total weight - grid_data[j, i, k] = grid_data[j, i, k] / wgrid[j, i] - else: - # Set no-data value - grid_data[j, i, k] = nodata - - # Replace nodata values with NaN - grid_data[grid_data == nodata] = np.nan - - lat_2d = np.arange(tile_grid['ll_lat']+0.5*tile_grid['dlat'], tile_grid['ur_lat'], tile_grid['dlat']) - lon_2d = np.arange(tile_grid['ll_lon']+0.5*tile_grid['dlon'], tile_grid['ur_lon'], tile_grid['dlon']) - - if grid_data.shape[-1] == 1: - grid_data = grid_data[:,:,0] - - return grid_data, lat_2d, lon_2d - From 4e0476406209e29171da0da1812132f5f06004ae Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Mon, 23 Jun 2025 15:47:21 -0400 Subject: [PATCH 20/45] edit da_t0 for off-the-hour, move spatial stats time loop inside function --- .../ObsFcstAna_stats/postproc_ObsFcstAna.py | 184 +++++++++++------- 1 file changed, 110 insertions(+), 74 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py index 03f95380..9794ae42 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py @@ -93,7 +93,7 @@ def compute_monthly_sums(self, date_time): n_tile = tc['N_tile'] n_spec = len(obsparam_list[0]) - date_time = date_time.replace(hour=self.da_t0) + date_time = date_time.replace(hour=int(self.da_t0), minute=int(np.mod(self.da_t0,1)*60)) end_time = date_time + relativedelta(months=1) data_sum = {} @@ -392,86 +392,122 @@ def calc_temporal_stats_from_sums(self, write_to_nc=True, fout_stats='./stats.nc # the straight spatial avg across a map of a given DA diagnostic. The latter approach gives # the same weight to each location, regardless of how many obs are available at the location. - def calc_spatial_stats_from_sums(self, date_time): + def calc_spatial_stats_from_sums(self): + start_time = self.start_time + end_time = self.end_time + var_list = ['obs_obs', 'obs_fcst','obs_ana'] + + O_mean = [] + O_stdv = [] + F_mean = [] + F_stdv = [] + A_mean = [] + A_stdv = [] + OmF_mean = [] + OmF_stdv = [] + OmA_mean = [] + OmA_stdv = [] + N_data = [] + date_vec = [] - mo_path = self.sum_path + '/Y'+ date_time.strftime('%Y') + '/M' + date_time.strftime('%m') + '/' - fnc4_sums = mo_path + self.exptag + '.ens_avg.ldas_ObsFcstAna_sums.' + date_time.strftime('%Y%m') +'.nc4' + # Time loop + current_time = start_time + while current_time < end_time: - mdata_sum = {} - mdata2_sum = {} - - try: - with Dataset(fnc4_sums,'r') as nc: - mN_data = nc.variables['N_data' ][:] - moxf_sum = nc.variables['obsxfcst_sum'][:] - moxa_sum = nc.variables['obsxana_sum' ][:] - moxf_sum[mN_data == 0] = np.nan - moxa_sum[mN_data == 0] = np.nan - for var in var_list: - mdata_sum[ var] = nc.variables[var+'_sum' ][:] - mdata2_sum[var] = nc.variables[var+'2_sum'][:] - mdata_sum[ var][mN_data == 0] = np.nan - mdata2_sum[var][mN_data == 0] = np.nan - except FileNotFoundError: - print(f"Error: File '{filename}' not found.") - sys.exit(1) - except Exception as e: - print(f"An unexpected error occurred: {e}") - sys.exit(1) - - # Make sure only aggregate tiles with valid values for all variables - for var in var_list: - mN_data = mN_data.astype(float) - mN_data[np.isnan(mdata_sum[var])] = np.nan - mN_data[mN_data == 0] = np.nan - - # cross mask before aggregating tile values - for var in var_list: - mdata_sum[ var][np.isnan(mN_data)] = np.nan - mdata2_sum[var][np.isnan(mN_data)] = np.nan - moxf_sum[np.isnan(mN_data)] = np.nan - moxa_sum[np.isnan(mN_data)] = np.nan - - # Aggregate data of all tiles - N_data = np.nansum(mN_data, axis=0) - OxF_mean = np.nansum(moxf_sum,axis=0)/N_data - OxA_mean = np.nansum(moxa_sum,axis=0)/N_data - data_mean = {} - data2_mean = {} - data_var = {} - for var in var_list: - data_mean[ var] = np.nansum(mdata_sum[var ],axis=0)/N_data - data2_mean[var] = np.nansum(mdata2_sum[var],axis=0)/N_data - # var(x) = E[x2] - (E[x])^2 - data_var[var] = data2_mean[var] - data_mean[var]**2 - - # Compute metrics of O-F, O-A, etc. based on above stats - O_mean = data_mean['obs_obs'] - F_mean = data_mean['obs_fcst'] - A_mean = data_mean['obs_ana'] - - O_var = data_var[ 'obs_obs'] - F_var = data_var[ 'obs_fcst'] - A_var = data_var[ 'obs_ana'] - - # mean(x-y) = E[x] - E[y] - OmF_mean = O_mean - F_mean - OmA_mean = O_mean - A_mean + mo_path = self.sum_path + '/Y'+ current_time.strftime('%Y') + '/M' + current_time.strftime('%m') + '/' + fnc4_sums = mo_path + self.exptag + '.ens_avg.ldas_ObsFcstAna_sums.' + current_time.strftime('%Y%m') +'.nc4' + + mdata_sum = {} + mdata2_sum = {} + + try: + with Dataset(fnc4_sums,'r') as nc: + mN_data = nc.variables['N_data' ][:] + moxf_sum = nc.variables['obsxfcst_sum'][:] + moxa_sum = nc.variables['obsxana_sum' ][:] + moxf_sum[mN_data == 0] = np.nan + moxa_sum[mN_data == 0] = np.nan + for var in var_list: + mdata_sum[ var] = nc.variables[var+'_sum' ][:] + mdata2_sum[var] = nc.variables[var+'2_sum'][:] + mdata_sum[ var][mN_data == 0] = np.nan + mdata2_sum[var][mN_data == 0] = np.nan + except FileNotFoundError: + print(f"Error: File '{fnc4_sums}' not found. Run Get_ObsFcstAna_sums first.") + sys.exit(1) + except Exception as e: + print(f"An unexpected error occurred: {e}") + sys.exit(1) + + # Make sure only aggregate tiles with valid values for all variables + for var in var_list: + mN_data = mN_data.astype(float) + mN_data[np.isnan(mdata_sum[var])] = np.nan + mN_data[mN_data == 0] = np.nan - # var(x-y) = var(x) + var(y) - 2cov(x,y) - # cov(x,y) = E[xy] - E[x]E[y] - OmF_stdv = np.sqrt(O_var + F_var - 2 * (OxF_mean - O_mean*F_mean)) - OmA_stdv = np.sqrt(O_var + A_var - 2 * (OxA_mean - O_mean*A_mean)) + # cross mask before aggregating tile values + for var in var_list: + mdata_sum[ var][np.isnan(mN_data)] = np.nan + mdata2_sum[var][np.isnan(mN_data)] = np.nan + moxf_sum[np.isnan(mN_data)] = np.nan + moxa_sum[np.isnan(mN_data)] = np.nan + + # Aggregate data of all tiles + N_data_mo = np.nansum(mN_data, axis=0) + OxF_mean = np.nansum(moxf_sum,axis=0)/N_data_mo + OxA_mean = np.nansum(moxa_sum,axis=0)/N_data_mo + data_mean = {} + data2_mean = {} + data_var = {} + for var in var_list: + data_mean[ var] = np.nansum(mdata_sum[var ],axis=0)/N_data_mo + data2_mean[var] = np.nansum(mdata2_sum[var],axis=0)/N_data_mo + # var(x) = E[x2] - (E[x])^2 + data_var[var] = data2_mean[var] - data_mean[var]**2 + + # Compute monthly metrics of O-F, O-A, etc. based on above stats + O_mean_mo = data_mean['obs_obs'] + F_mean_mo = data_mean['obs_fcst'] + A_mean_mo = data_mean['obs_ana'] + + O_var = data_var['obs_obs'] + F_var = data_var['obs_fcst'] + A_var = data_var['obs_ana'] + + # mean(x-y) = E[x] - E[y] + OmF_mean_mo = O_mean_mo - F_mean_mo + OmA_mean_mo = O_mean_mo - A_mean_mo + + # var(x-y) = var(x) + var(y) - 2cov(x,y) + # cov(x,y) = E[xy] - E[x]E[y] + OmF_stdv_mo = np.sqrt(O_var + F_var - 2 * (OxF_mean - O_mean_mo*F_mean_mo)) + OmA_stdv_mo = np.sqrt(O_var + A_var - 2 * (OxA_mean - O_mean_mo*A_mean_mo)) + + # Extend timeseries + N_data.append(N_data_mo) + O_mean.append(O_mean_mo) + O_stdv.append(np.sqrt(O_var)) + F_mean.append(F_mean_mo) + F_stdv.append(np.sqrt(F_var)) + A_mean.append(A_mean_mo) + A_stdv.append(np.sqrt(A_var)) + OmF_mean.append(OmF_mean_mo) + OmF_stdv.append(OmF_stdv_mo) + OmA_mean.append(OmA_mean_mo) + OmA_stdv.append(OmA_stdv_mo) + + date_vec.append(current_time.strftime('%Y%m')) + current_time = current_time + relativedelta(months=1) stats = { - 'O_mean' : O_mean, 'O_stdv' : np.sqrt(O_var), - 'F_mean' : F_mean, 'F_stdv' : np.sqrt(F_var), - 'A_mean' : A_mean, 'A_stdv' : np.sqrt(A_var), - 'OmF_mean': OmF_mean, 'OmF_stdv': OmF_stdv, - 'OmA_mean': OmA_mean, 'OmA_stdv': OmA_stdv, - 'N_data': N_data, + 'O_mean' : np.array(O_mean), 'O_stdv' : np.array(O_stdv), + 'F_mean' : np.array(F_mean), 'F_stdv' : np.array(F_stdv), + 'A_mean' : np.array(A_mean), 'A_stdv' : np.array(A_stdv), + 'OmF_mean': np.array(OmF_mean), 'OmF_stdv': np.array(OmF_stdv), + 'OmA_mean': np.array(OmA_mean), 'OmA_stdv': np.array(OmA_stdv), + 'N_data': np.array(N_data), 'date_vec': date_vec } return stats From f82e55f24a4a1290916925290febc5381d742df5 Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Mon, 23 Jun 2025 15:59:25 -0400 Subject: [PATCH 21/45] change 1-d to 2-d regridding --- .../ObsFcstAna_stats/Plot_stats_maps.py | 195 ++++++++++-------- 1 file changed, 104 insertions(+), 91 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py index 91e776bb..17dae399 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py @@ -3,29 +3,63 @@ """ Sample script for plotting maps of long-term data assimilation diagnostics. Plots Nobs-weighted avg of each metric across all species. -Requires saved files with monthly sums (see Get_ObsFcstAna_stat.py). +Requires saved files with monthly sums (see Get_ObsFcstAna_sums.py). Stats of *normalized* OmFs are approximated! """ import sys; sys.path.append('../../shared/python/') import warnings; warnings.filterwarnings("ignore") +import os import numpy as np import matplotlib.pyplot as plt from datetime import timedelta -from tile2grid import tile2grid +from netCDF4 import Dataset + +from tile_to_latlon import tile_to_latlon, get_grid_resolution from plot import plotMap from EASEv2 import EASEv2_ind2latlon -def plot_OmF_maps(postproc_obj, stats, fig_path='./'): +from postproc_ObsFcstAna import postproc_ObsFcstAna +from user_config import get_config + +def Main_OmF_maps(): + + # Plot maps of long-term temporal stats + + config = get_config() # edit user-defined inputs in user_config.py + + exp_list = config['exp_list'] + start_time = config['start_time'] + end_time = config['end_time'] + sum_path = config['sum_path'] + out_path = config['out_path'] + + # Provide Nmin to screen out tiles with inadequate data for temporal stats + Nmin = 20 - start_time = postproc_obj.start_time - end_time = postproc_obj.end_time - domain = postproc_obj.domain - tc = postproc_obj.tilecoord - tg = postproc_obj.tilegrid_global + # ------------------------------------------------------------------------------------ + # Get metrics in [N_tile, N_species] for plotting + # ------------------------------------------------------------------------------------ + # File name for long-term temporal stats + stats_file = out_path + 'temporal_stats_'+exp_list[0]['exptag']+'_'+ start_time.strftime('%Y%m%d')+'_'+ \ + (end_time+timedelta(days=-1)).strftime('%Y%m%d')+'.nc4' + + # Read or compute long-term temporal stats. Each field has the dimension [N_tile, N_species]. + if os.path.isfile(stats_file): + + print('reading stats nc4 file '+ stats_file) + stats = {} + with Dataset(stats_file,'r') as nc: + for key, value in nc.variables.items(): + stats[key] = value[:].filled(np.nan) + + else: + # Initialize the postprocessing object and get information + postproc_obj = postproc_ObsFcstAna(exp_list, start_time, end_time, sum_path=sum_path) + stats = postproc_obj.calc_temporal_stats_from_sums(write_to_nc=True, fout_stats=stats_file) N_data = stats['N_data'] OmF_mean = stats['OmF_mean'] @@ -35,11 +69,22 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'): OmF_norm_mean = stats['OmF_norm_mean'] OmF_norm_stdv = stats['OmF_norm_stdv'] + # Screen all fields with Nmin except for N_data + OmF_mean[N_data < Nmin] = np.nan + OmF_stdv[N_data < Nmin] = np.nan + OmA_mean[N_data < Nmin] = np.nan + OmA_stdv[N_data < Nmin] = np.nan + OmF_norm_mean[N_data < Nmin] = np.nan + OmF_norm_stdv[N_data < Nmin] = np.nan + + # Select/combine fields for plotting. The following provides an example to + # combine statistical fields of all species to averages. + # Compute Nobs-weighted avg of each metric across all species. # Typically used for SMAP Tb_h/h from asc and desc overpasses, # or ASCAT soil moisture from Metop-A/B/C. # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS! - Nobs_data = np.nansum( N_data, axis=1) + Nobs_data = np.nansum( N_data, axis=1) OmF_mean = np.nansum(OmF_mean *N_data, axis=1)/Nobs_data OmF_stdv = np.nansum(OmF_stdv *N_data, axis=1)/Nobs_data OmF_norm_mean = np.nansum(OmF_norm_mean*N_data, axis=1)/Nobs_data @@ -47,18 +92,47 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'): OmA_mean = np.nansum(OmA_mean *N_data, axis=1)/Nobs_data OmA_stdv = np.nansum(OmA_stdv *N_data, axis=1)/Nobs_data + # Nobs_data need to be at least 1, otherwise unwanted zeros + # might get into the computation of global mean etc. + Nobs_data[Nobs_data == 0] = np.nan + + # -------------------------------------------------------------------------------- + # Get map grid information and provide map resolution (in degree). + # Observation FOV-based resolution estimate is provided. The resolution can also be + # manually set for optimal map performance + # --------------------------------------------------------------------------------- + tc = exp_list[0]['tilecoord'] + tg = exp_list[0]['tilegrid_global'] + obs_fov = exp_list[0]['obsparam'][0]['FOV'] + obs_fov_units = exp_list[0]['obsparam'][0]['FOV_units'] + + # Get map grid resolution for mapping tile data on lat/lon grid + # Convert FOV to degree, assuming 100km is about 1 deg. + if 'km' in obs_fov_units: + obs_fov = obs_fov /100. + + # Set obs_fov at model resolution if obs_fov = 0 + if obs_fov == 0: + obs_fov = 360./tile_grid['N_lon']/2. + + # Get map plot grid resolution based on obs FOV + map_grid_resolution = get_grid_resolution(obs_fov) + print(f'map grid resolution: {map_grid_resolution}' degree) + + # ---------------------------------------------------------------------------------- # Plotting - exptag = postproc_obj.exptag + # ---------------------------------------------------------------------------------- + exptag = exp_list[0]['exptag'] fig, axes = plt.subplots(2,2, figsize=(18,10)) plt.rcParams.update({'font.size':14}) - + for i in np.arange(2): for j in np.arange(2): units = '[k]' if i == 0 and j == 0: tile_data = Nobs_data - # crange is [cmin, cmax] + #crange is [cmin, cmax] crange =[0, np.ceil((end_time-start_time).days/150)*300] colormap = plt.get_cmap('jet',20) title_txt = exptag + ' Tb Nobs ' + start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m') @@ -80,102 +154,41 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'): title_txt = exptag + ' Tb normalized O-F stdv (approx!) '+ start_time.strftime('%Y%m%d')+'_'+end_time.strftime('%Y%m%d') colormap.set_bad(color='0.9') # light grey, 0-black, 1-white + + # map tile_data on 2-d regular lat/lon grid for map plotting + grid_data, lat_2d, lon_2d = tile_to_latlon(tile_data, tc, tg, map_grid_resolution) - # Regrid 1d tile_data to 2d grid_data for map plots - if '_M09_' in domain: # special case - grid_data_M09 = np.zeros((1624, 3856)) + np.nan - grid_data_M09[tc['j_indg'],tc['i_indg']] = tile_data - - # Reshape the data into 4x4 blocks - reshaped = grid_data_M09.reshape(1624//4, 4, 3856//4, 4) - - # Combine each 4x4 M09 block into a M36 grid - #if i==0 and j==0: - # grid_data = np.sum(reshaped,axis=(1, 3)) - #else: - # grid_data = np.nanmean(reshaped,axis=(1, 3)) - - grid_data = grid_data_M09[1::4, 2::4] - - # NOT area weighted - wmean = np.nanmean(grid_data) - wabsmean = np.nanmean(np.abs(grid_data)) - if 'normalized' in title_txt: - wabsmean = np.nanmean(np.abs(grid_data-1.)) - - lat_M36, lon_M36 = EASEv2_ind2latlon(np.arange(406), np.arange(964),'M36') - lon_2d,lat_2d = np.meshgrid(lon_M36,lat_M36) - else: - grid_data, lat_2d, lon_2d = tile2grid(tile_data, tc, tg) - - # Area weighted mean and mean(abs) - wmean = np.nansum( tile_data * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area']) - wabsmean = np.nansum(np.abs(tile_data) * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area']) - if 'normalized' in title_txt: - wabsmean = np.nansum(np.abs(tile_data-1.) * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area']) - + mean = np.nanmean(grid_data) + absmean =np.nanmean(np.abs(grid_data)) + if 'normalized' in title_txt: - title_txt = title_txt + '\n' + "avg=%.3f, avg(abs(nstdv-1))=%.3f" % (wmean, wabsmean)+' '+units + absmean = np.nanmean(np.abs(grid_data-1.) ) + + if 'normalized' in title_txt and 'stdv' in title_txt: + title_txt = title_txt + '\n' + "avg=%.3f, avg(abs(nstdv-1))=%.3f" % (mean, absmean)+' '+units elif 'mean' in title_txt: - title_txt = title_txt + '\n' + "avg=%.3f, avg(abs)=%.3f" % (wmean, wabsmean)+' '+units + title_txt = title_txt + '\n' + "avg=%.3f, avg(abs)=%.3f" % (mean, absmean)+' '+units else: - title_txt = title_txt + '\n' + "avg=%.2f" % (wmean) +' '+units - + title_txt = title_txt + '\n' + "avg=%.2f" % (mean) +' '+units + if 'normalized' in title_txt: grid_data = np.log10(grid_data) crange = [-0.6, 0.45] mm, cs = plotMap(grid_data, ax =axes[i,j], lat=lat_2d, lon=lon_2d, cRange=crange, \ - title=title_txt, cmap=colormap, bounding=[-60, 80, -180,180]) - + title=title_txt, cmap=colormap, bounding=[-60, 80, -180,180]) + plt.tight_layout() # Save figure to file - fig.savefig(fig_path+'Map_OmF_'+ exptag +'_'+start_time.strftime('%Y%m')+'_'+\ + fig.savefig(out_path+'Map_OmF_'+ exptag +'_'+start_time.strftime('%Y%m')+'_'+\ (end_time+timedelta(days=-1)).strftime('%Y%m')+'.png') - #plt.show() + plt.show() plt.close(fig) # ----------------------------------------------------------------------------------------------- if __name__ == '__main__': - - # Plot maps of long-term temporal stats - - from postproc_ObsFcstAna import postproc_ObsFcstAna - from user_config import get_config - - config = get_config() # edit user-defined inputs in user_config.py - - exp_list = config['exp_list'] - start_time = config['start_time'] - end_time = config['end_time'] - sum_path = config['sum_path'] - out_path = config['out_path'] - - # ------------------------------------------------------------------------------------ - - # Initialize the postprocessing object - postproc = postproc_ObsFcstAna(exp_list, start_time, end_time, sum_path=sum_path) - - # File name for long-term temporal stats - stats_file = out_path + 'temporal_stats_'+exp_list[0]['exptag']+'_'+ start_time.strftime('%Y%m%d')+'_'+ \ - (end_time+timedelta(days=-1)).strftime('%Y%m%d')+'.nc4' - - - # Read or compute long-term temporal stats. Each field has the dimension [N_tile, N_species]. - if os.path.isfile(stats_file): - - print('reading stats nc4 file '+ stats_file) - temporal_stats = {} - with Dataset(stats_file,'r') as nc: - for key, value in nc.variables.items(): - temporal_stats[key] = value[:].filled(np.nan) - - else: - temporal_stats = postproc.calc_temporal_stats_from_sums(write_to_nc=True, fout_stats=stats_file) - - # Plot stats - plot_OmF_maps(postproc, temporal_stats, fig_path=out_path ) + Main_OmF_maps() # ====================== EOF ========================================================= From 54d822da31a276be961e741609bcaa7acde30115 Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Mon, 23 Jun 2025 16:00:32 -0400 Subject: [PATCH 22/45] minor change --- GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py index 17dae399..30650e82 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py @@ -182,7 +182,7 @@ def Main_OmF_maps(): # Save figure to file fig.savefig(out_path+'Map_OmF_'+ exptag +'_'+start_time.strftime('%Y%m')+'_'+\ (end_time+timedelta(days=-1)).strftime('%Y%m')+'.png') - plt.show() + #plt.show() plt.close(fig) # ----------------------------------------------------------------------------------------------- From 6dfe1c3c83663b3e4b86c52abba8b87f1ed95a2a Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Mon, 23 Jun 2025 16:01:58 -0400 Subject: [PATCH 23/45] minor clean up and move time loop out --- .../ObsFcstAna_stats/Plot_stats_timeseries.py | 101 ++++++------------ 1 file changed, 35 insertions(+), 66 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py index f7a40e88..bc66e8d2 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py @@ -12,78 +12,64 @@ import numpy as np import matplotlib.pyplot as plt +import pickle from datetime import datetime, timedelta from dateutil.relativedelta import relativedelta + +from postproc_ObsFcstAna import postproc_ObsFcstAna +from user_config import get_config + +def Main_OmF_timeseries(): + + config = get_config() # edit user-defined inputs in user_config.py + + exp_list = config['exp_list'] + start_time = config['start_time'] + end_time = config['end_time'] + sum_path = config['sum_path'] + out_path = config['out_path'] + + # ------------------------------------------------------------------------------------ + # + # Initialize the postprocessing object + postproc_obj = postproc_ObsFcstAna(exp_list, start_time, end_time, sum_path=sum_path) -def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'): - import pickle exptag = postproc_obj.exptag start_time = postproc_obj.start_time end_time = postproc_obj.end_time - stats_file = fig_path + 'spatial_stats_'+exptag+'_'+start_time.strftime('%Y%m')+ \ + stats_file = out_path + 'spatial_stats_'+exptag+'_'+start_time.strftime('%Y%m')+ \ '_'+(end_time+timedelta(days=-1)).strftime('%Y%m')+'.pkl' if os.path.isfile(stats_file): with open(stats_file,'rb') as file: + print(f'reading from {stats_file}') stats = pickle.load(file) - date_vec = stats['date_vec'] else: - - # Initialize monthly metrics as list - OmF_mean =[] - OmF_stdv =[] - OmA_mean =[] - OmA_stdv =[] - Ndata =[] - date_vec =[] - - # Time loop - current_time = start_time - while current_time < end_time: - print('compute monthly spatial mean stats for '+ current_time.strftime('%Y%m')) - - stats_mo = postproc_obj.calc_spatial_stats_from_sums(current_time) - - OmFm = stats_mo['OmF_mean'] - OmFs = stats_mo['OmF_stdv'] - OmAm = stats_mo['OmA_mean'] - OmAs = stats_mo['OmA_stdv'] - Nobsm = stats_mo['N_data'] - - Ndata.append(Nobsm) - - OmF_mean.append(OmFm) - OmF_stdv.append(OmFs) - OmA_mean.append(OmAm) - OmA_stdv.append(OmAs) - - date_vec.append(current_time.strftime('%Y%m')) - current_time = current_time + relativedelta(months=1) - - # Store stats in a dictionary for easier saving and referencing - stats = {"OmF_mean":np.array(OmF_mean), "OmF_stdv":np.array(OmF_stdv), - "OmA_mean":np.array(OmA_mean), "OmA_stdv":np.array(OmA_stdv), - "Ndata":np.array(Ndata), "date_vec":date_vec} + + stats = postproc_obj.calc_spatial_stats_from_sums() if stats_file is not None: with open(stats_file,'wb') as file: + print(f'saveing stats to {stats_file}') pickle.dump(stats,file) + date_vec = stats['date_vec'] + # Compute Nobs-weighted avg of each metric across all species. # Typically used for SMAP Tb_h/h from asc and desc overpasses, # or ASCAT soil moisture from Metop-A/B/C. # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS! stats_plot = {} - Ndata = np.nansum(stats['Ndata'], axis=1) - stats_plot['Ndata'] = Ndata - stats_plot['OmF_mean'] = np.nansum(stats['OmF_mean']*stats['Ndata'], axis=1)/Ndata - stats_plot['OmF_stdv'] = np.nansum(stats['OmF_stdv']*stats['Ndata'], axis=1)/Ndata - stats_plot['OmA_mean'] = np.nansum(stats['OmA_mean']*stats['Ndata'], axis=1)/Ndata - stats_plot['OmA_stdv'] = np.nansum(stats['OmA_stdv']*stats['Ndata'], axis=1)/Ndata + N_data = np.nansum(stats['N_data'], axis=1) + stats_plot['N_data'] = N_data + stats_plot['OmF_mean'] = np.nansum(stats['OmF_mean']*stats['N_data'], axis=1)/N_data + stats_plot['OmF_stdv'] = np.nansum(stats['OmF_stdv']*stats['N_data'], axis=1)/N_data + stats_plot['OmA_mean'] = np.nansum(stats['OmA_mean']*stats['N_data'], axis=1)/N_data + stats_plot['OmA_stdv'] = np.nansum(stats['OmA_stdv']*stats['N_data'], axis=1)/N_data plot_var = 'OmF_mean' @@ -93,36 +79,19 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'): plt.grid(True, linestyle='--', alpha=0.5) plt.xticks(ticks=date_vec[::2], labels=date_vec[::2]) - plt.title(exptag+ ' monthly '+plot_var) + plt.title(exptag+ 'monthly '+plot_var) plt.xlim(-1, len(date_vec)+1) plt.ylim(-.1, 2.) plt.tight_layout() #plt.show() - plt.savefig(fig_path+'Bars_'+plot_var+exptag+date_vec[0]+'_'+\ + plt.savefig(out_path+'Bars_'+plot_var+exptag+date_vec[0]+'_'+\ date_vec[-1]+'.png') plt.close() if __name__ == "__main__": - - from postproc_ObsFcstAna import postproc_ObsFcstAna - from user_config import get_config - - config = get_config() # edit user-defined inputs in user_config.py - exp_list = config['exp_list'] - start_time = config['start_time'] - end_time = config['end_time'] - sum_path = config['sum_path'] - out_path = config['out_path'] - - # ------------------------------------------------------------------------------------ - # - # Initialize the postprocessing object - postproc = postproc_ObsFcstAna(exp_list, start_time, end_time, sum_path=sum_path) - - # Compute spatial stats and plot monthly O-F stats bars - Plot_monthly_OmF_bars(postproc, fig_path=out_path) + Main_OmF_timeseries() # ====================== EOF ========================================================= From 76ccea91d96e92d2c0cf8496cb6e51567ff9c832 Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Mon, 23 Jun 2025 16:14:29 -0400 Subject: [PATCH 24/45] minor changes --- .../util/shared/python/tile_to_latlon.py | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 GEOSldas_App/util/shared/python/tile_to_latlon.py diff --git a/GEOSldas_App/util/shared/python/tile_to_latlon.py b/GEOSldas_App/util/shared/python/tile_to_latlon.py new file mode 100644 index 00000000..9ba346f4 --- /dev/null +++ b/GEOSldas_App/util/shared/python/tile_to_latlon.py @@ -0,0 +1,155 @@ +import os +import numpy as np + +# convert data from 1-d array ("compressed") format to 2-d array (grid) format based on lat/lon coordinates +# +# 1-d input data can have one additional dimension (e.g., time) +# +# 2-d output data is lat-by-lon[-by-time] + +def get_grid_resolution(fov_radius=0.5): + ''' + Estimate the resolution in degrees for the target lat/lon grid when remapping 1-D + tile data to regular 2-D lat/lon grid for plotting purposes. + The resolution is an approximation based on the observation FOV radius information. + + ''' + return np.round(fov_radius * 2., decimals=3) + +def interpolate_to_latlon(data_1d, lat, lon, resolution): + """ + For cubedsphere grid: use + + """ + from scipy.interpolate import griddata + from scipy.spatial.distance import cdist + + lat_grid = np.arange(-90+resolution/2, 90, resolution) + lon_grid = np.arange(-180+resolution/2, 180, resolution) + lon_2d, lat_2d = np.meshgrid(lon_grid, lat_grid) + + ny = len(lat_grid) + nx = len(lon_grid) + + nf = data_1d.shape[-1] + data_2d = np.full([ny, nx, nf], np.nan) + for i in range(nf): + data = data_1d[:,i] + data_2d[:,:,i] = griddata((lat.flatten(), lon.flatten()), data.flatten(), (lat_2d, lon_2d), method='linear') + + return data_2d, lat_2d, lon_2d + +def remap_1d_to_2d( data_1d, lat_1d, lon_1d): + + """ + For EASE grid: use remapping + + """ + + # Extract unique coordinates + unique_lats, indY = np.unique(lat_1d, return_inverse=True) + unique_lons, indX = np.unique(lon_1d, return_inverse=True) + + ny = len(unique_lats) + nx = len(unique_lons) + + nf = data_1d.shape[1] + data_2d = np.full([ny, nx, nf], np.nan) + data_2d[indY, indX, :] = data_1d + + lon_2d,lat_2d = np.meshgrid(unique_lons, unique_lats) + + return data_2d, lat_2d, lon_2d + +def tile2grid(tile_data, tile_coord, tile_grid): + + N_fields = tile_data.shape[-1] + # Initialize grid data array + grid_data = np.zeros((tile_grid['N_lat'], tile_grid['N_lon'], N_fields)) + + for k in range(N_fields): + # Initialize weight grid for current field + wgrid = np.zeros((tile_grid['N_lat'], tile_grid['N_lon'])) + + # Loop through tile space + for n in range(tile_coord['N_tile']): + # Calculate grid indices (adjust for Python's 0-based indexing) + i = tile_coord['i_indg'][n] - (tile_grid['i_offg'] - (1-tile_grid['ind_base'])) - 1 + j = tile_coord['j_indg'][n] - (tile_grid['j_offg'] - (1-tile_grid['ind_base'])) - 1 + + # Get weight + w = tile_coord['frac_cell'][n] + + # Check if current tile data is valid (not no-data) + if ~np.isnan(tile_data[k, n]): + # Accumulate weighted data + grid_data[j, i, k] += w * tile_data[n,k] + wgrid[j, i] += w + + # Normalize and set no-data values + for i in range(tile_grid['N_lon']): + for j in range(tile_grid['N_lat']): + if wgrid[j, i] > 0.0: + # Normalize by total weight + grid_data[j, i, k] = grid_data[j, i, k] / wgrid[j, i] + else: + # Set no-data value + grid_data[j, i, k] = np.nan + + lat_2d = np.arange(tile_grid['ll_lat']+0.5*tile_grid['dlat'], tile_grid['ur_lat'], tile_grid['dlat']) + lon_2d = np.arange(tile_grid['ll_lon']+0.5*tile_grid['dlon'], tile_grid['ur_lon'], tile_grid['dlon']) + +def tile_to_latlon( tile_data, tile_coord, tile_grid, resolution, nodata=1.e15, nodata_tol=1.e-4): + + """ + Convert tile-space data to grid-space data. + + Parameters: + ---------- + tile_data : Array in tile space, shape (N_tile, N_fields) + tile_coord : Dictionary containing tile coordinate information + tile_grid : Dictionary containing grid information + resolution : target grid resolution + nodata : Value for missing data + nodata_tol : Tolerance for comparing values to nodata + + Returns: + ------- + grid_data : Array in regular grid space, shape (N_lat, N_lon, N_fields) + lat_2d: Lat array in regular grid space, shape (N_lat, N_lon) + lon_2d: Lon array in regular grid space, shape (N_lat, N_lon) + + """ + tile_data[np.abs(tile_data - nodata) < nodata_tol] = np.nan + + # Verifity input datasize + if tile_data.shape[0] != tile_coord['N_tile']: + print(f'Error: tile2grid input data size don''t match N_tile') + sys.exit() + + # if tile_data is 1-D [N_tile], expand into 2-D [N_tile,1] + if tile_data.ndim == 1: + tile_data = np.expand_dims(tile_data, axis=1) + + nf = tile_data.shape[-1] + + lat_grid = np.arange(-90+resolution/2, 90, resolution) + lon_grid = np.arange(-180+resolution/2, 180, resolution) + lon_2d, lat_2d = np.meshgrid(lon_grid, lat_grid) + + grid_data = np.full([len(lat_grid), len(lon_grid), nf], np.nan) + + for v in range(nf): + for k in range(tile_coord['N_tile']): + if np.any([~np.isnan(tile_data[k,:])], axis=1): + j = np.searchsorted(lat_grid + resolution/2., tile_coord['com_lat'][k]) + i = np.searchsorted(lon_grid + resolution/2., tile_coord['com_lon'][k]) + # sometime the missing points are filled with zeros. taking largest value within + # the target grid cell avoild picking up unwanted zeros. + grid_data[j,i,v] = np.nanmax([tile_data[k,v], grid_data[j,i,v]]) + + if grid_data.shape[-1] == 1: + grid_data = grid_data[:,:,0] + + return grid_data, lat_2d, lon_2d + From caa0f90e8e0bff63799cd6536c4540ca01cac7b3 Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Mon, 23 Jun 2025 16:15:28 -0400 Subject: [PATCH 25/45] minor correction --- GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py index 30650e82..98e1a63b 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py @@ -117,7 +117,7 @@ def Main_OmF_maps(): # Get map plot grid resolution based on obs FOV map_grid_resolution = get_grid_resolution(obs_fov) - print(f'map grid resolution: {map_grid_resolution}' degree) + print(f'map grid resolution: {map_grid_resolution} degree') # ---------------------------------------------------------------------------------- # Plotting From 978fdbf288740af50a25845662c6e09934179e94 Mon Sep 17 00:00:00 2001 From: Weiyuan Jiang Date: Mon, 23 Jun 2025 16:25:01 -0400 Subject: [PATCH 26/45] refactoring... --- GEOSldas_App/process_hist.csh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GEOSldas_App/process_hist.csh b/GEOSldas_App/process_hist.csh index deaba8df..97d35afe 100644 --- a/GEOSldas_App/process_hist.csh +++ b/GEOSldas_App/process_hist.csh @@ -26,12 +26,11 @@ endif if($GRID == CUBE) then sed -i '/^\#EASE/d' $HISTRC sed -i 's|\#CUBE|''|g' $HISTRC - sed -i -e s/\'GRIDNAME\'/$GRIDNAME/g $HISTRC else sed -i '/^\#CUBE/d' $HISTRC sed -i 's|\#EASE|''|g' $HISTRC - sed -i -e s/\'GRIDNAME\'/$GRIDNAME/g $HISTRC endif +sed -i -e s/\'GRIDNAME\'/$GRIDNAME/g $HISTRC if($LSM_CHOICE == 1) then set GridComp = CATCH From f5fa08cf220a3171262a94e9cfb5b8a136558165 Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Mon, 23 Jun 2025 16:27:27 -0400 Subject: [PATCH 27/45] remove old file --- .../ObsFcstAna_stats/Get_ObsFcstAna_stats.py | 97 ------------------- 1 file changed, 97 deletions(-) delete mode 100644 GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py deleted file mode 100644 index c4cf6620..00000000 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env python3 - -""" -Sample script for post-processing GEOSldas ObsFcstAna output into data assimilation diagnostics. -First, compute and store monthly sums and sums of squares and cross-products of raw ObsFcstAna output. -Data assimilation diagnostics ("stats") such as the mean and std-dev of the observation-minus-forecast -residuals can then be diagnosed quickly from these intermediate "sums" files. -Sample script optionally computes and plots: -- Maps of long-term data assimilation diagnostics (see also Plot_stats_maps.py). -- Monthly time series of spatially averaged data assimilation diagnostics (see also Plot_stats_timeseries.py). - -Usage: - 1. Edit "user_config.py" with experiments information. - 2. Run this script as follows (on Discover): - $ module load python/GEOSpyD - $ ./Get_ObsFcstAna_stats.py - - # Background execution: - $ nohup ./Get_ObsFcstAna_stats.py > out.log & - -Authors: Q. Liu, R. Reichle, A. Fox -Last Modified: May 2025 -""" - -import sys; sys.path.append('../../shared/python/') -import warnings; warnings.filterwarnings("ignore") -import os - -import numpy as np - -from datetime import timedelta -from postproc_ObsFcstAna import postproc_ObsFcstAna -from user_config import get_config # user-defined config info - -# --- -# -# If the script is run in the background, uncomment the following lines to see the redirected -# standard output in the out.log file immediately. When the lines are commented out, the redirected -# standard output will not appear in the out.log file until the job has completed. -# If the script is run in the foreground, the lines must be commented out. -# -#import io -#sys.stdout = io.TextIOWrapper(open(sys.stdout.fileno(), 'wb', 0), write_through=True) -#sys.stderr = io.TextIOWrapper(open(sys.stderr.fileno(), 'wb', 0), write_through=True) -# -# --- - -def main(): - - config = get_config() # edit user-defined inputs in user_config.py - - exp_list = config['exp_list'] - start_time = config['start_time'] - end_time = config['end_time'] - sum_path = config['sum_path'] - out_path = config['out_path'] - - # ------------------------------------------------------------------------------------ - # Postprocess raw ObsFcstAna output data into monthly sums - - # Initialize the postprocessing object - postproc = postproc_ObsFcstAna(exp_list, start_time, end_time, sum_path=sum_path) - - # Compute and save monthly sums - postproc.save_monthly_sums() - - # -------------------------------------------------------------------------------------- - # Optionally compute long-term temporal/spatial statistics and create plots. - # The plotting scripts can also run standalone using the individual Plot_stats_*.py scripts, - # as long as the monthly sum files are available. - - plot_maps = False - plot_timeseries = False - - if plot_maps: # Compute long-term temporal stats and plot maps - - stats_file = out_path + 'temporal_stats_'+exp_list[0]['exptag']+ '_'+start_time.strftime('%Y%m%d')+'_'+ \ - (end_time+timedelta(days=-1)).strftime('%Y%m%d')+'.nc4' - - # temporal_stats is a dictionary that contains all mean/variances fields for computing long-term O-F/O-A stats - # each field has the dimension [N_tile, N_species] - - temporal_stats = postproc.calc_temporal_stats_from_sums(write_to_nc=True, fout_stats=stats_file) - - # Example to plot some O-F maps - from Plot_stats_maps import plot_OmF_maps - plot_OmF_maps(postproc, temporal_stats, fig_path=out_path ) - - if plot_timeseries: # Compute spatially averaged stats and plot monthly time series of stats - - from Plot_stats_timeseries import Plot_monthly_OmF_bars - Plot_monthly_OmF_bars(postproc, fig_path=out_path) - -if __name__ == '__main__': - main() - -# ====================== EOF ========================================================= From dc7887631f71dc5721159d02f0283d0ff1583d9c Mon Sep 17 00:00:00 2001 From: Rolf Reichle Date: Tue, 24 Jun 2025 08:46:46 -0400 Subject: [PATCH 28/45] corrected intro comments (Get_ObsFcstAna_sums.py) --- .../postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py index 1c29ca94..b8283e03 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py @@ -2,12 +2,9 @@ """ Sample script for post-processing GEOSldas ObsFcstAna output into data assimilation diagnostics. -First, compute and store monthly sums and sums of squares and cross-products of raw ObsFcstAna output. +Computes and stores monthly sums and sums of squares and cross-products of raw ObsFcstAna output. Data assimilation diagnostics ("stats") such as the mean and std-dev of the observation-minus-forecast -residuals can then be diagnosed quickly from these intermediate "sums" files. -Sample script optionally computes and plots: -- Maps of long-term data assimilation diagnostics (see also Plot_stats_maps.py). -- Monthly time series of spatially averaged data assimilation diagnostics (see also Plot_stats_timeseries.py). +residuals can then be diagnosed quickly from these intermediate "sums" files. See Plot_stats_*.py. Usage: 1. Edit "user_config.py" with experiments information. @@ -19,7 +16,7 @@ $ nohup ./Get_ObsFcstAna_sums.py > out.log & Authors: Q. Liu, R. Reichle, A. Fox -Last Modified: May 2025 +Last Modified: June 2025 """ import sys; sys.path.append('../../shared/python/') From 387834bfce543f54a3ce31fca6e6530b7c87a097 Mon Sep 17 00:00:00 2001 From: Rolf Reichle Date: Tue, 24 Jun 2025 09:53:58 -0400 Subject: [PATCH 29/45] revised computation of plotting grid spacing; removed obsolete functions; edited comments (Plot_stats_maps.py, tile_to_latlon.py) --- .../ObsFcstAna_stats/Plot_stats_maps.py | 82 +++++++----- .../util/shared/python/tile_to_latlon.py | 122 ++---------------- 2 files changed, 65 insertions(+), 139 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py index 98e1a63b..ea8493ca 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py @@ -5,6 +5,12 @@ Plots Nobs-weighted avg of each metric across all species. Requires saved files with monthly sums (see Get_ObsFcstAna_sums.py). Stats of *normalized* OmFs are approximated! + +Usage: + 1. Edit "user_config.py" with experiments information. + 2. Run this script as follows (on Discover): + $ module load python/GEOSpyD + $ ./Plot_stats_maps.py """ import sys; sys.path.append('../../shared/python/') @@ -37,12 +43,14 @@ def Main_OmF_maps(): sum_path = config['sum_path'] out_path = config['out_path'] - # Provide Nmin to screen out tiles with inadequate data for temporal stats + # min number of individual O-Fs required for stats at a location (across all months in period) + Nmin = 20 # ------------------------------------------------------------------------------------ - # Get metrics in [N_tile, N_species] for plotting + # Read or compute stats. Each field has dimension [N_tile, N_species] # ------------------------------------------------------------------------------------ + # File name for long-term temporal stats stats_file = out_path + 'temporal_stats_'+exp_list[0]['exptag']+'_'+ start_time.strftime('%Y%m%d')+'_'+ \ (end_time+timedelta(days=-1)).strftime('%Y%m%d')+'.nc4' @@ -69,22 +77,22 @@ def Main_OmF_maps(): OmF_norm_mean = stats['OmF_norm_mean'] OmF_norm_stdv = stats['OmF_norm_stdv'] - # Screen all fields with Nmin except for N_data - OmF_mean[N_data < Nmin] = np.nan - OmF_stdv[N_data < Nmin] = np.nan - OmA_mean[N_data < Nmin] = np.nan - OmA_stdv[N_data < Nmin] = np.nan + # Screen stats with Nmin (except for N_data) + OmF_mean[ N_data < Nmin] = np.nan + OmF_stdv[ N_data < Nmin] = np.nan + OmA_mean[ N_data < Nmin] = np.nan + OmA_stdv[ N_data < Nmin] = np.nan OmF_norm_mean[N_data < Nmin] = np.nan - OmF_norm_stdv[N_data < Nmin] = np.nan + OmF_norm_stdv[N_data < Nmin] = np.nan # Select/combine fields for plotting. The following provides an example to - # combine statistical fields of all species to averages. + # computes average stats across all species. # Compute Nobs-weighted avg of each metric across all species. # Typically used for SMAP Tb_h/h from asc and desc overpasses, # or ASCAT soil moisture from Metop-A/B/C. # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS! - Nobs_data = np.nansum( N_data, axis=1) + Nobs_data = np.nansum( N_data, axis=1) OmF_mean = np.nansum(OmF_mean *N_data, axis=1)/Nobs_data OmF_stdv = np.nansum(OmF_stdv *N_data, axis=1)/Nobs_data OmF_norm_mean = np.nansum(OmF_norm_mean*N_data, axis=1)/Nobs_data @@ -97,31 +105,43 @@ def Main_OmF_maps(): Nobs_data[Nobs_data == 0] = np.nan # -------------------------------------------------------------------------------- - # Get map grid information and provide map resolution (in degree). - # Observation FOV-based resolution estimate is provided. The resolution can also be - # manually set for optimal map performance + # Plot stats on regular lat/lon grid, with grid spacing guided by the footprint + # scale (field-of-view; FOV) of the observations. # --------------------------------------------------------------------------------- - tc = exp_list[0]['tilecoord'] - tg = exp_list[0]['tilegrid_global'] - obs_fov = exp_list[0]['obsparam'][0]['FOV'] + + # Get geographic info about model grid, model tile space, and obs FOV + # (assumes that all species have same FOV and FOV_units) + + tc = exp_list[0]['tilecoord'] + tg = exp_list[0]['tilegrid_global'] + obs_fov = exp_list[0]['obsparam'][0]['FOV'] obs_fov_units = exp_list[0]['obsparam'][0]['FOV_units'] - # Get map grid resolution for mapping tile data on lat/lon grid - # Convert FOV to degree, assuming 100km is about 1 deg. + # Determine spacing of lat/lon grid for plotting (degree) based on obs FOV. + # Can also set manually if desired. + + my_res = obs_fov*2. # Note: FOV is a radius, hence the factor of 2. + + # If applicable, convert to degree, assuming 100 km is about 1 deg. if 'km' in obs_fov_units: - obs_fov = obs_fov /100. + my_res = my_res/100. - # Set obs_fov at model resolution if obs_fov = 0 + # If obs_fov=0, reset to match model resolution. if obs_fov == 0: - obs_fov = 360./tile_grid['N_lon']/2. + my_res = np.sqrt( tile_grid['dlat']*tile_grid['dlon'] ) + + # pick a resolution from a sample vector of resolutions - # Get map plot grid resolution based on obs FOV - map_grid_resolution = get_grid_resolution(obs_fov) - print(f'map grid resolution: {map_grid_resolution} degree') + sample_res = [0.05, 0.1, 0.25, 0.5, 1.0, 2.0] + + my_res = min(sample_res, key=lambda x: abs(x - my_res)) + + print(f'resolution of lat/lon grid for plotting: {my_res} degree') # ---------------------------------------------------------------------------------- - # Plotting + # Plot # ---------------------------------------------------------------------------------- + exptag = exp_list[0]['exptag'] fig, axes = plt.subplots(2,2, figsize=(18,10)) @@ -155,11 +175,11 @@ def Main_OmF_maps(): colormap.set_bad(color='0.9') # light grey, 0-black, 1-white - # map tile_data on 2-d regular lat/lon grid for map plotting - grid_data, lat_2d, lon_2d = tile_to_latlon(tile_data, tc, tg, map_grid_resolution) + # map tile_data on 2-d regular lat/lon grid for plotting + grid_data, lat_2d, lon_2d = tile_to_latlongrid(tile_data, tc, my_res) - mean = np.nanmean(grid_data) - absmean =np.nanmean(np.abs(grid_data)) + mean = np.nanmean( grid_data ) + absmean = np.nanmean(np.abs(grid_data)) if 'normalized' in title_txt: absmean = np.nanmean(np.abs(grid_data-1.) ) @@ -167,9 +187,9 @@ def Main_OmF_maps(): if 'normalized' in title_txt and 'stdv' in title_txt: title_txt = title_txt + '\n' + "avg=%.3f, avg(abs(nstdv-1))=%.3f" % (mean, absmean)+' '+units elif 'mean' in title_txt: - title_txt = title_txt + '\n' + "avg=%.3f, avg(abs)=%.3f" % (mean, absmean)+' '+units + title_txt = title_txt + '\n' + "avg=%.3f, avg(abs)=%.3f" % (mean, absmean)+' '+units else: - title_txt = title_txt + '\n' + "avg=%.2f" % (mean) +' '+units + title_txt = title_txt + '\n' + "avg=%.2f" % (mean )+' '+units if 'normalized' in title_txt: grid_data = np.log10(grid_data) diff --git a/GEOSldas_App/util/shared/python/tile_to_latlon.py b/GEOSldas_App/util/shared/python/tile_to_latlon.py index 9ba346f4..8f730d55 100644 --- a/GEOSldas_App/util/shared/python/tile_to_latlon.py +++ b/GEOSldas_App/util/shared/python/tile_to_latlon.py @@ -1,130 +1,34 @@ import os import numpy as np -# convert data from 1-d array ("compressed") format to 2-d array (grid) format based on lat/lon coordinates -# -# 1-d input data can have one additional dimension (e.g., time) -# -# 2-d output data is lat-by-lon[-by-time] - -def get_grid_resolution(fov_radius=0.5): - ''' - Estimate the resolution in degrees for the target lat/lon grid when remapping 1-D - tile data to regular 2-D lat/lon grid for plotting purposes. - The resolution is an approximation based on the observation FOV radius information. - - ''' - return np.round(fov_radius * 2., decimals=3) +# -def interpolate_to_latlon(data_1d, lat, lon, resolution): - """ - For cubedsphere grid: use +def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_tol=1.e-4): """ - from scipy.interpolate import griddata - from scipy.spatial.distance import cdist - - lat_grid = np.arange(-90+resolution/2, 90, resolution) - lon_grid = np.arange(-180+resolution/2, 180, resolution) - lon_2d, lat_2d = np.meshgrid(lon_grid, lat_grid) - - ny = len(lat_grid) - nx = len(lon_grid) - - nf = data_1d.shape[-1] - data_2d = np.full([ny, nx, nf], np.nan) - for i in range(nf): - data = data_1d[:,i] - data_2d[:,:,i] = griddata((lat.flatten(), lon.flatten()), data.flatten(), (lat_2d, lon_2d), method='linear') - - return data_2d, lat_2d, lon_2d - -def remap_1d_to_2d( data_1d, lat_1d, lon_1d): - - """ - For EASE grid: use remapping - - """ - - # Extract unique coordinates - unique_lats, indY = np.unique(lat_1d, return_inverse=True) - unique_lons, indX = np.unique(lon_1d, return_inverse=True) - - ny = len(unique_lats) - nx = len(unique_lons) - - nf = data_1d.shape[1] - data_2d = np.full([ny, nx, nf], np.nan) - data_2d[indY, indX, :] = data_1d - - lon_2d,lat_2d = np.meshgrid(unique_lons, unique_lats) - - return data_2d, lat_2d, lon_2d - -def tile2grid(tile_data, tile_coord, tile_grid): - - N_fields = tile_data.shape[-1] - # Initialize grid data array - grid_data = np.zeros((tile_grid['N_lat'], tile_grid['N_lon'], N_fields)) - - for k in range(N_fields): - # Initialize weight grid for current field - wgrid = np.zeros((tile_grid['N_lat'], tile_grid['N_lon'])) - - # Loop through tile space - for n in range(tile_coord['N_tile']): - # Calculate grid indices (adjust for Python's 0-based indexing) - i = tile_coord['i_indg'][n] - (tile_grid['i_offg'] - (1-tile_grid['ind_base'])) - 1 - j = tile_coord['j_indg'][n] - (tile_grid['j_offg'] - (1-tile_grid['ind_base'])) - 1 - - # Get weight - w = tile_coord['frac_cell'][n] - - # Check if current tile data is valid (not no-data) - if ~np.isnan(tile_data[k, n]): - # Accumulate weighted data - grid_data[j, i, k] += w * tile_data[n,k] - wgrid[j, i] += w - - # Normalize and set no-data values - for i in range(tile_grid['N_lon']): - for j in range(tile_grid['N_lat']): - if wgrid[j, i] > 0.0: - # Normalize by total weight - grid_data[j, i, k] = grid_data[j, i, k] / wgrid[j, i] - else: - # Set no-data value - grid_data[j, i, k] = np.nan - - lat_2d = np.arange(tile_grid['ll_lat']+0.5*tile_grid['dlat'], tile_grid['ur_lat'], tile_grid['dlat']) - lon_2d = np.arange(tile_grid['ll_lon']+0.5*tile_grid['dlon'], tile_grid['ur_lon'], tile_grid['dlon']) - -def tile_to_latlon( tile_data, tile_coord, tile_grid, resolution, nodata=1.e15, nodata_tol=1.e-4): - - """ - Convert tile-space data to grid-space data. + Map (1d) tile-space data onto (2d) regular lat/lon grid of arbitrary "resolution". Parameters: ---------- - tile_data : Array in tile space, shape (N_tile, N_fields) + tile_data : Array in tile space, shape (N_tile, N_fields) tile_coord : Dictionary containing tile coordinate information - tile_grid : Dictionary containing grid information resolution : target grid resolution - nodata : Value for missing data + nodata : Value for missing data nodata_tol : Tolerance for comparing values to nodata Returns: ------- grid_data : Array in regular grid space, shape (N_lat, N_lon, N_fields) - lat_2d: Lat array in regular grid space, shape (N_lat, N_lon) - lon_2d: Lon array in regular grid space, shape (N_lat, N_lon) + lat_2d : Lat array in regular grid space, shape (N_lat, N_lon) + lon_2d : Lon array in regular grid space, shape (N_lat, N_lon) """ + tile_data[np.abs(tile_data - nodata) < nodata_tol] = np.nan - # Verifity input datasize + # Verify input datasize if tile_data.shape[0] != tile_coord['N_tile']: - print(f'Error: tile2grid input data size don''t match N_tile') + print(f'Error: size of tile2grid input data does not match that of N_tile') sys.exit() # if tile_data is 1-D [N_tile], expand into 2-D [N_tile,1] @@ -133,8 +37,9 @@ def tile_to_latlon( tile_data, tile_coord, tile_grid, resolution, nodata=1.e15, nf = tile_data.shape[-1] - lat_grid = np.arange(-90+resolution/2, 90, resolution) + lat_grid = np.arange( -90+resolution/2, 90, resolution) lon_grid = np.arange(-180+resolution/2, 180, resolution) + lon_2d, lat_2d = np.meshgrid(lon_grid, lat_grid) grid_data = np.full([len(lat_grid), len(lon_grid), nf], np.nan) @@ -145,7 +50,7 @@ def tile_to_latlon( tile_data, tile_coord, tile_grid, resolution, nodata=1.e15, j = np.searchsorted(lat_grid + resolution/2., tile_coord['com_lat'][k]) i = np.searchsorted(lon_grid + resolution/2., tile_coord['com_lon'][k]) # sometime the missing points are filled with zeros. taking largest value within - # the target grid cell avoild picking up unwanted zeros. + # the target grid cell avoids picking up unwanted zeros. grid_data[j,i,v] = np.nanmax([tile_data[k,v], grid_data[j,i,v]]) if grid_data.shape[-1] == 1: @@ -153,3 +58,4 @@ def tile_to_latlon( tile_data, tile_coord, tile_grid, resolution, nodata=1.e15, return grid_data, lat_2d, lon_2d +# =========================== EOF ============================================================ From abdeace096ea5f4c0c10132061ba68fcbf341289 Mon Sep 17 00:00:00 2001 From: Rolf Reichle Date: Tue, 24 Jun 2025 09:55:07 -0400 Subject: [PATCH 30/45] renamed tile_to_latlon.py -> tile_to_latlongrid.py for clarity --- .../shared/python/{tile_to_latlon.py => tile_to_latlongrid.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename GEOSldas_App/util/shared/python/{tile_to_latlon.py => tile_to_latlongrid.py} (100%) diff --git a/GEOSldas_App/util/shared/python/tile_to_latlon.py b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py similarity index 100% rename from GEOSldas_App/util/shared/python/tile_to_latlon.py rename to GEOSldas_App/util/shared/python/tile_to_latlongrid.py From 693ff6595728ad33f1fcdfebc2d7474641a77a55 Mon Sep 17 00:00:00 2001 From: Rolf Reichle Date: Tue, 24 Jun 2025 11:18:40 -0400 Subject: [PATCH 31/45] add tile2grid.py; revised computation of spatial avg of temporal stats (tile_to_latlongrid.py, Plot_stats_maps.py) --- .../ObsFcstAna_stats/Plot_stats_maps.py | 10 ++-- GEOSldas_App/util/shared/python/tile2grid.py | 55 +++++++++++++++++++ .../util/shared/python/tile_to_latlongrid.py | 2 + 3 files changed, 63 insertions(+), 4 deletions(-) create mode 100644 GEOSldas_App/util/shared/python/tile2grid.py diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py index ea8493ca..ba0d7ed3 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py @@ -43,7 +43,7 @@ def Main_OmF_maps(): sum_path = config['sum_path'] out_path = config['out_path'] - # min number of individual O-Fs required for stats at a location (across all months in period) + # min total number of individual O-Fs required for stats at a location (across all months in period) Nmin = 20 @@ -178,11 +178,13 @@ def Main_OmF_maps(): # map tile_data on 2-d regular lat/lon grid for plotting grid_data, lat_2d, lon_2d = tile_to_latlongrid(tile_data, tc, my_res) - mean = np.nanmean( grid_data ) - absmean = np.nanmean(np.abs(grid_data)) + + # compute spatial avg of metric (directly from tile_data) + mean = np.nanmean( tile_data ) + absmean = np.nanmean(np.abs(tile_data)) if 'normalized' in title_txt: - absmean = np.nanmean(np.abs(grid_data-1.) ) + absmean = np.nanmean(np.abs(tile_data-1.) ) if 'normalized' in title_txt and 'stdv' in title_txt: title_txt = title_txt + '\n' + "avg=%.3f, avg(abs(nstdv-1))=%.3f" % (mean, absmean)+' '+units diff --git a/GEOSldas_App/util/shared/python/tile2grid.py b/GEOSldas_App/util/shared/python/tile2grid.py new file mode 100644 index 00000000..b918ac51 --- /dev/null +++ b/GEOSldas_App/util/shared/python/tile2grid.py @@ -0,0 +1,55 @@ + + +import os +import numpy as np + +def tile2grid(tile_data, tile_coord, tile_grid): + + """ + Map (1d) tile-space data onto (2d) grid that is associated with the tile space. + + Parameters: + ---------- + tile_data : Array in tile space, shape (N_tile, N_fields) + tile_coord : Dictionary containing tile coordinate information + tile_grid : Dictionary containing tile grid information + + Returns: + ------- + grid_data : Array in grid space, shape (N_lat, N_lon, N_fields) + """ + + N_fields = tile_data.shape[-1] + # Initialize grid data array + grid_data = np.zeros((tile_grid['N_lat'], tile_grid['N_lon'], N_fields)) + + for k in range(N_fields): + # Initialize weight grid for current field + wgrid = np.zeros((tile_grid['N_lat'], tile_grid['N_lon'])) + + # Loop through tile space + for n in range(tile_coord['N_tile']): + # Calculate grid indices (adjust for Python's 0-based indexing) + i = tile_coord['i_indg'][n] - (tile_grid['i_offg'] - (1-tile_grid['ind_base'])) - 1 + j = tile_coord['j_indg'][n] - (tile_grid['j_offg'] - (1-tile_grid['ind_base'])) - 1 + + # Get weight + w = tile_coord['frac_cell'][n] + + # Check if current tile data is valid (not no-data) + if ~np.isnan(tile_data[k, n]): + # Accumulate weighted data + grid_data[j, i, k] += w * tile_data[n,k] + wgrid[ j, i] += w + + # Normalize and set no-data values + for i in range(tile_grid['N_lon']): + for j in range(tile_grid['N_lat']): + if wgrid[j, i] > 0.0: + # Normalize by total weight + grid_data[j, i, k] = grid_data[j, i, k] / wgrid[j, i] + else: + # Set no-data value + grid_data[j, i, k] = np.nan + +# ============ EOF =============================================== diff --git a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py index 8f730d55..c4751a69 100644 --- a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py +++ b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py @@ -7,6 +7,8 @@ def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_ """ Map (1d) tile-space data onto (2d) regular lat/lon grid of arbitrary "resolution". + + Use "tile2grid.py" to map tile data onto the grid that is associated with the tile space. Parameters: ---------- From b4db565f6ce68d5e6dd66fef78900e99f76d8beb Mon Sep 17 00:00:00 2001 From: Rolf Reichle Date: Tue, 24 Jun 2025 11:53:50 -0400 Subject: [PATCH 32/45] minor fixes to comments, variable names, and vertical alignment (postproc_ObsFcstAna.py, Plot_stats_*.py) --- .../ObsFcstAna_stats/Plot_stats_maps.py | 4 +- .../ObsFcstAna_stats/Plot_stats_timeseries.py | 10 +++-- .../ObsFcstAna_stats/postproc_ObsFcstAna.py | 40 +++++++++---------- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py index ba0d7ed3..42ad9d99 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py @@ -152,7 +152,7 @@ def Main_OmF_maps(): units = '[k]' if i == 0 and j == 0: tile_data = Nobs_data - #crange is [cmin, cmax] + # crange is [cmin, cmax] crange =[0, np.ceil((end_time-start_time).days/150)*300] colormap = plt.get_cmap('jet',20) title_txt = exptag + ' Tb Nobs ' + start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m') @@ -179,7 +179,7 @@ def Main_OmF_maps(): grid_data, lat_2d, lon_2d = tile_to_latlongrid(tile_data, tc, my_res) - # compute spatial avg of metric (directly from tile_data) + # compute spatial avg of metric (directly from tile_data; no weighting) mean = np.nanmean( tile_data ) absmean = np.nanmean(np.abs(tile_data)) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py index bc66e8d2..0bdba303 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py @@ -41,6 +41,8 @@ def Main_OmF_timeseries(): stats_file = out_path + 'spatial_stats_'+exptag+'_'+start_time.strftime('%Y%m')+ \ '_'+(end_time+timedelta(days=-1)).strftime('%Y%m')+'.pkl' + + # Read or compute monthly stats across space if os.path.isfile(stats_file): @@ -64,8 +66,8 @@ def Main_OmF_timeseries(): # or ASCAT soil moisture from Metop-A/B/C. # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS! stats_plot = {} - N_data = np.nansum(stats['N_data'], axis=1) - stats_plot['N_data'] = N_data + N_data = np.nansum(stats['N_data' ], axis=1) + stats_plot['N_data' ] = N_data stats_plot['OmF_mean'] = np.nansum(stats['OmF_mean']*stats['N_data'], axis=1)/N_data stats_plot['OmF_stdv'] = np.nansum(stats['OmF_stdv']*stats['N_data'], axis=1)/N_data stats_plot['OmA_mean'] = np.nansum(stats['OmA_mean']*stats['N_data'], axis=1)/N_data @@ -74,12 +76,12 @@ def Main_OmF_timeseries(): plot_var = 'OmF_mean' fig, ax = plt.subplots(figsize=(10,4)) - bars = ax.bar(date_vec, stats_plot[plot_var]) + bars = ax.bar(date_vec, stats_plot[plot_var]) plt.grid(True, linestyle='--', alpha=0.5) plt.xticks(ticks=date_vec[::2], labels=date_vec[::2]) - plt.title(exptag+ 'monthly '+plot_var) + plt.title(exptag+' monthly '+plot_var) plt.xlim(-1, len(date_vec)+1) plt.ylim(-.1, 2.) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py index 9794ae42..57a122fa 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py @@ -94,7 +94,7 @@ def compute_monthly_sums(self, date_time): n_spec = len(obsparam_list[0]) date_time = date_time.replace(hour=int(self.da_t0), minute=int(np.mod(self.da_t0,1)*60)) - end_time = date_time + relativedelta(months=1) + stop_time = date_time + relativedelta(months=1) data_sum = {} data2_sum = {} @@ -108,7 +108,7 @@ def compute_monthly_sums(self, date_time): data_sum[ var] = np.zeros((n_tile, n_spec)) data2_sum[var] = np.zeros((n_tile, n_spec)) - while date_time < end_time: + while date_time < stop_time: # read the list of experiments at each time step (OFA="ObsFcstAna") OFA_list = [] @@ -195,7 +195,7 @@ def save_monthly_sums(self): tc = self.tilecoord obsparam_list = self.obsparam_list - date_time = self.start_time + date_time = self.start_time while date_time < self.end_time: # loop through months @@ -416,10 +416,10 @@ def calc_spatial_stats_from_sums(self): current_time = start_time while current_time < end_time: - mo_path = self.sum_path + '/Y'+ current_time.strftime('%Y') + '/M' + current_time.strftime('%m') + '/' + mo_path = self.sum_path + '/Y'+ current_time.strftime('%Y') + '/M' + current_time.strftime('%m') + '/' fnc4_sums = mo_path + self.exptag + '.ens_avg.ldas_ObsFcstAna_sums.' + current_time.strftime('%Y%m') +'.nc4' - mdata_sum = {} + mdata_sum = {} mdata2_sum = {} try: @@ -435,7 +435,7 @@ def calc_spatial_stats_from_sums(self): mdata_sum[ var][mN_data == 0] = np.nan mdata2_sum[var][mN_data == 0] = np.nan except FileNotFoundError: - print(f"Error: File '{fnc4_sums}' not found. Run Get_ObsFcstAna_sums first.") + print(f"Error: File '{fnc4_sums}' not found. Run Get_ObsFcstAna_sums.py first.") sys.exit(1) except Exception as e: print(f"An unexpected error occurred: {e}") @@ -451,8 +451,8 @@ def calc_spatial_stats_from_sums(self): for var in var_list: mdata_sum[ var][np.isnan(mN_data)] = np.nan mdata2_sum[var][np.isnan(mN_data)] = np.nan - moxf_sum[np.isnan(mN_data)] = np.nan - moxa_sum[np.isnan(mN_data)] = np.nan + moxf_sum[ np.isnan(mN_data)] = np.nan + moxa_sum[ np.isnan(mN_data)] = np.nan # Aggregate data of all tiles N_data_mo = np.nansum(mN_data, axis=0) @@ -486,17 +486,17 @@ def calc_spatial_stats_from_sums(self): OmA_stdv_mo = np.sqrt(O_var + A_var - 2 * (OxA_mean - O_mean_mo*A_mean_mo)) # Extend timeseries - N_data.append(N_data_mo) - O_mean.append(O_mean_mo) - O_stdv.append(np.sqrt(O_var)) - F_mean.append(F_mean_mo) - F_stdv.append(np.sqrt(F_var)) - A_mean.append(A_mean_mo) - A_stdv.append(np.sqrt(A_var)) - OmF_mean.append(OmF_mean_mo) - OmF_stdv.append(OmF_stdv_mo) - OmA_mean.append(OmA_mean_mo) - OmA_stdv.append(OmA_stdv_mo) + N_data.append( N_data_mo ) + O_mean.append( O_mean_mo ) + O_stdv.append( np.sqrt(O_var)) + F_mean.append( F_mean_mo ) + F_stdv.append( np.sqrt(F_var)) + A_mean.append( A_mean_mo ) + A_stdv.append( np.sqrt(A_var)) + OmF_mean.append(OmF_mean_mo ) + OmF_stdv.append(OmF_stdv_mo ) + OmA_mean.append(OmA_mean_mo ) + OmA_stdv.append(OmA_stdv_mo ) date_vec.append(current_time.strftime('%Y%m')) current_time = current_time + relativedelta(months=1) @@ -507,7 +507,7 @@ def calc_spatial_stats_from_sums(self): 'A_mean' : np.array(A_mean), 'A_stdv' : np.array(A_stdv), 'OmF_mean': np.array(OmF_mean), 'OmF_stdv': np.array(OmF_stdv), 'OmA_mean': np.array(OmA_mean), 'OmA_stdv': np.array(OmA_stdv), - 'N_data': np.array(N_data), 'date_vec': date_vec + 'N_data' : np.array(N_data), 'date_vec': date_vec } return stats From 65abe5ea2c0d102daccc3cc28f27c1eab8484e68 Mon Sep 17 00:00:00 2001 From: Rolf Reichle Date: Tue, 24 Jun 2025 14:59:52 -0400 Subject: [PATCH 33/45] cleaned up and simplified process_hist.csh (GEOSldas_HIST.rc, ldas_setup) --- GEOSldas_App/GEOSldas_HIST.rc | 135 +++++++++------------------------- GEOSldas_App/ldas_setup | 18 +++-- GEOSldas_App/process_hist.csh | 91 ++++++++++++----------- 3 files changed, 98 insertions(+), 146 deletions(-) diff --git a/GEOSldas_App/GEOSldas_HIST.rc b/GEOSldas_App/GEOSldas_HIST.rc index 1b6a14c1..37ce4b59 100644 --- a/GEOSldas_App/GEOSldas_HIST.rc +++ b/GEOSldas_App/GEOSldas_HIST.rc @@ -1,25 +1,23 @@ # Sample HISTORY.rc file for GEOSldas # # This HISTORY template is edited by "ldas_setup" via "process_hist.csh". -# The strings '#ASSIM', '#EASE', and '#CUBE' are *not* linked to MAPL HISTORY -# functionality. For example, the line -# "#CUBE 'tavg24_2d_lnd_Nx'" -# does *not* mean that the 'lnd' output will be on a cube-sphere grid. VERSION: 1 -# Must edit 'EXPID' manually if HISTORY file is re-used without going -# through "ldas_setup". -# +# Must edit 'EXPID' manually if HISTORY file is re-used without going through "ldas_setup". + EXPID: GEOSldas_expid +# ------------------------------------------------------------------------------------------------ + +# pre-defined Collections + COLLECTIONS: -#EASE 'tavg24_1d_lfs_Nt' - 'tavg24_2d_lfs_Nx' -#EASE 'tavg24_1d_lnd_Nt' - 'tavg24_2d_lnd_Nx' -#CUBE 'tavg24_2d_lnd_EASE' -#ASSIM 'SMAP_L4_SM_gph' +#OUT1d 'tavg24_1d_lfs_Nt' +#OUT2d 'tavg24_2d_lfs_Nx' +#OUT1d 'tavg24_1d_lnd_Nt' +#OUT2d 'tavg24_2d_lnd_Nx' +# 'SMAP_L4_SM_gph' # 'inst1_1d_lnr_Nt' # 'catch_progn_incr' # 'inst3_1d_lndfcstana_Nt' @@ -30,28 +28,34 @@ COLLECTIONS: # 'tavg24_1d_glc_Nt' :: +# -------------------------------------------------------------------------------------------------- + +# 2d output can be on the following grids (see [COLLECTION_NAME].grid_label]) + GRID_LABELS: PC720x361-DC PC1440x721-DC EASEv2_M36 :: -PC720x361-DC.GRID_TYPE: LatLon -PC720x361-DC.IM_WORLD: 720 -PC720x361-DC.JM_WORLD: 361 -PC720x361-DC.POLE: PC -PC720x361-DC.DATELINE: DC -PC720x361-DC.LM: 1 +PC720x361-DC.GRID_TYPE: LatLon +PC720x361-DC.IM_WORLD: 720 +PC720x361-DC.JM_WORLD: 361 +PC720x361-DC.POLE: PC +PC720x361-DC.DATELINE: DC +PC720x361-DC.LM: 1 PC1440x721-DC.GRID_TYPE: LatLon -PC1440x721-DC.IM_WORLD: 1440 -PC1440x721-DC.JM_WORLD: 721 -PC1440x721-DC.POLE: PC -PC1440x721-DC.DATELINE: DC -PC1440x721-DC.LM: 1 +PC1440x721-DC.IM_WORLD: 1440 +PC1440x721-DC.JM_WORLD: 721 +PC1440x721-DC.POLE: PC +PC1440x721-DC.DATELINE: DC +PC1440x721-DC.LM: 1 -EASEv2_M36.GRID_TYPE: EASE -EASEv2_M36.GRIDNAME: EASEv2_M36 -EASEv2_M36.LM: 1 +EASEv2_M36.GRID_TYPE: EASE +EASEv2_M36.GRIDNAME: EASEv2_M36 +EASEv2_M36.LM: 1 + +# -------------------------------------------------------------------------------------------------- # Detailed definition of the collections listed above # @@ -224,15 +228,16 @@ EASEv2_M36.LM: 1 tavg24_2d_lnd_Nx.format: 'CFIO', tavg24_2d_lnd_Nx.descr: '2d,Daily,Time-Averaged,Single-Level,Assimilation,Land Surface Diagnostics', - tavg24_2d_lnd_Nx.nbits: 12, + tavg24_2d_lnd_Nx.nbits: 12, tavg24_2d_lnd_Nx.template: '%y4%m2%d2_%h2%n2z.nc4', tavg24_2d_lnd_Nx.mode: 'time-averaged', tavg24_2d_lnd_Nx.frequency: 240000, tavg24_2d_lnd_Nx.ref_time: 000000, - tavg24_2d_lnd_Nx.regrid_exch: '../input/tile.data' - tavg24_2d_lnd_Nx.regrid_name: 'GRIDNAME' + tavg24_2d_lnd_Nx.regrid_exch: '../input/tile.data', + tavg24_2d_lnd_Nx.regrid_name: 'GRIDNAME', # tavg24_2d_lnd_Nx.regrid_method: 'BILINEAR_MONOTONIC' , - tavg24_2d_lnd_Nx.grid_label: PC720x361-DC + tavg24_2d_lnd_Nx.grid_label: PC720x361-DC, +# tavg24_2d_lnd_Nx.grid_label: EASEv2_M36, tavg24_2d_lnd_Nx.deflate: 2, tavg24_2d_lnd_Nx.fields: 'GRN' , 'VEGDYN' , 'LAI' , 'VEGDYN' , @@ -328,74 +333,6 @@ EASEv2_M36.LM: 1 :: - tavg24_2d_lnd_EASE.format: 'CFIO', - tavg24_2d_lnd_EASE.descr: '2d,Daily,Time-Averaged,Single-Level,Assimilation,Land Surface Diagnostics', - tavg24_2d_lnd_EASE.nbits: 12, - tavg24_2d_lnd_EASE.template: '%y4%m2%d2_%h2%n2z.nc4', - tavg24_2d_lnd_EASE.mode: 'time-averaged', - tavg24_2d_lnd_EASE.frequency: 240000, - tavg24_2d_lnd_EASE.ref_time: 000000, - tavg24_2d_lnd_EASE.regrid_exch: '../input/tile.data' - tavg24_2d_lnd_EASE.regrid_name: 'GRIDNAME' -# tavg24_2d_lnd_EASE.regrid_method: 'BILINEAR_MONOTONIC' , - tavg24_2d_lnd_EASE.grid_label: EASEv2_M36 - tavg24_2d_lnd_EASE.deflate: 2, - tavg24_2d_lnd_EASE.fields: 'GRN' , 'VEGDYN' , - 'LAI' , 'VEGDYN' , - 'WET3' , 'GridComp' , 'GWETPROF' , - 'WET2' , 'GridComp' , 'GWETROOT' , - 'WET1' , 'GridComp' , 'GWETTOP' , - 'WCPR' , 'GridComp' , 'PRMC' , - 'WCRZ' , 'GridComp' , 'RZMC' , - 'WCSF' , 'GridComp' , 'SFMC' , - 'CAPAC' , 'GridComp' , 'INTRWATR' , - 'TPSNOW' , 'GridComp' , 'TPSNOWLAND' , - 'TPUNST' , 'GridComp' , 'TUNSTLAND' , - 'TPSAT' , 'GridComp' , 'TSATLAND' , - 'TPWLT' , 'GridComp' , 'TWLTLAND' , - 'TPSURF' , 'GridComp' , 'TSURFLAND' , - 'TP1' , 'GridComp' , 'TSOIL1' , # CATCH GC: TP1, ENSAVG GC: TSOIL1TILE - 'TP2' , 'GridComp' , 'TSOIL2' , # ... - 'TP3' , 'GridComp' , 'TSOIL3' , # ... - 'TP4' , 'GridComp' , 'TSOIL4' , # ... - 'TP5' , 'GridComp' , 'TSOIL5' , # ... - 'TP6' , 'GridComp' , 'TSOIL6' , # ... - 'PRLAND' , 'GridComp' , 'PRECTOTCORRLAND' , # assume "corrected" precip - 'SNOLAND' , 'GridComp' , 'PRECSNOCORRLAND' , # assume "corrected" precip - 'TSLAND' , 'GridComp' , 'SNOMASLAND' , - 'SNOWDP' , 'GridComp' , 'SNODPLAND' , - 'EVPSOI' , 'GridComp' , 'LHLANDSOIL' , - 'EVPVEG' , 'GridComp' , 'LHLANDTRNS' , - 'EVPINT' , 'GridComp' , 'LHLANDINTR' , - 'EVPICE' , 'GridComp' , 'LHLANDSBLN' , - 'RUNSURF' , 'GridComp' , 'RUNSURFLAND' , - 'BASEFLOW' , 'GridComp' , 'BASEFLOWLAND' , - 'SMLAND' , 'GridComp' , - 'QINFIL' , 'GridComp' , 'QINFILLAND' , - 'FRUST' , 'GridComp' , 'FRLANDUNST' , - 'FRSAT' , 'GridComp' , 'FRLANDSAT' , - 'ASNOW' , 'GridComp' , 'FRLANDSNO' , - 'FRWLT' , 'GridComp' , 'FRLANDWLT' , - 'DFPARLAND' , 'GridComp' , 'PARDFLAND' , - 'DRPARLAND' , 'GridComp' , 'PARDRLAND' , - 'SHLAND' , 'GridComp' , - 'LHLAND' , 'GridComp' , - 'EVLAND' , 'GridComp' , - 'LWLAND' , 'GridComp' , - 'SWLAND' , 'GridComp' , - 'GHLAND' , 'GridComp' , - 'TWLAND' , 'GridComp' , - 'TELAND' , 'GridComp' , - 'DWLAND' , 'GridComp' , 'WCHANGELAND' , - 'DHLAND' , 'GridComp' , 'ECHANGELAND' , - 'SPLAND' , 'GridComp' , 'SPSHLAND' , -# 'SPLH' , 'GridComp' , 'SPLHLAND' , # works for Catch only, not yet for CatchCN - 'SPWATR' , 'GridComp' , 'SPEVLAND' , - 'SPSNOW' , 'GridComp' , 'SPSNLAND' , - 'PEATCLSM_WATERLEVEL', 'GridComp' , - 'PEATCLSM_FSWCHANGE' , 'GridComp' , - :: - const_1d_lnd_Nt.descr: 'Tile-space,Constant,Time-invariant,Single-Level,Assimilation,Land Surface Model Parameters', const_1d_lnd_Nt.template: '%y4%m2%d2_%h2%n2z.bin', const_1d_lnd_Nt.mode: 'instantaneous', diff --git a/GEOSldas_App/ldas_setup b/GEOSldas_App/ldas_setup index c8d69b5c..845312b6 100755 --- a/GEOSldas_App/ldas_setup +++ b/GEOSldas_App/ldas_setup @@ -1283,12 +1283,18 @@ class LDASsetup: shutil.copy2(histrc_file,tmprcfile) else : shutil.copy2(histrc_file,tmprcfile) - GRID='EASE ' + self.rqdExeInp['GRIDNAME'] + ' ' +tmprcfile - if '-CF' in self.rqdExeInp['GRIDNAME'] : - GRID ='CUBE ' + self.rqdExeInp['GRIDNAME'] + ' ' +tmprcfile - _assim = '1' if self.assim else '0' - cmd =self.bindir +'/process_hist.csh '+ str(self.rqdExeInp['LSM_CHOICE']) + ' ' + str(self.rqdExeInp['AEROSOL_DEPOSITION']) + \ - ' ' + GRID + ' ' + str(self.rqdExeInp['RUN_IRRIG']) + ' ' + _assim + ' '+ str(self.nens) + if 'EASE' in self.rqdExeInp['GRIDNAME'] : + TMPSTR='OUT1d' + else : + TMPSTR='OUT2d' + cmd = self.bindir +'/process_hist.csh' + ' ' \ + + tmprcfile + ' ' \ + + TMPSTR + ' ' \ + + self.rqdExeInp['GRIDNAME'] + ' ' \ + + str(self.rqdExeInp['LSM_CHOICE']) + ' ' \ + + str(self.rqdExeInp['AEROSOL_DEPOSITION']) + ' ' \ + + str(self.rqdExeInp['RUN_IRRIG']) + ' ' \ + + str(self.nens) print(cmd) #os.system(cmd) sp.call(shlex.split(cmd)) diff --git a/GEOSldas_App/process_hist.csh b/GEOSldas_App/process_hist.csh index 97d35afe..6156e736 100644 --- a/GEOSldas_App/process_hist.csh +++ b/GEOSldas_App/process_hist.csh @@ -1,55 +1,74 @@ #!/bin/csh -f -## I am changed the CUBE/EASE logic -## if CUBE we produce 2D -## anything else, SMAP and other offline grids we produce tile space - -setenv LSM_CHOICE $1 -setenv AEROSOL_DEPOSITION $2 -setenv GRID $3 -setenv GRIDNAME "'$4'" -setenv HISTRC $5 -setenv RUN_IRRIG $6 -setenv ASSIM $7 -setenv NENS $8 +# process GEOSldas_HIST.rc (=$HISTRC) template +# +# - turn on/off HIST collections depending on tile space +# - EASE: turn on tile-space (1d) output +# - otherwise: turn on gridded (2d) output +# - turn on/off output variables depending on LSM_CHOICE, AEROSOL_DEPOSITION, and RUN_IRRIG +# - fill in source 'GridComp' info for variables depending on NENS + +# process command line args + +setenv HISTRC $1 # file name of HIST rc template (GEOSldas_HIST.rc) +setenv OUTxd $2 # "OUT1d" or "OUT2d" (to turn on/off collections) +setenv GRIDNAME "'$3'" # full name of grid associated with tile space +setenv LSM_CHOICE $4 +setenv AEROSOL_DEPOSITION $5 +setenv RUN_IRRIG $6 +setenv NENS $7 + +# ------------------------------------------------- echo $GRIDNAME -if($ASSIM == 1) then - sed -i 's|\#ASSIM|''|g' $HISTRC - sed -i '/^\#EASE/d' $HISTRC - sed -i '/^\#CUBE/d' $HISTRC -else - sed -i '/^\#ASSIM/d' $HISTRC -endif +# uncomment 2d or 1d collections, depending on "OUT1d" (EASE tile space) or "OUT2d" (non-EASE tile space) -if($GRID == CUBE) then - sed -i '/^\#EASE/d' $HISTRC - sed -i 's|\#CUBE|''|g' $HISTRC +if($OUTxd == OUT1d) then + sed -i 's|\#OUT1d|''|g' $HISTRC else - sed -i '/^\#CUBE/d' $HISTRC - sed -i 's|\#EASE|''|g' $HISTRC + sed -i 's|\#OUT2d|''|g' $HISTRC endif + +# fill in name of grid associated with tile space + sed -i -e s/\'GRIDNAME\'/$GRIDNAME/g $HISTRC +# set 'GridComp' based on LSM_CHOICE; +# turn on/off variables associated with CATCHCN, AEROSOL_DEPOSITION, RUN_IRRIG + if($LSM_CHOICE == 1) then set GridComp = CATCH - sed -i '/^>>>HIST_CATCHCN<<>>HIST_CATCHCNCLM45<<>>HIST_CATCHCN<<>>HIST_CATCHCNCLM45<<>>HIST_CATCHCNCLM45<<>>HIST_CATCHCN<<>>HIST_CATCHCNCLM45<<>>HIST_CATCHCN<<>>HIST_CATCHCN<<>>HIST_CATCHCN<<>>HIST_CATCHCNCLM45<<>>HIST_AEROSOL<<>>HIST_AEROSOL<<>>HIST_IRRIG<<>>HIST_IRRIG<< 1) then set GridComp = ENSAVG sed -i 's|VEGDYN|'VEGDYN_e0000'|g' $HISTRC @@ -62,16 +81,6 @@ if($NENS > 1) then # sed -i 's|DATAATM|'DATAATM0000'|g' $HISTRC endif -sed -i 's|GridComp|'$GridComp'|g' $HISTRC +# fill in source 'GridComp' information for output variables -if($AEROSOL_DEPOSITION == 0) then - sed -i '/^>>>HIST_AEROSOL<<>>HIST_AEROSOL<<>>HIST_IRRIG<<>>HIST_IRRIG<< Date: Tue, 24 Jun 2025 15:02:33 -0400 Subject: [PATCH 34/45] edited CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fe41d67..8d12932b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- Added EASE to Latlon and Cubed-Sphere to EASE history outputs - Added optional SLURM "constraint". - Added functionality to run on tile space of stretched cube-sphere grids. - Added python package for post-processing ObsFcstAna output. @@ -19,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- Revised processing of HISTORY template (GEOSldas_HIST.rc). - Switch to using EASE grid tools in MAPL. - Specify only ntasks_model for SLURM resource request. - Revisions for handling of Nens and special nml and mwtrm path/files in coupled land-atm DAS. From 33f2fca1f96c3c97074a8c0e421617797abb224f Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Tue, 24 Jun 2025 16:57:35 -0400 Subject: [PATCH 35/45] add missing return line and minor edits --- GEOSldas_App/util/shared/python/tile2grid.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/GEOSldas_App/util/shared/python/tile2grid.py b/GEOSldas_App/util/shared/python/tile2grid.py index b918ac51..096d45b6 100644 --- a/GEOSldas_App/util/shared/python/tile2grid.py +++ b/GEOSldas_App/util/shared/python/tile2grid.py @@ -18,6 +18,15 @@ def tile2grid(tile_data, tile_coord, tile_grid): ------- grid_data : Array in grid space, shape (N_lat, N_lon, N_fields) """ + + # Verify input datasize + if tile_data.shape[0] != tile_coord['N_tile']: + print(f'Error: size of tile2grid input data does not match that of N_tile') + sys.exit() + + # if tile_data is 1-D [N_tile], expand into 2-D [N_tile,1] + if tile_data.ndim == 1: + tile_data = np.expand_dims(tile_data, axis=1) N_fields = tile_data.shape[-1] # Initialize grid data array @@ -37,7 +46,7 @@ def tile2grid(tile_data, tile_coord, tile_grid): w = tile_coord['frac_cell'][n] # Check if current tile data is valid (not no-data) - if ~np.isnan(tile_data[k, n]): + if ~np.isnan(tile_data[n, k]): # Accumulate weighted data grid_data[j, i, k] += w * tile_data[n,k] wgrid[ j, i] += w @@ -52,4 +61,6 @@ def tile2grid(tile_data, tile_coord, tile_grid): # Set no-data value grid_data[j, i, k] = np.nan + return grid_data.squeeze() + # ============ EOF =============================================== From 88739fa6ddab3977c2bbbe0587a3222487b71cef Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Tue, 24 Jun 2025 16:59:25 -0400 Subject: [PATCH 36/45] change max to mean tile values for grid --- .../util/shared/python/tile_to_latlongrid.py | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py index c4751a69..a4c4de46 100644 --- a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py +++ b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py @@ -3,7 +3,7 @@ # -def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_tol=1.e-4): +def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_tol_frac=1.e-4): """ Map (1d) tile-space data onto (2d) regular lat/lon grid of arbitrary "resolution". @@ -26,7 +26,7 @@ def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_ """ - tile_data[np.abs(tile_data - nodata) < nodata_tol] = np.nan + tile_data[np.abs(tile_data - nodata) < nodata*nodata_tol_frac] = np.nan # Verify input datasize if tile_data.shape[0] != tile_coord['N_tile']: @@ -39,25 +39,26 @@ def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_ nf = tile_data.shape[-1] - lat_grid = np.arange( -90+resolution/2, 90, resolution) - lon_grid = np.arange(-180+resolution/2, 180, resolution) + lat_grid = np.arange( -90+resolution/2, 90+resolution/2, resolution) + lon_grid = np.arange(-180+resolution/2, 180+resolution/2, resolution) lon_2d, lat_2d = np.meshgrid(lon_grid, lat_grid) - grid_data = np.full([len(lat_grid), len(lon_grid), nf], np.nan) + grid_data = np.zeros((len(lat_grid), len(lon_grid), nf)) + N_tilecnt = np.zeros((len(lat_grid), len(lon_grid), nf)) for v in range(nf): for k in range(tile_coord['N_tile']): - if np.any([~np.isnan(tile_data[k,:])], axis=1): + if ~np.isnan(tile_data[k,:]): j = np.searchsorted(lat_grid + resolution/2., tile_coord['com_lat'][k]) i = np.searchsorted(lon_grid + resolution/2., tile_coord['com_lon'][k]) - # sometime the missing points are filled with zeros. taking largest value within - # the target grid cell avoids picking up unwanted zeros. - grid_data[j,i,v] = np.nanmax([tile_data[k,v], grid_data[j,i,v]]) - - if grid_data.shape[-1] == 1: - grid_data = grid_data[:,:,0] - - return grid_data, lat_2d, lon_2d + # grid value takes the mean of all tiles within + # new_mean = old_mean + (new_value - old_mean)/count + N_tilecnt[j,i,v] += 1 + grid_data[j,i,v] = grid_data[j,i,v] + (tile_data[k,v]-grid_data[j,i,v])/N_tilecnt[j,i,v] + + grid_data[N_tilecnt == 0] = np.nan + + return grid_data.squeeze(), lat_2d, lon_2d # =========================== EOF ============================================================ From bc8e44b7a9e152ad34df309382bb2db3172690fc Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Tue, 24 Jun 2025 17:01:26 -0400 Subject: [PATCH 37/45] reverse comment change --- .../util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py index b8283e03..b2543ffc 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py @@ -34,7 +34,7 @@ # If the script is run in the background, uncomment the following lines to see the redirected # standard output in the out.log file immediately. When the lines are commented out, the redirected # standard output will not appear in the out.log file until the job has completed. -# If the script is run in the foreground, the lines can be commented out to monitor the output log. +# If the script is run in the foreground, the lines must be commented out. # #import io #sys.stdout = io.TextIOWrapper(open(sys.stdout.fileno(), 'wb', 0), write_through=True) From f97f620804bebd857fd4ec5e144312b72c58c1d5 Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Tue, 24 Jun 2025 17:05:59 -0400 Subject: [PATCH 38/45] nodata_tol -> nodata_tol_frac --- GEOSldas_App/util/shared/python/tile_to_latlongrid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py index a4c4de46..3a0b8cc3 100644 --- a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py +++ b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py @@ -16,7 +16,7 @@ def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_ tile_coord : Dictionary containing tile coordinate information resolution : target grid resolution nodata : Value for missing data - nodata_tol : Tolerance for comparing values to nodata + nodata_tol_frac : Tolerance fraction for comparing values to nodata Returns: ------- From e07721c680f1d103af0aed187813ddf2697ba26c Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Tue, 24 Jun 2025 17:07:27 -0400 Subject: [PATCH 39/45] fix imports, minor modification --- .../ObsFcstAna_stats/Plot_stats_maps.py | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py index 42ad9d99..b0b7382b 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py @@ -24,7 +24,7 @@ from netCDF4 import Dataset -from tile_to_latlon import tile_to_latlon, get_grid_resolution +from tile_to_latlongrid import tile_to_latlongrid from plot import plotMap from EASEv2 import EASEv2_ind2latlon @@ -92,17 +92,18 @@ def Main_OmF_maps(): # Typically used for SMAP Tb_h/h from asc and desc overpasses, # or ASCAT soil moisture from Metop-A/B/C. # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS! - Nobs_data = np.nansum( N_data, axis=1) - OmF_mean = np.nansum(OmF_mean *N_data, axis=1)/Nobs_data - OmF_stdv = np.nansum(OmF_stdv *N_data, axis=1)/Nobs_data - OmF_norm_mean = np.nansum(OmF_norm_mean*N_data, axis=1)/Nobs_data - OmF_norm_stdv = np.nansum(OmF_norm_stdv*N_data, axis=1)/Nobs_data - OmA_mean = np.nansum(OmA_mean *N_data, axis=1)/Nobs_data - OmA_stdv = np.nansum(OmA_stdv *N_data, axis=1)/Nobs_data - - # Nobs_data need to be at least 1, otherwise unwanted zeros + Ndata_sum = np.nansum( N_data, axis=1) + OmF_mean = np.nansum(OmF_mean *N_data, axis=1)/Ndata_sum + OmF_stdv = np.nansum(OmF_stdv *N_data, axis=1)/Ndata_sum + OmF_norm_mean = np.nansum(OmF_norm_mean*N_data, axis=1)/Ndata_sum + OmF_norm_stdv = np.nansum(OmF_norm_stdv*N_data, axis=1)/Ndata_sum + OmA_mean = np.nansum(OmA_mean *N_data, axis=1)/Ndata_sum + OmA_stdv = np.nansum(OmA_stdv *N_data, axis=1)/Ndata_sum + N_data = Ndata_sum + + # N_data need to be at least 1, otherwise unwanted zeros # might get into the computation of global mean etc. - Nobs_data[Nobs_data == 0] = np.nan + N_data[N_data == 0] = np.nan # -------------------------------------------------------------------------------- # Plot stats on regular lat/lon grid, with grid spacing guided by the footprint @@ -151,11 +152,11 @@ def Main_OmF_maps(): for j in np.arange(2): units = '[k]' if i == 0 and j == 0: - tile_data = Nobs_data + tile_data = N_data # crange is [cmin, cmax] crange =[0, np.ceil((end_time-start_time).days/150)*300] colormap = plt.get_cmap('jet',20) - title_txt = exptag + ' Tb Nobs ' + start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m') + title_txt = exptag + ' Tb Nobs (excl. 0s)' + start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m') units = '[-]' if i == 0 and j ==1: tile_data = OmF_mean From 351e0a06ddc899cb0d39851bc83c8740c18b20a9 Mon Sep 17 00:00:00 2001 From: Rolf Reichle Date: Tue, 24 Jun 2025 17:48:41 -0400 Subject: [PATCH 40/45] minimal edits of comments; white-space changes (Plot_stats_maps.py, tile_to_latlongrid.py) --- .../postproc/ObsFcstAna_stats/Plot_stats_maps.py | 15 +++++++++++---- .../util/shared/python/tile_to_latlongrid.py | 8 ++++---- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py index b0b7382b..ebc23090 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py @@ -87,22 +87,29 @@ def Main_OmF_maps(): # Select/combine fields for plotting. The following provides an example to # computes average stats across all species. - + # # Compute Nobs-weighted avg of each metric across all species. # Typically used for SMAP Tb_h/h from asc and desc overpasses, # or ASCAT soil moisture from Metop-A/B/C. # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS! + Ndata_sum = np.nansum( N_data, axis=1) + OmF_mean = np.nansum(OmF_mean *N_data, axis=1)/Ndata_sum OmF_stdv = np.nansum(OmF_stdv *N_data, axis=1)/Ndata_sum OmF_norm_mean = np.nansum(OmF_norm_mean*N_data, axis=1)/Ndata_sum OmF_norm_stdv = np.nansum(OmF_norm_stdv*N_data, axis=1)/Ndata_sum OmA_mean = np.nansum(OmA_mean *N_data, axis=1)/Ndata_sum OmA_stdv = np.nansum(OmA_stdv *N_data, axis=1)/Ndata_sum - N_data = Ndata_sum + + N_data = Ndata_sum + + # The obs are assigned to tiles only for book-keeping purposes, and the resolution of + # the obs is often coarser than that of the model tile space. Therefore, typically many + # tiles are never assigned any obs and end up with N_data=0. + # Here, we remove these unwanted zeros from N_data to keep them out of the computation of + # spatial averages (global avg; mapping from tile space to lat/lon plotting grid) - # N_data need to be at least 1, otherwise unwanted zeros - # might get into the computation of global mean etc. N_data[N_data == 0] = np.nan # -------------------------------------------------------------------------------- diff --git a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py index 3a0b8cc3..ea13a13c 100644 --- a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py +++ b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py @@ -12,10 +12,10 @@ def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_ Parameters: ---------- - tile_data : Array in tile space, shape (N_tile, N_fields) - tile_coord : Dictionary containing tile coordinate information - resolution : target grid resolution - nodata : Value for missing data + tile_data : Array in tile space, shape (N_tile, N_fields) + tile_coord : Dictionary containing tile coordinate information + resolution : Target grid resolution + nodata : Value for missing data nodata_tol_frac : Tolerance fraction for comparing values to nodata Returns: From a48ac51f9bfe1d5b4dc0bdcd48d087ffc0ff640e Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Wed, 25 Jun 2025 09:30:40 -0400 Subject: [PATCH 41/45] correction for missing files --- .../ObsFcstAna_stats/postproc_ObsFcstAna.py | 47 ++++++++++--------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py index 57a122fa..027ea8a1 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py @@ -152,31 +152,32 @@ def compute_monthly_sums(self, date_time): data_all.append(data_tile) - # cross-mask over all experiments - is_cross_valid = ~np.isnan(data_all[0]['obs_obs']) - for data in data_all[1:]: - mask = ~np.isnan(data['obs_obs']) - is_cross_valid = np.logical_and(is_cross_valid,mask) - - # reconstruct the output variable dictionary based on input options; - # obs_obs and obs_obsvar are from exp_list[obs_from], the rest are from exp_list[0] - data_tile = {} - for var in var_list: - if 'obs_obs' in var: - data_tile[var] = data_all[obs_from][var] - else: - data_tile[var] = data_all[0][var] + if len(data_all) > 0: + # cross-mask over all experiments + is_cross_valid = ~np.isnan(data_all[0]['obs_obs']) + for data in data_all[1:]: + mask = ~np.isnan(data['obs_obs']) + is_cross_valid = np.logical_and(is_cross_valid,mask) + + # reconstruct the output variable dictionary based on input options; + # obs_obs and obs_obsvar are from exp_list[obs_from], the rest are from exp_list[0] + data_tile = {} + for var in var_list: + if 'obs_obs' in var: + data_tile[var] = data_all[obs_from][var] + else: + data_tile[var] = data_all[0][var] - is_valid = is_cross_valid - - N_data[ is_valid] += 1 - oxf_sum[is_valid] += data_tile['obs_obs' ][is_valid] * data_tile['obs_fcst'][is_valid] - oxa_sum[is_valid] += data_tile['obs_obs' ][is_valid] * data_tile['obs_ana' ][is_valid] - fxa_sum[is_valid] += data_tile['obs_fcst'][is_valid] * data_tile['obs_ana' ][is_valid] + is_valid = is_cross_valid + + N_data[ is_valid] += 1 + oxf_sum[is_valid] += data_tile['obs_obs' ][is_valid] * data_tile['obs_fcst'][is_valid] + oxa_sum[is_valid] += data_tile['obs_obs' ][is_valid] * data_tile['obs_ana' ][is_valid] + fxa_sum[is_valid] += data_tile['obs_fcst'][is_valid] * data_tile['obs_ana' ][is_valid] - for var in var_list: - data_sum[ var][is_valid] += data_tile[var][is_valid] - data2_sum[var][is_valid] += data_tile[var][is_valid] **2 + for var in var_list: + data_sum[ var][is_valid] += data_tile[var][is_valid] + data2_sum[var][is_valid] += data_tile[var][is_valid] **2 date_time = date_time + timedelta(seconds=da_dt) From 6293a9f612f3f5036517c1e51ab20d8f99a0fd4a Mon Sep 17 00:00:00 2001 From: Qing Liu Date: Wed, 25 Jun 2025 09:31:52 -0400 Subject: [PATCH 42/45] change loop order --- GEOSldas_App/util/shared/python/tile_to_latlongrid.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py index ea13a13c..da870be0 100644 --- a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py +++ b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py @@ -47,9 +47,10 @@ def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_ grid_data = np.zeros((len(lat_grid), len(lon_grid), nf)) N_tilecnt = np.zeros((len(lat_grid), len(lon_grid), nf)) - for v in range(nf): - for k in range(tile_coord['N_tile']): - if ~np.isnan(tile_data[k,:]): + + for k in range(tile_coord['N_tile']): + for v in range(nf): + if ~np.isnan(tile_data[k,v]): j = np.searchsorted(lat_grid + resolution/2., tile_coord['com_lat'][k]) i = np.searchsorted(lon_grid + resolution/2., tile_coord['com_lon'][k]) # grid value takes the mean of all tiles within From 6567edc861c91251100237fed10747f4dbe6dff8 Mon Sep 17 00:00:00 2001 From: Weiyuan Jiang Date: Wed, 25 Jun 2025 15:00:01 -0400 Subject: [PATCH 43/45] path fix for ncra --- CHANGELOG.md | 1 + GEOSldas_App/lenkf_j_template.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d12932b..c5060808 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fixed ncra path for monthly compression - Fixed error from MAPL's ApplicationSupport.F90 to init UDUNITS. ### Removed diff --git a/GEOSldas_App/lenkf_j_template.py b/GEOSldas_App/lenkf_j_template.py index 29090097..0ed56fd0 100644 --- a/GEOSldas_App/lenkf_j_template.py +++ b/GEOSldas_App/lenkf_j_template.py @@ -646,7 +646,7 @@ if($NAVAIL != $NDAYS) continue # create monthly-mean nc4 file - ncra -h $EXPID.$ThisCol.${{YYYY}}${{MM}}*.nc4 ${{EXPID}}.${{ThisCol}}.monthly.$YYYY$MM.nc4 + $BASEBIN/ncra -h $EXPID.$ThisCol.${{YYYY}}${{MM}}*.nc4 ${{EXPID}}.${{ThisCol}}.monthly.$YYYY$MM.nc4 if($POSTPROC_HIST == 2) then # rudimentary check for desired nc4 file; if ok, delete daily files From 34bd3e7a18b670ebe197d92a90bb4125ccf3e979 Mon Sep 17 00:00:00 2001 From: amfox37 Date: Wed, 25 Jun 2025 13:29:47 -0600 Subject: [PATCH 44/45] tg bug fix and typos --- .../util/postproc/ObsFcstAna_stats/Plot_stats_maps.py | 2 +- .../util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py | 2 +- .../util/postproc/ObsFcstAna_stats/helper/write_nc4.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py index ebc23090..18a4c09e 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py @@ -136,7 +136,7 @@ def Main_OmF_maps(): # If obs_fov=0, reset to match model resolution. if obs_fov == 0: - my_res = np.sqrt( tile_grid['dlat']*tile_grid['dlon'] ) + my_res = np.sqrt( tg['dlat']*tg['dlon'] ) # pick a resolution from a sample vector of resolutions diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py index 0bdba303..2c499cfd 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py @@ -56,7 +56,7 @@ def Main_OmF_timeseries(): if stats_file is not None: with open(stats_file,'wb') as file: - print(f'saveing stats to {stats_file}') + print(f'saving stats to {stats_file}') pickle.dump(stats,file) date_vec = stats['date_vec'] diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/helper/write_nc4.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/helper/write_nc4.py index 8090cc89..57ebf694 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/helper/write_nc4.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/helper/write_nc4.py @@ -9,7 +9,7 @@ def write_sums_nc4(file_path, N_data, data_sum, data2_sum, oxf_sum, oxa_sum, fxa_sum, obs_param): - nc = Dataset(file_path,'w',formate='NETCDF4') + nc = Dataset(file_path,'w',format='NETCDF4') tile = nc.createDimension('tile', N_data.shape[0]) species = nc.createDimension('species', N_data.shape[1]) @@ -46,7 +46,7 @@ def write_sums_nc4(file_path, N_data, data_sum, data2_sum, oxf_sum, oxa_sum, fxa def write_stats_nc4(file_path, stats): - nc = Dataset(file_path,'w',formate='NETCDF4') + nc = Dataset(file_path,'w',format='NETCDF4') tile = nc.createDimension('tile', stats['N_data'].shape[0]) species = nc.createDimension('species', stats['N_data'].shape[1]) From b0b3ed1f88114eb1df9a744bf095447c0a9ea8d2 Mon Sep 17 00:00:00 2001 From: amfox37 Date: Wed, 25 Jun 2025 14:04:01 -0600 Subject: [PATCH 45/45] quiet divide by nan --- .../postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py index 027ea8a1..00058e06 100644 --- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py +++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py @@ -457,14 +457,16 @@ def calc_spatial_stats_from_sums(self): # Aggregate data of all tiles N_data_mo = np.nansum(mN_data, axis=0) - OxF_mean = np.nansum(moxf_sum,axis=0)/N_data_mo - OxA_mean = np.nansum(moxa_sum,axis=0)/N_data_mo + with np.errstate(divide='ignore', invalid='ignore'): + OxF_mean = np.nansum(moxf_sum, axis=0) / np.where(N_data_mo > 0, N_data_mo, np.nan) + OxA_mean = np.nansum(moxa_sum, axis=0) / np.where(N_data_mo > 0, N_data_mo, np.nan) data_mean = {} data2_mean = {} data_var = {} for var in var_list: - data_mean[ var] = np.nansum(mdata_sum[var ],axis=0)/N_data_mo - data2_mean[var] = np.nansum(mdata2_sum[var],axis=0)/N_data_mo + with np.errstate(divide='ignore', invalid='ignore'): + data_mean[ var] = np.nansum(mdata_sum[var ], axis=0) / np.where(N_data_mo > 0, N_data_mo, np.nan) + data2_mean[var] = np.nansum(mdata2_sum[var], axis=0) / np.where(N_data_mo > 0, N_data_mo, np.nan) # var(x) = E[x2] - (E[x])^2 data_var[var] = data2_mean[var] - data_mean[var]**2