From a629ef27af546743e736d7cae157603ac43b322b Mon Sep 17 00:00:00 2001
From: Rolf Reichle <rolf.reichle@nasa.gov>
Date: Thu, 12 Jun 2025 08:27:33 -0400
Subject: [PATCH 01/45] cleaned up "domain" variables in ObsFcstAna
 postprocessing package

---
 .../util/postproc/ObsFcstAna_stats/Plot_stats_maps.py      | 2 +-
 .../util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py  | 4 +---
 GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py | 7 ++++---
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
index ed44a8db..b6297ca4 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
@@ -141,7 +141,7 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'):
                 grid_data, uy, ux = remap_1d_to_2d(tile_data, lat_1d = tc['com_lat'], lon_1d = tc['com_lon'])
                 lon_2d,lat_2d = np.meshgrid(ux, uy)
 
-                # Aear weighted mean and mean(abs)
+                # Area weighted mean and mean(abs)
                 wmean    =     np.nansum(       tile_data     * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area'])
                 wabsmean =     np.nansum(np.abs(tile_data)    * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area'])
                 if 'normalized' in title_txt:
diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
index 41b86df9..8594050f 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
@@ -82,7 +82,6 @@ def compute_monthly_sums(self, date_time):
         
         expdir_list   = self.expdir_list
         expid_list    = self.expid_list
-        domain        = self.domain
         tc            = self.tilecoord
         obsparam_list = self.obsparam_list
         var_list      = self.var_list
@@ -112,7 +111,7 @@ def compute_monthly_sums(self, date_time):
             # read the list of experiments at each time step (OFA="ObsFcstAna")
             OFA_list = []
             for i in range(len(expdir_list)):
-                fname = expdir_list[i]+expid_list[i]+'/output/'+domain+'/ana/ens_avg/Y'+ \
+                fname = expdir_list[i]+expid_list[i]+'/output/'+self.domain+'/ana/ens_avg/Y'+ \
                                   date_time.strftime('%Y') + '/M' + \
                                   date_time.strftime('%m') + '/'  + \
                                   expid_list[i]+'.ens_avg.ldas_ObsFcstAna.' + \
@@ -190,7 +189,6 @@ def compute_monthly_sums(self, date_time):
     def save_monthly_sums(self):
         expdir_list    = self.expdir_list
         expid_list     = self.expid_list
-        domain         = self.domain
         var_list       = self.var_list
         tc             = self.tilecoord
         obsparam_list  = self.obsparam_list
diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py
index 27e484fc..cc3f8d13 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py
@@ -39,7 +39,8 @@ def get_config():
 
     # Optional experiment(s) can be added for cross-masking or extracting obs from a different experiment.
     #
-    # All optional experiments and the main experiment must have identical tilecoords.
+    # All optional experiments and the main experiment must have identical tile space (BCs resolution) and domain.
+    #
     # If the default "species" number/order do not match, set "species_list" accordingly to force a match.
     # Output will be cross-masked between all specified experiments.    
     #
@@ -52,7 +53,6 @@ def get_config():
     exp_sup1 = {
         'expdir'        : '/discover/nobackup/projects/gmao/merra/iau/merra_land/SMAP_runs/SMAP_Nature_v11/',
         'expid'         : 'DAv8_M36',
-        'domain'        : 'SMAP_EASEv2_M36_GLOBAL',
         'use_obs'       : True,                        # if True, use obs data from this exp
         'species_list'  : [1,2,3,4],                   # indices of species to be processed,
                                                        #   must identify same species as selected in main exp
@@ -90,11 +90,12 @@ def get_config():
     end_time   = datetime( end_year,   end_month,   1)
 
     # Get tilecoord and obsparam information for each experiment
+
+    domain = exp_list[0]['domain']
     
     for exp in exp_list:
         expdir        = exp['expdir']
         expid         = exp['expid']
-        domain        = exp['domain']
 
         YYYY          = exp['obsparam_time'][0:4]
         MM            = exp['obsparam_time'][4:6]

From 9267abb48dc9adeae5e21c6efc49592bef0afeec Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Mon, 16 Jun 2025 10:16:49 -0400
Subject: [PATCH 02/45] add read_tilegrids() function

---
 .../util/shared/python/read_GEOSldas.py       | 58 +++++++++++++++++++
 .../util/shared/python/remap_1d_to_2d.py      | 29 ----------
 2 files changed, 58 insertions(+), 29 deletions(-)
 delete mode 100644 GEOSldas_App/util/shared/python/remap_1d_to_2d.py

diff --git a/GEOSldas_App/util/shared/python/read_GEOSldas.py b/GEOSldas_App/util/shared/python/read_GEOSldas.py
index 96ba4173..1e728584 100644
--- a/GEOSldas_App/util/shared/python/read_GEOSldas.py
+++ b/GEOSldas_App/util/shared/python/read_GEOSldas.py
@@ -99,6 +99,64 @@ def read_tilecoord(fname):
     print("done reading file")
     return tile_coord
 
+import struct
+import numpy as np
+
+# ----------------------------------------------------------------------------
+#
+# reader for GEOSldas tilecoord file (binary)
+
+def read_tilegrids(fname):
+    """
+    Read tile grid information from file and return global and domain grid structures.
+    
+    Parameters:
+    ----------
+    fname : str
+        Path to the input file (either .txt or .bin)
+        
+    Returns:
+    -------
+    tile_grid_g : dict
+        Global tile grid information
+    tile_grid_d : dict
+        Domain tile grid information
+    """
+    
+    # Set endian format
+    endian = '<'            # '>' for big-endian, '<' for little-endian
+    
+    # Read binary file
+    print(f'reading from {fname}')
+    
+    with open(fname, 'rb') as ifp:
+        # Read first record: "global" grid (tile_grid_g)
+        for grid in ['global','domain']:
+            tile_grid = {}
+            fortran_tag = struct.unpack(f'{endian}i', ifp.read(4))[0]
+            tile_grid['gridtype'] = ifp.read(40).decode('ascii').strip('\x00')
+            tile_grid['ind_base'] = struct.unpack(f'{endian}i', ifp.read(4))[0]
+            tile_grid['i_dir'] = struct.unpack(f'{endian}i', ifp.read(4))[0]
+            tile_grid['j_dir'] = struct.unpack(f'{endian}i', ifp.read(4))[0]
+            tile_grid['N_lon'] = struct.unpack(f'{endian}i', ifp.read(4))[0]
+            tile_grid['N_lat'] = struct.unpack(f'{endian}i', ifp.read(4))[0]
+            tile_grid['i_offg'] = struct.unpack(f'{endian}i', ifp.read(4))[0]
+            tile_grid['j_offg'] = struct.unpack(f'{endian}i', ifp.read(4))[0]
+            tile_grid['ll_lon'] = struct.unpack(f'{endian}f', ifp.read(4))[0]
+            tile_grid['ll_lat'] = struct.unpack(f'{endian}f', ifp.read(4))[0]
+            tile_grid['ur_lon'] = struct.unpack(f'{endian}f', ifp.read(4))[0]
+            tile_grid['ur_lat'] = struct.unpack(f'{endian}f', ifp.read(4))[0]
+            tile_grid['dlon'] = struct.unpack(f'{endian}f', ifp.read(4))[0]
+            tile_grid['dlat'] = struct.unpack(f'{endian}f', ifp.read(4))[0]
+            fortran_tag = struct.unpack(f'{endian}i', ifp.read(4))[0]
+
+            if 'global' in grid:
+                tile_grid_g = tile_grid
+            else:
+                tile_grid_d = tile_grid
+            
+    return tile_grid_g, tile_grid_d
+
 # ----------------------------------------------------------------------------
 #
 # reader for GEOSldas ObsFcstAna file (binary)
diff --git a/GEOSldas_App/util/shared/python/remap_1d_to_2d.py b/GEOSldas_App/util/shared/python/remap_1d_to_2d.py
deleted file mode 100644
index d0a1d9fc..00000000
--- a/GEOSldas_App/util/shared/python/remap_1d_to_2d.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import os
-import numpy as np
-
-# convert data from 1-d array ("compressed") format to 2-d array (grid) format based on lat/lon coordinates
-#
-# 1-d input data can have one additional dimension (e.g., time)
-#
-# 2-d output data is lat-by-lon[-by-time]
-
-def remap_1d_to_2d( data_1d, *, lat_1d, lon_1d):
-    
-    # Extract unique coordinates 
-    unique_lats, indY = np.unique(lat_1d, return_inverse=True)
-    unique_lons, indX = np.unique(lon_1d, return_inverse=True)
-
-    ny = len(unique_lats)
-    nx = len(unique_lons)
-    
-    if   data_1d.ndim == 2:                          #[n_1d, ntime]
-        ntime = data_1d.shape[1]
-        data_2d = np.full([ny, nx, ntime], np.nan)
-        data_2d[indY, indX, :] = data_1d
-    elif data_1d.ndim == 1:                          #[n_1d]
-        data_2d = np.full([ny, nx],        np.nan)
-        data_2d[indY, indX   ] = data_1d
-
-    return data_2d, unique_lats, unique_lons
-
-

From 60c30193ea62cf7ee4977538d9c4450418608cb0 Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Mon, 16 Jun 2025 10:17:44 -0400
Subject: [PATCH 03/45] update tile2grid options

---
 GEOSldas_App/util/shared/python/tile2grid.py | 140 +++++++++++++++++++
 1 file changed, 140 insertions(+)
 create mode 100644 GEOSldas_App/util/shared/python/tile2grid.py

diff --git a/GEOSldas_App/util/shared/python/tile2grid.py b/GEOSldas_App/util/shared/python/tile2grid.py
new file mode 100644
index 00000000..e99b2f05
--- /dev/null
+++ b/GEOSldas_App/util/shared/python/tile2grid.py
@@ -0,0 +1,140 @@
+import os
+import numpy as np
+
+# convert data from 1-d array ("compressed") format to 2-d array (grid) format based on lat/lon coordinates
+#
+# 1-d input data can have one additional dimension (e.g., time)
+#
+# 2-d output data is lat-by-lon[-by-time]
+def interpolate_to_latlon_grid(data_1d, lat, lon, resolution=1.0):
+    """
+    For cubedsphere grid: use 
+    
+    """
+    from scipy.interpolate import griddata
+
+    lat_grid = np.arange(-90+resolution/2, 90, resolution)
+    lon_grid = np.arange(-180+resolution/2, 180, resolution)
+    lon_2d, lat_2d = np.meshgrid(lon_grid, lat_grid)
+
+    ny = len(lat_grid)
+    nx = len(lon_grid)
+
+    nf = data_1d.shape[-1]
+    data_2d = np.full([ny, nx, nf], np.nan)
+    for i in range(nf):
+        data = data_1d[:,i]
+        data_2d[:,:,i] = griddata((lat.flatten(), lon.flatten()), data.flatten(), (lat_2d, lon_2d), method='linear')
+        
+    return data_2d, lat_2d, lon_2d
+
+def remap_1d_to_2d( data_1d,  lat_1d, lon_1d):
+
+    """
+    For EASE grid: use remapping
+    
+    """
+    
+    # Extract unique coordinates 
+    unique_lats, indY = np.unique(lat_1d, return_inverse=True)
+    unique_lons, indX = np.unique(lon_1d, return_inverse=True)
+
+    ny = len(unique_lats)
+    nx = len(unique_lons)
+    
+    nf = data_1d.shape[1]
+    data_2d = np.full([ny, nx, nf], np.nan)
+    data_2d[indY, indX, :] = data_1d
+        
+    lon_2d,lat_2d = np.meshgrid(unique_lons, unique_lats)
+    
+    return data_2d, lat_2d, lon_2d
+
+def tile2grid( tile_data, tile_coord, tile_grid, nodata=1.e15, nodata_tol=1.e-4):
+    
+    """
+    Convert tile-space data to grid-space data.
+    
+    Parameters:
+    ----------
+    tile_data : Array in tile space, shape (N_tile, N_fields)
+    tile_coord : Dictionary containing tile coordinate information
+    tile_grid : Dictionary containing grid information
+    nodata : Value for missing data
+    nodata_tol : Tolerance for comparing values to nodata
+        
+    Returns:
+    -------
+    grid_data : Array in grid space, shape (N_lat, N_lon, N_fields)
+    lat_2d: Lat array in grid space, shape (N_lat, N_lon)
+    lon_2d: Lon array in grid space, shape (N_lat, N_lon)
+    
+    """   
+    # Verifity input input datasize
+    if tile_data.shape[0] != tile_coord['N_tile']:
+        print(f'Error: tile2grid input data size don''t match N_tile')
+        sys.exit()
+        
+    # if tile_data is 1-D [N_tile], expand into 2-D [N_tile,1]
+    if tile_data.ndim == 1:
+        tile_data = np.expand_dims(tile_data, axis=1)
+
+    N_fields = tile_data.shape[-1]
+    
+    # Determine grid_type (cubedsphere, EASE or regular latlon)
+    is_CS = int(tile_grid['N_lat']/tile_grid['N_lon']) == 6
+    is_EASE = 'EASE' in tile_grid['gridtype']
+
+    if is_CS:
+
+        grid_data, lat_2d,lon_2d = interpolate_to_latlon_grid( tile_data,  tile_coord['com_lat'], tile_coord['com_lon'])
+
+    elif is_EASE:
+        
+        grid_data, lat_2d,lon_2d = remap_1d_to_2d( tile_data,  tile_coord['com_lat'], tile_coord['com_lon'])
+        
+    else:
+            
+        # Initialize grid data array
+        grid_data = np.zeros((tile_grid['N_lat'], tile_grid['N_lon'], N_fields))
+
+        for k in range(N_fields):
+            # Initialize weight grid for current field
+            wgrid = np.zeros((tile_grid['N_lat'], tile_grid['N_lon']))
+            
+            # Loop through tile space
+            for n in range(tile_coord['N_tile']):
+                # Calculate grid indices (adjust for Python's 0-based indexing)
+                i = tile_coord['i_indg'][n] - (tile_grid['i_offg'] - (1-tile_grid['ind_base'])) - 1
+                j = tile_coord['j_indg'][n] - (tile_grid['j_offg'] - (1-tile_grid['ind_base'])) - 1
+                
+                # Get weight
+                w = tile_coord['frac_cell'][n]
+                
+                # Check if current tile data is valid (not no-data)
+                if abs(tile_data[k, n] - nodata) > nodata_tol:
+                    # Accumulate weighted data
+                    grid_data[j, i, k] += w * tile_data[n,k]
+                    wgrid[j, i] += w
+            
+            # Normalize and set no-data values
+            for i in range(tile_grid['N_lon']):
+                for j in range(tile_grid['N_lat']):
+                    if wgrid[j, i] > 0.0:
+                        # Normalize by total weight
+                        grid_data[j, i, k] = grid_data[j, i, k] / wgrid[j, i]
+                    else:
+                        # Set no-data value
+                        grid_data[j, i, k] = nodata
+        
+        # Replace nodata values with NaN
+        grid_data[grid_data == nodata] = np.nan
+
+        lat_2d = np.arange(tile_grid['ll_lat']+0.5*tile_grid['dlat'], tile_grid['ur_lat'], tile_grid['dlat'])
+        lon_2d = np.arange(tile_grid['ll_lon']+0.5*tile_grid['dlon'], tile_grid['ur_lon'], tile_grid['dlon'])
+
+    if grid_data.shape[-1] == 1:
+        grid_data = grid_data[:,:,0]
+        
+    return grid_data, lat_2d, lon_2d
+

From 0b816644054ab879cc2e181263780fa56f1c1705 Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Mon, 16 Jun 2025 10:20:22 -0400
Subject: [PATCH 04/45] add tilegrid info. in config

---
 .../util/postproc/ObsFcstAna_stats/user_config.py     | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py
index cc3f8d13..6384e410 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py
@@ -6,7 +6,7 @@
 import numpy as np
 
 from datetime               import datetime, timedelta
-from read_GEOSldas          import read_tilecoord, read_obs_param
+from read_GEOSldas          import read_tilecoord, read_tilegrids, read_obs_param
 from postproc_ObsFcstAna    import postproc_ObsFcstAna
 
 def get_config():
@@ -20,7 +20,7 @@ def get_config():
     start_year  = 2015
     start_month =    4
     last_year   = 2016
-    last_month  =    4
+    last_month  =    3
 
     # Sums or stats will be processed for exp_main:
 
@@ -66,7 +66,7 @@ def get_config():
     
     # Top level directory for all output from this package:
 
-    out_path = '/discover/nobackup/[USERNAME]/SMAP_test/'
+    out_path = '/discover/nobackup/qliu/SMAP_test/'
 
     # Directory for monthly sum files:
     # - Can use the experiment directory or a different path.
@@ -115,8 +115,11 @@ def get_config():
         ftc = expdir+expid+'/output/'+ domain+'/rc_out/'+ expid+'.ldas_tilecoord.bin'
         tc  = read_tilecoord(ftc)
 
+        ftg = expdir+expid+'/output/'+ domain+'/rc_out/'+ expid+'.ldas_tilegrids.bin'
+        tg_global, tg_domain  = read_tilegrids(ftg)
+
         # add tilecoord and obsparam into to exp        
-        exp.update({'tilecoord':tc, 'obsparam':obsparam})
+        exp.update({'tilecoord':tc, 'obsparam':obsparam, 'tilegrid_global':tg_global,'tilegrid_domain': tg_domain})
 
     # verify that obs species match across experiments
     for exp_idx, exp in enumerate(exp_list) :

From 33b9123393de1bd2511ed82cab552e3370f8cf8b Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Mon, 16 Jun 2025 10:21:24 -0400
Subject: [PATCH 05/45] add tilegrids info

---
 .../util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py       | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
index 8594050f..5748d633 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
@@ -32,6 +32,8 @@ def __init__(self, exp_list, start_time, end_time, sum_path='./'):
         self.da_dt         = exp_list[0]['da_dt']
         self.var_list      = ['obs_obs','obs_obsvar','obs_fcst','obs_fcstvar','obs_ana','obs_anavar']
         self.tilecoord     = exp_list[0]['tilecoord']
+        self.tilegrid_global     = exp_list[0]['tilegrid_global']
+        self.tilegrid_domain    = exp_list[0]['tilegrid_domain']
         self.obsparam_list = [item['obsparam'] for item in exp_list]
         self.sum_path      = sum_path
 

From 79ff6a16f3760687c9cae4870e58ae406d8dc5f2 Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Mon, 16 Jun 2025 10:24:06 -0400
Subject: [PATCH 06/45] change remap_1d_to_2d() to tile2grid()

---
 .../postproc/ObsFcstAna_stats/Plot_stats_maps.py     | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
index b6297ca4..7247b547 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
@@ -15,7 +15,7 @@
 
 from datetime               import timedelta
 
-from remap_1d_to_2d         import remap_1d_to_2d
+from remap_1d_to_2d         import tile2grid
 from plot                   import plotMap
 from EASEv2                 import EASEv2_ind2latlon
 
@@ -25,6 +25,7 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'):
     end_time   = postproc_obj.end_time
     domain     = postproc_obj.domain
     tc         = postproc_obj.tilecoord
+    tg        = postproc_obj.tilegrid_global
     
     # Sample of final compuation of selected diagnostic metrics 
      
@@ -138,9 +139,8 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'):
                 lat_M36, lon_M36 = EASEv2_ind2latlon(np.arange(406), np.arange(964),'M36')
                 lon_2d,lat_2d = np.meshgrid(lon_M36,lat_M36)
             else:
-                grid_data, uy, ux = remap_1d_to_2d(tile_data, lat_1d = tc['com_lat'], lon_1d = tc['com_lon'])
-                lon_2d,lat_2d = np.meshgrid(ux, uy)
-
+                grid_data, lat_2d, lon_2d = tile2grid(tile_data, tc, tg)
+                
                 # Area weighted mean and mean(abs)
                 wmean    =     np.nansum(       tile_data     * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area'])
                 wabsmean =     np.nansum(np.abs(tile_data)    * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area'])
@@ -157,10 +157,10 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'):
             if 'normalized' in title_txt:
                 grid_data = np.log10(grid_data)
                 crange = [-0.6, 0.45]
-                
+            
             mm, cs = plotMap(grid_data, ax =axes[i,j], lat=lat_2d, lon=lon_2d, cRange=crange, \
                         title=title_txt, cmap=colormap, bounding=[-60, 80, -180,180])            
-
+            
     plt.tight_layout()
     # Save figure to file
     fig.savefig(fig_path+'Map_OmF_'+ exptag +'_'+start_time.strftime('%Y%m')+'_'+\

From b706b90eeca8f571427746ba2dbca152ee29e9ab Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Mon, 16 Jun 2025 10:27:15 -0400
Subject: [PATCH 07/45] correct module name for tile2grid

---
 GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
index 7247b547..800a4230 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
@@ -15,7 +15,7 @@
 
 from datetime               import timedelta
 
-from remap_1d_to_2d         import tile2grid
+from tile2grid              import tile2grid
 from plot                   import plotMap
 from EASEv2                 import EASEv2_ind2latlon
 

From 861923bfdbeabb9670704e8998a62fad77fa884a Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Mon, 16 Jun 2025 15:14:29 -0400
Subject: [PATCH 08/45] change to save stats of individual species

---
 .../ObsFcstAna_stats/Plot_stats_timeseries.py | 38 ++++++++++---------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
index db72e9f0..63ca9a3a 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
@@ -28,8 +28,8 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'):
     if os.path.isfile(stats_file):
         
         with open(stats_file,'rb') as file:
-            stats_dict = pickle.load(file)
-        date_vec = stats_dict['date_vec']
+            stats = pickle.load(file)
+        date_vec = stats['date_vec']
         
     else:
         
@@ -47,17 +47,7 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'):
             print('compute monthly spatial mean stats for '+ current_month.strftime('%Y%m'))
 
             OmFm,OmFs,OmAm,OmAs,Nobsm = postproc_obj.calc_spatial_stats_from_sums(current_month)
-
-            # Compute Nobs-weighted avg of each metric across all species.
-            # Typically used for SMAP Tb_h/h from asc and desc overpasses,
-            # or ASCAT soil moisture from Metop-A/B/C.
-            # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS!
-            Nobsm = np.nansum(     Nobsm)
-            OmFm  = np.nansum(OmFm*Nobsm)/Nobsm
-            OmFs  = np.nansum(OmFs*Nobsm)/Nobsm
-            OmAm  = np.nansum(OmAm*Nobsm)/Nobsm
-            OmAs  = np.nansum(OmAs*Nobsm)/Nobsm
-      
+                  
             Ndata.append(Nobsm)
 
             OmF_mean.append(OmFm)
@@ -69,18 +59,30 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'):
             current_month = current_month + relativedelta(months=1)
 
         # Store stats in a dictionary for easier saving and referencing  
-        stats_dict = {"OmF_mean":OmF_mean, "OmF_stdv":OmF_stdv,
-                      "OmA_mean":OmA_mean, "OmA_stdv":OmA_stdv,
-                      "Ndata": Ndata, "date_vec":date_vec}
+        stats = {"OmF_mean":np.array(OmF_mean), "OmF_stdv":np.array(OmF_stdv),
+                      "OmA_mean":np.array(OmA_mean), "OmA_stdv":np.array(OmA_stdv),
+                      "Ndata": np.array(Ndata), "date_vec":date_vec}
         
         if stats_file is not None:
             with open(stats_file,'wb') as file:
-                pickle.dump(stats_dict,file)
+                pickle.dump(stats,file)
+
+    # Compute Nobs-weighted avg of each metric across all species.
+    # Typically used for SMAP Tb_h/h from asc and desc overpasses,
+    # or ASCAT soil moisture from Metop-A/B/C.
+    # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS!
+    stats_plot = {}
+    Ndata = np.nansum(stats['Ndata'], axis=1)
+    stats_plot['Ndata'] = Ndata
+    stats_plot['OmF_mean']  = np.nansum(stats['OmF_mean']*stats['Ndata'], axis=1)/Ndata
+    stats_plot['OmF_stdv']    = np.nansum(stats['OmF_stdv']*stats['Ndata'], axis=1)/Ndata
+    stats_plot['OmA_mean']  = np.nansum(stats['OmA_mean']*stats['Ndata'], axis=1)/Ndata
+    stats_plot['OmA_stdv']   = np.nansum(stats['OmA_stdv']*stats['Ndata'], axis=1)/Ndata
     
     plot_var = 'OmF_mean'
 
     fig, ax = plt.subplots(figsize=(10,4))
-    bars = ax.bar(date_vec, stats_dict[plot_var])
+    bars = ax.bar(date_vec, stats_plot[plot_var])
 
     plt.grid(True, linestyle='--', alpha=0.5)
 

From 57bae3c360757fbeffec8aa49e9c70ec649ee6ff Mon Sep 17 00:00:00 2001
From: Rolf Reichle <rolf.reichle@nasa.gov>
Date: Tue, 17 Jun 2025 09:42:46 -0400
Subject: [PATCH 09/45] cleanup: corrected comments, removed redundant import
 statements, consistent variable name for endianness, vertical alignment
 (read_GEOSldas.py)

---
 .../util/shared/python/read_GEOSldas.py       | 40 +++++++++----------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/GEOSldas_App/util/shared/python/read_GEOSldas.py b/GEOSldas_App/util/shared/python/read_GEOSldas.py
index 1e728584..ccc1aa18 100644
--- a/GEOSldas_App/util/shared/python/read_GEOSldas.py
+++ b/GEOSldas_App/util/shared/python/read_GEOSldas.py
@@ -1,7 +1,6 @@
 
 # collection of readers for GEOSldas output files
 
-import numpy as np
 import struct
 import os
 import numpy as np
@@ -99,9 +98,6 @@ def read_tilecoord(fname):
     print("done reading file")
     return tile_coord
 
-import struct
-import numpy as np
-
 # ----------------------------------------------------------------------------
 #
 # reader for GEOSldas tilecoord file (binary)
@@ -124,31 +120,33 @@ def read_tilegrids(fname):
     """
     
     # Set endian format
-    endian = '<'            # '>' for big-endian, '<' for little-endian
+    machfmt = '<'            # '>' for big-endian, '<' for little-endian
     
     # Read binary file
     print(f'reading from {fname}')
     
     with open(fname, 'rb') as ifp:
-        # Read first record: "global" grid (tile_grid_g)
+        # Read "global" and "domain" records
         for grid in ['global','domain']:
+            
             tile_grid = {}
-            fortran_tag = struct.unpack(f'{endian}i', ifp.read(4))[0]
+            
+            fortran_tag           = struct.unpack(f'{machfmt}i', ifp.read(4))[0]
             tile_grid['gridtype'] = ifp.read(40).decode('ascii').strip('\x00')
-            tile_grid['ind_base'] = struct.unpack(f'{endian}i', ifp.read(4))[0]
-            tile_grid['i_dir'] = struct.unpack(f'{endian}i', ifp.read(4))[0]
-            tile_grid['j_dir'] = struct.unpack(f'{endian}i', ifp.read(4))[0]
-            tile_grid['N_lon'] = struct.unpack(f'{endian}i', ifp.read(4))[0]
-            tile_grid['N_lat'] = struct.unpack(f'{endian}i', ifp.read(4))[0]
-            tile_grid['i_offg'] = struct.unpack(f'{endian}i', ifp.read(4))[0]
-            tile_grid['j_offg'] = struct.unpack(f'{endian}i', ifp.read(4))[0]
-            tile_grid['ll_lon'] = struct.unpack(f'{endian}f', ifp.read(4))[0]
-            tile_grid['ll_lat'] = struct.unpack(f'{endian}f', ifp.read(4))[0]
-            tile_grid['ur_lon'] = struct.unpack(f'{endian}f', ifp.read(4))[0]
-            tile_grid['ur_lat'] = struct.unpack(f'{endian}f', ifp.read(4))[0]
-            tile_grid['dlon'] = struct.unpack(f'{endian}f', ifp.read(4))[0]
-            tile_grid['dlat'] = struct.unpack(f'{endian}f', ifp.read(4))[0]
-            fortran_tag = struct.unpack(f'{endian}i', ifp.read(4))[0]
+            tile_grid['ind_base'] = struct.unpack(f'{machfmt}i', ifp.read(4))[0]
+            tile_grid['i_dir']    = struct.unpack(f'{machfmt}i', ifp.read(4))[0]
+            tile_grid['j_dir']    = struct.unpack(f'{machfmt}i', ifp.read(4))[0]
+            tile_grid['N_lon']    = struct.unpack(f'{machfmt}i', ifp.read(4))[0]
+            tile_grid['N_lat']    = struct.unpack(f'{machfmt}i', ifp.read(4))[0]
+            tile_grid['i_offg']   = struct.unpack(f'{machfmt}i', ifp.read(4))[0]
+            tile_grid['j_offg']   = struct.unpack(f'{machfmt}i', ifp.read(4))[0]
+            tile_grid['ll_lon']   = struct.unpack(f'{machfmt}f', ifp.read(4))[0]
+            tile_grid['ll_lat']   = struct.unpack(f'{machfmt}f', ifp.read(4))[0]
+            tile_grid['ur_lon']   = struct.unpack(f'{machfmt}f', ifp.read(4))[0]
+            tile_grid['ur_lat']   = struct.unpack(f'{machfmt}f', ifp.read(4))[0]
+            tile_grid['dlon']     = struct.unpack(f'{machfmt}f', ifp.read(4))[0]
+            tile_grid['dlat']     = struct.unpack(f'{machfmt}f', ifp.read(4))[0]
+            fortran_tag           = struct.unpack(f'{machfmt}i', ifp.read(4))[0]
 
             if 'global' in grid:
                 tile_grid_g = tile_grid

From 25270fe749fa8499b9642c4fe557cf7d309cfe00 Mon Sep 17 00:00:00 2001
From: Rolf Reichle <rolf.reichle@nasa.gov>
Date: Tue, 17 Jun 2025 09:49:14 -0400
Subject: [PATCH 10/45] removed "qliu" from sample output path (user_config.py)

---
 GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py
index 6384e410..169bbcf3 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/user_config.py
@@ -66,7 +66,7 @@ def get_config():
     
     # Top level directory for all output from this package:
 
-    out_path = '/discover/nobackup/qliu/SMAP_test/'
+    out_path = '/discover/nobackup/[USERNAME]/'
 
     # Directory for monthly sum files:
     # - Can use the experiment directory or a different path.
@@ -119,7 +119,7 @@ def get_config():
         tg_global, tg_domain  = read_tilegrids(ftg)
 
         # add tilecoord and obsparam into to exp        
-        exp.update({'tilecoord':tc, 'obsparam':obsparam, 'tilegrid_global':tg_global,'tilegrid_domain': tg_domain})
+        exp.update({'tilecoord':tc, 'obsparam':obsparam, 'tilegrid_global':tg_global, 'tilegrid_domain':tg_domain})
 
     # verify that obs species match across experiments
     for exp_idx, exp in enumerate(exp_list) :

From 2a3d6e7deb479c94af29fc63e2b9e5e7bc622284 Mon Sep 17 00:00:00 2001
From: Rolf Reichle <rolf.reichle@nasa.gov>
Date: Tue, 17 Jun 2025 09:52:33 -0400
Subject: [PATCH 11/45] fixed vertical alignment (postproc_ObsFcstAna.py)

---
 .../ObsFcstAna_stats/postproc_ObsFcstAna.py   | 30 +++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
index 5748d633..e191a154 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
@@ -21,21 +21,21 @@
 class postproc_ObsFcstAna:
     
     def __init__(self, exp_list, start_time, end_time, sum_path='./'):
-        self.exp_list      = exp_list
-        self.expdir_list   = [item['expdir'] for item in exp_list]
-        self.expid_list    = [item['expid']  for item in exp_list]
-        self.exptag        = exp_list[0]['exptag']
-        self.domain        = exp_list[0]['domain']
-        self.start_time    = start_time 
-        self.end_time      = end_time
-        self.da_t0         = exp_list[0]['da_t0']
-        self.da_dt         = exp_list[0]['da_dt']
-        self.var_list      = ['obs_obs','obs_obsvar','obs_fcst','obs_fcstvar','obs_ana','obs_anavar']
-        self.tilecoord     = exp_list[0]['tilecoord']
-        self.tilegrid_global     = exp_list[0]['tilegrid_global']
-        self.tilegrid_domain    = exp_list[0]['tilegrid_domain']
-        self.obsparam_list = [item['obsparam'] for item in exp_list]
-        self.sum_path      = sum_path
+        self.exp_list        = exp_list
+        self.expdir_list     = [item['expdir'] for item in exp_list]
+        self.expid_list      = [item['expid']  for item in exp_list]
+        self.exptag          = exp_list[0]['exptag']
+        self.domain          = exp_list[0]['domain']
+        self.start_time      = start_time 
+        self.end_time        = end_time
+        self.da_t0           = exp_list[0]['da_t0']
+        self.da_dt           = exp_list[0]['da_dt']
+        self.var_list        = ['obs_obs','obs_obsvar','obs_fcst','obs_fcstvar','obs_ana','obs_anavar']
+        self.tilecoord       = exp_list[0]['tilecoord']
+        self.tilegrid_global = exp_list[0]['tilegrid_global']
+        self.tilegrid_domain = exp_list[0]['tilegrid_domain']
+        self.obsparam_list   = [item['obsparam'] for item in exp_list]
+        self.sum_path        = sum_path
 
         # Determine experiment that supplies obs data
         self.obs_from = -1

From 2423fb3dee30330c81f321ea647384fc3e273590 Mon Sep 17 00:00:00 2001
From: Rolf Reichle <rolf.reichle@nasa.gov>
Date: Tue, 17 Jun 2025 10:35:47 -0400
Subject: [PATCH 12/45] replaced variable name start/end/current_month with
 start/end/current_time for clarity and consistency; vertical alignment
 (Plot_stats_timeseries.py, Plot_stats_maps.py)

---
 .../ObsFcstAna_stats/Plot_stats_maps.py       |  2 +-
 .../ObsFcstAna_stats/Plot_stats_timeseries.py | 32 +++++++++----------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
index 800a4230..9bfc88ee 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
@@ -25,7 +25,7 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'):
     end_time   = postproc_obj.end_time
     domain     = postproc_obj.domain
     tc         = postproc_obj.tilecoord
-    tg        = postproc_obj.tilegrid_global
+    tg         = postproc_obj.tilegrid_global
     
     # Sample of final compuation of selected diagnostic metrics 
      
diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
index 63ca9a3a..bd763165 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
@@ -18,12 +18,12 @@
     
 def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'):
     import pickle
-    exptag      = postproc_obj.exptag
-    start_month = postproc_obj.start_time
-    end_month   = postproc_obj.end_time
+    exptag     = postproc_obj.exptag
+    start_time = postproc_obj.start_time
+    end_time   = postproc_obj.end_time
 
-    stats_file = fig_path + 'spatial_stats_'+exptag+'_'+start_month.strftime('%Y%m')+ \
-            '_'+(end_month+timedelta(days=-1)).strftime('%Y%m')+'.pkl'
+    stats_file = fig_path + 'spatial_stats_'+exptag+'_'+start_time.strftime('%Y%m')+ \
+        '_'+(end_time+timedelta(days=-1)).strftime('%Y%m')+'.pkl'
     
     if os.path.isfile(stats_file):
         
@@ -42,11 +42,11 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'):
         date_vec =[]
 
         # Time loop 
-        current_month = start_month
-        while current_month < end_month:
-            print('compute monthly spatial mean stats for '+ current_month.strftime('%Y%m'))
+        current_time = start_time
+        while current_time < end_time:
+            print('compute monthly spatial mean stats for '+ current_time.strftime('%Y%m'))
 
-            OmFm,OmFs,OmAm,OmAs,Nobsm = postproc_obj.calc_spatial_stats_from_sums(current_month)
+            OmFm,OmFs,OmAm,OmAs,Nobsm = postproc_obj.calc_spatial_stats_from_sums(current_time)
                   
             Ndata.append(Nobsm)
 
@@ -55,13 +55,13 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'):
             OmA_mean.append(OmAm)
             OmA_stdv.append(OmAs)
 
-            date_vec.append(current_month.strftime('%Y%m'))
-            current_month = current_month + relativedelta(months=1)
+            date_vec.append(current_time.strftime('%Y%m'))
+            current_time = current_time + relativedelta(months=1)
 
         # Store stats in a dictionary for easier saving and referencing  
         stats = {"OmF_mean":np.array(OmF_mean), "OmF_stdv":np.array(OmF_stdv),
-                      "OmA_mean":np.array(OmA_mean), "OmA_stdv":np.array(OmA_stdv),
-                      "Ndata": np.array(Ndata), "date_vec":date_vec}
+                 "OmA_mean":np.array(OmA_mean), "OmA_stdv":np.array(OmA_stdv),
+                 "Ndata":np.array(Ndata), "date_vec":date_vec}
         
         if stats_file is not None:
             with open(stats_file,'wb') as file:
@@ -73,11 +73,11 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'):
     # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS!
     stats_plot = {}
     Ndata = np.nansum(stats['Ndata'], axis=1)
-    stats_plot['Ndata'] = Ndata
+    stats_plot['Ndata']     = Ndata
     stats_plot['OmF_mean']  = np.nansum(stats['OmF_mean']*stats['Ndata'], axis=1)/Ndata
-    stats_plot['OmF_stdv']    = np.nansum(stats['OmF_stdv']*stats['Ndata'], axis=1)/Ndata
+    stats_plot['OmF_stdv']  = np.nansum(stats['OmF_stdv']*stats['Ndata'], axis=1)/Ndata
     stats_plot['OmA_mean']  = np.nansum(stats['OmA_mean']*stats['Ndata'], axis=1)/Ndata
-    stats_plot['OmA_stdv']   = np.nansum(stats['OmA_stdv']*stats['Ndata'], axis=1)/Ndata
+    stats_plot['OmA_stdv']  = np.nansum(stats['OmA_stdv']*stats['Ndata'], axis=1)/Ndata
     
     plot_var = 'OmF_mean'
 

From 80a68ebca01efc312be3061bbc6e0c48e7b20a88 Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Tue, 17 Jun 2025 14:14:58 -0400
Subject: [PATCH 13/45] clarify saved stats variables

---
 .../ObsFcstAna_stats/Plot_stats_maps.py       | 45 ++---------
 .../ObsFcstAna_stats/Plot_stats_timeseries.py | 10 ++-
 .../ObsFcstAna_stats/postproc_ObsFcstAna.py   | 80 ++++++++++++++++---
 3 files changed, 84 insertions(+), 51 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
index 9bfc88ee..297af9be 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
@@ -27,46 +27,13 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'):
     tc         = postproc_obj.tilecoord
     tg         = postproc_obj.tilegrid_global
     
-    # Sample of final compuation of selected diagnostic metrics 
-     
-    Nmin = 20
-
-    # Then compute metrics of O-F, O-A, etc. based on above computed
     N_data = stats['N_data']
-    O_mean = stats['obs_mean']
-    # mean(x-y) = E[x] - E[y]   
-    OmF_mean = stats['obs_mean'] - stats['fcst_mean']
-    OmA_mean = stats['obs_mean'] - stats['ana_mean']
-    # var(x-y) = var(x) + var(y) - 2cov(x,y)
-    # cov(x,y) = E[xy] - E[x]E[y]
-    OmF_stdv  = np.sqrt(stats['obs_variance'] + stats['fcst_variance'] - \
-                        2 * (stats['oxf_mean'] - stats['obs_mean']*stats['fcst_mean']))
-                        
-    OmA_stdv  = np.sqrt(stats['obs_variance'] + stats['ana_variance'] - \
-                        2 * (stats['oxa_mean'] - stats['obs_mean']*stats['ana_mean']))
-
-    # *****************************************************************************************
-    # The time series mean and std-dev of the *normalized* OmF computed here are APPROXIMATED!
-    # *****************************************************************************************
-    # Here, we first compute the stats of the OmF time series and then normalize using 
-    # the time-avg "obsvar" and "fcstvar" values.
-    # Since "fcstvar" changes with time, the OmF values should be normalized at each time 
-    # step (as in the older matlab scripts), and then the time series stats can be computed. 
-    # To compute the exact stats with this python package, the sum and sum-of-squares of 
-    # the normalized OmF values would need to be added into the sums files. 
-    #
-    OmF_norm_mean = OmF_mean / np.sqrt(stats['obsvar_mean'] + stats['fcstvar_mean'])         # APPROXIMATED stat!
-    OmF_norm_stdv = np.sqrt(OmF_stdv**2 / (stats['obsvar_mean'] + stats['fcstvar_mean']) )   # APPROXIMATED stat!
-      
-    # Mask out data points with insufficent observations using the Nmin threshold
-    # Do NOT apply to N_data
-    OmF_mean[     N_data < Nmin] = np.nan
-    OmF_stdv[     N_data < Nmin] = np.nan
-    OmF_norm_mean[N_data < Nmin] = np.nan
-    OmF_norm_stdv[N_data < Nmin] = np.nan
-    OmA_mean[     N_data < Nmin] = np.nan
-    OmA_stdv[     N_data < Nmin] = np.nan
-    N_data[       N_data < Nmin] = 0
+    OmF_mean = stats['OmF_mean']
+    OmF_stdv = stats['OmF_stdv']
+    OmA_mean = stats['OmA_mean']
+    OmA_stdv = stats['OmA_stdv']
+    OmF_norm_mean = stats['OmF_norm_mean']
+    OmF_norm_stdv = stats['OmF_norm_stdv']
 
     # Compute Nobs-weighted avg of each metric across all species.
     # Typically used for SMAP Tb_h/h from asc and desc overpasses,
diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
index bd763165..fbb4d35f 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
@@ -46,7 +46,13 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'):
         while current_time < end_time:
             print('compute monthly spatial mean stats for '+ current_time.strftime('%Y%m'))
 
-            OmFm,OmFs,OmAm,OmAs,Nobsm = postproc_obj.calc_spatial_stats_from_sums(current_time)
+            stats_mo = postproc_obj.calc_spatial_stats_from_sums(current_time)
+
+            OmFm = stats_mo['OmF_mean']
+            OmFs = stats_mo['OmF_stdv']
+            OmAm = stats_mo['OmA_mean']
+            OmAs = stats_mo['OmA_stdv']
+            Nobsm = stats_mo['N_data']
                   
             Ndata.append(Nobsm)
 
@@ -87,7 +93,7 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'):
     plt.grid(True, linestyle='--', alpha=0.5)
 
     plt.xticks(ticks=date_vec[::2], labels=date_vec[::2])
-    plt.title(exptag+ 'monthly '+plot_var)
+    plt.title(exptag+ ' monthly '+plot_var)
     plt.xlim(-1, len(date_vec)+1)
     plt.ylim(-.1, 2.)
 
diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
index e191a154..98809d23 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
@@ -227,7 +227,7 @@ def save_monthly_sums(self):
     #
     # Assumes that monthly sums files have been saved [see save_monthly_sums()].
 
-    def calc_temporal_stats_from_sums(self, write_to_nc=True, fout_stats='./stats.nc4'):
+    def calc_temporal_stats_from_sums(self, write_to_nc=True, fout_stats='./stats.nc4', Nmin=20):
 
         if os.path.isfile(fout_stats):
 
@@ -320,14 +320,65 @@ def calc_temporal_stats_from_sums(self, write_to_nc=True, fout_stats='./stats.nc
             oxa_mean = oxa_sum / N_data
             fxa_mean = fxa_sum / N_data
 
-            stats = {}
-            for var in var_list:
-                stats[var[4:]+'_mean']     = data_mean[var]
-                stats[var[4:]+'_variance'] = data_var[ var]
-            stats['oxf_mean'] = oxf_mean
-            stats['oxa_mean'] = oxa_mean
-            stats['fxa_mean'] = fxa_mean
-            stats['N_data']   = N_data
+            # Then compute metrics of O-F, O-A, etc. based on above computed
+            
+            O_mean = data_mean['obs_obs']
+            F_mean = data_mean['obs_fcst']
+            A_mean = data_mean['obs_ana']
+
+            O_stdv = np.sqrt(data_var['obs_obs'])
+            F_stdv = np.sqrt(data_var['obs_fcst'])
+            A_stdv = np.sqrt(data_var['obs_ana'])
+            
+            # mean(x-y) = E[x] - E[y]   
+            OmF_mean = O_mean - F_mean
+            OmA_mean = O_mean - A_mean
+            
+            # var(x-y) = var(x) + var(y) - 2cov(x,y)
+            # cov(x,y) = E[xy] - E[x]E[y]
+            OmF_stdv  = np.sqrt(O_stdv**2 + F_stdv**2 - 2 * (oxf_mean - O_mean*F_mean))
+            OmA_stdv  = np.sqrt(O_stdv**2 + A_stdv**2 - 2 * (oxa_mean - O_mean*A_mean))
+
+            # *****************************************************************************************
+            # The time series mean and std-dev of the *normalized* OmF computed here are APPROXIMATED!
+            # *****************************************************************************************
+            # Here, we first compute the stats of the OmF time series and then normalize using 
+            # the time-avg "obsvar" and "fcstvar" values.
+            # Since "fcstvar" changes with time, the OmF values should be normalized at each time 
+            # step (as in the older matlab scripts), and then the time series stats can be computed. 
+            # To compute the exact stats with this python package, the sum and sum-of-squares of 
+            # the normalized OmF values would need to be added into the sums files. 
+            #
+            OmF_norm_mean = OmF_mean / np.sqrt(data_mean['obs_obsvar'] + data_mean['obs_fcstvar'])      # APPROXIMATED stat!
+            OmF_norm_stdv = np.sqrt(OmF_stdv**2 / (data_mean['obs_obsvar'] + data_mean['obs_fcstvar'])) # APPROXIMATED stat!
+              
+            # Mask out data points with insufficent observations using the Nmin threshold
+            # Do NOT apply to N_data
+            O_mean[       N_data < Nmin] = np.nan
+            O_stdv[       N_data < Nmin] = np.nan
+            F_mean[       N_data < Nmin] = np.nan
+            F_stdv[       N_data < Nmin] = np.nan
+            A_mean[       N_data < Nmin] = np.nan
+            A_stdv[       N_data < Nmin] = np.nan
+            
+            OmF_mean[     N_data < Nmin] = np.nan
+            OmF_stdv[     N_data < Nmin] = np.nan
+            OmF_norm_mean[N_data < Nmin] = np.nan
+            OmF_norm_stdv[N_data < Nmin] = np.nan
+            OmA_mean[     N_data < Nmin] = np.nan
+            OmA_stdv[     N_data < Nmin] = np.nan
+            N_data[       N_data < Nmin] = 0
+
+            stats = {
+                'O_mean'  : O_mean,   'O_stdv'  : O_stdv,
+                'F_mean'  : F_mean,   'F_stdv'  : F_stdv,
+                'A_mean'  : A_mean,   'A_stdv'  : A_stdv,
+                'OmF_mean': OmF_mean, 'OmF_stdv': OmF_stdv,
+                'OmA_mean': OmA_mean, 'OmA_stdv': OmA_stdv,
+                'OmF_norm_mean': OmF_norm_mean,
+                'OmF_norm_stdv': OmF_norm_stdv,
+                'N_data': N_data,
+                }
 
             if write_to_nc:
                 print('writing stats nc4 file: '+fout_stats)
@@ -418,6 +469,15 @@ def calc_spatial_stats_from_sums(self, date_time):
         OmF_stdv  = np.sqrt(O_var + F_var - 2 * (OxF_mean - O_mean*F_mean))
         OmA_stdv  = np.sqrt(O_var + A_var - 2 * (OxA_mean - O_mean*A_mean))
 
-        return OmF_mean, OmF_stdv, OmA_mean, OmA_stdv, N_data
+        stats = {
+            'O_mean'  : O_mean,   'O_stdv'  : np.sqrt(O_var),
+            'F_mean'  : F_mean,   'F_stdv'  : np.sqrt(F_var),
+            'A_mean'  : A_mean,   'A_stdv'  : np.sqrt(A_var),
+            'OmF_mean': OmF_mean, 'OmF_stdv': OmF_stdv,
+            'OmA_mean': OmA_mean, 'OmA_stdv': OmA_stdv,
+            'N_data': N_data,
+            }
+
+        return stats
     
 # ============== EOF ====================================================================

From 6237a890fb076e020af4f3be9129d48c6a17aa1e Mon Sep 17 00:00:00 2001
From: Rolf Reichle <rolf.reichle@nasa.gov>
Date: Tue, 17 Jun 2025 15:59:39 -0400
Subject: [PATCH 14/45] minor changes in processing of temporal stats
 (postproc_ObsFcstAna.py, Plot_stats_maps.py): - removed Nmin from
 calc_temporal_stats_from_sums() - moved read of stats file to
 Plot_stats_maps.py - edited/added comments - fixed vertical alignment

---
 .../ObsFcstAna_stats/Plot_stats_maps.py       |  45 ++-
 .../ObsFcstAna_stats/postproc_ObsFcstAna.py   | 282 +++++++++---------
 2 files changed, 168 insertions(+), 159 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
index 297af9be..91e776bb 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
@@ -2,7 +2,7 @@
 
 """
 Sample script for plotting maps of long-term data assimilation diagnostics.
-Computes Nobs-weighted avg of each metric across all species.
+Plots Nobs-weighted avg of each metric across all species.
 Requires saved files with monthly sums (see Get_ObsFcstAna_stat.py).
 Stats of *normalized* OmFs are approximated!
 """
@@ -21,17 +21,17 @@
 
 def plot_OmF_maps(postproc_obj, stats, fig_path='./'):
     
-    start_time = postproc_obj.start_time
-    end_time   = postproc_obj.end_time
-    domain     = postproc_obj.domain
-    tc         = postproc_obj.tilecoord
-    tg         = postproc_obj.tilegrid_global
+    start_time    = postproc_obj.start_time
+    end_time      = postproc_obj.end_time
+    domain        = postproc_obj.domain
+    tc            = postproc_obj.tilecoord
+    tg            = postproc_obj.tilegrid_global
     
-    N_data = stats['N_data']
-    OmF_mean = stats['OmF_mean']
-    OmF_stdv = stats['OmF_stdv']
-    OmA_mean = stats['OmA_mean']
-    OmA_stdv = stats['OmA_stdv']
+    N_data        = stats['N_data']
+    OmF_mean      = stats['OmF_mean']
+    OmF_stdv      = stats['OmF_stdv']
+    OmA_mean      = stats['OmA_mean']
+    OmA_stdv      = stats['OmA_stdv']
     OmF_norm_mean = stats['OmF_norm_mean']
     OmF_norm_stdv = stats['OmF_norm_stdv']
 
@@ -61,7 +61,7 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'):
                 # crange is [cmin, cmax]
                 crange =[0, np.ceil((end_time-start_time).days/150)*300]
                 colormap = plt.get_cmap('jet',20)
-                title_txt = exptag + ' Tb Nobs '+ start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
+                title_txt = exptag + ' Tb Nobs '    + start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
                 units = '[-]'
             if i == 0 and j ==1:
                 tile_data = OmF_mean
@@ -138,6 +138,8 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'):
 # -----------------------------------------------------------------------------------------------
     
 if __name__ == '__main__':
+
+    # Plot maps of long-term temporal stats
     
     from postproc_ObsFcstAna    import postproc_ObsFcstAna
     from user_config            import get_config
@@ -151,18 +153,29 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'):
     out_path   = config['out_path']
      
     # ------------------------------------------------------------------------------------
+
     # Initialize the postprocessing object
     postproc = postproc_ObsFcstAna(exp_list, start_time, end_time, sum_path=sum_path)
 
-    # Compute long-term temporal stats and plot maps
+    # File name for long-term temporal stats
     stats_file  = out_path + 'temporal_stats_'+exp_list[0]['exptag']+'_'+ start_time.strftime('%Y%m%d')+'_'+ \
         (end_time+timedelta(days=-1)).strftime('%Y%m%d')+'.nc4'
 
-    # temporal_stats is a dictionary that contains all mean/variances fields for computing long-term O-F/O-A stats
-    # each field has the dimension [N_tile, N_species]
 
-    temporal_stats = postproc.calc_temporal_stats_from_sums(write_to_nc=True, fout_stats=stats_file)
+    # Read or compute long-term temporal stats.  Each field has the dimension [N_tile, N_species].
+    if os.path.isfile(stats_file):
+
+        print('reading stats nc4 file '+ stats_file)
+        temporal_stats = {}
+        with Dataset(stats_file,'r') as nc:
+            for key, value in nc.variables.items():
+                temporal_stats[key] = value[:].filled(np.nan)
+
+    else:
+        temporal_stats = postproc.calc_temporal_stats_from_sums(write_to_nc=True, fout_stats=stats_file)
 
+    # Plot stats
+        
     plot_OmF_maps(postproc, temporal_stats, fig_path=out_path )
 
 # ====================== EOF =========================================================
diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
index 98809d23..03f95380 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
@@ -227,162 +227,158 @@ def save_monthly_sums(self):
     #
     # Assumes that monthly sums files have been saved [see save_monthly_sums()].
 
-    def calc_temporal_stats_from_sums(self, write_to_nc=True, fout_stats='./stats.nc4', Nmin=20):
+    def calc_temporal_stats_from_sums(self, write_to_nc=True, fout_stats='./stats.nc4'):
 
-        if os.path.isfile(fout_stats):
+        # Variable list for computing sum and sum-of-squares
+        var_list = self.var_list 
 
-            print('reading stats nc4 file '+ fout_stats)
-            stats = {}
-            with Dataset(fout_stats,'r') as nc:
-                for key, value in nc.variables.items():
-                    stats[key] = value[:].filled(np.nan)
+        # Read tilecoord and obsparam for tile and obs species information
+        n_tile = self.tilecoord['N_tile']
+        n_spec = len(self.obsparam_list[0])
 
-        else:
-            # Variable list for computing sum and sum-of-squares
-            var_list = self.var_list 
-
-            # Read tilecoord and obsparam for tile and obs species information
-            n_tile = self.tilecoord['N_tile']
-            n_spec = len(self.obsparam_list[0])
+        # ---------------------------------------------------------------
+        #
+        # compute accumulated sums for period (start_time, end_time)
+        
+        # Initialize statistical metrics 
+        data_sum  = {}
+        data2_sum = {}
+        N_data    = np.zeros((n_tile, n_spec))
+        oxf_sum   = np.zeros((n_tile, n_spec))
+        oxa_sum   = np.zeros((n_tile, n_spec))
+        fxa_sum   = np.zeros((n_tile, n_spec))
 
-            # Initialize statistical metrics 
-            data_sum  = {}
-            data2_sum = {}
-            N_data  = np.zeros((n_tile, n_spec))
-            oxf_sum = np.zeros((n_tile, n_spec))
-            oxa_sum = np.zeros((n_tile, n_spec))
-            fxa_sum = np.zeros((n_tile, n_spec))
+        for var in var_list:
+            data_sum[ var] = np.zeros((n_tile, n_spec))
+            data2_sum[var] = np.zeros((n_tile, n_spec))
 
-            for var in var_list:
-                data_sum[ var] = np.zeros((n_tile, n_spec))
-                data2_sum[var] = np.zeros((n_tile, n_spec))
+        # Time loop: processing data at monthly time step
+        
+        date_time = self.start_time
 
-            # Time loop: processing data at monthly time step
+        while date_time < self.end_time:      # loop through months
             
-            date_time = self.start_time
-
-            while date_time < self.end_time:      # loop through months
+            fpath = self.sum_path + '/Y'+ date_time.strftime('%Y') + '/M' + date_time.strftime('%m') + '/'
                 
-                fpath = self.sum_path + '/Y'+ date_time.strftime('%Y') + '/M' + date_time.strftime('%m') + '/'
-                    
-                fout  = self.exptag + '.ens_avg.ldas_ObsFcstAna_sums.' + date_time.strftime('%Y%m') +'.nc4'
-
-                fout  = fpath + fout
-                                
-                # Read monthly data 
-                if os.path.isfile(fout):
-                    print('read sums from monthly file: '+fout)
-                    mdata_sum  = {}
-                    mdata2_sum = {}
-                    with Dataset(fout,'r') as nc:
-                        mN_data  = nc.variables['N_data'      ][:]
-                        moxf_sum = nc.variables['obsxfcst_sum'][:]
-                        moxa_sum = nc.variables['obsxana_sum' ][:]
-                        mfxa_sum = nc.variables['fcstxana_sum'][:]
-                        for var in var_list:
-                            mdata_sum[ var] = nc.variables[var+'_sum' ][:]
-                            mdata2_sum[var] = nc.variables[var+'2_sum'][:]
-                           
-                    # Aggregate monthly data
-                    N_data  += mN_data
-                    oxf_sum += moxf_sum
-                    oxa_sum += moxa_sum
-                    fxa_sum += mfxa_sum
-                   
+            fout  = self.exptag + '.ens_avg.ldas_ObsFcstAna_sums.' + date_time.strftime('%Y%m') +'.nc4'
+
+            fout  = fpath + fout
+                            
+            # Read and accumulate monthly sums data 
+            if os.path.isfile(fout):
+                print('read sums from monthly file: '+fout)
+                mdata_sum  = {}
+                mdata2_sum = {}
+                with Dataset(fout,'r') as nc:
+                    mN_data             = nc.variables['N_data'      ][:]
+                    moxf_sum            = nc.variables['obsxfcst_sum'][:]
+                    moxa_sum            = nc.variables['obsxana_sum' ][:]
+                    mfxa_sum            = nc.variables['fcstxana_sum'][:]
                     for var in var_list:
-                        data_sum[ var] += mdata_sum[ var] 
-                        data2_sum[var] += mdata2_sum[var]  
-                else:
-                    raise FileNotFoundError(f"File {fout} does not exist, run save_monthly_sums() first")
-                     
-                date_time = date_time + relativedelta(months=1)
-
-            # Compute stats (DA diagnostics) from accumulated sums data.
-            data_mean  = {}
-            data2_mean = {}
-            data_var   = {}
+                        mdata_sum[ var] = nc.variables[var+'_sum'    ][:]
+                        mdata2_sum[var] = nc.variables[var+'2_sum'   ][:]
+                       
+                # Aggregate monthly data
+                N_data  += mN_data
+                oxf_sum += moxf_sum
+                oxa_sum += moxa_sum
+                fxa_sum += mfxa_sum
+               
+                for var in var_list:
+                    data_sum[ var] += mdata_sum[ var] 
+                    data2_sum[var] += mdata2_sum[var]  
+            else:
+                raise FileNotFoundError(f"File {fout} does not exist, run save_monthly_sums() first")
+                 
+            date_time = date_time + relativedelta(months=1)
 
-            # calculation  
-            for var in var_list:
-                data_sum[var][ N_data == 0] = np.nan
-                data2_sum[var][N_data == 0] = np.nan
-                
-                data_mean[ var] = data_sum[ var] / N_data
-                data2_mean[var] = data2_sum[var] / N_data
-                # var(x) = E[x2] - (E[x])^2
-                data_var[var]   = data2_mean[var] - data_mean[var]**2
-                
-            oxf_sum[N_data == 0] = np.nan
-            oxa_sum[N_data == 0] = np.nan
-            fxa_sum[N_data == 0] = np.nan
-            # E[xy]
-            oxf_mean = oxf_sum / N_data
-            oxa_mean = oxa_sum / N_data
-            fxa_mean = fxa_sum / N_data
-
-            # Then compute metrics of O-F, O-A, etc. based on above computed
-            
-            O_mean = data_mean['obs_obs']
-            F_mean = data_mean['obs_fcst']
-            A_mean = data_mean['obs_ana']
+        # --------------------------------------------------------------------
+        #
+        # Compute stats (DA diagnostics) from accumulated sums data.
+        
+        data_mean  = {}
+        data2_mean = {}
+        data_var   = {}
 
-            O_stdv = np.sqrt(data_var['obs_obs'])
-            F_stdv = np.sqrt(data_var['obs_fcst'])
-            A_stdv = np.sqrt(data_var['obs_ana'])
-            
-            # mean(x-y) = E[x] - E[y]   
-            OmF_mean = O_mean - F_mean
-            OmA_mean = O_mean - A_mean
+        # calculation  
+        for var in var_list:
+            data_sum[var][ N_data == 0] = np.nan
+            data2_sum[var][N_data == 0] = np.nan
             
-            # var(x-y) = var(x) + var(y) - 2cov(x,y)
-            # cov(x,y) = E[xy] - E[x]E[y]
-            OmF_stdv  = np.sqrt(O_stdv**2 + F_stdv**2 - 2 * (oxf_mean - O_mean*F_mean))
-            OmA_stdv  = np.sqrt(O_stdv**2 + A_stdv**2 - 2 * (oxa_mean - O_mean*A_mean))
-
-            # *****************************************************************************************
-            # The time series mean and std-dev of the *normalized* OmF computed here are APPROXIMATED!
-            # *****************************************************************************************
-            # Here, we first compute the stats of the OmF time series and then normalize using 
-            # the time-avg "obsvar" and "fcstvar" values.
-            # Since "fcstvar" changes with time, the OmF values should be normalized at each time 
-            # step (as in the older matlab scripts), and then the time series stats can be computed. 
-            # To compute the exact stats with this python package, the sum and sum-of-squares of 
-            # the normalized OmF values would need to be added into the sums files. 
-            #
-            OmF_norm_mean = OmF_mean / np.sqrt(data_mean['obs_obsvar'] + data_mean['obs_fcstvar'])      # APPROXIMATED stat!
-            OmF_norm_stdv = np.sqrt(OmF_stdv**2 / (data_mean['obs_obsvar'] + data_mean['obs_fcstvar'])) # APPROXIMATED stat!
-              
-            # Mask out data points with insufficent observations using the Nmin threshold
-            # Do NOT apply to N_data
-            O_mean[       N_data < Nmin] = np.nan
-            O_stdv[       N_data < Nmin] = np.nan
-            F_mean[       N_data < Nmin] = np.nan
-            F_stdv[       N_data < Nmin] = np.nan
-            A_mean[       N_data < Nmin] = np.nan
-            A_stdv[       N_data < Nmin] = np.nan
+            data_mean[ var] = data_sum[ var] / N_data
+            data2_mean[var] = data2_sum[var] / N_data
+            # var(x) = E[x2] - (E[x])^2
+            data_var[var]   = data2_mean[var] - data_mean[var]**2
             
-            OmF_mean[     N_data < Nmin] = np.nan
-            OmF_stdv[     N_data < Nmin] = np.nan
-            OmF_norm_mean[N_data < Nmin] = np.nan
-            OmF_norm_stdv[N_data < Nmin] = np.nan
-            OmA_mean[     N_data < Nmin] = np.nan
-            OmA_stdv[     N_data < Nmin] = np.nan
-            N_data[       N_data < Nmin] = 0
-
-            stats = {
-                'O_mean'  : O_mean,   'O_stdv'  : O_stdv,
-                'F_mean'  : F_mean,   'F_stdv'  : F_stdv,
-                'A_mean'  : A_mean,   'A_stdv'  : A_stdv,
-                'OmF_mean': OmF_mean, 'OmF_stdv': OmF_stdv,
-                'OmA_mean': OmA_mean, 'OmA_stdv': OmA_stdv,
-                'OmF_norm_mean': OmF_norm_mean,
-                'OmF_norm_stdv': OmF_norm_stdv,
-                'N_data': N_data,
-                }
-
-            if write_to_nc:
-                print('writing stats nc4 file: '+fout_stats)
-                write_stats_nc4(fout_stats, stats)
+        oxf_sum[N_data == 0] = np.nan
+        oxa_sum[N_data == 0] = np.nan
+        fxa_sum[N_data == 0] = np.nan
+        # E[xy]
+        oxf_mean = oxf_sum / N_data
+        oxa_mean = oxa_sum / N_data
+        fxa_mean = fxa_sum / N_data
+
+        # Then compute metrics of O-F, O-A, etc. based on above computed
+        
+        O_mean = data_mean['obs_obs']
+        F_mean = data_mean['obs_fcst']
+        A_mean = data_mean['obs_ana']
+
+        O_stdv = np.sqrt(data_var['obs_obs'])
+        F_stdv = np.sqrt(data_var['obs_fcst'])
+        A_stdv = np.sqrt(data_var['obs_ana'])
+        
+        # mean(x-y) = E[x] - E[y]   
+        OmF_mean = O_mean - F_mean
+        OmA_mean = O_mean - A_mean
+        
+        # var(x-y) = var(x) + var(y) - 2cov(x,y)
+        # cov(x,y) = E[xy] - E[x]E[y]
+        OmF_stdv  = np.sqrt(O_stdv**2 + F_stdv**2 - 2 * (oxf_mean - O_mean*F_mean))
+        OmA_stdv  = np.sqrt(O_stdv**2 + A_stdv**2 - 2 * (oxa_mean - O_mean*A_mean))
+
+        # *****************************************************************************************
+        # The time series mean and std-dev of the *normalized* OmF computed here are APPROXIMATED!
+        # *****************************************************************************************
+        # Here, we first compute the stats of the OmF time series and then normalize using 
+        # the time-avg "obsvar" and "fcstvar" values.
+        # Since "fcstvar" changes with time, the OmF values should be normalized at each time 
+        # step (as in the older matlab scripts), and then the time series stats can be computed. 
+        # To compute the exact stats with this python package, the sum and sum-of-squares of 
+        # the normalized OmF values would need to be added into the sums files. 
+        #
+        OmF_norm_mean = OmF_mean / np.sqrt(data_mean['obs_obsvar'] + data_mean['obs_fcstvar'])      # APPROXIMATED stat!
+        OmF_norm_stdv = np.sqrt(OmF_stdv**2 / (data_mean['obs_obsvar'] + data_mean['obs_fcstvar'])) # APPROXIMATED stat!
+          
+        # Mask out data points without any obs (NOTE: apply Nmin threshold when plotting or computing map avg)
+        # Do NOT apply to N_data
+        O_mean[       N_data < 1] = np.nan
+        O_stdv[       N_data < 1] = np.nan
+        F_mean[       N_data < 1] = np.nan
+        F_stdv[       N_data < 1] = np.nan
+        A_mean[       N_data < 1] = np.nan
+        A_stdv[       N_data < 1] = np.nan
+        
+        OmF_mean[     N_data < 1] = np.nan
+        OmF_stdv[     N_data < 1] = np.nan
+        OmF_norm_mean[N_data < 1] = np.nan
+        OmF_norm_stdv[N_data < 1] = np.nan
+        OmA_mean[     N_data < 1] = np.nan
+        OmA_stdv[     N_data < 1] = np.nan
+
+        stats = {
+            'O_mean'       : O_mean,        'O_stdv'       : O_stdv,
+            'F_mean'       : F_mean,        'F_stdv'       : F_stdv,
+            'A_mean'       : A_mean,        'A_stdv'       : A_stdv,
+            'OmF_mean'     : OmF_mean,      'OmF_stdv'     : OmF_stdv,
+            'OmA_mean'     : OmA_mean,      'OmA_stdv'     : OmA_stdv,
+            'OmF_norm_mean': OmF_norm_mean, 'OmF_norm_stdv': OmF_norm_stdv,
+            'N_data'       : N_data,
+            }
+
+        if write_to_nc:
+            print('writing stats nc4 file: '+fout_stats)
+            write_stats_nc4(fout_stats, stats)
             
         return stats
 

From a285d6a2fb0be6c096c756916ddb1a775dbaa02c Mon Sep 17 00:00:00 2001
From: Rolf Reichle <rolf.reichle@nasa.gov>
Date: Tue, 17 Jun 2025 16:05:26 -0400
Subject: [PATCH 15/45] fixed vertical alignment (Plot_stats_timeseries.py)

---
 .../postproc/ObsFcstAna_stats/Plot_stats_timeseries.py    | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
index fbb4d35f..f7a40e88 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
@@ -48,10 +48,10 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'):
 
             stats_mo = postproc_obj.calc_spatial_stats_from_sums(current_time)
 
-            OmFm = stats_mo['OmF_mean']
-            OmFs = stats_mo['OmF_stdv']
-            OmAm = stats_mo['OmA_mean']
-            OmAs = stats_mo['OmA_stdv']
+            OmFm  = stats_mo['OmF_mean']
+            OmFs  = stats_mo['OmF_stdv']
+            OmAm  = stats_mo['OmA_mean']
+            OmAs  = stats_mo['OmA_stdv']
             Nobsm = stats_mo['N_data']
                   
             Ndata.append(Nobsm)

From 8df81963d626f9070933f8ad252ca98846c97eaa Mon Sep 17 00:00:00 2001
From: Rolf Reichle <rolf.reichle@nasa.gov>
Date: Tue, 17 Jun 2025 17:29:20 -0400
Subject: [PATCH 16/45] dummy white-space change to facilitate Github comments
 (Get_ObsFcstAna_stats.py)

---
 .../util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py
index cf7e61a0..c4cf6620 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py
@@ -69,7 +69,7 @@ def main():
     # The plotting scripts can also run standalone using the individual Plot_stats_*.py scripts,
     #   as long as the monthly sum files are available.
     
-    plot_maps       = False
+    plot_maps       = False 
     plot_timeseries = False
 
     if plot_maps:            # Compute long-term temporal stats and plot maps

From 4c9755d0be04fdf867ec947703607e57920de48b Mon Sep 17 00:00:00 2001
From: Rolf Reichle <54944691+gmao-rreichle@users.noreply.github.com>
Date: Wed, 18 Jun 2025 14:45:27 -0400
Subject: [PATCH 17/45] Updated CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5703e81f..255c8540 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Added optional SLURM "constraint".
 - Added functionality to run on tile space of stretched cube-sphere grids.
-- Added python package for post-processing ObsFcstAna output.
+- Added (and later revised) python package for post-processing ObsFcstAna output.
 
 
 ### Changed

From 133eef94a2abacd0844506f947ea905cb1e90a23 Mon Sep 17 00:00:00 2001
From: Weiyuan Jiang <weiyuan.jiang@nasa.gov>
Date: Mon, 23 Jun 2025 14:39:45 -0400
Subject: [PATCH 18/45] added more 2D outputs to sample HISTORY.rc

---
 CHANGELOG.md                  |   1 +
 GEOSldas_App/GEOSldas_HIST.rc | 111 ++++++++++++++++++++++++++++------
 GEOSldas_App/process_hist.csh |   6 +-
 3 files changed, 96 insertions(+), 22 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5703e81f..6fe41d67 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- Added EASE to Latlon and Cubed-Sphere to EASE history outputs
 - Added optional SLURM "constraint".
 - Added functionality to run on tile space of stretched cube-sphere grids.
 - Added python package for post-processing ObsFcstAna output.
diff --git a/GEOSldas_App/GEOSldas_HIST.rc b/GEOSldas_App/GEOSldas_HIST.rc
index d9d09dd8..1b6a14c1 100644
--- a/GEOSldas_App/GEOSldas_HIST.rc
+++ b/GEOSldas_App/GEOSldas_HIST.rc
@@ -6,7 +6,7 @@
 #    "#CUBE            'tavg24_2d_lnd_Nx'"
 #  does *not* mean that the 'lnd' output will be on a cube-sphere grid. 
 
-#CUBE VERSION: 1
+VERSION: 1
 
 # Must edit 'EXPID' manually if HISTORY file is re-used without going  
 #  through "ldas_setup".
@@ -15,9 +15,10 @@ EXPID:  GEOSldas_expid
 
 COLLECTIONS:
 #EASE            'tavg24_1d_lfs_Nt'
-#CUBE            'tavg24_2d_lfs_Nx'	
-#EASE            'tavg24_1d_lnd_Nt'	
-#CUBE            'tavg24_2d_lnd_Nx'
+                 'tavg24_2d_lfs_Nx'
+#EASE            'tavg24_1d_lnd_Nt'
+                 'tavg24_2d_lnd_Nx'
+#CUBE            'tavg24_2d_lnd_EASE'
 #ASSIM           'SMAP_L4_SM_gph'
 #                'inst1_1d_lnr_Nt'
 #                'catch_progn_incr'
@@ -29,24 +30,28 @@ COLLECTIONS:
 #                'tavg24_1d_glc_Nt'
            ::
 
-#CUBE GRID_LABELS: PC720x361-DC
-#CUBE              PC1440x721-DC
+GRID_LABELS: PC720x361-DC
+             PC1440x721-DC
+             EASEv2_M36
+      ::
 
-#CUBE      ::
+PC720x361-DC.GRID_TYPE: LatLon
+PC720x361-DC.IM_WORLD: 720
+PC720x361-DC.JM_WORLD: 361
+PC720x361-DC.POLE: PC
+PC720x361-DC.DATELINE: DC
+PC720x361-DC.LM: 1
 
-#CUBE PC720x361-DC.GRID_TYPE: LatLon
-#CUBE PC720x361-DC.IM_WORLD: 720
-#CUBE PC720x361-DC.JM_WORLD: 361
-#CUBE PC720x361-DC.POLE: PC
-#CUBE PC720x361-DC.DATELINE: DC
-#CUBE PC720x361-DC.LM: 1
+PC1440x721-DC.GRID_TYPE: LatLon
+PC1440x721-DC.IM_WORLD: 1440
+PC1440x721-DC.JM_WORLD: 721
+PC1440x721-DC.POLE: PC
+PC1440x721-DC.DATELINE: DC
+PC1440x721-DC.LM: 1
 
-#CUBE PC1440x721-DC.GRID_TYPE: LatLon
-#CUBE PC1440x721-DC.IM_WORLD: 1440
-#CUBE PC1440x721-DC.JM_WORLD: 721
-#CUBE PC1440x721-DC.POLE: PC
-#CUBE PC1440x721-DC.DATELINE: DC
-#CUBE PC1440x721-DC.LM: 1
+EASEv2_M36.GRID_TYPE: EASE
+EASEv2_M36.GRIDNAME: EASEv2_M36
+EASEv2_M36.LM: 1
 
 # Detailed definition of the collections listed above 
 #
@@ -323,6 +328,74 @@ COLLECTIONS:
                                ::
 
 
+  tavg24_2d_lnd_EASE.format:        'CFIO',
+  tavg24_2d_lnd_EASE.descr:         '2d,Daily,Time-Averaged,Single-Level,Assimilation,Land Surface Diagnostics',
+  tavg24_2d_lnd_EASE.nbits:        12,
+  tavg24_2d_lnd_EASE.template:      '%y4%m2%d2_%h2%n2z.nc4',
+  tavg24_2d_lnd_EASE.mode:          'time-averaged',
+  tavg24_2d_lnd_EASE.frequency:     240000,
+  tavg24_2d_lnd_EASE.ref_time:      000000,
+  tavg24_2d_lnd_EASE.regrid_exch:   '../input/tile.data'
+  tavg24_2d_lnd_EASE.regrid_name:   'GRIDNAME'
+#  tavg24_2d_lnd_EASE.regrid_method: 'BILINEAR_MONOTONIC' ,
+  tavg24_2d_lnd_EASE.grid_label:    EASEv2_M36
+  tavg24_2d_lnd_EASE.deflate:       2,
+  tavg24_2d_lnd_EASE.fields:     'GRN'        , 'VEGDYN'  ,
+                               'LAI'        , 'VEGDYN'  ,
+                               'WET3'       , 'GridComp'  , 'GWETPROF'     ,
+                               'WET2'       , 'GridComp'  , 'GWETROOT'     ,
+                               'WET1'       , 'GridComp'  , 'GWETTOP'      ,
+                               'WCPR'       , 'GridComp'  , 'PRMC'         ,
+                               'WCRZ'       , 'GridComp'  , 'RZMC'         ,
+                               'WCSF'       , 'GridComp'  , 'SFMC'         ,
+                               'CAPAC'      , 'GridComp'  , 'INTRWATR'     ,
+                               'TPSNOW'     , 'GridComp'  , 'TPSNOWLAND'   ,
+                               'TPUNST'     , 'GridComp'  , 'TUNSTLAND'    ,
+                               'TPSAT'      , 'GridComp'  , 'TSATLAND'     ,
+                               'TPWLT'      , 'GridComp'  , 'TWLTLAND'     ,
+                               'TPSURF'     , 'GridComp'  , 'TSURFLAND'    ,
+                               'TP1'        , 'GridComp'  , 'TSOIL1'       ,         # CATCH GC: TP1,  ENSAVG GC: TSOIL1TILE
+                               'TP2'        , 'GridComp'  , 'TSOIL2'       ,         # ...
+                               'TP3'        , 'GridComp'  , 'TSOIL3'       ,         # ...
+                               'TP4'        , 'GridComp'  , 'TSOIL4'       ,         # ...
+                               'TP5'        , 'GridComp'  , 'TSOIL5'       ,         # ...
+                               'TP6'        , 'GridComp'  , 'TSOIL6'       ,         # ...
+                               'PRLAND'     , 'GridComp'  , 'PRECTOTCORRLAND'  ,     # assume "corrected" precip
+                               'SNOLAND'    , 'GridComp'  , 'PRECSNOCORRLAND'  ,     # assume "corrected" precip
+                               'TSLAND'     , 'GridComp'  , 'SNOMASLAND'   ,
+                               'SNOWDP'     , 'GridComp'  , 'SNODPLAND'    ,
+                               'EVPSOI'     , 'GridComp'  , 'LHLANDSOIL'   ,
+                               'EVPVEG'     , 'GridComp'  , 'LHLANDTRNS'   ,
+                               'EVPINT'     , 'GridComp'  , 'LHLANDINTR'   ,
+                               'EVPICE'     , 'GridComp'  , 'LHLANDSBLN'   ,
+                               'RUNSURF'    , 'GridComp'  , 'RUNSURFLAND'  ,
+                               'BASEFLOW'   , 'GridComp'  , 'BASEFLOWLAND' ,
+                               'SMLAND'     , 'GridComp'  ,
+                               'QINFIL'     , 'GridComp'  , 'QINFILLAND'   ,
+                               'FRUST'      , 'GridComp'  , 'FRLANDUNST'   ,
+                               'FRSAT'      , 'GridComp'  , 'FRLANDSAT'    ,
+                               'ASNOW'      , 'GridComp'  , 'FRLANDSNO'    ,
+                               'FRWLT'      , 'GridComp'  , 'FRLANDWLT'    ,
+                               'DFPARLAND'  , 'GridComp'  , 'PARDFLAND'    ,
+                               'DRPARLAND'  , 'GridComp'  , 'PARDRLAND'    ,
+                               'SHLAND'     , 'GridComp'  ,
+                               'LHLAND'     , 'GridComp'  ,
+                               'EVLAND'     , 'GridComp'  ,
+                               'LWLAND'     , 'GridComp'  ,
+                               'SWLAND'     , 'GridComp'  ,
+                               'GHLAND'     , 'GridComp'  ,
+                               'TWLAND'     , 'GridComp'  ,
+                               'TELAND'     , 'GridComp'  ,
+                               'DWLAND'     , 'GridComp'  , 'WCHANGELAND'  ,
+                               'DHLAND'     , 'GridComp'  , 'ECHANGELAND'  ,
+                               'SPLAND'     , 'GridComp'  , 'SPSHLAND'     ,
+#                               'SPLH'       , 'GridComp'  , 'SPLHLAND'     ,        # works for Catch only, not yet for CatchCN
+                               'SPWATR'     , 'GridComp'  , 'SPEVLAND'     ,
+                               'SPSNOW'     , 'GridComp'  , 'SPSNLAND'     ,
+                               'PEATCLSM_WATERLEVEL', 'GridComp'  ,
+                               'PEATCLSM_FSWCHANGE' , 'GridComp'  ,
+                           ::
+
   const_1d_lnd_Nt.descr:       'Tile-space,Constant,Time-invariant,Single-Level,Assimilation,Land Surface Model Parameters',
   const_1d_lnd_Nt.template:    '%y4%m2%d2_%h2%n2z.bin',
   const_1d_lnd_Nt.mode:        'instantaneous',
diff --git a/GEOSldas_App/process_hist.csh b/GEOSldas_App/process_hist.csh
index 4c6516ff..deaba8df 100644
--- a/GEOSldas_App/process_hist.csh
+++ b/GEOSldas_App/process_hist.csh
@@ -7,7 +7,7 @@
 setenv    LSM_CHOICE $1
 setenv    AEROSOL_DEPOSITION $2
 setenv    GRID $3
-setenv    GRIDNAME $4
+setenv    GRIDNAME "'$4'"
 setenv    HISTRC $5
 setenv    RUN_IRRIG $6
 setenv    ASSIM  $7
@@ -26,11 +26,11 @@ endif
 if($GRID == CUBE) then
    sed -i '/^\#EASE/d' $HISTRC
    sed -i 's|\#CUBE|''|g' $HISTRC
-   sed -i 's|GRIDNAME|'"$GRIDNAME"'|g' $HISTRC
+   sed -i -e  s/\'GRIDNAME\'/$GRIDNAME/g $HISTRC
 else
    sed -i '/^\#CUBE/d' $HISTRC
    sed -i 's|\#EASE|''|g' $HISTRC
-   sed -i 's|GRIDNAME|'"$GRIDNAME"'|g' $HISTRC
+   sed -i -e  s/\'GRIDNAME\'/$GRIDNAME/g $HISTRC
 endif
 
 if($LSM_CHOICE == 1) then

From 5160c6cffc6112031c0f924df1781c628bd8a6ab Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Mon, 23 Jun 2025 15:41:00 -0400
Subject: [PATCH 19/45] rename file and remove plotting options to avoid
 confusion

---
 .../ObsFcstAna_stats/Get_ObsFcstAna_sums.py   |  70 +++++++++
 GEOSldas_App/util/shared/python/tile2grid.py  | 140 ------------------
 2 files changed, 70 insertions(+), 140 deletions(-)
 create mode 100644 GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py
 delete mode 100644 GEOSldas_App/util/shared/python/tile2grid.py

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py
new file mode 100644
index 00000000..1c29ca94
--- /dev/null
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+
+"""
+Sample script for post-processing GEOSldas ObsFcstAna output into data assimilation diagnostics.
+First, compute and store monthly sums and sums of squares and cross-products of raw ObsFcstAna output.  
+Data assimilation diagnostics ("stats") such as the mean and std-dev of the observation-minus-forecast 
+residuals can then be diagnosed quickly from these intermediate "sums" files. 
+Sample script optionally computes and plots:
+- Maps of long-term data assimilation diagnostics (see also Plot_stats_maps.py).
+- Monthly time series of spatially averaged data assimilation diagnostics (see also Plot_stats_timeseries.py).
+
+Usage:
+    1. Edit "user_config.py" with experiments information.
+    2. Run this script as follows (on Discover):
+    $ module load python/GEOSpyD
+    $ ./Get_ObsFcstAna_sums.py
+    
+    # Background execution:
+    $ nohup ./Get_ObsFcstAna_sums.py > out.log &
+
+Authors: Q. Liu, R. Reichle, A. Fox
+Last Modified: May 2025
+"""
+
+import sys;       sys.path.append('../../shared/python/')
+import warnings;  warnings.filterwarnings("ignore")
+import os
+
+import numpy as np
+
+from datetime               import timedelta
+from postproc_ObsFcstAna    import postproc_ObsFcstAna
+from user_config            import get_config              # user-defined config info
+
+# ---
+#
+# If the script is run in the background, uncomment the following lines to see the redirected
+#   standard output in the out.log file immediately.  When the lines are commented out, the redirected
+#   standard output will not appear in the out.log file until the job has completed.
+# If the script is run in the foreground, the lines can be commented out to monitor the output log.
+#
+#import io
+#sys.stdout = io.TextIOWrapper(open(sys.stdout.fileno(), 'wb', 0), write_through=True)
+#sys.stderr = io.TextIOWrapper(open(sys.stderr.fileno(), 'wb', 0), write_through=True)
+#
+# ---
+
+def main():
+     
+    config     = get_config()                 # edit user-defined inputs in user_config.py
+    
+    exp_list   = config['exp_list']
+    start_time = config['start_time']
+    end_time   = config['end_time']
+    sum_path   = config['sum_path']
+    out_path   = config['out_path']
+     
+    # ------------------------------------------------------------------------------------
+    # Postprocess raw ObsFcstAna output data into monthly sums 
+
+    # Initialize the postprocessing object
+    postproc = postproc_ObsFcstAna(exp_list, start_time, end_time, sum_path=sum_path)
+
+    # Compute and save monthly sums
+    postproc.save_monthly_sums()
+
+if __name__ == '__main__':
+    main()
+
+# ====================== EOF =========================================================
diff --git a/GEOSldas_App/util/shared/python/tile2grid.py b/GEOSldas_App/util/shared/python/tile2grid.py
deleted file mode 100644
index e99b2f05..00000000
--- a/GEOSldas_App/util/shared/python/tile2grid.py
+++ /dev/null
@@ -1,140 +0,0 @@
-import os
-import numpy as np
-
-# convert data from 1-d array ("compressed") format to 2-d array (grid) format based on lat/lon coordinates
-#
-# 1-d input data can have one additional dimension (e.g., time)
-#
-# 2-d output data is lat-by-lon[-by-time]
-def interpolate_to_latlon_grid(data_1d, lat, lon, resolution=1.0):
-    """
-    For cubedsphere grid: use 
-    
-    """
-    from scipy.interpolate import griddata
-
-    lat_grid = np.arange(-90+resolution/2, 90, resolution)
-    lon_grid = np.arange(-180+resolution/2, 180, resolution)
-    lon_2d, lat_2d = np.meshgrid(lon_grid, lat_grid)
-
-    ny = len(lat_grid)
-    nx = len(lon_grid)
-
-    nf = data_1d.shape[-1]
-    data_2d = np.full([ny, nx, nf], np.nan)
-    for i in range(nf):
-        data = data_1d[:,i]
-        data_2d[:,:,i] = griddata((lat.flatten(), lon.flatten()), data.flatten(), (lat_2d, lon_2d), method='linear')
-        
-    return data_2d, lat_2d, lon_2d
-
-def remap_1d_to_2d( data_1d,  lat_1d, lon_1d):
-
-    """
-    For EASE grid: use remapping
-    
-    """
-    
-    # Extract unique coordinates 
-    unique_lats, indY = np.unique(lat_1d, return_inverse=True)
-    unique_lons, indX = np.unique(lon_1d, return_inverse=True)
-
-    ny = len(unique_lats)
-    nx = len(unique_lons)
-    
-    nf = data_1d.shape[1]
-    data_2d = np.full([ny, nx, nf], np.nan)
-    data_2d[indY, indX, :] = data_1d
-        
-    lon_2d,lat_2d = np.meshgrid(unique_lons, unique_lats)
-    
-    return data_2d, lat_2d, lon_2d
-
-def tile2grid( tile_data, tile_coord, tile_grid, nodata=1.e15, nodata_tol=1.e-4):
-    
-    """
-    Convert tile-space data to grid-space data.
-    
-    Parameters:
-    ----------
-    tile_data : Array in tile space, shape (N_tile, N_fields)
-    tile_coord : Dictionary containing tile coordinate information
-    tile_grid : Dictionary containing grid information
-    nodata : Value for missing data
-    nodata_tol : Tolerance for comparing values to nodata
-        
-    Returns:
-    -------
-    grid_data : Array in grid space, shape (N_lat, N_lon, N_fields)
-    lat_2d: Lat array in grid space, shape (N_lat, N_lon)
-    lon_2d: Lon array in grid space, shape (N_lat, N_lon)
-    
-    """   
-    # Verifity input input datasize
-    if tile_data.shape[0] != tile_coord['N_tile']:
-        print(f'Error: tile2grid input data size don''t match N_tile')
-        sys.exit()
-        
-    # if tile_data is 1-D [N_tile], expand into 2-D [N_tile,1]
-    if tile_data.ndim == 1:
-        tile_data = np.expand_dims(tile_data, axis=1)
-
-    N_fields = tile_data.shape[-1]
-    
-    # Determine grid_type (cubedsphere, EASE or regular latlon)
-    is_CS = int(tile_grid['N_lat']/tile_grid['N_lon']) == 6
-    is_EASE = 'EASE' in tile_grid['gridtype']
-
-    if is_CS:
-
-        grid_data, lat_2d,lon_2d = interpolate_to_latlon_grid( tile_data,  tile_coord['com_lat'], tile_coord['com_lon'])
-
-    elif is_EASE:
-        
-        grid_data, lat_2d,lon_2d = remap_1d_to_2d( tile_data,  tile_coord['com_lat'], tile_coord['com_lon'])
-        
-    else:
-            
-        # Initialize grid data array
-        grid_data = np.zeros((tile_grid['N_lat'], tile_grid['N_lon'], N_fields))
-
-        for k in range(N_fields):
-            # Initialize weight grid for current field
-            wgrid = np.zeros((tile_grid['N_lat'], tile_grid['N_lon']))
-            
-            # Loop through tile space
-            for n in range(tile_coord['N_tile']):
-                # Calculate grid indices (adjust for Python's 0-based indexing)
-                i = tile_coord['i_indg'][n] - (tile_grid['i_offg'] - (1-tile_grid['ind_base'])) - 1
-                j = tile_coord['j_indg'][n] - (tile_grid['j_offg'] - (1-tile_grid['ind_base'])) - 1
-                
-                # Get weight
-                w = tile_coord['frac_cell'][n]
-                
-                # Check if current tile data is valid (not no-data)
-                if abs(tile_data[k, n] - nodata) > nodata_tol:
-                    # Accumulate weighted data
-                    grid_data[j, i, k] += w * tile_data[n,k]
-                    wgrid[j, i] += w
-            
-            # Normalize and set no-data values
-            for i in range(tile_grid['N_lon']):
-                for j in range(tile_grid['N_lat']):
-                    if wgrid[j, i] > 0.0:
-                        # Normalize by total weight
-                        grid_data[j, i, k] = grid_data[j, i, k] / wgrid[j, i]
-                    else:
-                        # Set no-data value
-                        grid_data[j, i, k] = nodata
-        
-        # Replace nodata values with NaN
-        grid_data[grid_data == nodata] = np.nan
-
-        lat_2d = np.arange(tile_grid['ll_lat']+0.5*tile_grid['dlat'], tile_grid['ur_lat'], tile_grid['dlat'])
-        lon_2d = np.arange(tile_grid['ll_lon']+0.5*tile_grid['dlon'], tile_grid['ur_lon'], tile_grid['dlon'])
-
-    if grid_data.shape[-1] == 1:
-        grid_data = grid_data[:,:,0]
-        
-    return grid_data, lat_2d, lon_2d
-

From 4e0476406209e29171da0da1812132f5f06004ae Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Mon, 23 Jun 2025 15:47:21 -0400
Subject: [PATCH 20/45] edit da_t0 for off-the-hour, move spatial stats time
 loop inside function

---
 .../ObsFcstAna_stats/postproc_ObsFcstAna.py   | 184 +++++++++++-------
 1 file changed, 110 insertions(+), 74 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
index 03f95380..9794ae42 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
@@ -93,7 +93,7 @@ def compute_monthly_sums(self, date_time):
         n_tile = tc['N_tile']
         n_spec = len(obsparam_list[0])
 
-        date_time = date_time.replace(hour=self.da_t0)
+        date_time = date_time.replace(hour=int(self.da_t0), minute=int(np.mod(self.da_t0,1)*60))
         end_time  = date_time + relativedelta(months=1) 
         
         data_sum  = {}
@@ -392,86 +392,122 @@ def calc_temporal_stats_from_sums(self, write_to_nc=True, fout_stats='./stats.nc
     #   the straight spatial avg across a map of a given DA diagnostic.  The latter approach gives 
     #   the same weight to each location, regardless of how many obs are available at the location.
 
-    def calc_spatial_stats_from_sums(self, date_time):
+    def calc_spatial_stats_from_sums(self):
 
+        start_time = self.start_time
+        end_time   = self.end_time
+        
         var_list = ['obs_obs', 'obs_fcst','obs_ana']
+
+        O_mean    = []
+        O_stdv    = []
+        F_mean    = []
+        F_stdv    = []
+        A_mean    = []
+        A_stdv    = []
+        OmF_mean  = []
+        OmF_stdv  = []
+        OmA_mean  = []
+        OmA_stdv  = []
+        N_data    = []
+        date_vec  = []
         
-        mo_path = self.sum_path + '/Y'+ date_time.strftime('%Y') + '/M' + date_time.strftime('%m') + '/'            
-        fnc4_sums = mo_path + self.exptag + '.ens_avg.ldas_ObsFcstAna_sums.' + date_time.strftime('%Y%m') +'.nc4'
+        # Time loop 
+        current_time = start_time
+        while current_time < end_time:
         
-        mdata_sum = {}
-        mdata2_sum = {}
-
-        try:
-            with Dataset(fnc4_sums,'r') as nc:
-                mN_data                = nc.variables['N_data'      ][:]
-                moxf_sum               = nc.variables['obsxfcst_sum'][:]
-                moxa_sum               = nc.variables['obsxana_sum' ][:]
-                moxf_sum[mN_data == 0] = np.nan
-                moxa_sum[mN_data == 0] = np.nan
-                for var in var_list:
-                    mdata_sum[ var]               = nc.variables[var+'_sum' ][:]
-                    mdata2_sum[var]               = nc.variables[var+'2_sum'][:]
-                    mdata_sum[ var][mN_data == 0] = np.nan
-                    mdata2_sum[var][mN_data == 0] = np.nan
-        except FileNotFoundError:
-            print(f"Error: File '{filename}' not found.")
-            sys.exit(1)
-        except Exception as e:
-            print(f"An unexpected error occurred: {e}")
-            sys.exit(1)
-
-        # Make sure only aggregate tiles with valid values for all variables
-        for var in var_list:
-            mN_data                           = mN_data.astype(float)
-            mN_data[np.isnan(mdata_sum[var])] = np.nan
-            mN_data[mN_data == 0]             = np.nan
-
-        # cross mask before aggregating tile values   
-        for var in var_list:
-            mdata_sum[ var][np.isnan(mN_data)] = np.nan
-            mdata2_sum[var][np.isnan(mN_data)] = np.nan
-            moxf_sum[np.isnan(mN_data)]        = np.nan
-            moxa_sum[np.isnan(mN_data)]        = np.nan
-
-        # Aggregate data of all tiles
-        N_data     = np.nansum(mN_data, axis=0)
-        OxF_mean   = np.nansum(moxf_sum,axis=0)/N_data
-        OxA_mean   = np.nansum(moxa_sum,axis=0)/N_data
-        data_mean  = {}
-        data2_mean = {}
-        data_var   = {}
-        for var in var_list:
-            data_mean[ var] = np.nansum(mdata_sum[var ],axis=0)/N_data
-            data2_mean[var] = np.nansum(mdata2_sum[var],axis=0)/N_data
-            # var(x) = E[x2] - (E[x])^2
-            data_var[var] = data2_mean[var] - data_mean[var]**2
-
-        # Compute metrics of O-F, O-A, etc. based on above stats
-        O_mean = data_mean['obs_obs']
-        F_mean = data_mean['obs_fcst']
-        A_mean = data_mean['obs_ana']
-
-        O_var  = data_var[ 'obs_obs']
-        F_var  = data_var[ 'obs_fcst']
-        A_var  = data_var[ 'obs_ana']
-
-        # mean(x-y) = E[x] - E[y]   
-        OmF_mean = O_mean - F_mean
-        OmA_mean = O_mean - A_mean
+            mo_path = self.sum_path + '/Y'+ current_time.strftime('%Y') + '/M' + current_time.strftime('%m') + '/'            
+            fnc4_sums = mo_path + self.exptag + '.ens_avg.ldas_ObsFcstAna_sums.' + current_time.strftime('%Y%m') +'.nc4'
+            
+            mdata_sum = {}
+            mdata2_sum = {}
+
+            try:
+                with Dataset(fnc4_sums,'r') as nc:
+                    mN_data                = nc.variables['N_data'      ][:]
+                    moxf_sum               = nc.variables['obsxfcst_sum'][:]
+                    moxa_sum               = nc.variables['obsxana_sum' ][:]
+                    moxf_sum[mN_data == 0] = np.nan
+                    moxa_sum[mN_data == 0] = np.nan
+                    for var in var_list:
+                        mdata_sum[ var]               = nc.variables[var+'_sum' ][:]
+                        mdata2_sum[var]               = nc.variables[var+'2_sum'][:]
+                        mdata_sum[ var][mN_data == 0] = np.nan
+                        mdata2_sum[var][mN_data == 0] = np.nan
+            except FileNotFoundError:
+                print(f"Error: File '{fnc4_sums}' not found. Run Get_ObsFcstAna_sums first.")
+                sys.exit(1)
+            except Exception as e:
+                print(f"An unexpected error occurred: {e}")
+                sys.exit(1)
+
+            # Make sure only aggregate tiles with valid values for all variables
+            for var in var_list:
+                mN_data                           = mN_data.astype(float)
+                mN_data[np.isnan(mdata_sum[var])] = np.nan
+                mN_data[mN_data == 0]             = np.nan
 
-        # var(x-y) = var(x) + var(y) - 2cov(x,y)
-        # cov(x,y) = E[xy] - E[x]E[y]
-        OmF_stdv  = np.sqrt(O_var + F_var - 2 * (OxF_mean - O_mean*F_mean))
-        OmA_stdv  = np.sqrt(O_var + A_var - 2 * (OxA_mean - O_mean*A_mean))
+            # cross mask before aggregating tile values   
+            for var in var_list:
+                mdata_sum[ var][np.isnan(mN_data)] = np.nan
+                mdata2_sum[var][np.isnan(mN_data)] = np.nan
+                moxf_sum[np.isnan(mN_data)]        = np.nan
+                moxa_sum[np.isnan(mN_data)]        = np.nan
+
+            # Aggregate data of all tiles
+            N_data_mo  = np.nansum(mN_data, axis=0)
+            OxF_mean   = np.nansum(moxf_sum,axis=0)/N_data_mo
+            OxA_mean   = np.nansum(moxa_sum,axis=0)/N_data_mo
+            data_mean  = {}
+            data2_mean = {}
+            data_var   = {}
+            for var in var_list:
+                data_mean[ var] = np.nansum(mdata_sum[var ],axis=0)/N_data_mo
+                data2_mean[var] = np.nansum(mdata2_sum[var],axis=0)/N_data_mo
+                # var(x) = E[x2] - (E[x])^2
+                data_var[var] = data2_mean[var] - data_mean[var]**2
+
+            # Compute monthly metrics of O-F, O-A, etc. based on above stats
+            O_mean_mo = data_mean['obs_obs']
+            F_mean_mo = data_mean['obs_fcst']
+            A_mean_mo = data_mean['obs_ana']
+
+            O_var     = data_var['obs_obs']
+            F_var     = data_var['obs_fcst']
+            A_var     = data_var['obs_ana']
+
+            # mean(x-y) = E[x] - E[y]   
+            OmF_mean_mo = O_mean_mo - F_mean_mo
+            OmA_mean_mo = O_mean_mo - A_mean_mo
+
+            # var(x-y) = var(x) + var(y) - 2cov(x,y)
+            # cov(x,y) = E[xy] - E[x]E[y]
+            OmF_stdv_mo  = np.sqrt(O_var + F_var - 2 * (OxF_mean - O_mean_mo*F_mean_mo))
+            OmA_stdv_mo  = np.sqrt(O_var + A_var - 2 * (OxA_mean - O_mean_mo*A_mean_mo))
+
+            # Extend timeseries
+            N_data.append(N_data_mo)
+            O_mean.append(O_mean_mo)
+            O_stdv.append(np.sqrt(O_var))
+            F_mean.append(F_mean_mo)
+            F_stdv.append(np.sqrt(F_var))
+            A_mean.append(A_mean_mo)
+            A_stdv.append(np.sqrt(A_var))
+            OmF_mean.append(OmF_mean_mo)
+            OmF_stdv.append(OmF_stdv_mo)
+            OmA_mean.append(OmA_mean_mo)
+            OmA_stdv.append(OmA_stdv_mo)            
+
+            date_vec.append(current_time.strftime('%Y%m'))
+            current_time = current_time + relativedelta(months=1)
 
         stats = {
-            'O_mean'  : O_mean,   'O_stdv'  : np.sqrt(O_var),
-            'F_mean'  : F_mean,   'F_stdv'  : np.sqrt(F_var),
-            'A_mean'  : A_mean,   'A_stdv'  : np.sqrt(A_var),
-            'OmF_mean': OmF_mean, 'OmF_stdv': OmF_stdv,
-            'OmA_mean': OmA_mean, 'OmA_stdv': OmA_stdv,
-            'N_data': N_data,
+            'O_mean'  : np.array(O_mean),   'O_stdv'  : np.array(O_stdv),
+            'F_mean'  : np.array(F_mean),   'F_stdv'  : np.array(F_stdv),
+            'A_mean'  : np.array(A_mean),   'A_stdv'  : np.array(A_stdv),
+            'OmF_mean': np.array(OmF_mean), 'OmF_stdv': np.array(OmF_stdv),
+            'OmA_mean': np.array(OmA_mean), 'OmA_stdv': np.array(OmA_stdv),
+            'N_data': np.array(N_data),     'date_vec': date_vec
             }
 
         return stats

From f82e55f24a4a1290916925290febc5381d742df5 Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Mon, 23 Jun 2025 15:59:25 -0400
Subject: [PATCH 21/45] change 1-d to 2-d regridding

---
 .../ObsFcstAna_stats/Plot_stats_maps.py       | 195 ++++++++++--------
 1 file changed, 104 insertions(+), 91 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
index 91e776bb..17dae399 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
@@ -3,29 +3,63 @@
 """
 Sample script for plotting maps of long-term data assimilation diagnostics.
 Plots Nobs-weighted avg of each metric across all species.
-Requires saved files with monthly sums (see Get_ObsFcstAna_stat.py).
+Requires saved files with monthly sums (see Get_ObsFcstAna_sums.py).
 Stats of *normalized* OmFs are approximated!
 """
 
 import sys;       sys.path.append('../../shared/python/')
 import warnings;  warnings.filterwarnings("ignore")
+import os
 
 import numpy             as np
 import matplotlib.pyplot as plt
 
 from datetime               import timedelta
 
-from tile2grid              import tile2grid
+from netCDF4                import Dataset
+
+from tile_to_latlon         import tile_to_latlon, get_grid_resolution
 from plot                   import plotMap
 from EASEv2                 import EASEv2_ind2latlon
 
-def plot_OmF_maps(postproc_obj, stats, fig_path='./'):
+from postproc_ObsFcstAna    import postproc_ObsFcstAna
+from user_config            import get_config
+
+def Main_OmF_maps():
+    
+    # Plot maps of long-term temporal stats
+    
+    config     = get_config()                 # edit user-defined inputs in user_config.py
+    
+    exp_list   = config['exp_list']
+    start_time = config['start_time']
+    end_time   = config['end_time']
+    sum_path   = config['sum_path']
+    out_path   = config['out_path']
+
+    # Provide Nmin to screen out tiles with inadequate data for temporal stats 
+    Nmin = 20
     
-    start_time    = postproc_obj.start_time
-    end_time      = postproc_obj.end_time
-    domain        = postproc_obj.domain
-    tc            = postproc_obj.tilecoord
-    tg            = postproc_obj.tilegrid_global
+    # ------------------------------------------------------------------------------------
+    # Get metrics in [N_tile, N_species] for plotting
+    # ------------------------------------------------------------------------------------
+    # File name for long-term temporal stats
+    stats_file  = out_path + 'temporal_stats_'+exp_list[0]['exptag']+'_'+ start_time.strftime('%Y%m%d')+'_'+ \
+        (end_time+timedelta(days=-1)).strftime('%Y%m%d')+'.nc4'
+
+    # Read or compute long-term temporal stats.  Each field has the dimension [N_tile, N_species].
+    if os.path.isfile(stats_file):
+
+        print('reading stats nc4 file '+ stats_file)
+        stats = {}
+        with Dataset(stats_file,'r') as nc:
+            for key, value in nc.variables.items():
+                stats[key] = value[:].filled(np.nan)
+
+    else:
+        # Initialize the postprocessing object and get information
+        postproc_obj = postproc_ObsFcstAna(exp_list, start_time, end_time, sum_path=sum_path)
+        stats = postproc_obj.calc_temporal_stats_from_sums(write_to_nc=True, fout_stats=stats_file)
     
     N_data        = stats['N_data']
     OmF_mean      = stats['OmF_mean']
@@ -35,11 +69,22 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'):
     OmF_norm_mean = stats['OmF_norm_mean']
     OmF_norm_stdv = stats['OmF_norm_stdv']
 
+    # Screen all fields with Nmin  except for N_data
+    OmF_mean[N_data < Nmin]      = np.nan
+    OmF_stdv[N_data < Nmin]        = np.nan
+    OmA_mean[N_data < Nmin]      = np.nan
+    OmA_stdv[N_data < Nmin]       = np.nan   
+    OmF_norm_mean[N_data < Nmin] = np.nan
+    OmF_norm_stdv[N_data < Nmin]   = np.nan
+    
+    # Select/combine fields for plotting. The following provides an example to
+    # combine statistical fields of all species to averages.
+    
     # Compute Nobs-weighted avg of each metric across all species.
     # Typically used for SMAP Tb_h/h from asc and desc overpasses,
     # or ASCAT soil moisture from Metop-A/B/C.
     # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS!
-    Nobs_data     = np.nansum(              N_data, axis=1)
+    Nobs_data     = np.nansum(  N_data, axis=1)
     OmF_mean      = np.nansum(OmF_mean     *N_data, axis=1)/Nobs_data
     OmF_stdv      = np.nansum(OmF_stdv     *N_data, axis=1)/Nobs_data
     OmF_norm_mean = np.nansum(OmF_norm_mean*N_data, axis=1)/Nobs_data
@@ -47,18 +92,47 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'):
     OmA_mean      = np.nansum(OmA_mean     *N_data, axis=1)/Nobs_data
     OmA_stdv      = np.nansum(OmA_stdv     *N_data, axis=1)/Nobs_data
 
+    # Nobs_data need to be at least 1, otherwise unwanted zeros 
+    # might get into the computation of global mean etc.
+    Nobs_data[Nobs_data == 0] = np.nan  
+
+    # --------------------------------------------------------------------------------
+    # Get map grid information and provide map resolution (in degree).
+    # Observation FOV-based resolution estimate is provided. The resolution can also be
+    # manually set for optimal map performance
+    # ---------------------------------------------------------------------------------
+    tc = exp_list[0]['tilecoord']
+    tg = exp_list[0]['tilegrid_global']
+    obs_fov = exp_list[0]['obsparam'][0]['FOV']
+    obs_fov_units = exp_list[0]['obsparam'][0]['FOV_units']
+
+    # Get map grid resolution for mapping tile data on lat/lon grid
+    # Convert FOV to degree, assuming 100km is about 1 deg.
+    if 'km' in obs_fov_units:
+        obs_fov = obs_fov /100.
+
+    # Set obs_fov at model resolution if obs_fov = 0
+    if obs_fov == 0:
+        obs_fov = 360./tile_grid['N_lon']/2.
+
+    # Get map plot grid resolution based on obs FOV    
+    map_grid_resolution = get_grid_resolution(obs_fov)
+    print(f'map grid resolution: {map_grid_resolution}' degree)
+
+    # ----------------------------------------------------------------------------------
     # Plotting
-    exptag = postproc_obj.exptag
+    # ----------------------------------------------------------------------------------
+    exptag = exp_list[0]['exptag']
 
     fig, axes = plt.subplots(2,2, figsize=(18,10))
     plt.rcParams.update({'font.size':14})
-
+    
     for i in np.arange(2):
         for j in np.arange(2):
             units = '[k]'
             if i == 0 and j == 0:
                 tile_data = Nobs_data
-                # crange is [cmin, cmax]
+                #crange is [cmin, cmax]
                 crange =[0, np.ceil((end_time-start_time).days/150)*300]
                 colormap = plt.get_cmap('jet',20)
                 title_txt = exptag + ' Tb Nobs '    + start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
@@ -80,102 +154,41 @@ def plot_OmF_maps(postproc_obj, stats, fig_path='./'):
                 title_txt = exptag + ' Tb normalized O-F stdv (approx!) '+ start_time.strftime('%Y%m%d')+'_'+end_time.strftime('%Y%m%d')
 
             colormap.set_bad(color='0.9') # light grey, 0-black, 1-white
+           
+            # map tile_data on 2-d regular lat/lon grid for map plotting
+            grid_data, lat_2d, lon_2d = tile_to_latlon(tile_data, tc, tg, map_grid_resolution)
 
-            # Regrid 1d tile_data to 2d grid_data for map plots
-            if '_M09_' in domain: # special case  
-                grid_data_M09 = np.zeros((1624, 3856)) + np.nan  
-                grid_data_M09[tc['j_indg'],tc['i_indg']] = tile_data
-                
-                # Reshape the data into 4x4 blocks
-                reshaped = grid_data_M09.reshape(1624//4, 4, 3856//4, 4)
-
-                # Combine each 4x4 M09 block into a M36 grid
-                #if i==0 and j==0:
-                #   grid_data = np.sum(reshaped,axis=(1, 3)) 
-                #else:
-                #   grid_data = np.nanmean(reshaped,axis=(1, 3))
-
-                grid_data = grid_data_M09[1::4, 2::4]
-
-                # NOT area weighted 
-                wmean = np.nanmean(grid_data)
-                wabsmean = np.nanmean(np.abs(grid_data))
-                if 'normalized' in title_txt:
-                    wabsmean = np.nanmean(np.abs(grid_data-1.))
-                    
-                lat_M36, lon_M36 = EASEv2_ind2latlon(np.arange(406), np.arange(964),'M36')
-                lon_2d,lat_2d = np.meshgrid(lon_M36,lat_M36)
-            else:
-                grid_data, lat_2d, lon_2d = tile2grid(tile_data, tc, tg)
-                
-                # Area weighted mean and mean(abs)
-                wmean    =     np.nansum(       tile_data     * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area'])
-                wabsmean =     np.nansum(np.abs(tile_data)    * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area'])
-                if 'normalized' in title_txt:
-                    wabsmean = np.nansum(np.abs(tile_data-1.) * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area'])
-                    
+            mean       = np.nanmean(grid_data)
+            absmean =np.nanmean(np.abs(grid_data))
+            
             if 'normalized' in title_txt:
-                title_txt = title_txt + '\n' + "avg=%.3f, avg(abs(nstdv-1))=%.3f" % (wmean, wabsmean)+' '+units
+                absmean = np.nanmean(np.abs(grid_data-1.) )
+
+            if 'normalized' in title_txt and 'stdv' in title_txt:
+                title_txt = title_txt + '\n' + "avg=%.3f, avg(abs(nstdv-1))=%.3f" % (mean, absmean)+' '+units
             elif 'mean' in title_txt:
-                title_txt = title_txt + '\n' + "avg=%.3f, avg(abs)=%.3f" % (wmean, wabsmean)+' '+units
+                title_txt = title_txt + '\n' + "avg=%.3f, avg(abs)=%.3f" % (mean, absmean)+' '+units
             else:
-                title_txt = title_txt + '\n' + "avg=%.2f" % (wmean) +' '+units                
-         
+                title_txt = title_txt + '\n' + "avg=%.2f" % (mean) +' '+units                
+   
             if 'normalized' in title_txt:
                 grid_data = np.log10(grid_data)
                 crange = [-0.6, 0.45]
             
             mm, cs = plotMap(grid_data, ax =axes[i,j], lat=lat_2d, lon=lon_2d, cRange=crange, \
-                        title=title_txt, cmap=colormap, bounding=[-60, 80, -180,180])            
-            
+                        title=title_txt, cmap=colormap, bounding=[-60, 80, -180,180])
+
     plt.tight_layout()
     # Save figure to file
-    fig.savefig(fig_path+'Map_OmF_'+ exptag +'_'+start_time.strftime('%Y%m')+'_'+\
+    fig.savefig(out_path+'Map_OmF_'+ exptag +'_'+start_time.strftime('%Y%m')+'_'+\
                         (end_time+timedelta(days=-1)).strftime('%Y%m')+'.png')
-    #plt.show()
+    plt.show()
     plt.close(fig)
 
 # -----------------------------------------------------------------------------------------------
     
 if __name__ == '__main__':
-
-    # Plot maps of long-term temporal stats
-    
-    from postproc_ObsFcstAna    import postproc_ObsFcstAna
-    from user_config            import get_config
-    
-    config     = get_config()                 # edit user-defined inputs in user_config.py
-    
-    exp_list   = config['exp_list']
-    start_time = config['start_time']
-    end_time   = config['end_time']
-    sum_path   = config['sum_path']
-    out_path   = config['out_path']
-     
-    # ------------------------------------------------------------------------------------
-
-    # Initialize the postprocessing object
-    postproc = postproc_ObsFcstAna(exp_list, start_time, end_time, sum_path=sum_path)
-
-    # File name for long-term temporal stats
-    stats_file  = out_path + 'temporal_stats_'+exp_list[0]['exptag']+'_'+ start_time.strftime('%Y%m%d')+'_'+ \
-        (end_time+timedelta(days=-1)).strftime('%Y%m%d')+'.nc4'
-
-
-    # Read or compute long-term temporal stats.  Each field has the dimension [N_tile, N_species].
-    if os.path.isfile(stats_file):
-
-        print('reading stats nc4 file '+ stats_file)
-        temporal_stats = {}
-        with Dataset(stats_file,'r') as nc:
-            for key, value in nc.variables.items():
-                temporal_stats[key] = value[:].filled(np.nan)
-
-    else:
-        temporal_stats = postproc.calc_temporal_stats_from_sums(write_to_nc=True, fout_stats=stats_file)
-
-    # Plot stats
         
-    plot_OmF_maps(postproc, temporal_stats, fig_path=out_path )
+    Main_OmF_maps()
 
 # ====================== EOF =========================================================

From 54d822da31a276be961e741609bcaa7acde30115 Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Mon, 23 Jun 2025 16:00:32 -0400
Subject: [PATCH 22/45] minor change

---
 GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
index 17dae399..30650e82 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
@@ -182,7 +182,7 @@ def Main_OmF_maps():
     # Save figure to file
     fig.savefig(out_path+'Map_OmF_'+ exptag +'_'+start_time.strftime('%Y%m')+'_'+\
                         (end_time+timedelta(days=-1)).strftime('%Y%m')+'.png')
-    plt.show()
+    #plt.show()
     plt.close(fig)
 
 # -----------------------------------------------------------------------------------------------

From 6dfe1c3c83663b3e4b86c52abba8b87f1ed95a2a Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Mon, 23 Jun 2025 16:01:58 -0400
Subject: [PATCH 23/45] minor clean up and move time loop out

---
 .../ObsFcstAna_stats/Plot_stats_timeseries.py | 101 ++++++------------
 1 file changed, 35 insertions(+), 66 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
index f7a40e88..bc66e8d2 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
@@ -12,78 +12,64 @@
 
 import numpy             as np
 import matplotlib.pyplot as plt
+import pickle
 
 from datetime               import datetime, timedelta
 from dateutil.relativedelta import relativedelta
+
+from postproc_ObsFcstAna    import postproc_ObsFcstAna
+from user_config            import get_config
+    
+def Main_OmF_timeseries():
+
+    config     = get_config()                 # edit user-defined inputs in user_config.py
+    
+    exp_list   = config['exp_list']
+    start_time = config['start_time']
+    end_time   = config['end_time']
+    sum_path   = config['sum_path']
+    out_path   = config['out_path']
+     
+    # ------------------------------------------------------------------------------------
+    #
+    # Initialize the postprocessing object
+    postproc_obj = postproc_ObsFcstAna(exp_list, start_time, end_time, sum_path=sum_path)
     
-def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'):
-    import pickle
     exptag     = postproc_obj.exptag
     start_time = postproc_obj.start_time
     end_time   = postproc_obj.end_time
 
-    stats_file = fig_path + 'spatial_stats_'+exptag+'_'+start_time.strftime('%Y%m')+ \
+    stats_file = out_path + 'spatial_stats_'+exptag+'_'+start_time.strftime('%Y%m')+ \
         '_'+(end_time+timedelta(days=-1)).strftime('%Y%m')+'.pkl'
     
     if os.path.isfile(stats_file):
         
         with open(stats_file,'rb') as file:
+            print(f'reading from {stats_file}')
             stats = pickle.load(file)
-        date_vec = stats['date_vec']
         
     else:
-        
-        # Initialize monthly metrics as list
-        OmF_mean =[]
-        OmF_stdv =[]
-        OmA_mean =[]
-        OmA_stdv =[]
-        Ndata    =[]
-        date_vec =[]
-
-        # Time loop 
-        current_time = start_time
-        while current_time < end_time:
-            print('compute monthly spatial mean stats for '+ current_time.strftime('%Y%m'))
-
-            stats_mo = postproc_obj.calc_spatial_stats_from_sums(current_time)
-
-            OmFm  = stats_mo['OmF_mean']
-            OmFs  = stats_mo['OmF_stdv']
-            OmAm  = stats_mo['OmA_mean']
-            OmAs  = stats_mo['OmA_stdv']
-            Nobsm = stats_mo['N_data']
-                  
-            Ndata.append(Nobsm)
-
-            OmF_mean.append(OmFm)
-            OmF_stdv.append(OmFs)
-            OmA_mean.append(OmAm)
-            OmA_stdv.append(OmAs)
-
-            date_vec.append(current_time.strftime('%Y%m'))
-            current_time = current_time + relativedelta(months=1)
-
-        # Store stats in a dictionary for easier saving and referencing  
-        stats = {"OmF_mean":np.array(OmF_mean), "OmF_stdv":np.array(OmF_stdv),
-                 "OmA_mean":np.array(OmA_mean), "OmA_stdv":np.array(OmA_stdv),
-                 "Ndata":np.array(Ndata), "date_vec":date_vec}
+
+        stats = postproc_obj.calc_spatial_stats_from_sums()
         
         if stats_file is not None:
             with open(stats_file,'wb') as file:
+                print(f'saveing stats to {stats_file}')
                 pickle.dump(stats,file)
 
+    date_vec = stats['date_vec']
+
     # Compute Nobs-weighted avg of each metric across all species.
     # Typically used for SMAP Tb_h/h from asc and desc overpasses,
     # or ASCAT soil moisture from Metop-A/B/C.
     # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS!
     stats_plot = {}
-    Ndata = np.nansum(stats['Ndata'], axis=1)
-    stats_plot['Ndata']     = Ndata
-    stats_plot['OmF_mean']  = np.nansum(stats['OmF_mean']*stats['Ndata'], axis=1)/Ndata
-    stats_plot['OmF_stdv']  = np.nansum(stats['OmF_stdv']*stats['Ndata'], axis=1)/Ndata
-    stats_plot['OmA_mean']  = np.nansum(stats['OmA_mean']*stats['Ndata'], axis=1)/Ndata
-    stats_plot['OmA_stdv']  = np.nansum(stats['OmA_stdv']*stats['Ndata'], axis=1)/Ndata
+    N_data = np.nansum(stats['N_data'], axis=1)
+    stats_plot['N_data']     = N_data
+    stats_plot['OmF_mean']  = np.nansum(stats['OmF_mean']*stats['N_data'], axis=1)/N_data
+    stats_plot['OmF_stdv']  = np.nansum(stats['OmF_stdv']*stats['N_data'], axis=1)/N_data
+    stats_plot['OmA_mean']  = np.nansum(stats['OmA_mean']*stats['N_data'], axis=1)/N_data
+    stats_plot['OmA_stdv']  = np.nansum(stats['OmA_stdv']*stats['N_data'], axis=1)/N_data
     
     plot_var = 'OmF_mean'
 
@@ -93,36 +79,19 @@ def Plot_monthly_OmF_bars(postproc_obj, fig_path='./'):
     plt.grid(True, linestyle='--', alpha=0.5)
 
     plt.xticks(ticks=date_vec[::2], labels=date_vec[::2])
-    plt.title(exptag+ ' monthly '+plot_var)
+    plt.title(exptag+ 'monthly '+plot_var)
     plt.xlim(-1, len(date_vec)+1)
     plt.ylim(-.1, 2.)
 
     plt.tight_layout()
     #plt.show()
-    plt.savefig(fig_path+'Bars_'+plot_var+exptag+date_vec[0]+'_'+\
+    plt.savefig(out_path+'Bars_'+plot_var+exptag+date_vec[0]+'_'+\
                     date_vec[-1]+'.png')
     plt.close()
 
 if __name__ == "__main__":
-
-    from postproc_ObsFcstAna    import postproc_ObsFcstAna
-    from user_config            import get_config
-
-    config     = get_config()                 # edit user-defined inputs in user_config.py
     
-    exp_list   = config['exp_list']
-    start_time = config['start_time']
-    end_time   = config['end_time']
-    sum_path   = config['sum_path']
-    out_path   = config['out_path']
-     
-    # ------------------------------------------------------------------------------------
-    #
-    # Initialize the postprocessing object
-    postproc = postproc_ObsFcstAna(exp_list, start_time, end_time, sum_path=sum_path)
-
-    # Compute spatial stats and plot monthly O-F stats bars
-    Plot_monthly_OmF_bars(postproc, fig_path=out_path)
+    Main_OmF_timeseries()
 
 
 # ====================== EOF =========================================================

From 76ccea91d96e92d2c0cf8496cb6e51567ff9c832 Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Mon, 23 Jun 2025 16:14:29 -0400
Subject: [PATCH 24/45] minor changes

---
 .../util/shared/python/tile_to_latlon.py      | 155 ++++++++++++++++++
 1 file changed, 155 insertions(+)
 create mode 100644 GEOSldas_App/util/shared/python/tile_to_latlon.py

diff --git a/GEOSldas_App/util/shared/python/tile_to_latlon.py b/GEOSldas_App/util/shared/python/tile_to_latlon.py
new file mode 100644
index 00000000..9ba346f4
--- /dev/null
+++ b/GEOSldas_App/util/shared/python/tile_to_latlon.py
@@ -0,0 +1,155 @@
+import os
+import numpy as np
+
+# convert data from 1-d array ("compressed") format to 2-d array (grid) format based on lat/lon coordinates
+#
+# 1-d input data can have one additional dimension (e.g., time)
+#
+# 2-d output data is lat-by-lon[-by-time]
+
+def get_grid_resolution(fov_radius=0.5):
+    '''
+    Estimate the resolution in degrees for the target lat/lon grid when remapping 1-D
+    tile data to regular 2-D lat/lon grid for plotting purposes.
+    The resolution is an approximation based on the observation FOV radius information. 
+    
+    '''    
+    return np.round(fov_radius * 2., decimals=3)
+        
+def interpolate_to_latlon(data_1d, lat, lon, resolution):
+    """
+    For cubedsphere grid: use 
+    
+    """
+    from scipy.interpolate import griddata
+    from scipy.spatial.distance import cdist
+    
+    lat_grid = np.arange(-90+resolution/2, 90, resolution)
+    lon_grid = np.arange(-180+resolution/2, 180, resolution)
+    lon_2d, lat_2d = np.meshgrid(lon_grid, lat_grid)
+
+    ny = len(lat_grid)
+    nx = len(lon_grid)
+
+    nf = data_1d.shape[-1]
+    data_2d = np.full([ny, nx, nf], np.nan)
+    for i in range(nf):
+        data = data_1d[:,i]
+        data_2d[:,:,i] = griddata((lat.flatten(), lon.flatten()), data.flatten(), (lat_2d, lon_2d), method='linear')
+     
+    return data_2d, lat_2d, lon_2d
+
+def remap_1d_to_2d( data_1d,  lat_1d, lon_1d):
+
+    """
+    For EASE grid: use remapping
+    
+    """
+    
+    # Extract unique coordinates 
+    unique_lats, indY = np.unique(lat_1d, return_inverse=True)
+    unique_lons, indX = np.unique(lon_1d, return_inverse=True)
+
+    ny = len(unique_lats)
+    nx = len(unique_lons)
+    
+    nf = data_1d.shape[1]
+    data_2d = np.full([ny, nx, nf], np.nan)
+    data_2d[indY, indX, :] = data_1d
+        
+    lon_2d,lat_2d = np.meshgrid(unique_lons, unique_lats)
+    
+    return data_2d, lat_2d, lon_2d
+
+def tile2grid(tile_data, tile_coord, tile_grid):
+
+    N_fields = tile_data.shape[-1]
+    # Initialize grid data array
+    grid_data = np.zeros((tile_grid['N_lat'], tile_grid['N_lon'], N_fields))
+
+    for k in range(N_fields):
+        # Initialize weight grid for current field
+        wgrid = np.zeros((tile_grid['N_lat'], tile_grid['N_lon']))
+        
+        # Loop through tile space
+        for n in range(tile_coord['N_tile']):
+            # Calculate grid indices (adjust for Python's 0-based indexing)
+            i = tile_coord['i_indg'][n] - (tile_grid['i_offg'] - (1-tile_grid['ind_base'])) - 1
+            j = tile_coord['j_indg'][n] - (tile_grid['j_offg'] - (1-tile_grid['ind_base'])) - 1
+            
+            # Get weight
+            w = tile_coord['frac_cell'][n]
+            
+            # Check if current tile data is valid (not no-data)
+            if  ~np.isnan(tile_data[k, n]):
+                # Accumulate weighted data
+                grid_data[j, i, k] += w * tile_data[n,k]
+                wgrid[j, i] += w
+        
+        # Normalize and set no-data values
+        for i in range(tile_grid['N_lon']):
+            for j in range(tile_grid['N_lat']):
+                if wgrid[j, i] > 0.0:
+                    # Normalize by total weight
+                    grid_data[j, i, k] = grid_data[j, i, k] / wgrid[j, i]
+                else:
+                    # Set no-data value
+                    grid_data[j, i, k] = np.nan
+    
+    lat_2d = np.arange(tile_grid['ll_lat']+0.5*tile_grid['dlat'], tile_grid['ur_lat'], tile_grid['dlat'])
+    lon_2d = np.arange(tile_grid['ll_lon']+0.5*tile_grid['dlon'], tile_grid['ur_lon'], tile_grid['dlon'])
+    
+def tile_to_latlon( tile_data, tile_coord, tile_grid, resolution, nodata=1.e15, nodata_tol=1.e-4):
+    
+    """
+    Convert tile-space data to grid-space data.
+    
+    Parameters:
+    ----------
+    tile_data : Array in tile space, shape (N_tile, N_fields)
+    tile_coord : Dictionary containing tile coordinate information
+    tile_grid : Dictionary containing grid information
+    resolution : target grid resolution
+    nodata : Value for missing data
+    nodata_tol : Tolerance for comparing values to nodata
+        
+    Returns:
+    -------
+    grid_data : Array in regular grid space, shape (N_lat, N_lon, N_fields)
+    lat_2d: Lat array in regular grid space, shape (N_lat, N_lon)
+    lon_2d: Lon array in regular grid space, shape (N_lat, N_lon)
+    
+    """
+    tile_data[np.abs(tile_data - nodata) < nodata_tol]  = np.nan
+    
+    # Verifity input datasize
+    if tile_data.shape[0] != tile_coord['N_tile']:
+        print(f'Error: tile2grid input data size don''t match N_tile')
+        sys.exit()
+        
+    # if tile_data is 1-D [N_tile], expand into 2-D [N_tile,1]
+    if tile_data.ndim == 1:
+        tile_data = np.expand_dims(tile_data, axis=1)
+
+    nf = tile_data.shape[-1]
+
+    lat_grid = np.arange(-90+resolution/2, 90, resolution)
+    lon_grid = np.arange(-180+resolution/2, 180, resolution)
+    lon_2d, lat_2d = np.meshgrid(lon_grid, lat_grid)
+
+    grid_data = np.full([len(lat_grid), len(lon_grid), nf], np.nan)
+
+    for v in range(nf):        
+        for k in range(tile_coord['N_tile']):
+            if np.any([~np.isnan(tile_data[k,:])], axis=1):
+                j = np.searchsorted(lat_grid + resolution/2., tile_coord['com_lat'][k])
+                i = np.searchsorted(lon_grid + resolution/2., tile_coord['com_lon'][k])
+                # sometime the missing points are filled with zeros. taking largest value within
+                # the target grid cell avoild picking up unwanted zeros.
+                grid_data[j,i,v] = np.nanmax([tile_data[k,v], grid_data[j,i,v]])
+                                            
+    if grid_data.shape[-1] == 1:
+        grid_data = grid_data[:,:,0]
+        
+    return grid_data, lat_2d, lon_2d
+

From caa0f90e8e0bff63799cd6536c4540ca01cac7b3 Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Mon, 23 Jun 2025 16:15:28 -0400
Subject: [PATCH 25/45] minor correction

---
 GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
index 30650e82..98e1a63b 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
@@ -117,7 +117,7 @@ def Main_OmF_maps():
 
     # Get map plot grid resolution based on obs FOV    
     map_grid_resolution = get_grid_resolution(obs_fov)
-    print(f'map grid resolution: {map_grid_resolution}' degree)
+    print(f'map grid resolution: {map_grid_resolution} degree')
 
     # ----------------------------------------------------------------------------------
     # Plotting

From 978fdbf288740af50a25845662c6e09934179e94 Mon Sep 17 00:00:00 2001
From: Weiyuan Jiang <weiyuan.jiang@nasa.gov>
Date: Mon, 23 Jun 2025 16:25:01 -0400
Subject: [PATCH 26/45] refactoring...

---
 GEOSldas_App/process_hist.csh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/GEOSldas_App/process_hist.csh b/GEOSldas_App/process_hist.csh
index deaba8df..97d35afe 100644
--- a/GEOSldas_App/process_hist.csh
+++ b/GEOSldas_App/process_hist.csh
@@ -26,12 +26,11 @@ endif
 if($GRID == CUBE) then
    sed -i '/^\#EASE/d' $HISTRC
    sed -i 's|\#CUBE|''|g' $HISTRC
-   sed -i -e  s/\'GRIDNAME\'/$GRIDNAME/g $HISTRC
 else
    sed -i '/^\#CUBE/d' $HISTRC
    sed -i 's|\#EASE|''|g' $HISTRC
-   sed -i -e  s/\'GRIDNAME\'/$GRIDNAME/g $HISTRC
 endif
+sed -i -e  s/\'GRIDNAME\'/$GRIDNAME/g $HISTRC
 
 if($LSM_CHOICE == 1) then
    set GridComp = CATCH

From f5fa08cf220a3171262a94e9cfb5b8a136558165 Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Mon, 23 Jun 2025 16:27:27 -0400
Subject: [PATCH 27/45] remove old file

---
 .../ObsFcstAna_stats/Get_ObsFcstAna_stats.py  | 97 -------------------
 1 file changed, 97 deletions(-)
 delete mode 100644 GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py
deleted file mode 100644
index c4cf6620..00000000
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_stats.py
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Sample script for post-processing GEOSldas ObsFcstAna output into data assimilation diagnostics.
-First, compute and store monthly sums and sums of squares and cross-products of raw ObsFcstAna output.  
-Data assimilation diagnostics ("stats") such as the mean and std-dev of the observation-minus-forecast 
-residuals can then be diagnosed quickly from these intermediate "sums" files. 
-Sample script optionally computes and plots:
-- Maps of long-term data assimilation diagnostics (see also Plot_stats_maps.py).
-- Monthly time series of spatially averaged data assimilation diagnostics (see also Plot_stats_timeseries.py).
-
-Usage:
-    1. Edit "user_config.py" with experiments information.
-    2. Run this script as follows (on Discover):
-    $ module load python/GEOSpyD
-    $ ./Get_ObsFcstAna_stats.py
-    
-    # Background execution:
-    $ nohup ./Get_ObsFcstAna_stats.py > out.log &
-
-Authors: Q. Liu, R. Reichle, A. Fox
-Last Modified: May 2025
-"""
-
-import sys;       sys.path.append('../../shared/python/')
-import warnings;  warnings.filterwarnings("ignore")
-import os
-
-import numpy as np
-
-from datetime               import timedelta
-from postproc_ObsFcstAna    import postproc_ObsFcstAna
-from user_config            import get_config              # user-defined config info
-
-# ---
-#
-# If the script is run in the background, uncomment the following lines to see the redirected
-#   standard output in the out.log file immediately.  When the lines are commented out, the redirected
-#   standard output will not appear in the out.log file until the job has completed.
-# If the script is run in the foreground, the lines must be commented out.
-#
-#import io
-#sys.stdout = io.TextIOWrapper(open(sys.stdout.fileno(), 'wb', 0), write_through=True)
-#sys.stderr = io.TextIOWrapper(open(sys.stderr.fileno(), 'wb', 0), write_through=True)
-#
-# ---
-
-def main():
-     
-    config     = get_config()                 # edit user-defined inputs in user_config.py
-    
-    exp_list   = config['exp_list']
-    start_time = config['start_time']
-    end_time   = config['end_time']
-    sum_path   = config['sum_path']
-    out_path   = config['out_path']
-     
-    # ------------------------------------------------------------------------------------
-    # Postprocess raw ObsFcstAna output data into monthly sums 
-
-    # Initialize the postprocessing object
-    postproc = postproc_ObsFcstAna(exp_list, start_time, end_time, sum_path=sum_path)
-
-    # Compute and save monthly sums
-    postproc.save_monthly_sums()
-
-    # --------------------------------------------------------------------------------------
-    # Optionally compute long-term temporal/spatial statistics and create plots.
-    # The plotting scripts can also run standalone using the individual Plot_stats_*.py scripts,
-    #   as long as the monthly sum files are available.
-    
-    plot_maps       = False 
-    plot_timeseries = False
-
-    if plot_maps:            # Compute long-term temporal stats and plot maps
-
-        stats_file  = out_path + 'temporal_stats_'+exp_list[0]['exptag']+ '_'+start_time.strftime('%Y%m%d')+'_'+ \
-            (end_time+timedelta(days=-1)).strftime('%Y%m%d')+'.nc4'
-
-        # temporal_stats is a dictionary that contains all mean/variances fields for computing long-term O-F/O-A stats
-        # each field has the dimension [N_tile, N_species]
-
-        temporal_stats = postproc.calc_temporal_stats_from_sums(write_to_nc=True, fout_stats=stats_file)
-
-        # Example to plot some O-F maps
-        from Plot_stats_maps import plot_OmF_maps
-        plot_OmF_maps(postproc, temporal_stats, fig_path=out_path )
-
-    if plot_timeseries:      # Compute spatially averaged stats and plot monthly time series of stats
-
-        from Plot_stats_timeseries import Plot_monthly_OmF_bars
-        Plot_monthly_OmF_bars(postproc, fig_path=out_path)
-
-if __name__ == '__main__':
-    main()
-
-# ====================== EOF =========================================================

From dc7887631f71dc5721159d02f0283d0ff1583d9c Mon Sep 17 00:00:00 2001
From: Rolf Reichle <rolf.reichle@nasa.gov>
Date: Tue, 24 Jun 2025 08:46:46 -0400
Subject: [PATCH 28/45] corrected intro comments (Get_ObsFcstAna_sums.py)

---
 .../postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py     | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py
index 1c29ca94..b8283e03 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py
@@ -2,12 +2,9 @@
 
 """
 Sample script for post-processing GEOSldas ObsFcstAna output into data assimilation diagnostics.
-First, compute and store monthly sums and sums of squares and cross-products of raw ObsFcstAna output.  
+Computes and stores monthly sums and sums of squares and cross-products of raw ObsFcstAna output.  
 Data assimilation diagnostics ("stats") such as the mean and std-dev of the observation-minus-forecast 
-residuals can then be diagnosed quickly from these intermediate "sums" files. 
-Sample script optionally computes and plots:
-- Maps of long-term data assimilation diagnostics (see also Plot_stats_maps.py).
-- Monthly time series of spatially averaged data assimilation diagnostics (see also Plot_stats_timeseries.py).
+residuals can then be diagnosed quickly from these intermediate "sums" files.  See Plot_stats_*.py.
 
 Usage:
     1. Edit "user_config.py" with experiments information.
@@ -19,7 +16,7 @@
     $ nohup ./Get_ObsFcstAna_sums.py > out.log &
 
 Authors: Q. Liu, R. Reichle, A. Fox
-Last Modified: May 2025
+Last Modified: June 2025
 """
 
 import sys;       sys.path.append('../../shared/python/')

From 387834bfce543f54a3ce31fca6e6530b7c87a097 Mon Sep 17 00:00:00 2001
From: Rolf Reichle <rolf.reichle@nasa.gov>
Date: Tue, 24 Jun 2025 09:53:58 -0400
Subject: [PATCH 29/45] revised computation of plotting grid spacing; removed
 obsolete functions; edited comments (Plot_stats_maps.py, tile_to_latlon.py)

---
 .../ObsFcstAna_stats/Plot_stats_maps.py       |  82 +++++++-----
 .../util/shared/python/tile_to_latlon.py      | 122 ++----------------
 2 files changed, 65 insertions(+), 139 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
index 98e1a63b..ea8493ca 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
@@ -5,6 +5,12 @@
 Plots Nobs-weighted avg of each metric across all species.
 Requires saved files with monthly sums (see Get_ObsFcstAna_sums.py).
 Stats of *normalized* OmFs are approximated!
+
+Usage:
+    1. Edit "user_config.py" with experiments information.
+    2. Run this script as follows (on Discover):
+    $ module load python/GEOSpyD
+    $ ./Plot_stats_maps.py
 """
 
 import sys;       sys.path.append('../../shared/python/')
@@ -37,12 +43,14 @@ def Main_OmF_maps():
     sum_path   = config['sum_path']
     out_path   = config['out_path']
 
-    # Provide Nmin to screen out tiles with inadequate data for temporal stats 
+    # min number of individual O-Fs required for stats at a location (across all months in period)
+    
     Nmin = 20
     
     # ------------------------------------------------------------------------------------
-    # Get metrics in [N_tile, N_species] for plotting
+    # Read or compute stats.  Each field has dimension [N_tile, N_species] 
     # ------------------------------------------------------------------------------------
+
     # File name for long-term temporal stats
     stats_file  = out_path + 'temporal_stats_'+exp_list[0]['exptag']+'_'+ start_time.strftime('%Y%m%d')+'_'+ \
         (end_time+timedelta(days=-1)).strftime('%Y%m%d')+'.nc4'
@@ -69,22 +77,22 @@ def Main_OmF_maps():
     OmF_norm_mean = stats['OmF_norm_mean']
     OmF_norm_stdv = stats['OmF_norm_stdv']
 
-    # Screen all fields with Nmin  except for N_data
-    OmF_mean[N_data < Nmin]      = np.nan
-    OmF_stdv[N_data < Nmin]        = np.nan
-    OmA_mean[N_data < Nmin]      = np.nan
-    OmA_stdv[N_data < Nmin]       = np.nan   
+    # Screen stats with Nmin (except for N_data)
+    OmF_mean[     N_data < Nmin] = np.nan
+    OmF_stdv[     N_data < Nmin] = np.nan
+    OmA_mean[     N_data < Nmin] = np.nan
+    OmA_stdv[     N_data < Nmin] = np.nan   
     OmF_norm_mean[N_data < Nmin] = np.nan
-    OmF_norm_stdv[N_data < Nmin]   = np.nan
+    OmF_norm_stdv[N_data < Nmin] = np.nan
     
     # Select/combine fields for plotting. The following provides an example to
-    # combine statistical fields of all species to averages.
+    # computes average stats across all species.
     
     # Compute Nobs-weighted avg of each metric across all species.
     # Typically used for SMAP Tb_h/h from asc and desc overpasses,
     # or ASCAT soil moisture from Metop-A/B/C.
     # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS!
-    Nobs_data     = np.nansum(  N_data, axis=1)
+    Nobs_data     = np.nansum(              N_data, axis=1)
     OmF_mean      = np.nansum(OmF_mean     *N_data, axis=1)/Nobs_data
     OmF_stdv      = np.nansum(OmF_stdv     *N_data, axis=1)/Nobs_data
     OmF_norm_mean = np.nansum(OmF_norm_mean*N_data, axis=1)/Nobs_data
@@ -97,31 +105,43 @@ def Main_OmF_maps():
     Nobs_data[Nobs_data == 0] = np.nan  
 
     # --------------------------------------------------------------------------------
-    # Get map grid information and provide map resolution (in degree).
-    # Observation FOV-based resolution estimate is provided. The resolution can also be
-    # manually set for optimal map performance
+    # Plot stats on regular lat/lon grid, with grid spacing guided by the footprint 
+    # scale (field-of-view; FOV) of the  observations.  
     # ---------------------------------------------------------------------------------
-    tc = exp_list[0]['tilecoord']
-    tg = exp_list[0]['tilegrid_global']
-    obs_fov = exp_list[0]['obsparam'][0]['FOV']
+    
+    # Get geographic info about model grid, model tile space, and obs FOV
+    # (assumes that all species have same FOV and FOV_units)
+
+    tc            = exp_list[0]['tilecoord']
+    tg            = exp_list[0]['tilegrid_global']
+    obs_fov       = exp_list[0]['obsparam'][0]['FOV']
     obs_fov_units = exp_list[0]['obsparam'][0]['FOV_units']
 
-    # Get map grid resolution for mapping tile data on lat/lon grid
-    # Convert FOV to degree, assuming 100km is about 1 deg.
+    # Determine spacing of lat/lon grid for plotting (degree) based on obs FOV.
+    # Can also set manually if desired.
+
+    my_res = obs_fov*2.      # Note: FOV is a radius, hence the factor of 2.
+    
+    # If applicable, convert to degree, assuming 100 km is about 1 deg.
     if 'km' in obs_fov_units:
-        obs_fov = obs_fov /100.
+        my_res = my_res/100.
 
-    # Set obs_fov at model resolution if obs_fov = 0
+    # If obs_fov=0, reset to match model resolution.
     if obs_fov == 0:
-        obs_fov = 360./tile_grid['N_lon']/2.
+        my_res = np.sqrt( tile_grid['dlat']*tile_grid['dlon'] )
+
+    # pick a resolution from a sample vector of resolutions
 
-    # Get map plot grid resolution based on obs FOV    
-    map_grid_resolution = get_grid_resolution(obs_fov)
-    print(f'map grid resolution: {map_grid_resolution} degree')
+    sample_res = [0.05, 0.1, 0.25, 0.5, 1.0, 2.0]
+
+    my_res = min(sample_res, key=lambda x: abs(x - my_res))
+    
+    print(f'resolution of lat/lon grid for plotting: {my_res} degree')
 
     # ----------------------------------------------------------------------------------
-    # Plotting
+    # Plot
     # ----------------------------------------------------------------------------------
+
     exptag = exp_list[0]['exptag']
 
     fig, axes = plt.subplots(2,2, figsize=(18,10))
@@ -155,11 +175,11 @@ def Main_OmF_maps():
 
             colormap.set_bad(color='0.9') # light grey, 0-black, 1-white
            
-            # map tile_data on 2-d regular lat/lon grid for map plotting
-            grid_data, lat_2d, lon_2d = tile_to_latlon(tile_data, tc, tg, map_grid_resolution)
+            # map tile_data on 2-d regular lat/lon grid for plotting
+            grid_data, lat_2d, lon_2d = tile_to_latlongrid(tile_data, tc, my_res)
 
-            mean       = np.nanmean(grid_data)
-            absmean =np.nanmean(np.abs(grid_data))
+            mean    = np.nanmean(       grid_data )
+            absmean = np.nanmean(np.abs(grid_data))
             
             if 'normalized' in title_txt:
                 absmean = np.nanmean(np.abs(grid_data-1.) )
@@ -167,9 +187,9 @@ def Main_OmF_maps():
             if 'normalized' in title_txt and 'stdv' in title_txt:
                 title_txt = title_txt + '\n' + "avg=%.3f, avg(abs(nstdv-1))=%.3f" % (mean, absmean)+' '+units
             elif 'mean' in title_txt:
-                title_txt = title_txt + '\n' + "avg=%.3f, avg(abs)=%.3f" % (mean, absmean)+' '+units
+                title_txt = title_txt + '\n' + "avg=%.3f, avg(abs)=%.3f"          % (mean, absmean)+' '+units
             else:
-                title_txt = title_txt + '\n' + "avg=%.2f" % (mean) +' '+units                
+                title_txt = title_txt + '\n' + "avg=%.2f"                         % (mean         )+' '+units                
    
             if 'normalized' in title_txt:
                 grid_data = np.log10(grid_data)
diff --git a/GEOSldas_App/util/shared/python/tile_to_latlon.py b/GEOSldas_App/util/shared/python/tile_to_latlon.py
index 9ba346f4..8f730d55 100644
--- a/GEOSldas_App/util/shared/python/tile_to_latlon.py
+++ b/GEOSldas_App/util/shared/python/tile_to_latlon.py
@@ -1,130 +1,34 @@
 import os
 import numpy as np
 
-# convert data from 1-d array ("compressed") format to 2-d array (grid) format based on lat/lon coordinates
-#
-# 1-d input data can have one additional dimension (e.g., time)
-#
-# 2-d output data is lat-by-lon[-by-time]
-
-def get_grid_resolution(fov_radius=0.5):
-    '''
-    Estimate the resolution in degrees for the target lat/lon grid when remapping 1-D
-    tile data to regular 2-D lat/lon grid for plotting purposes.
-    The resolution is an approximation based on the observation FOV radius information. 
-    
-    '''    
-    return np.round(fov_radius * 2., decimals=3)
+# 
         
-def interpolate_to_latlon(data_1d, lat, lon, resolution):
-    """
-    For cubedsphere grid: use 
+def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_tol=1.e-4):
     
     """
-    from scipy.interpolate import griddata
-    from scipy.spatial.distance import cdist
-    
-    lat_grid = np.arange(-90+resolution/2, 90, resolution)
-    lon_grid = np.arange(-180+resolution/2, 180, resolution)
-    lon_2d, lat_2d = np.meshgrid(lon_grid, lat_grid)
-
-    ny = len(lat_grid)
-    nx = len(lon_grid)
-
-    nf = data_1d.shape[-1]
-    data_2d = np.full([ny, nx, nf], np.nan)
-    for i in range(nf):
-        data = data_1d[:,i]
-        data_2d[:,:,i] = griddata((lat.flatten(), lon.flatten()), data.flatten(), (lat_2d, lon_2d), method='linear')
-     
-    return data_2d, lat_2d, lon_2d
-
-def remap_1d_to_2d( data_1d,  lat_1d, lon_1d):
-
-    """
-    For EASE grid: use remapping
-    
-    """
-    
-    # Extract unique coordinates 
-    unique_lats, indY = np.unique(lat_1d, return_inverse=True)
-    unique_lons, indX = np.unique(lon_1d, return_inverse=True)
-
-    ny = len(unique_lats)
-    nx = len(unique_lons)
-    
-    nf = data_1d.shape[1]
-    data_2d = np.full([ny, nx, nf], np.nan)
-    data_2d[indY, indX, :] = data_1d
-        
-    lon_2d,lat_2d = np.meshgrid(unique_lons, unique_lats)
-    
-    return data_2d, lat_2d, lon_2d
-
-def tile2grid(tile_data, tile_coord, tile_grid):
-
-    N_fields = tile_data.shape[-1]
-    # Initialize grid data array
-    grid_data = np.zeros((tile_grid['N_lat'], tile_grid['N_lon'], N_fields))
-
-    for k in range(N_fields):
-        # Initialize weight grid for current field
-        wgrid = np.zeros((tile_grid['N_lat'], tile_grid['N_lon']))
-        
-        # Loop through tile space
-        for n in range(tile_coord['N_tile']):
-            # Calculate grid indices (adjust for Python's 0-based indexing)
-            i = tile_coord['i_indg'][n] - (tile_grid['i_offg'] - (1-tile_grid['ind_base'])) - 1
-            j = tile_coord['j_indg'][n] - (tile_grid['j_offg'] - (1-tile_grid['ind_base'])) - 1
-            
-            # Get weight
-            w = tile_coord['frac_cell'][n]
-            
-            # Check if current tile data is valid (not no-data)
-            if  ~np.isnan(tile_data[k, n]):
-                # Accumulate weighted data
-                grid_data[j, i, k] += w * tile_data[n,k]
-                wgrid[j, i] += w
-        
-        # Normalize and set no-data values
-        for i in range(tile_grid['N_lon']):
-            for j in range(tile_grid['N_lat']):
-                if wgrid[j, i] > 0.0:
-                    # Normalize by total weight
-                    grid_data[j, i, k] = grid_data[j, i, k] / wgrid[j, i]
-                else:
-                    # Set no-data value
-                    grid_data[j, i, k] = np.nan
-    
-    lat_2d = np.arange(tile_grid['ll_lat']+0.5*tile_grid['dlat'], tile_grid['ur_lat'], tile_grid['dlat'])
-    lon_2d = np.arange(tile_grid['ll_lon']+0.5*tile_grid['dlon'], tile_grid['ur_lon'], tile_grid['dlon'])
-    
-def tile_to_latlon( tile_data, tile_coord, tile_grid, resolution, nodata=1.e15, nodata_tol=1.e-4):
-    
-    """
-    Convert tile-space data to grid-space data.
+    Map (1d) tile-space data onto (2d) regular lat/lon grid of arbitrary "resolution".
     
     Parameters:
     ----------
-    tile_data : Array in tile space, shape (N_tile, N_fields)
+    tile_data  : Array in tile space, shape (N_tile, N_fields)
     tile_coord : Dictionary containing tile coordinate information
-    tile_grid : Dictionary containing grid information
     resolution : target grid resolution
-    nodata : Value for missing data
+    nodata     : Value for missing data
     nodata_tol : Tolerance for comparing values to nodata
         
     Returns:
     -------
     grid_data : Array in regular grid space, shape (N_lat, N_lon, N_fields)
-    lat_2d: Lat array in regular grid space, shape (N_lat, N_lon)
-    lon_2d: Lon array in regular grid space, shape (N_lat, N_lon)
+    lat_2d    : Lat array in regular grid space, shape (N_lat, N_lon)
+    lon_2d    : Lon array in regular grid space, shape (N_lat, N_lon)
     
     """
+    
     tile_data[np.abs(tile_data - nodata) < nodata_tol]  = np.nan
     
-    # Verifity input datasize
+    # Verify input datasize
     if tile_data.shape[0] != tile_coord['N_tile']:
-        print(f'Error: tile2grid input data size don''t match N_tile')
+        print(f'Error: size of tile2grid input data does not match that of N_tile')
         sys.exit()
         
     # if tile_data is 1-D [N_tile], expand into 2-D [N_tile,1]
@@ -133,8 +37,9 @@ def tile_to_latlon( tile_data, tile_coord, tile_grid, resolution, nodata=1.e15,
 
     nf = tile_data.shape[-1]
 
-    lat_grid = np.arange(-90+resolution/2, 90, resolution)
+    lat_grid = np.arange( -90+resolution/2,  90, resolution)
     lon_grid = np.arange(-180+resolution/2, 180, resolution)
+
     lon_2d, lat_2d = np.meshgrid(lon_grid, lat_grid)
 
     grid_data = np.full([len(lat_grid), len(lon_grid), nf], np.nan)
@@ -145,7 +50,7 @@ def tile_to_latlon( tile_data, tile_coord, tile_grid, resolution, nodata=1.e15,
                 j = np.searchsorted(lat_grid + resolution/2., tile_coord['com_lat'][k])
                 i = np.searchsorted(lon_grid + resolution/2., tile_coord['com_lon'][k])
                 # sometime the missing points are filled with zeros. taking largest value within
-                # the target grid cell avoild picking up unwanted zeros.
+                # the target grid cell avoids picking up unwanted zeros.
                 grid_data[j,i,v] = np.nanmax([tile_data[k,v], grid_data[j,i,v]])
                                             
     if grid_data.shape[-1] == 1:
@@ -153,3 +58,4 @@ def tile_to_latlon( tile_data, tile_coord, tile_grid, resolution, nodata=1.e15,
         
     return grid_data, lat_2d, lon_2d
 
+# =========================== EOF ============================================================

From abdeace096ea5f4c0c10132061ba68fcbf341289 Mon Sep 17 00:00:00 2001
From: Rolf Reichle <rolf.reichle@nasa.gov>
Date: Tue, 24 Jun 2025 09:55:07 -0400
Subject: [PATCH 30/45] renamed tile_to_latlon.py -> tile_to_latlongrid.py for
 clarity

---
 .../shared/python/{tile_to_latlon.py => tile_to_latlongrid.py}    | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename GEOSldas_App/util/shared/python/{tile_to_latlon.py => tile_to_latlongrid.py} (100%)

diff --git a/GEOSldas_App/util/shared/python/tile_to_latlon.py b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py
similarity index 100%
rename from GEOSldas_App/util/shared/python/tile_to_latlon.py
rename to GEOSldas_App/util/shared/python/tile_to_latlongrid.py

From 693ff6595728ad33f1fcdfebc2d7474641a77a55 Mon Sep 17 00:00:00 2001
From: Rolf Reichle <rolf.reichle@nasa.gov>
Date: Tue, 24 Jun 2025 11:18:40 -0400
Subject: [PATCH 31/45] add tile2grid.py; revised computation of spatial avg of
 temporal stats (tile_to_latlongrid.py, Plot_stats_maps.py)

---
 .../ObsFcstAna_stats/Plot_stats_maps.py       | 10 ++--
 GEOSldas_App/util/shared/python/tile2grid.py  | 55 +++++++++++++++++++
 .../util/shared/python/tile_to_latlongrid.py  |  2 +
 3 files changed, 63 insertions(+), 4 deletions(-)
 create mode 100644 GEOSldas_App/util/shared/python/tile2grid.py

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
index ea8493ca..ba0d7ed3 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
@@ -43,7 +43,7 @@ def Main_OmF_maps():
     sum_path   = config['sum_path']
     out_path   = config['out_path']
 
-    # min number of individual O-Fs required for stats at a location (across all months in period)
+    # min total number of individual O-Fs required for stats at a location (across all months in period)
     
     Nmin = 20
     
@@ -178,11 +178,13 @@ def Main_OmF_maps():
             # map tile_data on 2-d regular lat/lon grid for plotting
             grid_data, lat_2d, lon_2d = tile_to_latlongrid(tile_data, tc, my_res)
 
-            mean    = np.nanmean(       grid_data )
-            absmean = np.nanmean(np.abs(grid_data))
+
+            # compute spatial avg of metric (directly from tile_data)
+            mean    = np.nanmean(       tile_data )
+            absmean = np.nanmean(np.abs(tile_data))
             
             if 'normalized' in title_txt:
-                absmean = np.nanmean(np.abs(grid_data-1.) )
+                absmean = np.nanmean(np.abs(tile_data-1.) )
 
             if 'normalized' in title_txt and 'stdv' in title_txt:
                 title_txt = title_txt + '\n' + "avg=%.3f, avg(abs(nstdv-1))=%.3f" % (mean, absmean)+' '+units
diff --git a/GEOSldas_App/util/shared/python/tile2grid.py b/GEOSldas_App/util/shared/python/tile2grid.py
new file mode 100644
index 00000000..b918ac51
--- /dev/null
+++ b/GEOSldas_App/util/shared/python/tile2grid.py
@@ -0,0 +1,55 @@
+
+
+import os
+import numpy as np
+
+def tile2grid(tile_data, tile_coord, tile_grid):
+
+    """
+    Map (1d) tile-space data onto (2d) grid that is associated with the tile space.
+    
+    Parameters:
+    ----------
+    tile_data  : Array in tile space, shape (N_tile, N_fields)
+    tile_coord : Dictionary containing tile coordinate information
+    tile_grid  : Dictionary containing tile grid information
+
+    Returns:
+    -------
+    grid_data : Array in grid space, shape (N_lat, N_lon, N_fields)
+    """
+    
+    N_fields = tile_data.shape[-1]
+    # Initialize grid data array
+    grid_data = np.zeros((tile_grid['N_lat'], tile_grid['N_lon'], N_fields))
+
+    for k in range(N_fields):
+        # Initialize weight grid for current field
+        wgrid = np.zeros((tile_grid['N_lat'], tile_grid['N_lon']))
+
+        # Loop through tile space
+        for n in range(tile_coord['N_tile']):
+            # Calculate grid indices (adjust for Python's 0-based indexing)
+            i = tile_coord['i_indg'][n] - (tile_grid['i_offg'] - (1-tile_grid['ind_base'])) - 1
+            j = tile_coord['j_indg'][n] - (tile_grid['j_offg'] - (1-tile_grid['ind_base'])) - 1
+
+            # Get weight
+            w = tile_coord['frac_cell'][n]
+
+            # Check if current tile data is valid (not no-data)
+            if  ~np.isnan(tile_data[k, n]):
+                # Accumulate weighted data
+                grid_data[j, i, k] += w * tile_data[n,k]
+                wgrid[    j, i]    += w
+
+        # Normalize and set no-data values
+        for i in range(tile_grid['N_lon']):
+            for j in range(tile_grid['N_lat']):
+                if wgrid[j, i] > 0.0:
+                    # Normalize by total weight
+                    grid_data[j, i, k] = grid_data[j, i, k] / wgrid[j, i]
+                else:
+                    # Set no-data value
+                    grid_data[j, i, k] = np.nan
+
+# ============ EOF ===============================================
diff --git a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py
index 8f730d55..c4751a69 100644
--- a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py
+++ b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py
@@ -7,6 +7,8 @@ def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_
     
     """
     Map (1d) tile-space data onto (2d) regular lat/lon grid of arbitrary "resolution".
+
+    Use "tile2grid.py" to map tile data onto the grid that is associated with the tile space.
     
     Parameters:
     ----------

From b4db565f6ce68d5e6dd66fef78900e99f76d8beb Mon Sep 17 00:00:00 2001
From: Rolf Reichle <rolf.reichle@nasa.gov>
Date: Tue, 24 Jun 2025 11:53:50 -0400
Subject: [PATCH 32/45] minor fixes to comments, variable names, and vertical
 alignment (postproc_ObsFcstAna.py, Plot_stats_*.py)

---
 .../ObsFcstAna_stats/Plot_stats_maps.py       |  4 +-
 .../ObsFcstAna_stats/Plot_stats_timeseries.py | 10 +++--
 .../ObsFcstAna_stats/postproc_ObsFcstAna.py   | 40 +++++++++----------
 3 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
index ba0d7ed3..42ad9d99 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
@@ -152,7 +152,7 @@ def Main_OmF_maps():
             units = '[k]'
             if i == 0 and j == 0:
                 tile_data = Nobs_data
-                #crange is [cmin, cmax]
+                # crange is [cmin, cmax]
                 crange =[0, np.ceil((end_time-start_time).days/150)*300]
                 colormap = plt.get_cmap('jet',20)
                 title_txt = exptag + ' Tb Nobs '    + start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
@@ -179,7 +179,7 @@ def Main_OmF_maps():
             grid_data, lat_2d, lon_2d = tile_to_latlongrid(tile_data, tc, my_res)
 
 
-            # compute spatial avg of metric (directly from tile_data)
+            # compute spatial avg of metric (directly from tile_data; no weighting)
             mean    = np.nanmean(       tile_data )
             absmean = np.nanmean(np.abs(tile_data))
             
diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
index bc66e8d2..0bdba303 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
@@ -41,6 +41,8 @@ def Main_OmF_timeseries():
 
     stats_file = out_path + 'spatial_stats_'+exptag+'_'+start_time.strftime('%Y%m')+ \
         '_'+(end_time+timedelta(days=-1)).strftime('%Y%m')+'.pkl'
+
+    # Read or compute monthly stats across space
     
     if os.path.isfile(stats_file):
         
@@ -64,8 +66,8 @@ def Main_OmF_timeseries():
     # or ASCAT soil moisture from Metop-A/B/C.
     # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS!
     stats_plot = {}
-    N_data = np.nansum(stats['N_data'], axis=1)
-    stats_plot['N_data']     = N_data
+    N_data                  = np.nansum(stats['N_data'  ],                 axis=1)
+    stats_plot['N_data'  ]  = N_data
     stats_plot['OmF_mean']  = np.nansum(stats['OmF_mean']*stats['N_data'], axis=1)/N_data
     stats_plot['OmF_stdv']  = np.nansum(stats['OmF_stdv']*stats['N_data'], axis=1)/N_data
     stats_plot['OmA_mean']  = np.nansum(stats['OmA_mean']*stats['N_data'], axis=1)/N_data
@@ -74,12 +76,12 @@ def Main_OmF_timeseries():
     plot_var = 'OmF_mean'
 
     fig, ax = plt.subplots(figsize=(10,4))
-    bars = ax.bar(date_vec, stats_plot[plot_var])
+    bars    = ax.bar(date_vec, stats_plot[plot_var])
 
     plt.grid(True, linestyle='--', alpha=0.5)
 
     plt.xticks(ticks=date_vec[::2], labels=date_vec[::2])
-    plt.title(exptag+ 'monthly '+plot_var)
+    plt.title(exptag+' monthly '+plot_var)
     plt.xlim(-1, len(date_vec)+1)
     plt.ylim(-.1, 2.)
 
diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
index 9794ae42..57a122fa 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
@@ -94,7 +94,7 @@ def compute_monthly_sums(self, date_time):
         n_spec = len(obsparam_list[0])
 
         date_time = date_time.replace(hour=int(self.da_t0), minute=int(np.mod(self.da_t0,1)*60))
-        end_time  = date_time + relativedelta(months=1) 
+        stop_time = date_time + relativedelta(months=1) 
         
         data_sum  = {}
         data2_sum = {}
@@ -108,7 +108,7 @@ def compute_monthly_sums(self, date_time):
             data_sum[ var] = np.zeros((n_tile, n_spec))
             data2_sum[var] = np.zeros((n_tile, n_spec))
 
-        while date_time < end_time:
+        while date_time < stop_time:
             
             # read the list of experiments at each time step (OFA="ObsFcstAna")
             OFA_list = []
@@ -195,7 +195,7 @@ def save_monthly_sums(self):
         tc             = self.tilecoord
         obsparam_list  = self.obsparam_list
 
-        date_time   = self.start_time
+        date_time      = self.start_time
 
         while date_time < self.end_time:           # loop through months
             
@@ -416,10 +416,10 @@ def calc_spatial_stats_from_sums(self):
         current_time = start_time
         while current_time < end_time:
         
-            mo_path = self.sum_path + '/Y'+ current_time.strftime('%Y') + '/M' + current_time.strftime('%m') + '/'            
+            mo_path   = self.sum_path + '/Y'+ current_time.strftime('%Y') + '/M' + current_time.strftime('%m') + '/'            
             fnc4_sums = mo_path + self.exptag + '.ens_avg.ldas_ObsFcstAna_sums.' + current_time.strftime('%Y%m') +'.nc4'
             
-            mdata_sum = {}
+            mdata_sum  = {}
             mdata2_sum = {}
 
             try:
@@ -435,7 +435,7 @@ def calc_spatial_stats_from_sums(self):
                         mdata_sum[ var][mN_data == 0] = np.nan
                         mdata2_sum[var][mN_data == 0] = np.nan
             except FileNotFoundError:
-                print(f"Error: File '{fnc4_sums}' not found. Run Get_ObsFcstAna_sums first.")
+                print(f"Error: File '{fnc4_sums}' not found. Run Get_ObsFcstAna_sums.py first.")
                 sys.exit(1)
             except Exception as e:
                 print(f"An unexpected error occurred: {e}")
@@ -451,8 +451,8 @@ def calc_spatial_stats_from_sums(self):
             for var in var_list:
                 mdata_sum[ var][np.isnan(mN_data)] = np.nan
                 mdata2_sum[var][np.isnan(mN_data)] = np.nan
-                moxf_sum[np.isnan(mN_data)]        = np.nan
-                moxa_sum[np.isnan(mN_data)]        = np.nan
+                moxf_sum[       np.isnan(mN_data)] = np.nan
+                moxa_sum[       np.isnan(mN_data)] = np.nan
 
             # Aggregate data of all tiles
             N_data_mo  = np.nansum(mN_data, axis=0)
@@ -486,17 +486,17 @@ def calc_spatial_stats_from_sums(self):
             OmA_stdv_mo  = np.sqrt(O_var + A_var - 2 * (OxA_mean - O_mean_mo*A_mean_mo))
 
             # Extend timeseries
-            N_data.append(N_data_mo)
-            O_mean.append(O_mean_mo)
-            O_stdv.append(np.sqrt(O_var))
-            F_mean.append(F_mean_mo)
-            F_stdv.append(np.sqrt(F_var))
-            A_mean.append(A_mean_mo)
-            A_stdv.append(np.sqrt(A_var))
-            OmF_mean.append(OmF_mean_mo)
-            OmF_stdv.append(OmF_stdv_mo)
-            OmA_mean.append(OmA_mean_mo)
-            OmA_stdv.append(OmA_stdv_mo)            
+            N_data.append(  N_data_mo     )
+            O_mean.append(  O_mean_mo     )
+            O_stdv.append(  np.sqrt(O_var))
+            F_mean.append(  F_mean_mo     )
+            F_stdv.append(  np.sqrt(F_var))
+            A_mean.append(  A_mean_mo     )
+            A_stdv.append(  np.sqrt(A_var))
+            OmF_mean.append(OmF_mean_mo   )
+            OmF_stdv.append(OmF_stdv_mo   )
+            OmA_mean.append(OmA_mean_mo   )
+            OmA_stdv.append(OmA_stdv_mo   )            
 
             date_vec.append(current_time.strftime('%Y%m'))
             current_time = current_time + relativedelta(months=1)
@@ -507,7 +507,7 @@ def calc_spatial_stats_from_sums(self):
             'A_mean'  : np.array(A_mean),   'A_stdv'  : np.array(A_stdv),
             'OmF_mean': np.array(OmF_mean), 'OmF_stdv': np.array(OmF_stdv),
             'OmA_mean': np.array(OmA_mean), 'OmA_stdv': np.array(OmA_stdv),
-            'N_data': np.array(N_data),     'date_vec': date_vec
+            'N_data'  : np.array(N_data),   'date_vec': date_vec
             }
 
         return stats

From 65abe5ea2c0d102daccc3cc28f27c1eab8484e68 Mon Sep 17 00:00:00 2001
From: Rolf Reichle <rolf.reichle@nasa.gov>
Date: Tue, 24 Jun 2025 14:59:52 -0400
Subject: [PATCH 33/45] cleaned up and simplified process_hist.csh
 (GEOSldas_HIST.rc, ldas_setup)

---
 GEOSldas_App/GEOSldas_HIST.rc | 135 +++++++++-------------------------
 GEOSldas_App/ldas_setup       |  18 +++--
 GEOSldas_App/process_hist.csh |  91 ++++++++++++-----------
 3 files changed, 98 insertions(+), 146 deletions(-)

diff --git a/GEOSldas_App/GEOSldas_HIST.rc b/GEOSldas_App/GEOSldas_HIST.rc
index 1b6a14c1..37ce4b59 100644
--- a/GEOSldas_App/GEOSldas_HIST.rc
+++ b/GEOSldas_App/GEOSldas_HIST.rc
@@ -1,25 +1,23 @@
 # Sample HISTORY.rc file for GEOSldas
 #
 # This HISTORY template is edited by "ldas_setup" via "process_hist.csh".  
-# The strings '#ASSIM', '#EASE', and '#CUBE' are *not* linked to MAPL HISTORY 
-# functionality.  For example, the line 
-#    "#CUBE            'tavg24_2d_lnd_Nx'"
-#  does *not* mean that the 'lnd' output will be on a cube-sphere grid. 
 
 VERSION: 1
 
-# Must edit 'EXPID' manually if HISTORY file is re-used without going  
-#  through "ldas_setup".
-#
+# Must edit 'EXPID' manually if HISTORY file is re-used without going through "ldas_setup".
+
 EXPID:  GEOSldas_expid
 
+# ------------------------------------------------------------------------------------------------
+
+# pre-defined Collections
+
 COLLECTIONS:
-#EASE            'tavg24_1d_lfs_Nt'
-                 'tavg24_2d_lfs_Nx'
-#EASE            'tavg24_1d_lnd_Nt'
-                 'tavg24_2d_lnd_Nx'
-#CUBE            'tavg24_2d_lnd_EASE'
-#ASSIM           'SMAP_L4_SM_gph'
+#OUT1d           'tavg24_1d_lfs_Nt'
+#OUT2d           'tavg24_2d_lfs_Nx'
+#OUT1d           'tavg24_1d_lnd_Nt'
+#OUT2d           'tavg24_2d_lnd_Nx'
+#                'SMAP_L4_SM_gph'
 #                'inst1_1d_lnr_Nt'
 #                'catch_progn_incr'
 #                'inst3_1d_lndfcstana_Nt'
@@ -30,28 +28,34 @@ COLLECTIONS:
 #                'tavg24_1d_glc_Nt'
            ::
 
+# --------------------------------------------------------------------------------------------------
+
+# 2d output can be on the following grids (see [COLLECTION_NAME].grid_label])
+
 GRID_LABELS: PC720x361-DC
              PC1440x721-DC
              EASEv2_M36
       ::
 
-PC720x361-DC.GRID_TYPE: LatLon
-PC720x361-DC.IM_WORLD: 720
-PC720x361-DC.JM_WORLD: 361
-PC720x361-DC.POLE: PC
-PC720x361-DC.DATELINE: DC
-PC720x361-DC.LM: 1
+PC720x361-DC.GRID_TYPE:  LatLon
+PC720x361-DC.IM_WORLD:   720
+PC720x361-DC.JM_WORLD:   361
+PC720x361-DC.POLE:       PC
+PC720x361-DC.DATELINE:   DC
+PC720x361-DC.LM:         1
 
 PC1440x721-DC.GRID_TYPE: LatLon
-PC1440x721-DC.IM_WORLD: 1440
-PC1440x721-DC.JM_WORLD: 721
-PC1440x721-DC.POLE: PC
-PC1440x721-DC.DATELINE: DC
-PC1440x721-DC.LM: 1
+PC1440x721-DC.IM_WORLD:  1440
+PC1440x721-DC.JM_WORLD:   721
+PC1440x721-DC.POLE:      PC
+PC1440x721-DC.DATELINE:  DC
+PC1440x721-DC.LM:        1
 
-EASEv2_M36.GRID_TYPE: EASE
-EASEv2_M36.GRIDNAME: EASEv2_M36
-EASEv2_M36.LM: 1
+EASEv2_M36.GRID_TYPE:    EASE
+EASEv2_M36.GRIDNAME:     EASEv2_M36
+EASEv2_M36.LM:           1
+
+# --------------------------------------------------------------------------------------------------
 
 # Detailed definition of the collections listed above 
 #
@@ -224,15 +228,16 @@ EASEv2_M36.LM: 1
 
   tavg24_2d_lnd_Nx.format:        'CFIO',
   tavg24_2d_lnd_Nx.descr:         '2d,Daily,Time-Averaged,Single-Level,Assimilation,Land Surface Diagnostics',
-  tavg24_2d_lnd_Nx.nbits:        12,
+  tavg24_2d_lnd_Nx.nbits:         12,
   tavg24_2d_lnd_Nx.template:      '%y4%m2%d2_%h2%n2z.nc4',
   tavg24_2d_lnd_Nx.mode:          'time-averaged',
   tavg24_2d_lnd_Nx.frequency:     240000,
   tavg24_2d_lnd_Nx.ref_time:      000000,
-  tavg24_2d_lnd_Nx.regrid_exch:   '../input/tile.data'
-  tavg24_2d_lnd_Nx.regrid_name:   'GRIDNAME'
+  tavg24_2d_lnd_Nx.regrid_exch:   '../input/tile.data',
+  tavg24_2d_lnd_Nx.regrid_name:   'GRIDNAME',
 #  tavg24_2d_lnd_Nx.regrid_method: 'BILINEAR_MONOTONIC' ,                           
-  tavg24_2d_lnd_Nx.grid_label:    PC720x361-DC
+  tavg24_2d_lnd_Nx.grid_label:    PC720x361-DC,
+#  tavg24_2d_lnd_Nx.grid_label:   EASEv2_M36,
   tavg24_2d_lnd_Nx.deflate:       2,
   tavg24_2d_lnd_Nx.fields:     'GRN'        , 'VEGDYN'  ,
                                'LAI'        , 'VEGDYN'  ,
@@ -328,74 +333,6 @@ EASEv2_M36.LM: 1
                                ::
 
 
-  tavg24_2d_lnd_EASE.format:        'CFIO',
-  tavg24_2d_lnd_EASE.descr:         '2d,Daily,Time-Averaged,Single-Level,Assimilation,Land Surface Diagnostics',
-  tavg24_2d_lnd_EASE.nbits:        12,
-  tavg24_2d_lnd_EASE.template:      '%y4%m2%d2_%h2%n2z.nc4',
-  tavg24_2d_lnd_EASE.mode:          'time-averaged',
-  tavg24_2d_lnd_EASE.frequency:     240000,
-  tavg24_2d_lnd_EASE.ref_time:      000000,
-  tavg24_2d_lnd_EASE.regrid_exch:   '../input/tile.data'
-  tavg24_2d_lnd_EASE.regrid_name:   'GRIDNAME'
-#  tavg24_2d_lnd_EASE.regrid_method: 'BILINEAR_MONOTONIC' ,
-  tavg24_2d_lnd_EASE.grid_label:    EASEv2_M36
-  tavg24_2d_lnd_EASE.deflate:       2,
-  tavg24_2d_lnd_EASE.fields:     'GRN'        , 'VEGDYN'  ,
-                               'LAI'        , 'VEGDYN'  ,
-                               'WET3'       , 'GridComp'  , 'GWETPROF'     ,
-                               'WET2'       , 'GridComp'  , 'GWETROOT'     ,
-                               'WET1'       , 'GridComp'  , 'GWETTOP'      ,
-                               'WCPR'       , 'GridComp'  , 'PRMC'         ,
-                               'WCRZ'       , 'GridComp'  , 'RZMC'         ,
-                               'WCSF'       , 'GridComp'  , 'SFMC'         ,
-                               'CAPAC'      , 'GridComp'  , 'INTRWATR'     ,
-                               'TPSNOW'     , 'GridComp'  , 'TPSNOWLAND'   ,
-                               'TPUNST'     , 'GridComp'  , 'TUNSTLAND'    ,
-                               'TPSAT'      , 'GridComp'  , 'TSATLAND'     ,
-                               'TPWLT'      , 'GridComp'  , 'TWLTLAND'     ,
-                               'TPSURF'     , 'GridComp'  , 'TSURFLAND'    ,
-                               'TP1'        , 'GridComp'  , 'TSOIL1'       ,         # CATCH GC: TP1,  ENSAVG GC: TSOIL1TILE
-                               'TP2'        , 'GridComp'  , 'TSOIL2'       ,         # ...
-                               'TP3'        , 'GridComp'  , 'TSOIL3'       ,         # ...
-                               'TP4'        , 'GridComp'  , 'TSOIL4'       ,         # ...
-                               'TP5'        , 'GridComp'  , 'TSOIL5'       ,         # ...
-                               'TP6'        , 'GridComp'  , 'TSOIL6'       ,         # ...
-                               'PRLAND'     , 'GridComp'  , 'PRECTOTCORRLAND'  ,     # assume "corrected" precip
-                               'SNOLAND'    , 'GridComp'  , 'PRECSNOCORRLAND'  ,     # assume "corrected" precip
-                               'TSLAND'     , 'GridComp'  , 'SNOMASLAND'   ,
-                               'SNOWDP'     , 'GridComp'  , 'SNODPLAND'    ,
-                               'EVPSOI'     , 'GridComp'  , 'LHLANDSOIL'   ,
-                               'EVPVEG'     , 'GridComp'  , 'LHLANDTRNS'   ,
-                               'EVPINT'     , 'GridComp'  , 'LHLANDINTR'   ,
-                               'EVPICE'     , 'GridComp'  , 'LHLANDSBLN'   ,
-                               'RUNSURF'    , 'GridComp'  , 'RUNSURFLAND'  ,
-                               'BASEFLOW'   , 'GridComp'  , 'BASEFLOWLAND' ,
-                               'SMLAND'     , 'GridComp'  ,
-                               'QINFIL'     , 'GridComp'  , 'QINFILLAND'   ,
-                               'FRUST'      , 'GridComp'  , 'FRLANDUNST'   ,
-                               'FRSAT'      , 'GridComp'  , 'FRLANDSAT'    ,
-                               'ASNOW'      , 'GridComp'  , 'FRLANDSNO'    ,
-                               'FRWLT'      , 'GridComp'  , 'FRLANDWLT'    ,
-                               'DFPARLAND'  , 'GridComp'  , 'PARDFLAND'    ,
-                               'DRPARLAND'  , 'GridComp'  , 'PARDRLAND'    ,
-                               'SHLAND'     , 'GridComp'  ,
-                               'LHLAND'     , 'GridComp'  ,
-                               'EVLAND'     , 'GridComp'  ,
-                               'LWLAND'     , 'GridComp'  ,
-                               'SWLAND'     , 'GridComp'  ,
-                               'GHLAND'     , 'GridComp'  ,
-                               'TWLAND'     , 'GridComp'  ,
-                               'TELAND'     , 'GridComp'  ,
-                               'DWLAND'     , 'GridComp'  , 'WCHANGELAND'  ,
-                               'DHLAND'     , 'GridComp'  , 'ECHANGELAND'  ,
-                               'SPLAND'     , 'GridComp'  , 'SPSHLAND'     ,
-#                               'SPLH'       , 'GridComp'  , 'SPLHLAND'     ,        # works for Catch only, not yet for CatchCN
-                               'SPWATR'     , 'GridComp'  , 'SPEVLAND'     ,
-                               'SPSNOW'     , 'GridComp'  , 'SPSNLAND'     ,
-                               'PEATCLSM_WATERLEVEL', 'GridComp'  ,
-                               'PEATCLSM_FSWCHANGE' , 'GridComp'  ,
-                           ::
-
   const_1d_lnd_Nt.descr:       'Tile-space,Constant,Time-invariant,Single-Level,Assimilation,Land Surface Model Parameters',
   const_1d_lnd_Nt.template:    '%y4%m2%d2_%h2%n2z.bin',
   const_1d_lnd_Nt.mode:        'instantaneous',
diff --git a/GEOSldas_App/ldas_setup b/GEOSldas_App/ldas_setup
index c8d69b5c..845312b6 100755
--- a/GEOSldas_App/ldas_setup
+++ b/GEOSldas_App/ldas_setup
@@ -1283,12 +1283,18 @@ class LDASsetup:
                     shutil.copy2(histrc_file,tmprcfile)
                 else :
                     shutil.copy2(histrc_file,tmprcfile)
-                    GRID='EASE ' + self.rqdExeInp['GRIDNAME'] + ' ' +tmprcfile
-                    if '-CF' in self.rqdExeInp['GRIDNAME'] :
-                        GRID ='CUBE ' + self.rqdExeInp['GRIDNAME'] + ' ' +tmprcfile
-                    _assim = '1' if self.assim else '0'
-                    cmd =self.bindir +'/process_hist.csh '+ str(self.rqdExeInp['LSM_CHOICE']) + ' ' + str(self.rqdExeInp['AEROSOL_DEPOSITION']) + \
-                          ' ' + GRID + ' ' + str(self.rqdExeInp['RUN_IRRIG']) + ' ' + _assim + ' '+ str(self.nens)
+                    if 'EASE' in self.rqdExeInp['GRIDNAME'] :
+                        TMPSTR='OUT1d'
+                    else :
+                        TMPSTR='OUT2d'
+                    cmd = self.bindir +'/process_hist.csh'          + ' ' \
+                        + tmprcfile                                 + ' ' \
+                        + TMPSTR                                    + ' ' \
+                        + self.rqdExeInp['GRIDNAME']                + ' ' \
+                        + str(self.rqdExeInp['LSM_CHOICE'])         + ' ' \
+                        + str(self.rqdExeInp['AEROSOL_DEPOSITION']) + ' ' \
+                        + str(self.rqdExeInp['RUN_IRRIG'])          + ' ' \
+                        + str(self.nens)
                     print(cmd)
                     #os.system(cmd)
                     sp.call(shlex.split(cmd))
diff --git a/GEOSldas_App/process_hist.csh b/GEOSldas_App/process_hist.csh
index 97d35afe..6156e736 100644
--- a/GEOSldas_App/process_hist.csh
+++ b/GEOSldas_App/process_hist.csh
@@ -1,55 +1,74 @@
 #!/bin/csh -f
 
-## I am changed the CUBE/EASE logic
-## if CUBE we produce 2D
-## anything else, SMAP and other offline grids we produce tile space
-
-setenv    LSM_CHOICE $1
-setenv    AEROSOL_DEPOSITION $2
-setenv    GRID $3
-setenv    GRIDNAME "'$4'"
-setenv    HISTRC $5
-setenv    RUN_IRRIG $6
-setenv    ASSIM  $7
-setenv    NENS $8
+# process GEOSldas_HIST.rc (=$HISTRC) template
+#
+# - turn on/off HIST collections depending on tile space
+#   - EASE:      turn on tile-space (1d) output
+#   - otherwise: turn on gridded    (2d) output
+# - turn on/off output variables depending on LSM_CHOICE, AEROSOL_DEPOSITION, and RUN_IRRIG
+# - fill in source 'GridComp' info for variables depending on NENS
+
+# process command line args
+
+setenv HISTRC             $1         # file name of HIST rc template (GEOSldas_HIST.rc)
+setenv OUTxd              $2         # "OUT1d" or "OUT2d" (to turn on/off collections)
+setenv GRIDNAME         "'$3'"       # full name of grid associated with tile space
+setenv LSM_CHOICE         $4
+setenv AEROSOL_DEPOSITION $5
+setenv RUN_IRRIG          $6
+setenv NENS               $7
+
+# -------------------------------------------------
 
 echo $GRIDNAME
 
-if($ASSIM == 1) then
-   sed -i 's|\#ASSIM|''|g' $HISTRC
-   sed -i '/^\#EASE/d' $HISTRC
-   sed -i '/^\#CUBE/d' $HISTRC
-else
-   sed -i '/^\#ASSIM/d' $HISTRC
-endif
+# uncomment 2d or 1d collections, depending on "OUT1d" (EASE tile space) or "OUT2d" (non-EASE tile space)
 
-if($GRID == CUBE) then
-   sed -i '/^\#EASE/d' $HISTRC
-   sed -i 's|\#CUBE|''|g' $HISTRC
+if($OUTxd == OUT1d) then
+   sed -i 's|\#OUT1d|''|g' $HISTRC
 else
-   sed -i '/^\#CUBE/d' $HISTRC
-   sed -i 's|\#EASE|''|g' $HISTRC
+   sed -i 's|\#OUT2d|''|g' $HISTRC
 endif
+
+# fill in name of grid associated with tile space
+
 sed -i -e  s/\'GRIDNAME\'/$GRIDNAME/g $HISTRC
 
+# set 'GridComp' based on LSM_CHOICE;
+# turn on/off variables associated with CATCHCN, AEROSOL_DEPOSITION, RUN_IRRIG
+
 if($LSM_CHOICE == 1) then
    set GridComp = CATCH
-   sed -i '/^>>>HIST_CATCHCN<<</d' $HISTRC
-   sed -i '/^>>>HIST_CATCHCNCLM45<<</d' $HISTRC
+   sed -i '/^>>>HIST_CATCHCN<<</d'         $HISTRC
+   sed -i '/^>>>HIST_CATCHCNCLM45<<</d'    $HISTRC
 endif
 
 if($LSM_CHOICE == 2) then
    set GridComp = CATCHCN
-   sed -i '/^>>>HIST_CATCHCNCLM45<<</d' $HISTRC
-   sed -i 's/>>>HIST_CATCHCN<<</''/g' $HISTRC
+   sed -i '/^>>>HIST_CATCHCNCLM45<<</d'    $HISTRC
+   sed -i 's/>>>HIST_CATCHCN<<</''/g'      $HISTRC
 endif
 
 if($LSM_CHOICE == 3) then
    set GridComp = CATCHCN
-   sed -i 's/>>>HIST_CATCHCN<<</''/g' $HISTRC
+   sed -i 's/>>>HIST_CATCHCN<<</''/g'      $HISTRC
    sed -i 's/>>>HIST_CATCHCNCLM45<<</''/g' $HISTRC
 endif
 
+if($AEROSOL_DEPOSITION == 0) then
+   sed -i '/^>>>HIST_AEROSOL<<</d'         $HISTRC
+else
+   sed -i 's/>>>HIST_AEROSOL<<</''/g'      $HISTRC
+endif
+
+if($RUN_IRRIG == 0) then
+   sed -i '/^>>>HIST_IRRIG<<</d'           $HISTRC
+else
+   sed -i 's/>>>HIST_IRRIG<<</''/g'        $HISTRC
+endif
+
+# for ensemble simulations, set 'GridComp' to ENSAVG 
+
 if($NENS > 1) then
    set GridComp = ENSAVG
    sed -i 's|VEGDYN|'VEGDYN_e0000'|g' $HISTRC
@@ -62,16 +81,6 @@ if($NENS > 1) then
 #   sed -i 's|DATAATM|'DATAATM0000'|g' $HISTRC
 endif
 
-sed -i 's|GridComp|'$GridComp'|g' $HISTRC
+# fill in source 'GridComp' information for output variables
 
-if($AEROSOL_DEPOSITION == 0) then
-   sed -i '/^>>>HIST_AEROSOL<<</d' $HISTRC
-else
-   sed -i 's/>>>HIST_AEROSOL<<</''/g' $HISTRC
-endif
-
-if($RUN_IRRIG == 0) then
-   sed -i '/^>>>HIST_IRRIG<<</d' $HISTRC
-else
-   sed -i 's/>>>HIST_IRRIG<<</''/g' $HISTRC
-endif
+sed -i 's|GridComp|'$GridComp'|g' $HISTRC

From 1ed302925e9cd3df940c8f68d3057bdba63f9f33 Mon Sep 17 00:00:00 2001
From: Rolf Reichle <rolf.reichle@nasa.gov>
Date: Tue, 24 Jun 2025 15:02:33 -0400
Subject: [PATCH 34/45] edited CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6fe41d67..8d12932b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,7 +11,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
-- Added EASE to Latlon and Cubed-Sphere to EASE history outputs
 - Added optional SLURM "constraint".
 - Added functionality to run on tile space of stretched cube-sphere grids.
 - Added python package for post-processing ObsFcstAna output.
@@ -19,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Changed
 
+- Revised processing of HISTORY template (GEOSldas_HIST.rc).
 - Switch to using EASE grid tools in MAPL.
 - Specify only ntasks_model for SLURM resource request.
 - Revisions for handling of Nens and special nml and mwtrm path/files in coupled land-atm DAS.

From 33f2fca1f96c3c97074a8c0e421617797abb224f Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Tue, 24 Jun 2025 16:57:35 -0400
Subject: [PATCH 35/45] add missing return line and minor edits

---
 GEOSldas_App/util/shared/python/tile2grid.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/GEOSldas_App/util/shared/python/tile2grid.py b/GEOSldas_App/util/shared/python/tile2grid.py
index b918ac51..096d45b6 100644
--- a/GEOSldas_App/util/shared/python/tile2grid.py
+++ b/GEOSldas_App/util/shared/python/tile2grid.py
@@ -18,6 +18,15 @@ def tile2grid(tile_data, tile_coord, tile_grid):
     -------
     grid_data : Array in grid space, shape (N_lat, N_lon, N_fields)
     """
+
+    # Verify input datasize
+    if tile_data.shape[0] != tile_coord['N_tile']:
+        print(f'Error: size of tile2grid input data does not match that of N_tile')
+        sys.exit()
+        
+    # if tile_data is 1-D [N_tile], expand into 2-D [N_tile,1]
+    if tile_data.ndim == 1:
+        tile_data = np.expand_dims(tile_data, axis=1)
     
     N_fields = tile_data.shape[-1]
     # Initialize grid data array
@@ -37,7 +46,7 @@ def tile2grid(tile_data, tile_coord, tile_grid):
             w = tile_coord['frac_cell'][n]
 
             # Check if current tile data is valid (not no-data)
-            if  ~np.isnan(tile_data[k, n]):
+            if  ~np.isnan(tile_data[n, k]):
                 # Accumulate weighted data
                 grid_data[j, i, k] += w * tile_data[n,k]
                 wgrid[    j, i]    += w
@@ -52,4 +61,6 @@ def tile2grid(tile_data, tile_coord, tile_grid):
                     # Set no-data value
                     grid_data[j, i, k] = np.nan
 
+    return grid_data.squeeze()
+
 # ============ EOF ===============================================

From 88739fa6ddab3977c2bbbe0587a3222487b71cef Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Tue, 24 Jun 2025 16:59:25 -0400
Subject: [PATCH 36/45] change max to mean tile values for grid

---
 .../util/shared/python/tile_to_latlongrid.py  | 29 ++++++++++---------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py
index c4751a69..a4c4de46 100644
--- a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py
+++ b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py
@@ -3,7 +3,7 @@
 
 # 
         
-def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_tol=1.e-4):
+def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_tol_frac=1.e-4):
     
     """
     Map (1d) tile-space data onto (2d) regular lat/lon grid of arbitrary "resolution".
@@ -26,7 +26,7 @@ def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_
     
     """
     
-    tile_data[np.abs(tile_data - nodata) < nodata_tol]  = np.nan
+    tile_data[np.abs(tile_data - nodata) < nodata*nodata_tol_frac]  = np.nan
     
     # Verify input datasize
     if tile_data.shape[0] != tile_coord['N_tile']:
@@ -39,25 +39,26 @@ def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_
 
     nf = tile_data.shape[-1]
 
-    lat_grid = np.arange( -90+resolution/2,  90, resolution)
-    lon_grid = np.arange(-180+resolution/2, 180, resolution)
+    lat_grid = np.arange( -90+resolution/2,  90+resolution/2, resolution)
+    lon_grid = np.arange(-180+resolution/2, 180+resolution/2, resolution)
 
     lon_2d, lat_2d = np.meshgrid(lon_grid, lat_grid)
 
-    grid_data = np.full([len(lat_grid), len(lon_grid), nf], np.nan)
+    grid_data = np.zeros((len(lat_grid), len(lon_grid), nf))
+    N_tilecnt = np.zeros((len(lat_grid), len(lon_grid), nf))
 
     for v in range(nf):        
         for k in range(tile_coord['N_tile']):
-            if np.any([~np.isnan(tile_data[k,:])], axis=1):
+            if ~np.isnan(tile_data[k,:]):
                 j = np.searchsorted(lat_grid + resolution/2., tile_coord['com_lat'][k])
                 i = np.searchsorted(lon_grid + resolution/2., tile_coord['com_lon'][k])
-                # sometime the missing points are filled with zeros. taking largest value within
-                # the target grid cell avoids picking up unwanted zeros.
-                grid_data[j,i,v] = np.nanmax([tile_data[k,v], grid_data[j,i,v]])
-                                            
-    if grid_data.shape[-1] == 1:
-        grid_data = grid_data[:,:,0]
-        
-    return grid_data, lat_2d, lon_2d
+                # grid value takes the mean of all tiles within
+                # new_mean = old_mean + (new_value - old_mean)/count
+                N_tilecnt[j,i,v] += 1
+                grid_data[j,i,v] = grid_data[j,i,v] + (tile_data[k,v]-grid_data[j,i,v])/N_tilecnt[j,i,v]
+
+    grid_data[N_tilecnt == 0] = np.nan
+    
+    return grid_data.squeeze(), lat_2d, lon_2d
 
 # =========================== EOF ============================================================

From bc8e44b7a9e152ad34df309382bb2db3172690fc Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Tue, 24 Jun 2025 17:01:26 -0400
Subject: [PATCH 37/45] reverse comment change

---
 .../util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py
index b8283e03..b2543ffc 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Get_ObsFcstAna_sums.py
@@ -34,7 +34,7 @@
 # If the script is run in the background, uncomment the following lines to see the redirected
 #   standard output in the out.log file immediately.  When the lines are commented out, the redirected
 #   standard output will not appear in the out.log file until the job has completed.
-# If the script is run in the foreground, the lines can be commented out to monitor the output log.
+# If the script is run in the foreground, the lines must be commented out.
 #
 #import io
 #sys.stdout = io.TextIOWrapper(open(sys.stdout.fileno(), 'wb', 0), write_through=True)

From f97f620804bebd857fd4ec5e144312b72c58c1d5 Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Tue, 24 Jun 2025 17:05:59 -0400
Subject: [PATCH 38/45] nodata_tol -> nodata_tol_frac

---
 GEOSldas_App/util/shared/python/tile_to_latlongrid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py
index a4c4de46..3a0b8cc3 100644
--- a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py
+++ b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py
@@ -16,7 +16,7 @@ def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_
     tile_coord : Dictionary containing tile coordinate information
     resolution : target grid resolution
     nodata     : Value for missing data
-    nodata_tol : Tolerance for comparing values to nodata
+    nodata_tol_frac : Tolerance fraction for comparing values to nodata
         
     Returns:
     -------

From e07721c680f1d103af0aed187813ddf2697ba26c Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Tue, 24 Jun 2025 17:07:27 -0400
Subject: [PATCH 39/45] fix imports, minor modification

---
 .../ObsFcstAna_stats/Plot_stats_maps.py       | 27 ++++++++++---------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
index 42ad9d99..b0b7382b 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
@@ -24,7 +24,7 @@
 
 from netCDF4                import Dataset
 
-from tile_to_latlon         import tile_to_latlon, get_grid_resolution
+from tile_to_latlongrid     import tile_to_latlongrid
 from plot                   import plotMap
 from EASEv2                 import EASEv2_ind2latlon
 
@@ -92,17 +92,18 @@ def Main_OmF_maps():
     # Typically used for SMAP Tb_h/h from asc and desc overpasses,
     # or ASCAT soil moisture from Metop-A/B/C.
     # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS!
-    Nobs_data     = np.nansum(              N_data, axis=1)
-    OmF_mean      = np.nansum(OmF_mean     *N_data, axis=1)/Nobs_data
-    OmF_stdv      = np.nansum(OmF_stdv     *N_data, axis=1)/Nobs_data
-    OmF_norm_mean = np.nansum(OmF_norm_mean*N_data, axis=1)/Nobs_data
-    OmF_norm_stdv = np.nansum(OmF_norm_stdv*N_data, axis=1)/Nobs_data
-    OmA_mean      = np.nansum(OmA_mean     *N_data, axis=1)/Nobs_data
-    OmA_stdv      = np.nansum(OmA_stdv     *N_data, axis=1)/Nobs_data
-
-    # Nobs_data need to be at least 1, otherwise unwanted zeros 
+    Ndata_sum     = np.nansum(              N_data, axis=1)
+    OmF_mean      = np.nansum(OmF_mean     *N_data, axis=1)/Ndata_sum
+    OmF_stdv      = np.nansum(OmF_stdv     *N_data, axis=1)/Ndata_sum
+    OmF_norm_mean = np.nansum(OmF_norm_mean*N_data, axis=1)/Ndata_sum
+    OmF_norm_stdv = np.nansum(OmF_norm_stdv*N_data, axis=1)/Ndata_sum
+    OmA_mean      = np.nansum(OmA_mean     *N_data, axis=1)/Ndata_sum
+    OmA_stdv      = np.nansum(OmA_stdv     *N_data, axis=1)/Ndata_sum
+    N_data = Ndata_sum
+
+    # N_data need to be at least 1, otherwise unwanted zeros 
     # might get into the computation of global mean etc.
-    Nobs_data[Nobs_data == 0] = np.nan  
+    N_data[N_data == 0] = np.nan  
 
     # --------------------------------------------------------------------------------
     # Plot stats on regular lat/lon grid, with grid spacing guided by the footprint 
@@ -151,11 +152,11 @@ def Main_OmF_maps():
         for j in np.arange(2):
             units = '[k]'
             if i == 0 and j == 0:
-                tile_data = Nobs_data
+                tile_data = N_data
                 # crange is [cmin, cmax]
                 crange =[0, np.ceil((end_time-start_time).days/150)*300]
                 colormap = plt.get_cmap('jet',20)
-                title_txt = exptag + ' Tb Nobs '    + start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
+                title_txt = exptag + ' Tb Nobs (excl. 0s)'    + start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
                 units = '[-]'
             if i == 0 and j ==1:
                 tile_data = OmF_mean

From 351e0a06ddc899cb0d39851bc83c8740c18b20a9 Mon Sep 17 00:00:00 2001
From: Rolf Reichle <rolf.reichle@nasa.gov>
Date: Tue, 24 Jun 2025 17:48:41 -0400
Subject: [PATCH 40/45] minimal edits of comments; white-space changes
 (Plot_stats_maps.py, tile_to_latlongrid.py)

---
 .../postproc/ObsFcstAna_stats/Plot_stats_maps.py  | 15 +++++++++++----
 .../util/shared/python/tile_to_latlongrid.py      |  8 ++++----
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
index b0b7382b..ebc23090 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
@@ -87,22 +87,29 @@ def Main_OmF_maps():
     
     # Select/combine fields for plotting. The following provides an example to
     # computes average stats across all species.
-    
+    #
     # Compute Nobs-weighted avg of each metric across all species.
     # Typically used for SMAP Tb_h/h from asc and desc overpasses,
     # or ASCAT soil moisture from Metop-A/B/C.
     # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS!
+    
     Ndata_sum     = np.nansum(              N_data, axis=1)
+    
     OmF_mean      = np.nansum(OmF_mean     *N_data, axis=1)/Ndata_sum
     OmF_stdv      = np.nansum(OmF_stdv     *N_data, axis=1)/Ndata_sum
     OmF_norm_mean = np.nansum(OmF_norm_mean*N_data, axis=1)/Ndata_sum
     OmF_norm_stdv = np.nansum(OmF_norm_stdv*N_data, axis=1)/Ndata_sum
     OmA_mean      = np.nansum(OmA_mean     *N_data, axis=1)/Ndata_sum
     OmA_stdv      = np.nansum(OmA_stdv     *N_data, axis=1)/Ndata_sum
-    N_data = Ndata_sum
+    
+    N_data        = Ndata_sum
+
+    # The obs are assigned to tiles only for book-keeping purposes, and the resolution of
+    # the obs is often coarser than that of the model tile space.  Therefore, typically many
+    # tiles are never assigned any obs and end up with N_data=0.
+    # Here, we remove these unwanted zeros from N_data to keep them out of the computation of 
+    # spatial averages (global avg; mapping from tile space to lat/lon plotting grid)
 
-    # N_data need to be at least 1, otherwise unwanted zeros 
-    # might get into the computation of global mean etc.
     N_data[N_data == 0] = np.nan  
 
     # --------------------------------------------------------------------------------
diff --git a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py
index 3a0b8cc3..ea13a13c 100644
--- a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py
+++ b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py
@@ -12,10 +12,10 @@ def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_
     
     Parameters:
     ----------
-    tile_data  : Array in tile space, shape (N_tile, N_fields)
-    tile_coord : Dictionary containing tile coordinate information
-    resolution : target grid resolution
-    nodata     : Value for missing data
+    tile_data       : Array in tile space, shape (N_tile, N_fields)
+    tile_coord      : Dictionary containing tile coordinate information
+    resolution      : Target grid resolution
+    nodata          : Value for missing data
     nodata_tol_frac : Tolerance fraction for comparing values to nodata
         
     Returns:

From a48ac51f9bfe1d5b4dc0bdcd48d087ffc0ff640e Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Wed, 25 Jun 2025 09:30:40 -0400
Subject: [PATCH 41/45] correction for missing files

---
 .../ObsFcstAna_stats/postproc_ObsFcstAna.py   | 47 ++++++++++---------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
index 57a122fa..027ea8a1 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
@@ -152,31 +152,32 @@ def compute_monthly_sums(self, date_time):
 
                 data_all.append(data_tile)
 
-            # cross-mask over all experiments 
-            is_cross_valid = ~np.isnan(data_all[0]['obs_obs'])
-            for data in data_all[1:]:
-                mask = ~np.isnan(data['obs_obs'])
-                is_cross_valid = np.logical_and(is_cross_valid,mask)
-
-            # reconstruct the output variable dictionary based on input options;
-            # obs_obs and obs_obsvar are from exp_list[obs_from], the rest are from exp_list[0]
-            data_tile = {}    
-            for var in var_list:
-                if 'obs_obs' in var:
-                    data_tile[var] = data_all[obs_from][var]
-                else:
-                    data_tile[var] = data_all[0][var]
+            if len(data_all) > 0:
+                # cross-mask over all experiments 
+                is_cross_valid = ~np.isnan(data_all[0]['obs_obs'])
+                for data in data_all[1:]:
+                    mask = ~np.isnan(data['obs_obs'])
+                    is_cross_valid = np.logical_and(is_cross_valid,mask)
+
+                # reconstruct the output variable dictionary based on input options;
+                # obs_obs and obs_obsvar are from exp_list[obs_from], the rest are from exp_list[0]
+                data_tile = {}    
+                for var in var_list:
+                    if 'obs_obs' in var:
+                        data_tile[var] = data_all[obs_from][var]
+                    else:
+                        data_tile[var] = data_all[0][var]
 
-            is_valid = is_cross_valid
-            
-            N_data[ is_valid] += 1
-            oxf_sum[is_valid] += data_tile['obs_obs' ][is_valid] * data_tile['obs_fcst'][is_valid]
-            oxa_sum[is_valid] += data_tile['obs_obs' ][is_valid] * data_tile['obs_ana' ][is_valid]
-            fxa_sum[is_valid] += data_tile['obs_fcst'][is_valid] * data_tile['obs_ana' ][is_valid]
+                is_valid = is_cross_valid
+                
+                N_data[ is_valid] += 1
+                oxf_sum[is_valid] += data_tile['obs_obs' ][is_valid] * data_tile['obs_fcst'][is_valid]
+                oxa_sum[is_valid] += data_tile['obs_obs' ][is_valid] * data_tile['obs_ana' ][is_valid]
+                fxa_sum[is_valid] += data_tile['obs_fcst'][is_valid] * data_tile['obs_ana' ][is_valid]
 
-            for var in var_list:
-                data_sum[ var][is_valid] += data_tile[var][is_valid]
-                data2_sum[var][is_valid] += data_tile[var][is_valid] **2
+                for var in var_list:
+                    data_sum[ var][is_valid] += data_tile[var][is_valid]
+                    data2_sum[var][is_valid] += data_tile[var][is_valid] **2
             
             date_time = date_time + timedelta(seconds=da_dt)
 

From 6293a9f612f3f5036517c1e51ab20d8f99a0fd4a Mon Sep 17 00:00:00 2001
From: Qing Liu <qing.liu@nasa.gov>
Date: Wed, 25 Jun 2025 09:31:52 -0400
Subject: [PATCH 42/45] change loop order

---
 GEOSldas_App/util/shared/python/tile_to_latlongrid.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py
index ea13a13c..da870be0 100644
--- a/GEOSldas_App/util/shared/python/tile_to_latlongrid.py
+++ b/GEOSldas_App/util/shared/python/tile_to_latlongrid.py
@@ -47,9 +47,10 @@ def tile_to_latlongrid( tile_data, tile_coord, resolution, nodata=1.e15, nodata_
     grid_data = np.zeros((len(lat_grid), len(lon_grid), nf))
     N_tilecnt = np.zeros((len(lat_grid), len(lon_grid), nf))
 
-    for v in range(nf):        
-        for k in range(tile_coord['N_tile']):
-            if ~np.isnan(tile_data[k,:]):
+          
+    for k in range(tile_coord['N_tile']):
+        for v in range(nf):
+            if ~np.isnan(tile_data[k,v]):
                 j = np.searchsorted(lat_grid + resolution/2., tile_coord['com_lat'][k])
                 i = np.searchsorted(lon_grid + resolution/2., tile_coord['com_lon'][k])
                 # grid value takes the mean of all tiles within

From 6567edc861c91251100237fed10747f4dbe6dff8 Mon Sep 17 00:00:00 2001
From: Weiyuan Jiang <weiyuan.jiang@nasa.gov>
Date: Wed, 25 Jun 2025 15:00:01 -0400
Subject: [PATCH 43/45] path fix for ncra

---
 CHANGELOG.md                     | 1 +
 GEOSldas_App/lenkf_j_template.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8d12932b..c5060808 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 
+- Fixed ncra path for monthly compression
 - Fixed error from MAPL's ApplicationSupport.F90 to init UDUNITS.
 
 ### Removed
diff --git a/GEOSldas_App/lenkf_j_template.py b/GEOSldas_App/lenkf_j_template.py
index 29090097..0ed56fd0 100644
--- a/GEOSldas_App/lenkf_j_template.py
+++ b/GEOSldas_App/lenkf_j_template.py
@@ -646,7 +646,7 @@
           if($NAVAIL != $NDAYS) continue
 
           # create monthly-mean nc4 file
-          ncra -h $EXPID.$ThisCol.${{YYYY}}${{MM}}*.nc4 ${{EXPID}}.${{ThisCol}}.monthly.$YYYY$MM.nc4
+          $BASEBIN/ncra -h $EXPID.$ThisCol.${{YYYY}}${{MM}}*.nc4 ${{EXPID}}.${{ThisCol}}.monthly.$YYYY$MM.nc4
 
           if($POSTPROC_HIST == 2) then
              # rudimentary check for desired nc4 file;  if ok, delete daily files

From 34bd3e7a18b670ebe197d92a90bb4125ccf3e979 Mon Sep 17 00:00:00 2001
From: amfox37 <amfox37@gmail.com>
Date: Wed, 25 Jun 2025 13:29:47 -0600
Subject: [PATCH 44/45] tg bug fix and typos

---
 .../util/postproc/ObsFcstAna_stats/Plot_stats_maps.py         | 2 +-
 .../util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py   | 2 +-
 .../util/postproc/ObsFcstAna_stats/helper/write_nc4.py        | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
index ebc23090..18a4c09e 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_maps.py
@@ -136,7 +136,7 @@ def Main_OmF_maps():
 
     # If obs_fov=0, reset to match model resolution.
     if obs_fov == 0:
-        my_res = np.sqrt( tile_grid['dlat']*tile_grid['dlon'] )
+        my_res = np.sqrt( tg['dlat']*tg['dlon'] )
 
     # pick a resolution from a sample vector of resolutions
 
diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
index 0bdba303..2c499cfd 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/Plot_stats_timeseries.py
@@ -56,7 +56,7 @@ def Main_OmF_timeseries():
         
         if stats_file is not None:
             with open(stats_file,'wb') as file:
-                print(f'saveing stats to {stats_file}')
+                print(f'saving stats to {stats_file}')
                 pickle.dump(stats,file)
 
     date_vec = stats['date_vec']
diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/helper/write_nc4.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/helper/write_nc4.py
index 8090cc89..57ebf694 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/helper/write_nc4.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/helper/write_nc4.py
@@ -9,7 +9,7 @@
 
 def write_sums_nc4(file_path, N_data, data_sum, data2_sum, oxf_sum, oxa_sum, fxa_sum, obs_param):
     
-    nc = Dataset(file_path,'w',formate='NETCDF4')
+    nc = Dataset(file_path,'w',format='NETCDF4')
     
     tile    = nc.createDimension('tile',    N_data.shape[0])
     species = nc.createDimension('species', N_data.shape[1])
@@ -46,7 +46,7 @@ def write_sums_nc4(file_path, N_data, data_sum, data2_sum, oxf_sum, oxa_sum, fxa
 
 def write_stats_nc4(file_path, stats):
 
-    nc = Dataset(file_path,'w',formate='NETCDF4')
+    nc = Dataset(file_path,'w',format='NETCDF4')
     
     tile    = nc.createDimension('tile',    stats['N_data'].shape[0])
     species = nc.createDimension('species', stats['N_data'].shape[1])

From b0b3ed1f88114eb1df9a744bf095447c0a9ea8d2 Mon Sep 17 00:00:00 2001
From: amfox37 <amfox37@gmail.com>
Date: Wed, 25 Jun 2025 14:04:01 -0600
Subject: [PATCH 45/45] quiet divide by nan

---
 .../postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py   | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
index 027ea8a1..00058e06 100644
--- a/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
+++ b/GEOSldas_App/util/postproc/ObsFcstAna_stats/postproc_ObsFcstAna.py
@@ -457,14 +457,16 @@ def calc_spatial_stats_from_sums(self):
 
             # Aggregate data of all tiles
             N_data_mo  = np.nansum(mN_data, axis=0)
-            OxF_mean   = np.nansum(moxf_sum,axis=0)/N_data_mo
-            OxA_mean   = np.nansum(moxa_sum,axis=0)/N_data_mo
+            with np.errstate(divide='ignore', invalid='ignore'):
+                OxF_mean   = np.nansum(moxf_sum, axis=0) / np.where(N_data_mo > 0, N_data_mo, np.nan)
+                OxA_mean   = np.nansum(moxa_sum, axis=0) / np.where(N_data_mo > 0, N_data_mo, np.nan)
             data_mean  = {}
             data2_mean = {}
             data_var   = {}
             for var in var_list:
-                data_mean[ var] = np.nansum(mdata_sum[var ],axis=0)/N_data_mo
-                data2_mean[var] = np.nansum(mdata2_sum[var],axis=0)/N_data_mo
+                with np.errstate(divide='ignore', invalid='ignore'):
+                    data_mean[ var]  = np.nansum(mdata_sum[var ], axis=0) / np.where(N_data_mo > 0, N_data_mo, np.nan)
+                    data2_mean[var]  = np.nansum(mdata2_sum[var], axis=0) / np.where(N_data_mo > 0, N_data_mo, np.nan)
                 # var(x) = E[x2] - (E[x])^2
                 data_var[var] = data2_mean[var] - data_mean[var]**2