forked from USEPA/CMAQ
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcube_maker_1.csh
More file actions
executable file
·267 lines (230 loc) · 7.64 KB
/
cube_maker_1.csh
File metadata and controls
executable file
·267 lines (230 loc) · 7.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
#!/bin/csh -f
#
# cube_maker_2phase.csh
#
# Two-phase production workflow:
# Phase 1: Subset daily CMAQ chemistry files to a reduced variable set
# (fast, avoids building a huge temporary cube)
# Phase 2: Concatenate all reduced daily files once using ncrcat
#
# Rationale:
# - Repeated "append" with m3cple rewrites the growing cube each day (O(N^2) I/O),
# which becomes extremely slow by mid-year.
# - Subset + one-shot ncrcat is O(N) I/O and is the standard scalable approach.
#
# Assumptions:
# - Daily input files exist as:
# ${NAME_ROOT}/${NAME_PREF}${YYYYMMDD}${NAME_SUFF}
# - Every variable named in VARLIST (e.g. TA, and W_VEL if requested) exists in those chemistry output files.
# - All daily files share consistent dimensions and metadata.
#
# Notes:
# - We keep TFLAG in the subset (required for IOAPI time axis handling).
# - We do NOT use m3cple in this workflow; ncrcat replaces it for concatenation.
# - ncrcat expects the time dimension to be record/unlimited and consistent.
#
# Usage:
# ./cube_maker_2phase.csh
#
# Optional environment overrides:
# setenv BASE_D_Greg 20190101
# setenv FINAL_D_Greg 20191231
# setenv FORCE_REDO_SUBSET Y # regenerate subset files even if they exist
# setenv KEEP_DAILY_SUBSET Y # keep daily subset files after concat (default Y)
#
# -----------------------------
# Configuration
# -----------------------------
# Pieces of the daily input file name:
#   ${NAME_ROOT}/${NAME_PREF}<YYYYMMDD>${NAME_SUFF}
setenv NAME_SUFF ".nc"
setenv NAME_TYPE "ACONC" # "AELMO" # "DRYDEP" # "WETDEP1" # "AELMO" # "ACONC"
setenv NAME_PREF "CCTM_${NAME_TYPE}_v55_gcc_baseAnnual_"
setenv NAME_ROOT "/data/CMAQ_5.5/output/12km/CCTM_v55_gcc_baseAnnual_nonDDM/"
# Launch directory; the script cd's back here before it exits.
setenv NAME_HOME "$PWD"

# Date range (Gregorian YYYYMMDD). These are defaults only: a value already
# present in the environment wins, matching the handling of the two flags
# below. (Unconditional setenv here would silently discard the override.)
if ( ! $?BASE_D_Greg ) then
    setenv BASE_D_Greg 20190101
endif
if ( ! $?FINAL_D_Greg ) then
    setenv FINAL_D_Greg 20191231
endif

# The dates can now come from the caller, so reject anything that is not an
# 8-digit YYYYMMDD-looking value before it reaches greg2jul or a file name.
foreach day_val ( "$BASE_D_Greg" "$FINAL_D_Greg" )
    if ( "$day_val" !~ [12][0-9][0-9][0-9][01][0-9][0-3][0-9] ) then
        echo "ERROR: Date is not in YYYYMMDD form: '$day_val'"
        exit 1
    endif
end
# An inverted range would produce zero daily files and nothing to concatenate.
if ( $BASE_D_Greg > $FINAL_D_Greg ) then
    echo "ERROR: BASE_D_Greg ($BASE_D_Greg) is later than FINAL_D_Greg ($FINAL_D_Greg)"
    exit 1
endif

# Behaviour flags: Y regenerates existing subset files / keeps daily subsets.
if ( ! $?FORCE_REDO_SUBSET ) then
    setenv FORCE_REDO_SUBSET N
endif
if ( ! $?KEEP_DAILY_SUBSET ) then
    setenv KEEP_DAILY_SUBSET Y
endif

# Reduced-variable cube output
set OUT_CUBE = "${NAME_TYPE}_CUBE_SUBSET.nc"
# Directory for daily subset files (name encodes type and date range)
set SUBDIR = "subset_${NAME_TYPE}_${BASE_D_Greg}_${FINAL_D_Greg}"

# Variables to keep (include TFLAG)
# IMPORTANT: variable names must match exactly those in the CMAQ file.
# If any are missing, ncks will fail (and the script will stop).
# Exactly one VARLIST line should be active; the others are alternatives
# for the NAME_TYPE noted at the end of each line.
set VARLIST = "TFLAG,NO2,NO,CO,SO2,O3,BENZENE,PCVOC,TA" # ACONC
# set VARLIST = "TFLAG,NO2,NO,CO,SO2,O3,BENZENE,PCVOC,W_VEL,TA,ASO4I,ANO3I,ANH4I,AECI,APOCI,ACLI,ANAI,AOTHRI,ASO4J,ANO3J,ANH4J,AECJ,APOCJ,ACLJ,ANAJ,AOTHRJ,AMGJ,AKJ,ACAJ" # ACONC
#set VARLIST = "TFLAG,NO3" # SO4 WETDEP1
#set VARLIST = "TFLAG,SO2,HNO3,NO3" # DRYDEP
#set VARLIST = "TFLAG,PM25,PM10,WET_DENSACC,DRY_DENSACC,TNO3" # AELMO

# Ctrl-C handler: an interrupt jumps to the cleanup_and_exit label.
onintr cleanup_and_exit
# -----------------------------
# Pre-flight checks
# -----------------------------
# Every input/output path used later is relative to NAME_ROOT, so stop early
# if it is not a usable directory.
if ( ! -d "$NAME_ROOT" ) then
    echo "ERROR: NAME_ROOT does not exist or is not a directory: $NAME_ROOT"
    exit 1
endif
cd "$NAME_ROOT"
# NOTE(review): csh normally aborts a script when the builtin cd fails, so
# this test may never fire -- kept as a defensive guard.
if ( $status != 0 ) then
    echo "ERROR: Unable to cd into $NAME_ROOT"
    exit 1
endif

# Tools required for 2-phase workflow.
# `command -v` is a POSIX sh builtin; csh/tcsh have no such builtin and a
# standalone `command` executable is not installed on every system. Calling it
# directly from csh can therefore fail for every tool and report a false
# "not found". Run the lookup through /bin/sh, which inherits this PATH.
foreach tool ( greg2jul jul2greg ncks ncrcat )
    /bin/sh -c "command -v $tool" >/dev/null
    if ( $status != 0 ) then
        if ( "$tool" == "ncks" || "$tool" == "ncrcat" ) then
            echo "ERROR: $tool (NCO) not found on PATH"
        else
            echo "ERROR: $tool not found on PATH"
        endif
        exit 1
    endif
end

# Base-day input must exist (it is also the file used for the variable check)
set base_in = "${NAME_PREF}${BASE_D_Greg}${NAME_SUFF}"
if ( ! -e "$base_in" ) then
    echo "ERROR: Base input file missing: $base_in"
    exit 1
endif

# Refuse to overwrite final cube
if ( -e "$OUT_CUBE" ) then
    echo "ERROR: Output already exists: $OUT_CUBE"
    echo " Refusing to overwrite."
    exit 1
endif

# Create subset directory (an existing one is reused as-is)
if ( ! -d "$SUBDIR" ) then
    /bin/mkdir -p "$SUBDIR"
    if ( $status != 0 ) then
        echo "ERROR: Failed to create subset directory: $SUBDIR"
        exit 1
    endif
endif
# Compute Julian bounds
# Both ends of the Gregorian range are converted with greg2jul; the two
# results are the start and stop values of the per-day loops.
set BASE_D_Jul = `greg2jul $BASE_D_Greg`
set g2j_rc = $status
if ( $g2j_rc != 0 ) then
    echo "ERROR: greg2jul failed for BASE_D_Greg=$BASE_D_Greg"
    exit 1
endif

set FINAL_D_Jul = `greg2jul $FINAL_D_Greg`
set g2j_rc = $status
if ( $g2j_rc != 0 ) then
    echo "ERROR: greg2jul failed for FINAL_D_Greg=$FINAL_D_Greg"
    exit 1
endif
# -----------------------------
# Phase 0: quick variable sanity check on one file
# -----------------------------
# Probe the base-day file once with the full VARLIST so a misspelled or
# missing variable is caught here rather than partway through the day loop.
# Only the exit status of ncks matters; its output is discarded.
echo "Sanity check: verifying requested variables exist in base file: $base_in"
ncks -m -v $VARLIST "$base_in" >/dev/null
set probe_rc = $status
if ( $probe_rc != 0 ) then
    echo "ERROR: One or more variables in VARLIST not found in: $base_in"
    echo " VARLIST=$VARLIST"
    echo " Run: ncks -m $base_in | less to inspect variable names."
    exit 1
endif
# -----------------------------
# Phase 1: subset daily files
# -----------------------------
# Step a Julian-date counter from BASE_D_Jul through FINAL_D_Jul. For each day
# the input file must exist; its reduced-variable copy is written to SUBDIR
# unless that copy is already present and FORCE_REDO_SUBSET is not Y.
# Any failure jumps to cleanup_and_exit.
# NOTE(review): the counter advances by plain integer +1. If greg2jul emits
# YYYYDDD-style values this presumably misbehaves across a year boundary --
# confirm before running a range that spans more than one calendar year.
echo "Phase 1: Subsetting daily files into: $SUBDIR"
echo " Variables: $VARLIST"
echo " Range: $BASE_D_Greg to $FINAL_D_Greg"

set jday = $BASE_D_Jul
while ( $jday <= $FINAL_D_Jul )
    # Gregorian form of the current day, used in both file names.
    set day_greg = `jul2greg $jday`
    if ( $status != 0 ) then
        echo "ERROR: jul2greg failed for c=$jday"
        goto cleanup_and_exit
    endif

    set src_file = "${NAME_PREF}${day_greg}${NAME_SUFF}"
    set dst_file = "${SUBDIR}/DAY_SUB_${day_greg}.nc"

    # A missing input day is fatal even when its subset already exists.
    if ( ! -e "$src_file" ) then
        echo "ERROR: Missing daily input file: $src_file"
        echo " Aborting before concat. You can re-run after fixing missing days."
        goto cleanup_and_exit
    endif

    # Build the subset only when it is absent or a redo was requested;
    # otherwise fall through to the increment.
    if ( ( ! -e "$dst_file" ) || ( "$FORCE_REDO_SUBSET" == "Y" ) ) then
        echo " Subset $day_greg -> $dst_file"
        /bin/rm -f "$dst_file"

        # Extract the requested variables; -O lets ncks replace the target.
        # Add "-L 1" (or higher) here if compressed output is wanted.
        ncks -O -v $VARLIST "$src_file" "$dst_file"
        if ( $status != 0 ) then
            echo "ERROR: ncks failed subsetting $src_file"
            echo " Check variable names and file integrity."
            goto cleanup_and_exit
        endif

        # Cheap validation: the result must exist with non-zero size.
        if ( ! -s "$dst_file" ) then
            echo "ERROR: Subset output is missing or empty: $dst_file"
            goto cleanup_and_exit
        endif
    endif

    @ jday++
end
# -----------------------------
# Phase 2: concatenate once
# -----------------------------
# Collect the daily subset files in date order, join them into OUT_CUBE with a
# single ncrcat call, optionally delete the daily files, then return to the
# launch directory and exit 0. Any failure jumps to cleanup_and_exit.
echo "Phase 2: Concatenating daily subset files into: $OUT_CUBE"

# Build ordered list explicitly (avoids glob surprises and ensures chronological order)
set filelist = ()
set c = $BASE_D_Jul
while ( $c <= $FINAL_D_Jul )
    set c_Greg = `jul2greg $c`
    if ( $status != 0 ) then
        echo "ERROR: jul2greg failed for c=$c"
        goto cleanup_and_exit
    endif
    set f = "${SUBDIR}/DAY_SUB_${c_Greg}.nc"
    if ( ! -e "$f" ) then
        echo "ERROR: Missing subset file needed for concat: $f"
        goto cleanup_and_exit
    endif
    set filelist = ( $filelist $f )
    @ c = $c + 1
end

# Guard against an empty range (BASE after FINAL): with no input files on the
# command line ncrcat would be handed only the output name, and NCO documents
# that it then takes its input list from stdin instead of failing cleanly.
if ( $#filelist == 0 ) then
    echo "ERROR: No daily subset files selected for range $BASE_D_Greg to $FINAL_D_Greg; nothing to concatenate."
    goto cleanup_and_exit
endif

# One-shot concatenate along record dimension
# -O overwrite output if it exists (we already refused earlier, but keep safe)
ncrcat -O $filelist "$OUT_CUBE"
if ( $status != 0 ) then
    echo "ERROR: ncrcat failed while creating $OUT_CUBE"
    goto cleanup_and_exit
endif
# Cheap validation: the cube must exist with non-zero size.
if ( ! -s "$OUT_CUBE" ) then
    echo "ERROR: Output cube missing or empty: $OUT_CUBE"
    goto cleanup_and_exit
endif
echo "OK: wrote cube: $OUT_CUBE"

# Optional cleanup of daily subsets (only the files; SUBDIR itself is kept)
if ( "$KEEP_DAILY_SUBSET" != "Y" ) then
    echo "Cleaning up daily subset files in $SUBDIR"
    /bin/rm -f ${SUBDIR}/DAY_SUB_*.nc
endif

# Success path: restore the launch directory and stop here so execution
# does not fall through into the error label that follows.
cd "$NAME_HOME"
exit 0
# -----------------------------
# Cleanup and exit (error path)
# -----------------------------
# Target of every "goto cleanup_and_exit" and of the onintr handler.
# Nothing is deleted here: the messages only report where partial results
# live, then the script returns to the launch directory and exits non-zero.
cleanup_and_exit:
    echo "ERROR: aborting. Partial outputs (if any) are left in place for inspection."
    echo " Subset directory: $NAME_ROOT/$SUBDIR"
    echo " Output cube (if created): $NAME_ROOT/$OUT_CUBE"

    cd "$NAME_HOME"
    exit 1