Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 19 additions & 14 deletions batchprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,22 @@
# TODO: Add method to determine if files have already been processed to prevent reprocessing
# TODO: Add optional flag to reprocess all images
files_to_be_processed = []
for root, subdirs, files in os.walk(rootdir + "/data"):
for root, subdirs, files in os.walk(os.path.join(rootdir, "data")):
# skip any files found in the output directory
if root.find("output") != -1:
continue
# have files, check to see if they're the ones we want
if files:
for f in files:
if f.endswith(".tif") and f.find("_SR_") != -1:
pathname = root + "/" + f
pathname = os.path.join(root, f)
files_to_be_processed.append((pathname, f)) # appending (path, filename)

# if we have files, check output directory structure
if files_to_be_processed:
if not os.path.isdir("data/output"):
os.mkdir("data/output")
output_dir = os.path.join("data", "output")
if not os.path.isdir(output_dir):
os.mkdir(output_dir)

file_groups = {}

Expand All @@ -52,10 +53,12 @@


# process directory structure if not already made
if not os.path.isdir("data/output/{}".format(file_year)):
os.mkdir("data/output/{}".format(file_year))
if not os.path.isdir("data/output/{}/{}".format(file_year, file_month)):
os.mkdir("data/output/{}/{}".format(file_year, file_month))
year_dir = os.path.join("data", "output", file_year)
if not os.path.isdir(year_dir):
os.mkdir(year_dir)
month_dir = os.path.join("data", "output", file_year, file_month)
if not os.path.isdir(month_dir):
os.mkdir(month_dir)

# outfile_base = f[1][:15] + "_AnalyticMS_SR"

Expand All @@ -78,19 +81,21 @@
"height": mosaic.shape[1],
"width": mosaic.shape[2],
"transform": out_transform})
out_fp = "data/output/{year}/{month}/{year}{month}{day}_AnalyticMS_SR_merged.tif".format(year=year, month=month, day=day)
out_filename = "{year}{month}{day}_AnalyticMS_SR_merged.tif".format(year=year, month=month, day=day)
out_fp = os.path.join("data", "output", year, month, out_filename)
with rasterio.open(out_fp, "w", **out_meta) as dst:
dst.write(mosaic)

# process the merged files
for root, subdirs, files in os.walk(rootdir + "/data/output"):
for root, subdirs, files in os.walk(os.path.join(rootdir, "data", "output")):
if files:
for f in files:
if f.find("NDWI") != -1:
continue
pathname = root + "/"
pathname = root
outfile_base = f.split(sep=".")[0]
ndwi_outfile = pathname + outfile_base + "_NDWI.tif"
ndwi_class_outfile = pathname + outfile_base + "_NDWI_classified.tif"
ndwi = rt.calculate_ndwi(pathname + f, ndwi_outfile, plot=False)
ndwi_outfile = os.path.join(pathname, outfile_base + "_NDWI.tif")
ndwi_class_outfile = os.path.join(pathname, outfile_base + "_NDWI_classified.tif")
input_file = os.path.join(pathname, f)
ndwi = rt.calculate_ndwi(input_file, ndwi_outfile, plot=False)
ndwi_class = rt.ndwi_classify(ndwi_outfile, ndwi_class_outfile, plot=False)
134 changes: 116 additions & 18 deletions data_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
import numpy as np
import os
import glob
import argparse
import sys

# adapted from https://gis.stackexchange.com/questions/285499/how-to-split-multiband-image-into-image-tiles-using-rasterio
def make_tiles(image, tile_height=512, tile_width=512, skip_no_data=False):
def make_tiles(image, tile_height=512, tile_width=512, skip_no_data=False, output_dir="data/tiles"):
with rio.open(image) as src:
filepath, filename = os.path.split(image)
file_base, file_extension = os.path.splitext(filename)
Expand All @@ -32,7 +34,9 @@ def make_tiles(image, tile_height=512, tile_width=512, skip_no_data=False):
if 0 in window_data[..., :-1]:
continue
out_name = file_base + "_" + str(i + 1).zfill(2) + "-of-" + str(len(tiles)) + file_extension
out_path = os.path.join("data/tiles/", out_name)
out_path = os.path.join(output_dir, out_name)
# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)
with rio.open(out_path, 'w', **meta) as dst:
dst.write(src.read(window=window))

Expand All @@ -48,20 +52,26 @@ def _flip_bands(bands):
# takes the path to all image tiles and creates tiles that are rotated 90°, 180° and 270° as well as their flipped counterparts
# this results in 8 tiles for every input tile (including the input tile)
def augment_tiles(tile_path):
files = glob.glob(tile_path + "*.tif")
files = set(files) - set(glob.glob(tile_path + "*rot*"))
files = set(files) - set(glob.glob(tile_path + "*flip*"))
# Normalize tile_path and ensure it's a directory path
tile_path = os.path.normpath(tile_path)
if not os.path.isdir(tile_path):
raise ValueError(f"Tile path is not a valid directory: {tile_path}")

# Use os.path.join for glob patterns (works cross-platform)
files = glob.glob(os.path.join(tile_path, "*.tif"))
files = set(files) - set(glob.glob(os.path.join(tile_path, "*rot*")))
files = set(files) - set(glob.glob(os.path.join(tile_path, "*flip*")))
for file in files:
filename = os.path.basename(file)
file_base, file_extension = os.path.splitext(filename)
# generating filepaths for new tiles
path_90 = tile_path + file_base + "_rot90" + file_extension
path_180 = tile_path + file_base + "_rot180" + file_extension
path_270 = tile_path + file_base + "_rot270" + file_extension
path_flip_name = tile_path + file_base + "_flip" + file_extension
path_flip_90 = tile_path + file_base + "_rot90_flip" + file_extension
path_flip_180 = tile_path + file_base + "_rot180_flip" + file_extension
path_flip_270 = tile_path + file_base + "_rot270_flip" + file_extension
path_90 = os.path.join(tile_path, file_base + "_rot90" + file_extension)
path_180 = os.path.join(tile_path, file_base + "_rot180" + file_extension)
path_270 = os.path.join(tile_path, file_base + "_rot270" + file_extension)
path_flip_name = os.path.join(tile_path, file_base + "_flip" + file_extension)
path_flip_90 = os.path.join(tile_path, file_base + "_rot90_flip" + file_extension)
path_flip_180 = os.path.join(tile_path, file_base + "_rot180_flip" + file_extension)
path_flip_270 = os.path.join(tile_path, file_base + "_rot270_flip" + file_extension)

with rio.open(file, driver="GTiff") as src:
# band_1 = src.read(1)
Expand All @@ -85,10 +95,98 @@ def augment_tiles(tile_path):

# example usage
if __name__ == '__main__':
files = glob.glob("data/labeled_inputs/*.tif")

parser = argparse.ArgumentParser(
description='Preprocess images by creating tiles and augmenting them',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog='''
Examples:
python data_preprocessing.py --input_dir data/labeled_inputs --output_dir data/tiles

python data_preprocessing.py --input_dir ./images --output_dir ./tiles --augment
'''
)
parser.add_argument(
'--input_dir',
type=str,
default='data/labeled_inputs',
help='Directory containing input images to tile. Default: data/labeled_inputs'
)
parser.add_argument(
'--output_dir',
type=str,
default='data/tiles',
help='Directory for output tiles. Default: data/tiles'
)
parser.add_argument(
'--augment',
action='store_true',
help='Run tile augmentation after creating tiles'
)
parser.add_argument(
'--tile_height',
type=int,
default=512,
help='Height of tiles in pixels. Default: 512'
)
parser.add_argument(
'--tile_width',
type=int,
default=512,
help='Width of tiles in pixels. Default: 512'
)
parser.add_argument(
'--skip_no_data',
action='store_true',
help='Skip tiles with no data values'
)
parser.add_argument(
'--max_workers',
type=int,
default=6,
help='Maximum number of worker threads. Default: 6'
)

args = parser.parse_args()

# Check if input directory exists
if not os.path.isdir(args.input_dir):
print(f"ERROR: Input directory does not exist: {args.input_dir}", file=sys.stderr)
print("Please specify a valid directory using --input_dir argument.", file=sys.stderr)
sys.exit(1)

# Create output directory if it doesn't exist
os.makedirs(args.output_dir, exist_ok=True)

# Find all .tif files in input directory
input_pattern = os.path.join(args.input_dir, "*.tif")
files = glob.glob(input_pattern)

if not files:
print(f"No .tif files found in {args.input_dir}", file=sys.stderr)
sys.exit(1)

print(f"Found {len(files)} files to process")
print(f"Output directory: {args.output_dir}")

from concurrent.futures import ThreadPoolExecutor
with ThreadPoolExecutor(max_workers=6) as p:
p.map(make_tiles, files)

# augment_tiles("data/tiles/")

# Create tiles
def process_file(file):
make_tiles(
file,
tile_height=args.tile_height,
tile_width=args.tile_width,
skip_no_data=args.skip_no_data,
output_dir=args.output_dir
)

with ThreadPoolExecutor(max_workers=args.max_workers) as p:
p.map(process_file, files)

print("Tile creation complete!")

# Augment tiles if requested
if args.augment:
print("Starting tile augmentation...")
augment_tiles(args.output_dir)
print("Tile augmentation complete!")
106 changes: 91 additions & 15 deletions process_snap_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
from datetime import datetime, date, timedelta
import time
import math
import argparse
import sys

# WKT Used to convert Lat/Lon to projection used in snap data
CRS_WKT = 'PROJCS["unnamed",GEOGCS["unnamed ellipse",DATUM["unknown",SPHEROID["unnamed",6370000,0]],PRIMEM["Greenwich",' \
Expand All @@ -39,19 +41,6 @@
# Center of region in lat/lon
REGION_CENTER = (66.0756, -162.7172)

# Filepaths for SNAP data

TSK_FILEPATH = '/usr/local/coastal/snap_processing/snap_data/tsk'
U10_FILEPATH = '/usr/local/coastal/snap_processing/snap_data/u10'
V10_FILEPATH = '/usr/local/coastal/snap_processing/snap_data/v10'
SEAICE_FILEPATH = '/usr/local/coastal/snap_processing/snap_data/seaice'
PSFC_FILEPATH = '/usr/local/coastal/snap_processing/snap_data/psfc'
T2_FILEPATH = '/usr/local/coastal/snap_processing/snap_data/t2'
TRANSECT_FILEPATH = '/usr/local/coastal/snap_processing/transect_data/WestChukchi_exposed_STepr_rates.shp'

# Filepath for I/O csv data
OUTPUT_FILEPATH = '/usr/local/coastal/snap_processing/snap_output/SNAP_daily_by_transect'

# Create global variables
global transformer_to_snap_proj
global transformer_to_lat_lon
Expand Down Expand Up @@ -112,7 +101,7 @@ def read_data(filepath):
if os.path.isdir(filepath):
for filename in os.scandir(filepath):
try:
data.append(nc.Dataset(filepath + '/' + filename.name))
data.append(nc.Dataset(os.path.join(filepath, filename.name)))
except OSError:
pass

Expand Down Expand Up @@ -288,6 +277,91 @@ def calculate_wind_data(dataframe):

if __name__ == '__main__':

# Parse command-line arguments
parser = argparse.ArgumentParser(
description='Process SNAP climate data for coastline extraction',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog='''
Examples:
python process_snap_data.py --snap_data_dir ./data/snap_data --transect_file ./data/transects/WestChukchi_exposed_STepr_rates.shp --output_dir ./data/output

python process_snap_data.py --snap_data_dir /usr/local/coastal/snap_processing/snap_data --transect_file /usr/local/coastal/snap_processing/transect_data/WestChukchi_exposed_STepr_rates.shp --output_dir /usr/local/coastal/snap_processing/snap_output
'''
)
parser.add_argument(
'--snap_data_dir',
type=str,
default='./data/snap_data',
help='Base directory containing SNAP data subdirectories (tsk, u10, v10, seaice, psfc, t2). Default: ./data/snap_data'
)
parser.add_argument(
'--transect_file',
type=str,
default='./data/transect_data/WestChukchi_exposed_STepr_rates.shp',
help='Path to transect shapefile. Default: ./data/transect_data/WestChukchi_exposed_STepr_rates.shp'
)
parser.add_argument(
'--output_dir',
type=str,
default='./data/snap_output',
help='Directory for output CSV files. Default: ./data/snap_output'
)
parser.add_argument(
'--output_prefix',
type=str,
default='SNAP_daily_by_transect',
help='Prefix for output CSV filenames. Default: SNAP_daily_by_transect'
)

args = parser.parse_args()

# Construct filepaths from arguments
SNAP_DATA_DIR = args.snap_data_dir
TRANSECT_FILEPATH = args.transect_file
OUTPUT_DIR = args.output_dir
OUTPUT_PREFIX = args.output_prefix

# Construct individual data filepaths
TSK_FILEPATH = os.path.join(SNAP_DATA_DIR, 'tsk')
U10_FILEPATH = os.path.join(SNAP_DATA_DIR, 'u10')
V10_FILEPATH = os.path.join(SNAP_DATA_DIR, 'v10')
SEAICE_FILEPATH = os.path.join(SNAP_DATA_DIR, 'seaice')
PSFC_FILEPATH = os.path.join(SNAP_DATA_DIR, 'psfc')
T2_FILEPATH = os.path.join(SNAP_DATA_DIR, 't2')

# Check if directories exist
data_dirs = {
'tsk': TSK_FILEPATH,
'u10': U10_FILEPATH,
'v10': V10_FILEPATH,
'seaice': SEAICE_FILEPATH,
'psfc': PSFC_FILEPATH,
't2': T2_FILEPATH
}

missing_dirs = []
for name, path in data_dirs.items():
if not os.path.exists(path):
missing_dirs.append(f" {name}: {path}")

if missing_dirs:
print("ERROR: The following SNAP data directories do not exist:", file=sys.stderr)
for dir_info in missing_dirs:
print(dir_info, file=sys.stderr)
print("\nPlease specify the correct path using --snap_data_dir argument.", file=sys.stderr)
print("Example: --snap_data_dir /path/to/snap_data", file=sys.stderr)
sys.exit(1)

if not os.path.exists(TRANSECT_FILEPATH):
print(f"ERROR: Transect file does not exist: {TRANSECT_FILEPATH}", file=sys.stderr)
print("Please specify the correct path using --transect_file argument.", file=sys.stderr)
sys.exit(1)

# Create output directory if it doesn't exist
if not os.path.exists(OUTPUT_DIR):
print(f"Creating output directory: {OUTPUT_DIR}")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Import Transects and extract points
transect_points, transects = get_transect_points(TRANSECT_FILEPATH)

Expand Down Expand Up @@ -415,4 +489,6 @@ def calculate_wind_data(dataframe):
print(f'Unable to perform wind calculations for year {y}. Please check dataframe for missing data.')
print(f'{y} Wind data processed. Writing final dataframe')

df.to_csv(OUTPUT_FILEPATH + f'_{y}.csv')
output_filename = f'{OUTPUT_PREFIX}_{y}.csv'
output_filepath = os.path.join(OUTPUT_DIR, output_filename)
df.to_csv(output_filepath)