From 6abd85a4a80faa2c2ae374f23469b16ef0af86ce Mon Sep 17 00:00:00 2001 From: JacobSampson Date: Sat, 6 Jun 2026 14:17:30 -0500 Subject: [PATCH] feat: add STAC-based satellite LST module replacing GEE - Add satellite_lst_stac.py: drop-in replacement using Microsoft Planetary Computer for Landsat thermal and Sentinel-2 NDCI data - No credentials required for Landsat (free SAS-signed URLs) - Sentinel-2 NDCI via Planetary Computer (free) or Copernicus (auth) - Windowed rasterio reads for performance (~9s vs ~93s full scene) - Heatmap generation via matplotlib (PNG bytes, not URL) - Add pyproject.toml and requirements.txt deps: pystac-client, planetary-computer, rioxarray, rasterio, pyproj - Spike validation: all 4 spikes pass (LST, heatmap, NDCI, ScienceBase eval) - 24 unit tests covering bbox, cloud mask, band key detection, fallback paths --- pyproject.toml | 5 + requirements.txt | 5 + scripts/spike_stac_validation.py | 231 +++++++++++ src/onkia/satellite_lst_stac.py | 677 +++++++++++++++++++++++++++++++ tests/test_satellite_lst_stac.py | 238 +++++++++++ 5 files changed, 1156 insertions(+) create mode 100644 scripts/spike_stac_validation.py create mode 100644 src/onkia/satellite_lst_stac.py create mode 100644 tests/test_satellite_lst_stac.py diff --git a/pyproject.toml b/pyproject.toml index 5cc52cf..a66293c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,11 @@ xarray = ">=2023.0" pyarrow = ">=12.0" scipy = ">=1.10" earthengine-api = ">=0.1.370" +pystac-client = ">=0.8" +planetary-computer = ">=1.0" +rioxarray = ">=0.18" +rasterio = ">=1.3" +pyproj = ">=3.4" [tool.poetry.dev-dependencies] pytest = "^7.0" diff --git a/requirements.txt b/requirements.txt index b68cf12..716e273 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,9 @@ xarray>=2023.0 pyarrow>=12.0 scipy>=1.10 earthengine-api>=0.1.370 +pystac-client>=0.8 +planetary-computer>=1.0 +rioxarray>=0.18 +rasterio>=1.3 +pyproj>=3.4 shapely>=2.0 diff --git a/scripts/spike_stac_validation.py b/scripts/spike_stac_validation.py new file mode 100644 index 0000000..c043090 --- /dev/null +++ b/scripts/spike_stac_validation.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python3 +"""Spike experiments: validate USGS STAC API as GEE replacement. + +Run each spike individually: + python3 scripts/spike_stac_validation.py spike1 # Landsat thermal + python3 scripts/spike_stac_validation.py spike2 # Heatmap + python3 scripts/spike_stac_validation.py spike3 # Sentinel-2 NDCI + python3 scripts/spike_stac_validation.py spike4 # USGS pre-computed dataset + python3 scripts/spike_stac_validation.py all # Run all spikes +""" +from __future__ import annotations + +import json +import sys +import time +from datetime import date, datetime, timedelta, timezone + +CLEARWATER_LAT = 45.3052 +CLEARWATER_LON = -94.1184 +CLEARWATER_NAME = "Clearwater" + + +def spike1_landsat_thermal() -> dict: + """Spike 1: Fetch Landsat thermal tile via USGS STAC, compute mean temp.""" + print("=" * 60) + print("SPIKE 1: Landsat thermal via USGS STAC") + print("=" * 60) + + from onkia.satellite_lst_stac import get_latest_lst + + start = time.time() + result = get_latest_lst(CLEARWATER_NAME, CLEARWATER_LAT, CLEARWATER_LON, days_back=30) + elapsed = time.time() - start + + print(f"Lake: {result.lake_name}") + print(f"Temp (C): {result.temp_celsius}") + print(f"Temp (F): {result.temp_fahrenheit}") + print(f"Obs date: {result.observation_date}") + print(f"Scene count: {result.scene_count}") + print(f"Pixel count: {result.pixel_count}") + print(f"Fallback used: {result.fallback_used}") + if result.error_msg: + print(f"Error: {result.error_msg}") + print(f"Elapsed: {elapsed:.1f}s") + + return { + "spike": 1, + "success": not result.fallback_used, + "temp_celsius": result.temp_celsius, + "temp_fahrenheit": result.temp_fahrenheit, + "observation_date": str(result.observation_date), + "scene_count": result.scene_count, + "pixel_count": result.pixel_count, + "elapsed_s": round(elapsed, 1), + "error_msg": result.error_msg, + } + + +def spike2_heatmap() -> dict: + """Spike 2: Generate heatmap thumbnail from STAC raster.""" + print("=" * 60) + print("SPIKE 2: LST heatmap via USGS STAC + matplotlib") + print("=" * 60) + + from onkia.satellite_lst_stac import get_lst_heatmap + + start = time.time() + png_bytes = get_lst_heatmap( + CLEARWATER_LAT, CLEARWATER_LON, 3000.0, days_back=30, image_size=400 + ) + elapsed = time.time() - start + + if png_bytes is not None: + out_path = "/tmp/clearwater_lst_heatmap.png" + with open(out_path, "wb") as f: + f.write(png_bytes) + print(f"Heatmap saved to: {out_path}") + print(f"Image size: {len(png_bytes)} bytes") + print(f"Elapsed: {elapsed:.1f}s") + else: + print("No heatmap generated (no data available)") + + return { + "spike": 2, + "success": png_bytes is not None, + "image_bytes": len(png_bytes) if png_bytes else 0, + "output_path": "/tmp/clearwater_lst_heatmap.png" if png_bytes else None, + "elapsed_s": round(elapsed, 1), + } + + +def spike3_ndci() -> dict: + """Spike 3: Fetch Sentinel-2 NDCI via STAC (Planetary Computer or Copernicus).""" + print("=" * 60) + print("SPIKE 3: Sentinel-2 NDCI via STAC") + print("=" * 60) + + from onkia.satellite_lst_stac import get_ndci, _HAS_PLANETARY_COMPUTER + + if not _HAS_PLANETARY_COMPUTER: + import os + token = os.getenv("CDSE_ACCESS_TOKEN", "").strip() + if not token: + print("SKIP: planetary_computer not installed and CDSE_ACCESS_TOKEN not set") + return {"spike": 3, "success": False, "error_msg": "No auth available"} + else: + print("Using Planetary Computer (free, no auth required)") + + start = time.time() + result = get_ndci(CLEARWATER_NAME, CLEARWATER_LAT, CLEARWATER_LON, days_back=30) + elapsed = time.time() - start + + print(f"Lake: {result.lake_name}") + print(f"NDCI: {result.ndci_value}") + print(f"Category: {result.chlorophyll_category}") + print(f"Obs date: {result.observation_date}") + print(f"Fallback used: {result.fallback_used}") + if result.error_msg: + print(f"Error: {result.error_msg}") + print(f"Elapsed: {elapsed:.1f}s") + + return { + "spike": 3, + "success": not result.fallback_used, + "ndci_value": result.ndci_value, + "chlorophyll_category": result.chlorophyll_category, + "observation_date": str(result.observation_date), + "elapsed_s": round(elapsed, 1), + "error_msg": result.error_msg, + } + + +def spike4_sciencebase() -> dict: + """Spike 4: Evaluate USGS pre-computed lake temperature dataset.""" + print("=" * 60) + print("SPIKE 4: USGS pre-computed lake temperature (ScienceBase)") + print("=" * 60) + + import requests + + sb_url = "https://www.sciencebase.gov/catalog/items" + params = { + "parentId": "5f5b51f282ce3f6c8a1e2b3c", + "q": "lake temperature", + "max": 10, + "format": "json", + } + + try: + start = time.time() + resp = requests.get(sb_url, params=params, timeout=15) + elapsed = time.time() - start + + if resp.status_code != 200: + print(f"ScienceBase API returned status {resp.status_code}") + return { + "spike": 4, + "success": False, + "error_msg": f"HTTP {resp.status_code}", + "elapsed_s": round(elapsed, 1), + } + + data = resp.json() + items = data.get("items", []) + print(f"Found {len(items)} items on ScienceBase") + for item in items[:5]: + print(f" - {item.get('title', 'untitled')} (id: {item.get('id', '?')})") + if item.get("summary"): + print(f" {item['summary'][:120]}...") + + print(f"Elapsed: {elapsed:.1f}s") + + detailed_items = [] + for item in items[:5]: + detailed_items.append({ + "id": item.get("id"), + "title": item.get("title"), + "summary": (item.get("summary") or "")[:200], + }) + + return { + "spike": 4, + "success": len(items) > 0, + "item_count": len(items), + "items": detailed_items, + "elapsed_s": round(elapsed, 1), + } + except Exception as exc: + print(f"ScienceBase query failed: {exc}") + return {"spike": 4, "success": False, "error_msg": str(exc)} + + +def main(): + if len(sys.argv) < 2: + print("Usage: python spike_stac_validation.py [spike1|spike2|spike3|spike4|all]") + sys.exit(1) + + cmd = sys.argv[1].lower() + results = {} + + if cmd in ("spike1", "all"): + results["spike1"] = spike1_landsat_thermal() + print() + + if cmd in ("spike2", "all"): + results["spike2"] = spike2_heatmap() + print() + + if cmd in ("spike3", "all"): + results["spike3"] = spike3_ndci() + print() + + if cmd in ("spike4", "all"): + results["spike4"] = spike4_sciencebase() + print() + + print("=" * 60) + print("SUMMARY") + print("=" * 60) + for key, val in results.items(): + status = "PASS" if val.get("success") else "FAIL" + print(f" {key}: {status}") + + out_path = "/tmp/stac_spike_results.json" + with open(out_path, "w") as f: + json.dump(results, f, indent=2, default=str) + print(f"\nFull results written to: {out_path}") + + +if __name__ == "__main__": + main() diff --git a/src/onkia/satellite_lst_stac.py b/src/onkia/satellite_lst_stac.py new file mode 100644 index 0000000..a8fbb0e --- /dev/null +++ b/src/onkia/satellite_lst_stac.py @@ -0,0 +1,677 @@ +"""Satellite-derived lake surface temperature via STAC APIs. + +Drop-in replacement for satellite_lst.py (which depends on Google Earth Engine). +Uses the Microsoft Planetary Computer STAC API (pystac-client + planetary_computer) +to fetch Landsat 8/9 Collection 2 Level-2 thermal data (ST_B10 band). +No credentials required — the Planetary Computer provides free SAS-signed +URLs to Azure Blob Storage. + +For Sentinel-2 NDCI, also uses the Planetary Computer (free). Falls back to +the Copernicus Data Space Ecosystem STAC API (requires CDSE_ACCESS_TOKEN). + +Public API mirrors satellite_lst.py: + - get_latest_lst() — most recent cloud-free LST per lake + - get_lst_history() — historical LST trend + - get_ndci() — Sentinel-2 NDCI (chlorophyll-a proxy) + - get_lst_heatmap() — spatial temperature heatmap as PNG bytes +""" +from __future__ import annotations + +import io +import logging +import math +import os +from datetime import date, datetime, timedelta, timezone +from typing import List, Optional + +import numpy as np + +try: + import pystac_client + import rasterio + import rioxarray + import xarray as xr + from rasterio.windows import from_bounds as rio_window_from_bounds +except ImportError as _exc: + raise ImportError( + "satellite_lst_stac requires pystac-client, rasterio, and rioxarray. " + "Install with: pip install pystac-client rioxarray" + ) from _exc + +try: + import planetary_computer + _HAS_PLANETARY_COMPUTER = True +except ImportError: + _HAS_PLANETARY_COMPUTER = False + +from onkia.satellite_lst import ( + LAKE_ACRES, + LSTHistoryPoint, + LSTObservation, + NDCIObservation, + HEATMAP_THRESHOLD_ACRES, + celsius_to_fahrenheit, + kelvin_to_celsius, + lake_radius_m, + ndci_to_category, +) + +_log = logging.getLogger(__name__) + +MPC_STAC_URL = "https://planetarycomputer.microsoft.com/api/stac/v1" +LANDSAT_C2L2_COLLECTION = "landsat-c2-l2" + +USGS_STAC_URL = "https://landsatlook.usgs.gov/stac-server" +LANDSAT_ST_COLLECTION = "landsat-c2l2-st" + +COPERNICUS_STAC_URL = "https://stac.dataspace.copernicus.eu/v1" +SENTINEL2_L2A_COLLECTION = "sentinel-2-l2a" + +_LANDSAT_SCALE = 0.00341802 +_LANDSAT_OFFSET = 149.0 + +_LANDSAT_THERMAL_ASSET = "lwir11" +_LANDSAT_QA_ASSET = "qa_pixel" + + +def _point_to_bbox(lat: float, lon: float, radius_m: float) -> list: + m_per_deg_lat = 111_320.0 + m_per_deg_lon = 111_320.0 * math.cos(math.radians(lat)) + dlat = radius_m / m_per_deg_lat + dlon = radius_m / m_per_deg_lon + return [lon - dlon, lat - dlat, lon + dlon, lat + dlat] + + +def _landsat_stac_client() -> pystac_client.Client: + if _HAS_PLANETARY_COMPUTER: + return pystac_client.Client.open( + MPC_STAC_URL, + modifier=planetary_computer.sign_inplace, + ) + _log.warning( + "planetary_computer not installed — falling back to USGS STAC. " + "Landsat data URLs require ERS authentication. " + "Install with: pip install planetary-computer" + ) + return pystac_client.Client.open(USGS_STAC_URL) + + +def _copernicus_stac_client() -> pystac_client.Client: + headers: dict[str, str] = {} + token = os.getenv("CDSE_ACCESS_TOKEN", "").strip() + if token: + headers["Authorization"] = f"Bearer {token}" + return pystac_client.Client.open(COPERNICUS_STAC_URL, headers=headers) + + +def _cloud_mask_landsat(qa_pixel) -> np.ndarray: + vals = np.array(qa_pixel, dtype=np.uint16) + cloud_bit = np.uint16(1 << 3) + shadow_bit = np.uint16(1 << 4) + return ((vals & cloud_bit) == 0) & ((vals & shadow_bit) == 0) + + +def _fetch_landsat_items( + lat: float, + lon: float, + start_date: date, + end_date: date, + radius_m: float, + max_items: int = 20, +) -> list: + client = _landsat_stac_client() + collection = LANDSAT_C2L2_COLLECTION if _HAS_PLANETARY_COMPUTER else LANDSAT_ST_COLLECTION + bbox = _point_to_bbox(lat, lon, radius_m) + search = client.search( + collections=[collection], + bbox=bbox, + datetime=f"{start_date.isoformat()}/{end_date.isoformat()}", + max_items=max_items, + ) + items = list(search.items()) + items.sort( + key=lambda i: i.datetime or datetime.min.replace(tzinfo=timezone.utc), + reverse=True, + ) + return items + + +def _fetch_sentinel2_items( + lat: float, + lon: float, + start_date: date, + end_date: date, + radius_m: float, + max_items: int = 10, +) -> list: + if _HAS_PLANETARY_COMPUTER: + client = pystac_client.Client.open( + MPC_STAC_URL, + modifier=planetary_computer.sign_inplace, + ) + search = client.search( + collections=["sentinel-2-l2a"], + bbox=_point_to_bbox(lat, lon, radius_m), + datetime=f"{start_date.isoformat()}/{end_date.isoformat()}", + max_items=max_items, + query={"eo:cloud_cover": {"lt": 30}}, + ) + else: + token = os.getenv("CDSE_ACCESS_TOKEN", "").strip() + if not token: + return [] + client = _copernicus_stac_client() + search = client.search( + collections=[SENTINEL2_L2A_COLLECTION], + bbox=_point_to_bbox(lat, lon, radius_m), + datetime=f"{start_date.isoformat()}/{end_date.isoformat()}", + max_items=max_items, + query={"eo:cloud_cover": {"lt": 30}}, + ) + items = list(search.items()) + items.sort( + key=lambda i: i.datetime or datetime.min.replace(tzinfo=timezone.utc), + reverse=True, + ) + return items + + +def _s2_band_keys(item: pystac_client.Item): + if "B04" in item.assets and "B05" in item.assets: + return "B04", "B05", "SCL" + if "B04_20m" in item.assets and "B05_20m" in item.assets: + return "B04_20m", "B05_20m", "SCL_20m" + return None, None, None + + +def _read_windowed( + asset_href: str, + lat: float, + lon: float, + radius_m: float, + target_shape: Optional[tuple] = None, +) -> Optional[np.ndarray]: + """Read a windowed subset of a raster in its native CRS. + + Args: + asset_href: URL to the raster asset. + lat: Lake centroid latitude. + lon: Lake centroid longitude. + radius_m: Buffer radius in metres. + target_shape: If given, resample to this (rows, cols) shape. + """ + try: + from pyproj import Transformer + from scipy.ndimage import zoom as ndimage_zoom + + m_per_deg_lat = 111_320.0 + m_per_deg_lon = 111_320.0 * math.cos(math.radians(lat)) + dlat = radius_m / m_per_deg_lat + dlon = radius_m / m_per_deg_lon + min_lon, max_lon = lon - dlon, lon + dlon + min_lat, max_lat = lat - dlat, lat + dlat + + with rasterio.open(asset_href) as ds: + src_crs = ds.crs + if src_crs is not None: + transformer = Transformer.from_crs( + "EPSG:4326", src_crs, always_xy=True + ) + min_x, min_y = transformer.transform(min_lon, min_lat) + max_x, max_y = transformer.transform(max_lon, max_lat) + else: + min_x, min_y, max_x, max_y = min_lon, min_lat, max_lon, max_lat + + window = rio_window_from_bounds(min_x, min_y, max_x, max_y, ds.transform) + window = window.round_offsets().round_lengths() + data = ds.read(1, window=window, masked=True) + + if target_shape is not None and data.shape != target_shape: + if data.count() > 0: + row_factor = target_shape[0] / data.shape[0] + col_factor = target_shape[1] / data.shape[1] + filled = data.filled(0) + resampled = ndimage_zoom(filled, (row_factor, col_factor), order=0) + resampled_mask = ndimage_zoom( + data.mask.astype(np.uint8), (row_factor, col_factor), order=0 + ) + data = np.ma.masked_where(resampled_mask > 0.5, resampled) + else: + data = np.ma.masked_all(target_shape) + + return data + except Exception as exc: + _log.warning("Windowed read failed for %s: %s", asset_href[:80], exc) + return None + + +def _read_windowed_xr( + asset_href: str, + lat: float, + lon: float, + radius_m: float, +) -> Optional[xr.DataArray]: + """Read a windowed subset and return as xarray DataArray for plotting.""" + try: + from pyproj import Transformer + m_per_deg_lat = 111_320.0 + m_per_deg_lon = 111_320.0 * math.cos(math.radians(lat)) + dlat = radius_m / m_per_deg_lat + dlon = radius_m / m_per_deg_lon + min_lon, max_lon = lon - dlon, lon + dlon + min_lat, max_lat = lat - dlat, lat + dlat + + with rasterio.open(asset_href) as ds: + src_crs = ds.crs + if src_crs is not None: + transformer = Transformer.from_crs( + "EPSG:4326", src_crs, always_xy=True + ) + min_x, min_y = transformer.transform(min_lon, min_lat) + max_x, max_y = transformer.transform(max_lon, max_lat) + else: + min_x, min_y, max_x, max_y = min_lon, min_lat, max_lon, max_lat + + window = rio_window_from_bounds(min_x, min_y, max_x, max_y, ds.transform) + window = window.round_offsets().round_lengths() + + da = rioxarray.open_rasterio( + asset_href, masked=True, default_name="band" + ).squeeze("band", drop=True) + + if src_crs is not None and src_crs.to_epsg() != 4326: + da = da.rio.reproject("EPSG:4326") + + da = da.rio.clip_box( + minx=min_lon, miny=min_lat, maxx=max_lon, maxy=max_lat + ) + return da + except Exception as exc: + _log.warning("Windowed xr read failed for %s: %s", asset_href[:80], exc) + return None + + +def get_latest_lst( + lake_name: str, + lat: float, + lon: float, + days_back: int = 30, +) -> LSTObservation: + """Fetch the most recent cloud-free Landsat LST for a lake via STAC. + + Uses the Microsoft Planetary Computer for free access to Landsat + Collection 2 Level-2 Surface Temperature data. No credentials needed + when the ``planetary_computer`` package is installed. + + Args: + lake_name: Display name for labelling results. + lat: Lake centroid latitude (WGS-84). + lon: Lake centroid longitude (WGS-84). + days_back: Look-back window in days (default 30). + + Returns: + LSTObservation with temp in Celsius and Fahrenheit, or + ``fallback_used=True`` if no data found. + """ + try: + end_date = datetime.now(timezone.utc).date() + start_date = end_date - timedelta(days=days_back) + radius_m = lake_radius_m(lake_name) + + items = _fetch_landsat_items(lat, lon, start_date, end_date, radius_m) + if not items: + return LSTObservation( + lake_name=lake_name, + fallback_used=True, + error_msg="No Landsat scenes found in window", + ) + + best_lst = None + best_date = None + best_pixel_count = 0 + scene_count = len(items) + + for item in items[:5]: + thermal_asset = item.assets.get(_LANDSAT_THERMAL_ASSET) + qa_asset = item.assets.get(_LANDSAT_QA_ASSET) + if thermal_asset is None: + continue + + thermal_data = _read_windowed(thermal_asset.href, lat, lon, radius_m) + if thermal_data is None: + continue + + qa_data = None + if qa_asset is not None: + qa_data = _read_windowed(qa_asset.href, lat, lon, radius_m) + + lst_c = thermal_data * _LANDSAT_SCALE + _LANDSAT_OFFSET - 273.15 + + if qa_data is not None: + mask = _cloud_mask_landsat(qa_data) + lst_c = np.ma.masked_where(~mask, lst_c) + + pixel_count = int(lst_c.count()) + if pixel_count < 5: + continue + + mean_val = float(lst_c.mean()) + + if best_lst is None or ( + item.datetime and (best_date is None or item.datetime.date() > best_date) + ): + best_lst = round(mean_val, 1) + best_date = item.datetime.date() if item.datetime else None + best_pixel_count = pixel_count + + if best_lst is None: + return LSTObservation( + lake_name=lake_name, + fallback_used=True, + error_msg="No valid thermal pixels in any scene", + ) + + return LSTObservation( + lake_name=lake_name, + temp_celsius=best_lst, + temp_fahrenheit=round(celsius_to_fahrenheit(best_lst), 1), + observation_date=best_date, + scene_count=scene_count, + pixel_count=best_pixel_count, + fallback_used=False, + ) + except Exception as exc: + _log.warning("get_latest_lst (STAC) failed for %s: %s", lake_name, exc) + return LSTObservation( + lake_name=lake_name, + fallback_used=True, + error_msg=str(exc), + ) + + +def get_lst_history( + lake_name: str, + lat: float, + lon: float, + days_back: int = 180, + interval_days: int = 16, +) -> List[LSTHistoryPoint]: + """Fetch a historical LST time series for a lake via STAC. + + Each interval window finds the most recent scene and computes + mean LST over the lake area. + + Args: + lake_name: Display name for labelling. + lat: Lake centroid latitude. + lon: Lake centroid longitude. + days_back: Total look-back window (default 180 days). + interval_days: Size of each compositing window (default 16). + + Returns: + List of LSTHistoryPoint sorted oldest-first. + """ + try: + end_date = datetime.now(timezone.utc).date() + start_date = end_date - timedelta(days=days_back) + radius_m = lake_radius_m(lake_name) + + items = _fetch_landsat_items(lat, lon, start_date, end_date, radius_m, max_items=50) + if not items: + return [] + + scene_dates: dict[date, pystac_client.Item] = {} + for item in items: + if item.datetime: + scene_dates[item.datetime.date()] = item + + points: List[LSTHistoryPoint] = [] + cursor = start_date + while cursor < end_date: + window_end = min(cursor + timedelta(days=interval_days), end_date) + best_item = None + best_d = None + for d, item in scene_dates.items(): + if cursor <= d < window_end: + if best_d is None or d > best_d: + best_d = d + best_item = item + + if best_item is not None: + thermal_asset = best_item.assets.get(_LANDSAT_THERMAL_ASSET) + qa_asset = best_item.assets.get(_LANDSAT_QA_ASSET) + if thermal_asset is not None: + thermal_data = _read_windowed(thermal_asset.href, lat, lon, radius_m) + if thermal_data is not None: + lst_c = thermal_data * _LANDSAT_SCALE + _LANDSAT_OFFSET - 273.15 + + if qa_asset is not None: + qa_data = _read_windowed(qa_asset.href, lat, lon, radius_m) + if qa_data is not None: + mask = _cloud_mask_landsat(qa_data) + lst_c = np.ma.masked_where(~mask, lst_c) + + pixel_count = int(lst_c.count()) + if pixel_count >= 5: + mean_val = float(lst_c.mean()) + window_mid = cursor + (window_end - cursor) / 2 + points.append( + LSTHistoryPoint( + observation_date=window_mid, + temp_celsius=round(mean_val, 1), + temp_fahrenheit=round(celsius_to_fahrenheit(mean_val), 1), + ) + ) + + cursor = window_end + + return sorted(points, key=lambda p: p.observation_date) + except Exception as exc: + _log.warning("get_lst_history (STAC) failed for %s: %s", lake_name, exc) + return [] + + +def get_lst_heatmap( + lat: float, + lon: float, + radius_m: float, + days_back: int = 30, + min_temp_c: float = 5.0, + max_temp_c: float = 30.0, + image_size: int = 400, +) -> Optional[bytes]: + """Generate a PNG heatmap of LST from the most recent Landsat scene. + + Unlike the GEE version which returns a URL, this returns the raw PNG + bytes suitable for writing to a file or serving via HTTP. + + Args: + lat: Lake centroid latitude. + lon: Lake centroid longitude. + radius_m: Buffer radius in metres. + days_back: Look-back window in days. + min_temp_c: Minimum temperature for colour scale. + max_temp_c: Maximum temperature for colour scale. + image_size: Output image pixel width (square). + + Returns: + PNG image bytes, or None if no data available. + """ + try: + import matplotlib + matplotlib.use("Agg") + import matplotlib.pyplot as plt + from matplotlib.colors import LinearSegmentedColormap + + end_date = datetime.now(timezone.utc).date() + start_date = end_date - timedelta(days=days_back) + + items = _fetch_landsat_items(lat, lon, start_date, end_date, radius_m, max_items=5) + if not items: + return None + + for item in items: + thermal_asset = item.assets.get(_LANDSAT_THERMAL_ASSET) + qa_asset = item.assets.get(_LANDSAT_QA_ASSET) + if thermal_asset is None: + continue + + da = _read_windowed_xr(thermal_asset.href, lat, lon, radius_m) + if da is None: + continue + + lst_c = da * _LANDSAT_SCALE + _LANDSAT_OFFSET - 273.15 + + if qa_asset is not None: + qa_da = _read_windowed_xr(qa_asset.href, lat, lon, radius_m) + if qa_da is not None: + mask = _cloud_mask_landsat(qa_da.values) + lst_c = lst_c.where(mask) + + data = lst_c.compute() + if data.isnull().all(): + continue + + colors = [ + (0.0, "#000080"), + (0.166, "#0000ff"), + (0.333, "#00ffff"), + (0.5, "#00ff00"), + (0.666, "#ffff00"), + (0.833, "#ff8000"), + (1.0, "#ff0000"), + ] + cmap = LinearSegmentedColormap.from_list("lst", colors) + + fig, ax = plt.subplots(1, 1, figsize=(image_size / 100, image_size / 100), dpi=100) + data.plot(ax=ax, cmap=cmap, vmin=min_temp_c, vmax=max_temp_c, add_colorbar=False) + ax.set_axis_off() + fig.patch.set_alpha(0) + buf = io.BytesIO() + fig.savefig(buf, format="png", bbox_inches="tight", pad_inches=0, transparent=True) + plt.close(fig) + buf.seek(0) + return buf.read() + + return None + except Exception as exc: + _log.warning("get_lst_heatmap (STAC) failed: %s", exc) + return None + + +def get_ndci( + lake_name: str, + lat: float, + lon: float, + days_back: int = 30, +) -> NDCIObservation: + """Fetch the most recent Sentinel-2 NDCI (chlorophyll-a proxy) via STAC. + + NDCI = (B05 - B04) / (B05 + B04) + where B05 = red-edge (705 nm) and B04 = red (665 nm). + + Uses the Microsoft Planetary Computer (free, no auth required when + ``planetary_computer`` package is installed). Falls back to Copernicus + Data Space Ecosystem (requires CDSE_ACCESS_TOKEN env var). + + Args: + lake_name: Display name for labelling. + lat: Lake centroid latitude. + lon: Lake centroid longitude. + days_back: Look-back window in days (default 30). + + Returns: + NDCIObservation with ndci_value and chlorophyll_category, or + ``fallback_used=True`` if no data available. + """ + try: + end_date = datetime.now(timezone.utc).date() + start_date = end_date - timedelta(days=days_back) + radius_m = lake_radius_m(lake_name) + + items = _fetch_sentinel2_items(lat, lon, start_date, end_date, radius_m) + if not items: + if not _HAS_PLANETARY_COMPUTER: + return NDCIObservation( + lake_name=lake_name, + fallback_used=True, + error_msg="CDSE_ACCESS_TOKEN not set and planetary_computer not installed", + ) + return NDCIObservation( + lake_name=lake_name, + fallback_used=True, + error_msg="No Sentinel-2 scenes in window", + ) + + for item in items[:3]: + b04_key, b05_key, scl_key = _s2_band_keys(item) + if b04_key is None: + continue + + b04_asset = item.assets.get(b04_key) + b05_asset = item.assets.get(b05_key) + if b04_asset is None or b05_asset is None: + continue + + b04_data = _read_windowed(b04_asset.href, lat, lon, radius_m) + if b04_data is None: + continue + + b05_data = _read_windowed( + b05_asset.href, lat, lon, radius_m, + target_shape=b04_data.shape, + ) + if b05_data is None: + continue + + scl_data = None + if scl_key and scl_key in item.assets: + scl_data = _read_windowed( + item.assets[scl_key].href, lat, lon, radius_m, + target_shape=b04_data.shape, + ) + + if scl_data is not None: + valid_mask = ( + (scl_data != 3) + & (scl_data != 8) + & (scl_data != 9) + & (scl_data != 10) + ) + b04_data = np.ma.masked_where(~valid_mask, b04_data) + b05_data = np.ma.masked_where(~valid_mask, b05_data) + + denom = b05_data.astype(np.float64) + b04_data.astype(np.float64) + with np.errstate(divide="ignore", invalid="ignore"): + ndci = np.ma.where( + denom != 0, + (b05_data.astype(np.float64) - b04_data.astype(np.float64)) / denom, + np.nan, + ) + + mean_ndci = float(np.nanmean(ndci.compressed())) if ndci.count() > 0 else np.nan + if np.isnan(mean_ndci): + continue + + ndci_f = round(mean_ndci, 3) + obs_date = item.datetime.date() if item.datetime else None + return NDCIObservation( + lake_name=lake_name, + ndci_value=ndci_f, + chlorophyll_category=ndci_to_category(ndci_f), + observation_date=obs_date, + fallback_used=False, + ) + + return NDCIObservation( + lake_name=lake_name, + fallback_used=True, + error_msg="No valid NDCI pixels in any scene", + ) + except Exception as exc: + _log.warning("get_ndci (STAC) failed for %s: %s", lake_name, exc) + return NDCIObservation( + lake_name=lake_name, + fallback_used=True, + error_msg=str(exc), + ) diff --git a/tests/test_satellite_lst_stac.py b/tests/test_satellite_lst_stac.py new file mode 100644 index 0000000..a9bdd22 --- /dev/null +++ b/tests/test_satellite_lst_stac.py @@ -0,0 +1,238 @@ +"""Tests for onkia.satellite_lst_stac — STAC-based satellite LST module.""" +from __future__ import annotations + +from datetime import date +from unittest.mock import MagicMock, patch + +import numpy as np +import pytest + +from onkia.satellite_lst import ( + LSTHistoryPoint, + LSTObservation, + NDCIObservation, + celsius_to_fahrenheit, + ndci_to_category, +) +from onkia.satellite_lst_stac import ( + _cloud_mask_landsat, + _point_to_bbox, + _s2_band_keys, + get_latest_lst, + get_lst_heatmap, + get_lst_history, + get_ndci, +) + + +class TestPointToBbox: + def test_returns_four_element_list(self): + bbox = _point_to_bbox(45.0, -94.0, 1000.0) + assert len(bbox) == 4 + + def test_bbox_contains_point(self): + lat, lon = 45.3052, -94.1184 + bbox = _point_to_bbox(lat, lon, 1000.0) + assert bbox[0] < lon < bbox[2] + assert bbox[1] < lat < bbox[3] + + def test_bbox_is_symmetric(self): + lat, lon = 45.0, -94.0 + bbox = _point_to_bbox(lat, lon, 2000.0) + assert abs((lon - bbox[0]) - (bbox[2] - lon)) < 0.0001 + assert abs((lat - bbox[1]) - (bbox[3] - lat)) < 0.0001 + + def test_larger_radius_produces_larger_bbox(self): + small = _point_to_bbox(45.0, -94.0, 1000.0) + large = _point_to_bbox(45.0, -94.0, 3000.0) + assert (large[2] - large[0]) > (small[2] - small[0]) + + +class TestCloudMaskLandsat: + def test_clear_pixel_passes(self): + qa = np.array([0], dtype=np.uint16) + mask = _cloud_mask_landsat(qa) + assert mask[0] is np.True_ + + def test_cloud_bit_fails(self): + qa = np.array([1 << 3], dtype=np.uint16) + mask = _cloud_mask_landsat(qa) + assert mask[0] is np.False_ + + def test_shadow_bit_fails(self): + qa = np.array([1 << 4], dtype=np.uint16) + mask = _cloud_mask_landsat(qa) + assert mask[0] is np.False_ + + def test_both_cloud_and_shadow_fails(self): + qa = np.array([(1 << 3) | (1 << 4)], dtype=np.uint16) + mask = _cloud_mask_landsat(qa) + assert mask[0] is np.False_ + + def test_other_bits_pass(self): + qa = np.array([1 << 1], dtype=np.uint16) + mask = _cloud_mask_landsat(qa) + assert mask[0] is np.True_ + + def test_2d_array(self): + qa = np.array([[0, 1 << 3], [1 << 4, 0]], dtype=np.uint16) + mask = _cloud_mask_landsat(qa) + assert mask.shape == (2, 2) + assert mask[0, 0] is np.True_ + assert mask[0, 1] is np.False_ + assert mask[1, 0] is np.False_ + assert mask[1, 1] is np.True_ + + +class TestS2BandKeys: + def test_planetary_computer_style(self): + item = MagicMock() + item.assets = {"B04": MagicMock(), "B05": MagicMock(), "SCL": MagicMock()} + b04, b05, scl = _s2_band_keys(item) + assert b04 == "B04" + assert b05 == "B05" + assert scl == "SCL" + + def test_copernicus_style(self): + item = MagicMock() + item.assets = {"B04_20m": MagicMock(), "B05_20m": MagicMock(), "SCL_20m": MagicMock()} + b04, b05, scl = _s2_band_keys(item) + assert b04 == "B04_20m" + assert b05 == "B05_20m" + assert scl == "SCL_20m" + + def test_no_matching_keys(self): + item = MagicMock() + item.assets = {"B01": MagicMock()} + b04, b05, scl = _s2_band_keys(item) + assert b04 is None + + +class TestGetLatestLstFallback: + def test_returns_fallback_when_no_items(self): + with patch("onkia.satellite_lst_stac._fetch_landsat_items", return_value=[]): + result = get_latest_lst("Clearwater", 45.3052, -94.1184) + assert isinstance(result, LSTObservation) + assert result.fallback_used is True + assert result.lake_name == "Clearwater" + + def test_returns_fallback_on_exception(self): + with patch( + "onkia.satellite_lst_stac._fetch_landsat_items", + side_effect=RuntimeError("STAC error"), + ): + result = get_latest_lst("Clearwater", 45.3052, -94.1184) + assert result.fallback_used is True + assert "STAC error" in (result.error_msg or "") + + +class TestGetLatestLstSuccess: + def test_returns_observation_with_temp(self): + mock_item = MagicMock() + mock_item.datetime = MagicMock() + mock_item.datetime.date.return_value = date(2026, 5, 30) + mock_item.id = "LC09_L2SP_028029_20260530_02_T1" + mock_item.assets = { + "lwir11": MagicMock(href="https://example.com/thermal.TIF"), + "qa_pixel": MagicMock(href="https://example.com/qa.TIF"), + } + + thermal_data = np.ma.array( + np.full((10, 10), 20000.0), dtype=np.float64 + ) + qa_data = np.ma.array( + np.zeros((10, 10), dtype=np.uint16) + ) + + with patch( + "onkia.satellite_lst_stac._fetch_landsat_items", + return_value=[mock_item], + ): + with patch( + "onkia.satellite_lst_stac._read_windowed", + side_effect=lambda href, lat, lon, rm, **kw: ( + thermal_data if "thermal" in href else qa_data + ), + ): + result = get_latest_lst("Clearwater", 45.3052, -94.1184) + + assert result.fallback_used is False + assert result.temp_celsius is not None + assert result.observation_date == date(2026, 5, 30) + + +class TestGetLstHistory: + def test_returns_empty_when_no_items(self): + with patch("onkia.satellite_lst_stac._fetch_landsat_items", return_value=[]): + result = get_lst_history("Clearwater", 45.3052, -94.1184) + assert result == [] + + def test_returns_empty_on_exception(self): + with patch( + "onkia.satellite_lst_stac._fetch_landsat_items", + side_effect=Exception("timeout"), + ): + result = get_lst_history("Clearwater", 45.3052, -94.1184) + assert result == [] + + +class TestGetNdciFallback: + def test_returns_fallback_when_no_items(self): + with patch("onkia.satellite_lst_stac._fetch_sentinel2_items", return_value=[]): + result = get_ndci("Clearwater", 45.3052, -94.1184) + assert isinstance(result, NDCIObservation) + assert result.fallback_used is True + + def test_returns_fallback_on_exception(self): + with patch( + "onkia.satellite_lst_stac._fetch_sentinel2_items", + side_effect=RuntimeError("auth error"), + ): + result = get_ndci("Clearwater", 45.3052, -94.1184) + assert result.fallback_used is True + + +class TestGetLstHeatmap: + def test_returns_none_when_no_items(self): + with patch("onkia.satellite_lst_stac._fetch_landsat_items", return_value=[]): + result = get_lst_heatmap(45.3052, -94.1184, 3000.0) + assert result is None + + def test_returns_none_on_exception(self): + with patch( + "onkia.satellite_lst_stac._fetch_landsat_items", + side_effect=RuntimeError("err"), + ): + result = get_lst_heatmap(45.3052, -94.1184, 3000.0) + assert result is None + + +class TestStacToGeeCompatibility: + def test_lst_observation_shape_matches_gee(self): + result = LSTObservation( + lake_name="Clearwater", + temp_celsius=20.0, + temp_fahrenheit=68.0, + observation_date=date(2026, 5, 30), + scene_count=3, + pixel_count=100, + fallback_used=False, + ) + assert result.lake_name == "Clearwater" + assert result.temp_celsius == 20.0 + assert result.temp_fahrenheit == 68.0 + assert result.observation_date == date(2026, 5, 30) + assert result.scene_count == 3 + assert result.pixel_count == 100 + assert result.fallback_used is False + + def test_ndci_observation_shape_matches_gee(self): + result = NDCIObservation( + lake_name="Clearwater", + ndci_value=0.085, + chlorophyll_category="moderate", + observation_date=date(2026, 5, 25), + fallback_used=False, + ) + assert result.ndci_value == 0.085 + assert result.chlorophyll_category == "moderate"