From 0bd1d2ed37839de5309dba9fe0d8669255663c05 Mon Sep 17 00:00:00 2001 From: "guoqing.ge" Date: Tue, 12 Aug 2025 21:30:23 -0400 Subject: [PATCH] update obs_exploring.ipynb --- DAmonitor/base.py | 4 + notebooks/etc/Untitled.ipynb | 11 + notebooks/obs_exploring.ipynb | 497 ++++++++++++++++-- ... => install_pyDAmonitor_jupyter_kernel.sh} | 0 4 files changed, 457 insertions(+), 55 deletions(-) rename ush/{install_bokeh_jupyter_kernel.sh => install_pyDAmonitor_jupyter_kernel.sh} (100%) diff --git a/DAmonitor/base.py b/DAmonitor/base.py index 9805870..04af627 100644 --- a/DAmonitor/base.py +++ b/DAmonitor/base.py @@ -127,6 +127,10 @@ def query_data(data, meta_exclude=None): print(text.rstrip(",")) +def query_obj(obj): + return [attr for attr in dir(obj) if not callable(getattr(obj, attr)) and not attr.startswith("__")] + + def to_dataframe(obsDF): obsDF = obsDF if obsDF.data: diff --git a/notebooks/etc/Untitled.ipynb b/notebooks/etc/Untitled.ipynb index 09dbef9..682e6a1 100644 --- a/notebooks/etc/Untitled.ipynb +++ b/notebooks/etc/Untitled.ipynb @@ -1,5 +1,16 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "5b039970-39eb-4d48-a3c3-16515ff29500", + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "Image('../../docs/logo.png')" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/notebooks/obs_exploring.ipynb b/notebooks/obs_exploring.ipynb index acf1dd5..327ca83 100644 --- a/notebooks/obs_exploring.ipynb +++ b/notebooks/obs_exploring.ipynb @@ -2,31 +2,28 @@ "cells": [ { "cell_type": "markdown", - "id": "0", + "id": "f67320da-b876-40bf-92b3-47952dc62cd3", "metadata": {}, "source": [ - "# obs exploring\n", - "" + "# Work with obsSpace\n", + "\n", + "JEDI or GSI generates observation space diagnostic files, which contains original observation information, hofx (H(x), i.e. model counter-parts at the observataion locations), OMB (observation minus background), OMA as well as other information.\n", + "\n", + "This notebook covers how to deal with JEDI diganostic files (which are also called output ioda files). We call them `jdiag` files." ] }, { "cell_type": "markdown", - "id": "1", + "id": "87185560-60fb-4f54-9c77-2b0204f24070", "metadata": {}, "source": [ - "## House keeping and load modules" + "## import packages and define variables" ] }, { "cell_type": "code", "execution_count": null, - "id": "2", + "id": "fe92c849-6bd7-409a-a6ff-a57ed3de62f5", "metadata": {}, "outputs": [], "source": [ @@ -43,86 +40,269 @@ "else:\n", " print(f\"pyDAmonitor_ROOT={pyDAmonitor_ROOT}\\n\")\n", "sys.path.insert(0, pyDAmonitor_ROOT)\n", - "\n", + " \n", + "# import modules\n", + "import warnings\n", + "import math\n", + "import numpy as np\n", + "import uxarray as ux\n", + "import xarray as xr\n", + "import pandas as pd\n", "import seaborn as sns # seaborn handles NaN values automatically\n", "import matplotlib.pyplot as plt\n", - "import pandas as pd\n", - "import numpy as np\n", + "from netCDF4 import Dataset\n", + "from DAmonitor.base import query_dataset, query_data, query_obj, to_dataframe\n", "\n", - "from DAmonitor.base import query_dataset, query_data, to_dataframe\n", - "from DAmonitor.obs import obsSpace, fit_rate" + "jdiag_file=f\"{pyDAmonitor_ROOT}/data/samples/mpasjedi/jdiag_aircar_t133.nc\"" ] }, { "cell_type": "markdown", - "id": "3", + "id": "8038f6bd-8ce6-41a3-b9e8-358422ed72a8", "metadata": {}, "source": [ - "## load ioda data (ioda obs files, or jedi output ioda file (i.e. jdiag files) )" + "## Use NetCDF4 to read jdiag files" ] }, { "cell_type": "code", "execution_count": null, - "id": "4", + "id": "9143e6bd-f740-4863-82b9-070cf744be72", "metadata": {}, "outputs": [], "source": [ - "ioda_file = f\"{pyDAmonitor_ROOT}/data/samples/mpasjedi/jdiag_aircar_t133.nc\"\n", - "obs_t133 = obsSpace(ioda_file)" + "dataset=Dataset(jdiag_file, mode='r')\n", + "query_dataset(dataset)" + ] + }, + { + "cell_type": "markdown", + "id": "9a557d4e-7f30-4ec6-8445-2f5505370732", + "metadata": {}, + "source": [ + "Now we see that jdiag (or output ioda file) has the following group/variable structure:\n", + "```\n", + "EffectiveError0\n", + " airTemperature, \n", + "EffectiveError1\n", + " airTemperature, \n", + "EffectiveError2\n", + " airTemperature, \n", + "EffectiveQC0\n", + " airTemperature, \n", + "EffectiveQC1\n", + " airTemperature, \n", + "EffectiveQC2\n", + " airTemperature, \n", + "MetaData\n", + " stationIdentification, aircraftFlightPhase, timeOffset, prepbufrDataLvlCat, longitude, stationElevation, latitude, prepbufrReportType, pressure, longitude_latitude_pressure, dateTime, height, dumpReportType, aircraftFlightNumber, \n", + "ObsBias0\n", + " airTemperature, \n", + "ObsBias1\n", + " airTemperature, \n", + "ObsBias2\n", + " airTemperature, \n", + "ObsError\n", + " airTemperature, relativeHumidity, windEastward, pressure, windNorthward, \n", + "ObsType\n", + " specificHumidity, windEastward, airTemperature, windNorthward, \n", + "ObsValue\n", + " dewpointTemperature, specificHumidity, airTemperature, windEastward, windNorthward, \n", + "QualityMarker\n", + " airTemperature, pressure, windEastward, specificHumidity, height, windNorthward, \n", + "TempObsErrorData\n", + " airTemperature, \n", + "hofx0\n", + " airTemperature, \n", + "hofx1\n", + " airTemperature, \n", + "hofx2\n", + " airTemperature, \n", + "innov1\n", + " airTemperature, \n", + "oman\n", + " airTemperature, \n", + "ombg\n", + " airTemperature,\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a7913b9b-debc-4d55-98b2-46428f5635f2", + "metadata": {}, + "source": [ + "This group/variable structure is not conveniet to use. \n", + "So we define a `obsSpace` class under `DAmonitor` to help use deal with jdiag file more easily." ] }, { "cell_type": "markdown", - "id": "5", + "id": "306fe5e3-9c69-4995-8046-beb97acee417", "metadata": {}, "source": [ - "## examine data and convert to dataframe" + "## Use the `obsSpace` class to read jdiag files" ] }, { "cell_type": "code", "execution_count": null, - "id": "6", + "id": "4ea0b169-3d9b-48af-b933-cfb0fc486b3a", "metadata": {}, "outputs": [], "source": [ - "#query_dataset(obs_t133.dataset)\n", - "query_data(obs_t133.t)\n", - "\n", + "from DAmonitor.obs import obsSpace, fit_rate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8bcf7383-ca9c-4809-92e5-1d3d366817e2", + "metadata": {}, + "outputs": [], + "source": [ + "# create a t133 object using the obsSpace class\n", + "t133=obsSpace(jdiag_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb939081-3d08-49d3-a993-49aef3ca5d2a", + "metadata": {}, + "outputs": [], + "source": [ + "# check the t133 object\n", + "query_obj(t133)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af2f98e0-d1f5-421c-8595-2b6050094495", + "metadata": {}, + "outputs": [], + "source": [ + "# play with some object attributes\n", + "(\n", + "t133.filepath,\n", + "t133.nlocs,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d21a0131-7cb2-4673-8811-f5a907368513", + "metadata": {}, + "outputs": [], + "source": [ + "# query dataset\n", + "query_dataset(t133.dataset) # the original jdiag file dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "756640bc-e890-48f8-aa6a-748cf811ebe5", + "metadata": {}, + "outputs": [], + "source": [ + "# query data\n", + "query_data(t133.q) # since the t133 object does not contain q observations, so it will display meta information" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1cbb6ff-956a-4d0d-a36a-6ec132bc1980", + "metadata": {}, + "outputs": [], + "source": [ + "# query data\n", + "query_data(t133.t)" + ] + }, + { + "cell_type": "markdown", + "id": "86210e48-13a1-4148-a2a2-9113ef6b5921", + "metadata": {}, + "source": [ + "The above output shows that we reorganize the data structure based on the observation variable, i.e. `t`, `q`, `uv`, `bt` \n", + "We can access values easily using popular Python class strucutre, i.e. `t133.t`, `t133.t.ObsValue`, etc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2186cae7-6b4a-452c-a9a7-369c1172d577", + "metadata": {}, + "outputs": [], + "source": [ + "# print out array values\n", + "np.set_printoptions(threshold=500) # don't print out all array values\n", + "print(t133.t.ObsValue)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2182af38-e126-431f-97a1-0ffc77e18da0", + "metadata": {}, + "outputs": [], + "source": [ + "# this will geneate lots of output, so we comment out the following two line by default. Uncomment them to see the results\n", "# np.set_printoptions(threshold=np.inf) # print out all array values\n", - "# print(t133.t.longitude)\n", - "\n", - "df = to_dataframe(obs_t133.t)\n", + "# print(t133.t.longitude)" + ] + }, + { + "cell_type": "markdown", + "id": "4b1d013e-1026-49a3-b3dc-9176beca4175", + "metadata": {}, + "source": [ + "## Convert to Pandas DataFrame\n", + "Converting jdiag data into Pandas DataFrame brings up lots of benefits and we can use utilize lots of mature DataFrames capabilities. \n", + "We can see this from the following cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb4060e1-8606-4cda-a33d-2aa15159cf8d", + "metadata": {}, + "outputs": [], + "source": [ + "df = to_dataframe(t133.t)\n", "df" ] }, { "cell_type": "markdown", - "id": "7", + "id": "72fd1dac-3347-48e2-b85a-9c55f16c82d2", "metadata": {}, "source": [ - "## Histograms" + "## Plot hisgrams" ] }, { "cell_type": "markdown", - "id": "8", + "id": "9f218857-eac7-4d18-a2b7-a479475126fc", "metadata": {}, "source": [ - "##### example 1" + "### example 1" ] }, { "cell_type": "code", "execution_count": null, - "id": "9", + "id": "2b371a38-a1c8-4fd7-9215-932cb9c6dc8b", "metadata": {}, "outputs": [], "source": [ "plt.figure(figsize=(8, 5))\n", "#sns.histplot(df[\"oman\"], bins=50, kde=True, color=\"steelblue\")\n", - "sns.histplot(obs_t133.t.oman, bins=100, kde=True, color=\"steelblue\")\n", + "sns.histplot(t133.t.oman, bins=100, kde=True, color=\"steelblue\")\n", "plt.title(\"Histogram of oman\")\n", "plt.xlabel(\"oman values\")\n", "plt.ylabel(\"Density\")\n", @@ -132,20 +312,19 @@ }, { "cell_type": "markdown", - "id": "10", + "id": "a29bb526-b145-403d-b96f-c3b5dbf46cce", "metadata": {}, "source": [ - "##### example 2" + "### example 2" ] }, { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "cccc1327-17f3-4a85-a49b-af21a555b191", "metadata": {}, "outputs": [], "source": [ - "# Melt into long format\n", "df_long = df[[\"oman\", \"ombg\"]].melt(var_name=\"variable\", value_name=\"value\")\n", "\n", "plt.figure(figsize=(8, 5))\n", @@ -157,16 +336,16 @@ }, { "cell_type": "markdown", - "id": "12", + "id": "b6ee0a7a-abbc-415d-8bce-db4e6dfd9353", "metadata": {}, "source": [ - "##### example 3" + "### example 3" ] }, { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "03ea1d07-6ed1-4757-8da8-7a8167eb535b", "metadata": {}, "outputs": [], "source": [ @@ -184,34 +363,38 @@ }, { "cell_type": "markdown", - "id": "14", + "id": "eee0fa58-d0a6-471c-b5c5-9d01e5082242", "metadata": {}, "source": [ - "## fit rate and plotting" + "## plot fitting rate\n", + "The rate fitting to observtions is an important metric to evaluate data assimilation performance. \n", + "We don't want a small fitting rate, which means very little observation impacts on the model forecast. \n", + "We don't want a large fitting rate either, which means we fit too close to the obervations and the model forecast will crash.\n", + "Ususally, a fitting rate of 20%~30% is expected." ] }, { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "ebeefb37-34d8-4b89-8381-12d8f4ce2c8c", "metadata": {}, "outputs": [], "source": [ - "# 1. Filter valid data (both 'oman' and 'ombg' are not NaN)\n", + "# Filter valid data (both 'oman' and 'ombg' are not NaN)\n", "valid_df = df[df[\"oman\"].notna() & df[\"ombg\"].notna()].copy()\n", "valid_df = valid_df.dropna(subset=[\"height\"]) # removes any rows in valid_df where height is missing (NaN)\n", - "print(valid_df[valid_df[\"height\"] < 0][\"height\"]) # negative height" + "# print(valid_df[valid_df[\"height\"] < 0][\"height\"]) # negative height" ] }, { "cell_type": "code", "execution_count": null, - "id": "17", + "id": "bb48c4e8-ac3d-4b8e-95db-d3fc1bafd414", "metadata": {}, "outputs": [], "source": [ "dz = 1000\n", - "grouped = fit_rate(obs_t133.t, dz=dz)\n", + "grouped = fit_rate(t133.t, dz=dz)\n", "\n", "# 5. Plot vertical profile of fit_rate vs height\n", "plt.figure(figsize=(7, 6))\n", @@ -241,19 +424,223 @@ { "cell_type": "code", "execution_count": null, - "id": "16", + "id": "4f1524c2-dba1-4ef2-9273-544064654929", "metadata": {}, "outputs": [], "source": [ "print(grouped[\"height_bin\"])" ] + }, + { + "cell_type": "markdown", + "id": "2923c1fa-7db6-49bb-bf51-b3a3bcb1e342", + "metadata": {}, + "source": [ + "## plot satellite observations" + ] + }, + { + "cell_type": "markdown", + "id": "ed46fd41-0dc1-4bf7-81cc-dbd774e22e43", + "metadata": {}, + "source": [ + "### load satellite data using `obsSpace`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1dbc64d-73e9-4066-b56f-409d8c316a00", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "cris_file = f\"{pyDAmonitor_ROOT}/data/samples/mpasjedi/jdiag_cris-fsr_n20.nc\"\n", + "obsCris = obsSpace(cris_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ec3715d-5583-4bc8-91d7-9157d00c2b4a", + "metadata": {}, + "outputs": [], + "source": [ + "query_data(obsCris.bt, meta_exclude=\"sensorCentralWavenumber_\") # removing the meta_exclude paramter will output all sensorCentralWavenumber_* information" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "56bd4372-3ff2-4499-a3c0-7cbef4a56a82", + "metadata": {}, + "outputs": [], + "source": [ + "print(obsCris.bt.hofx0)" + ] + }, + { + "cell_type": "markdown", + "id": "7f8bf63a-09ab-4752-aae4-9fbeaa56d27b", + "metadata": {}, + "source": [ + "### assemble target obs array" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcfa319e-9a8e-403e-9be2-8765d41c0096", + "metadata": {}, + "outputs": [], + "source": [ + "ncount=0\n", + "idx = []\n", + "idx2 = []\n", + "ch=61\n", + "for n in np.arange(len(obsCris.bt.ombg[:,ch])):\n", + " #if obsCris.bt.CloudDetectMinResidualIR[n,ch] == 1: \n", + " if obsCris.bt.ombg[n,ch] > -200 and obsCris.bt.ombg[n,ch] < 200:\n", + " idx.append(n)\n", + " ncount = ncount + 1 \n", + "\n", + "lat=obsCris.bt.latitude[idx]\n", + "lon=obsCris.bt.longitude[idx]\n", + "obarray=obsCris.bt.DerivedObsValue[idx,ch]\n", + "print(lon,lat,obarray)\n", + "print(ncount)" + ] + }, + { + "cell_type": "markdown", + "id": "ca11835f-643d-45da-8687-d468a388ddb8", + "metadata": {}, + "source": [ + "### prepare coloar map" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d84e0173-fa9d-4afb-9957-bf0478d8e4ab", + "metadata": {}, + "outputs": [], + "source": [ + "datmi = np.nanmin(obarray) # Min of the data\n", + "datma = np.nanmax(obarray) # Max of the data\n", + "\n", + "if np.nanmin(obarray) < 0:\n", + " cmax = datma\n", + " cmin = datmi\n", + " cmax=310\n", + " cmin=200\n", + " #cmax=1.0\n", + " #cmin=-1.0\n", + " cmap = 'RdBu'\n", + "else:\n", + " #cmax = omean+stdev\n", + " #cmin = np.maximum(omean-stdev, 0.0)\n", + " cma = datma\n", + " cmin = datmi\n", + " cmax=310\n", + " cmin=200\n", + " #cmax=1.0\n", + " #cmin=-1.0\n", + " cmap = 'RdBu'\n", + " cmap = 'viridis'\n", + " cmap = 'jet'\n", + "\n", + "\n", + "cmin = 200.\n", + "cmax = 310.\n", + "conus_12km = [-150, -50, 15, 55]\n", + "\n", + "color_map = plt.cm.get_cmap(cmap)\n", + "reversed_color_map = color_map.reversed()\n", + "units = 'K'\n", + "#units = '%'\n", + "fig = plt.figure(figsize=(10, 5))" + ] + }, + { + "cell_type": "markdown", + "id": "ae943422-37cb-4cb1-a8ef-8e6a052ddc52", + "metadata": {}, + "source": [ + "### make the plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "911e1eb0-732c-4c83-abf1-7fd7e21b3d28", + "metadata": {}, + "outputs": [], + "source": [ + "import cartopy.crs as ccrs\n", + "import matplotlib.ticker as mticker\n", + "import matplotlib.colors as colors\n", + "\n", + "ax = plt.axes(projection=ccrs.PlateCarree(central_longitude=0))\n", + "\n", + "# Plot grid lines\n", + "# ----------------\n", + "gl = ax.gridlines(crs=ccrs.PlateCarree(central_longitude=0), draw_labels=True,\n", + " linewidth=1, color='gray', alpha=0.5, linestyle='-')\n", + "gl.top_labels = False\n", + "gl.xlabel_style = {'size': 10, 'color': 'black'}\n", + "gl.ylabel_style = {'size': 10, 'color': 'black'}\n", + "gl.xlocator = mticker.FixedLocator(\n", + " [-180, -135, -90, -45, 0, 45, 90, 135, 179.9])\n", + "ax.set_ylabel(\"Latitude\", fontsize=7)\n", + "ax.set_xlabel(\"Longitude\", fontsize=7)\n", + "\n", + "# Get scatter data\n", + "# ------------------\n", + "sc = ax.scatter(lon, lat,\n", + " c=obarray, s=4, linewidth=0,\n", + " transform=ccrs.PlateCarree(), cmap=cmap, vmin=cmin, vmax = cmax, norm=None, antialiased=True)\n", + "\n", + "# Plot colorbar\n", + "# --------------\n", + "cbar = plt.colorbar(sc, ax=ax, orientation=\"horizontal\", pad=.1, fraction=0.06,ticks=[200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310])\n", + "#cbar = plt.colorbar(sc, ax=ax, orientation=\"horizontal\", pad=.1, fraction=0.06,ticks=[-3, -2.5, -2, -1.5, -1, -0.5, 0, 0.5, 1.0, 1.5, 2.0, 2.5, 3 ])\n", + "#cbar = plt.colorbar(sc, ax=ax, orientation=\"horizontal\", pad=.1, fraction=0.06,ticks=[0, 10, 20, 20, 40, 50, 60, 70, 80, 90, 100])\n", + "cbar.ax.set_ylabel(units, fontsize=10)\n", + "# Plot globally\n", + "# --------------\n", + "#ax.set_global()\n", + "#ax.set_extent(conus)\n", + "ax.set_extent(conus_12km)\n", + "\n", + "# Draw coastlines\n", + "# ----------------\n", + "ax.coastlines()\n", + "ax.text(0.45, -0.1, 'Longitude', transform=ax.transAxes, ha='left')\n", + "ax.text(-0.08, 0.4, 'Latitude', transform=ax.transAxes,\n", + " rotation='vertical', va='bottom')\n", + "\n", + "#text = f\"Total Count:{datcont:0.0f}, Max/Min/Mean/Std: {datma:0.3f}/{datmi:0.3f}/{omean:0.3f}/{stdev:0.3f} {units}\"\n", + "#print(text)\n", + "#ax.text(0.67, -0.1, text, transform=ax.transAxes, va='bottom', fontsize=6.2)\n", + "\n", + "dpi=100\n", + "gl.top_labels = False\n", + "plt.tight_layout()\n", + "\n", + "# show plot\n", + "# -----------\n", + "# pname='test.png'\n", + "# plt.savefig(pname, dpi=dpi)" + ] } ], "metadata": { "kernelspec": { - "display_name": "bokeh", + "display_name": "pyDAmonitor", "language": "python", - "name": "bokeh" + "name": "pydamonitor" }, "language_info": { "codemirror_mode": { @@ -265,7 +652,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/ush/install_bokeh_jupyter_kernel.sh b/ush/install_pyDAmonitor_jupyter_kernel.sh similarity index 100% rename from ush/install_bokeh_jupyter_kernel.sh rename to ush/install_pyDAmonitor_jupyter_kernel.sh