diff --git a/notebooks/Network_comparison.ipynb b/notebooks/Network_comparison.ipynb new file mode 100644 index 000000000..a71aa199d --- /dev/null +++ b/notebooks/Network_comparison.ipynb @@ -0,0 +1,558 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "e91978fd", + "metadata": {}, + "outputs": [], + "source": [ + "import modelskill as ms\n", + "import mikeio1d as m1d\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "id": "c2cc9f94", + "metadata": {}, + "source": [ + "## Current workflow" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c88c8a87", + "metadata": {}, + "outputs": [], + "source": [ + "# - Loading model_results\n", + "path_to_res1d = '../tests/testdata/network.res1d'\n", + "nt = m1d.open(path_to_res1d)\n", + "\n", + "# Loading observations\n", + "# - We fabricate an observation set to try the current workflow\n", + "np.random.seed(42)\n", + "\n", + "observations = nt.read()\n", + "observations = observations + np.random.normal(0, 10, observations.shape)\n", + "# Arbitrarily selecting 2 columns of WaterLevel that will be used as observations\n", + "relevant_columns = [col for col in observations.columns if \"WaterLevel\" in col][8:10]\n", + "observations = observations.loc[:, relevant_columns].rename(columns=lambda x: \"sensor_\" + x.split(\":\")[1])\n", + "observations = observations.resample(\"1min\").mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "568f6f18", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nbiasrmseurmsemaeccsir2
observation
sensor_91100.7437789.1679659.1377447.7013890.1818370.0472570.008068
sensor_101100.5358629.3369349.3215447.8073910.0170570.048156-0.003818
\n", + "
" + ], + "text/plain": [ + " n bias rmse urmse mae cc si \\\n", + "observation \n", + "sensor_9 110 0.743778 9.167965 9.137744 7.701389 0.181837 0.047257 \n", + "sensor_10 110 0.535862 9.336934 9.321544 7.807391 0.017057 0.048156 \n", + "\n", + " r2 \n", + "observation \n", + "sensor_9 0.008068 \n", + "sensor_10 -0.003818 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "obs_9 = ms.PointObservation(observations, item=\"sensor_9\")\n", + "obs_10 = ms.PointObservation(observations, item=\"sensor_10\")\n", + "\n", + "res1d_item = \"WaterLevel:7\"\n", + "nt = m1d.open(path_to_res1d)\n", + "df = nt.to_dataframe()\n", + "modres = ms.PointModelResult(df, item=res1d_item)\n", + "\n", + "comparer_1 = ms.match(obs_9, modres)\n", + "comparer_2 = ms.match(obs_10, modres)\n", + "\n", + "ccol0 = ms.ComparerCollection([comparer_1, comparer_2])\n", + "ccol0.skill()" + ] + }, + { + "cell_type": "markdown", + "id": "393b840c", + "metadata": {}, + "source": [ + "- We can simplify further the workflow and not initialize `PointObservation` and `PointModelResult`." + ] + }, + { + "cell_type": "markdown", + "id": "35f13324", + "metadata": {}, + "source": [ + "The challenge is the 1d matching (?)\n", + "\n", + "- [Design 1](https://github.com/DHI/modelskill/pull/536) assumes the user knows the sensor location in `mikeio1d` _coordinates_ (reach, chainage, node, catchment).\n", + "- [Design 2](https://github.com/DHI/modelskill/pull/536) suggests a more general approach where we would need to define a __shared__ coordinate system that represents a network architecture. We would need a plugin for each potential model data source (`mikeio1d`, `EPANET`, surrogates)" + ] + }, + { + "cell_type": "markdown", + "id": "8730b218", + "metadata": {}, + "source": [ + "### Design 1" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7607223b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\japr\\Repos\\modelskill\\src\\modelskill\\quantity.py:190: UserWarning: unit='meter' was automatically set for type_name='Water Level'\n", + " warnings.warn(f\"{unit=} was automatically set for {type_name=}\")\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nbiasrmseurmsemaeccsir2
observation
sensor_91100.7437789.1679659.1377447.7013890.1818370.0472570.008068
\n", + "
" + ], + "text/plain": [ + " n bias rmse urmse mae cc si \\\n", + "observation \n", + "sensor_9 110 0.743778 9.167965 9.137744 7.701389 0.181837 0.047257 \n", + "\n", + " r2 \n", + "observation \n", + "sensor_9 0.008068 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res = ms.NetworkModelResult(path_to_res1d, \"Water Level\", node=7)\n", + "obs_1 = ms.PointObservation(observations, item=\"sensor_9\")\n", + "\n", + "comparer_1 = ms.match(obs_1, res)\n", + "comparer_1.skill()" + ] + }, + { + "cell_type": "markdown", + "id": "a59a1d87", + "metadata": {}, + "source": [ + "- Multiple observations with one result" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "80271095", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nbiasrmseurmsemaeccsir2
observation
sensor_91100.7437789.1679659.1377447.7013890.1818370.0472570.008068
sensor_101100.5358629.3369349.3215447.8073910.0170570.048156-0.003818
\n", + "
" + ], + "text/plain": [ + " n bias rmse urmse mae cc si \\\n", + "observation \n", + "sensor_9 110 0.743778 9.167965 9.137744 7.701389 0.181837 0.047257 \n", + "sensor_10 110 0.535862 9.336934 9.321544 7.807391 0.017057 0.048156 \n", + "\n", + " r2 \n", + "observation \n", + "sensor_9 0.008068 \n", + "sensor_10 -0.003818 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantity = ms.Quantity(\"Water Level\", unit=\"m3_per_sec\")\n", + "mod_item = ms.NetworkModelResult(path_to_res1d, quantity, node=7)\n", + "obs1 = ms.PointObservation(observations, quantity=quantity, item=\"sensor_9\")\n", + "obs2 = ms.PointObservation(observations, quantity=quantity, item=\"sensor_10\")\n", + "\n", + "comparer_1 = ms.match(obs1, mod_item)\n", + "comparer_2 = ms.match(obs2, mod_item)\n", + "\n", + "ccol = ms.ComparerCollection([comparer_1, comparer_2])\n", + "ccol.skill()" + ] + }, + { + "cell_type": "markdown", + "id": "55f42b62", + "metadata": {}, + "source": [ + "- Multiple models with one observation" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "9ee90ffe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nbiasrmseurmsemaeccsir2
modelobservation
model 1sensor_91100.7437789.1679659.1377447.7013890.1818370.0472570.008068
model 2sensor_91102.2485879.4089639.1363257.8827460.2052370.047249-0.044767
model 3sensor_91100.8088139.2402429.2047767.7722710.2123480.047603-0.007633
model 4sensor_91100.8088139.2402429.2047767.7722710.2123480.047603-0.007633
\n", + "
" + ], + "text/plain": [ + " n bias rmse urmse mae cc \\\n", + "model observation \n", + "model 1 sensor_9 110 0.743778 9.167965 9.137744 7.701389 0.181837 \n", + "model 2 sensor_9 110 2.248587 9.408963 9.136325 7.882746 0.205237 \n", + "model 3 sensor_9 110 0.808813 9.240242 9.204776 7.772271 0.212348 \n", + "model 4 sensor_9 110 0.808813 9.240242 9.204776 7.772271 0.212348 \n", + "\n", + " si r2 \n", + "model observation \n", + "model 1 sensor_9 0.047257 0.008068 \n", + "model 2 sensor_9 0.047249 -0.044767 \n", + "model 3 sensor_9 0.047603 -0.007633 \n", + "model 4 sensor_9 0.047603 -0.007633 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantity = ms.Quantity(\"Water Level\", unit=\"m3_per_sec\")\n", + "models = [\n", + " ms.NetworkModelResult(path_to_res1d, quantity, node=7, name=\"model 1\"),\n", + " ms.NetworkModelResult(path_to_res1d, quantity, reach=\"100l1\", gridpoint=\"start\", name=\"model 2\"),\n", + " ms.NetworkModelResult(path_to_res1d, quantity, reach=\"54l1\", gridpoint=0, name=\"model 3\"),\n", + " ms.NetworkModelResult(path_to_res1d, reach=\"54l1\", gridpoint=0, name=\"model 4\")\n", + "]\n", + "\n", + "obs1 = ms.PointObservation(observations, quantity=quantity, item=\"sensor_9\")\n", + "\n", + "comparer_1 = ms.match(obs1, models)\n", + "comparer_1.skill()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "modelskill", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/modelskill/__init__.py b/src/modelskill/__init__.py index 4a362ffb2..a1590fb33 100644 --- a/src/modelskill/__init__.py +++ b/src/modelskill/__init__.py @@ -39,8 +39,13 @@ GridModelResult, DfsuModelResult, DummyModelResult, + NetworkModelResult, +) +from .obs import ( + observation, + PointObservation, + TrackObservation, ) -from .obs import observation, PointObservation, TrackObservation from .matching import from_matched, match from .configuration import from_config from .settings import options, get_option, set_option, reset_option, load_style @@ -90,6 +95,7 @@ def load(filename: Union[str, Path]) -> Comparer | ComparerCollection: "GridModelResult", "DfsuModelResult", "DummyModelResult", + "NetworkModelResult", "observation", "PointObservation", "TrackObservation", diff --git a/src/modelskill/matching.py b/src/modelskill/matching.py index f8b15aa4d..5b5d66613 100644 --- a/src/modelskill/matching.py +++ b/src/modelskill/matching.py @@ -28,7 +28,13 @@ from .model.dummy import DummyModelResult from .model.grid import GridModelResult from .model.track import TrackModelResult -from .obs import Observation, PointObservation, TrackObservation, observation +from .model.network import NetworkModelResult +from .obs import ( + Observation, + PointObservation, + TrackObservation, + observation, +) from .timeseries import TimeSeries from .types import Period @@ -51,6 +57,7 @@ DfsuModelResult, TrackModelResult, DummyModelResult, + NetworkModelResult, ] ObsInputType = Union[ str, @@ -452,6 +459,7 @@ def _parse_single_model( | GridModelResult | DfsuModelResult | DummyModelResult + | NetworkModelResult ): if isinstance( mod, @@ -484,6 +492,7 @@ def _parse_single_model( GridModelResult, DfsuModelResult, DummyModelResult, + NetworkModelResult, ), ) return mod diff --git a/src/modelskill/model/__init__.py b/src/modelskill/model/__init__.py index b0ba468ae..c03a8e62e 100644 --- a/src/modelskill/model/__init__.py +++ b/src/modelskill/model/__init__.py @@ -21,6 +21,7 @@ from .dfsu import DfsuModelResult from .grid import GridModelResult from .dummy import DummyModelResult +from .network import NetworkModelResult __all__ = [ "PointModelResult", @@ -29,4 +30,5 @@ "GridModelResult", "model_result", "DummyModelResult", + "NetworkModelResult", ] diff --git a/src/modelskill/model/factory.py b/src/modelskill/model/factory.py index 4ee2cb7f3..26ec9ffbc 100644 --- a/src/modelskill/model/factory.py +++ b/src/modelskill/model/factory.py @@ -9,6 +9,7 @@ from .track import TrackModelResult from .dfsu import DfsuModelResult from .grid import GridModelResult +from .network import NetworkModelResult from ..types import GeometryType, DataInputType @@ -27,7 +28,13 @@ def model_result( aux_items: Optional[list[int | str]] = None, gtype: Optional[Literal["point", "track", "unstructured", "grid"]] = None, **kwargs: Any, -) -> PointModelResult | TrackModelResult | DfsuModelResult | GridModelResult: +) -> ( + PointModelResult + | TrackModelResult + | DfsuModelResult + | GridModelResult + | NetworkModelResult +): """A factory function for creating an appropriate object based on the data input. Parameters diff --git a/src/modelskill/model/network.py b/src/modelskill/model/network.py new file mode 100644 index 000000000..d481c252f --- /dev/null +++ b/src/modelskill/model/network.py @@ -0,0 +1,62 @@ +from __future__ import annotations +from typing import Optional, Sequence, Literal + +from mikeio1d import Res1D + +from ..timeseries import _parse_network_input +from ..quantity import Quantity +from .point import PointModelResult + + +class NetworkModelResult(PointModelResult): + """Model result for a network location. + + Construct a NetworkModelResult from a res1d data source. + + Parameters + ---------- + data : str, Path or mikeio1d.Res1D + filename (.res1d) or object with the data + quantity : str + The name of the model result, + by default None (will be set to file name or item name) + reach : str, optional + Reach id in the network + node : int, optional + Node id in the network + chainage : float, optional + Chainage number in its respective reach + gridpoint : int, optional + Index associated to the gridpoints in the reach + name : Optional[str], optional + The name of the model result, + by default None (will be set to file name or item name) + aux_items : Optional[list[int | str]], optional + Auxiliary items, by default None + """ + + def __init__( + self, + data: Res1D | str, + quantity: Optional[str | Quantity] = None, + *, + reach: Optional[str] = None, + node: Optional[int] = None, + chainage: Optional[float] = None, + gridpoint: Optional[int | Literal["start", "end"]] = None, + name: Optional[str] = None, + aux_items: Optional[Sequence[int | str]] = None, + ) -> None: + if isinstance(quantity, str): + quantity = Quantity.from_mikeio_eum_name(quantity) + + variable = quantity.name if isinstance(quantity, Quantity) else None + data = _parse_network_input( + data, + variable=variable, + reach=reach, + node=node, + chainage=chainage, + gridpoint=gridpoint, + ) + super().__init__(data=data, name=name, quantity=quantity, aux_items=aux_items) diff --git a/src/modelskill/timeseries/__init__.py b/src/modelskill/timeseries/__init__.py index 08fa449a4..1884b8910 100644 --- a/src/modelskill/timeseries/__init__.py +++ b/src/modelskill/timeseries/__init__.py @@ -1,9 +1,10 @@ from ._timeseries import TimeSeries -from ._point import _parse_point_input +from ._point import _parse_point_input, _parse_network_input from ._track import _parse_track_input __all__ = [ "TimeSeries", "_parse_point_input", "_parse_track_input", + "_parse_network_input", ] diff --git a/src/modelskill/timeseries/_point.py b/src/modelskill/timeseries/_point.py index 426d0c9d7..b5aca4149 100644 --- a/src/modelskill/timeseries/_point.py +++ b/src/modelskill/timeseries/_point.py @@ -2,12 +2,14 @@ from collections.abc import Hashable from dataclasses import dataclass from pathlib import Path -from typing import Sequence, get_args, List, Optional +from typing import Literal, Sequence, get_args, List, Optional import numpy as np import pandas as pd import xarray as xr +import warnings import mikeio +import mikeio1d from ..types import GeometryType, PointType from ..quantity import Quantity @@ -76,9 +78,14 @@ def _parse_point_input( stem = Path(data).stem data = xr.open_dataset(data) name = name or data.attrs.get("name") or stem + elif suffix == ".res1d": + name = name or Path(data).stem + data = mikeio1d.open(data) + elif isinstance(data, mikeio.Dfs0): data = data.read() # now mikeio.Dataset - + elif isinstance(data, mikeio1d.Res1D): + data = data.read() # now mikeio1d.Res1D # parse items if isinstance(data, (mikeio.DataArray, pd.Series, xr.DataArray)): item_name = data.name if data.name is not None else "PointModelResult" @@ -177,3 +184,73 @@ def _parse_point_input( assert isinstance(ds, xr.Dataset) return ds + + +def _parse_network_input( + data: mikeio1d.Res1D | str, + variable: Optional[str] = None, + *, + node: Optional[int] = None, + reach: Optional[str] = None, + chainage: Optional[str | float] = None, + gridpoint: Optional[int | Literal["start", "end"]] = None, +) -> pd.Series: + def variable_name_to_res1d(name: str) -> str: + return name.replace(" ", "").replace("_", "") + + if isinstance(data, (str, Path)): + if Path(data).suffix == ".res1d": + data = mikeio1d.open(data) + else: + raise ValueError("Input data must have '.res1d' file extension.") + + by_node = node is not None + by_reach = reach is not None + with_chainage = chainage is not None + with_index = gridpoint is not None + + if by_node and not by_reach: + location = data.nodes[str(node)] + if with_chainage or with_index: + warnings.warn( + "'chainage' or 'gridpoint' are only relevant when passed with 'reach' but they were passed with 'node', so they will be ignored." + ) + + elif by_reach and not by_node: + location = data.reaches[reach] + if with_index == with_chainage: + raise ValueError( + "Locations accessed by chainage must be specified either by chainage or by index, not both." + ) + + if with_index and not with_chainage: + gridpoint = 0 if gridpoint == "start" else gridpoint + gridpoint = -1 if gridpoint == "end" else gridpoint + chainage = location.chainages[gridpoint] + + location = location[chainage] + + else: + raise ValueError( + "A network location must be specified either by node or by reach." + ) + + if variable is None: + if len(location.quantities) != 1: + raise ValueError( + f"The network location does not have a unique quantity: {location.quantities}, in such case 'variable' argument cannot be None" + ) + res1d_name = location.quantities[0] + else: + # After filtering by node or by reach and chainage, a location will only + # have unique quantities + res1d_name = variable_name_to_res1d(variable) + df = location.to_dataframe() + if df.shape[1] == 1: + colname = df.columns[0] + if res1d_name not in colname: + raise ValueError(f"Column name '{colname}' does not contain '{res1d_name}'") + + return df.rename(columns={colname: res1d_name})[res1d_name].copy() + else: + raise ValueError(f"Multiple matching quantites found at location: {df.columns}") diff --git a/src/modelskill/types.py b/src/modelskill/types.py index 0ba3dcd63..77aef9a77 100644 --- a/src/modelskill/types.py +++ b/src/modelskill/types.py @@ -5,6 +5,7 @@ import pandas as pd import xarray as xr import mikeio +import mikeio1d class GeometryType(Enum): @@ -85,6 +86,7 @@ def from_string(s: str) -> "GeometryType": mikeio.DataArray, xr.Dataset, xr.DataArray, + mikeio1d.Res1D, ] TrackType = Union[str, Path, pd.DataFrame, mikeio.Dfs0, mikeio.Dataset, xr.Dataset] diff --git a/tests/model/test_network.py b/tests/model/test_network.py new file mode 100644 index 000000000..92284c800 --- /dev/null +++ b/tests/model/test_network.py @@ -0,0 +1,52 @@ +import pytest +import mikeio1d + +import numpy as np +import pandas as pd +import modelskill as ms + +parse_network = ms.timeseries._parse_network_input + + +@pytest.fixture +def res1d_datapath() -> str: + return "tests/testdata/network.res1d" + + +@pytest.fixture +def res1d_object(res1d_datapath) -> mikeio1d.Res1D: + return mikeio1d.open(res1d_datapath) + + +def test_read_quantity_by_node(res1d_object): + series = parse_network(res1d_object, variable="Water Level", node=3) + df = res1d_object.read() + assert isinstance(series, pd.Series) + assert series.name == "WaterLevel" + np.testing.assert_allclose(df["WaterLevel:3"].values, series.values) + + +@pytest.mark.parametrize( + "network_kwargs", + [ + dict(gridpoint="end"), + dict(gridpoint=2), + dict(chainage=47.683), + dict(chainage="47.683"), + ], +) +def test_read_quantity_by_reach(res1d_object, network_kwargs): + series = parse_network( + res1d_object, variable="Water Level", reach="100l1", **network_kwargs + ) + df = res1d_object.read() + assert isinstance(series, pd.Series) + assert series.name == "WaterLevel" + np.testing.assert_allclose(df["WaterLevel:100l1:47.6827"].values, series.values) + + +def test_node_and_reach_as_arguments(res1d_object): + with pytest.raises( + ValueError, match="Item can only be specified either by node or by reach" + ): + parse_network(res1d_object, variable="Water Level", reach="100l1", node=2) diff --git a/tests/testdata/network.res1d b/tests/testdata/network.res1d new file mode 100644 index 000000000..45a661f6e Binary files /dev/null and b/tests/testdata/network.res1d differ