def safe_sum(df: pd.DataFrame, nan_threshold: float = 0.05) -> float:
    """
    Sum every value in a DataFrame, tolerating a limited amount of missing data.

    If more than ``nan_threshold`` of the values are missing, NaN is returned so
    that an unreliable total is never reported. Otherwise the missing values are
    ignored and the remaining values are summed.

    Args:
        df: Frame of numeric values. Missing entries may be NaN, None or pd.NA.
        nan_threshold: Maximum fraction of missing values allowed (default 5%).
            If exceeded, returns NaN to indicate an unreliable result.

    Returns:
        The sum of the non-missing values, NaN if too many values are missing,
        or 0.0 for an empty frame.
    """
    # Coerce to float up front: np.isnan raises TypeError on object arrays,
    # which is what `.values` yields for object or nullable-dtype columns.
    values = df.to_numpy(dtype=float, na_value=np.nan).ravel()

    nan_count = np.count_nonzero(np.isnan(values))
    # An empty frame has nothing missing; avoid dividing by zero.
    nan_fraction = nan_count / values.size if values.size > 0 else 0.0

    if nan_fraction > nan_threshold:
        return np.nan  # Too much missing data

    return np.nansum(values)
def safe_average(a, weights=None, nan_threshold=0.05):
    """
    Wraps np.average and handles:
    - NaN values in the input (excluded if below threshold, otherwise returns NaN)
    - Weights that sum to zero (returns 0.0 instead of raising exception)

    Args:
        a: Values to average (scalar, sequence, Series or array). Missing
            entries may be NaN or None.
        weights: Optional weights with the same shape as `a`. Weights belonging
            to missing values are dropped together with those values.
        nan_threshold: Maximum fraction of NaN values allowed (default 5%).
            If exceeded, returns NaN to indicate unreliable result.

    Returns:
        The (weighted) average of the non-missing values; NaN if too many values
        are missing or nothing is left to average; 0.0 if the remaining weights
        sum to zero.
    """
    # Coerce to float so np.isnan also works when the input holds None/objects.
    values = np.asarray(a, dtype=float)
    valid = ~np.isnan(values)

    # Use the element count rather than len(), which is the row count for
    # multi-dimensional input and undefined for scalars.
    total = values.size
    nan_fraction = (total - np.count_nonzero(valid)) / total if total > 0 else 0.0

    if nan_fraction > nan_threshold:
        return np.nan  # Too much missing data - result would be unreliable

    kept_weights = None
    if weights is not None:
        kept_weights = np.asarray(weights, dtype=float)[valid]
        # np.average raises ZeroDivisionError when the weights sum to zero.
        if np.sum(kept_weights) == 0:
            return 0.0

    kept_values = values[valid]
    if kept_values.size == 0:
        return np.nan

    return np.average(kept_values, weights=kept_weights)