From f5ca622f47ce1b18f36c2ae8ceec53213b3fc021 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Wed, 11 Mar 2026 18:21:53 +0100
Subject: [PATCH 01/39] add smoke test

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 .gitignore                        |  6 ++-
 integrationtests/default.yaml     | 48 +++++++++++++++++++
 integrationtests/test_base_set.py | 77 +++++++++++++++++++++++++++++++
 3 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 integrationtests/default.yaml
 create mode 100644 integrationtests/test_base_set.py

diff --git a/.gitignore b/.gitignore
index 837ff76..33c0c40 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
-venv
+*venv
 venv_pp
 /data/
 __pycache__/
@@ -10,3 +10,7 @@ gridfm_graphkit.egg-info
 mlruns
 *.pt
 .DS_Store
+integrationtests/data_out*
+.julia
+*logs*
+*data_out*
\ No newline at end of file
diff --git a/integrationtests/default.yaml b/integrationtests/default.yaml
new file mode 100644
index 0000000..1812d18
--- /dev/null
+++ b/integrationtests/default.yaml
@@ -0,0 +1,48 @@
+network:
+  name: "case14_ieee" # Name of the power grid network (without extension)
+  source: "pglib" # Data source for the grid; options: pglib, file
+  # WARNING: the following parameter is only used if source is "file"
+  network_dir: "scripts/grids" # if using source "file", this is the directory containing the network file
+
+load:
+  generator: "agg_load_profile" # Name of the load generator; options: agg_load_profile, powergraph
+  agg_profile: "default" # Name of the aggregated load profile
+  scenarios: 10000 # Number of different load scenarios to generate
+  # WARNING: the following parameters are only used if generator is "agg_load_profile"
+  # if using generator "powergraph", these parameters are ignored
+  sigma: 0.2 # max local noise
+  change_reactive_power: true # If true, changes reactive power of loads. If False, keeps the ones from the case file
+  global_range: 0.4 # Range of the global scaling factor. used to set the lower bound of the scaling factor
+  max_scaling_factor: 4.0 # Max upper bound of the global scaling factor
+  step_size: 0.1 # Step size when finding the upper bound of the global scaling factor
+  start_scaling_factor: 1.0 # Initial value of the global scaling factor
+
+topology_perturbation:
+  type: "random" # Type of topology generator; options: n_minus_k, random, none
+  # WARNING: the following parameters are only used if type is not "none"
+  k: 1 # Maximum number of components to drop in each perturbation
+  n_topology_variants: 2 # Number of unique perturbed topologies per scenario
+  elements: [branch, gen] # elements to perturb. options: branch, gen
+
+generation_perturbation:
+  type: "cost_permutation" # Type of generation perturbation; options: cost_permutation, cost_perturbation, none
+  # WARNING: the following parameter is only used if type is "cost_permutation"
+  sigma: 1.0 # Size of range used for sampling scaling factor
+
+admittance_perturbation:
+  type: "random_perturbation" # Type of admittance perturbation; options: random_perturbation, none
+  # WARNING: the following parameter is only used if type is "random_perturbation"
+  sigma: 0.2 # Size of range used for sampling scaling factor
+
+settings:
+  num_processes: 16 # Number of parallel processes to use
+  data_dir: "./data_out" # Directory to save generated data relative to the project root
+  large_chunk_size: 1000 # Number of load scenarios processed before saving
+  overwrite: true # If true, overwrites existing files, if false, appends to files
+  mode: "pf" # Mode of the script; options: pf, opf. pf: power flow data where one or more operating limits – the inequality constraints defined in OPF, e.g., voltage magnitude or branch limits – may be violated. opf:  generates datapoints for training OPF solvers, with cost-optimal dispatches that satisfy all operating limits (OPF-feasible)
+  include_dc_res: true # If true, also stores the results of dc power flow or dc optimal power flow
+  enable_solver_logs: true # If true, write OPF/PF logs to {data_dir}/solver_log; PF fast and DCPF fast do not log.
+  pf_fast: true # Whether to use fast PF solver by default (compute_ac_pf from powermodels.jl); if false, uses Ipopt-based PF. Some networks (typically large ones e.g. case10000_goc) do not work with pf_fast: true. pf_fast is faster and more accurate than the Ipopt-based PF.
+  dcpf_fast: true # Whether to use fast DCPF solver by default (compute_dc_pf from PowerModels.jl)
+  max_iter: 200 # Max iterations for Ipopt-based solvers
+  seed: null # Seed for random number generation. If null, a random seed is generated (RECOMMENDED). To get the same data across runs, set the seed and note that ALL OTHER PARAMETERS IN THE CONFIG FILE MUST BE THE SAME.
diff --git a/integrationtests/test_base_set.py b/integrationtests/test_base_set.py
new file mode 100644
index 0000000..f0b2b1e
--- /dev/null
+++ b/integrationtests/test_base_set.py
@@ -0,0 +1,77 @@
+import pytest
+import subprocess
+import os
+import glob
+import pandas as pd
+
+def execute_and_fail(cmd) -> None:
+    """
+    Execute a CLI command and fail in case return code is not 0.
+    """
+    result = subprocess.run(
+        cmd,
+        capture_output=True,
+        text=True,
+        shell=True,
+    )
+    assert result.returncode == 0, (
+        f"{cmd} failed (exit {result.returncode}).\n"
+        f"stdout:\n{result.stdout}\n"
+        f"stderr:\n{result.stderr}"
+    )
+
+def test_prepare_data():
+    """
+    gridfm-datakit must be installable via pip with exit code 0.
+
+    This test explicitly re-runs the install command and asserts that pip
+    exits successfully, making the install step a first-class test rather
+    than a silent fixture side-effect.
+    """
+
+    # Check if data already exists, if not generate it
+    data_dir = "data_out"
+    if not os.path.exists(data_dir) or not os.listdir(data_dir):
+        print("Data directory not found or empty, generating data...")
+        execute_and_fail(
+            'gridfm_datakit generate default.yaml'
+        )
+    else:
+        print(f"Data directory '{data_dir}' already exists, skipping data generation.")
+    
+    execute_and_fail(
+        'gridfm_graphkit train --config examples/config/HGNS_PF_datakit_case14.yaml --data_path data_out/ --exp_name exp1 --run_name run1 --log_dir logs'
+    )
+    
+    # Find the latest log directory
+    log_base = "logs"
+    exp_dirs = glob.glob(os.path.join(log_base, "*"))
+    assert len(exp_dirs) > 0, "No experiment directories found in logs/"
+    
+    latest_exp_dir = max(exp_dirs, key=os.path.getmtime)
+    run_dirs = glob.glob(os.path.join(latest_exp_dir, "*"))
+    assert len(run_dirs) > 0, f"No run directories found in {latest_exp_dir}"
+    
+    latest_run_dir = max(run_dirs, key=os.path.getmtime)
+    metrics_file = os.path.join(latest_run_dir, "artifacts", "test", "case14_ieee_metrics.csv")
+    
+    assert os.path.exists(metrics_file), f"Metrics file not found: {metrics_file}"
+    
+    # Read the metrics CSV
+    df = pd.read_csv(metrics_file)
+    
+    # Find PBE Mean value
+    pbe_mean_row = df[df['Metric'] == 'PBE Mean']
+    assert len(pbe_mean_row) > 0, "PBE Mean metric not found in CSV"
+    
+    pbe_mean_value = float(pbe_mean_row.iloc[0]['Value'])
+    
+    # Check if PBE Mean is within acceptable range [1.4, 1.6]
+    assert 1.4 <= pbe_mean_value <= 1.6, (
+        f"PBE Mean value {pbe_mean_value} is outside acceptable range [1.4, 1.6]"
+    )
+    
+    print(f"✓ PBE Mean value {pbe_mean_value} is within acceptable range [1.4, 1.6]")
+
+
+

From adacfe8ff92d659d0eb4b6bc93ab276d5a9dce6c Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Wed, 11 Mar 2026 19:53:08 +0100
Subject: [PATCH 02/39] fix value range

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 integrationtests/test_base_set.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/integrationtests/test_base_set.py b/integrationtests/test_base_set.py
index f0b2b1e..16f4a02 100644
--- a/integrationtests/test_base_set.py
+++ b/integrationtests/test_base_set.py
@@ -66,8 +66,7 @@ def test_prepare_data():
     
     pbe_mean_value = float(pbe_mean_row.iloc[0]['Value'])
     
-    # Check if PBE Mean is within acceptable range [1.4, 1.6]
-    assert 1.4 <= pbe_mean_value <= 1.6, (
+    assert 1.1 <= pbe_mean_value <= 2.9, (
         f"PBE Mean value {pbe_mean_value} is outside acceptable range [1.4, 1.6]"
     )
     

From 21720f55f595f1822c1785f8b4a3e6d337cadac4 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Fri, 13 Mar 2026 08:22:15 +0100
Subject: [PATCH 03/39] fix doc, improve test

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 .gitignore                           |  3 +-
 docs/datasets/data_modules.md        |  4 +--
 docs/datasets/data_normalization.md  | 47 +++++++++++----------------
 docs/datasets/powergrid.md           |  4 +--
 docs/datasets/transforms.md          | 24 +++++---------
 docs/models/models.md                | 35 +++++++++++++++++---
 docs/tasks/feature_reconstruction.md | 30 +++++++++++++++--
 docs/training/loss.md                | 38 ++++++++++++++--------
 integrationtests/default.yaml        | 48 ----------------------------
 integrationtests/test_base_set.py    | 39 +++++++++++++++++++++-
 10 files changed, 154 insertions(+), 118 deletions(-)
 delete mode 100644 integrationtests/default.yaml

diff --git a/.gitignore b/.gitignore
index 33c0c40..164d66b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,4 +13,5 @@ mlruns
 integrationtests/data_out*
 .julia
 *logs*
-*data_out*
\ No newline at end of file
+*data_out*
+site*
\ No newline at end of file
diff --git a/docs/datasets/data_modules.md b/docs/datasets/data_modules.md
index bf47118..a5e4dff 100644
--- a/docs/datasets/data_modules.md
+++ b/docs/datasets/data_modules.md
@@ -1,3 +1,3 @@
-# LitGridDataModule
+# LitGridHeteroDataModule
 
-::: gridfm_graphkit.datasets.powergrid_datamodule.LitGridDataModule
+::: gridfm_graphkit.datasets.hetero_powergrid_datamodule.LitGridHeteroDataModule
diff --git a/docs/datasets/data_normalization.md b/docs/datasets/data_normalization.md
index f41334d..1747fd1 100644
--- a/docs/datasets/data_normalization.md
+++ b/docs/datasets/data_normalization.md
@@ -3,12 +3,10 @@
 
 
 Normalization improves neural network training by ensuring features are well-scaled, preventing issues like exploding gradients and slow convergence. In power grids, where variables like voltage and power span wide ranges, normalization is essential.
-The `gridfm-graphkit` package offers four methods:
+The `gridfm-graphkit` package offers normalization methods based on the per-unit (p.u.) system:
 
-- [`Min-Max Normalization`](#minmaxnormalizer)
-- [`Standardization (Z-score)`](#standardizer)
-- [`Identity (no normalization)`](#identitynormalizer)
-- [`BaseMVA Normalization`](#basemvanormalizer)
+- [`BaseMVA Normalization`](#heterodatamvanormalizer)
+- [`Per-Sample BaseMVA Normalization`](#heterodatapersamplemvanormalizer)
 
 Each of these strategies implements a unified interface and can be used interchangeably depending on the learning task and data characteristics.
 
@@ -25,27 +23,15 @@ Each of these strategies implements a unified interface and can be used intercha
 
 ---
 
-### `MinMaxNormalizer`
+### `HeteroDataMVANormalizer`
 
-::: gridfm_graphkit.datasets.normalizers.MinMaxNormalizer
+::: gridfm_graphkit.datasets.normalizers.HeteroDataMVANormalizer
 
 ---
 
-### `Standardizer`
+### `HeteroDataPerSampleMVANormalizer`
 
-::: gridfm_graphkit.datasets.normalizers.Standardizer
-
----
-
-### `BaseMVANormalizer`
-
-::: gridfm_graphkit.datasets.normalizers.BaseMVANormalizer
-
----
-
-### `IdentityNormalizer`
-
-::: gridfm_graphkit.datasets.normalizers.IdentityNormalizer
+::: gridfm_graphkit.datasets.normalizers.HeteroDataPerSampleMVANormalizer
 
 ---
 
@@ -54,13 +40,18 @@ Each of these strategies implements a unified interface and can be used intercha
 Example:
 
 ```python
-from gridfm_graphkit.datasets.normalizers import MinMaxNormalizer
-import torch
+from gridfm_graphkit.datasets.normalizers import HeteroDataMVANormalizer
+from torch_geometric.data import HeteroData
+
+# Create normalizer
+normalizer = HeteroDataMVANormalizer(args)
+
+# Fit on training data
+params = normalizer.fit(data_path, scenario_ids)
 
-data = torch.randn(100, 5)  # Example tensor
+# Transform data
+normalizer.transform(hetero_data)
 
-normalizer = MinMaxNormalizer(node_data=True,args=None)
-params = normalizer.fit(data)
-normalized = normalizer.transform(data)
-restored = normalizer.inverse_transform(normalized)
+# Inverse transform to restore original scale
+normalizer.inverse_transform(hetero_data)
 ```
diff --git a/docs/datasets/powergrid.md b/docs/datasets/powergrid.md
index 45476ac..1f983a5 100644
--- a/docs/datasets/powergrid.md
+++ b/docs/datasets/powergrid.md
@@ -1,3 +1,3 @@
-## `GridDatasetDisk`
+## `HeteroGridDatasetDisk`
 
-::: gridfm_graphkit.datasets.powergrid_dataset.GridDatasetDisk
+::: gridfm_graphkit.datasets.powergrid_hetero_dataset.HeteroGridDatasetDisk
diff --git a/docs/datasets/transforms.md b/docs/datasets/transforms.md
index dd7f66d..0dcf981 100644
--- a/docs/datasets/transforms.md
+++ b/docs/datasets/transforms.md
@@ -2,26 +2,18 @@
 
 > Each transformation class inherits from [`BaseTransform`](https://pytorch-geometric.readthedocs.io/en/latest/modules/transforms.html#torch_geometric.transforms.BaseTransform) provided by [PyTorch Geometric](https://pytorch-geometric.readthedocs.io/).
 
-### `AddNormalizedRandomWalkPE`
+### `RemoveInactiveGenerators`
 
-::: gridfm_graphkit.datasets.transforms.AddNormalizedRandomWalkPE
+::: gridfm_graphkit.datasets.transforms.RemoveInactiveGenerators
 
-### `AddEdgeWeights`
+### `RemoveInactiveBranches`
 
-::: gridfm_graphkit.datasets.transforms.AddEdgeWeights
+::: gridfm_graphkit.datasets.transforms.RemoveInactiveBranches
 
-### `AddIdentityMask`
+### `ApplyMasking`
 
-::: gridfm_graphkit.datasets.transforms.AddIdentityMask
+::: gridfm_graphkit.datasets.transforms.ApplyMasking
 
-### `AddRandomMask`
+### `LoadGridParamsFromPath`
 
-::: gridfm_graphkit.datasets.transforms.AddRandomMask
-
-### `AddPFMask`
-
-::: gridfm_graphkit.datasets.transforms.AddPFMask
-
-### `AddOPFMask`
-
-::: gridfm_graphkit.datasets.transforms.AddOPFMask
+::: gridfm_graphkit.datasets.transforms.LoadGridParamsFromPath
diff --git a/docs/models/models.md b/docs/models/models.md
index 9e822ca..7c8c5c6 100644
--- a/docs/models/models.md
+++ b/docs/models/models.md
@@ -1,10 +1,37 @@
 # Models
 
-### `GPSTransformer`
+### `GNS_heterogeneous`
 
-::: gridfm_graphkit.models.gps_transformer.GPSTransformer
+::: gridfm_graphkit.models.gnn_heterogeneous_gns.GNS_heterogeneous
 
+---
 
-### `GNN_TransformerConv`
+## Physics Decoders
 
-::: gridfm_graphkit.models.gnn_transformer.GNN_TransformerConv
+### `PhysicsDecoderOPF`
+
+::: gridfm_graphkit.models.utils.PhysicsDecoderOPF
+
+### `PhysicsDecoderPF`
+
+::: gridfm_graphkit.models.utils.PhysicsDecoderPF
+
+### `PhysicsDecoderSE`
+
+::: gridfm_graphkit.models.utils.PhysicsDecoderSE
+
+---
+
+## Utility Modules
+
+### `ComputeBranchFlow`
+
+::: gridfm_graphkit.models.utils.ComputeBranchFlow
+
+### `ComputeNodeInjection`
+
+::: gridfm_graphkit.models.utils.ComputeNodeInjection
+
+### `ComputeNodeResiduals`
+
+::: gridfm_graphkit.models.utils.ComputeNodeResiduals
diff --git a/docs/tasks/feature_reconstruction.md b/docs/tasks/feature_reconstruction.md
index 39e0823..99a0321 100644
--- a/docs/tasks/feature_reconstruction.md
+++ b/docs/tasks/feature_reconstruction.md
@@ -1,3 +1,29 @@
-# Feature Reconstruction Task
+# Reconstruction Tasks
 
-::: gridfm_graphkit.tasks.feature_reconstruction_task.FeatureReconstructionTask
+## Base Task
+
+::: gridfm_graphkit.tasks.base_task.BaseTask
+
+---
+
+## Reconstruction Task
+
+::: gridfm_graphkit.tasks.reconstruction_tasks.ReconstructionTask
+
+---
+
+## Optimal Power Flow Task
+
+::: gridfm_graphkit.tasks.opf_task.OptimalPowerFlowTask
+
+---
+
+## Power Flow Task
+
+::: gridfm_graphkit.tasks.pf_task.PowerFlowTask
+
+---
+
+## State Estimation Task
+
+::: gridfm_graphkit.tasks.se_task.StateEstimationTask
diff --git a/docs/training/loss.md b/docs/training/loss.md
index 5cde707..0d08ba3 100644
--- a/docs/training/loss.md
+++ b/docs/training/loss.md
@@ -1,16 +1,12 @@
 # Loss Functions
 
-### `Power Balance Equation Loss`
+## Base Loss
 
-$$
-\mathcal{L}_{\text{PBE}} = \frac{1}{N} \sum_{i=1}^N \left| (P_{G,i} - P_{D,i}) + j(Q_{G,i} - Q_{D,i}) - S_{\text{injection}, i} \right|
-$$
-
-::: gridfm_graphkit.training.loss.PBELoss
+::: gridfm_graphkit.training.loss.BaseLoss
 
 ---
 
-### `Mean Squared Error Loss`
+## Mean Squared Error Loss
 
 $$
 \mathcal{L}_{\text{MSE}} = \frac{1}{N} \sum_{i=1}^N (y_i - \hat{y}_i)^2
@@ -20,7 +16,7 @@ $$
 
 ---
 
-### `Masked Mean Squared Error Loss`
+## Masked Mean Squared Error Loss
 
 $$
 \mathcal{L}_{\text{MaskedMSE}} = \frac{1}{|M|} \sum_{i \in M} (y_i - \hat{y}_i)^2
@@ -30,20 +26,34 @@ $$
 
 ---
 
-### `Scaled Cosine Error Loss`
+## Masked Generator MSE Loss
 
-$$
-\mathcal{L}_{\text{SCE}} = \frac{1}{N} \sum_{i=1}^N \left(1 - \frac{\hat{y}^T_i \cdot y_i}{\|\hat{y}_i\| \|y_i\|}\right)^\alpha \text{ , } \alpha \geq 1
-$$
+::: gridfm_graphkit.training.loss.MaskedGenMSE
+
+---
+
+## Masked Bus MSE Loss
 
-::: gridfm_graphkit.training.loss.SCELoss
+::: gridfm_graphkit.training.loss.MaskedBusMSE
 
 ---
 
-### `Mixed Loss`
+## Mixed Loss
 
 $$
 \mathcal{L}_{\text{Mixed}} = \sum_{m=1}^M w_m \cdot \mathcal{L}_m
 $$
 
 ::: gridfm_graphkit.training.loss.MixedLoss
+
+---
+
+## Layered Weighted Physics Loss
+
+::: gridfm_graphkit.training.loss.LayeredWeightedPhysicsLoss
+
+---
+
+## Loss Per Dimension
+
+::: gridfm_graphkit.training.loss.LossPerDim
diff --git a/integrationtests/default.yaml b/integrationtests/default.yaml
deleted file mode 100644
index 1812d18..0000000
--- a/integrationtests/default.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-network:
-  name: "case14_ieee" # Name of the power grid network (without extension)
-  source: "pglib" # Data source for the grid; options: pglib, file
-  # WARNING: the following parameter is only used if source is "file"
-  network_dir: "scripts/grids" # if using source "file", this is the directory containing the network file
-
-load:
-  generator: "agg_load_profile" # Name of the load generator; options: agg_load_profile, powergraph
-  agg_profile: "default" # Name of the aggregated load profile
-  scenarios: 10000 # Number of different load scenarios to generate
-  # WARNING: the following parameters are only used if generator is "agg_load_profile"
-  # if using generator "powergraph", these parameters are ignored
-  sigma: 0.2 # max local noise
-  change_reactive_power: true # If true, changes reactive power of loads. If False, keeps the ones from the case file
-  global_range: 0.4 # Range of the global scaling factor. used to set the lower bound of the scaling factor
-  max_scaling_factor: 4.0 # Max upper bound of the global scaling factor
-  step_size: 0.1 # Step size when finding the upper bound of the global scaling factor
-  start_scaling_factor: 1.0 # Initial value of the global scaling factor
-
-topology_perturbation:
-  type: "random" # Type of topology generator; options: n_minus_k, random, none
-  # WARNING: the following parameters are only used if type is not "none"
-  k: 1 # Maximum number of components to drop in each perturbation
-  n_topology_variants: 2 # Number of unique perturbed topologies per scenario
-  elements: [branch, gen] # elements to perturb. options: branch, gen
-
-generation_perturbation:
-  type: "cost_permutation" # Type of generation perturbation; options: cost_permutation, cost_perturbation, none
-  # WARNING: the following parameter is only used if type is "cost_permutation"
-  sigma: 1.0 # Size of range used for sampling scaling factor
-
-admittance_perturbation:
-  type: "random_perturbation" # Type of admittance perturbation; options: random_perturbation, none
-  # WARNING: the following parameter is only used if type is "random_perturbation"
-  sigma: 0.2 # Size of range used for sampling scaling factor
-
-settings:
-  num_processes: 16 # Number of parallel processes to use
-  data_dir: "./data_out" # Directory to save generated data relative to the project root
-  large_chunk_size: 1000 # Number of load scenarios processed before saving
-  overwrite: true # If true, overwrites existing files, if false, appends to files
-  mode: "pf" # Mode of the script; options: pf, opf. pf: power flow data where one or more operating limits – the inequality constraints defined in OPF, e.g., voltage magnitude or branch limits – may be violated. opf:  generates datapoints for training OPF solvers, with cost-optimal dispatches that satisfy all operating limits (OPF-feasible)
-  include_dc_res: true # If true, also stores the results of dc power flow or dc optimal power flow
-  enable_solver_logs: true # If true, write OPF/PF logs to {data_dir}/solver_log; PF fast and DCPF fast do not log.
-  pf_fast: true # Whether to use fast PF solver by default (compute_ac_pf from powermodels.jl); if false, uses Ipopt-based PF. Some networks (typically large ones e.g. case10000_goc) do not work with pf_fast: true. pf_fast is faster and more accurate than the Ipopt-based PF.
-  dcpf_fast: true # Whether to use fast DCPF solver by default (compute_dc_pf from PowerModels.jl)
-  max_iter: 200 # Max iterations for Ipopt-based solvers
-  seed: null # Seed for random number generation. If null, a random seed is generated (RECOMMENDED). To get the same data across runs, set the seed and note that ALL OTHER PARAMETERS IN THE CONFIG FILE MUST BE THE SAME.
diff --git a/integrationtests/test_base_set.py b/integrationtests/test_base_set.py
index 16f4a02..d43ed86 100644
--- a/integrationtests/test_base_set.py
+++ b/integrationtests/test_base_set.py
@@ -3,6 +3,8 @@
 import os
 import glob
 import pandas as pd
+import yaml
+import urllib.request
 
 def execute_and_fail(cmd) -> None:
     """
@@ -20,6 +22,36 @@ def execute_and_fail(cmd) -> None:
         f"stderr:\n{result.stderr}"
     )
 
+def prepare_config():
+    """
+    Download default.yaml from gridfm-datakit repo and modify it with test parameters.
+    """
+    config_url = "https://raw.githubusercontent.com/gridfm/gridfm-datakit/refs/heads/main/scripts/config/default.yaml"
+    config_path = "integrationtests/default.yaml"
+    
+    print(f"Downloading config from {config_url}...")
+    with urllib.request.urlopen(config_url) as response:
+        config_content = response.read().decode('utf-8')
+    
+    # Parse YAML
+    config = yaml.safe_load(config_content)
+    
+    # Update values as specified (nested structure)
+    config['network']['name'] = 'case14_ieee'
+    config['load']['scenarios'] = 10000
+    config['topology_perturbation']['n_topology_variants'] = 2
+    
+    # Write modified config
+    with open(config_path, 'w') as f:
+        yaml.dump(config, f, default_flow_style=False, sort_keys=False)
+    
+    print(f"✓ Config prepared at {config_path} with:")
+    print(f"  - network.name: {config['network']['name']}")
+    print(f"  - load.scenarios: {config['load']['scenarios']}")
+    print(f"  - topology_perturbation.n_topology_variants: {config['topology_perturbation']['n_topology_variants']}")
+    
+    return config_path
+
 def test_prepare_data():
     """
     gridfm-datakit must be installable via pip with exit code 0.
@@ -33,8 +65,13 @@ def test_prepare_data():
     data_dir = "data_out"
     if not os.path.exists(data_dir) or not os.listdir(data_dir):
         print("Data directory not found or empty, generating data...")
+        
+        # Prepare the config file
+        config_path = prepare_config()
+        
+        # Generate data using the prepared config
         execute_and_fail(
-            'gridfm_datakit generate default.yaml'
+            f'gridfm_datakit generate {config_path}'
         )
     else:
         print(f"Data directory '{data_dir}' already exists, skipping data generation.")

From 8568d913ef0a1f3d20a82ff39e28b498b284cc81 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Fri, 13 Mar 2026 10:41:20 +0100
Subject: [PATCH 04/39] add missing doc

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 docs/index.md                    |  10 +-
 docs/tasks/optimal_power_flow.md | 136 ++++++++++++++++++++++++
 docs/tasks/power_flow.md         | 171 +++++++++++++++++++++++++++++++
 docs/tasks/state_estimation.md   |  77 ++++++++++++++
 mkdocs.yml                       |   3 +
 5 files changed, 396 insertions(+), 1 deletion(-)
 create mode 100644 docs/tasks/optimal_power_flow.md
 create mode 100644 docs/tasks/power_flow.md
 create mode 100644 docs/tasks/state_estimation.md

diff --git a/docs/index.md b/docs/index.md
index e843631..f465518 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -14,4 +14,12 @@ This library is brought to you by the GridFM team to train, finetune and interac
 
 
 
-## Citation: TBD
+## Citation:
+```bibtex
+@software{gridfm_graphkit_2024,
+  author = {Matteo Mazzonelli, Celia Cintas, Alban Puech and others},
+  title = {GridFM GraphKit},
+  url = {https://github.com/gridfm/gridfm-graphkit},
+  year = {2024}
+}
+```
\ No newline at end of file
diff --git a/docs/tasks/optimal_power_flow.md b/docs/tasks/optimal_power_flow.md
new file mode 100644
index 0000000..1837e6b
--- /dev/null
+++ b/docs/tasks/optimal_power_flow.md
@@ -0,0 +1,136 @@
+# Optimal Power Flow Task
+
+The Optimal Power Flow (OPF) task solves the optimization problem of determining the most economical operation of a power system while satisfying physical and operational constraints. This task predicts optimal generator setpoints, voltage profiles, and reactive power dispatch.
+
+## Overview
+
+Optimal Power Flow is a fundamental optimization problem in power systems that minimizes generation costs while ensuring:
+
+- **Power balance**: Supply meets demand at all buses
+- **Voltage constraints**: Bus voltages remain within acceptable limits
+- **Thermal limits**: Branch flows don't exceed capacity
+- **Generator limits**: Active and reactive power generation within bounds
+- **Angle difference limits**: Voltage angle differences across branches are acceptable
+
+The `OptimalPowerFlowTask` extends the `ReconstructionTask` to include OPF-specific physics-based constraints and economic metrics.
+
+## Key Features
+
+- **Economic optimization**: Tracks generation costs and optimality gap
+- **Constraint violation monitoring**: Measures violations of thermal, voltage, and angle limits
+- **Physics-based evaluation**: Computes power balance errors and residuals
+- **Bus type differentiation**: Separate metrics for PQ, PV, and REF buses
+- **Comprehensive reporting**: Generates detailed CSV reports and correlation plots
+
+## OptimalPowerFlowTask
+
+::: gridfm_graphkit.tasks.opf_task.OptimalPowerFlowTask
+
+## Metrics
+
+The Optimal Power Flow task computes extensive metrics during testing:
+
+### Economic Metrics
+- **Optimality Gap (%)**: Percentage difference between predicted and optimal generation costs
+- **Generation Cost**: Total cost computed from quadratic cost curves (c₀ + c₁·Pg + c₂·Pg²)
+
+### Power Balance Metrics
+- **Active Power Loss (MW)**: Mean absolute active power residual across all buses
+- **Reactive Power Loss (MVar)**: Mean absolute reactive power residual across all buses
+
+### Constraint Violations
+- **Branch Thermal Violations (MVA)**: 
+  - Forward direction: Mean excess flow above thermal limits
+  - Reverse direction: Mean excess flow above thermal limits
+- **Branch Angle Violations (radians)**: Mean violation of angle difference constraints
+- **Reactive Power Violations**:
+  - PV buses: Mean Qg violation (exceeding min/max limits)
+  - REF buses: Mean Qg violation (exceeding min/max limits)
+
+### Prediction Accuracy (RMSE)
+Computed separately for each bus type (PQ, PV, REF):
+- **Voltage Magnitude (Vm)**: p.u.
+- **Voltage Angle (Va)**: radians
+- **Active Power Generation (Pg)**: MW
+- **Reactive Power Generation (Qg)**: MVar
+
+### Residual Statistics (when verbose=True)
+For each bus type and power type (P, Q):
+- Mean residual per graph
+- Maximum residual per graph
+
+## Bus Types
+
+The task evaluates performance separately for three bus types:
+
+- **PQ Buses**: Load buses with specified active and reactive power demand
+- **PV Buses**: Generator buses with specified active power and voltage magnitude
+- **REF Buses**: Reference/slack buses that balance the system
+
+## Outputs
+
+### CSV Reports
+Two CSV files are generated per test dataset:
+
+1. **`{dataset}_RMSE.csv`**: RMSE metrics by bus type
+   - Columns: Metric, Pg (MW), Qg (MVar), Vm (p.u.), Va (radians)
+   - Rows: RMSE-PQ, RMSE-PV, RMSE-REF
+
+2. **`{dataset}_metrics.csv`**: Comprehensive metrics including:
+   - Average active/reactive residuals
+   - RMSE for generator active power
+   - Mean optimality gap
+   - Branch thermal violations (from/to)
+   - Branch angle difference violations
+   - Qg violations for PV and REF buses
+
+### Visualizations (when verbose=True)
+
+1. **Cost Correlation Plot**: Predicted vs. ground truth generation costs with correlation coefficient
+2. **Residual Histograms**: Distribution of power balance residuals by bus type
+3. **Feature Correlation Plots**: Predictions vs. targets for Vm, Va, Pg, Qg by bus type, including Qg violation highlighting
+
+## Configuration Example
+
+```yaml
+task:
+  name: OptimalPowerFlow
+  verbose: true
+
+training:
+  batch_size: 32
+  epochs: 100
+  losses: ["MaskedMSE", "PBE"]
+  loss_weights: [0.01, 0.99]
+
+optimizer:
+  name: Adam
+  lr: 0.001
+```
+
+## Physics-Based Constraints
+
+The task uses specialized layers to compute physical quantities:
+
+- **`ComputeBranchFlow`**: Calculates active (Pft) and reactive (Qft) power flows on branches
+- **`ComputeNodeInjection`**: Aggregates branch flows to compute net injections at buses
+- **`ComputeNodeResiduals`**: Computes power balance violations (residuals)
+
+These ensure predictions are evaluated not just on accuracy but also on physical feasibility.
+
+## Usage
+
+The Optimal Power Flow task is automatically selected when you specify `task.name: OptimalPowerFlow` in your YAML configuration file. The task:
+
+1. Performs forward pass through the model
+2. Inverse normalizes predictions and targets
+3. Computes branch flows and power balance residuals
+4. Evaluates constraint violations
+5. Calculates economic metrics (costs, optimality gap)
+6. Generates comprehensive reports and visualizations
+
+## Related
+
+- [Power Flow Task](power_flow.md): For standard power flow analysis without optimization
+- [State Estimation Task](state_estimation.md): For state estimation from measurements
+- [Feature Reconstruction](feature_reconstruction.md): Base reconstruction task
\ No newline at end of file
diff --git a/docs/tasks/power_flow.md b/docs/tasks/power_flow.md
new file mode 100644
index 0000000..606bcc0
--- /dev/null
+++ b/docs/tasks/power_flow.md
@@ -0,0 +1,171 @@
+# Power Flow Task
+
+The Power Flow task solves the fundamental problem of determining the steady-state operating conditions of a power system. Given load demands and generator setpoints, it computes voltage magnitudes, voltage angles, and power flows throughout the network.
+
+## Overview
+
+Power Flow (also known as Load Flow) analysis is essential for power system planning and operation. It determines:
+
+- **Voltage profiles**: Magnitude and angle at each bus
+- **Power flows**: Active and reactive power on transmission lines
+- **Power injections**: Net power generation/consumption at buses
+- **System losses**: Total active and reactive power losses
+
+The `PowerFlowTask` extends the `ReconstructionTask` to include physics-based power balance evaluation and comprehensive metrics for different bus types.
+
+## Key Features
+
+- **Physics-based validation**: Computes power balance errors (PBE) to verify physical consistency
+- **Bus type differentiation**: Separate metrics for PQ, PV, and REF buses
+- **Distributed training support**: Handles multi-GPU training with proper metric aggregation
+- **Detailed predictions**: Provides per-bus predictions with residuals for analysis
+- **Comprehensive reporting**: Generates CSV reports and correlation plots
+
+## PowerFlowTask
+
+::: gridfm_graphkit.tasks.pf_task.PowerFlowTask
+
+## Metrics
+
+The Power Flow task computes the following metrics during testing:
+
+### Power Balance Metrics
+- **Active Power Loss (MW)**: Mean absolute active power residual across all buses
+- **Reactive Power Loss (MVar)**: Mean absolute reactive power residual across all buses
+- **PBE Mean**: Mean Power Balance Error magnitude across all buses (√(P² + Q²))
+- **PBE Max**: Maximum Power Balance Error across all buses
+
+### Prediction Accuracy (RMSE)
+Computed separately for each bus type (PQ, PV, REF):
+- **Voltage Magnitude (Vm)**: p.u.
+- **Voltage Angle (Va)**: radians
+- **Active Power Generation (Pg)**: MW
+- **Reactive Power Generation (Qg)**: MVar
+
+### Residual Statistics (when verbose=True)
+For each bus type (PQ, PV, REF) and power type (P, Q):
+- Mean residual per graph
+- Maximum residual per graph
+
+## Bus Types
+
+The task evaluates performance separately for three bus types:
+
+- **PQ Buses**: Load buses with specified active and reactive power demand
+- **PV Buses**: Generator buses with specified active power and voltage magnitude
+- **REF Buses**: Reference/slack buses that balance the system
+
+## Power Balance Error (PBE)
+
+The Power Balance Error is a critical metric that measures how well predictions satisfy Kirchhoff's laws:
+
+$$
+\text{PBE} = \sqrt{(\Delta P)^2 + (\Delta Q)^2}
+$$
+
+where:
+- $\Delta P$ = Active power residual (generation - demand - losses)
+- $\Delta Q$ = Reactive power residual (generation - demand - losses)
+
+Lower PBE values indicate better physical consistency of the predictions.
+
+## Outputs
+
+### CSV Reports
+Two CSV files are generated per test dataset:
+
+1. **`{dataset}_RMSE.csv`**: RMSE metrics by bus type
+   - Columns: Metric, Pg (MW), Qg (MVar), Vm (p.u.), Va (radians)
+   - Rows: RMSE-PQ, RMSE-PV, RMSE-REF
+
+2. **`{dataset}_metrics.csv`**: Power balance metrics
+   - Avg. active res. (MW)
+   - Avg. reactive res. (MVar)
+   - PBE Mean
+   - PBE Max
+
+### Visualizations (when verbose=True)
+
+1. **Residual Histograms**: Distribution of power balance residuals by bus type (PQ, PV, REF)
+2. **Feature Correlation Plots**: Predictions vs. targets for Vm, Va, Pg, Qg by bus type
+
+### Prediction Output
+
+The `predict_step` method returns detailed per-bus information:
+
+```python
+{
+    'scenario': scenario IDs,
+    'bus': bus indices,
+    'pd_mw': active power demand,
+    'qd_mvar': reactive power demand,
+    'vm_pu_target': target voltage magnitude,
+    'va_target': target voltage angle,
+    'pg_mw_target': target active power generation,
+    'qg_mvar_target': target reactive power generation,
+    'is_pq': PQ bus indicator,
+    'is_pv': PV bus indicator,
+    'is_ref': REF bus indicator,
+    'vm_pu': predicted voltage magnitude,
+    'va': predicted voltage angle,
+    'pg_mw': predicted active power generation,
+    'qg_mvar': predicted reactive power generation,
+    'active res. (MW)': active power residual,
+    'reactive res. (MVar)': reactive power residual,
+    'PBE': power balance error magnitude
+}
+```
+
+## Configuration Example
+
+```yaml
+task:
+  name: PowerFlow
+  verbose: true
+
+training:
+  batch_size: 32
+  epochs: 100
+  losses: ["MaskedMSE", "PBE"]
+  loss_weights: [0.01, 0.99]
+
+optimizer:
+  name: Adam
+  lr: 0.001
+```
+
+## Physics-Based Constraints
+
+The task uses specialized layers to compute physical quantities:
+
+- **`ComputeBranchFlow`**: Calculates active (Pft) and reactive (Qft) power flows on branches using the power flow equations
+- **`ComputeNodeInjection`**: Aggregates branch flows to compute net power injections at each bus
+- **`ComputeNodeResiduals`**: Computes power balance violations by comparing injections with generation and demand
+
+These layers ensure that predictions are evaluated not only on accuracy but also on their adherence to fundamental power system physics.
+
+## Distributed Training
+
+The PowerFlowTask includes special handling for distributed training:
+
+- **Metric aggregation**: Uses `sync_dist=True` to properly aggregate metrics across GPUs
+- **Verbose output gathering**: Collects test outputs from all ranks to rank 0 for complete visualization
+- **Max reduction for PBE Max**: Uses `reduce_fx="max"` to find the global maximum PBE across all processes
+
+## Usage
+
+The Power Flow task is automatically selected when you specify `task.name: PowerFlow` in your YAML configuration file. The task:
+
+1. Performs forward pass through the model
+2. Inverse normalizes predictions and targets
+3. Computes branch flows using power flow equations
+4. Calculates power balance residuals and PBE
+5. Evaluates metrics separately for each bus type
+6. Generates comprehensive reports and visualizations
+7. Provides detailed per-bus predictions for analysis
+
+## Related
+
+- [Optimal Power Flow Task](optimal_power_flow.md): For optimization-based power flow with economic objectives
+- [State Estimation Task](state_estimation.md): For state estimation from noisy measurements
+- [Feature Reconstruction](feature_reconstruction.md): Base reconstruction task
\ No newline at end of file
diff --git a/docs/tasks/state_estimation.md b/docs/tasks/state_estimation.md
new file mode 100644
index 0000000..46953ae
--- /dev/null
+++ b/docs/tasks/state_estimation.md
@@ -0,0 +1,77 @@
+# State Estimation Task
+
+The State Estimation task focuses on estimating the true state of a power grid from noisy measurements. This is a critical problem in power system operations, where sensor measurements may contain errors, outliers, or missing data.
+
+## Overview
+
+State estimation in power systems involves determining voltage magnitudes, voltage angles, and power injections at buses from available measurements. The `StateEstimationTask` extends the `ReconstructionTask` to handle:
+
+- **Noisy measurements**: Input features include measurement noise and potential outliers
+- **Missing data**: Some measurements may be masked or unavailable
+- **Outlier detection**: The task tracks and evaluates performance on outlier measurements separately
+
+## Key Features
+
+- **Measurement-based prediction**: Estimates true grid state from noisy sensor data
+- **Outlier handling**: Distinguishes between normal measurements, masked values, and outliers
+- **Correlation analysis**: Generates plots comparing predictions vs. targets and predictions vs. measurements
+- **Multi-mask evaluation**: Evaluates performance separately for outliers, masked values, and clean measurements
+
+## StateEstimationTask
+
+::: gridfm_graphkit.tasks.se_task.StateEstimationTask
+
+## Metrics
+
+The State Estimation task computes and logs the following metrics during testing:
+
+### Prediction Quality
+- **Voltage Magnitude (Vm)**: Accuracy of estimated voltage magnitudes at buses
+- **Voltage Angle (Va)**: Accuracy of estimated voltage angles at buses  
+- **Active Power Injection (Pg)**: Accuracy of estimated active power at buses
+- **Reactive Power Injection (Qg)**: Accuracy of estimated reactive power at buses
+
+### Evaluation Categories
+Metrics are computed separately for three categories:
+- **Outliers**: Measurements identified as outliers
+- **Masked**: Intentionally masked/missing measurements
+- **Non-outliers**: Clean measurements without outliers or masking
+
+## Visualization
+
+When `verbose=True` in the configuration, the task generates correlation plots:
+
+1. **Predictions vs. Targets**: Shows how well predictions match ground truth
+2. **Predictions vs. Measurements**: Shows how predictions compare to noisy input measurements
+3. **Measurements vs. Targets**: Shows the quality of input measurements
+
+These plots are generated for each feature (Vm, Va, Pg, Qg) and saved to the test artifacts directory.
+
+## Configuration Example
+
+```yaml
+task:
+  name: StateEstimation
+  verbose: true
+
+training:
+  batch_size: 32
+  epochs: 100
+  losses: ["MaskedMSE"]
+  loss_weights: [1.0]
+```
+
+## Usage
+
+The State Estimation task is automatically selected when you specify `task.name: StateEstimation` in your YAML configuration file. The task handles:
+
+1. Forward pass through the model with masked/noisy inputs
+2. Inverse normalization of predictions and targets
+3. Computation of metrics for different measurement categories
+4. Generation of correlation plots and analysis
+
+## Related
+
+- [Power Flow Task](power_flow.md): For standard power flow analysis
+- [Optimal Power Flow Task](optimal_power_flow.md): For optimization-based power flow
+- [Feature Reconstruction](feature_reconstruction.md): Base reconstruction task
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
index afc3359..7c78cce 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -20,6 +20,9 @@ nav:
       - Transforms: datasets/transforms.md
     - Tasks:
       - Feature Reconstruction: tasks/feature_reconstruction.md
+      - Power Flow: tasks/power_flow.md
+      - Optimal Power Flow: tasks/optimal_power_flow.md
+      - State Estimation: tasks/state_estimation.md
     - Models: models/models.md
     - Training:
       - Losses: training/loss.md

From 18684ca01369e7b18d46ed24b749fe5c8fdc6702 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Fri, 13 Mar 2026 18:16:18 +0100
Subject: [PATCH 05/39] improve test

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 integrationtests/test_base_set.py | 50 +++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 15 deletions(-)

diff --git a/integrationtests/test_base_set.py b/integrationtests/test_base_set.py
index d43ed86..1d1eb33 100644
--- a/integrationtests/test_base_set.py
+++ b/integrationtests/test_base_set.py
@@ -6,20 +6,14 @@
 import yaml
 import urllib.request
 
-def execute_and_fail(cmd) -> None:
-    """
-    Execute a CLI command and fail in case return code is not 0.
-    """
+def execute_and_live_output(cmd) -> None:
+    # Remove capture_output=True
+    # We use check=True to raise an exception automatically if returncode != 0
     result = subprocess.run(
         cmd,
-        capture_output=True,
         text=True,
         shell=True,
-    )
-    assert result.returncode == 0, (
-        f"{cmd} failed (exit {result.returncode}).\n"
-        f"stdout:\n{result.stdout}\n"
-        f"stderr:\n{result.stderr}"
+        check=True 
     )
 
 def prepare_config():
@@ -52,6 +46,29 @@ def prepare_config():
     
     return config_path
 
+def prepare_training_config():
+    """
+    Modify the training config to set epochs to 2 for testing.
+    """
+    config_path = "examples/config/HGNS_PF_datakit_case14.yaml"
+    
+    # Read the config
+    with open(config_path, 'r') as f:
+        config = yaml.safe_load(f)
+    
+    # Ensure epochs is set to 2
+    if 'training' not in config:
+        config['training'] = {}
+    config['training']['epochs'] = 2
+    
+    # Write back the modified config
+    with open(config_path, 'w') as f:
+        yaml.dump(config, f, default_flow_style=False, sort_keys=False)
+    
+    print(f"Training config updated: epochs set to {config['training']['epochs']}")
+    
+    return config_path
+
 def test_prepare_data():
     """
     gridfm-datakit must be installable via pip with exit code 0.
@@ -70,14 +87,17 @@ def test_prepare_data():
         config_path = prepare_config()
         
         # Generate data using the prepared config
-        execute_and_fail(
+        execute_and_live_output(
             f'gridfm_datakit generate {config_path}'
         )
     else:
         print(f"Data directory '{data_dir}' already exists, skipping data generation.")
     
-    execute_and_fail(
-        'gridfm_graphkit train --config examples/config/HGNS_PF_datakit_case14.yaml --data_path data_out/ --exp_name exp1 --run_name run1 --log_dir logs'
+    # Prepare training config with epochs=2
+    training_config_path = prepare_training_config()
+    
+    execute_and_live_output(
+        f'gridfm_graphkit train --config {training_config_path} --data_path data_out/ --exp_name exp1 --run_name run1 --log_dir logs'
     )
     
     # Find the latest log directory
@@ -104,10 +124,10 @@ def test_prepare_data():
     pbe_mean_value = float(pbe_mean_row.iloc[0]['Value'])
     
     assert 1.1 <= pbe_mean_value <= 2.9, (
-        f"PBE Mean value {pbe_mean_value} is outside acceptable range [1.4, 1.6]"
+        f"PBE Mean value {pbe_mean_value} is outside acceptable range [1.1, 2.9]"
     )
     
-    print(f"✓ PBE Mean value {pbe_mean_value} is within acceptable range [1.4, 1.6]")
+    print(f"PBE Mean value {pbe_mean_value} is within acceptable range [1.1, 2.9]")
 
 
 

From acc507ea68d981307b21f6f8b2c252865353509d Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Sat, 14 Mar 2026 13:10:42 +0100
Subject: [PATCH 06/39] fix docs

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 docs/index.md                        |  10 +-
 docs/tasks/base_task.md              | 216 ++++++++++++++++++++
 docs/tasks/feature_reconstruction.md | 186 +++++++++++++++--
 docs/tasks/optimal_power_flow.md     |   4 +-
 docs/tasks/power_flow.md             |   4 +-
 docs/tasks/reconstruction_task.md    | 293 +++++++++++++++++++++++++++
 docs/tasks/state_estimation.md       |   4 +-
 docs/training/loss.md                |  12 --
 mkdocs.yml                           |   4 +-
 9 files changed, 693 insertions(+), 40 deletions(-)
 create mode 100644 docs/tasks/base_task.md
 create mode 100644 docs/tasks/reconstruction_task.md

diff --git a/docs/index.md b/docs/index.md
index f465518..e38d000 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -14,12 +14,4 @@ This library is brought to you by the GridFM team to train, finetune and interac
 
 
 
-## Citation:
-```bibtex
-@software{gridfm_graphkit_2024,
-  author = {Matteo Mazzonelli, Celia Cintas, Alban Puech and others},
-  title = {GridFM GraphKit},
-  url = {https://github.com/gridfm/gridfm-graphkit},
-  year = {2024}
-}
-```
\ No newline at end of file
+## Citation: TBD
\ No newline at end of file
diff --git a/docs/tasks/base_task.md b/docs/tasks/base_task.md
new file mode 100644
index 0000000..a0acbae
--- /dev/null
+++ b/docs/tasks/base_task.md
@@ -0,0 +1,216 @@
+# Base Task
+
+The `BaseTask` class is an abstract base class that provides the foundation for all task implementations in GridFM-GraphKit. It extends PyTorch Lightning's `LightningModule` and defines the common interface and shared functionality for training, validation, and testing.
+
+## Overview
+
+`BaseTask` serves as the parent class for all task-specific implementations, providing:
+
+- **Abstract method definitions**: Enforces implementation of core methods in subclasses
+- **Optimizer configuration**: Sets up AdamW optimizer with learning rate scheduling
+- **Normalization statistics logging**: Saves normalization parameters for reproducibility
+- **Hyperparameter management**: Automatically saves hyperparameters for experiment tracking
+
+## BaseTask Class
+
+::: gridfm_graphkit.tasks.base_task.BaseTask
+    options:
+      show_root_heading: true
+      show_source: true
+      members:
+        - __init__
+        - forward
+        - training_step
+        - validation_step
+        - test_step
+        - predict_step
+        - on_fit_start
+        - configure_optimizers
+
+## Methods
+
+### `__init__(args, data_normalizers)`
+
+Initialize the base task with configuration and normalizers.
+
+**Parameters:**
+
+- `args` (NestedNamespace): Experiment configuration containing all hyperparameters
+- `data_normalizers` (list): List of normalizer objects, one per dataset
+
+**Attributes Set:**
+
+- `self.args`: Stores the configuration
+- `self.data_normalizers`: Stores the normalizers
+- Automatically calls `save_hyperparameters()` for experiment tracking
+
+---
+
+### `forward(*args, **kwargs)` (Abstract)
+
+Defines the forward pass through the model. Must be implemented by subclasses.
+
+**Returns:**
+
+- Model output (structure depends on task implementation)
+
+---
+
+### `training_step(batch)` (Abstract)
+
+Executes one training step. Must be implemented by subclasses.
+
+**Parameters:**
+
+- `batch`: A batch of data from the training dataloader
+
+**Returns:**
+
+- Loss tensor for backpropagation
+
+---
+
+### `validation_step(batch, batch_idx)` (Abstract)
+
+Executes one validation step. Must be implemented by subclasses.
+
+**Parameters:**
+
+- `batch`: A batch of data from the validation dataloader
+- `batch_idx` (int): Index of the current batch
+
+**Returns:**
+
+- Loss tensor or metrics dictionary
+
+---
+
+### `test_step(batch, batch_idx, dataloader_idx=0)` (Abstract)
+
+Executes one test step. Must be implemented by subclasses.
+
+**Parameters:**
+
+- `batch`: A batch of data from the test dataloader
+- `batch_idx` (int): Index of the current batch
+- `dataloader_idx` (int): Index of the dataloader (for multiple test datasets)
+
+**Returns:**
+
+- Metrics dictionary or None
+
+---
+
+### `predict_step(batch, batch_idx, dataloader_idx=0)` (Abstract)
+
+Executes one prediction step. Must be implemented by subclasses.
+
+**Parameters:**
+
+- `batch`: A batch of data from the prediction dataloader
+- `batch_idx` (int): Index of the current batch
+- `dataloader_idx` (int): Index of the dataloader
+
+**Returns:**
+
+- Predictions dictionary
+
+---
+
+### `on_fit_start()`
+
+Called at the beginning of training. Saves normalization statistics to disk.
+
+**Behavior:**
+
+- Creates a `stats` directory in the logging directory
+- Saves human-readable normalization statistics to `normalization_stats.txt`
+- Saves machine-loadable statistics to `normalizer_stats.pt` (PyTorch format)
+- Only executes on rank 0 in distributed training (via `@rank_zero_only` decorator)
+
+**Output Files:**
+
+1. **`normalization_stats.txt`**: Human-readable text file with statistics for each dataset
+2. **`normalizer_stats.pt`**: PyTorch file containing a dictionary keyed by network name
+
+---
+
+### `configure_optimizers()`
+
+Configures the optimizer and learning rate scheduler.
+
+**Optimizer:**
+
+- **Type**: AdamW
+- **Learning Rate**: From `args.optimizer.learning_rate`
+- **Betas**: From `args.optimizer.beta1` and `args.optimizer.beta2`
+
+**Scheduler:**
+
+- **Type**: ReduceLROnPlateau
+- **Mode**: Minimize
+- **Factor**: From `args.optimizer.lr_decay`
+- **Patience**: From `args.optimizer.lr_patience`
+- **Monitored Metric**: "Validation loss"
+
+**Returns:**
+
+- Dictionary with optimizer and lr_scheduler configuration
+
+## Usage
+
+`BaseTask` is not used directly. Instead, create a subclass that implements all abstract methods:
+
+```python
+from gridfm_graphkit.tasks.base_task import BaseTask
+
+class MyCustomTask(BaseTask):
+    def __init__(self, args, data_normalizers):
+        super().__init__(args, data_normalizers)
+        # Initialize task-specific components
+        
+    def forward(self, x_dict, edge_index_dict, edge_attr_dict, mask_dict):
+        # Implement forward pass
+        pass
+        
+    def training_step(self, batch):
+        # Implement training logic
+        pass
+        
+    def validation_step(self, batch, batch_idx):
+        # Implement validation logic
+        pass
+        
+    def test_step(self, batch, batch_idx, dataloader_idx=0):
+        # Implement test logic
+        pass
+        
+    def predict_step(self, batch, batch_idx, dataloader_idx=0):
+        # Implement prediction logic
+        pass
+```
+
+## Configuration Example
+
+The base task uses the following configuration sections:
+
+```yaml
+optimizer:
+  learning_rate: 0.001
+  beta1: 0.9
+  beta2: 0.999
+  lr_decay: 0.7
+  lr_patience: 5
+
+data:
+  networks:
+    - case14_ieee
+    - case118_ieee
+```
+
+## Related
+
+- [Reconstruction Task](reconstruction_task.md): Base class for reconstruction tasks
+- [Power Flow Task](power_flow.md): Concrete implementation for power flow
+- [Optimal Power Flow Task](optimal_power_flow.md): Concrete implementation for OPF
+- [State Estimation Task](state_estimation.md): Concrete implementation for state estimation
\ No newline at end of file
diff --git a/docs/tasks/feature_reconstruction.md b/docs/tasks/feature_reconstruction.md
index 99a0321..6356c82 100644
--- a/docs/tasks/feature_reconstruction.md
+++ b/docs/tasks/feature_reconstruction.md
@@ -1,29 +1,185 @@
-# Reconstruction Tasks
+# Task Classes Overview
 
-## Base Task
+GridFM-GraphKit provides a hierarchical task system for power grid analysis. All tasks inherit from a common base class and share core functionality while implementing domain-specific logic.
 
-::: gridfm_graphkit.tasks.base_task.BaseTask
+## Task Hierarchy
 
----
+```
+BaseTask (Abstract)
+    └── ReconstructionTask
+            ├── PowerFlowTask
+            ├── OptimalPowerFlowTask
+            └── StateEstimationTask
+```
 
-## Reconstruction Task
+## Available Task Classes
 
-::: gridfm_graphkit.tasks.reconstruction_tasks.ReconstructionTask
+### Base Classes
 
----
+- **[BaseTask](base_task.md)**: Abstract base class providing common functionality for all tasks
+    - Optimizer configuration
+    - Learning rate scheduling
+    - Normalization statistics logging
+    - Abstract method definitions
 
-## Optimal Power Flow Task
+- **[ReconstructionTask](reconstruction_task.md)**: Base class for feature reconstruction tasks
+    - Model integration
+    - Loss function handling
+    - Shared training/validation logic
+    - Test output management
 
-::: gridfm_graphkit.tasks.opf_task.OptimalPowerFlowTask
+### Concrete Task Implementations
 
----
+- **[PowerFlowTask](power_flow.md)**: Power flow analysis
+    - Computes voltage profiles and power flows
+    - Physics-based validation with Power Balance Error (PBE)
+    - Separate metrics for PQ, PV, and REF buses
+    - Detailed per-bus predictions
 
-## Power Flow Task
+- **[OptimalPowerFlowTask](optimal_power_flow.md)**: Optimal power flow with economic optimization
+    - Minimizes generation costs
+    - Tracks optimality gap
+    - Monitors constraint violations (thermal, voltage, angle)
+    - Evaluates reactive power limits
 
-::: gridfm_graphkit.tasks.pf_task.PowerFlowTask
+- **[StateEstimationTask](state_estimation.md)**: State estimation from noisy measurements
+    - Handles measurement noise and outliers
+    - Separate evaluation for outliers, masked values, and clean measurements
+    - Correlation analysis between predictions, measurements, and targets
 
----
+## Quick Reference
 
-## State Estimation Task
+### Method Overview
 
-::: gridfm_graphkit.tasks.se_task.StateEstimationTask
+All task classes implement the following core methods:
+
+| Method | Purpose | Implemented In |
+|--------|---------|----------------|
+| `__init__` | Initialize task with config and normalizers | All classes |
+| `forward` | Forward pass through model | ReconstructionTask+ |
+| `training_step` | Execute one training step | ReconstructionTask+ |
+| `validation_step` | Execute one validation step | ReconstructionTask+ |
+| `test_step` | Execute one test step | Concrete tasks |
+| `predict_step` | Execute one prediction step | Concrete tasks |
+| `on_fit_start` | Save normalization stats before training | BaseTask |
+| `on_test_end` | Generate reports and plots after testing | Concrete tasks |
+| `configure_optimizers` | Setup optimizer and scheduler | BaseTask |
+
+### Task Selection
+
+Tasks are automatically selected based on your YAML configuration:
+
+```yaml
+task:
+  task_name: PowerFlow  # or OptimalPowerFlow, StateEstimation
+```
+
+The task registry automatically instantiates the correct task class based on the `task_name` field.
+
+## Common Features
+
+All tasks share these features:
+
+### 1. Distributed Training Support
+- Multi-GPU training with proper metric synchronization
+- Rank 0 handles logging and file I/O
+- Automatic gathering of test outputs across ranks
+
+### 2. Comprehensive Logging
+- Training and validation metrics logged to MLflow or TensorBoard
+- Automatic hyperparameter tracking
+- Normalization statistics saved for reproducibility
+
+### 3. Test Outputs
+- CSV reports with detailed metrics
+- Visualization plots (when `verbose=True`)
+- Per-dataset analysis for multiple test sets
+
+### 4. Physics-Based Evaluation
+- Power balance error computation
+- Branch flow calculations
+- Residual analysis by bus type
+
+## Configuration
+
+### Basic Configuration
+
+```yaml
+task:
+  task_name: PowerFlow
+  verbose: true
+
+training:
+  batch_size: 64
+  epochs: 100
+  losses: ["MaskedMSE", "PBE"]
+  loss_weights: [0.01, 0.99]
+
+optimizer:
+  learning_rate: 0.001
+  beta1: 0.9
+  beta2: 0.999
+  lr_decay: 0.7
+  lr_patience: 5
+```
+
+### Task-Specific Options
+
+Each task may have additional configuration options. See the individual task documentation for details:
+
+- [Power Flow Configuration](power_flow.md#configuration-example)
+- [Optimal Power Flow Configuration](optimal_power_flow.md#configuration-example)
+- [State Estimation Configuration](state_estimation.md#configuration-example)
+
+## Creating Custom Tasks
+
+To create a custom task, extend `ReconstructionTask` or `BaseTask`:
+
+```python
+from gridfm_graphkit.tasks.reconstruction_tasks import ReconstructionTask
+from gridfm_graphkit.io.registries import TASK_REGISTRY
+
+@TASK_REGISTRY.register("MyCustomTask")
+class MyCustomTask(ReconstructionTask):
+    def __init__(self, args, data_normalizers):
+        super().__init__(args, data_normalizers)
+        # Add custom initialization
+    
+    def test_step(self, batch, batch_idx, dataloader_idx=0):
+        # Implement custom test logic
+        output, loss_dict = self.shared_step(batch)
+        
+        # Add custom metrics
+        custom_metric = self.compute_custom_metric(output, batch)
+        loss_dict["Custom Metric"] = custom_metric
+        
+        # Log metrics
+        for metric, value in loss_dict.items():
+            self.log(f"{dataset_name}/{metric}", value)
+        
+        return loss_dict["loss"]
+    
+    def predict_step(self, batch, batch_idx, dataloader_idx=0):
+        # Implement custom prediction logic
+        output, _ = self.shared_step(batch)
+        return {"predictions": output}
+    
+    def on_test_end(self):
+        # Custom analysis and visualization
+        # Generate reports, plots, etc.
+        super().on_test_end()
+```
+
+Then use it in your configuration:
+
+```yaml
+task:
+  task_name: MyCustomTask
+```
+
+## Related Documentation
+
+- [Loss Functions](../training/loss.md): Available loss functions and their configuration
+- [Data Modules](../datasets/data_modules.md): Data loading and preprocessing
+- [Models](../models/models.md): Available model architectures
+- [Quick Start Guide](../quick_start/quick_start.md): Getting started with training
diff --git a/docs/tasks/optimal_power_flow.md b/docs/tasks/optimal_power_flow.md
index 1837e6b..1c5e955 100644
--- a/docs/tasks/optimal_power_flow.md
+++ b/docs/tasks/optimal_power_flow.md
@@ -131,6 +131,8 @@ The Optimal Power Flow task is automatically selected when you specify `task.nam
 
 ## Related
 
+- [Base Task](base_task.md): Abstract base class for all tasks
+- [Reconstruction Task](reconstruction_task.md): Base class for reconstruction tasks
 - [Power Flow Task](power_flow.md): For standard power flow analysis without optimization
 - [State Estimation Task](state_estimation.md): For state estimation from measurements
-- [Feature Reconstruction](feature_reconstruction.md): Base reconstruction task
\ No newline at end of file
+- [Task Overview](feature_reconstruction.md): Overview of all task classes
\ No newline at end of file
diff --git a/docs/tasks/power_flow.md b/docs/tasks/power_flow.md
index 606bcc0..b3a21c3 100644
--- a/docs/tasks/power_flow.md
+++ b/docs/tasks/power_flow.md
@@ -166,6 +166,8 @@ The Power Flow task is automatically selected when you specify `task.name: Power
 
 ## Related
 
+- [Base Task](base_task.md): Abstract base class for all tasks
+- [Reconstruction Task](reconstruction_task.md): Base class for reconstruction tasks
 - [Optimal Power Flow Task](optimal_power_flow.md): For optimization-based power flow with economic objectives
 - [State Estimation Task](state_estimation.md): For state estimation from noisy measurements
-- [Feature Reconstruction](feature_reconstruction.md): Base reconstruction task
\ No newline at end of file
+- [Task Overview](feature_reconstruction.md): Overview of all task classes
\ No newline at end of file
diff --git a/docs/tasks/reconstruction_task.md b/docs/tasks/reconstruction_task.md
new file mode 100644
index 0000000..28bd8aa
--- /dev/null
+++ b/docs/tasks/reconstruction_task.md
@@ -0,0 +1,293 @@
+# Reconstruction Task
+
+The `ReconstructionTask` class is a concrete implementation of `BaseTask` that provides the foundation for node feature reconstruction on power grid graphs. It wraps a GridFM model and defines the training, validation, and testing logic for reconstructing masked node features.
+
+## Overview
+
+`ReconstructionTask` serves as the base class for all reconstruction-based tasks in GridFM-GraphKit, including:
+
+- Power Flow (PF)
+- Optimal Power Flow (OPF)
+- State Estimation (SE)
+
+It provides:
+
+- **Model integration**: Loads and wraps the GridFM model
+- **Loss function handling**: Configures and applies loss functions
+- **Shared training logic**: Common training and validation steps
+- **Test output management**: Collects and manages test outputs for analysis
+
+## ReconstructionTask Class
+
+::: gridfm_graphkit.tasks.reconstruction_tasks.ReconstructionTask
+    options:
+      show_root_heading: true
+      show_source: true
+      members:
+        - __init__
+        - forward
+        - shared_step
+        - training_step
+        - validation_step
+        - on_test_end
+
+## Methods
+
+### `__init__(args, data_normalizers)`
+
+Initialize the reconstruction task with model, loss function, and configuration.
+
+**Parameters:**
+
+- `args` (NestedNamespace): Experiment configuration with fields like:
+    - `training.batch_size`: Batch size for training
+    - `optimizer.*`: Optimizer configuration
+    - `model.*`: Model architecture configuration
+    - `training.losses`: List of loss functions to use
+    - `data.networks`: List of network names
+- `data_normalizers` (list): One normalizer per dataset for feature normalization/denormalization
+
+**Attributes Set:**
+
+- `self.model`: GridFM model loaded via `load_model()`
+- `self.loss_fn`: Loss function resolved from configuration via `get_loss_function()`
+- `self.batch_size`: Training batch size
+- `self.test_outputs`: Dictionary to store test outputs per dataset (keyed by dataloader index)
+
+**Example:**
+
+```python
+task = ReconstructionTask(args, data_normalizers)
+```
+
+---
+
+### `forward(x_dict, edge_index_dict, edge_attr_dict, mask_dict)`
+
+Forward pass through the model.
+
+**Parameters:**
+
+- `x_dict` (dict): Node features dictionary with keys like `"bus"`, `"gen"`
+- `edge_index_dict` (dict): Edge indices dictionary for heterogeneous edges
+- `edge_attr_dict` (dict): Edge attributes dictionary
+- `mask_dict` (dict): Masking dictionary indicating which features are masked
+
+**Returns:**
+
+- Model output dictionary with predicted node features
+
+**Example:**
+
+```python
+output = task.forward(
+    x_dict=batch.x_dict,
+    edge_index_dict=batch.edge_index_dict,
+    edge_attr_dict=batch.edge_attr_dict,
+    mask_dict=batch.mask_dict
+)
+```
+
+---
+
+### `shared_step(batch)`
+
+Common logic for training and validation steps.
+
+**Parameters:**
+
+- `batch`: A batch from the dataloader containing:
+    - `x_dict`: Input node features
+    - `y_dict`: Target node features
+    - `edge_index_dict`: Edge connectivity
+    - `edge_attr_dict`: Edge attributes
+    - `mask_dict`: Feature masks
+
+**Returns:**
+
+- `output` (dict): Model predictions
+- `loss_dict` (dict): Dictionary containing:
+    - `"loss"`: Total loss value
+    - Additional loss components (if applicable)
+
+**Behavior:**
+
+1. Performs forward pass through the model
+2. Computes loss using the configured loss function
+3. Returns both predictions and loss dictionary
+
+**Example:**
+
+```python
+output, loss_dict = task.shared_step(batch)
+total_loss = loss_dict["loss"]
+```
+
+---
+
+### `training_step(batch)`
+
+Execute one training step.
+
+**Parameters:**
+
+- `batch`: Training batch from dataloader
+
+**Returns:**
+
+- Loss tensor for backpropagation
+
+**Logged Metrics:**
+
+- `"Training Loss"`: Total training loss
+- `"Learning Rate"`: Current learning rate
+
+**Logging Configuration:**
+
+- `batch_size`: Number of graphs in batch
+- `sync_dist=False`: No synchronization across GPUs during training
+- `on_epoch=False`: Log per step, not per epoch
+- `on_step=True`: Log at each training step
+- `prog_bar=False`: Don't show in progress bar
+- `logger=True`: Send to logger (e.g., MLflow)
+
+---
+
+### `validation_step(batch, batch_idx)`
+
+Execute one validation step.
+
+**Parameters:**
+
+- `batch`: Validation batch from dataloader
+- `batch_idx` (int): Index of the current batch
+
+**Returns:**
+
+- Loss tensor
+
+**Logged Metrics:**
+
+- `"Validation loss"`: Total validation loss
+- Additional loss components (if multiple losses are used)
+
+**Logging Configuration:**
+
+- `batch_size`: Number of graphs in batch
+- `sync_dist=True`: Synchronize metrics across GPUs
+- `on_epoch=True`: Aggregate and log at epoch end
+- `on_step=False`: Don't log individual steps
+- `logger=True`: Send to logger
+
+**Note:** The validation loss is monitored by the learning rate scheduler for automatic learning rate reduction.
+
+---
+
+### `on_test_end()`
+
+Called at the end of testing. Clears stored test outputs.
+
+**Behavior:**
+
+- Clears the `self.test_outputs` dictionary
+- Only executes on rank 0 in distributed training (via `@rank_zero_only` decorator)
+- Subclasses typically override this to add custom analysis, plotting, and CSV generation
+
+**Note:** This is a minimal implementation. Task-specific subclasses (PowerFlowTask, OptimalPowerFlowTask, StateEstimationTask) override this method to:
+
+- Generate detailed metrics CSV files
+- Create visualization plots
+- Save analysis results
+
+---
+
+## Usage
+
+`ReconstructionTask` can be used directly for simple reconstruction tasks, but is typically subclassed for specific power system tasks:
+
+```python
+from gridfm_graphkit.tasks.reconstruction_tasks import ReconstructionTask
+
+# Direct usage (simple reconstruction)
+task = ReconstructionTask(args, data_normalizers)
+
+# Or create a subclass for custom behavior
+class CustomReconstructionTask(ReconstructionTask):
+    def test_step(self, batch, batch_idx, dataloader_idx=0):
+        # Custom test logic
+        output, loss_dict = self.shared_step(batch)
+        # Add custom metrics
+        return loss_dict["loss"]
+    
+    def on_test_end(self):
+        # Custom analysis and visualization
+        super().on_test_end()
+```
+
+## Configuration Example
+
+```yaml
+task:
+  task_name: Reconstruction  # Or PowerFlow, OptimalPowerFlow, StateEstimation
+
+model:
+  type: GNS_heterogeneous
+  hidden_size: 48
+  num_layers: 12
+  attention_head: 8
+
+training:
+  batch_size: 64
+  epochs: 100
+  losses:
+    - MaskedMSE
+  loss_weights:
+    - 1.0
+
+optimizer:
+  learning_rate: 0.001
+  beta1: 0.9
+  beta2: 0.999
+  lr_decay: 0.7
+  lr_patience: 5
+```
+
+## Loss Functions
+
+The reconstruction task supports various loss functions configured via the YAML file:
+
+- **MaskedMSE**: Mean squared error on masked features only
+- **MaskedBusMSE**: MSE specifically for bus node features
+- **LayeredWeightedPhysics**: Physics-based loss with layer-wise weighting
+- **PBE**: Power Balance Error loss
+
+Multiple losses can be combined with weights:
+
+```yaml
+training:
+  losses:
+    - LayeredWeightedPhysics
+    - MaskedBusMSE
+  loss_weights:
+    - 0.1
+    - 0.9
+  loss_args:
+    - base_weight: 0.5
+    - {}
+```
+
+## Subclasses
+
+The following task classes extend `ReconstructionTask`:
+
+- **[PowerFlowTask](power_flow.md)**: Adds power flow-specific metrics and physics validation
+- **[OptimalPowerFlowTask](optimal_power_flow.md)**: Adds economic optimization metrics and constraint violation tracking
+- **[StateEstimationTask](state_estimation.md)**: Adds measurement-based estimation and outlier handling
+
+## Related
+
+- [Base Task](base_task.md): Abstract base class for all tasks
+- [Power Flow Task](power_flow.md): Power flow analysis implementation
+- [Optimal Power Flow Task](optimal_power_flow.md): OPF optimization implementation
+- [State Estimation Task](state_estimation.md): State estimation implementation
+- [Loss Functions](../training/loss.md): Available loss functions
\ No newline at end of file
diff --git a/docs/tasks/state_estimation.md b/docs/tasks/state_estimation.md
index 46953ae..b293509 100644
--- a/docs/tasks/state_estimation.md
+++ b/docs/tasks/state_estimation.md
@@ -72,6 +72,8 @@ The State Estimation task is automatically selected when you specify `task.name:
 
 ## Related
 
+- [Base Task](base_task.md): Abstract base class for all tasks
+- [Reconstruction Task](reconstruction_task.md): Base class for reconstruction tasks
 - [Power Flow Task](power_flow.md): For standard power flow analysis
 - [Optimal Power Flow Task](optimal_power_flow.md): For optimization-based power flow
-- [Feature Reconstruction](feature_reconstruction.md): Base reconstruction task
\ No newline at end of file
+- [Task Overview](feature_reconstruction.md): Overview of all task classes
\ No newline at end of file
diff --git a/docs/training/loss.md b/docs/training/loss.md
index 0d08ba3..de56d4b 100644
--- a/docs/training/loss.md
+++ b/docs/training/loss.md
@@ -8,20 +8,12 @@
 
 ## Mean Squared Error Loss
 
-$$
-\mathcal{L}_{\text{MSE}} = \frac{1}{N} \sum_{i=1}^N (y_i - \hat{y}_i)^2
-$$
-
 ::: gridfm_graphkit.training.loss.MSELoss
 
 ---
 
 ## Masked Mean Squared Error Loss
 
-$$
-\mathcal{L}_{\text{MaskedMSE}} = \frac{1}{|M|} \sum_{i \in M} (y_i - \hat{y}_i)^2
-$$
-
 ::: gridfm_graphkit.training.loss.MaskedMSELoss
 
 ---
@@ -40,10 +32,6 @@ $$
 
 ## Mixed Loss
 
-$$
-\mathcal{L}_{\text{Mixed}} = \sum_{m=1}^M w_m \cdot \mathcal{L}_m
-$$
-
 ::: gridfm_graphkit.training.loss.MixedLoss
 
 ---
diff --git a/mkdocs.yml b/mkdocs.yml
index 7c78cce..acb1771 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -19,7 +19,9 @@ nav:
       - Data Modules: datasets/data_modules.md
       - Transforms: datasets/transforms.md
     - Tasks:
-      - Feature Reconstruction: tasks/feature_reconstruction.md
+      - Overview: tasks/feature_reconstruction.md
+      - Base Task: tasks/base_task.md
+      - Reconstruction Task: tasks/reconstruction_task.md
       - Power Flow: tasks/power_flow.md
       - Optimal Power Flow: tasks/optimal_power_flow.md
       - State Estimation: tasks/state_estimation.md

From b8ba2d3be3fa36c74306630030039ff3df6eb317 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Sat, 14 Mar 2026 13:44:47 +0100
Subject: [PATCH 07/39] fix ordering

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 integrationtests/test_base_set.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/integrationtests/test_base_set.py b/integrationtests/test_base_set.py
index 1d1eb33..9672979 100644
--- a/integrationtests/test_base_set.py
+++ b/integrationtests/test_base_set.py
@@ -39,7 +39,7 @@ def prepare_config():
     with open(config_path, 'w') as f:
         yaml.dump(config, f, default_flow_style=False, sort_keys=False)
     
-    print(f"✓ Config prepared at {config_path} with:")
+    print(f"Config prepared at {config_path} with:")
     print(f"  - network.name: {config['network']['name']}")
     print(f"  - load.scenarios: {config['load']['scenarios']}")
     print(f"  - topology_perturbation.n_topology_variants: {config['topology_perturbation']['n_topology_variants']}")
@@ -105,7 +105,7 @@ def test_prepare_data():
     exp_dirs = glob.glob(os.path.join(log_base, "*"))
     assert len(exp_dirs) > 0, "No experiment directories found in logs/"
     
-    latest_exp_dir = max(exp_dirs, key=os.path.getmtime)
+    latest_exp_dir = sorted(exp_dirs, key=os.path.getctime)[-1]
     run_dirs = glob.glob(os.path.join(latest_exp_dir, "*"))
     assert len(run_dirs) > 0, f"No run directories found in {latest_exp_dir}"
     

From 3b08c57a6825cd6ffde36fa8710f304f47c40be8 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Sat, 14 Mar 2026 14:07:15 +0100
Subject: [PATCH 08/39] add fixure for cleanup

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 integrationtests/test_base_set.py | 164 ++++++++++++++++++------------
 1 file changed, 99 insertions(+), 65 deletions(-)

diff --git a/integrationtests/test_base_set.py b/integrationtests/test_base_set.py
index 9672979..8015830 100644
--- a/integrationtests/test_base_set.py
+++ b/integrationtests/test_base_set.py
@@ -5,129 +5,163 @@
 import pandas as pd
 import yaml
 import urllib.request
+import shutil
+
 
 def execute_and_live_output(cmd) -> None:
-    # Remove capture_output=True
-    # We use check=True to raise an exception automatically if returncode != 0
     result = subprocess.run(
         cmd,
         text=True,
         shell=True,
-        check=True 
+        check=True
     )
 
+
 def prepare_config():
     """
     Download default.yaml from gridfm-datakit repo and modify it with test parameters.
     """
     config_url = "https://raw.githubusercontent.com/gridfm/gridfm-datakit/refs/heads/main/scripts/config/default.yaml"
     config_path = "integrationtests/default.yaml"
-    
+
     print(f"Downloading config from {config_url}...")
     with urllib.request.urlopen(config_url) as response:
-        config_content = response.read().decode('utf-8')
-    
-    # Parse YAML
+        config_content = response.read().decode("utf-8")
+
     config = yaml.safe_load(config_content)
-    
-    # Update values as specified (nested structure)
-    config['network']['name'] = 'case14_ieee'
-    config['load']['scenarios'] = 10000
-    config['topology_perturbation']['n_topology_variants'] = 2
-    
-    # Write modified config
-    with open(config_path, 'w') as f:
+
+    config["network"]["name"] = "case14_ieee"
+    config["load"]["scenarios"] = 10000
+    config["topology_perturbation"]["n_topology_variants"] = 2
+
+    with open(config_path, "w") as f:
         yaml.dump(config, f, default_flow_style=False, sort_keys=False)
-    
+
     print(f"Config prepared at {config_path} with:")
     print(f"  - network.name: {config['network']['name']}")
     print(f"  - load.scenarios: {config['load']['scenarios']}")
-    print(f"  - topology_perturbation.n_topology_variants: {config['topology_perturbation']['n_topology_variants']}")
-    
+    print(
+        f"  - topology_perturbation.n_topology_variants: "
+        f"{config['topology_perturbation']['n_topology_variants']}"
+    )
+
     return config_path
 
+
 def prepare_training_config():
     """
     Modify the training config to set epochs to 2 for testing.
     """
     config_path = "examples/config/HGNS_PF_datakit_case14.yaml"
-    
-    # Read the config
-    with open(config_path, 'r') as f:
+
+    with open(config_path, "r") as f:
         config = yaml.safe_load(f)
-    
-    # Ensure epochs is set to 2
-    if 'training' not in config:
-        config['training'] = {}
-    config['training']['epochs'] = 2
-    
-    # Write back the modified config
-    with open(config_path, 'w') as f:
+
+    if "training" not in config:
+        config["training"] = {}
+
+    config["training"]["epochs"] = 2
+
+    with open(config_path, "w") as f:
         yaml.dump(config, f, default_flow_style=False, sort_keys=False)
-    
+
     print(f"Training config updated: epochs set to {config['training']['epochs']}")
-    
+
     return config_path
 
-def test_prepare_data():
+
+@pytest.fixture
+def cleanup_test_artifacts():
+    """
+    Backup modified files and remove generated artifacts after the test.
     """
-    gridfm-datakit must be installable via pip with exit code 0.
+    training_config = "examples/config/HGNS_PF_datakit_case14.yaml"
+    backup_config = training_config + ".bak"
+
+    if os.path.exists(training_config):
+        shutil.copy2(training_config, backup_config)
+
+    yield
+
+    # Restore training config
+    if os.path.exists(backup_config):
+        shutil.move(backup_config, training_config)
+
+    # Remove downloaded config
+    config_file = "integrationtests/default.yaml"
+    if os.path.exists(config_file):
+        os.remove(config_file)
+
+    # Remove generated directories
+    for d in ["data_out", "logs"]:
+        if os.path.exists(d):
+            shutil.rmtree(d, ignore_errors=True)
 
-    This test explicitly re-runs the install command and asserts that pip
-    exits successfully, making the install step a first-class test rather
-    than a silent fixture side-effect.
+
+def test_prepare_data(cleanup_test_artifacts):
+    """
+    Integration test for gridfm-datakit data generation and gridfm-graphkit training.
+
+    Steps:
+    1. Generate power grid data using gridfm-datakit
+    2. Train a model using gridfm-graphkit
+    3. Validate the PBE Mean metric
     """
 
-    # Check if data already exists, if not generate it
     data_dir = "data_out"
+
     if not os.path.exists(data_dir) or not os.listdir(data_dir):
         print("Data directory not found or empty, generating data...")
-        
-        # Prepare the config file
+
         config_path = prepare_config()
-        
-        # Generate data using the prepared config
+
         execute_and_live_output(
-            f'gridfm_datakit generate {config_path}'
+            f"gridfm_datakit generate {config_path}"
         )
     else:
-        print(f"Data directory '{data_dir}' already exists, skipping data generation.")
-    
-    # Prepare training config with epochs=2
+        print(f"Data directory '{data_dir}' already exists, skipping generation.")
+
     training_config_path = prepare_training_config()
-    
+
     execute_and_live_output(
-        f'gridfm_graphkit train --config {training_config_path} --data_path data_out/ --exp_name exp1 --run_name run1 --log_dir logs'
+        f"gridfm_graphkit train "
+        f"--config {training_config_path} "
+        f"--data_path data_out/ "
+        f"--exp_name exp1 "
+        f"--run_name run1 "
+        f"--log_dir logs"
     )
-    
-    # Find the latest log directory
+
     log_base = "logs"
+
     exp_dirs = glob.glob(os.path.join(log_base, "*"))
     assert len(exp_dirs) > 0, "No experiment directories found in logs/"
-    
+
     latest_exp_dir = sorted(exp_dirs, key=os.path.getctime)[-1]
+
     run_dirs = glob.glob(os.path.join(latest_exp_dir, "*"))
     assert len(run_dirs) > 0, f"No run directories found in {latest_exp_dir}"
-    
+
     latest_run_dir = max(run_dirs, key=os.path.getmtime)
-    metrics_file = os.path.join(latest_run_dir, "artifacts", "test", "case14_ieee_metrics.csv")
-    
+
+    metrics_file = os.path.join(
+        latest_run_dir,
+        "artifacts",
+        "test",
+        "case14_ieee_metrics.csv"
+    )
+
     assert os.path.exists(metrics_file), f"Metrics file not found: {metrics_file}"
-    
-    # Read the metrics CSV
+
     df = pd.read_csv(metrics_file)
-    
-    # Find PBE Mean value
-    pbe_mean_row = df[df['Metric'] == 'PBE Mean']
+
+    pbe_mean_row = df[df["Metric"] == "PBE Mean"]
     assert len(pbe_mean_row) > 0, "PBE Mean metric not found in CSV"
-    
-    pbe_mean_value = float(pbe_mean_row.iloc[0]['Value'])
-    
+
+    pbe_mean_value = float(pbe_mean_row.iloc[0]["Value"])
+
     assert 1.1 <= pbe_mean_value <= 2.9, (
         f"PBE Mean value {pbe_mean_value} is outside acceptable range [1.1, 2.9]"
     )
-    
-    print(f"PBE Mean value {pbe_mean_value} is within acceptable range [1.1, 2.9]")
-
-
 
+    print(f"PBE Mean value {pbe_mean_value} is within acceptable range [1.1, 2.9]")
\ No newline at end of file

From 8e7fceb1a7636dffcba5ca8fe413e515f2f84dab Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Sat, 14 Mar 2026 18:25:22 +0100
Subject: [PATCH 09/39] fix test name

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 integrationtests/test_base_set.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrationtests/test_base_set.py b/integrationtests/test_base_set.py
index 8015830..e33e11c 100644
--- a/integrationtests/test_base_set.py
+++ b/integrationtests/test_base_set.py
@@ -98,7 +98,7 @@ def cleanup_test_artifacts():
             shutil.rmtree(d, ignore_errors=True)
 
 
-def test_prepare_data(cleanup_test_artifacts):
+def test_train(cleanup_test_artifacts):
     """
     Integration test for gridfm-datakit data generation and gridfm-graphkit training.
 

From ae1bb49833b41c44497fa8af403cae0b4d0fc481 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Sun, 15 Mar 2026 20:37:19 +0100
Subject: [PATCH 10/39] remove bad doc

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 docs/tasks/optimal_power_flow.md | 138 ------------------------
 docs/tasks/power_flow.md         | 173 -------------------------------
 docs/tasks/state_estimation.md   |  79 --------------
 3 files changed, 390 deletions(-)
 delete mode 100644 docs/tasks/optimal_power_flow.md
 delete mode 100644 docs/tasks/power_flow.md
 delete mode 100644 docs/tasks/state_estimation.md

diff --git a/docs/tasks/optimal_power_flow.md b/docs/tasks/optimal_power_flow.md
deleted file mode 100644
index 1c5e955..0000000
--- a/docs/tasks/optimal_power_flow.md
+++ /dev/null
@@ -1,138 +0,0 @@
-# Optimal Power Flow Task
-
-The Optimal Power Flow (OPF) task solves the optimization problem of determining the most economical operation of a power system while satisfying physical and operational constraints. This task predicts optimal generator setpoints, voltage profiles, and reactive power dispatch.
-
-## Overview
-
-Optimal Power Flow is a fundamental optimization problem in power systems that minimizes generation costs while ensuring:
-
-- **Power balance**: Supply meets demand at all buses
-- **Voltage constraints**: Bus voltages remain within acceptable limits
-- **Thermal limits**: Branch flows don't exceed capacity
-- **Generator limits**: Active and reactive power generation within bounds
-- **Angle difference limits**: Voltage angle differences across branches are acceptable
-
-The `OptimalPowerFlowTask` extends the `ReconstructionTask` to include OPF-specific physics-based constraints and economic metrics.
-
-## Key Features
-
-- **Economic optimization**: Tracks generation costs and optimality gap
-- **Constraint violation monitoring**: Measures violations of thermal, voltage, and angle limits
-- **Physics-based evaluation**: Computes power balance errors and residuals
-- **Bus type differentiation**: Separate metrics for PQ, PV, and REF buses
-- **Comprehensive reporting**: Generates detailed CSV reports and correlation plots
-
-## OptimalPowerFlowTask
-
-::: gridfm_graphkit.tasks.opf_task.OptimalPowerFlowTask
-
-## Metrics
-
-The Optimal Power Flow task computes extensive metrics during testing:
-
-### Economic Metrics
-- **Optimality Gap (%)**: Percentage difference between predicted and optimal generation costs
-- **Generation Cost**: Total cost computed from quadratic cost curves (c₀ + c₁·Pg + c₂·Pg²)
-
-### Power Balance Metrics
-- **Active Power Loss (MW)**: Mean absolute active power residual across all buses
-- **Reactive Power Loss (MVar)**: Mean absolute reactive power residual across all buses
-
-### Constraint Violations
-- **Branch Thermal Violations (MVA)**: 
-  - Forward direction: Mean excess flow above thermal limits
-  - Reverse direction: Mean excess flow above thermal limits
-- **Branch Angle Violations (radians)**: Mean violation of angle difference constraints
-- **Reactive Power Violations**:
-  - PV buses: Mean Qg violation (exceeding min/max limits)
-  - REF buses: Mean Qg violation (exceeding min/max limits)
-
-### Prediction Accuracy (RMSE)
-Computed separately for each bus type (PQ, PV, REF):
-- **Voltage Magnitude (Vm)**: p.u.
-- **Voltage Angle (Va)**: radians
-- **Active Power Generation (Pg)**: MW
-- **Reactive Power Generation (Qg)**: MVar
-
-### Residual Statistics (when verbose=True)
-For each bus type and power type (P, Q):
-- Mean residual per graph
-- Maximum residual per graph
-
-## Bus Types
-
-The task evaluates performance separately for three bus types:
-
-- **PQ Buses**: Load buses with specified active and reactive power demand
-- **PV Buses**: Generator buses with specified active power and voltage magnitude
-- **REF Buses**: Reference/slack buses that balance the system
-
-## Outputs
-
-### CSV Reports
-Two CSV files are generated per test dataset:
-
-1. **`{dataset}_RMSE.csv`**: RMSE metrics by bus type
-   - Columns: Metric, Pg (MW), Qg (MVar), Vm (p.u.), Va (radians)
-   - Rows: RMSE-PQ, RMSE-PV, RMSE-REF
-
-2. **`{dataset}_metrics.csv`**: Comprehensive metrics including:
-   - Average active/reactive residuals
-   - RMSE for generator active power
-   - Mean optimality gap
-   - Branch thermal violations (from/to)
-   - Branch angle difference violations
-   - Qg violations for PV and REF buses
-
-### Visualizations (when verbose=True)
-
-1. **Cost Correlation Plot**: Predicted vs. ground truth generation costs with correlation coefficient
-2. **Residual Histograms**: Distribution of power balance residuals by bus type
-3. **Feature Correlation Plots**: Predictions vs. targets for Vm, Va, Pg, Qg by bus type, including Qg violation highlighting
-
-## Configuration Example
-
-```yaml
-task:
-  name: OptimalPowerFlow
-  verbose: true
-
-training:
-  batch_size: 32
-  epochs: 100
-  losses: ["MaskedMSE", "PBE"]
-  loss_weights: [0.01, 0.99]
-
-optimizer:
-  name: Adam
-  lr: 0.001
-```
-
-## Physics-Based Constraints
-
-The task uses specialized layers to compute physical quantities:
-
-- **`ComputeBranchFlow`**: Calculates active (Pft) and reactive (Qft) power flows on branches
-- **`ComputeNodeInjection`**: Aggregates branch flows to compute net injections at buses
-- **`ComputeNodeResiduals`**: Computes power balance violations (residuals)
-
-These ensure predictions are evaluated not just on accuracy but also on physical feasibility.
-
-## Usage
-
-The Optimal Power Flow task is automatically selected when you specify `task.name: OptimalPowerFlow` in your YAML configuration file. The task:
-
-1. Performs forward pass through the model
-2. Inverse normalizes predictions and targets
-3. Computes branch flows and power balance residuals
-4. Evaluates constraint violations
-5. Calculates economic metrics (costs, optimality gap)
-6. Generates comprehensive reports and visualizations
-
-## Related
-
-- [Base Task](base_task.md): Abstract base class for all tasks
-- [Reconstruction Task](reconstruction_task.md): Base class for reconstruction tasks
-- [Power Flow Task](power_flow.md): For standard power flow analysis without optimization
-- [State Estimation Task](state_estimation.md): For state estimation from measurements
-- [Task Overview](feature_reconstruction.md): Overview of all task classes
\ No newline at end of file
diff --git a/docs/tasks/power_flow.md b/docs/tasks/power_flow.md
deleted file mode 100644
index b3a21c3..0000000
--- a/docs/tasks/power_flow.md
+++ /dev/null
@@ -1,173 +0,0 @@
-# Power Flow Task
-
-The Power Flow task solves the fundamental problem of determining the steady-state operating conditions of a power system. Given load demands and generator setpoints, it computes voltage magnitudes, voltage angles, and power flows throughout the network.
-
-## Overview
-
-Power Flow (also known as Load Flow) analysis is essential for power system planning and operation. It determines:
-
-- **Voltage profiles**: Magnitude and angle at each bus
-- **Power flows**: Active and reactive power on transmission lines
-- **Power injections**: Net power generation/consumption at buses
-- **System losses**: Total active and reactive power losses
-
-The `PowerFlowTask` extends the `ReconstructionTask` to include physics-based power balance evaluation and comprehensive metrics for different bus types.
-
-## Key Features
-
-- **Physics-based validation**: Computes power balance errors (PBE) to verify physical consistency
-- **Bus type differentiation**: Separate metrics for PQ, PV, and REF buses
-- **Distributed training support**: Handles multi-GPU training with proper metric aggregation
-- **Detailed predictions**: Provides per-bus predictions with residuals for analysis
-- **Comprehensive reporting**: Generates CSV reports and correlation plots
-
-## PowerFlowTask
-
-::: gridfm_graphkit.tasks.pf_task.PowerFlowTask
-
-## Metrics
-
-The Power Flow task computes the following metrics during testing:
-
-### Power Balance Metrics
-- **Active Power Loss (MW)**: Mean absolute active power residual across all buses
-- **Reactive Power Loss (MVar)**: Mean absolute reactive power residual across all buses
-- **PBE Mean**: Mean Power Balance Error magnitude across all buses (√(P² + Q²))
-- **PBE Max**: Maximum Power Balance Error across all buses
-
-### Prediction Accuracy (RMSE)
-Computed separately for each bus type (PQ, PV, REF):
-- **Voltage Magnitude (Vm)**: p.u.
-- **Voltage Angle (Va)**: radians
-- **Active Power Generation (Pg)**: MW
-- **Reactive Power Generation (Qg)**: MVar
-
-### Residual Statistics (when verbose=True)
-For each bus type (PQ, PV, REF) and power type (P, Q):
-- Mean residual per graph
-- Maximum residual per graph
-
-## Bus Types
-
-The task evaluates performance separately for three bus types:
-
-- **PQ Buses**: Load buses with specified active and reactive power demand
-- **PV Buses**: Generator buses with specified active power and voltage magnitude
-- **REF Buses**: Reference/slack buses that balance the system
-
-## Power Balance Error (PBE)
-
-The Power Balance Error is a critical metric that measures how well predictions satisfy Kirchhoff's laws:
-
-$$
-\text{PBE} = \sqrt{(\Delta P)^2 + (\Delta Q)^2}
-$$
-
-where:
-- $\Delta P$ = Active power residual (generation - demand - losses)
-- $\Delta Q$ = Reactive power residual (generation - demand - losses)
-
-Lower PBE values indicate better physical consistency of the predictions.
-
-## Outputs
-
-### CSV Reports
-Two CSV files are generated per test dataset:
-
-1. **`{dataset}_RMSE.csv`**: RMSE metrics by bus type
-   - Columns: Metric, Pg (MW), Qg (MVar), Vm (p.u.), Va (radians)
-   - Rows: RMSE-PQ, RMSE-PV, RMSE-REF
-
-2. **`{dataset}_metrics.csv`**: Power balance metrics
-   - Avg. active res. (MW)
-   - Avg. reactive res. (MVar)
-   - PBE Mean
-   - PBE Max
-
-### Visualizations (when verbose=True)
-
-1. **Residual Histograms**: Distribution of power balance residuals by bus type (PQ, PV, REF)
-2. **Feature Correlation Plots**: Predictions vs. targets for Vm, Va, Pg, Qg by bus type
-
-### Prediction Output
-
-The `predict_step` method returns detailed per-bus information:
-
-```python
-{
-    'scenario': scenario IDs,
-    'bus': bus indices,
-    'pd_mw': active power demand,
-    'qd_mvar': reactive power demand,
-    'vm_pu_target': target voltage magnitude,
-    'va_target': target voltage angle,
-    'pg_mw_target': target active power generation,
-    'qg_mvar_target': target reactive power generation,
-    'is_pq': PQ bus indicator,
-    'is_pv': PV bus indicator,
-    'is_ref': REF bus indicator,
-    'vm_pu': predicted voltage magnitude,
-    'va': predicted voltage angle,
-    'pg_mw': predicted active power generation,
-    'qg_mvar': predicted reactive power generation,
-    'active res. (MW)': active power residual,
-    'reactive res. (MVar)': reactive power residual,
-    'PBE': power balance error magnitude
-}
-```
-
-## Configuration Example
-
-```yaml
-task:
-  name: PowerFlow
-  verbose: true
-
-training:
-  batch_size: 32
-  epochs: 100
-  losses: ["MaskedMSE", "PBE"]
-  loss_weights: [0.01, 0.99]
-
-optimizer:
-  name: Adam
-  lr: 0.001
-```
-
-## Physics-Based Constraints
-
-The task uses specialized layers to compute physical quantities:
-
-- **`ComputeBranchFlow`**: Calculates active (Pft) and reactive (Qft) power flows on branches using the power flow equations
-- **`ComputeNodeInjection`**: Aggregates branch flows to compute net power injections at each bus
-- **`ComputeNodeResiduals`**: Computes power balance violations by comparing injections with generation and demand
-
-These layers ensure that predictions are evaluated not only on accuracy but also on their adherence to fundamental power system physics.
-
-## Distributed Training
-
-The PowerFlowTask includes special handling for distributed training:
-
-- **Metric aggregation**: Uses `sync_dist=True` to properly aggregate metrics across GPUs
-- **Verbose output gathering**: Collects test outputs from all ranks to rank 0 for complete visualization
-- **Max reduction for PBE Max**: Uses `reduce_fx="max"` to find the global maximum PBE across all processes
-
-## Usage
-
-The Power Flow task is automatically selected when you specify `task.name: PowerFlow` in your YAML configuration file. The task:
-
-1. Performs forward pass through the model
-2. Inverse normalizes predictions and targets
-3. Computes branch flows using power flow equations
-4. Calculates power balance residuals and PBE
-5. Evaluates metrics separately for each bus type
-6. Generates comprehensive reports and visualizations
-7. Provides detailed per-bus predictions for analysis
-
-## Related
-
-- [Base Task](base_task.md): Abstract base class for all tasks
-- [Reconstruction Task](reconstruction_task.md): Base class for reconstruction tasks
-- [Optimal Power Flow Task](optimal_power_flow.md): For optimization-based power flow with economic objectives
-- [State Estimation Task](state_estimation.md): For state estimation from noisy measurements
-- [Task Overview](feature_reconstruction.md): Overview of all task classes
\ No newline at end of file
diff --git a/docs/tasks/state_estimation.md b/docs/tasks/state_estimation.md
deleted file mode 100644
index b293509..0000000
--- a/docs/tasks/state_estimation.md
+++ /dev/null
@@ -1,79 +0,0 @@
-# State Estimation Task
-
-The State Estimation task focuses on estimating the true state of a power grid from noisy measurements. This is a critical problem in power system operations, where sensor measurements may contain errors, outliers, or missing data.
-
-## Overview
-
-State estimation in power systems involves determining voltage magnitudes, voltage angles, and power injections at buses from available measurements. The `StateEstimationTask` extends the `ReconstructionTask` to handle:
-
-- **Noisy measurements**: Input features include measurement noise and potential outliers
-- **Missing data**: Some measurements may be masked or unavailable
-- **Outlier detection**: The task tracks and evaluates performance on outlier measurements separately
-
-## Key Features
-
-- **Measurement-based prediction**: Estimates true grid state from noisy sensor data
-- **Outlier handling**: Distinguishes between normal measurements, masked values, and outliers
-- **Correlation analysis**: Generates plots comparing predictions vs. targets and predictions vs. measurements
-- **Multi-mask evaluation**: Evaluates performance separately for outliers, masked values, and clean measurements
-
-## StateEstimationTask
-
-::: gridfm_graphkit.tasks.se_task.StateEstimationTask
-
-## Metrics
-
-The State Estimation task computes and logs the following metrics during testing:
-
-### Prediction Quality
-- **Voltage Magnitude (Vm)**: Accuracy of estimated voltage magnitudes at buses
-- **Voltage Angle (Va)**: Accuracy of estimated voltage angles at buses  
-- **Active Power Injection (Pg)**: Accuracy of estimated active power at buses
-- **Reactive Power Injection (Qg)**: Accuracy of estimated reactive power at buses
-
-### Evaluation Categories
-Metrics are computed separately for three categories:
-- **Outliers**: Measurements identified as outliers
-- **Masked**: Intentionally masked/missing measurements
-- **Non-outliers**: Clean measurements without outliers or masking
-
-## Visualization
-
-When `verbose=True` in the configuration, the task generates correlation plots:
-
-1. **Predictions vs. Targets**: Shows how well predictions match ground truth
-2. **Predictions vs. Measurements**: Shows how predictions compare to noisy input measurements
-3. **Measurements vs. Targets**: Shows the quality of input measurements
-
-These plots are generated for each feature (Vm, Va, Pg, Qg) and saved to the test artifacts directory.
-
-## Configuration Example
-
-```yaml
-task:
-  name: StateEstimation
-  verbose: true
-
-training:
-  batch_size: 32
-  epochs: 100
-  losses: ["MaskedMSE"]
-  loss_weights: [1.0]
-```
-
-## Usage
-
-The State Estimation task is automatically selected when you specify `task.name: StateEstimation` in your YAML configuration file. The task handles:
-
-1. Forward pass through the model with masked/noisy inputs
-2. Inverse normalization of predictions and targets
-3. Computation of metrics for different measurement categories
-4. Generation of correlation plots and analysis
-
-## Related
-
-- [Base Task](base_task.md): Abstract base class for all tasks
-- [Reconstruction Task](reconstruction_task.md): Base class for reconstruction tasks
-- [Power Flow Task](power_flow.md): For standard power flow analysis
-- [Optimal Power Flow Task](optimal_power_flow.md): For optimization-based power flow
-- [Task Overview](feature_reconstruction.md): Overview of all task classes
\ No newline at end of file

From 122c8cadd1d8fa34c41825acaffe2c61723fbfd6 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Sun, 15 Mar 2026 20:39:04 +0100
Subject: [PATCH 11/39] fix broken links

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 mkdocs.yml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/mkdocs.yml b/mkdocs.yml
index acb1771..7bc8052 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -22,9 +22,6 @@ nav:
       - Overview: tasks/feature_reconstruction.md
       - Base Task: tasks/base_task.md
       - Reconstruction Task: tasks/reconstruction_task.md
-      - Power Flow: tasks/power_flow.md
-      - Optimal Power Flow: tasks/optimal_power_flow.md
-      - State Estimation: tasks/state_estimation.md
     - Models: models/models.md
     - Training:
       - Losses: training/loss.md

From 2712a5225d84c7760ebd760a7421ba6f9c7c7047 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Sun, 15 Mar 2026 22:48:40 +0100
Subject: [PATCH 12/39] fix documentation

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 docs/install/installation.md      | 26 +++++++++-
 docs/tasks/optimal_power_flow.md  | 79 +++++++++++++++++++++++++++++++
 docs/tasks/power_flow.md          | 74 +++++++++++++++++++++++++++++
 docs/tasks/state_estimation.md    | 66 ++++++++++++++++++++++++++
 gridfm_graphkit/tasks/opf_task.py |  4 +-
 mkdocs.yml                        |  3 ++
 6 files changed, 249 insertions(+), 3 deletions(-)
 create mode 100644 docs/tasks/optimal_power_flow.md
 create mode 100644 docs/tasks/power_flow.md
 create mode 100644 docs/tasks/state_estimation.md

diff --git a/docs/install/installation.md b/docs/install/installation.md
index 07dc502..89ee4be 100644
--- a/docs/install/installation.md
+++ b/docs/install/installation.md
@@ -1,14 +1,18 @@
+# Installation
+
 You can install `gridfm-graphkit` directly from PyPI:
 
 ```bash
 pip install gridfm-graphkit
 ```
 
+For GPU support and compatibility with PyTorch Geometric's scatter operations, install PyTorch (and optionally CUDA) first, then install the matching `torch-scatter` wheel. See [PyTorch and torch-scatter](#pytorch-and-torch-scatter-optional) below.
+
 ---
 
 ## Development Setup
 
-To contribute or develop locally, clone the repository and install in editable mode:
+To contribute or develop locally, clone the repository and install in editable mode. Use Python 3.10, 3.11, or 3.12 (3.12 is recommended).
 
 ```bash
 git clone git@github.com:gridfm/gridfm-graphkit.git
@@ -18,6 +22,26 @@ source venv/bin/activate
 pip install -e .
 ```
 
+### PyTorch and torch-scatter (optional)
+
+If you need GPU acceleration or PyTorch Geometric scatter ops (used by the library), install PyTorch and the matching `torch-scatter` wheel:
+
+1. Install PyTorch (see [pytorch.org](https://pytorch.org/) for your platform and CUDA version).
+
+2. Get your Torch + CUDA version string:
+   ```bash
+   TORCH_CUDA_VERSION=$(python -c "import torch; print(torch.__version__ + ('+cpu' if torch.version.cuda is None else ''))")
+   ```
+
+3. Install the correct `torch-scatter` wheel:
+   ```bash
+   pip install torch-scatter -f https://data.pyg.org/whl/torch-${TORCH_CUDA_VERSION}.html
+   ```
+
+---
+
+## Optional extras
+
 For documentation generation and unit testing, install with the optional `dev` and `test` extras:
 
 ```bash
diff --git a/docs/tasks/optimal_power_flow.md b/docs/tasks/optimal_power_flow.md
new file mode 100644
index 0000000..90a98ab
--- /dev/null
+++ b/docs/tasks/optimal_power_flow.md
@@ -0,0 +1,79 @@
+# Optimal Power Flow Task
+
+The `OptimalPowerFlowTask` class is a concrete implementation of `ReconstructionTask` for **optimal power flow (OPF)**. It adds economic optimization metrics (generation cost, optimality gap), constraint violation tracking (thermal, voltage angle, reactive power limits), and the same physics-based validation as the power flow task.
+
+## Overview
+
+`OptimalPowerFlowTask` extends `ReconstructionTask` and provides:
+
+- **Economic metrics**: Generation cost from quadratic cost coefficients (C0, C1, C2) and **optimality gap** (relative difference between predicted and ground-truth cost)
+- **Constraint violations**: Branch thermal limits (RATE_A), branch angle limits (ANG_MIN, ANG_MAX), and reactive power limits (Qg min/max) for PV and REF buses
+- **Physics validation**: Same branch flow, node injection, and power balance residuals as PowerFlowTask
+- **Per-bus-type MSE**: Separate MSE for PQ, PV, and REF buses (PG, QG, VM, VA)
+
+## OptimalPowerFlowTask Class
+
+::: gridfm_graphkit.tasks.opf_task.OptimalPowerFlowTask
+    options:
+      show_root_heading: true
+      show_source: true
+      members:
+        - __init__
+        - test_step
+        - on_test_end
+
+## Configuration Example
+
+Use the task by setting `task_name: OptimalPowerFlow` in your YAML:
+
+```yaml
+task:
+  task_name: OptimalPowerFlow
+
+model:
+  type: GNS_heterogeneous
+  hidden_size: 48
+  num_layers: 12
+  attention_head: 8
+
+training:
+  batch_size: 64
+  epochs: 100
+  losses:
+    - MaskedMSE
+  loss_weights:
+    - 1.0
+
+data:
+  networks:
+    - case14_ieee
+    - case118_ieee
+
+verbose: true
+```
+
+## Test Metrics
+
+During evaluation, `OptimalPowerFlowTask` logs (per dataset):
+
+| Metric | Description |
+|--------|-------------|
+| Test loss | Main reconstruction loss |
+| Opt gap | Mean absolute percentage difference between predicted and ground-truth generation cost |
+| MSE PG | MSE on generator active power |
+| Active / Reactive Power Loss | Mean absolute P/Q residuals |
+| Branch thermal violation from | Mean thermal limit excess on forward branch (apparent power vs RATE_A) |
+| Branch thermal violation to | Mean thermal limit excess on reverse branch (apparent power vs RATE_A) |
+| Branch voltage angle difference violations | Mean angle limit violation (degrees) |
+| Mean Qg violation PV buses | Mean reactive power limit violation on PV buses |
+| Mean Qg violation REF buses | Mean reactive power limit violation on REF buses |
+| MSE PQ/PV/REF nodes - PG/QG/VM/VA | MSE per bus type and output dimension |
+
+With `verbose: true`, CSV reports and plots are written to MLflow artifacts.
+
+## Related
+
+- [Reconstruction Task](reconstruction_task.md): Base class for reconstruction tasks
+- [Power Flow Task](power_flow.md): Power flow analysis (no cost or constraint metrics)
+- [Base Task](base_task.md): Abstract base class for all tasks
+- [Loss Functions](../training/loss.md): Available loss functions
diff --git a/docs/tasks/power_flow.md b/docs/tasks/power_flow.md
new file mode 100644
index 0000000..df29a40
--- /dev/null
+++ b/docs/tasks/power_flow.md
@@ -0,0 +1,74 @@
+# Power Flow Task
+
+The `PowerFlowTask` class is a concrete implementation of `ReconstructionTask` for **power flow analysis**. It computes voltage profiles and power flows from given injections and adds physics-based validation using Power Balance Error (PBE) and per-bus-type metrics.
+
+## Overview
+
+`PowerFlowTask` extends `ReconstructionTask` and provides:
+
+- **Physics-based validation**: Branch flow computation, node injection, and power balance residuals (P, Q)
+- **Per-bus-type metrics**: Separate MSE and residual statistics for PQ, PV, and REF buses (PG, QG, VM, VA)
+- **Power Balance Error (PBE)**: Mean and max PBE across the test set
+- **Optional verbose output**: Residual histograms and correlation plots when `args.verbose` is true
+
+## PowerFlowTask Class
+
+::: gridfm_graphkit.tasks.pf_task.PowerFlowTask
+    options:
+      show_root_heading: true
+      show_source: true
+      members:
+        - __init__
+        - test_step
+        - on_test_end
+
+## Configuration Example
+
+Use the task by setting `task_name: PowerFlow` in your YAML:
+
+```yaml
+task:
+  task_name: PowerFlow
+
+model:
+  type: GNS_heterogeneous
+  hidden_size: 48
+  num_layers: 12
+  attention_head: 8
+
+training:
+  batch_size: 64
+  epochs: 100
+  losses:
+    - MaskedMSE
+  loss_weights:
+    - 1.0
+
+data:
+  networks:
+    - case14_ieee
+    - case118_ieee
+
+verbose: true
+```
+
+## Test Metrics
+
+During evaluation, `PowerFlowTask` logs (per dataset):
+
+| Metric | Description |
+|--------|-------------|
+| Test loss | Main reconstruction loss |
+| Active Power Loss | Mean absolute active power residual |
+| Reactive Power Loss | Mean absolute reactive power residual |
+| PBE Mean | Mean power balance error magnitude |
+| PBE Max | Maximum power balance error (reduced with max across batches) |
+| MSE PQ/PV/REF nodes - PG/QG/VM/VA | MSE per bus type and output dimension |
+
+With `verbose: true`, CSV reports and residual histograms are written to MLflow artifacts.
+
+## Related
+
+- [Reconstruction Task](reconstruction_task.md): Base class for reconstruction tasks
+- [Base Task](base_task.md): Abstract base class for all tasks
+- [Loss Functions](../training/loss.md): Available loss functions (e.g. MaskedMSE, LayeredWeightedPhysics)
diff --git a/docs/tasks/state_estimation.md b/docs/tasks/state_estimation.md
new file mode 100644
index 0000000..02ff831
--- /dev/null
+++ b/docs/tasks/state_estimation.md
@@ -0,0 +1,66 @@
+# State Estimation Task
+
+The `StateEstimationTask` class is a concrete implementation of `ReconstructionTask` for **state estimation** from noisy measurements. It evaluates predictions against ground truth and measurements, with separate handling for outliers, masked values, and clean measurements.
+
+## Overview
+
+`StateEstimationTask` extends `ReconstructionTask` and provides:
+
+- **Measurement-based setup**: Inputs are (noisy) measurements; targets are true states. The model reconstructs the state from measurements.
+- **Three-way evaluation**: Comparisons between predictions vs targets, predictions vs measurements, and measurements vs targets, with masks for outliers, masked (hidden) values, and non-outliers.
+- **Correlation plots**: When `verbose: true`, scatter plots (pred vs target, pred vs measured, measured vs target) per feature (Vm, Va, Pg, Qg) are saved to MLflow artifacts.
+
+## StateEstimationTask Class
+
+::: gridfm_graphkit.tasks.se_task.StateEstimationTask
+    options:
+      show_root_heading: true
+      show_source: true
+      members:
+        - __init__
+        - test_step
+        - on_test_end
+        - predict_step
+
+## Configuration Example
+
+Use the task by setting `task_name: StateEstimation` in your YAML:
+
+```yaml
+task:
+  task_name: StateEstimation
+
+model:
+  type: GNS_heterogeneous
+  hidden_size: 48
+  num_layers: 12
+  attention_head: 8
+
+training:
+  batch_size: 64
+  epochs: 100
+  losses:
+    - MaskedMSE
+  loss_weights:
+    - 1.0
+
+data:
+  networks:
+    - case14_ieee
+    - case118_ieee
+
+verbose: true
+```
+
+## Test Outputs
+
+- **test_step**: Runs the shared reconstruction step, then computes targets and measurements (Vm, Va, P_injection, Q_injection). Uses `mask_dict["outliers_bus"]`, `mask_dict["bus"]`, and non-outlier masks to separate evaluation groups. Stores predictions, targets, and measurements for `on_test_end`.
+- **on_test_end**: If `verbose`, writes correlation plots (pred vs target, pred vs measured, measured vs target) per dataset to `artifacts/test_plots/<dataset_name>/`.
+- **predict_step**: Currently a stub; override in a subclass or in a future release for custom prediction behavior.
+
+## Related
+
+- [Reconstruction Task](reconstruction_task.md): Base class for reconstruction tasks
+- [Power Flow Task](power_flow.md): Power flow analysis
+- [Optimal Power Flow Task](optimal_power_flow.md): OPF with cost and constraint metrics
+- [Base Task](base_task.md): Abstract base class for all tasks
diff --git a/gridfm_graphkit/tasks/opf_task.py b/gridfm_graphkit/tasks/opf_task.py
index b28c5a0..06d938d 100644
--- a/gridfm_graphkit/tasks/opf_task.py
+++ b/gridfm_graphkit/tasks/opf_task.py
@@ -256,8 +256,8 @@ def test_step(self, batch, batch_idx, dataloader_idx=0):
         loss_dict["Opt gap"] = optimality_gap
         loss_dict["MSE PG"] = mse_PG[PG_H]
 
-        loss_dict["Branch termal violation from"] = mean_thermal_violation_forward
-        loss_dict["Branch termal violation to"] = mean_thermal_violation_reverse
+        loss_dict["Branch thermal violation from"] = mean_thermal_violation_forward
+        loss_dict["Branch thermal violation to"] = mean_thermal_violation_reverse
         loss_dict["Branch voltage angle difference violations"] = (
             branch_angle_violation_mean
         )
diff --git a/mkdocs.yml b/mkdocs.yml
index 7bc8052..6581214 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -22,6 +22,9 @@ nav:
       - Overview: tasks/feature_reconstruction.md
       - Base Task: tasks/base_task.md
       - Reconstruction Task: tasks/reconstruction_task.md
+      - Power Flow Task: tasks/power_flow.md
+      - Optimal Power Flow Task: tasks/optimal_power_flow.md
+      - State Estimation Task: tasks/state_estimation.md
     - Models: models/models.md
     - Training:
       - Losses: training/loss.md

From 786659556fdc0b3dbf2d15eb43bfed643add034d Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Mon, 16 Mar 2026 14:18:41 +0100
Subject: [PATCH 13/39] strip down

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 docs/tasks/state_estimation.md | 55 ----------------------------------
 1 file changed, 55 deletions(-)

diff --git a/docs/tasks/state_estimation.md b/docs/tasks/state_estimation.md
index 02ff831..c3adbbc 100644
--- a/docs/tasks/state_estimation.md
+++ b/docs/tasks/state_estimation.md
@@ -1,17 +1,5 @@
 # State Estimation Task
 
-The `StateEstimationTask` class is a concrete implementation of `ReconstructionTask` for **state estimation** from noisy measurements. It evaluates predictions against ground truth and measurements, with separate handling for outliers, masked values, and clean measurements.
-
-## Overview
-
-`StateEstimationTask` extends `ReconstructionTask` and provides:
-
-- **Measurement-based setup**: Inputs are (noisy) measurements; targets are true states. The model reconstructs the state from measurements.
-- **Three-way evaluation**: Comparisons between predictions vs targets, predictions vs measurements, and measurements vs targets, with masks for outliers, masked (hidden) values, and non-outliers.
-- **Correlation plots**: When `verbose: true`, scatter plots (pred vs target, pred vs measured, measured vs target) per feature (Vm, Va, Pg, Qg) are saved to MLflow artifacts.
-
-## StateEstimationTask Class
-
 ::: gridfm_graphkit.tasks.se_task.StateEstimationTask
     options:
       show_root_heading: true
@@ -21,46 +9,3 @@ The `StateEstimationTask` class is a concrete implementation of `ReconstructionT
         - test_step
         - on_test_end
         - predict_step
-
-## Configuration Example
-
-Use the task by setting `task_name: StateEstimation` in your YAML:
-
-```yaml
-task:
-  task_name: StateEstimation
-
-model:
-  type: GNS_heterogeneous
-  hidden_size: 48
-  num_layers: 12
-  attention_head: 8
-
-training:
-  batch_size: 64
-  epochs: 100
-  losses:
-    - MaskedMSE
-  loss_weights:
-    - 1.0
-
-data:
-  networks:
-    - case14_ieee
-    - case118_ieee
-
-verbose: true
-```
-
-## Test Outputs
-
-- **test_step**: Runs the shared reconstruction step, then computes targets and measurements (Vm, Va, P_injection, Q_injection). Uses `mask_dict["outliers_bus"]`, `mask_dict["bus"]`, and non-outlier masks to separate evaluation groups. Stores predictions, targets, and measurements for `on_test_end`.
-- **on_test_end**: If `verbose`, writes correlation plots (pred vs target, pred vs measured, measured vs target) per dataset to `artifacts/test_plots/<dataset_name>/`.
-- **predict_step**: Currently a stub; override in a subclass or in a future release for custom prediction behavior.
-
-## Related
-
-- [Reconstruction Task](reconstruction_task.md): Base class for reconstruction tasks
-- [Power Flow Task](power_flow.md): Power flow analysis
-- [Optimal Power Flow Task](optimal_power_flow.md): OPF with cost and constraint metrics
-- [Base Task](base_task.md): Abstract base class for all tasks

From 2841e0fd14dcfa9f22e9c208957887fcb35f7466 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Mon, 16 Mar 2026 16:52:11 +0100
Subject: [PATCH 14/39] simplify doc

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 docs/tasks/optimal_power_flow.md | 67 --------------------------------
 docs/tasks/power_flow.md         | 62 -----------------------------
 2 files changed, 129 deletions(-)

diff --git a/docs/tasks/optimal_power_flow.md b/docs/tasks/optimal_power_flow.md
index 90a98ab..3d13a57 100644
--- a/docs/tasks/optimal_power_flow.md
+++ b/docs/tasks/optimal_power_flow.md
@@ -1,16 +1,5 @@
 # Optimal Power Flow Task
 
-The `OptimalPowerFlowTask` class is a concrete implementation of `ReconstructionTask` for **optimal power flow (OPF)**. It adds economic optimization metrics (generation cost, optimality gap), constraint violation tracking (thermal, voltage angle, reactive power limits), and the same physics-based validation as the power flow task.
-
-## Overview
-
-`OptimalPowerFlowTask` extends `ReconstructionTask` and provides:
-
-- **Economic metrics**: Generation cost from quadratic cost coefficients (C0, C1, C2) and **optimality gap** (relative difference between predicted and ground-truth cost)
-- **Constraint violations**: Branch thermal limits (RATE_A), branch angle limits (ANG_MIN, ANG_MAX), and reactive power limits (Qg min/max) for PV and REF buses
-- **Physics validation**: Same branch flow, node injection, and power balance residuals as PowerFlowTask
-- **Per-bus-type MSE**: Separate MSE for PQ, PV, and REF buses (PG, QG, VM, VA)
-
 ## OptimalPowerFlowTask Class
 
 ::: gridfm_graphkit.tasks.opf_task.OptimalPowerFlowTask
@@ -21,59 +10,3 @@ The `OptimalPowerFlowTask` class is a concrete implementation of `Reconstruction
         - __init__
         - test_step
         - on_test_end
-
-## Configuration Example
-
-Use the task by setting `task_name: OptimalPowerFlow` in your YAML:
-
-```yaml
-task:
-  task_name: OptimalPowerFlow
-
-model:
-  type: GNS_heterogeneous
-  hidden_size: 48
-  num_layers: 12
-  attention_head: 8
-
-training:
-  batch_size: 64
-  epochs: 100
-  losses:
-    - MaskedMSE
-  loss_weights:
-    - 1.0
-
-data:
-  networks:
-    - case14_ieee
-    - case118_ieee
-
-verbose: true
-```
-
-## Test Metrics
-
-During evaluation, `OptimalPowerFlowTask` logs (per dataset):
-
-| Metric | Description |
-|--------|-------------|
-| Test loss | Main reconstruction loss |
-| Opt gap | Mean absolute percentage difference between predicted and ground-truth generation cost |
-| MSE PG | MSE on generator active power |
-| Active / Reactive Power Loss | Mean absolute P/Q residuals |
-| Branch thermal violation from | Mean thermal limit excess on forward branch (apparent power vs RATE_A) |
-| Branch thermal violation to | Mean thermal limit excess on reverse branch (apparent power vs RATE_A) |
-| Branch voltage angle difference violations | Mean angle limit violation (degrees) |
-| Mean Qg violation PV buses | Mean reactive power limit violation on PV buses |
-| Mean Qg violation REF buses | Mean reactive power limit violation on REF buses |
-| MSE PQ/PV/REF nodes - PG/QG/VM/VA | MSE per bus type and output dimension |
-
-With `verbose: true`, CSV reports and plots are written to MLflow artifacts.
-
-## Related
-
-- [Reconstruction Task](reconstruction_task.md): Base class for reconstruction tasks
-- [Power Flow Task](power_flow.md): Power flow analysis (no cost or constraint metrics)
-- [Base Task](base_task.md): Abstract base class for all tasks
-- [Loss Functions](../training/loss.md): Available loss functions
diff --git a/docs/tasks/power_flow.md b/docs/tasks/power_flow.md
index df29a40..8912a26 100644
--- a/docs/tasks/power_flow.md
+++ b/docs/tasks/power_flow.md
@@ -1,16 +1,5 @@
 # Power Flow Task
 
-The `PowerFlowTask` class is a concrete implementation of `ReconstructionTask` for **power flow analysis**. It computes voltage profiles and power flows from given injections and adds physics-based validation using Power Balance Error (PBE) and per-bus-type metrics.
-
-## Overview
-
-`PowerFlowTask` extends `ReconstructionTask` and provides:
-
-- **Physics-based validation**: Branch flow computation, node injection, and power balance residuals (P, Q)
-- **Per-bus-type metrics**: Separate MSE and residual statistics for PQ, PV, and REF buses (PG, QG, VM, VA)
-- **Power Balance Error (PBE)**: Mean and max PBE across the test set
-- **Optional verbose output**: Residual histograms and correlation plots when `args.verbose` is true
-
 ## PowerFlowTask Class
 
 ::: gridfm_graphkit.tasks.pf_task.PowerFlowTask
@@ -21,54 +10,3 @@ The `PowerFlowTask` class is a concrete implementation of `ReconstructionTask` f
         - __init__
         - test_step
         - on_test_end
-
-## Configuration Example
-
-Use the task by setting `task_name: PowerFlow` in your YAML:
-
-```yaml
-task:
-  task_name: PowerFlow
-
-model:
-  type: GNS_heterogeneous
-  hidden_size: 48
-  num_layers: 12
-  attention_head: 8
-
-training:
-  batch_size: 64
-  epochs: 100
-  losses:
-    - MaskedMSE
-  loss_weights:
-    - 1.0
-
-data:
-  networks:
-    - case14_ieee
-    - case118_ieee
-
-verbose: true
-```
-
-## Test Metrics
-
-During evaluation, `PowerFlowTask` logs (per dataset):
-
-| Metric | Description |
-|--------|-------------|
-| Test loss | Main reconstruction loss |
-| Active Power Loss | Mean absolute active power residual |
-| Reactive Power Loss | Mean absolute reactive power residual |
-| PBE Mean | Mean power balance error magnitude |
-| PBE Max | Maximum power balance error (reduced with max across batches) |
-| MSE PQ/PV/REF nodes - PG/QG/VM/VA | MSE per bus type and output dimension |
-
-With `verbose: true`, CSV reports and residual histograms are written to MLflow artifacts.
-
-## Related
-
-- [Reconstruction Task](reconstruction_task.md): Base class for reconstruction tasks
-- [Base Task](base_task.md): Abstract base class for all tasks
-- [Loss Functions](../training/loss.md): Available loss functions (e.g. MaskedMSE, LayeredWeightedPhysics)

From 8a59967b096fa4d86d478756d8a40a88ddc75b6a Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Tue, 17 Mar 2026 18:31:47 +0100
Subject: [PATCH 15/39] add support for dataset wrapper

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 .gitignore                                    |  2 +-
 gridfm_graphkit/__main__.py                   | 24 +++++++++++++++++++
 gridfm_graphkit/cli.py                        |  2 ++
 .../datasets/hetero_powergrid_datamodule.py   | 24 +++++++++++++++++++
 4 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 164d66b..00f7a2f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,4 +14,4 @@ integrationtests/data_out*
 .julia
 *logs*
 *data_out*
-site*
\ No newline at end of file
+site*
diff --git a/gridfm_graphkit/__main__.py b/gridfm_graphkit/__main__.py
index 21046f7..d006718 100644
--- a/gridfm_graphkit/__main__.py
+++ b/gridfm_graphkit/__main__.py
@@ -18,6 +18,12 @@ def main():
     train_parser.add_argument("--run_name", type=str, default="run")
     train_parser.add_argument("--log_dir", type=str, default="mlruns")
     train_parser.add_argument("--data_path", type=str, default="data")
+    train_parser.add_argument(
+        "--dataset_wrapper",
+        type=str,
+        default=None,
+        help="Fully-qualified class name of a dataset wrapper to apply, e.g. mypackage.module.MyWrapper",
+    )
 
     # ---- FINETUNE SUBCOMMAND ----
     finetune_parser = subparsers.add_parser("finetune", help="Run fine-tuning")
@@ -27,6 +33,12 @@ def main():
     finetune_parser.add_argument("--run_name", type=str, default="run")
     finetune_parser.add_argument("--log_dir", type=str, default="mlruns")
     finetune_parser.add_argument("--data_path", type=str, default="data")
+    finetune_parser.add_argument(
+        "--dataset_wrapper",
+        type=str,
+        default=None,
+        help="Fully-qualified class name of a dataset wrapper to apply, e.g. mypackage.module.MyWrapper",
+    )
 
     # ---- EVALUATE SUBCOMMAND ----
     evaluate_parser = subparsers.add_parser(
@@ -46,6 +58,12 @@ def main():
     evaluate_parser.add_argument("--run_name", type=str, default="run")
     evaluate_parser.add_argument("--log_dir", type=str, default="mlruns")
     evaluate_parser.add_argument("--data_path", type=str, default="data")
+    evaluate_parser.add_argument(
+        "--dataset_wrapper",
+        type=str,
+        default=None,
+        help="Fully-qualified class name of a dataset wrapper to apply, e.g. mypackage.module.MyWrapper",
+    )
     evaluate_parser.add_argument(
         "--compute_dc_ac_metrics",
         action="store_true",
@@ -71,6 +89,12 @@ def main():
     predict_parser.add_argument("--run_name", type=str, default="run")
     predict_parser.add_argument("--log_dir", type=str, default="mlruns")
     predict_parser.add_argument("--data_path", type=str, default="data")
+    predict_parser.add_argument(
+        "--dataset_wrapper",
+        type=str,
+        default=None,
+        help="Fully-qualified class name of a dataset wrapper to apply, e.g. mypackage.module.MyWrapper",
+    )
     predict_parser.add_argument("--output_path", type=str, default="data")
 
     args = parser.parse_args()
diff --git a/gridfm_graphkit/cli.py b/gridfm_graphkit/cli.py
index 90c63b9..5c35a88 100644
--- a/gridfm_graphkit/cli.py
+++ b/gridfm_graphkit/cli.py
@@ -55,10 +55,12 @@ def main_cli(args):
     L.seed_everything(config_args.seed, workers=True)
 
     normalizer_stats_path = getattr(args, "normalizer_stats", None)
+    dataset_wrapper = getattr(args, "dataset_wrapper", None)
     litGrid = LitGridHeteroDataModule(
         config_args,
         args.data_path,
         normalizer_stats_path=normalizer_stats_path,
+        dataset_wrapper=dataset_wrapper,
     )
     model = get_task(config_args, litGrid.data_normalizers)
     if args.command != "train":
diff --git a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
index 4ac0125..dfd2d67 100644
--- a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
+++ b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
@@ -87,9 +87,11 @@ def __init__(
         args: NestedNamespace,
         data_dir: str = "./data",
         normalizer_stats_path: str = None,
+        dataset_wrapper: str = None,
     ):
         super().__init__()
         self.data_dir = data_dir
+        self.dataset_wrapper = dataset_wrapper
         self.batch_size = int(args.training.batch_size)
         self.split_by_load_scenario_idx = getattr(
             args.data,
@@ -149,6 +151,28 @@ def setup(self, stage: str):
                     data_normalizer=data_normalizer,
                     transform=get_task_transforms(args=self.args),
                 )
+
+            if self.dataset_wrapper is not None:
+                import importlib
+                if "." not in self.dataset_wrapper:
+                    raise ValueError(
+                        f"dataset_wrapper '{self.dataset_wrapper}' is not a fully-qualified "
+                        "class name (expected 'module.ClassName').",
+                    )
+                module_name, class_name = self.dataset_wrapper.rsplit(".", 1)
+                try:
+                    module = importlib.import_module(module_name)
+                except ModuleNotFoundError as e:
+                    raise ModuleNotFoundError(
+                        f"dataset_wrapper module '{module_name}' could not be imported: {e}.",
+                    ) from e
+                wrapper_cls = getattr(module, class_name, None)
+                if wrapper_cls is None:
+                    raise AttributeError(
+                        f"dataset_wrapper class '{class_name}' not found in module '{module_name}'.",
+                    )
+                dataset = wrapper_cls(dataset)
+
             self.datasets.append(dataset)
 
             num_scenarios = self.args.data.scenarios[i]

From ac95dde00ebd6e7db6b3e7c07d6c3902c8349650 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Wed, 18 Mar 2026 11:46:18 +0100
Subject: [PATCH 16/39] swap call order to support wrapping of ds

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 .../datasets/hetero_powergrid_datamodule.py   | 42 +++++++++----------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
index dfd2d67..f79461b 100644
--- a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
+++ b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
@@ -152,27 +152,6 @@ def setup(self, stage: str):
                     transform=get_task_transforms(args=self.args),
                 )
 
-            if self.dataset_wrapper is not None:
-                import importlib
-                if "." not in self.dataset_wrapper:
-                    raise ValueError(
-                        f"dataset_wrapper '{self.dataset_wrapper}' is not a fully-qualified "
-                        "class name (expected 'module.ClassName').",
-                    )
-                module_name, class_name = self.dataset_wrapper.rsplit(".", 1)
-                try:
-                    module = importlib.import_module(module_name)
-                except ModuleNotFoundError as e:
-                    raise ModuleNotFoundError(
-                        f"dataset_wrapper module '{module_name}' could not be imported: {e}.",
-                    ) from e
-                wrapper_cls = getattr(module, class_name, None)
-                if wrapper_cls is None:
-                    raise AttributeError(
-                        f"dataset_wrapper class '{class_name}' not found in module '{module_name}'.",
-                    )
-                dataset = wrapper_cls(dataset)
-
             self.datasets.append(dataset)
 
             num_scenarios = self.args.data.scenarios[i]
@@ -195,6 +174,27 @@ def setup(self, stage: str):
 
             dataset = Subset(dataset, subset_indices)
 
+            if self.dataset_wrapper is not None:
+                import importlib
+                if "." not in self.dataset_wrapper:
+                    raise ValueError(
+                        f"dataset_wrapper '{self.dataset_wrapper}' is not a fully-qualified "
+                        "class name (expected 'module.ClassName').",
+                    )
+                module_name, class_name = self.dataset_wrapper.rsplit(".", 1)
+                try:
+                    module = importlib.import_module(module_name)
+                except ModuleNotFoundError as e:
+                    raise ModuleNotFoundError(
+                        f"dataset_wrapper module '{module_name}' could not be imported: {e}.",
+                    ) from e
+                wrapper_cls = getattr(module, class_name, None)
+                if wrapper_cls is None:
+                    raise AttributeError(
+                        f"dataset_wrapper class '{class_name}' not found in module '{module_name}'.",
+                    )
+                dataset = wrapper_cls(dataset)
+
             # Random seed set before every split, same as above
             np.random.seed(self.args.seed)
             if self.split_by_load_scenario_idx:

From 5f637e4de606d7072289116cadde76a65f8d68c9 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Wed, 18 Mar 2026 15:22:39 +0100
Subject: [PATCH 17/39] fix shared memroy file handler issue

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 .../datasets/hetero_powergrid_datamodule.py   | 22 ++++++++++++-------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
index f79461b..0d24398 100644
--- a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
+++ b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
@@ -367,13 +367,22 @@ def save_scenario_splits(self, log_dir: str):
             with open(splits_path, "w") as f:
                 json.dump(splits, f, indent=2)
 
+    def _dataloader_kwargs(self):
+        num_workers = self.args.data.workers
+        kwargs = dict(
+            num_workers=num_workers,
+            pin_memory=torch.cuda.is_available(),
+        )
+        if num_workers > 0:
+            kwargs["multiprocessing_context"] = "fork"
+        return kwargs
+
     def train_dataloader(self):
         return DataLoader(
             self.train_dataset_multi,
             batch_size=self.batch_size,
             shuffle=True,
-            num_workers=self.args.data.workers,
-            pin_memory=True,
+            **self._dataloader_kwargs(),
         )
 
     def val_dataloader(self):
@@ -381,8 +390,7 @@ def val_dataloader(self):
             self.val_dataset_multi,
             batch_size=self.batch_size,
             shuffle=False,
-            num_workers=self.args.data.workers,
-            pin_memory=True,
+            **self._dataloader_kwargs(),
         )
 
     def test_dataloader(self):
@@ -391,8 +399,7 @@ def test_dataloader(self):
                 i,
                 batch_size=self.batch_size,
                 shuffle=False,
-                num_workers=self.args.data.workers,
-                pin_memory=True,
+                **self._dataloader_kwargs(),
             )
             for i in self.test_datasets
         ]
@@ -403,8 +410,7 @@ def predict_dataloader(self):
                 i,
                 batch_size=self.batch_size,
                 shuffle=False,
-                num_workers=self.args.data.workers,
-                pin_memory=True,
+                **self._dataloader_kwargs(),
             )
             for i in self.test_datasets
         ]

From eaaca573f4785729392552078c5c97737d249d99 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Wed, 18 Mar 2026 16:25:02 +0100
Subject: [PATCH 18/39] fix race condition

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 gridfm_graphkit/datasets/hetero_powergrid_datamodule.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
index 0d24398..89472ab 100644
--- a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
+++ b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
@@ -372,9 +372,8 @@ def _dataloader_kwargs(self):
         kwargs = dict(
             num_workers=num_workers,
             pin_memory=torch.cuda.is_available(),
+            persistent_workers=num_workers > 0,
         )
-        if num_workers > 0:
-            kwargs["multiprocessing_context"] = "fork"
         return kwargs
 
     def train_dataloader(self):

From abdfe1b79e9ed05f97d75afb759d12a3d37ce37e Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Wed, 18 Mar 2026 17:03:00 +0100
Subject: [PATCH 19/39] add logging for debugging race condition

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 .../datasets/hetero_powergrid_datamodule.py   | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
index 89472ab..a345b66 100644
--- a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
+++ b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
@@ -1,9 +1,24 @@
 import json
 import torch
+import sys
+import os
+import logging
 from torch_geometric.loader import DataLoader
 from torch.utils.data import ConcatDataset
 from torch.utils.data import Subset
 import torch.distributed as dist
+
+logging.basicConfig(
+    level=logging.DEBUG,
+    format="%(asctime)s [%(process)d] %(levelname)s %(message)s",
+    stream=sys.stderr,
+    force=True,
+)
+_dm_log = logging.getLogger("gridfm.datamodule")
+
+
+def _debug_worker_init(worker_id):
+    _dm_log.debug("Worker %d started (pid=%d)", worker_id, os.getpid())
 from gridfm_graphkit.io.param_handler import (
     NestedNamespace,
     load_normalizer,
@@ -175,6 +190,7 @@ def setup(self, stage: str):
             dataset = Subset(dataset, subset_indices)
 
             if self.dataset_wrapper is not None:
+                _dm_log.debug("Wrapping dataset with '%s' (size=%d)", self.dataset_wrapper, len(dataset))
                 import importlib
                 if "." not in self.dataset_wrapper:
                     raise ValueError(
@@ -194,6 +210,24 @@ def setup(self, stage: str):
                         f"dataset_wrapper class '{class_name}' not found in module '{module_name}'.",
                     )
                 dataset = wrapper_cls(dataset)
+                _dm_log.debug("Dataset wrapped successfully: %s", type(dataset).__name__)
+
+                # Monkey-patch __getitem__ to log every access and detect hangs
+                _original_getitem = dataset.__class__.__getitem__
+
+                def _traced_getitem(self_inner, idx):
+                    _dm_log.debug(
+                        "__getitem__(%d) called on %s (pid=%d)",
+                        idx, type(self_inner).__name__, os.getpid(),
+                    )
+                    result = _original_getitem(self_inner, idx)
+                    _dm_log.debug(
+                        "__getitem__(%d) returned on %s (pid=%d)",
+                        idx, type(self_inner).__name__, os.getpid(),
+                    )
+                    return result
+
+                dataset.__class__.__getitem__ = _traced_getitem
 
             # Random seed set before every split, same as above
             np.random.seed(self.args.seed)
@@ -369,14 +403,22 @@ def save_scenario_splits(self, log_dir: str):
 
     def _dataloader_kwargs(self):
         num_workers = self.args.data.workers
+        _dm_log.debug(
+            "_dataloader_kwargs: num_workers=%d pin_memory=%s persistent_workers=%s",
+            num_workers,
+            torch.cuda.is_available(),
+            num_workers > 0,
+        )
         kwargs = dict(
             num_workers=num_workers,
             pin_memory=torch.cuda.is_available(),
             persistent_workers=num_workers > 0,
+            worker_init_fn=_debug_worker_init,
         )
         return kwargs
 
     def train_dataloader(self):
+        _dm_log.debug("Creating train_dataloader (dataset size=%d)", len(self.train_dataset_multi))
         return DataLoader(
             self.train_dataset_multi,
             batch_size=self.batch_size,
@@ -385,6 +427,7 @@ def train_dataloader(self):
         )
 
     def val_dataloader(self):
+        _dm_log.debug("Creating val_dataloader (dataset size=%d)", len(self.val_dataset_multi))
         return DataLoader(
             self.val_dataset_multi,
             batch_size=self.batch_size,

From 7b02258ac3a620541d300c50bcf603817a06f0b5 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Wed, 18 Mar 2026 18:13:38 +0100
Subject: [PATCH 20/39] add registry support for (3rd party) dataset wrapper

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 gridfm_graphkit/__main__.py                   | 32 ++++++++++++++++---
 gridfm_graphkit/cli.py                        | 12 +++++++
 .../datasets/hetero_powergrid_datamodule.py   | 20 ++----------
 gridfm_graphkit/io/registries.py              |  1 +
 4 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/gridfm_graphkit/__main__.py b/gridfm_graphkit/__main__.py
index d006718..9f6cf7b 100644
--- a/gridfm_graphkit/__main__.py
+++ b/gridfm_graphkit/__main__.py
@@ -22,7 +22,13 @@ def main():
         "--dataset_wrapper",
         type=str,
         default=None,
-        help="Fully-qualified class name of a dataset wrapper to apply, e.g. mypackage.module.MyWrapper",
+        help="Registered name of a dataset wrapper (see DATASET_WRAPPER_REGISTRY), e.g. SharedMemoryCacheDataset",
+    )
+    train_parser.add_argument(
+        "--plugins",
+        nargs="*",
+        default=[],
+        help="Python packages to import for plugin registration, e.g. gridfm_graphkit_ee",
     )
 
     # ---- FINETUNE SUBCOMMAND ----
@@ -37,7 +43,13 @@ def main():
         "--dataset_wrapper",
         type=str,
         default=None,
-        help="Fully-qualified class name of a dataset wrapper to apply, e.g. mypackage.module.MyWrapper",
+        help="Registered name of a dataset wrapper (see DATASET_WRAPPER_REGISTRY), e.g. SharedMemoryCacheDataset",
+    )
+    finetune_parser.add_argument(
+        "--plugins",
+        nargs="*",
+        default=[],
+        help="Python packages to import for plugin registration, e.g. gridfm_graphkit_ee",
     )
 
     # ---- EVALUATE SUBCOMMAND ----
@@ -62,7 +74,13 @@ def main():
         "--dataset_wrapper",
         type=str,
         default=None,
-        help="Fully-qualified class name of a dataset wrapper to apply, e.g. mypackage.module.MyWrapper",
+        help="Registered name of a dataset wrapper (see DATASET_WRAPPER_REGISTRY), e.g. SharedMemoryCacheDataset",
+    )
+    evaluate_parser.add_argument(
+        "--plugins",
+        nargs="*",
+        default=[],
+        help="Python packages to import for plugin registration, e.g. gridfm_graphkit_ee",
     )
     evaluate_parser.add_argument(
         "--compute_dc_ac_metrics",
@@ -93,7 +111,13 @@ def main():
         "--dataset_wrapper",
         type=str,
         default=None,
-        help="Fully-qualified class name of a dataset wrapper to apply, e.g. mypackage.module.MyWrapper",
+        help="Registered name of a dataset wrapper (see DATASET_WRAPPER_REGISTRY), e.g. SharedMemoryCacheDataset",
+    )
+    predict_parser.add_argument(
+        "--plugins",
+        nargs="*",
+        default=[],
+        help="Python packages to import for plugin registration, e.g. gridfm_graphkit_ee",
     )
     predict_parser.add_argument("--output_path", type=str, default="data")
 
diff --git a/gridfm_graphkit/cli.py b/gridfm_graphkit/cli.py
index 5c35a88..a0ec5bc 100644
--- a/gridfm_graphkit/cli.py
+++ b/gridfm_graphkit/cli.py
@@ -56,6 +56,18 @@ def main_cli(args):
 
     normalizer_stats_path = getattr(args, "normalizer_stats", None)
     dataset_wrapper = getattr(args, "dataset_wrapper", None)
+
+    # Import plugin packages so their @DATASET_WRAPPER_REGISTRY.register decorators fire
+    import importlib
+    for plugin_pkg in getattr(args, "plugins", []):
+        try:
+            importlib.import_module(plugin_pkg)
+        except ModuleNotFoundError as e:
+            raise ModuleNotFoundError(
+                f"Plugin package '{plugin_pkg}' could not be imported: {e}. "
+                "Make sure it is installed in the current environment."
+            ) from e
+
     litGrid = LitGridHeteroDataModule(
         config_args,
         args.data_path,
diff --git a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
index a345b66..401910a 100644
--- a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
+++ b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
@@ -19,6 +19,7 @@
 
 def _debug_worker_init(worker_id):
     _dm_log.debug("Worker %d started (pid=%d)", worker_id, os.getpid())
+from gridfm_graphkit.io.registries import DATASET_WRAPPER_REGISTRY
 from gridfm_graphkit.io.param_handler import (
     NestedNamespace,
     load_normalizer,
@@ -191,24 +192,7 @@ def setup(self, stage: str):
 
             if self.dataset_wrapper is not None:
                 _dm_log.debug("Wrapping dataset with '%s' (size=%d)", self.dataset_wrapper, len(dataset))
-                import importlib
-                if "." not in self.dataset_wrapper:
-                    raise ValueError(
-                        f"dataset_wrapper '{self.dataset_wrapper}' is not a fully-qualified "
-                        "class name (expected 'module.ClassName').",
-                    )
-                module_name, class_name = self.dataset_wrapper.rsplit(".", 1)
-                try:
-                    module = importlib.import_module(module_name)
-                except ModuleNotFoundError as e:
-                    raise ModuleNotFoundError(
-                        f"dataset_wrapper module '{module_name}' could not be imported: {e}.",
-                    ) from e
-                wrapper_cls = getattr(module, class_name, None)
-                if wrapper_cls is None:
-                    raise AttributeError(
-                        f"dataset_wrapper class '{class_name}' not found in module '{module_name}'.",
-                    )
+                wrapper_cls = DATASET_WRAPPER_REGISTRY.get(self.dataset_wrapper)
                 dataset = wrapper_cls(dataset)
                 _dm_log.debug("Dataset wrapped successfully: %s", type(dataset).__name__)
 
diff --git a/gridfm_graphkit/io/registries.py b/gridfm_graphkit/io/registries.py
index 27d56b7..32feb20 100644
--- a/gridfm_graphkit/io/registries.py
+++ b/gridfm_graphkit/io/registries.py
@@ -43,3 +43,4 @@ def __len__(self):
 TASK_REGISTRY = Registry("task")
 TRANSFORM_REGISTRY = Registry("transform")
 PHYSICS_DECODER_REGISTRY = Registry("physics_decoder")
+DATASET_WRAPPER_REGISTRY = Registry("dataset_wrapper")

From 52ee998713c0ffe1f7787c9c599681488bd425e6 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Wed, 18 Mar 2026 23:41:21 +0100
Subject: [PATCH 21/39] remove debug code, add parameter

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 gridfm_graphkit/__main__.py                   | 24 +++++++++++
 gridfm_graphkit/cli.py                        |  5 +++
 .../datasets/hetero_powergrid_datamodule.py   | 42 -------------------
 3 files changed, 29 insertions(+), 42 deletions(-)

diff --git a/gridfm_graphkit/__main__.py b/gridfm_graphkit/__main__.py
index 9f6cf7b..c19ae0e 100644
--- a/gridfm_graphkit/__main__.py
+++ b/gridfm_graphkit/__main__.py
@@ -30,6 +30,12 @@ def main():
         default=[],
         help="Python packages to import for plugin registration, e.g. gridfm_graphkit_ee",
     )
+    train_parser.add_argument(
+        "--num_workers",
+        type=int,
+        default=None,
+        help="Override data.workers from the YAML config. Use 0 to debug worker crashes.",
+    )
 
     # ---- FINETUNE SUBCOMMAND ----
     finetune_parser = subparsers.add_parser("finetune", help="Run fine-tuning")
@@ -51,6 +57,12 @@ def main():
         default=[],
         help="Python packages to import for plugin registration, e.g. gridfm_graphkit_ee",
     )
+    finetune_parser.add_argument(
+        "--num_workers",
+        type=int,
+        default=None,
+        help="Override data.workers from the YAML config. Use 0 to debug worker crashes.",
+    )
 
     # ---- EVALUATE SUBCOMMAND ----
     evaluate_parser = subparsers.add_parser(
@@ -82,6 +94,12 @@ def main():
         default=[],
         help="Python packages to import for plugin registration, e.g. gridfm_graphkit_ee",
     )
+    evaluate_parser.add_argument(
+        "--num_workers",
+        type=int,
+        default=None,
+        help="Override data.workers from the YAML config. Use 0 to debug worker crashes.",
+    )
     evaluate_parser.add_argument(
         "--compute_dc_ac_metrics",
         action="store_true",
@@ -119,6 +137,12 @@ def main():
         default=[],
         help="Python packages to import for plugin registration, e.g. gridfm_graphkit_ee",
     )
+    predict_parser.add_argument(
+        "--num_workers",
+        type=int,
+        default=None,
+        help="Override data.workers from the YAML config. Use 0 to debug worker crashes.",
+    )
     predict_parser.add_argument("--output_path", type=str, default="data")
 
     args = parser.parse_args()
diff --git a/gridfm_graphkit/cli.py b/gridfm_graphkit/cli.py
index a0ec5bc..d9bae4e 100644
--- a/gridfm_graphkit/cli.py
+++ b/gridfm_graphkit/cli.py
@@ -57,6 +57,11 @@ def main_cli(args):
     normalizer_stats_path = getattr(args, "normalizer_stats", None)
     dataset_wrapper = getattr(args, "dataset_wrapper", None)
 
+    # CLI --num_workers overrides the YAML value (useful for debugging with 0)
+    num_workers_override = getattr(args, "num_workers", None)
+    if num_workers_override is not None:
+        config_args.data.workers = num_workers_override
+
     # Import plugin packages so their @DATASET_WRAPPER_REGISTRY.register decorators fire
     import importlib
     for plugin_pkg in getattr(args, "plugins", []):
diff --git a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
index 401910a..230198a 100644
--- a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
+++ b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
@@ -1,24 +1,10 @@
 import json
 import torch
-import sys
 import os
-import logging
 from torch_geometric.loader import DataLoader
 from torch.utils.data import ConcatDataset
 from torch.utils.data import Subset
 import torch.distributed as dist
-
-logging.basicConfig(
-    level=logging.DEBUG,
-    format="%(asctime)s [%(process)d] %(levelname)s %(message)s",
-    stream=sys.stderr,
-    force=True,
-)
-_dm_log = logging.getLogger("gridfm.datamodule")
-
-
-def _debug_worker_init(worker_id):
-    _dm_log.debug("Worker %d started (pid=%d)", worker_id, os.getpid())
 from gridfm_graphkit.io.registries import DATASET_WRAPPER_REGISTRY
 from gridfm_graphkit.io.param_handler import (
     NestedNamespace,
@@ -191,27 +177,8 @@ def setup(self, stage: str):
             dataset = Subset(dataset, subset_indices)
 
             if self.dataset_wrapper is not None:
-                _dm_log.debug("Wrapping dataset with '%s' (size=%d)", self.dataset_wrapper, len(dataset))
                 wrapper_cls = DATASET_WRAPPER_REGISTRY.get(self.dataset_wrapper)
                 dataset = wrapper_cls(dataset)
-                _dm_log.debug("Dataset wrapped successfully: %s", type(dataset).__name__)
-
-                # Monkey-patch __getitem__ to log every access and detect hangs
-                _original_getitem = dataset.__class__.__getitem__
-
-                def _traced_getitem(self_inner, idx):
-                    _dm_log.debug(
-                        "__getitem__(%d) called on %s (pid=%d)",
-                        idx, type(self_inner).__name__, os.getpid(),
-                    )
-                    result = _original_getitem(self_inner, idx)
-                    _dm_log.debug(
-                        "__getitem__(%d) returned on %s (pid=%d)",
-                        idx, type(self_inner).__name__, os.getpid(),
-                    )
-                    return result
-
-                dataset.__class__.__getitem__ = _traced_getitem
 
             # Random seed set before every split, same as above
             np.random.seed(self.args.seed)
@@ -387,22 +354,14 @@ def save_scenario_splits(self, log_dir: str):
 
     def _dataloader_kwargs(self):
         num_workers = self.args.data.workers
-        _dm_log.debug(
-            "_dataloader_kwargs: num_workers=%d pin_memory=%s persistent_workers=%s",
-            num_workers,
-            torch.cuda.is_available(),
-            num_workers > 0,
-        )
         kwargs = dict(
             num_workers=num_workers,
             pin_memory=torch.cuda.is_available(),
             persistent_workers=num_workers > 0,
-            worker_init_fn=_debug_worker_init,
         )
         return kwargs
 
     def train_dataloader(self):
-        _dm_log.debug("Creating train_dataloader (dataset size=%d)", len(self.train_dataset_multi))
         return DataLoader(
             self.train_dataset_multi,
             batch_size=self.batch_size,
@@ -411,7 +370,6 @@ def train_dataloader(self):
         )
 
     def val_dataloader(self):
-        _dm_log.debug("Creating val_dataloader (dataset size=%d)", len(self.val_dataset_multi))
         return DataLoader(
             self.val_dataset_multi,
             batch_size=self.batch_size,

From 1a4de9577503353edc07d221b316ddeb9b1b1d68 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Thu, 19 Mar 2026 12:03:15 +0100
Subject: [PATCH 22/39] remove unnecessary param

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 gridfm_graphkit/__main__.py                   | 24 +++++++++++++++++++
 gridfm_graphkit/cli.py                        |  2 ++
 .../datasets/hetero_powergrid_datamodule.py   |  8 ++++++-
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/gridfm_graphkit/__main__.py b/gridfm_graphkit/__main__.py
index c19ae0e..3a3947b 100644
--- a/gridfm_graphkit/__main__.py
+++ b/gridfm_graphkit/__main__.py
@@ -36,6 +36,12 @@ def main():
         default=None,
         help="Override data.workers from the YAML config. Use 0 to debug worker crashes.",
     )
+    train_parser.add_argument(
+        "--dataset_wrapper_cache_dir",
+        type=str,
+        default=None,
+        help="Directory for the dataset wrapper's disk cache. If set, cache is loaded from here when present and saved here after first population.",
+    )
 
     # ---- FINETUNE SUBCOMMAND ----
     finetune_parser = subparsers.add_parser("finetune", help="Run fine-tuning")
@@ -63,6 +69,12 @@ def main():
         default=None,
         help="Override data.workers from the YAML config. Use 0 to debug worker crashes.",
     )
+    finetune_parser.add_argument(
+        "--dataset_wrapper_cache_dir",
+        type=str,
+        default=None,
+        help="Directory for the dataset wrapper's disk cache. If set, cache is loaded from here when present and saved here after first population.",
+    )
 
     # ---- EVALUATE SUBCOMMAND ----
     evaluate_parser = subparsers.add_parser(
@@ -100,6 +112,12 @@ def main():
         default=None,
         help="Override data.workers from the YAML config. Use 0 to debug worker crashes.",
     )
+    evaluate_parser.add_argument(
+        "--dataset_wrapper_cache_dir",
+        type=str,
+        default=None,
+        help="Directory for the dataset wrapper's disk cache. If set, cache is loaded from here when present and saved here after first population.",
+    )
     evaluate_parser.add_argument(
         "--compute_dc_ac_metrics",
         action="store_true",
@@ -143,6 +161,12 @@ def main():
         default=None,
         help="Override data.workers from the YAML config. Use 0 to debug worker crashes.",
     )
+    predict_parser.add_argument(
+        "--dataset_wrapper_cache_dir",
+        type=str,
+        default=None,
+        help="Directory for the dataset wrapper's disk cache. If set, cache is loaded from here when present and saved here after first population.",
+    )
     predict_parser.add_argument("--output_path", type=str, default="data")
 
     args = parser.parse_args()
diff --git a/gridfm_graphkit/cli.py b/gridfm_graphkit/cli.py
index d9bae4e..f2bcf24 100644
--- a/gridfm_graphkit/cli.py
+++ b/gridfm_graphkit/cli.py
@@ -56,6 +56,7 @@ def main_cli(args):
 
     normalizer_stats_path = getattr(args, "normalizer_stats", None)
     dataset_wrapper = getattr(args, "dataset_wrapper", None)
+    dataset_wrapper_cache_dir = getattr(args, "dataset_wrapper_cache_dir", None)
 
     # CLI --num_workers overrides the YAML value (useful for debugging with 0)
     num_workers_override = getattr(args, "num_workers", None)
@@ -78,6 +79,7 @@ def main_cli(args):
         args.data_path,
         normalizer_stats_path=normalizer_stats_path,
         dataset_wrapper=dataset_wrapper,
+        dataset_wrapper_cache_dir=dataset_wrapper_cache_dir,
     )
     model = get_task(config_args, litGrid.data_normalizers)
     if args.command != "train":
diff --git a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
index 230198a..cda3c40 100644
--- a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
+++ b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
@@ -90,10 +90,12 @@ def __init__(
         data_dir: str = "./data",
         normalizer_stats_path: str = None,
         dataset_wrapper: str = None,
+        dataset_wrapper_cache_dir: str = None,
     ):
         super().__init__()
         self.data_dir = data_dir
         self.dataset_wrapper = dataset_wrapper
+        self.dataset_wrapper_cache_dir = dataset_wrapper_cache_dir
         self.batch_size = int(args.training.batch_size)
         self.split_by_load_scenario_idx = getattr(
             args.data,
@@ -178,7 +180,11 @@ def setup(self, stage: str):
 
             if self.dataset_wrapper is not None:
                 wrapper_cls = DATASET_WRAPPER_REGISTRY.get(self.dataset_wrapper)
-                dataset = wrapper_cls(dataset)
+                dataset = wrapper_cls(dataset, cache_dir=self.dataset_wrapper_cache_dir)
+                # Populate the cache eagerly so the progress bar is visible
+                # and the sanity check doesn't appear frozen.
+                if hasattr(dataset, "_setup_cache"):
+                    dataset._setup_cache()
 
             # Random seed set before every split, same as above
             np.random.seed(self.args.seed)

From d75df26acbf1a89a75a9f2ec0e827428bdc891a6 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Thu, 19 Mar 2026 13:17:27 +0100
Subject: [PATCH 23/39] fix chache buildup order

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 gridfm_graphkit/datasets/hetero_powergrid_datamodule.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
index cda3c40..d957a7b 100644
--- a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
+++ b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
@@ -181,10 +181,6 @@ def setup(self, stage: str):
             if self.dataset_wrapper is not None:
                 wrapper_cls = DATASET_WRAPPER_REGISTRY.get(self.dataset_wrapper)
                 dataset = wrapper_cls(dataset, cache_dir=self.dataset_wrapper_cache_dir)
-                # Populate the cache eagerly so the progress bar is visible
-                # and the sanity check doesn't appear frozen.
-                if hasattr(dataset, "_setup_cache"):
-                    dataset._setup_cache()
 
             # Random seed set before every split, same as above
             np.random.seed(self.args.seed)
@@ -244,6 +240,11 @@ def setup(self, stage: str):
                     saved_stats,
                 )
 
+            # Populate the wrapper cache now that the normalizer is fitted,
+            # so transform() has BaseMVA set when __getitem__ is called.
+            if self.dataset_wrapper is not None and hasattr(dataset, "_setup_cache"):
+                dataset._setup_cache()
+
             self.train_datasets.append(train_dataset)
             self.val_datasets.append(val_dataset)
             self.test_datasets.append(test_dataset)

From b70d64e8371c95f0964a20188ea6a4fe0827df59 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Thu, 19 Mar 2026 13:48:30 +0100
Subject: [PATCH 24/39] changed introduced by precommit

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 .github/workflows/ci-build.yaml      |  4 ++--
 docs/index.md                        |  2 +-
 docs/tasks/base_task.md              | 12 ++++++------
 docs/tasks/feature_reconstruction.md | 12 ++++++------
 docs/tasks/reconstruction_task.md    |  4 ++--
 gridfm_graphkit/cli.py               |  3 ++-
 integrationtests/test_base_set.py    |  2 +-
 7 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/ci-build.yaml b/.github/workflows/ci-build.yaml
index 01b8b42..453d25e 100644
--- a/.github/workflows/ci-build.yaml
+++ b/.github/workflows/ci-build.yaml
@@ -67,11 +67,11 @@ jobs:
           python3 -m pip install --upgrade pip
           python3 -m pip install --upgrade "git+https://github.com/ibm/detect-secrets.git@master#egg=detect-secrets"
           python3 -m pip install boxsdk
-          
+
       - name: Scan repository & write snapshot
         run: |
           mkdir -p security-outputs
-      
+
           # Run detect-secrets while skipping binary files
           detect-secrets scan \
             --exclude-files '.*\.ipynb$|.*\.(png|jpg|jpeg|gif|pdf|onnx|pt|pth|bin|zip)$' \
diff --git a/docs/index.md b/docs/index.md
index e38d000..e843631 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -14,4 +14,4 @@ This library is brought to you by the GridFM team to train, finetune and interac
 
 
 
-## Citation: TBD
\ No newline at end of file
+## Citation: TBD
diff --git a/docs/tasks/base_task.md b/docs/tasks/base_task.md
index a0acbae..1153a2d 100644
--- a/docs/tasks/base_task.md
+++ b/docs/tasks/base_task.md
@@ -168,23 +168,23 @@ class MyCustomTask(BaseTask):
     def __init__(self, args, data_normalizers):
         super().__init__(args, data_normalizers)
         # Initialize task-specific components
-        
+
     def forward(self, x_dict, edge_index_dict, edge_attr_dict, mask_dict):
         # Implement forward pass
         pass
-        
+
     def training_step(self, batch):
         # Implement training logic
         pass
-        
+
     def validation_step(self, batch, batch_idx):
         # Implement validation logic
         pass
-        
+
     def test_step(self, batch, batch_idx, dataloader_idx=0):
         # Implement test logic
         pass
-        
+
     def predict_step(self, batch, batch_idx, dataloader_idx=0):
         # Implement prediction logic
         pass
@@ -213,4 +213,4 @@ data:
 - [Reconstruction Task](reconstruction_task.md): Base class for reconstruction tasks
 - [Power Flow Task](power_flow.md): Concrete implementation for power flow
 - [Optimal Power Flow Task](optimal_power_flow.md): Concrete implementation for OPF
-- [State Estimation Task](state_estimation.md): Concrete implementation for state estimation
\ No newline at end of file
+- [State Estimation Task](state_estimation.md): Concrete implementation for state estimation
diff --git a/docs/tasks/feature_reconstruction.md b/docs/tasks/feature_reconstruction.md
index 6356c82..fbde3ea 100644
--- a/docs/tasks/feature_reconstruction.md
+++ b/docs/tasks/feature_reconstruction.md
@@ -144,26 +144,26 @@ class MyCustomTask(ReconstructionTask):
     def __init__(self, args, data_normalizers):
         super().__init__(args, data_normalizers)
         # Add custom initialization
-    
+
     def test_step(self, batch, batch_idx, dataloader_idx=0):
         # Implement custom test logic
         output, loss_dict = self.shared_step(batch)
-        
+
         # Add custom metrics
         custom_metric = self.compute_custom_metric(output, batch)
         loss_dict["Custom Metric"] = custom_metric
-        
+
         # Log metrics
         for metric, value in loss_dict.items():
             self.log(f"{dataset_name}/{metric}", value)
-        
+
         return loss_dict["loss"]
-    
+
     def predict_step(self, batch, batch_idx, dataloader_idx=0):
         # Implement custom prediction logic
         output, _ = self.shared_step(batch)
         return {"predictions": output}
-    
+
     def on_test_end(self):
         # Custom analysis and visualization
         # Generate reports, plots, etc.
diff --git a/docs/tasks/reconstruction_task.md b/docs/tasks/reconstruction_task.md
index 28bd8aa..54e9e5a 100644
--- a/docs/tasks/reconstruction_task.md
+++ b/docs/tasks/reconstruction_task.md
@@ -218,7 +218,7 @@ class CustomReconstructionTask(ReconstructionTask):
         output, loss_dict = self.shared_step(batch)
         # Add custom metrics
         return loss_dict["loss"]
-    
+
     def on_test_end(self):
         # Custom analysis and visualization
         super().on_test_end()
@@ -290,4 +290,4 @@ The following task classes extend `ReconstructionTask`:
 - [Power Flow Task](power_flow.md): Power flow analysis implementation
 - [Optimal Power Flow Task](optimal_power_flow.md): OPF optimization implementation
 - [State Estimation Task](state_estimation.md): State estimation implementation
-- [Loss Functions](../training/loss.md): Available loss functions
\ No newline at end of file
+- [Loss Functions](../training/loss.md): Available loss functions
diff --git a/gridfm_graphkit/cli.py b/gridfm_graphkit/cli.py
index f2bcf24..14146ad 100644
--- a/gridfm_graphkit/cli.py
+++ b/gridfm_graphkit/cli.py
@@ -65,13 +65,14 @@ def main_cli(args):
 
     # Import plugin packages so their @DATASET_WRAPPER_REGISTRY.register decorators fire
     import importlib
+
     for plugin_pkg in getattr(args, "plugins", []):
         try:
             importlib.import_module(plugin_pkg)
         except ModuleNotFoundError as e:
             raise ModuleNotFoundError(
                 f"Plugin package '{plugin_pkg}' could not be imported: {e}. "
-                "Make sure it is installed in the current environment."
+                "Make sure it is installed in the current environment.",
             ) from e
 
     litGrid = LitGridHeteroDataModule(
diff --git a/integrationtests/test_base_set.py b/integrationtests/test_base_set.py
index e33e11c..195104e 100644
--- a/integrationtests/test_base_set.py
+++ b/integrationtests/test_base_set.py
@@ -75,7 +75,7 @@ def cleanup_test_artifacts():
     """
     Backup modified files and remove generated artifacts after the test.
     """
-    training_config = "examples/config/HGNS_PF_datakit_case14.yaml"
+    training_config = " "
     backup_config = training_config + ".bak"
 
     if os.path.exists(training_config):

From bc2d5d286076ed41437c2726b71cebf0b3b49b6f Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Thu, 19 Mar 2026 13:49:36 +0100
Subject: [PATCH 25/39] manual precommit fixes

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 gridfm_graphkit/datasets/hetero_powergrid_datamodule.py | 1 -
 integrationtests/test_base_set.py                       | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
index d957a7b..95034ee 100644
--- a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
+++ b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
@@ -19,7 +19,6 @@
 import numpy as np
 import random
 import warnings
-import os
 import lightning as L
 from typing import List
 from lightning.pytorch.loggers import MLFlowLogger
diff --git a/integrationtests/test_base_set.py b/integrationtests/test_base_set.py
index 195104e..03eed26 100644
--- a/integrationtests/test_base_set.py
+++ b/integrationtests/test_base_set.py
@@ -9,7 +9,7 @@
 
 
 def execute_and_live_output(cmd) -> None:
-    result = subprocess.run(
+    subprocess.run(
         cmd,
         text=True,
         shell=True,

From da3f58d14af61be8a5e21f20d6f2dcf70ff8e50b Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Thu, 19 Mar 2026 15:30:54 +0100
Subject: [PATCH 26/39] fix precommit hook

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 integrationtests/test_base_set.py | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/integrationtests/test_base_set.py b/integrationtests/test_base_set.py
index 03eed26..f8e1cda 100644
--- a/integrationtests/test_base_set.py
+++ b/integrationtests/test_base_set.py
@@ -9,12 +9,7 @@
 
 
 def execute_and_live_output(cmd) -> None:
-    subprocess.run(
-        cmd,
-        text=True,
-        shell=True,
-        check=True
-    )
+    subprocess.run(cmd, text=True, shell=True, check=True)
 
 
 def prepare_config():
@@ -42,7 +37,7 @@ def prepare_config():
     print(f"  - load.scenarios: {config['load']['scenarios']}")
     print(
         f"  - topology_perturbation.n_topology_variants: "
-        f"{config['topology_perturbation']['n_topology_variants']}"
+        f"{config['topology_perturbation']['n_topology_variants']}",
     )
 
     return config_path
@@ -115,9 +110,7 @@ def test_train(cleanup_test_artifacts):
 
         config_path = prepare_config()
 
-        execute_and_live_output(
-            f"gridfm_datakit generate {config_path}"
-        )
+        execute_and_live_output(f"gridfm_datakit generate {config_path}")
     else:
         print(f"Data directory '{data_dir}' already exists, skipping generation.")
 
@@ -129,7 +122,7 @@ def test_train(cleanup_test_artifacts):
         f"--data_path data_out/ "
         f"--exp_name exp1 "
         f"--run_name run1 "
-        f"--log_dir logs"
+        f"--log_dir logs",
     )
 
     log_base = "logs"
@@ -145,10 +138,7 @@ def test_train(cleanup_test_artifacts):
     latest_run_dir = max(run_dirs, key=os.path.getmtime)
 
     metrics_file = os.path.join(
-        latest_run_dir,
-        "artifacts",
-        "test",
-        "case14_ieee_metrics.csv"
+        latest_run_dir, "artifacts", "test", "case14_ieee_metrics.csv",
     )
 
     assert os.path.exists(metrics_file), f"Metrics file not found: {metrics_file}"
@@ -164,4 +154,4 @@ def test_train(cleanup_test_artifacts):
         f"PBE Mean value {pbe_mean_value} is outside acceptable range [1.1, 2.9]"
     )
 
-    print(f"PBE Mean value {pbe_mean_value} is within acceptable range [1.1, 2.9]")
\ No newline at end of file
+    print(f"PBE Mean value {pbe_mean_value} is within acceptable range [1.1, 2.9]")

From 2d9fe9217fc557c36ad1dc6216912693b9c1763f Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Thu, 19 Mar 2026 15:35:06 +0100
Subject: [PATCH 27/39] precommit fix

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 integrationtests/test_base_set.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/integrationtests/test_base_set.py b/integrationtests/test_base_set.py
index f8e1cda..90da468 100644
--- a/integrationtests/test_base_set.py
+++ b/integrationtests/test_base_set.py
@@ -138,7 +138,10 @@ def test_train(cleanup_test_artifacts):
     latest_run_dir = max(run_dirs, key=os.path.getmtime)
 
     metrics_file = os.path.join(
-        latest_run_dir, "artifacts", "test", "case14_ieee_metrics.csv",
+        latest_run_dir,
+        "artifacts",
+        "test",
+        "case14_ieee_metrics.csv",
     )
 
     assert os.path.exists(metrics_file), f"Metrics file not found: {metrics_file}"

From ae60499bb1a0c47400940add31f6774c9ca18880 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Thu, 19 Mar 2026 15:43:03 +0100
Subject: [PATCH 28/39] bump trivy

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 .github/workflows/ci-build.yaml | 37 ++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/ci-build.yaml b/.github/workflows/ci-build.yaml
index 453d25e..5e9bf42 100644
--- a/.github/workflows/ci-build.yaml
+++ b/.github/workflows/ci-build.yaml
@@ -168,20 +168,23 @@ jobs:
         uses: pypa/gh-action-pip-audit@v1.1.0
 
   trivy_repo:
-    name: Trivy (repo scan)
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - name: Run Trivy filesystem scan
-        uses: aquasecurity/trivy-action@0.33.1
-        with:
-          scan-type: 'fs'
-          scan-ref: '.'
-          format: 'sarif'
-          output: 'trivy-results.sarif'
-          severity: 'HIGH,CRITICAL'
-          ignore-unfixed: true
-      - name: Upload SARIF to Code Scanning
-        uses: github/codeql-action/upload-sarif@v3
-        with:
-          sarif_file: trivy-results.sarif
+      name: Trivy (repo scan)
+      runs-on: ubuntu-latest
+      permissions:
+        security-events: write
+      steps:
+        - uses: actions/checkout@v4
+        - name: Run Trivy filesystem scan
+          # Updated to v0.34.0 which contains the fix for the v0.65.0 binary install error
+          uses: aquasecurity/trivy-action@0.34.0
+          with:
+            scan-type: 'fs'
+            scan-ref: '.'
+            format: 'sarif'
+            output: 'trivy-results.sarif'
+            severity: 'HIGH,CRITICAL'
+            ignore-unfixed: true
+        - name: Upload SARIF to Code Scanning
+          uses: github/codeql-action/upload-sarif@v3
+          with:
+            sarif_file: trivy-results.sarif
\ No newline at end of file

From 6db91ccc913f58f7666f9ec9150e488231cbcccc Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Thu, 19 Mar 2026 15:48:24 +0100
Subject: [PATCH 29/39] fix precommit

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 .github/workflows/ci-build.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci-build.yaml b/.github/workflows/ci-build.yaml
index 5e9bf42..8c1ee1e 100644
--- a/.github/workflows/ci-build.yaml
+++ b/.github/workflows/ci-build.yaml
@@ -187,4 +187,4 @@ jobs:
         - name: Upload SARIF to Code Scanning
           uses: github/codeql-action/upload-sarif@v3
           with:
-            sarif_file: trivy-results.sarif
\ No newline at end of file
+            sarif_file: trivy-results.sarif

From ccac4dbe4e2b95c98a7faa4e33dccab378ff241c Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Thu, 19 Mar 2026 16:53:36 +0100
Subject: [PATCH 30/39] fix trivy

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 .github/workflows/ci-build.yaml | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci-build.yaml b/.github/workflows/ci-build.yaml
index 8c1ee1e..ea796b9 100644
--- a/.github/workflows/ci-build.yaml
+++ b/.github/workflows/ci-build.yaml
@@ -174,17 +174,19 @@ jobs:
         security-events: write
       steps:
         - uses: actions/checkout@v4
-        - name: Run Trivy filesystem scan
-          # Updated to v0.34.0 which contains the fix for the v0.65.0 binary install error
-          uses: aquasecurity/trivy-action@0.34.0
+
+        - name: Run Trivy vulnerability scanner in repo mode
+          # We use the official container-based action to avoid binary install issues
+          uses: aquasecurity/trivy-action@master
           with:
             scan-type: 'fs'
-            scan-ref: '.'
+            ignore-unfixed: true
             format: 'sarif'
             output: 'trivy-results.sarif'
             severity: 'HIGH,CRITICAL'
-            ignore-unfixed: true
+
         - name: Upload SARIF to Code Scanning
           uses: github/codeql-action/upload-sarif@v3
+          if: always() # Upload results even if vulnerabilities are found
           with:
-            sarif_file: trivy-results.sarif
+            sarif_file: 'trivy-results.sarif'

From f8f4bbd7cf8a958875d3bb9cbb66717749862633 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Tue, 24 Mar 2026 09:38:52 +0100
Subject: [PATCH 31/39] add support for dataloading performance tests

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 gridfm_graphkit/__main__.py | 45 +++++++++++++++++-
 gridfm_graphkit/cli.py      | 95 ++++++++++++++++++++++++++++++++-----
 2 files changed, 127 insertions(+), 13 deletions(-)

diff --git a/gridfm_graphkit/__main__.py b/gridfm_graphkit/__main__.py
index 3a3947b..7de2b41 100644
--- a/gridfm_graphkit/__main__.py
+++ b/gridfm_graphkit/__main__.py
@@ -1,6 +1,6 @@
 import argparse
 from datetime import datetime
-from gridfm_graphkit.cli import main_cli
+from gridfm_graphkit.cli import main_cli, benchmark_cli
 
 
 def main():
@@ -169,8 +169,49 @@ def main():
     )
     predict_parser.add_argument("--output_path", type=str, default="data")
 
+    # ---- BENCHMARK SUBCOMMAND ----
+    benchmark_parser = subparsers.add_parser(
+        "benchmark",
+        help="Benchmark train-dataloader iteration speed",
+    )
+    benchmark_parser.add_argument("--config", type=str, required=True)
+    benchmark_parser.add_argument("--data_path", type=str, default="data")
+    benchmark_parser.add_argument(
+        "--epochs",
+        type=int,
+        default=3,
+        help="Number of epochs to iterate through the train dataloader.",
+    )
+    benchmark_parser.add_argument(
+        "--dataset_wrapper",
+        type=str,
+        default=None,
+        help="Registered name of a dataset wrapper (see DATASET_WRAPPER_REGISTRY), e.g. SharedMemoryCacheDataset",
+    )
+    benchmark_parser.add_argument(
+        "--dataset_wrapper_cache_dir",
+        type=str,
+        default=None,
+        help="Directory for the dataset wrapper's disk cache.",
+    )
+    benchmark_parser.add_argument(
+        "--num_workers",
+        type=int,
+        default=None,
+        help="Override data.workers from the YAML config.",
+    )
+    benchmark_parser.add_argument(
+        "--plugins",
+        nargs="*",
+        default=[],
+        help="Python packages to import for plugin registration.",
+    )
+
     args = parser.parse_args()
-    main_cli(args)
+    if args.command == "benchmark":
+        benchmark_cli(args)
+    else:
+        main_cli(args)
 
 
 if __name__ == "__main__":
diff --git a/gridfm_graphkit/cli.py b/gridfm_graphkit/cli.py
index 14146ad..d3143d2 100644
--- a/gridfm_graphkit/cli.py
+++ b/gridfm_graphkit/cli.py
@@ -1,8 +1,11 @@
 from gridfm_graphkit.datasets.hetero_powergrid_datamodule import LitGridHeteroDataModule
 from gridfm_graphkit.io.param_handler import NestedNamespace
+from gridfm_graphkit.io.registries import DATASET_WRAPPER_REGISTRY
 from gridfm_graphkit.training.callbacks import SaveBestModelStateDict
+import importlib
 import numpy as np
 import os
+import time
 import yaml
 import torch
 import pandas as pd
@@ -15,6 +18,85 @@
 import lightning as L
 
 
+def _load_plugins(plugins: list[str]) -> None:
+    """Import plugin packages so their registry decorators fire."""
+    for plugin_pkg in plugins:
+        try:
+            importlib.import_module(plugin_pkg)
+        except ModuleNotFoundError as e:
+            raise ModuleNotFoundError(
+                f"Plugin package '{plugin_pkg}' could not be imported: {e}. "
+                "Make sure it is installed in the current environment.",
+            ) from e
+
+
+def _validate_dataset_wrapper(name: str | None) -> None:
+    """Raise a helpful error if *name* is not registered in DATASET_WRAPPER_REGISTRY."""
+    if name is None:
+        return
+    if name not in DATASET_WRAPPER_REGISTRY:
+        available = list(DATASET_WRAPPER_REGISTRY)
+        raise KeyError(
+            f"Dataset wrapper '{name}' is not registered. "
+            f"Available wrappers: {available}. "
+            "If it lives in a plugin package, pass it via --plugins."
+        )
+
+def benchmark_cli(args):
+    """Benchmark train-dataloader iteration speed over one or more epochs."""
+    with open(args.config, "r") as f:
+        base_config = yaml.safe_load(f)
+
+    config_args = NestedNamespace(**base_config)
+
+    num_workers_override = getattr(args, "num_workers", None)
+    if num_workers_override is not None:
+        config_args.data.workers = num_workers_override
+
+    _load_plugins(getattr(args, "plugins", []))
+
+    dataset_wrapper = getattr(args, "dataset_wrapper", None)
+    dataset_wrapper_cache_dir = getattr(args, "dataset_wrapper_cache_dir", None)
+    _validate_dataset_wrapper(dataset_wrapper)
+
+    print("Setting up datamodule...")
+    t0 = time.perf_counter()
+    dm = LitGridHeteroDataModule(
+        config_args,
+        args.data_path,
+        dataset_wrapper=dataset_wrapper,
+        dataset_wrapper_cache_dir=dataset_wrapper_cache_dir,
+    )
+    dm.setup(stage="fit")
+    setup_time = time.perf_counter() - t0
+    print(f"  Setup time        : {setup_time:.2f}s")
+
+    loader = dm.train_dataloader()
+    num_batches = len(loader)
+    print(f"  Train batches     : {num_batches}")
+    print(f"  Batch size        : {config_args.training.batch_size}")
+    print(f"  Workers           : {config_args.data.workers}")
+    print(f"  Dataset wrapper   : {dataset_wrapper or 'none'}")
+    print()
+
+    epoch_times = []
+    for epoch in range(args.epochs):
+        t_start = time.perf_counter()
+        for _batch in loader:
+            pass
+        elapsed = time.perf_counter() - t_start
+        per_batch = elapsed / num_batches if num_batches > 0 else 0.0
+        epoch_times.append(elapsed)
+        print(
+            f"Epoch {epoch:>3}: {elapsed:7.3f}s total  "
+            f"{per_batch:.4f}s/batch  ({num_batches} batches)"
+        )
+
+    if args.epochs > 1:
+        avg = sum(epoch_times) / len(epoch_times)
+        print(f"\nAverage over {args.epochs} epochs: {avg:.3f}s")
+
+
 def get_training_callbacks(args):
     early_stop_callback = EarlyStopping(
         monitor="Validation loss",
@@ -57,23 +139,14 @@ def main_cli(args):
     normalizer_stats_path = getattr(args, "normalizer_stats", None)
     dataset_wrapper = getattr(args, "dataset_wrapper", None)
     dataset_wrapper_cache_dir = getattr(args, "dataset_wrapper_cache_dir", None)
+    _validate_dataset_wrapper(dataset_wrapper)
 
     # CLI --num_workers overrides the YAML value (useful for debugging with 0)
     num_workers_override = getattr(args, "num_workers", None)
     if num_workers_override is not None:
         config_args.data.workers = num_workers_override
 
-    # Import plugin packages so their @DATASET_WRAPPER_REGISTRY.register decorators fire
-    import importlib
-
-    for plugin_pkg in getattr(args, "plugins", []):
-        try:
-            importlib.import_module(plugin_pkg)
-        except ModuleNotFoundError as e:
-            raise ModuleNotFoundError(
-                f"Plugin package '{plugin_pkg}' could not be imported: {e}. "
-                "Make sure it is installed in the current environment.",
-            ) from e
+    _load_plugins(getattr(args, "plugins", []))
 
     litGrid = LitGridHeteroDataModule(
         config_args,

From c617c18c344c529a59493da917177c12a43e0033 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Tue, 24 Mar 2026 21:56:08 +0100
Subject: [PATCH 32/39] fix validation order

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 gridfm_graphkit/cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gridfm_graphkit/cli.py b/gridfm_graphkit/cli.py
index d3143d2..8fa9583 100644
--- a/gridfm_graphkit/cli.py
+++ b/gridfm_graphkit/cli.py
@@ -139,7 +139,6 @@ def main_cli(args):
     normalizer_stats_path = getattr(args, "normalizer_stats", None)
     dataset_wrapper = getattr(args, "dataset_wrapper", None)
     dataset_wrapper_cache_dir = getattr(args, "dataset_wrapper_cache_dir", None)
-    _validate_dataset_wrapper(dataset_wrapper)
 
     # CLI --num_workers overrides the YAML value (useful for debugging with 0)
     num_workers_override = getattr(args, "num_workers", None)
@@ -147,6 +146,7 @@ def main_cli(args):
         config_args.data.workers = num_workers_override
 
     _load_plugins(getattr(args, "plugins", []))
+    _validate_dataset_wrapper(dataset_wrapper)
 
     litGrid = LitGridHeteroDataModule(
         config_args,

From d890cf08304ceee15b36b5709ff9e0620fb78167 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Wed, 25 Mar 2026 09:23:31 +0100
Subject: [PATCH 33/39] precommit fix

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 gridfm_graphkit/cli.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gridfm_graphkit/cli.py b/gridfm_graphkit/cli.py
index 8fa9583..e18aa5f 100644
--- a/gridfm_graphkit/cli.py
+++ b/gridfm_graphkit/cli.py
@@ -39,9 +39,10 @@ def _validate_dataset_wrapper(name: str | None) -> None:
         raise KeyError(
             f"Dataset wrapper '{name}' is not registered. "
             f"Available wrappers: {available}. "
-            "If it lives in a plugin package, pass it via --plugins."
+            "If it lives in a plugin package, pass it via --plugins.",
         )
 
+
 def benchmark_cli(args):
     """Benchmark train-dataloader iteration speed over one or more epochs."""
     with open(args.config, "r") as f:
@@ -89,7 +90,7 @@ def benchmark_cli(args):
         epoch_times.append(elapsed)
         print(
             f"Epoch {epoch:>3}: {elapsed:7.3f}s total  "
-            f"{per_batch:.4f}s/batch  ({num_batches} batches)"
+            f"{per_batch:.4f}s/batch  ({num_batches} batches)",
         )
 
     if args.epochs > 1:

From af0fb350dbbe65f7ce87acd4adaa7746961ed02d Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Wed, 25 Mar 2026 09:28:23 +0100
Subject: [PATCH 34/39] security fix

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 2b6c523..a9b00be 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -55,6 +55,7 @@ dependencies = [
     "seaborn",
     "urllib3>=2.6.0",
     "gridfm-datakit>=1.0.2",
+    "pygments>=2.19.3",
 ]
 
 [project.optional-dependencies]

From 6d886f83dfaff12355a74ba46d93cd6443880eed Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Wed, 25 Mar 2026 09:32:28 +0100
Subject: [PATCH 35/39] fix missing package and circular import

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 .github/workflows/ci-build.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/ci-build.yaml b/.github/workflows/ci-build.yaml
index ea796b9..c0918a9 100644
--- a/.github/workflows/ci-build.yaml
+++ b/.github/workflows/ci-build.yaml
@@ -49,6 +49,8 @@ jobs:
         run: |
           python -m pip install --upgrade pip wheel
           pip install -e ".[test]"
+          TORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
+          pip install torch-scatter -f "https://data.pyg.org/whl/torch-${TORCH_VERSION}+cpu.html"
 
       - name: Unit tests
         run: |

From 7feb3e7e5a9f5c46464dadd27249a45b24a8cd68 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Wed, 25 Mar 2026 09:41:00 +0100
Subject: [PATCH 36/39] ignore security CVE-2026-4539 as not relevant

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 .github/workflows/ci-build.yaml | 3 +++
 pyproject.toml                  | 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci-build.yaml b/.github/workflows/ci-build.yaml
index c0918a9..68ee525 100644
--- a/.github/workflows/ci-build.yaml
+++ b/.github/workflows/ci-build.yaml
@@ -168,6 +168,9 @@ jobs:
           pip install -e .[dev,test] || pip install -e .
       - name: Run pip-audit
         uses: pypa/gh-action-pip-audit@v1.1.0
+        with:
+          # CVE-2026-4539: pygments AdlLexer ReDoS, local-only attack vector, no fix released yet
+          ignore-vulns: CVE-2026-4539
 
   trivy_repo:
       name: Trivy (repo scan)
diff --git a/pyproject.toml b/pyproject.toml
index a9b00be..2b6c523 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -55,7 +55,6 @@ dependencies = [
     "seaborn",
     "urllib3>=2.6.0",
     "gridfm-datakit>=1.0.2",
-    "pygments>=2.19.3",
 ]
 
 [project.optional-dependencies]

From 1f05c7fa8eac71aa0db1f6699fac22f2682979b6 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Wed, 25 Mar 2026 10:03:26 +0100
Subject: [PATCH 37/39] fix tests

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 .../datasets/hetero_powergrid_datamodule.py   |  2 +-
 tests/conftest.py                             | 60 +++++++++++++++++++
 2 files changed, 61 insertions(+), 1 deletion(-)
 create mode 100644 tests/conftest.py

diff --git a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
index 95034ee..ab00997 100644
--- a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
+++ b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
@@ -259,7 +259,7 @@ def setup(self, stage: str):
         is_rank0 = (
             not dist.is_available() or not dist.is_initialized() or dist.get_rank() == 0
         )
-        if is_rank0 and self.trainer is not None and self.trainer.logger is not None:
+        if is_rank0 and self.trainer is not None and getattr(self.trainer, "logger", None) is not None:
             logger = self.trainer.logger
             if isinstance(logger, MLFlowLogger):
                 log_dir = os.path.join(
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..be6a68c
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,60 @@
+"""
+Session-scoped fixture that ensures the processed test data directory is
+populated before any test that needs it runs.
+
+Specifically it:
+  1. Runs LitGridHeteroDataModule.setup("fit") which triggers
+     HeteroGridDatasetDisk to write the ``processed/`` .pt files.
+  2. Persists the fitted normalizer stats as
+     ``tests/data/case14_ieee/processed/data_stats_HeteroDataMVANormalizer.pt``
+     so that test_edge_flows.py and test_simulate_measurements.py can load
+     them directly without needing a full DM setup.
+"""
+
+import os
+
+import pytest
+import torch
+import yaml
+
+from gridfm_graphkit.datasets.hetero_powergrid_datamodule import LitGridHeteroDataModule
+from gridfm_graphkit.datasets.normalizers import HeteroDataMVANormalizer
+from gridfm_graphkit.io.param_handler import NestedNamespace
+
+_STATS_PATH = "tests/data/case14_ieee/processed/data_stats_HeteroDataMVANormalizer.pt"
+_CONFIG_PATH = "tests/config/datamodule_test_base_config.yaml"
+
+
+class _DummyTrainer:
+    """Minimal stand-in for a Lightning Trainer used only during test setup."""
+
+    is_global_zero = True
+    logger = None  # prevents AttributeError in hetero_powergrid_datamodule.setup()
+
+
+@pytest.fixture(scope="session", autouse=True)
+def generate_processed_test_data():
+    """
+    Generate processed test data files that are needed by tests which load
+    them directly (test_edge_flows, test_simulate_measurements).
+
+    Skipped silently if the stats file already exists (e.g., second pytest run
+    in the same environment without cleaning the processed/ directory).
+    """
+    if os.path.exists(_STATS_PATH):
+        return
+
+    with open(_CONFIG_PATH) as f:
+        config_dict = yaml.safe_load(f)
+
+    args = NestedNamespace(**config_dict)
+    dm = LitGridHeteroDataModule(args, data_dir="tests/data")
+    dm.trainer = _DummyTrainer()
+    dm.setup("fit")
+
+    # Persist the fitted normalizer stats under the name used by the tests.
+    normalizer = dm.data_normalizers[0]
+    assert isinstance(normalizer, HeteroDataMVANormalizer), (
+        f"Expected HeteroDataMVANormalizer, got {type(normalizer).__name__}"
+    )
+    torch.save(normalizer.get_stats(), _STATS_PATH)

From 36fdf409a9b3cb642474451c37a18fc9a3b25a8d Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Wed, 25 Mar 2026 10:50:31 +0100
Subject: [PATCH 38/39] fix precommit

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 gridfm_graphkit/datasets/hetero_powergrid_datamodule.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
index ab00997..acd45aa 100644
--- a/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
+++ b/gridfm_graphkit/datasets/hetero_powergrid_datamodule.py
@@ -259,7 +259,11 @@ def setup(self, stage: str):
         is_rank0 = (
             not dist.is_available() or not dist.is_initialized() or dist.get_rank() == 0
         )
-        if is_rank0 and self.trainer is not None and getattr(self.trainer, "logger", None) is not None:
+        if (
+            is_rank0
+            and self.trainer is not None
+            and getattr(self.trainer, "logger", None) is not None
+        ):
             logger = self.trainer.logger
             if isinstance(logger, MLFlowLogger):
                 log_dir = os.path.join(

From 797529bdc3303c6ceb5466952e44d5c134a57605 Mon Sep 17 00:00:00 2001
From: Romeo Kienzler <romeo.kienzler1@ibm.com>
Date: Fri, 20 Mar 2026 19:16:18 +0100
Subject: [PATCH 39/39] add profiler cli argunent

Signed-off-by: Romeo Kienzler <romeo.kienzler1@ibm.com>
---
 .gitignore | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 00f7a2f..561bc59 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,7 +10,6 @@ gridfm_graphkit.egg-info
 mlruns
 *.pt
 .DS_Store
-integrationtests/data_out*
 .julia
 *logs*
 *data_out*