From 0d67640287380a62a8675b5c0dde80876005b604 Mon Sep 17 00:00:00 2001 From: martinspetlik Date: Mon, 8 Aug 2022 15:35:19 +0200 Subject: [PATCH 01/31] kurtosis check --- mlmc/estimator.py | 84 +++++++++++++++++++++++++++++- mlmc/plot/diagnostic_plots.py | 0 mlmc/quantity/quantity_estimate.py | 53 +++++++++++++++++-- mlmc/tool/hdf5.py | 2 +- 4 files changed, 133 insertions(+), 6 deletions(-) create mode 100644 mlmc/plot/diagnostic_plots.py diff --git a/mlmc/estimator.py b/mlmc/estimator.py index af4f6e03..9e9fad64 100644 --- a/mlmc/estimator.py +++ b/mlmc/estimator.py @@ -3,6 +3,7 @@ import scipy.integrate as integrate import mlmc.quantity.quantity_estimate as qe import mlmc.tool.simple_distribution +from mlmc.quantity.quantity_estimate import mask_nan_samples from mlmc.quantity.quantity_types import ScalarType from mlmc.plot import plots from mlmc.quantity.quantity_spec import ChunkSpec @@ -16,6 +17,7 @@ def __init__(self, quantity, sample_storage, moments_fn=None): self._quantity = quantity self._sample_storage = sample_storage self._moments_fn = moments_fn + self._moments_mean = None @property def quantity(self): @@ -29,6 +31,16 @@ def quantity(self, quantity): def n_moments(self): return self._moments_fn.size + @property + def moments_mean_obj(self): + return self._moments_mean + + @moments_mean_obj.setter + def moments_mean_obj(self, moments_mean): + if not isinstance(moments_mean, mlmc.quantity.quantity.QuantityMean): + raise TypeError + self._moments_mean = moments_mean + def estimate_moments(self, moments_fn=None): """ Use collected samples to estimate moments and variance of this estimate. 
def consistency_check(quantity, sample_storage=None):
    """
    MLMC telescoping-sum consistency check, following M. Giles' mlmc_test
    diagnostic: for each level l > 0 the three independent estimates
    E[P_{l-1}] (fine values on level l-1), E[P_l] (fine values on level l)
    and E[Y_l] = E[P_l - P_{l-1}] (level-l differences) should satisfy
    E[P_{l-1}] - E[P_l] + E[Y_l] ~ 0 up to sampling error.

    Note: comparing coarse and fine means of the SAME level is an exact
    identity (mean(coarse) - mean(fine) + mean(fine - coarse) == 0), so the
    previous level's fine samples must be used as the independent estimate.

    :param quantity: Quantity instance providing the level samples
    :param sample_storage: SampleStorage used to obtain the number of levels
                           NOTE(review): dereferenced unconditionally; the
                           None default is unusable — confirm intended API
    :return: dict {level_id: check value} for level_id > 0; values
             significantly above 1 indicate an inconsistent level coupling
    """
    fine_samples = {}
    coarse_samples = {}
    for chunk_spec in quantity.get_quantity_storage().chunks():
        samples = quantity.samples(chunk_spec)
        chunk, n_mask_samples = mask_nan_samples(samples)

        # Skip chunks with no (valid) samples.
        if chunk.shape[1] == 0:
            continue

        fine_samples.setdefault(chunk_spec.level_id, []).extend(chunk[:, :, 0])
        if chunk_spec.level_id > 0:
            coarse_samples.setdefault(chunk_spec.level_id, []).extend(chunk[:, :, 1])

    cons_check_val = {}
    for level_id in range(1, sample_storage.get_n_levels()):
        fine = np.array(fine_samples[level_id])
        coarse = np.array(coarse_samples[level_id])
        prev_fine = np.array(fine_samples[level_id - 1])

        # Three independent estimates entering the telescoping identity.
        prev_fine_mean = np.mean(prev_fine)   # E[P_{l-1}] from level l-1
        fine_mean = np.mean(fine)             # E[P_l] from level l
        diff_mean = np.mean(fine - coarse)    # E[Y_l] from level l

        prev_fine_var = np.var(prev_fine)
        fine_var = np.var(fine)
        diff_var = np.var(fine - coarse)

        # Standard errors of the three mean estimates (CLT scaling).
        n_prev = prev_fine.shape[0]
        n_cur = fine.shape[0]
        std_err_sum = (np.sqrt(prev_fine_var / n_prev)
                       + np.sqrt(fine_var / n_cur)
                       + np.sqrt(diff_var / n_cur))

        # Value > 1 means the identity is violated by more than three
        # combined standard deviations — likely a coupling bug.
        cons_check_val[level_id] = np.abs(prev_fine_mean - fine_mean + diff_mean) / (3 * std_err_sum)

    return cons_check_val
def coping_with_high_kurtosis(vars, costs, kurtosis, kurtosis_threshold=100):
    """
    Heuristic treatment of levels with high sample kurtosis, as recommended
    by prof. M. Giles in http://people.maths.ox.ac.uk/~gilesm/talks/MCQMC_22_b.pdf
    With high kurtosis the sample variance estimate is unreliable (typically
    underestimated), so the level variance is bounded from below by an
    extrapolation from the previous level.

    :param vars: vars[L, M] for all levels L and moments_fn M except the
                 (zeroth) constant moment with zero variance
    :param costs: cost of a single sample at each level
    :param kurtosis: sample kurtosis at each level
    :param kurtosis_threshold: kurtosis above this threshold is considered too high
    :return: np.ndarray, adjusted copy of vars; the caller's array is left untouched
    """
    # Work on a copy — the original mutated the caller's array in place.
    vars = np.array(vars, dtype=float, copy=True)
    for l_id in range(2, vars.shape[0]):
        if kurtosis[l_id] > kurtosis_threshold:
            # Lower bound: half the previous level's variance scaled by the cost ratio.
            vars[l_id] = np.maximum(vars[l_id], 0.5 * vars[l_id - 1] * costs[l_id - 1] / costs[l_id])
    return vars
0.5 + :param kurtosis: levels' kurtosis :return: np.array with number of optimal samples for individual levels and moments_fn, array (LxR) """ vars = prescribe_vars + + if kurtosis is not None and len(vars) == len(kurtosis): + vars = coping_with_high_kurtosis(vars, n_ops, kurtosis) + sqrt_var_n = np.sqrt(vars.T * n_ops) # moments_fn in rows, levels in cols total = np.sum(sqrt_var_n, axis=1) # sum over levels n_samples_estimate = np.round((sqrt_var_n / n_ops).T * total / target_variance).astype(int) # moments_fn in cols + n_samples_estimate = 1/(1-theta) * n_samples_estimate + # Limit maximal number of samples per level n_samples_estimate_safe = np.maximum( np.minimum(n_samples_estimate, vars * n_levels / target_variance), 2) diff --git a/mlmc/plot/diagnostic_plots.py b/mlmc/plot/diagnostic_plots.py new file mode 100644 index 00000000..e69de29b diff --git a/mlmc/quantity/quantity_estimate.py b/mlmc/quantity/quantity_estimate.py index 2436d7b9..a1f63ddd 100644 --- a/mlmc/quantity/quantity_estimate.py +++ b/mlmc/quantity/quantity_estimate.py @@ -19,13 +19,17 @@ def cache_clear(): mlmc.quantity.quantity.QuantityConst.samples.cache_clear() -def estimate_mean(quantity): +def estimate_mean(quantity, form="diff", operation_func=None, **kwargs): """ MLMC mean estimator. The MLMC method is used to compute the mean estimate to the Quantity dependent on the collected samples. The squared error of the estimate (the estimator variance) is estimated using the central limit theorem. Data is processed by chunks, so that it also supports big data processing :param quantity: Quantity + :param form: if "diff" estimates based on difference between fine and coarse data = MLMC approach + "fine" estimates based on level's fine data + "coarse" estimates based on level's coarse data + :param operation_func: function to process level data, e.g. 
def kurtosis_numerator(chunk_diff, chunk_spec, l_means):
    """
    Estimate the sample kurtosis numerator:
    E[(Y_l - E[Y_l])^4]
    Transforms a chunk of level differences into fourth-power deviations from
    the level mean; averaging the result yields the numerator estimate.

    :param chunk_diff: np.ndarray, [quantity shape, number of samples]
    :param chunk_spec: quantity_spec.ChunkSpec, provides the level id
    :param l_means: per-level means E[Y_l], indexed by level id
    :return: np.ndarray, unchanged shape
    """
    return (chunk_diff - l_means[chunk_spec.level_id]) ** 4
def level_kurtosis(quantity, means_obj):
    """
    Estimate the sample kurtosis of the level differences Y_l = fine_l - coarse_l:
    kappa_l = E[(Y_l - E[Y_l])^4] / (Var[Y_l])^2

    :param quantity: Quantity
    :param means_obj: Quantity.QuantityMean holding level means and variances
    :return: np.ndarray, kurtosis per level
    """
    # Level means of the fourth-power deviations give the kurtosis numerator.
    fourth_moment = estimate_mean(quantity, operation_func=kurtosis_numerator,
                                  l_means=means_obj.l_means)
    return fourth_moment.l_means / means_obj.l_vars ** 2
mlmc.moments import Legendre +from mlmc.plot.plots import Distribution + + +# Tutorial class for 1D shooting simulation, includes +# - samples scheduling +# - process results: +# - create Quantity instance +# - approximate density +class ProcessShooting1D: + + def __init__(self): + n_levels = 3 + # Number of MLMC levels + step_range = [1, 1e-3] + # step_range [simulation step at the coarsest level, simulation step at the finest level] + level_parameters = ProcessShooting1D.determine_level_parameters(n_levels, step_range) + # Determine each level parameters (in this case, simulation step at each level), level_parameters should be + # simulation dependent + self._sample_sleep = 0#30 + # Time to do nothing just to make sure the simulations aren't constantly checked, useful mainly for PBS run + self._sample_timeout = 60 + # Maximum waiting time for running simulations + self._adding_samples_coef = 0.1 + self._n_moments = 20 + # number of generalized statistical moments used for MLMC number of samples estimation + self._quantile = 0.01 + # Setting parameters that are utilized when scheduling samples + ### + # MLMC run + ### + sampler = self.create_sampler(level_parameters=level_parameters) + # Create sampler (mlmc.Sampler instance) - crucial class that controls MLMC run + self.generate_samples(sampler, n_samples=None, target_var=1e-3) + # Generate MLMC samples, there are two ways: + # 1) set exact number of samples at each level, + # e.g. for 5 levels - self.generate_samples(sampler, n_samples=[1000, 500, 250, 100, 50]) + # 2) set target variance of MLMC estimates, + # e.g. 
def create_sampler(self, level_parameters):
    """
    Create:
    # sampling pool - the way sample simulations are executed
    # sample storage - stores sample results
    # sampler - controls MLMC execution
    :param level_parameters: list of lists
    :return: mlmc.sampler.Sampler instance
    """
    # All samples run sequentially in the current process.
    # mlmc.sampling_pool.ProcessPool(n) would instead run n simulations
    # in parallel (n depends on the computer architecture).
    sampling_pool = OneProcessPool()

    # Configuration passed to the simulation constructor.
    simulation_config = {
        "start_position": np.array([0, 0]),
        "start_velocity": np.array([10, 0]),
        "area_borders": np.array([-100, 200, -300, 400]),
        "max_time": 10,
        "complexity": 2,  # used for initial estimate of number of operations per sample
        'fields_params': dict(model='gauss', dim=1, sigma=1, corr_length=0.1),
    }

    # Simulation factory, instance of a class derived from mlmc.sim.simulation.
    simulation_factory = ShootingSimulation1D(config=simulation_config)

    # Keep samples in main memory; mlmc.sample_storage_hdf.SampleStorageHDF
    # (file_path=...) is the persistent alternative.
    sample_storage = Memory()

    # The sampler drives the whole MLMC execution.
    return Sampler(sample_storage=sample_storage, sampling_pool=sampling_pool,
                   sim_factory=simulation_factory, level_parameters=level_parameters)
MLMC samples + :param sampler: mlmc.sampler.Sampler instance + :param n_samples: None or list, number of samples at each level + :param target_var: target variance of MLMC estimates + :return: None + """ + # The number of samples is set by user + if n_samples is not None: + sampler.set_initial_n_samples(n_samples) + # The number of initial samples is determined automatically + else: + sampler.set_initial_n_samples() + # Samples are scheduled and the program is waiting for all of them to be completed. + sampler.schedule_samples() + sampler.ask_sampling_pool_for_samples(sleep=self._sample_sleep, timeout=self._sample_timeout) + self.all_collect(sampler) + + # MLMC estimates target variance is set + if target_var is not None: + # The mlmc.quantity.quantity.Quantity instance is created + # parameters 'storage' and 'q_specs' are obtained from sample_storage, + # originally 'q_specs' is set in the simulation class + root_quantity = make_root_quantity(storage=sampler.sample_storage, + q_specs=sampler.sample_storage.load_result_format()) + + # Moment functions object is created + # The MLMC algorithm determines number of samples according to the moments variance, + # Type of moment functions (Legendre by default) might affect the total number of MLMC samples + moments_fn = self.set_moments(root_quantity, sampler.sample_storage, n_moments=self._n_moments) + estimate_obj = Estimate(root_quantity, sample_storage=sampler.sample_storage, + moments_fn=moments_fn) + + # Initial estimation of the number of samples at each level + variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler.n_finished_samples) + # Firstly, the variance of moments and execution time of samples at each level are calculated from already finished samples + n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, + n_levels=sampler.n_levels) + + ##### + # MLMC sampling algorithm - gradually schedules samples and refines the total number of samples + ##### + # Loop until 
def all_collect(self, sampler):
    """
    Collect samples, wait until all samples are finished
    :param sampler: mlmc.sampler.Sampler object
    :return: None
    """
    # Poll the sampling pool until no samples remain running.
    n_running = 1
    while n_running > 0:
        n_running = sampler.ask_sampling_pool_for_samples()
        print("N running: ", n_running)
Estimate(quantity=q_value, sample_storage=sample_storage, moments_fn=moments_fn) + # Estimate moment means and variances + means, vars = estimator.estimate_moments(moments_fn) + + est.plot_checks(quantity=q_value, sample_storage=sample_storage, moments_fn=moments_fn) + est.consistency_check(quantity=q_value, sample_storage=sample_storage) + estimator.kurtosis_check(q_value) + + # Generally, root quantity has different domain than its items + root_quantity_estimated_domain = Estimate.estimate_domain(root_quantity, sample_storage, + quantile=self._quantile) + root_quantity_moments_fn = Legendre(self._n_moments, root_quantity_estimated_domain) + + # There is another possible approach to calculating all moments at once and then select desired quantity + moments_quantity = moments(root_quantity, moments_fn=root_quantity_moments_fn, mom_at_bottom=True) + moments_mean = estimate_mean(moments_quantity) + target_mean = moments_mean['target'] + time_mean = target_mean[10] # times: [1] + location_mean = time_mean['0'] # locations: ['0'] + value_mean = location_mean[0] # result shape: (1,) + + assert value_mean.mean[0] == 1 + self.approx_distribution(estimator, n_levels, tol=1e-8) + + def approx_distribution(self, estimator, n_levels, tol=1.95): + """ + Probability density function approximation + :param estimator: mlmc.estimator.Estimate instance, it contains quantity for which the density is approximated + :param n_levels: int, number of MLMC levels + :param tol: Tolerance of the fitting problem, with account for variances in moments. 
def determine_level_parameters(n_levels, step_range):
    """
    Determine level parameters,
    In this case, a step of fine simulation at each level.
    Steps are geometrically interpolated between the coarsest and finest step.
    :param n_levels: number of MLMC levels
    :param step_range: simulation step range [coarsest, finest], coarsest > finest
    :return: list of single-item lists, one step per level
    """
    assert step_range[0] > step_range[1]
    # Interpolation exponents in [0, 1]; a single level uses the finest step.
    if n_levels == 1:
        exponents = [1]
    else:
        exponents = [i_level / (n_levels - 1) for i_level in range(n_levels)]
    return [[step_range[0] ** (1 - t) * step_range[1] ** t] for t in exponents]
def log_var_per_level(l_vars, err_variances=0, err_l_vars=0, moments=(1, 2, 3, 4)):
    """
    Plot log2 of the level variances for the selected moments.

    :param l_vars: np.ndarray [level, moment], moment variance at each level
    :param err_variances: unused, kept for interface compatibility
    :param err_l_vars: unused, kept for interface compatibility
    :param moments: indices of moments to plot (tuple default avoids the
                    mutable-default-argument pitfall)
    :return: None, shows the figure
    """
    fig, ax1 = plt.subplots(figsize=(8, 5))
    for m in moments:
        # One line per moment, level index on the x-axis.
        ax1.plot(np.log2(l_vars[:, m]), label="m={}".format(m), marker="s")

    ax1.set_ylabel('log' + r'$_2$' + 'variance')
    ax1.set_xlabel('level' + r'$l$')
    plt.legend()
    plt.show()
def sample_cost_per_level(costs):
    """
    Plot log2 of the cost of a single sample at each level.

    :param costs: per-level sample costs
    :return: None, shows the figure
    """
    fig, axes = plt.subplots(figsize=(8, 5))
    axes.plot(np.log2(costs), marker="s")
    axes.set_xlabel('level' + r'$l$')
    axes.set_ylabel('log' + r'$_2$' + 'cost per sample')
    plt.legend()
    plt.show()
def hdf_storage_factory(file_name="mlmc_test.hdf5"):
    """
    Create a fresh HDF5 sample storage inside a clean temporary work directory.

    :param file_name: name of the HDF5 file inside the work directory
    :return: SampleStorageHDF instance
    """
    base_dir = os.path.dirname(os.path.realpath(__file__))
    os.chdir(base_dir)
    tmp_dir = os.path.join(base_dir, '_test_tmp')
    # Start from an empty directory so previous runs do not interfere.
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
    os.makedirs(tmp_dir)

    # Create sample storages
    return SampleStorageHDF(file_path=os.path.join(tmp_dir, file_name))
def mlmc_test_mcqmc(test_case):
    """
    Run an MLMC test with the improved sample-number estimation:
    the sample counts use the theta control parameter and the level variances
    are corrected for high sample kurtosis (Giles, MCQMC'22).

    Fix: the original called the nonexistent
    estimate_n_samples_for_target_variance_giles; the estimator module
    defines estimate_n_samples_for_target_variance with theta/kurtosis support.

    :param test_case: (simulation_factory, sample_storage, sampling_pool)
    :return: (QuantityMean, n_collected per level, n_ops per level)
    """
    # np.random.seed(1234)
    n_moments = 10
    step_range = [[0.1], [0.005], [0.00025]]

    simulation_factory, sample_storage, sampling_pool = test_case

    if simulation_factory.need_workspace:
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        shutil.copyfile('synth_sim_config.yaml', os.path.join(work_dir, 'synth_sim_config.yaml'))

    sampler = Sampler(sample_storage=sample_storage, sampling_pool=sampling_pool, sim_factory=simulation_factory,
                      level_parameters=step_range)

    true_domain = distr.ppf([0.0001, 0.9999])
    moments_fn = Legendre(n_moments, true_domain)
    # moments_fn = Monomial(n_moments, true_domain)

    sampler.set_initial_n_samples([100, 50, 25])
    # sampler.set_initial_n_samples([10000])
    sampler.schedule_samples()
    sampler.ask_sampling_pool_for_samples()

    target_var = 1e-4
    sleep = 0
    add_coef = 0.1

    quantity = make_root_quantity(sample_storage, q_specs=simulation_factory.result_format())

    length = quantity['length']
    time = length[1]
    location = time['10']
    value_quantity = location[0]

    estimator = mlmc.estimator.Estimate(value_quantity, sample_storage, moments_fn)

    # Initial estimation according to already finished samples
    variances, n_ops = estimator.estimate_diff_vars_regression(sampler._n_scheduled_samples)
    n_estimated = mlmc.estimator.estimate_n_samples_for_target_variance(target_var, variances, n_ops,
                                                                        n_levels=sampler.n_levels, theta=0.25)

    # Loop until number of estimated samples is greater than the number of scheduled samples
    while not sampler.process_adding_samples(n_estimated, sleep, add_coef):
        # New estimation according to already finished samples,
        # with level variances corrected for high kurtosis.
        kurtosis = estimator.kurtosis_check()
        variances, n_ops = estimator.estimate_diff_vars_regression(sampler._n_scheduled_samples)
        n_estimated = mlmc.estimator.estimate_n_samples_for_target_variance(target_var, variances, n_ops,
                                                                            n_levels=sampler.n_levels, theta=0.25,
                                                                            kurtosis=kurtosis)

    means, vars = estimator.estimate_moments(moments_fn)

    estimator.kurtosis_check(value_quantity)

    # The zeroth moment is the constant one with zero variance.
    assert means[0] == 1
    assert vars[0] == 0

    return estimator.moments_mean_obj, sample_storage.get_n_collected(), sample_storage.get_n_ops()
axis=0), moments=[1, 2, 5]) + #sample_cost_level(costs=np.mean(n_ops, axis=0)) + + print("n collected ", np.mean(n_collected, axis=0)) + print("cost on level ", np.mean(n_ops, axis=0)) + print("total cost ", np.sum(np.mean(n_ops * np.array(n_collected), axis=0))) + + +if __name__ == "__main__": + # means_mlmc = [] + # means_mlmc_giles = [] + # n_collected_mlmc = [] + # n_collected_mlmc_giles = [] + # vars_mlmc = [] + # vars_mlmc_giles = [] + for i in range(3): + # print("############### Original estimator ###################") + # mlmc_test_data(mlmc_test) + print("######################################################") + print("############### Improved estimator ###################") + mlmc_test_data(mlmc_test_mcqmc) + # mean, var, n_estimated, n_ops = mlmc_test((SynthSimulationForTests(simulation_config), + # hdf_storage_factory(file_name="mlmc_test.hdf5"), + # OneProcessPool())) + # means_mlmc.append(mean) + # vars_mlmc.append(var) + # n_collected_mlmc.append(n_estimated) + # mean, var, n_estimated, n_ops = mlmc_test_giles((SynthSimulationForTests(simulation_config), + # hdf_storage_factory(file_name="mlmc_giles_test.hdf5"), + # OneProcessPool())) + # means_mlmc_giles.append(mean) + # vars_mlmc_giles.append(var) + # n_collected_mlmc_giles.append(n_estimated) + + # + # print("mlmc means ", np.mean(means_mlmc, axis=0)) + # print("mlmc vars ", np.mean(vars_mlmc, axis=0)) + # print("mlmc n collected ", np.mean(n_collected_mlmc, axis=0)) + # + # print("mlmc giles means ", np.mean(means_mlmc_giles, axis=0)) + # print("mlmc giles vars ", np.mean(vars_mlmc_giles, axis=0)) + # print("mlmc giles n collected ", np.mean(n_collected_mlmc_giles, axis=0)) + + + + + + + + + + + diff --git a/test/test_sampler.py b/test/test_sampler.py index dc846f3a..d559b6ea 100644 --- a/test/test_sampler.py +++ b/test/test_sampler.py @@ -1,5 +1,8 @@ +import os +import shutil import numpy as np from scipy import stats +import mlmc from mlmc.sample_storage import Memory from 
mlmc.sim.synth_simulation import SynthSimulation from mlmc.sampling_pool import OneProcessPool @@ -34,3 +37,43 @@ def test_sampler(): n_estimated = np.array([100, 50, 20]) sampler.process_adding_samples(n_estimated, 0, 0.1) assert np.allclose(sampler._n_target_samples, init_samples + (n_estimated * 0.1), atol=1) + + +def test_sampler_hdf(): + # Create simulations + failed_fraction = 0.1 + distr = stats.norm() + simulation_config = dict(distr=distr, complexity=2, nan_fraction=failed_fraction, sim_method='_sample_fn') + simulation = SynthSimulation(simulation_config) + + work_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '_test_tmp') + if os.path.exists(work_dir): + shutil.rmtree(work_dir) + os.makedirs(work_dir) + file_path = os.path.join(work_dir, "mlmc_test.hdf5") + storage = mlmc.SampleStorageHDF(file_path=file_path) + sampling_pool = OneProcessPool() + + step_range = [[0.1], [0.01], [0.001]] + + sampler = Sampler(sample_storage=storage, sampling_pool=sampling_pool, sim_factory=simulation, + level_parameters=step_range) + + assert len(sampler._level_sim_objects) == len(step_range) + for step, level_sim in zip(step_range, sampler._level_sim_objects): + assert step[0] == level_sim.config_dict['fine']['step'] + + init_samples = list(np.ones(len(step_range)) * 10) + + sampler.set_initial_n_samples(init_samples) + assert np.allclose(sampler._n_target_samples, init_samples) + assert 0 == sampler.ask_sampling_pool_for_samples() + sampler.schedule_samples() + assert np.allclose(sampler._n_scheduled_samples, init_samples) + + n_estimated = np.array([100, 50, 20]) + sampler.process_adding_samples(n_estimated, 0, 0.1) + assert np.allclose(sampler._n_target_samples, init_samples + (n_estimated * 0.1), atol=1) + + +test_sampler_hdf() \ No newline at end of file From 067bbce3a984919ec118814250e6b0b52fa31570 Mon Sep 17 00:00:00 2001 From: martinspetlik Date: Tue, 9 May 2023 14:31:56 +0200 Subject: [PATCH 03/31] numpy 1.24 fix --- mlmc/sample_storage.py | 9 
++++++--- mlmc/sample_storage_hdf.py | 8 ++++---- mlmc/tool/hdf5.py | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/mlmc/sample_storage.py b/mlmc/sample_storage.py index 01be9082..ec05080f 100644 --- a/mlmc/sample_storage.py +++ b/mlmc/sample_storage.py @@ -168,12 +168,15 @@ def _save_successful(self, samples): :return: None """ for level_id, res in samples.items(): - res = np.array(res) + res = np.array(res, dtype=object) fine_coarse_res = res[:, 1] - result_type = np.dtype((np.float, np.array(fine_coarse_res[0]).shape)) + result_type = np.dtype((float, np.array(fine_coarse_res[0], dtype=object).shape)) results = np.empty(shape=(len(res),), dtype=result_type) - results[:] = [val for val in fine_coarse_res] + + for idx, val in enumerate(fine_coarse_res): + results[idx, 0] = val[0] + results[idx, 1] = val[1] # Save sample ids self._successful_sample_ids.setdefault(level_id, []).extend(res[:, 0]) diff --git a/mlmc/sample_storage_hdf.py b/mlmc/sample_storage_hdf.py index 7e5fbef5..68b1af9d 100644 --- a/mlmc/sample_storage_hdf.py +++ b/mlmc/sample_storage_hdf.py @@ -39,7 +39,7 @@ def _hdf_result_format(self, locations, times): :return: """ if len(locations[0]) == 3: - tuple_dtype = np.dtype((np.float, (3,))) + tuple_dtype = np.dtype((float, (3,))) loc_dtype = np.dtype((tuple_dtype, (len(locations),))) else: loc_dtype = np.dtype(('S50', (len(locations),))) @@ -48,14 +48,14 @@ def _hdf_result_format(self, locations, times): 'formats': ('S50', 'S50', np.dtype((np.int32, (2,))), - np.dtype((np.float, (len(times),))), + np.dtype((float, (len(times),))), loc_dtype ) } return result_dtype - def save_global_data(self, level_parameters: List[np.float], result_format: List[QuantitySpec]): + def save_global_data(self, level_parameters: List[float], result_format: List[QuantitySpec]): """ Save hdf5 file global attributes :param level_parameters: list of simulation steps @@ -125,7 +125,7 @@ def save_samples(self, successful, failed): def _save_succesful(self, 
successful_samples): for level, samples in successful_samples.items(): if len(samples) > 0: - self._level_groups[level].append_successful(np.array(samples)) + self._level_groups[level].append_successful(np.array(samples, dtype=object)) def _save_failed(self, failed_samples): for level, samples in failed_samples.items(): diff --git a/mlmc/tool/hdf5.py b/mlmc/tool/hdf5.py index f6f3219c..32e24149 100644 --- a/mlmc/tool/hdf5.py +++ b/mlmc/tool/hdf5.py @@ -309,7 +309,7 @@ def append_successful(self, samples: np.array): self._append_dataset(self.collected_ids_dset, samples[:, 0]) values = samples[:, 1] - result_type = np.dtype((np.float, np.array(values[0]).shape)) + result_type = np.dtype((float, np.array(values[0]).shape)) # Create dataset for failed samples self._make_dataset(name='collected_values', shape=(0,), From f8b7f3fa40812cd5beb5dd2a6693713aa0501eb8 Mon Sep 17 00:00:00 2001 From: martinspetlik Date: Thu, 3 Aug 2023 13:09:06 +0200 Subject: [PATCH 04/31] pbs run fix --- mlmc/random/correlated_field.py | 6 +- mlmc/sampler.py | 25 ++- mlmc/sampling_pool_pbs.py | 266 ++++++++++++++++++++++++-------- mlmc/tool/pbs_job.py | 13 +- 4 files changed, 240 insertions(+), 70 deletions(-) diff --git a/mlmc/random/correlated_field.py b/mlmc/random/correlated_field.py index ba83e15b..b5c809a7 100644 --- a/mlmc/random/correlated_field.py +++ b/mlmc/random/correlated_field.py @@ -226,7 +226,11 @@ def sample(self): for field in self.fields: sample = field.sample() if field.is_outer: - result[field.name] = np.zeros(self.n_elements) + if field.name == "cond_tn": + result[field.name] = np.zeros((self.n_elements, 3)) + else: + result[field.name] = np.zeros(self.n_elements) + #result[field.name] = np.zeros(self.n_elements) result[field.name][field.full_sample_ids] = sample return result diff --git a/mlmc/sampler.py b/mlmc/sampler.py index 6a550726..17b7dce0 100644 --- a/mlmc/sampler.py +++ b/mlmc/sampler.py @@ -119,7 +119,7 @@ def _get_sample_tag(self, level_id): """ return 
"L{:02d}_S{:07d}".format(level_id, int(self._n_scheduled_samples[level_id])) - def schedule_samples(self, timeout=None): + def schedule_samples(self, timeout=None, level_id=None, n_samples=None): """ Create simulation samples, loop through "levels" and its samples (given the number of target samples): 1) generate sample tag (same for fine and coarse simulation) @@ -132,7 +132,9 @@ def schedule_samples(self, timeout=None): self.ask_sampling_pool_for_samples(timeout=timeout) plan_samples = self._n_target_samples - self._n_scheduled_samples - for level_id, n_samples in enumerate(plan_samples): + if level_id is None: + level_id = len(plan_samples) - 1 + if n_samples is not None: samples = [] for _ in range(int(n_samples)): # Unique sample id @@ -143,11 +145,28 @@ def schedule_samples(self, timeout=None): self._sampling_pool.schedule_sample(sample_id, level_sim) # Increment number of created samples at current level self._n_scheduled_samples[level_id] += 1 - samples.append(sample_id) # Store scheduled samples self.sample_storage.save_scheduled_samples(level_id, samples) + else: + for n_samples in np.flip(plan_samples): + samples = [] + for _ in range(int(n_samples)): + # Unique sample id + sample_id = self._get_sample_tag(level_id) + level_sim = self._level_sim_objects[level_id] + + # Schedule current sample + self._sampling_pool.schedule_sample(sample_id, level_sim) + # Increment number of created samples at current level + self._n_scheduled_samples[level_id] += 1 + + samples.append(sample_id) + + # Store scheduled samples + self.sample_storage.save_scheduled_samples(level_id, samples) + level_id -= 1 def _check_failed_samples(self): """ diff --git a/mlmc/sampling_pool_pbs.py b/mlmc/sampling_pool_pbs.py index 01c4128b..2142fdcd 100644 --- a/mlmc/sampling_pool_pbs.py +++ b/mlmc/sampling_pool_pbs.py @@ -5,6 +5,8 @@ import pickle import json import glob +import time +import numpy as np from mlmc.level_simulation import LevelSimulation from mlmc.sampling_pool import 
SamplingPool from mlmc.tool.pbs_job import PbsJob @@ -133,9 +135,17 @@ def pbs_common_setting(self, **kwargs): :return: None """ # Script header - select_flags_list = kwargs.get('select_flags', []) - if select_flags_list: - kwargs['select_flags'] = ":" + ":".join(select_flags_list) + select_flags_dict = kwargs.get('select_flags', {}) + + # Set scratch dir + if any(re.compile('scratch.*').match(flag) for flag in list(select_flags_dict.keys())): + if kwargs['scratch_dir'] is None: + kwargs['scratch_dir'] = "$SCRATCHDIR" + else: + kwargs['scratch_dir'] = '' + + if select_flags_dict: + kwargs['select_flags'] = ":" + ':'.join('{}={}'.format(*item) for item in select_flags_dict.items()) else: kwargs['select_flags'] = "" @@ -162,7 +172,7 @@ def pbs_common_setting(self, **kwargs): kwargs['optional_pbs_requests']) # e.g. ['#PBS -m ae'] means mail is sent when the job aborts or terminates self._pbs_header_template.extend(('MLMC_WORKDIR=\"{}\"'.format(self._work_dir),)) self._pbs_header_template.extend(kwargs['env_setting']) - self._pbs_header_template.extend(('{python} -m mlmc.tool.pbs_job {output_dir} {job_name} >' + self._pbs_header_template.extend(('{python} -m mlmc.tool.pbs_job {output_dir} {job_name} {scratch_dir} >' '{pbs_output_dir}/{job_name}_STDOUT 2>&1',)) self._pbs_config = kwargs @@ -221,28 +231,31 @@ def execute(self): script_content = "\n".join(self.pbs_script) self.write_script(script_content, job_file) - process = subprocess.run(['qsub', job_file], stderr=subprocess.PIPE, stdout=subprocess.PIPE) - try: - if process.returncode != 0: - raise Exception(process.stderr.decode('ascii')) - # Find all finished jobs - self._qsub_failed_n = 0 - # Write current job count - self._job_count += 1 - - # Get pbs_id from qsub output - pbs_id = process.stdout.decode("ascii").split(".")[0] - # Store pbs id for future qstat calls - self._pbs_ids.append(pbs_id) - pbs_process.write_pbs_id(pbs_id) - - self._current_job_weight = 0 - self._n_samples_in_job = 0 - self._scheduled = [] 
- except: - self._qsub_failed_n += 1 - if self._qsub_failed_n > SamplingPoolPBS.QSUB_FAILED_MAX_N: - raise Exception(process.stderr.decode("ascii")) + while self._qsub_failed_n <= SamplingPoolPBS.QSUB_FAILED_MAX_N: + process = subprocess.run(['qsub', job_file], stderr=subprocess.PIPE, stdout=subprocess.PIPE) + try: + if process.returncode != 0: + raise Exception(process.stderr.decode('ascii')) + # Find all finished jobs + self._qsub_failed_n = 0 + # Write current job count + self._job_count += 1 + + # Get pbs_id from qsub output + pbs_id = process.stdout.decode("ascii").split(".")[0] + # Store pbs id for future qstat calls + self._pbs_ids.append(pbs_id) + pbs_process.write_pbs_id(pbs_id) + + self._current_job_weight = 0 + self._n_samples_in_job = 0 + self._scheduled = [] + break + except: + self._qsub_failed_n += 1 + time.sleep(30) + if self._qsub_failed_n > SamplingPoolPBS.QSUB_FAILED_MAX_N: + raise Exception(process.stderr.decode("ascii")) def _create_script(self): """ @@ -277,6 +290,64 @@ def get_finished(self): finished_pbs_jobs, unfinished_pbs_jobs = self._qstat_pbs_job() return self._get_result_files(finished_pbs_jobs, unfinished_pbs_jobs) + def collect_data(self): + successful_results = {} + failed_results = {} + times = {} + sim_data_results = {} + # running_times = {} + # extract_mesh_times = {} + # make_field_times = {} + # generate_rnd_times = {} + # fine_flow_times = {} + # coarse_flow_times = {} + n_running = 0 + + os.chdir(self._jobs_dir) + for file in glob.glob("*_STDOUT"): + job_id = re.findall(r'(\d+)_STDOUT', file)[0] + + successful, failed, time = PbsJob.read_results(job_id, self._jobs_dir) + + # Split results to levels + for level_id, results in successful.items(): + successful_results.setdefault(level_id, []).extend(results) + for level_id, results in failed.items(): + failed_results.setdefault(level_id, []).extend(results) + for level_id, results in time.items(): + if level_id in times: + times[level_id][0] += results[-1][0] + 
times[level_id][1] += results[-1][1] + else: + times[level_id] = list(results[-1]) + + # # Optional simulation data + # for level_id, results in sim_data.items(): + # sim_data_results.setdefault(level_id, []).extend(results) + + # for level_id, results in running_time.items(): + # running_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in extract_mesh.items(): + # extract_mesh_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in make_field.items(): + # make_field_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in generate_rnd.items(): + # generate_rnd_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in fine_flow.items(): + # fine_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in coarse_flow.items(): + # coarse_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + + return successful_results, failed_results, n_running #, sim_data_results #list(times.items()), list(running_times.items()), \ + # list(extract_mesh_times.items()), list(make_field_times.items()), list(generate_rnd_times.items()), \ + # list(fine_flow_times.items()), \ + # list(coarse_flow_times.items()) + def _qstat_pbs_job(self): """ Parse qstat output and get all unfinished job ids @@ -286,23 +357,36 @@ def _qstat_pbs_job(self): if len(self._pbs_ids) > 0: # Get PBS id's status, # '-x' - displays status information for finished and moved jobs in addition to queued and running jobs. 
- qstat_call = ["qstat", "-x"] + qstat_call = ["qstat", "-xs"] qstat_call.extend(self._pbs_ids) - # qstat call - process = subprocess.run(qstat_call, stderr=subprocess.PIPE, stdout=subprocess.PIPE) - try: - if process.returncode != 0: - raise Exception(process.stderr.decode("ascii")) - output = process.stdout.decode("ascii") - # Find all finished jobs - finished_pbs_jobs = re.findall(r"(\d+)\..*\d+ F", output) - self._qstat_failed_n = 0 - except: - self._qstat_failed_n += 1 - if self._qstat_failed_n > SamplingPoolPBS.QSTAT_FAILED_MAX_N: - raise Exception(process.stderr.decode("ascii")) - finished_pbs_jobs = [] + while self._qstat_failed_n <= SamplingPoolPBS.QSTAT_FAILED_MAX_N: + # qstat call + unknown_job_ids = [] + process = subprocess.run(qstat_call, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + try: + if process.returncode != 0: + err_output = process.stderr.decode("ascii") + # Presumably, Job Ids are 'unknown' for PBS after some time of their inactivity + unknown_job_ids = re.findall(r"Unknown Job Id (\d+)\.", err_output) + + if len(unknown_job_ids) == 0: + raise Exception(process.stderr.decode("ascii")) + + output = process.stdout.decode("ascii") + # Find all finished jobs + finished_pbs_jobs = re.findall(r"(\d+)\..*\d+ F", output) + finished_moved_pbs_jobs = re.findall(r"(\d+)\..*\d+ M.*\n.*Job finished", output) + finished_pbs_jobs.extend(finished_moved_pbs_jobs) + finished_pbs_jobs.extend(unknown_job_ids) + self._qstat_failed_n = 0 + break + except: + self._qstat_failed_n += 1 + time.sleep(30) + if self._qstat_failed_n > SamplingPoolPBS.QSTAT_FAILED_MAX_N: + raise Exception(process.stderr.decode("ascii")) + finished_pbs_jobs = [] # Get unfinished as diff between planned and finished unfinished_pbs_jobs = [] @@ -327,7 +411,7 @@ def _get_result_files(self, finished_pbs_jobs, unfinished_pbs_jobs): :return: successful_results: Dict[level_id, List[Tuple[sample_id: str, Tuple[fine_result: np.ndarray, coarse_result: n.ndarray]]]] failed_results: Dict[level_id, 
List[Tuple[sample_id: str, err_msg: str]]] n_running: int, number of running samples - times: + times: """ os.chdir(self._jobs_dir) @@ -343,6 +427,14 @@ def _get_result_files(self, finished_pbs_jobs, unfinished_pbs_jobs): successful_results = {} failed_results = {} times = {} + #sim_data_results = {} + # running_times = {} + # extract_mesh_times = {} + # make_field_times = {} + # generate_rnd_times = {} + # fine_flow_times = {} + # coarse_flow_times = {} + for pbs_id in finished_pbs_jobs: reg = "*_{}".format(pbs_id) # JobID_PbsId file file = glob.glob(reg) @@ -359,6 +451,8 @@ def _get_result_files(self, finished_pbs_jobs, unfinished_pbs_jobs): successful_results.setdefault(level_id, []).extend(results) for level_id, results in failed.items(): failed_results.setdefault(level_id, []).extend(results) + # for level_id, results in sim_data.items(): + # sim_data_results.setdefault(level_id, []).extend(results) for level_id, results in time.items(): if level_id in times: times[level_id][0] += results[-1][0] @@ -366,14 +460,38 @@ def _get_result_files(self, finished_pbs_jobs, unfinished_pbs_jobs): else: times[level_id] = list(results[-1]) + # for level_id, results in running_time.items(): + # running_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in extract_mesh.items(): + # extract_mesh_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in make_field.items(): + # make_field_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in generate_rnd.items(): + # generate_rnd_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in fine_flow.items(): + # fine_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in coarse_flow.items(): + # coarse_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] # Delete pbsID file - it means job is finished 
SamplingPoolPBS.delete_pbs_id_file(file) if self._unfinished_sample_ids: successful_results, failed_results, times = self._collect_unfinished(successful_results, - failed_results, times) + failed_results, times, + ) + # running_times + # extract_mesh_times, + # make_field_times, + # generate_rnd_times, + # fine_flow_times, + # coarse_flow_times) - return successful_results, failed_results, n_running, list(times.items()) + return successful_results, failed_results, n_running, list(times.items())#, sim_data_results def _collect_unfinished(self, successful_results, failed_results, times): """ @@ -384,42 +502,64 @@ def _collect_unfinished(self, successful_results, failed_results, times): :return: all input dictionaries """ already_collected = set() + for sample_id in self._unfinished_sample_ids: if sample_id in already_collected: continue - job_id = PbsJob.job_id_from_sample_id(sample_id, self._jobs_dir) + try: + job_id = PbsJob.job_id_from_sample_id(sample_id, self._jobs_dir) + except (FileNotFoundError, KeyError) as e: + level_id = int(re.findall(r'L0?(\d*)', sample_id)[0]) + failed_results.setdefault(level_id, []).append((sample_id, "".format(e))) + continue + successful, failed, time = PbsJob.read_results(job_id, self._jobs_dir) # Split results to levels for level_id, results in successful.items(): - for res in results: - if res[0] in self._unfinished_sample_ids: - already_collected.add(res[0]) - successful_results.setdefault(level_id, []).append(res) - - for level_id, results in failed_results.items(): - for res in results: - if res[0] in self._unfinished_sample_ids: - already_collected.add(res[0]) - failed_results.setdefault(level_id, []).append(res) - - for level_id, results in times.items(): - for res in results: - if res[0] in self._unfinished_sample_ids: - times.setdefault(level_id, []).append(res) - times[level_id] = results + successful_results.setdefault(level_id, []).extend(results) + for level_id, results in failed.items(): + 
failed_results.setdefault(level_id, []).extend(results) + for level_id, results in time.items(): + times[level_id] = results[-1] + + # for level_id, results in sim_data.items(): + # sim_data_results.setdefault(level_id, []).extend(results) + + # for level_id, results in running_time.items(): + # running_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in extract_mesh.items(): + # extract_mesh_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in make_field.items(): + # make_field_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in generate_rnd.items(): + # generate_rnd_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in fine_flow.items(): + # fine_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in coarse_flow.items(): + # coarse_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + + level_id_sample_id_seed = PbsJob.get_scheduled_sample_ids(job_id, self._jobs_dir) + + for level_id, sample_id, _ in level_id_sample_id_seed: + already_collected.add(sample_id) # Delete pbsID file - it means job is finished # SamplingPoolPBS.delete_pbs_id_file(file) self._unfinished_sample_ids = set() - return successful_results, failed_results, times + return successful_results, failed_results, times#, sim_data_results def have_permanent_samples(self, sample_ids): """ - List of unfinished sample ids, the corresponding samples are collecting in next get_finished() call . 
+ List of unfinished sample ids, the corresponding samples are collecting in next get_finished() call """ self._unfinished_sample_ids = set(sample_ids) diff --git a/mlmc/tool/pbs_job.py b/mlmc/tool/pbs_job.py index a39b8646..6b8a6535 100644 --- a/mlmc/tool/pbs_job.py +++ b/mlmc/tool/pbs_job.py @@ -149,11 +149,13 @@ def calculate_samples(self): current_samples = [] # Currently saved samples start_time = time.time() + successful_samples_time = 0 times = [] # Sample calculation time - Tuple(level_id, [n samples, cumul time for n sample]) n_times = 0 successful_dest_dir = os.path.join(self._output_dir, SamplingPool.SEVERAL_SUCCESSFUL_DIR) for level_id, sample_id, seed in level_id_sample_id_seed: + start_time = time.time() # Deserialize level simulation config if level_id not in self._level_simulations: self._get_level_sim(level_id) @@ -161,9 +163,10 @@ def calculate_samples(self): # Start measuring time if current_level != level_id: # Save previous level times - times.append((current_level, time.time() - start_time, n_times)) + times.append((current_level, successful_samples_time, n_times)) n_times = 0 start_time = time.time() + successful_samples_time = 0 current_level = level_id level_sim = self._level_simulations[current_level] @@ -178,6 +181,10 @@ def calculate_samples(self): SamplingPool.move_successful_rm(sample_id, level_sim, output_dir=self._output_dir, dest_dir=SamplingPool.SEVERAL_SUCCESSFUL_DIR) + n_times += 1 + successful_samples_time += (time.time() - start_time) + print("sample time ", time.time() - start_time) + # times.append((current_level, time.time() - start_time, n_times)) else: failed.append((current_level, sample_id, err_msg)) SamplingPool.move_failed_rm(sample_id, level_sim, @@ -185,8 +192,8 @@ def calculate_samples(self): dest_dir=SamplingPool.FAILED_DIR) current_samples.append(sample_id) - n_times += 1 - times.append((current_level, time.time() - start_time, n_times)) + #n_times += 1 + times.append((current_level, successful_samples_time, 
n_times)) self._save_to_file(success, failed, times, current_samples) success = [] From 95633ded1e80f0640218d70694fa77063841eb53 Mon Sep 17 00:00:00 2001 From: martinspetlik Date: Mon, 12 Aug 2024 12:08:54 +0200 Subject: [PATCH 05/31] scikit-learn --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 765d3159..f9301bd2 100644 --- a/setup.py +++ b/setup.py @@ -61,5 +61,5 @@ def read(*names, **kwargs): # include automatically all files in the template MANIFEST.in include_package_data=True, zip_safe=False, - install_requires=['numpy', 'scipy', 'sklearn', 'h5py>=3.1.0', 'ruamel.yaml', 'attrs', 'gstools', 'memoization'], + install_requires=['numpy', 'scipy', 'scikit-learn', 'h5py>=3.1.0', 'ruamel.yaml', 'attrs', 'gstools', 'memoization'], ) From 9d94434dda74ad50eb06eec227a1d01998c2b74d Mon Sep 17 00:00:00 2001 From: martinspetlik Date: Mon, 12 Aug 2024 12:47:41 +0200 Subject: [PATCH 06/31] hdf estimator fix --- mlmc/estimator.py | 66 ++++++++++++++++++++++++++++------------------- mlmc/tool/hdf5.py | 2 +- 2 files changed, 41 insertions(+), 27 deletions(-) diff --git a/mlmc/estimator.py b/mlmc/estimator.py index 9e9fad64..b5957f80 100644 --- a/mlmc/estimator.py +++ b/mlmc/estimator.py @@ -82,7 +82,9 @@ def estimate_diff_vars_regression(self, n_created_samples, moments_fn=None, raw_ raw_vars, n_samples = self.estimate_diff_vars(moments_fn) sim_steps = np.squeeze(self._sample_storage.get_level_parameters()) + # print("sim steps ", sim_steps) vars = self._all_moments_variance_regression(raw_vars, sim_steps) + # We need to get n_ops_estimate from storage return vars, self._sample_storage.get_n_ops() @@ -95,6 +97,8 @@ def estimate_diff_vars(self, moments_fn=None): n_samples - shape L, num samples for individual levels. 
""" moments_mean = qe.estimate_mean(qe.moments(self._quantity, moments_fn)) + # print("moments_mean.l_vars ", moments_mean.l_vars) + # print("moments_mean.n_samples ", moments_mean.n_samples) return moments_mean.l_vars, moments_mean.n_samples def _all_moments_variance_regression(self, raw_vars, sim_steps): @@ -196,7 +200,7 @@ def est_bootstrap(self, n_subsamples=100, sample_vector=None, moments_fn=None): bs_l_means = [] bs_l_vars = [] for i in range(n_subsamples): - quantity_subsample = self.quantity.select(self.quantity.subsample(sample_vec=sample_vector)) + quantity_subsample = self.quantity.subsample(sample_vec=sample_vector) moments_quantity = qe.moments(quantity_subsample, moments_fn=moments_fn, mom_at_bottom=False) q_mean = qe.estimate_mean(moments_quantity) @@ -205,6 +209,9 @@ def est_bootstrap(self, n_subsamples=100, sample_vector=None, moments_fn=None): bs_l_means.append(q_mean.l_means) bs_l_vars.append(q_mean.l_vars) + self.bs_mean = bs_mean + self.bs_var = bs_var + self.mean_bs_mean = np.mean(bs_mean, axis=0) self.mean_bs_var = np.mean(bs_var, axis=0) self.mean_bs_l_means = np.mean(bs_l_means, axis=0) @@ -217,14 +224,15 @@ def est_bootstrap(self, n_subsamples=100, sample_vector=None, moments_fn=None): self._bs_level_mean_variance = self.var_bs_l_means * np.array(self._sample_storage.get_n_collected())[:, None] - def bs_target_var_n_estimated(self, target_var, sample_vec=None): + def bs_target_var_n_estimated(self, target_var, sample_vec=None, n_subsamples=100): sample_vec = determine_sample_vec(n_collected_samples=self._sample_storage.get_n_collected(), n_levels=self._sample_storage.get_n_levels(), sample_vector=sample_vec) - self.est_bootstrap(n_subsamples=300, sample_vector=sample_vec) + self.est_bootstrap(n_subsamples=n_subsamples, sample_vector=sample_vec) variances, n_ops = self.estimate_diff_vars_regression(sample_vec, raw_vars=self.mean_bs_l_vars) + n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, 
n_levels=self._sample_storage.get_n_levels()) @@ -304,10 +312,17 @@ def estimate_domain(quantity, sample_storage, quantile=None): except AttributeError: print("No collected values for level {}".format(level_id)) break - chunk_spec = next(sample_storage.chunks(n_samples=sample_storage.get_n_collected()[level_id])) - fine_samples = quantity.samples(chunk_spec)[..., 0] # Fine samples at level 0 + #print("sample_storage.get_n_collected()[level_id] ", type(sample_storage.get_n_collected()[level_id])) + print("sample_storage.get_n_collected() ", type(sample_storage.get_n_collected()[0])) + + if isinstance(sample_storage.get_n_collected()[level_id], AttributeError): + print("continue") + continue + chunk_spec = next(sample_storage.chunks(level_id=level_id, n_samples=sample_storage.get_n_collected()[level_id])) + fine_samples = quantity.samples(chunk_spec)[..., 0] # Fine samples at level 0 fine_samples = np.squeeze(fine_samples) + print("fine samples ", fine_samples) fine_samples = fine_samples[~np.isnan(fine_samples)] # remove NaN ranges.append(np.percentile(fine_samples, [100 * quantile, 100 * (1 - quantile)])) @@ -399,26 +414,26 @@ def consistency_check(quantity, sample_storage=None): return cons_check_val -def estimate_domain(quantity, sample_storage, quantile=None): - """ - Estimate moments domain from MLMC samples. 
- :param quantity: mlmc.quantity.Quantity instance, represents the real quantity - :param sample_storage: mlmc.sample_storage.SampleStorage instance, provides all the samples - :param quantile: float in interval (0, 1), None means whole sample range - :return: lower_bound, upper_bound - """ - ranges = [] - if quantile is None: - quantile = 0.01 - - for level_id in range(sample_storage.get_n_levels()): - fine_samples = quantity.samples(ChunkSpec(level_id=level_id, n_samples=sample_storage.get_n_collected()[0]))[..., 0] - - fine_samples = np.squeeze(fine_samples) - ranges.append(np.percentile(fine_samples, [100 * quantile, 100 * (1 - quantile)])) - - ranges = np.array(ranges) - return np.min(ranges[:, 0]), np.max(ranges[:, 1]) +# def estimate_domain(quantity, sample_storage, quantile=None): +# """ +# Estimate moments domain from MLMC samples. +# :param quantity: mlmc.quantity.Quantity instance, represents the real quantity +# :param sample_storage: mlmc.sample_storage.SampleStorage instance, provides all the samples +# :param quantile: float in interval (0, 1), None means whole sample range +# :return: lower_bound, upper_bound +# """ +# ranges = [] +# if quantile is None: +# quantile = 0.01 +# +# for level_id in range(sample_storage.get_n_levels()): +# fine_samples = quantity.samples(ChunkSpec(level_id=level_id, n_samples=sample_storage.get_n_collected()[0]))[..., 0] +# +# fine_samples = np.squeeze(fine_samples) +# ranges.append(np.percentile(fine_samples, [100 * quantile, 100 * (1 - quantile)])) +# +# ranges = np.array(ranges) +# return np.min(ranges[:, 0]), np.max(ranges[:, 1]) def coping_with_high_kurtosis(vars, costs, kurtosis, kurtosis_threshold=100): @@ -531,4 +546,3 @@ def determine_n_samples(n_levels, n_samples=None): return n_samples - diff --git a/mlmc/tool/hdf5.py b/mlmc/tool/hdf5.py index 62a9ac99..32e24149 100644 --- a/mlmc/tool/hdf5.py +++ b/mlmc/tool/hdf5.py @@ -357,7 +357,7 @@ def chunks(self, n_samples=None): dataset = 
hdf_file["/".join([self.level_group_path, "collected_values"])] if n_samples is not None: - yield ChunkSpec(chunk_id=0, chunk_slice=slice(0, n_samples, ...), level_id=int(self.level_id)) + yield ChunkSpec(chunk_id=0, chunk_slice=slice(0, n_samples, 1), level_id=int(self.level_id)) else: for chunk_id, chunk in enumerate(dataset.iter_chunks()): yield ChunkSpec(chunk_id=chunk_id, chunk_slice=chunk[0], level_id=int(self.level_id)) # slice, level_id From edf1f5a4160eb68e34879024acf4904fdc0ced8e Mon Sep 17 00:00:00 2001 From: martinspetlik Date: Mon, 12 Aug 2024 13:13:05 +0200 Subject: [PATCH 07/31] srf --- mlmc/random/correlated_field.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/mlmc/random/correlated_field.py b/mlmc/random/correlated_field.py index b5c809a7..cebd1572 100644 --- a/mlmc/random/correlated_field.py +++ b/mlmc/random/correlated_field.py @@ -198,7 +198,9 @@ def set_points(self, points, region_ids=[], region_map={}): :return: """ self.n_elements = len(points) - assert len(points) == len(region_ids) + print("n elements: {}, len(points): {}".format(self.n_elements, len(points))) + + #assert len(points) == len(region_ids) reg_points = {} for i, reg_id in enumerate(region_ids): reg_list = reg_points.get(reg_id, []) @@ -504,14 +506,14 @@ def _sample(self): class GSToolsSpatialCorrelatedField(RandomFieldBase): - def __init__(self, model, mode_no=1000, log=False, sigma=1): + def __init__(self, model, mode_no=1000, log=False, sigma=1, seed=None): """ :param model: instance of covariance model class, which parent is gstools.covmodel.CovModel :param mode_no: number of Fourier modes, default: 1000 as in gstools package """ self.model = model self.mode_no = mode_no - self.srf = gstools.SRF(model, mode_no=mode_no) + self.srf = gstools.SRF(model, mode_no=mode_no, seed=seed) self.mu = self.srf.mean self.sigma = sigma self.dim = model.dim @@ -753,7 +755,3 @@ def _sample(self): :return: Random field evaluated in points given by 
'set_points'. """ return self.random_field() - - if not self.log: - return field - return np.exp(field) From 6e06ecc6e8a3640a05b3dc8bfd68136b23f89051 Mon Sep 17 00:00:00 2001 From: martinspetlik Date: Wed, 21 Aug 2024 14:34:09 +0200 Subject: [PATCH 08/31] ruamel.yaml lower version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ad19c689..83fe5a4a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ numpy scipy sklearn h5py>=3.1.0 -ruamel.yaml +ruamel.yaml<0.18.0 attrs gstools memoization From bc22fa969fb4f56ceec5ffe181f182aa42ab415c Mon Sep 17 00:00:00 2001 From: martinspetlik Date: Wed, 21 Aug 2024 14:48:10 +0200 Subject: [PATCH 09/31] remove py36 from tox --- tox.ini | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index 7a4d4ef6..704732ff 100644 --- a/tox.ini +++ b/tox.ini @@ -3,12 +3,11 @@ # content of: tox.ini , put in same dir as setup.py [tox] -envlist = py36, py37, py38 +envlist = py37, py38 #envlist = py36 [gh-actions] python = - 3.6: py36 3.7: py37 3.8: py38 From b6f5df1e43806935bf7c2a0264f92e0842f24a30 Mon Sep 17 00:00:00 2001 From: martinspetlik Date: Wed, 21 Aug 2024 15:10:21 +0200 Subject: [PATCH 10/31] ruaml.yaml 0.17 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 83fe5a4a..8931ac1d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ numpy scipy sklearn h5py>=3.1.0 -ruamel.yaml<0.18.0 +ruamel.yaml==0.17.26 attrs gstools memoization From 9bc3168df84b64d69bf662a2247d537d3ecc2675 Mon Sep 17 00:00:00 2001 From: martinspetlik Date: Mon, 14 Jul 2025 15:37:39 +0200 Subject: [PATCH 11/31] error msg print --- mlmc/sampling_pool.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlmc/sampling_pool.py b/mlmc/sampling_pool.py index 0d402325..cbbbb360 100644 --- a/mlmc/sampling_pool.py +++ b/mlmc/sampling_pool.py @@ -122,6 +122,7 @@ 
def calculate_sample(sample_id, level_sim, work_dir=None, seed=None): except Exception: str_list = traceback.format_exception(*sys.exc_info()) err_msg = "".join(str_list) + print("Error msg: ", err_msg) return sample_id, res, err_msg, running_time From b2b0a9038458fa4cfe2e6b8c69e067f8f8b05b40 Mon Sep 17 00:00:00 2001 From: martinspetlik Date: Wed, 16 Jul 2025 10:51:07 +0200 Subject: [PATCH 12/31] tox update --- mlmc/sample_storage_hdf.py | 2 -- mlmc/sim/synth_simulation.py | 3 ++- test/test_hdf.py | 4 ++-- test/test_sampling_pools.py | 4 +++- tox.ini | 10 +++------- 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/mlmc/sample_storage_hdf.py b/mlmc/sample_storage_hdf.py index 68b1af9d..a53a8c64 100644 --- a/mlmc/sample_storage_hdf.py +++ b/mlmc/sample_storage_hdf.py @@ -4,8 +4,6 @@ from mlmc.sample_storage import SampleStorage from mlmc.quantity.quantity_spec import QuantitySpec, ChunkSpec import mlmc.tool.hdf5 as hdf -import warnings -warnings.simplefilter("ignore", np.VisibleDeprecationWarning) class SampleStorageHDF(SampleStorage): diff --git a/mlmc/sim/synth_simulation.py b/mlmc/sim/synth_simulation.py index 53417219..0a5176c6 100644 --- a/mlmc/sim/synth_simulation.py +++ b/mlmc/sim/synth_simulation.py @@ -1,5 +1,5 @@ import os -import ruamel.yaml as yaml +import ruamel.yaml as ruyaml import numpy as np from typing import List import scipy.stats as stats @@ -291,6 +291,7 @@ def n_ops_estimate(self, step): @staticmethod def _read_config(): with open(os.path.join(os.getcwd(), SynthSimulationWorkspace.CONFIG_FILE)) as file: + yaml = ruyaml.YAML(typ='rt') config = yaml.load(file) return config diff --git a/test/test_hdf.py b/test/test_hdf.py index 8778a60b..f06ec98e 100644 --- a/test/test_hdf.py +++ b/test/test_hdf.py @@ -87,10 +87,10 @@ def load_from_file(hdf_obj, obligatory_attributes): SCHEDULED_SAMPLES = ['L00_S0000000', 'L00_S0000001', 'L00_S0000002', 'L00_S0000003', 'L00_S0000004'] -RESULT_DATA_DTYPE = [("value", np.float), ("time", np.float)] 
+RESULT_DATA_DTYPE = [("value", np.float64), ("time", np.float64)] COLLECTED_SAMPLES = np.array([['L00S0000000', (np.array([10, 20]), np.array([5, 6]))], - ['L00S0000001', (np.array([1, 2]), np.array([50, 60]))]]) + ['L00S0000001', (np.array([1, 2]), np.array([50, 60]))]], dtype=object) diff --git a/test/test_sampling_pools.py b/test/test_sampling_pools.py index 7d87251f..f3b72361 100644 --- a/test/test_sampling_pools.py +++ b/test/test_sampling_pools.py @@ -33,7 +33,9 @@ simulation_config = dict(distr='norm', complexity=2, nan_fraction=failed_fraction, sim_method='_sample_fn') with open('synth_sim_config_test.yaml', "w") as file: - yaml.dump(simulation_config, file, default_flow_style=False) + yaml = yaml.YAML(typ='full') + yaml.dump(simulation_config, file) + shutil.copyfile('synth_sim_config_test.yaml', os.path.join(work_dir, 'synth_sim_config.yaml')) sim_config_workspace = {"config_yaml": os.path.join(work_dir, 'synth_sim_config.yaml')} diff --git a/tox.ini b/tox.ini index 704732ff..71e6f5f3 100644 --- a/tox.ini +++ b/tox.ini @@ -1,15 +1,11 @@ - - - # content of: tox.ini , put in same dir as setup.py [tox] -envlist = py37, py38 -#envlist = py36 +envlist = py310, py312 [gh-actions] python = - 3.7: py37 - 3.8: py38 + 3.10: py310 + 3.12: py312 [testenv] From a12dfc4bffbdd47e930b34e963aba044ff7131a9 Mon Sep 17 00:00:00 2001 From: martinspetlik Date: Wed, 16 Jul 2025 10:55:29 +0200 Subject: [PATCH 13/31] gh actions python version update --- .github/workflows/pythonpackage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index e3bddcb2..9b13faa5 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: [3.10, 3.12] steps: - uses: actions/checkout@v2 From 7f16981868f0a27d64039603a1050dd7d8e956d4 Mon Sep 17 00:00:00 2001 From: 
martinspetlik Date: Wed, 16 Jul 2025 10:58:25 +0200 Subject: [PATCH 14/31] gh actions python version update fix --- .github/workflows/pythonpackage.yml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 9b13faa5..6da8b668 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.10, 3.12] + python-version: [3.10.18, 3.12] steps: - uses: actions/checkout@v2 diff --git a/requirements.txt b/requirements.txt index 8931ac1d..f57915c6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ numpy scipy -sklearn +scikit-learn h5py>=3.1.0 ruamel.yaml==0.17.26 attrs From d15e24e92f1498f6a646b7295a60b7f8eceaef68 Mon Sep 17 00:00:00 2001 From: martinspetlik Date: Wed, 16 Jul 2025 11:26:43 +0200 Subject: [PATCH 15/31] sampler test ignore rm err --- test/test_sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_sampler.py b/test/test_sampler.py index d559b6ea..e03dab97 100644 --- a/test/test_sampler.py +++ b/test/test_sampler.py @@ -48,7 +48,7 @@ def test_sampler_hdf(): work_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '_test_tmp') if os.path.exists(work_dir): - shutil.rmtree(work_dir) + shutil.rmtree(work_dir, ignore_errors=True) os.makedirs(work_dir) file_path = os.path.join(work_dir, "mlmc_test.hdf5") storage = mlmc.SampleStorageHDF(file_path=file_path) From b3d6367dbcf174bbbf579e1d4348704111c5d0fc Mon Sep 17 00:00:00 2001 From: martinspetlik Date: Wed, 15 Oct 2025 14:28:35 +0200 Subject: [PATCH 16/31] improved comments generated --- mlmc/level_simulation.py | 39 +- mlmc/moments.py | 249 +++++++-- mlmc/plot/diagnostic_plots.py | 187 +++++-- mlmc/plot/violinplot.py | 145 ++++- mlmc/quantity/quantity.py | 459 ++++++++++------ mlmc/quantity/quantity_estimate.py | 192 ++++--- 
mlmc/quantity/quantity_spec.py | 46 +- mlmc/quantity/quantity_types.py | 276 +++++++--- mlmc/random/correlated_field.py | 571 ++++++++------------ mlmc/random/frac_geom.py | 140 ----- mlmc/sample_storage.py | 88 +-- mlmc/sample_storage_hdf.py | 204 ++++--- mlmc/sampler.py | 203 ++++--- mlmc/sampling_pool.py | 419 +++++++++----- mlmc/sim/simulation.py | 42 +- mlmc/sim/synth_simulation.py | 225 ++++++++ mlmc/tool/context_statprof.py | 13 - mlmc/tool/distribution.py | 39 -- mlmc/tool/flow_mc.py | 287 +++++----- mlmc/tool/gmsh_io.py | 180 ++++--- mlmc/tool/hdf5.py | 293 ++++++---- mlmc/tool/pbs_job.py | 227 ++++---- mlmc/tool/process_base.py | 251 ++++----- mlmc/tool/simple_distribution.py | 840 +++++++++++------------------ mlmc/tool/stats_tests.py | 78 +-- 25 files changed, 3176 insertions(+), 2517 deletions(-) delete mode 100644 mlmc/random/frac_geom.py delete mode 100644 mlmc/tool/context_statprof.py diff --git a/mlmc/level_simulation.py b/mlmc/level_simulation.py index 67c5c23d..593f28c8 100644 --- a/mlmc/level_simulation.py +++ b/mlmc/level_simulation.py @@ -1,34 +1,37 @@ import attr -from typing import List, Dict, Any +from typing import List, Dict, Any, Optional, Callable from mlmc.quantity.quantity_spec import QuantitySpec @attr.s(auto_attribs=True) class LevelSimulation: """ - This class is used to pass simulation data at a given level between a Sampler and a SamplingPool - User shouldn't change this class + Class for passing simulation configuration and metadata for a given level between + a Sampler and a SamplingPool. + + User shouldn't modify this class manually. """ + config_dict: Dict[Any, Any] - # Calculate configuration. + # Level-specific simulation configuration dictionary. - common_files: List[str] = None - # List of files in the level workspace to copy/symlink to the sample workspace. + common_files: Optional[List[str]] = None + # List of files in the level workspace to copy or symlink to the sample workspace. 
need_sample_workspace: bool = False - # If the simulation needs sample workspace at all. + # Whether the simulation requires an individual workspace for each sample. - task_size: int = 0 - # Relative size of the simulation at this level. - # When using PBS, keep in mind that the pbs job size is the sum of task_sizes, and if this sum is above 1, - # the job is scheduled and PBS scheduler manages it + task_size: float = 0.0 + # Relative size (or computational cost) of the simulation task at this level. + # When using PBS or SLURM, note that the job size is the sum of task_sizes. + # If this sum exceeds 1.0, the job is queued and scheduled by the system. - ### User shouldn't modify the following attributes ### - _calculate: Any = None - # Calculate method + ### Internal attributes — users should not modify these ### + _calculate: Optional[Callable] = None + # Calculation method used internally by the sampler. - _level_id: int = None - # Level id is set by mlmc.sampler.Sampler. It is internal variable and user shouldn't change it. + _level_id: Optional[int] = None + # Level identifier, set automatically by mlmc.sampler.Sampler. - _result_format: List[QuantitySpec] = None - # Simulation result format + _result_format: Optional[List[QuantitySpec]] = None + # Format specification for simulation results (defined by QuantitySpec instances). diff --git a/mlmc/moments.py b/mlmc/moments.py index 2329566e..ba1f932a 100644 --- a/mlmc/moments.py +++ b/mlmc/moments.py @@ -5,9 +5,26 @@ class Moments: """ - Class for calculating moments of a random variable + Base class for computing moment functions of a random variable. + + Provides transformation, scaling, and evaluation utilities common + to various types of generalized moment bases (monomial, Fourier, Legendre, etc.). """ + def __init__(self, size, domain, log=False, safe_eval=True): + """ + Initialize the moment function set. + + :param size: int + Number of moment functions. 
+ :param domain: tuple(float, float) + Domain of the input variable (min, max). + :param log: bool + If True, use logarithmic transformation of the domain. + :param safe_eval: bool + If True, clip transformed values outside the reference domain + and replace them with NaN. + """ assert size > 0 self.size = size self.domain = domain @@ -25,6 +42,7 @@ def __init__(self, size, domain, log=False, safe_eval=True): self._linear_scale = (self.ref_domain[1] - self.ref_domain[0]) / diff self._linear_shift = lin_domain[0] + # Define transformation and inverse transformation functions if safe_eval and log: self.transform = lambda val: self.clip(self.linear(np.log(val))) self.inv_transform = lambda ref: np.exp(self.inv_linear(ref)) @@ -40,78 +58,153 @@ def __init__(self, size, domain, log=False, safe_eval=True): def __eq__(self, other): """ - Compare two moment functions. Equal if they returns same values. + Compare two Moments objects for equality. + + :param other: Moments + Another Moments instance. + :return: bool + True if both instances have the same parameters and configuration. """ - return type(self) is type(other) \ - and self.size == other.size \ - and np.all(self.domain == other.domain) \ - and self._is_log == other._is_log \ - and self._is_clip == other._is_clip + return ( + type(self) is type(other) + and self.size == other.size + and np.all(self.domain == other.domain) + and self._is_log == other._is_log + and self._is_clip == other._is_clip + ) def change_size(self, size): """ - Return moment object with different size. - :param size: int, new number of _moments_fn + Return a new moment object with a different number of basis functions. + + :param size: int + New number of moment functions. + :return: Moments + New instance of the same class with updated size. 
""" return self.__class__(size, self.domain, self._is_log, self._is_clip) def clip(self, value): """ - Remove outliers and replace them with NaN - :param value: array of numbers - :return: masked_array, out + Clip values to the reference domain, replacing outliers with NaN. + + :param value: array-like + Input data to be clipped. + :return: ndarray + Array with out-of-bound values replaced by NaN. """ - # Masked array out = ma.masked_outside(value, self.ref_domain[0], self.ref_domain[1]) - # Replace outliers with NaN return ma.filled(out, np.nan) def linear(self, value): + """Apply linear transformation to reference domain.""" return (value - self._linear_shift) * self._linear_scale + self.ref_domain[0] def inv_linear(self, value): + """Inverse linear transformation back to the original domain.""" return (value - self.ref_domain[0]) / self._linear_scale + self._linear_shift def __call__(self, value): + """Evaluate all moment functions for the given value(s).""" return self._eval_all(value, self.size) def eval(self, i, value): - return self._eval_all(value, i+1)[:, -1] + """ + Evaluate the i-th moment function. + + :param i: int + Index of the moment function to evaluate (0-based). + :param value: float or array-like + Input value(s). + :return: ndarray + Values of the i-th moment function. + """ + return self._eval_all(value, i + 1)[:, -1] def eval_single_moment(self, i, value): """ - Be aware this implementation is inefficient for large i - :param i: int, order of moment - :param value: float - :return: np.ndarray + Evaluate a single moment function (less efficient for large i). + + :param i: int + Order of the moment. + :param value: float or array-like + Input value(s). + :return: ndarray + Evaluated moment values. """ - return self._eval_all(value, i+1)[..., i] + return self._eval_all(value, i + 1)[..., i] def eval_all(self, value, size=None): + """ + Evaluate all moments up to the specified size. + + :param value: float or array-like + Input value(s). 
+ :param size: int or None + Number of moments to evaluate. If None, use self.size. + :return: ndarray + Matrix of evaluated moments. + """ if size is None: size = self.size return self._eval_all(value, size) def eval_all_der(self, value, size=None, degree=1): + """ + Evaluate derivatives of all moment functions. + + :param value: float or array-like + Input value(s). + :param size: int or None + Number of moments to evaluate. + :param degree: int + Derivative degree (1 for first derivative, etc.). + :return: ndarray + Matrix of evaluated derivatives. + """ if size is None: size = self.size return self._eval_all_der(value, size, degree) def eval_diff(self, value, size=None): + """ + Evaluate first derivatives of all moment functions. + + :param value: float or array-like + Input value(s). + :param size: int or None + Number of moments to evaluate. + :return: ndarray + Matrix of first derivatives. + """ if size is None: size = self.size return self._eval_diff(value, size) def eval_diff2(self, value, size=None): + """ + Evaluate second derivatives of all moment functions. + + :param value: float or array-like + Input value(s). + :param size: int or None + Number of moments to evaluate. + :return: ndarray + Matrix of second derivatives. + """ if size is None: size = self.size return self._eval_diff2(value, size) +# ------------------------------------------------------------------------- +# Specific moment types +# ------------------------------------------------------------------------- class Monomial(Moments): """ - Monomials generalized moments + Monomial basis functions for generalized moment evaluation. 
""" + def __init__(self, size, domain=(0, 1), ref_domain=None, log=False, safe_eval=True): if ref_domain is not None: self.ref_domain = ref_domain @@ -120,33 +213,49 @@ def __init__(self, size, domain=(0, 1), ref_domain=None, log=False, safe_eval=Tr super().__init__(size, domain, log=log, safe_eval=safe_eval) def _eval_all(self, value, size): - # Create array from values and transform values outside the ref domain + """ + Evaluate monomial basis (Vandermonde matrix). + + :param value: array-like + Input values. + :param size: int + Number of moments to compute. + :return: ndarray + Vandermonde matrix of monomials. + """ t = self.transform(np.atleast_1d(value)) - # Vandermonde matrix return np.polynomial.polynomial.polyvander(t, deg=size - 1) def eval(self, i, value): + """Evaluate the i-th monomial t^i.""" t = self.transform(np.atleast_1d(value)) - return t**i + return t ** i class Fourier(Moments): """ - Fourier functions generalized moments + Fourier basis functions for generalized moment evaluation. """ - def __init__(self, size, domain=(0, 2*np.pi), ref_domain=None, log=False, safe_eval=True): + + def __init__(self, size, domain=(0, 2 * np.pi), ref_domain=None, log=False, safe_eval=True): if ref_domain is not None: self.ref_domain = ref_domain else: - self.ref_domain = (0, 2*np.pi) - + self.ref_domain = (0, 2 * np.pi) super().__init__(size, domain, log=log, safe_eval=safe_eval) def _eval_all(self, value, size): - # Transform values + """ + Evaluate Fourier moment basis (cosine/sine terms). + + :param value: array-like + Input values. + :param size: int + Number of moments to compute. + :return: ndarray + Matrix of evaluated Fourier functions. 
+ """ t = self.transform(np.atleast_1d(value)) - - # Half the number of moments R = int(size / 2) shorter_sin = 1 - int(size % 2) k = np.arange(1, R + 1) @@ -154,26 +263,33 @@ def _eval_all(self, value, size): res = np.empty((len(t), size)) res[:, 0] = 1 - - # Odd column index res[:, 1::2] = np.cos(kx[:, :]) - # Even column index res[:, 2::2] = np.sin(kx[:, : R - shorter_sin]) return res def eval(self, i, value): + """ + Evaluate a single Fourier basis function. + + :param i: int + Index of the moment function. + :param value: float or array-like + Input values. + :return: ndarray + Evaluated function values. + """ t = self.transform(np.atleast_1d(value)) if i == 0: return 1 elif i % 2 == 1: - return np.sin( (i - 1) / 2 * t) + return np.sin((i - 1) / 2 * t) else: return np.cos(i / 2 * t) class Legendre(Moments): """ - Legendre polynomials generalized moments + Legendre polynomial basis functions for generalized moments. """ def __init__(self, size, domain, ref_domain=None, log=False, safe_eval=True): @@ -182,6 +298,7 @@ def __init__(self, size, domain, ref_domain=None, log=False, safe_eval=True): else: self.ref_domain = (-1, 1) + # Precompute derivative matrices self.diff_mat = np.zeros((size, size)) for n in range(size - 1): self.diff_mat[n, n + 1::2] = 2 * n + 1 @@ -190,19 +307,26 @@ def __init__(self, size, domain, ref_domain=None, log=False, safe_eval=True): super().__init__(size, domain, log, safe_eval) def _eval_value(self, x, size): - return np.polynomial.legendre.legvander(x, deg=size-1) + """Evaluate Legendre polynomials up to the given order.""" + return np.polynomial.legendre.legvander(x, deg=size - 1) def _eval_all(self, value, size): + """Evaluate all Legendre polynomials.""" value = self.transform(np.atleast_1d(value)) return np.polynomial.legendre.legvander(value, deg=size - 1) def _eval_all_der(self, value, size, degree=1): """ - Derivative of Legendre polynomials - :param value: values to evaluate - :param size: number of moments - :param degree: 
degree of derivative - :return: + Evaluate derivatives of Legendre polynomials. + + :param value: array-like + Points at which to evaluate. + :param size: int + Number of moment functions. + :param degree: int + Derivative order. + :return: ndarray + Matrix of derivative values. """ value = self.transform(np.atleast_1d(value)) eval_values = np.empty((value.shape + (size,))) @@ -211,7 +335,7 @@ def _eval_all_der(self, value, size, degree=1): if s == 0: coef = [1] else: - coef = np.zeros(s+1) + coef = np.zeros(s + 1) coef[-1] = 1 coef = np.polynomial.legendre.legder(coef, degree) @@ -219,26 +343,38 @@ def _eval_all_der(self, value, size, degree=1): return eval_values def _eval_diff(self, value, size): + """Evaluate first derivatives using precomputed differentiation matrix.""" t = self.transform(np.atleast_1d(value)) P_n = np.polynomial.legendre.legvander(t, deg=size - 1) return P_n @ self.diff_mat def _eval_diff2(self, value, size): + """Evaluate second derivatives using precomputed differentiation matrix.""" t = self.transform(np.atleast_1d(value)) P_n = np.polynomial.legendre.legvander(t, deg=size - 1) return P_n @ self.diff2_mat class TransformedMoments(Moments): + """ + Linearly transformed moment basis. + + Creates a new set of moment functions as linear combinations + of another existing set of basis functions. + """ + def __init__(self, other_moments, matrix): """ - Set a new moment functions as linear combination of the previous. - new_moments = matrix . old_moments + Initialize transformed moment functions. + + :param other_moments: Moments + Original set of moment functions. + :param matrix: ndarray + Linear transformation matrix where: + new_moments = matrix @ old_moments - We assume that new_moments[0] is still == 1. That means - first row of the matrix must be (1, 0 , ...). - :param other_moments: Original _moments_fn. - :param matrix: Linear combinations of the original _moments_fn. 
+ The first row must correspond to (1, 0, 0, ...), + ensuring that new_moments[0] = 1. """ n, m = matrix.shape assert m == other_moments.size @@ -248,27 +384,34 @@ def __init__(self, other_moments, matrix): self._transform = matrix def __eq__(self, other): - return type(self) is type(other) \ - and self.size == other.size \ - and self._origin == other._origin \ - and np.all(self._transform == other._transform) + """Check equality with another TransformedMoments object.""" + return ( + type(self) is type(other) + and self.size == other.size + and self._origin == other._origin + and np.all(self._transform == other._transform) + ) def _eval_all(self, value, size): + """Evaluate all transformed moment functions.""" orig_moments = self._origin._eval_all(value, self._origin.size) x1 = np.matmul(orig_moments, self._transform.T) return x1[..., :size] def _eval_all_der(self, value, size, degree=1): + """Evaluate derivatives of transformed moment functions.""" orig_moments = self._origin._eval_all_der(value, self._origin.size, degree=degree) x1 = np.matmul(orig_moments, self._transform.T) return x1[..., :size] def _eval_diff(self, value, size): + """Evaluate first derivatives of transformed moment functions.""" orig_moments = self._origin.eval_diff(value, self._origin.size) x1 = np.matmul(orig_moments, self._transform.T) return x1[..., :size] def _eval_diff2(self, value, size): + """Evaluate second derivatives of transformed moment functions.""" orig_moments = self._origin.eval_diff2(value, self._origin.size) x1 = np.matmul(orig_moments, self._transform.T) return x1[..., :size] diff --git a/mlmc/plot/diagnostic_plots.py b/mlmc/plot/diagnostic_plots.py index 5d6a844e..cefd0520 100644 --- a/mlmc/plot/diagnostic_plots.py +++ b/mlmc/plot/diagnostic_plots.py @@ -1,97 +1,166 @@ import numpy as np -import scipy.stats as st -from scipy import interpolate import matplotlib matplotlib.rcParams.update({'font.size': 22}) -from matplotlib.patches import Patch import matplotlib.pyplot as 
plt -# def log_var_level(variances, l_vars, err_variances=0, err_l_vars=0, moments=[1,2,3,4]): -# fig, ax1 = plt.subplots(figsize=(8, 5)) -# for m in moments: -# # line1, = ax1.errorbar(np.log2(variances[m]), yerr=err_variances, label="m={}".format(m), marker="o") -# # line2, = ax1.errorbar(np.log2(l_vars[:, m]), yerr=err_l_vars, label="m={}".format(m), marker="s") -# #line1, = ax1.plot(np.log2(variances[m]), label="m={}".format(m), marker="o") -# line2, = ax1.plot(np.log2(l_vars[:, m]), label="m={}".format(m), marker="s") -# -# ax1.set_ylabel('log' + r'$_2$' + 'variance') -# ax1.set_xlabel('level' + r'$l$') -# plt.legend() -# #plt.savefig("MLMC_cost_saves.pdf") -# plt.show() +def log_var_per_level(l_vars, levels=None, moments=[0], err_l_vars=None): + """ + Plot log₂ of variance per level and fit a slope to estimate the decay rate β. + The function plots the base-2 logarithm of the variance for each level + and fits a linear model to estimate the convergence rate β, based on + the slope of log₂(variance) vs. level. -def log_var_per_level(l_vars, err_variances=0, err_l_vars=0, moments=[1, 2, 3, 4]): - fig, ax1 = plt.subplots(figsize=(8, 5)) - for m in moments: - # line1, = ax1.errorbar(np.log2(variances[m]), yerr=err_variances, label="m={}".format(m), marker="o") - # line2, = ax1.errorbar(np.log2(l_vars[:, m]), yerr=err_l_vars, label="m={}".format(m), marker="s") - #line1, = ax1.plot(np.log2(variances[m]), label="m={}".format(m), marker="o") - line2, = ax1.plot(np.log2(l_vars[:, m]), label="m={}".format(m), marker="s") - - ax1.set_ylabel('log' + r'$_2$' + 'variance') - ax1.set_xlabel('level' + r'$l$') - plt.legend() - #plt.savefig("MLMC_cost_saves.pdf") - plt.show() + :param l_vars: Array of shape (n_levels, n_moments) representing + the variance of each moment at each level. + :param levels: Optional array of level indices (default: np.arange(n_levels)). + :param moments: List of moment indices to include in the plot. 
+ :param err_l_vars: Optional array of errors corresponding to l_vars. + :return: None + """ + n_levels = l_vars.shape[0] + if levels is None: + levels = np.arange(n_levels) + fig, ax = plt.subplots(figsize=(8, 5)) -# def log_mean_level(means, l_means, err_means=0, err_l_means=0, moments=[1,2,3,4]): -# fig, ax1 = plt.subplots(figsize=(8, 5)) -# for m in moments: -# # line1, = ax1.errorbar(np.log2(variances[m]), yerr=err_variances, label="m={}".format(m), marker="o") -# # line2, = ax1.errorbar(np.log2(l_vars[:, m]), yerr=err_l_vars, label="m={}".format(m), marker="s") -# #line1, = ax1.plot(np.log2(variances[m]), label="m={}".format(m), marker="o") -# line2, = ax1.plot(np.log2(np.abs(l_means[:, m])), label="m={}".format(m), marker="s") -# -# ax1.set_ylabel('log' + r'$_2$' + 'mean') -# ax1.set_xlabel('level' + r'$l$') -# plt.legend() -# #plt.savefig("MLMC_cost_saves.pdf") -# plt.show() + for m in moments: + y = np.log2(l_vars[:, m]) + ax.plot(levels, y, 'o-', label=f'm={m}') + + slope, intercept = np.polyfit(levels, y, 1) + beta = -slope + ax.plot( + levels, + slope * levels + intercept, + '--', + label=f'fit m={m}: slope={slope:.2f}, beta≈{beta:.2f}' + ) + + ax.set_ylabel(r'$\log_2 \, V_\ell$') + ax.set_xlabel('level $\ell$') + ax.legend() + ax.grid(True, which="both") + plt.tight_layout() + plt.show() def log_mean_per_level(l_means, err_means=0, err_l_means=0, moments=[1, 2, 3, 4]): + """ + Plot log₂ of absolute mean per level for specified statistical moments. + + :param l_means: Array of mean values per level and moment. + :param err_means: Optional array of mean estimation errors (unused). + :param err_l_means: Optional array of level-mean estimation errors (unused). + :param moments: List of moment indices to include in the plot. 
+ :return: None + """ fig, ax1 = plt.subplots(figsize=(8, 5)) + print("l means ", l_means) for m in moments: - # line1, = ax1.errorbar(np.log2(variances[m]), yerr=err_variances, label="m={}".format(m), marker="o") - # line2, = ax1.errorbar(np.log2(l_vars[:, m]), yerr=err_l_vars, label="m={}".format(m), marker="s") - #line1, = ax1.plot(np.log2(variances[m]), label="m={}".format(m), marker="o") - line2, = ax1.plot(np.log2(np.abs(l_means[:, m])), label="m={}".format(m), marker="s") + line2, = ax1.plot(np.log2(np.abs(l_means[:, m])), label=f"m={m}", marker="s") ax1.set_ylabel('log' + r'$_2$' + 'mean') ax1.set_xlabel('level' + r'$l$') plt.legend() - #plt.savefig("MLMC_cost_saves.pdf") + plt.tight_layout() + plt.show() + + +def sample_cost_per_level(costs, levels=None): + """ + Plot log₂ of sample cost per level and fit a slope to estimate γ. + + The slope of the linear regression line provides an estimate of the + cost scaling parameter γ. + + :param costs: Array of computational costs per sample for each level. + :param levels: Optional array of level indices (default: 0, 1, ...). + :return: Estimated γ (float), the slope of the fitted line. + """ + n_levels = len(costs) + if levels is None: + levels = np.arange(n_levels) + + y = np.log2(costs) + slope, intercept = np.polyfit(levels, y, 1) + gamma = slope + + fig, ax = plt.subplots(figsize=(8, 5)) + ax.plot(levels, y, 'o-', label='log2(cost)') + ax.plot( + levels, + slope * levels + intercept, + '--', + label=f'fit: slope={slope:.2f}, gamma≈{gamma:.2f}' + ) + + ax.set_ylabel(r'$\log_2 \, C_\ell$') + ax.set_xlabel('level $\ell$') + ax.legend() + ax.grid(True, which="both") + plt.tight_layout() plt.show() + return gamma + + +def variance_to_cost_ratio(l_vars, costs, moments=[1, 2, 3, 4]): + """ + Plot the log₂ of variance-to-cost ratio per level for given statistical moments. 
-def sample_cost_per_level(costs): + The ratio Vₗ/Cₗ is computed for each level, and the slope of its + log₂-linear fit indicates the decay behavior relative to computational cost. + + :param l_vars: Array of variances per level and moment (shape: n_levels × n_moments). + :param costs: Array of costs per sample for each level. + :param moments: List of moment indices to include in the plot. + :return: None + """ + print("l_vars ", l_vars) + print(costs) + n_levels = l_vars.shape[0] + levels = np.arange(n_levels) fig, ax1 = plt.subplots(figsize=(8, 5)) - line2, = ax1.plot(np.log2(costs), marker="s") + print('costs ', costs) + print("levels ", levels) + for m in moments: + line2, = ax1.plot(np.log2(l_vars[:, m] / costs), label=f"m={m}", marker="s") - ax1.set_ylabel('log' + r'$_2$' + 'cost per sample') + print("l vars ", l_vars[:, m]) + print("np.log2(l_vars[:, m]/costs) ", np.log2(l_vars[:, m] / costs)) + + # Fit a straight line: log2(V/C) ≈ a + b * level + coeffs = np.polyfit(levels, np.log2(l_vars[:, m] / costs), 1) + slope, intercept = coeffs[0], coeffs[1] + ax1.plot(levels, slope * levels + intercept, '--', label=f'fit: slope={slope:.2f}') + + ax1.set_ylabel('log' + r'$_2$' + 'variance to cost ratio') ax1.set_xlabel('level' + r'$l$') plt.legend() - #plt.savefig("MLMC_cost_saves.pdf") + plt.tight_layout() plt.show() def kurtosis_per_level(means, l_means, err_means=0, err_l_means=0, moments=[1, 2, 3, 4]): + """ + Plot log₂ of mean values per level (often used for analyzing kurtosis trends). + + :param means: Array of global mean values per moment (unused in plotting). + :param l_means: Array of level-wise mean values per moment. + :param err_means: Optional array of mean estimation errors (unused). + :param err_l_means: Optional array of level-mean estimation errors (unused). + :param moments: List of moment indices to include in the plot. 
+ :return: None + """ fig, ax1 = plt.subplots(figsize=(8, 5)) for m in moments: - # line1, = ax1.errorbar(np.log2(variances[m]), yerr=err_variances, label="m={}".format(m), marker="o") - # line2, = ax1.errorbar(np.log2(l_vars[:, m]), yerr=err_l_vars, label="m={}".format(m), marker="s") - #line1, = ax1.plot(np.log2(variances[m]), label="m={}".format(m), marker="o") - line2, = ax1.plot(np.log2(np.abs(l_means[:, m])), label="m={}".format(m), marker="s") + line2, = ax1.plot(np.log2(np.abs(l_means[:, m])), label=f"m={m}", marker="s") ax1.set_ylabel('log ' + r'$_2$ ' + 'mean') ax1.set_xlabel('level ' + r'$l$') plt.legend() - #plt.savefig("MLMC_cost_saves.pdf") + plt.tight_layout() plt.show() - - - diff --git a/mlmc/plot/violinplot.py b/mlmc/plot/violinplot.py index 85e9a1b1..74dfe836 100644 --- a/mlmc/plot/violinplot.py +++ b/mlmc/plot/violinplot.py @@ -2,18 +2,56 @@ import seaborn from mlmc.plot.plots import _show_and_save import matplotlib + +# Set default font size for all plots matplotlib.rcParams.update({'font.size': 22}) + import matplotlib.pyplot as plt class ViolinPlotter(seaborn.categorical._ViolinPlotter): + """ + Custom subclass of seaborn's internal _ViolinPlotter to modify how quartiles + and mean lines are drawn inside a violin plot. + + This class extends the default behavior by drawing the 25th, 50th, and 75th + percentiles as dashed lines, and the mean as a solid line across the violin body. + """ + def draw_quartiles(self, ax, data, support, density, center, split=False): + """ + Draw quartile and mean lines on the violin plot. + + Parameters + ---------- + ax : matplotlib.axes.Axes + The axes object to draw on. + data : array-like + Input data for a single violin. + support : array-like + Grid over which the kernel density was evaluated. + density : array-like + Corresponding kernel density values. + center : float + Position of the violin on the categorical axis. + split : bool, default=False + Whether the violin is split by hue (two sides). 
+ + Notes + ----- + - The mean is drawn as a solid line. + - Quartiles (25%, 50%, 75%) are drawn as dashed lines. + - The density scaling follows seaborn’s internal behavior. + """ + # Compute quartiles and mean of the data q25, q50, q75 = np.percentile(data, [25, 50, 75]) mean = np.mean(data) + # Draw mean line (solid) self.draw_to_density(ax, center, mean, support, density, split, linewidth=self.linewidth) + # Draw quartile lines (dashed) self.draw_to_density(ax, center, q25, support, density, split, linewidth=self.linewidth, dashes=[self.linewidth * 1.5] * 2) @@ -33,39 +71,110 @@ def violinplot( bw="scott", cut=2, scale="area", scale_hue=True, gridsize=100, width=.8, inner="box", split=False, dodge=True, orient=None, linewidth=None, color=None, palette=None, saturation=.75, - ax=None, **kwargs,): - - plotter = ViolinPlotter(x, y, hue, data, order, hue_order, - bw, cut, scale, scale_hue, gridsize, - width, inner, split, dodge, orient, linewidth, - color, palette, saturation) - + ax=None, **kwargs, +): + """ + Wrapper around the custom ViolinPlotter class to generate a violin plot. + + Parameters + ---------- + x, y, hue : str, optional + Variable names for the categorical axis, numeric axis, and hue grouping. + data : DataFrame, optional + Dataset containing the variables. + order, hue_order : list, optional + Order of categories for x and hue variables. + bw : str or float, default="scott" + Bandwidth method or scalar for kernel density estimation. + cut : float, default=2 + How far the violin extends beyond extreme data points. + scale : {"area", "count", "width"}, default="area" + Method for scaling the width of each violin. + scale_hue : bool, default=True + Whether to scale by hue levels within each category. + gridsize : int, default=100 + Number of points in the KDE grid. + width : float, default=0.8 + Width of each violin. + inner : {"box", "quartile", "point", "stick", None}, default="box" + Representation inside each violin. 
+ split : bool, default=False + Draw half-violins when hue is used. + dodge : bool, default=True + Separate violins for each hue level. + orient : {"v", "h"}, optional + Plot orientation; inferred if not specified. + linewidth : float, optional + Width of the line used for drawing violins and quartiles. + color : matplotlib color, optional + Color for all violins. + palette : str or sequence, optional + Color palette for hue levels. + saturation : float, default=0.75 + Saturation for colors. + ax : matplotlib.axes.Axes, optional + Axes object to draw on; created if None. + **kwargs : + Additional arguments passed to seaborn’s internal methods. + + Returns + ------- + ax : matplotlib.axes.Axes + The axes containing the drawn violin plot. + """ + # Initialize a custom violin plotter instance + plotter = ViolinPlotter( + x, y, hue, data, order, hue_order, + bw, cut, scale, scale_hue, gridsize, + width, inner, split, dodge, orient, linewidth, + color, palette, saturation + ) + + # Create a new axes if none provided if ax is None: ax = plt.gca() + # Draw the plot using the seaborn-based custom plotter plotter.plot(ax) return ax def fine_coarse_violinplot(data_frame): + """ + Generate a split violin plot comparing fine and coarse simulation samples per level. + + Parameters + ---------- + data_frame : pandas.DataFrame + Must contain the columns: + - 'level' : int, simulation level + - 'samples' : float, sample values + - 'type' : str, either 'fine' or 'coarse' + + Notes + ----- + - Uses log scale on the y-axis. + - Calls `_show_and_save` to display and save the resulting plot. + - Produces a split violin plot (fine vs coarse) for each level. 
+ """ + # Create a single subplot for the violin plot fig, axes = plt.subplots(1, 1, figsize=(22, 10)) - # mean with confidence interval - # sns.pointplot(x='level', y='samples', hue='type', data=data_frame, estimator=np.mean, - # palette="Set2", join=False, ax=axes) - - # line is not suitable for our purpose - # sns.lineplot(x="level", y="samples", hue="type",# err_style="band", ci='sd' - # estimator=np.median, data=data_frame, ax=axes) - - violinplot(x="level", y="samples", hue='type', data=data_frame, palette="Set2", - split=True, scale="area", inner="quartile", ax=axes) + # Draw split violin plot for 'fine' and 'coarse' samples per level + violinplot( + x="level", y="samples", hue='type', data=data_frame, + palette="Set2", split=True, scale="area", + inner="quartile", ax=axes + ) + # Use logarithmic y-scale (typical for MLMC variance/cost visualizations) axes.set_yscale('log') axes.set_ylabel('') axes.set_xlabel('') + + # Remove legend frame and content axes.legend([], [], frameon=False) + # Display and save plot using utility function _show_and_save(fig, "violinplot", "violinplot") _show_and_save(fig, None, "violinplot") - diff --git a/mlmc/quantity/quantity.py b/mlmc/quantity/quantity.py index bdeddb96..6a1696f4 100644 --- a/mlmc/quantity/quantity.py +++ b/mlmc/quantity/quantity.py @@ -13,12 +13,13 @@ def make_root_quantity(storage: SampleStorage, q_specs: List[QuantitySpec]): """ - Create a root quantity that has QuantityStorage as the input quantity, + Create a root quantity that has QuantityStorage as the input quantity. QuantityStorage is the only class that directly accesses the stored data. - Quantity type is created based on the q_spec parameter - :param storage: SampleStorage - :param q_specs: same as result format in simulation class - :return: QuantityStorage + The returned QuantityStorage uses a QType built from provided QuantitySpec objects. 
+ + :param storage: SampleStorage instance that provides stored samples + :param q_specs: list of QuantitySpec describing the simulation result format + :return: QuantityStorage that wraps the provided SampleStorage with a matching QType """ dict_types = [] for q_spec in q_specs: @@ -33,29 +34,37 @@ def make_root_quantity(storage: SampleStorage, q_specs: List[QuantitySpec]): class Quantity: + """ + Represents a quantity (a measurable value or expression) constructed from a QType, + an operation (callable) and zero-or-more input quantities. Quantities are lazy: + their actual data are returned by calling `samples(chunk_spec)`. + + - qtype: structure description (QType) + - _operation: callable that takes sample-chunks from input_quantities and returns result chunks + - _input_quantities: dependencies (other Quantity instances) + """ + def __init__(self, quantity_type, operation, input_quantities=[]): """ - Quantity class represents real quantity and also provides operation that can be performed with stored values. - Each Quantity has Qtype which describes its structure. - :param quantity_type: QType instance - :param operation: function - :param input_quantities: List[Quantity] + :param quantity_type: QType instance describing the shape/structure + :param operation: callable implementing the transform on input chunks + :param input_quantities: List[Quantity] dependencies (may be empty for constants) """ self.qtype = quantity_type self._operation = operation self._input_quantities = input_quantities - # List of quantities on which the 'self' depends, their number have to match number of arguments - # to the operation. 
+ # Underlying QuantityStorage (inherited from one of the inputs, if present) self._storage = self.get_quantity_storage() - # QuantityStorage instance + # Selection identifier - used to tie selections together (set by select) self._selection_id = self.set_selection_id() - # Identifier of selection, should be set in select() method + # Validate that input quantities use consistent selection/storage self._check_selection_ids() def get_quantity_storage(self): """ - Get QuantityStorage instance - :return: None, QuantityStorage + Find the first QuantityStorage among inputs (if any) and return it. + + :return: QuantityStorage instance or None if not found """ if len(self._input_quantities) == 0: return None @@ -68,9 +77,11 @@ def get_quantity_storage(self): def set_selection_id(self): """ - Set selection id - selection id is None by default, - but if we create new quantity from quantities that are result of selection we need to pass selection id + Determine the selection id for this Quantity. If inputs have a selection id + (created by select), inherit it; if multiple different selection ids are + present among inputs, raise an exception. + + :return: selection id or None """ selection_id = None for input_quantity in self._input_quantities: @@ -82,12 +93,11 @@ def set_selection_id(self): def _check_selection_ids(self): """ - Make sure the all input quantities come from the same QuantityStorage + Ensure that all input quantities that have selection ids share the same one. + If no QuantityStorage is present, nothing to check. """ - # All input quantities are QuantityConst instances if self._storage is None: return - # Check selection ids otherwise for input_quantity in self._input_quantities: sel_id = input_quantity.selection_id() if sel_id is None: @@ -97,8 +107,10 @@ def _check_selection_ids(self): def selection_id(self): """ - Get storage ids of all input quantities - :return: List[int] + Return this Quantity's selection id. 
If not set, use id(self._storage) to + identify the underlying storage instance. + + :return: selection identifier (int or None) """ if self._selection_id is not None: return self._selection_id @@ -109,71 +121,93 @@ def selection_id(self): def size(self) -> int: """ - Quantity size from qtype + Return the number of scalar components described by the QType. + :return: int """ return self.qtype.size() def get_cache_key(self, chunk_spec): """ - Create cache key + Create a cache key used by memoization for samples. We include: + - level id + - chunk id + - chunk size (derived from slice) + - id(self) to distinguish different quantity instances + + :param chunk_spec: ChunkSpec + :return: tuple key """ chunk_size = None if chunk_spec.chunk_slice is not None: chunk_size = chunk_spec.chunk_slice.stop - chunk_spec.chunk_slice.start - return (chunk_spec.level_id, chunk_spec.chunk_id, chunk_size, id(self)) # redundant parentheses needed due to py36, py37 + return (chunk_spec.level_id, chunk_spec.chunk_id, chunk_size, id(self)) # py36/37 compatibility @cached(custom_key_maker=get_cache_key) def samples(self, chunk_spec): """ - Return list of sample chunks for individual levels. - Possibly calls underlying quantities. - :param chunk_spec: object containing chunk identifier level identifier and chunk_slice - slice() object - :return: np.ndarray or None + Evaluate and return the data chunk for this quantity at the specified chunk_spec. + Calls samples(chunk_spec) recursively on inputs and passes the results to _operation. + + :param chunk_spec: ChunkSpec object with level_id, chunk_id, and optional slice + :return: np.ndarray (M, chunk_size, 2) or None """ chunks_quantity_level = [q.samples(chunk_spec) for q in self._input_quantities] return self._operation(*chunks_quantity_level) def _reduction_op(self, quantities, operation): """ + Helper for building a reduction Quantity from many inputs. 
+ + If any input is a non-constant Quantity, return a Quantity with the operation and inputs. + If all inputs are QuantityConst, evaluate the operation immediately and return QuantityConst. + :param quantities: List[Quantity] - :param operation: function which is run with given quantities + :param operation: Callable to apply :return: Quantity or QuantityConst """ for quantity in quantities: if not isinstance(quantity, QuantityConst): return Quantity(quantity.qtype, operation=operation, input_quantities=quantities) - # Quantity from QuantityConst instances + # All constant -> precompute value return QuantityConst(quantities[0].qtype, value=operation(*[q._value for q in quantities])) def select(self, *args): """ - Performs sample selection based on conditions - :param args: Quantity - :return: Quantity + Apply boolean selection masks to this Quantity's samples. + + :param args: One or more Quantity instances with BoolType that act as masks. + :return: Quantity representing the selected samples (mask applied on sample axis) """ - # args always has len() at least 1 + # First mask masks = args[0] + # Validate masks are BoolType for quantity in args: if not isinstance(quantity.qtype.base_qtype(), qt.BoolType): raise Exception("Quantity: {} doesn't have BoolType, instead it has QType: {}" .format(quantity, quantity.qtype.base_qtype())) - # More conditions leads to default AND + # Combine multiple masks with logical AND if len(args) > 1: for m in args[1:]: - masks = np.logical_and(masks, m) # method from this module + masks = np.logical_and(masks, m) def op(x, mask): - return x[..., mask, :] # [...sample size, cut number of samples, 2] + # Mask samples (reduce number of sample columns) + return x[..., mask, :] # [..., selected_samples, 2] + q = Quantity(quantity_type=self.qtype, input_quantities=[self, masks], operation=op) - q._selection_id = id(q) + q._selection_id = id(q) # mark selection id to ensure consistency return q def __array_ufunc__(self, ufunc, method, *args, 
**kwargs): + """ + Support numpy ufuncs by routing them through _method which constructs a new Quantity. + """ return Quantity._method(ufunc, method, *args, **kwargs) + # Arithmetic operator wrappers - build new Quantities or constants as needed. def __add__(self, other): return Quantity.create_quantity([self, Quantity.wrap(other)], Quantity.add_op) @@ -207,19 +241,17 @@ def __rmod__(self, other): @staticmethod def create_quantity(quantities, operation): """ - Create new quantity (Quantity or QuantityConst) based on given quantities and operation. - There are two scenarios: - 1. At least one of quantities is Quantity instance then all quantities are considered to be input_quantities - of new Quantity - 2. All of quantities are QuantityConst instances then new QuantityConst is created - :param quantities: List[Quantity] - :param operation: function which is run with given quantities - :return: Quantity + Create a new Quantity or QuantityConst. If any input is non-constant, return + a Quantity that will evaluate lazily. If all are constant, return QuantityConst. 
+ + :param quantities: list-like of Quantity / QuantityConst + :param operation: callable to combine inputs + :return: Quantity or QuantityConst """ for quantity in quantities: if not isinstance(quantity, QuantityConst): return Quantity(quantity.qtype, operation=operation, input_quantities=quantities) - # Quantity from QuantityConst instances + # all constant -> precompute return QuantityConst(quantities[0].qtype, value=operation(*[q._value for q in quantities])) @staticmethod @@ -245,35 +277,40 @@ def mod_op(x, y): @staticmethod def _process_mask(x, y, operator): """ - Create samples mask - All values for sample must meet given condition, if any value doesn't meet the condition, - whole sample is eliminated - :param x: Quantity chunk - :param y: Quantity chunk or int, float - :param operator: operator module function - :return: np.ndarray of bools + Create a boolean mask that marks full samples passing the given per-element condition. + + The operator is applied elementwise; then we require that *every* element within the sample + passes to keep that sample. This collapses non-sample axes and returns a 1-D boolean array. + + :param x: Quantity chunk (ndarray) + :param y: Quantity chunk or scalar + :param operator: operator module function like operator.lt + :return: 1-D boolean numpy array indexing samples """ mask = operator(x, y) + # collapse over spatial/time axes and per-sample axis, keep sample index axis return mask.all(axis=tuple(range(mask.ndim - 2))).all(axis=1) def _mask_quantity(self, other, op): """ - Create quantity that represent bool mask - :param other: number or Quantity - :param op: operation - :return: Quantity + Helper to build a BoolType Quantity representing comparisons (>, <, ==, etc.) 
+ + :param other: Quantity or scalar to compare with + :param op: operation callable that builds the boolean mask from chunked arrays + :return: Quantity producing a boolean mask per sample """ bool_type = qt.BoolType() - new_qtype = self.qtype - new_qtype = new_qtype.replace_scalar(bool_type) + new_qtype = self.qtype.replace_scalar(bool_type) other = Quantity.wrap(other) + # Only scalar base types support comparison if not isinstance(self.qtype.base_qtype(), qt.ScalarType) or not isinstance(other.qtype.base_qtype(), qt.ScalarType): raise TypeError("Quantity has base qtype {}. " "Quantities with base qtype ScalarType are the only ones that support comparison". format(self.qtype.base_qtype())) return Quantity(quantity_type=new_qtype, input_quantities=[self, other], operation=op) + # Comparison operators returning boolean mask Quantities def __lt__(self, other): def lt_op(x, y): return Quantity._process_mask(x, y, operator.lt) @@ -281,59 +318,66 @@ def lt_op(x, y): def __le__(self, other): def le_op(x, y): - return self._process_mask(x, y, operator.le) + return Quantity._process_mask(x, y, operator.le) return self._mask_quantity(other, le_op) def __gt__(self, other): def gt_op(x, y): - return self._process_mask(x, y, operator.gt) + return Quantity._process_mask(x, y, operator.gt) return self._mask_quantity(other, gt_op) def __ge__(self, other): def ge_op(x, y): - return self._process_mask(x, y, operator.ge) + return Quantity._process_mask(x, y, operator.ge) return self._mask_quantity(other, ge_op) def __eq__(self, other): def eq_op(x, y): - return self._process_mask(x, y, operator.eq) + return Quantity._process_mask(x, y, operator.eq) return self._mask_quantity(other, eq_op) def __ne__(self, other): def ne_op(x, y): - return self._process_mask(x, y, operator.ne) + return Quantity._process_mask(x, y, operator.ne) return self._mask_quantity(other, ne_op) @staticmethod def pick_samples(chunk, subsample_params): """ - Pick samples some samples from chunk in order to have 
'k' samples from 'n' after all chunks are processed - Inspired by https://dl.acm.org/doi/10.1145/23002.23003 method S + Subsample a chunk using Method S (hypergeometric sampling) so that across chunks + we end up with k samples from n total. - :param chunk: np.ndarray, shape M, N, 2, where N denotes number of samples in chunk - :param subsample_params: instance of SubsampleParams class, it has two parameters: - k: number of samples which we want to get from all chunks - n: number of all samples among all chunks - :return: np.ndarray + :param chunk: ndarray of shape (M, N, 2) where N is number of samples in this chunk + :param subsample_params: object with attributes k (remaining desired) and n (remaining available) + :return: selected sub-chunk array with shape (M, m, 2), where m is chosen by hypergeometric draw """ + # Draw how many to pick from this chunk using hypergeometric distribution size = scipy.stats.hypergeom(subsample_params.n, subsample_params.k, chunk.shape[1]).rvs(size=1) out = RNG.choice(chunk, size=size, axis=1) - subsample_params.k -= out.shape[1] - subsample_params.n -= chunk.shape[1] + subsample_params.k -= out.shape[1] # reduce remaining desired + subsample_params.n -= chunk.shape[1] # reduce remaining available return out def subsample(self, sample_vec): """ - Subsampling - :param sample_vec: list of number of samples at each level - :return: Quantity + Build a Quantity that implements subsampling across levels to obtain a specified + number of samples per level (sample_vec). + + Returns a Quantity whose operation will pick samples according to subsample params + stored per-level. Uses QuantityConst with a level-aware _adjust_value to pass + different parameters to each level chunk. + + :param sample_vec: list-like of desired numbers of samples per level + :return: Quantity producing subsampled chunks """ class SubsampleParams: + """ + Small helper to carry per-level parameters while subsampling across chunks. 
+ """ def __init__(self, num_subsample, num_collected): """ - Auxiliary object for subsampling - :param num_subsample: the number of samples we want to obtain from all samples - :param num_collected: total number of samples + :param num_subsample: desired number of samples to pick from this level + :param num_collected: total available samples on this level """ self._orig_k = num_subsample self._orig_n = num_collected @@ -342,36 +386,41 @@ def __init__(self, num_subsample, num_collected): self.n = num_collected self.total_n = num_collected - # SubsampleParams for each level + # Build params per level using level collected counts from the storage subsample_level_params = {key: SubsampleParams(sample_vec[key], value) for key, value in enumerate(self.get_quantity_storage().n_collected())} - # Create a QuantityConst of dictionary in the sense of hashing dictionary items + + # Wrap a hashed version of this parameters dict in a QuantityConst to feed into operation quantity_subsample_params = Quantity.wrap(hash(frozenset(subsample_level_params.items()))) def adjust_value(values, level_id): """ - Custom implementation of QuantityConst.adjust_value() - It allows us to get different parameters for different levels + Method assigned to QuantityConst._adjust_value so each level receives its own SubsampleParams. + Re-initializes k/n for repeated calls. 
""" subsample_l_params_obj = subsample_level_params[level_id] subsample_l_params_obj.k = subsample_l_params_obj._orig_k subsample_l_params_obj.n = subsample_l_params_obj._orig_n subsample_l_params_obj.total_n = subsample_l_params_obj._orig_total_n return subsample_l_params_obj + quantity_subsample_params._adjust_value = adjust_value + # Build resulting Quantity that uses pick_samples as its operation return Quantity(quantity_type=self.qtype.replace_scalar(qt.BoolType()), input_quantities=[self, quantity_subsample_params], operation=Quantity.pick_samples) def __getitem__(self, key): """ - Get items from Quantity, quantity type must support brackets access - :param key: str, int, tuple - :return: Quantity + Create a Quantity representing indexed/ sliced access into this quantity (similar to numpy slicing). + + :param key: index or slice or tuple interpreted by qtype.get_key + :return: Quantity restricted to the requested key """ - new_qtype, start = self.qtype.get_key(key) # New quantity type + new_qtype, start = self.qtype.get_key(key) # New quantity type for selection if not isinstance(self.qtype, qt.ArrayType): + # Convert key to a slice covering the sub-array if base is not ArrayType key = slice(start, start + new_qtype.size()) def _make_getitem_op(y): @@ -380,7 +429,11 @@ def _make_getitem_op(y): return Quantity(quantity_type=new_qtype, input_quantities=[self], operation=_make_getitem_op) def __getattr__(self, name): - static_fun = getattr(self.qtype, name) # We support only static function call forwarding + """ + Forward static QType methods as Quantity methods so that QType-level helpers are available + as operations on quantities (e.g., aggregation helpers). 
+ """ + static_fun = getattr(self.qtype, name) def apply_on_quantity(*attr, **d_attr): return static_fun(self, *attr, **d_attr) @@ -389,11 +442,12 @@ def apply_on_quantity(*attr, **d_attr): @staticmethod def _concatenate(quantities, qtype, axis=0): """ - Concatenate level_chunks - :param quantities: list of quantities - :param qtype: QType - :param axis: int - :return: Quantity + Construct a Quantity that concatenates multiple quantities along a given axis. + + :param quantities: sequence of Quantity instances + :param qtype: QType describing result shape + :param axis: axis along which concatenation happens + :return: Quantity that when evaluated concatenates input chunks """ def op_concatenate(*chunks): y = np.concatenate(tuple(chunks), axis=axis) @@ -403,12 +457,12 @@ def op_concatenate(*chunks): @staticmethod def _get_base_qtype(args_quantities): """ - Get quantities base Qtype - :param args_quantities: list of quantities and other passed arguments, - we expect at least one of the arguments is Quantity - :return: base QType, ScalarType if any quantity has that base type, otherwise BoolType + Determine base QType for arithmetic/ufunc results: if any argument has a ScalarType base + return ScalarType(), otherwise BoolType(). + + :param args_quantities: iterable containing Quantity instances and possibly other values + :return: base QType instance """ - # Either all quantities are BoolType or it is considered to be ScalarType for quantity in args_quantities: if isinstance(quantity, Quantity): if type(quantity.qtype.base_qtype()) == qt.ScalarType: @@ -418,14 +472,17 @@ def _get_base_qtype(args_quantities): @staticmethod def _method(ufunc, method, *args, **kwargs): """ - Process input parameters to perform numpy ufunc. - Get base QType of passed quantities, QuantityStorage instance, ... 
- Determine the resulting QType from the first few samples - :param ufunc: ufunc object that was called - :param method: string, indicating which Ufunc method was called - :param args: tuple of the input arguments to the ufunc - :param kwargs: dictionary containing the optional input arguments of the ufunc - :return: Quantity + Generic handler for numpy ufunc operations mapped to Quantities. + + 1) Wrap inputs as Quantities. + 2) Determine the result QType by calling the ufunc on a small sample. + 3) Return a new Quantity that performs the ufunc at evaluation time. + + :param ufunc: numpy ufunc object + :param method: method name to call on ufunc (e.g., '__call__' or 'reduce') + :param args: positional arguments passed to ufunc (may include Quantities) + :param kwargs: optional ufunc kwargs + :return: Quantity representing ufunc applied to inputs """ def _ufunc_call(*input_quantities_chunks): return getattr(ufunc, method)(*input_quantities_chunks, **kwargs) @@ -438,9 +495,10 @@ def _ufunc_call(*input_quantities_chunks): @staticmethod def wrap(value): """ - Convert flat, bool or array (list) to Quantity - :param value: flat, bool, array (list) or Quantity - :return: Quantity + Convert a primitive (int, float, bool), a numpy/list array, or an existing Quantity into a Quantity. + + :param value: scalar, bool, list/ndarray, or Quantity + :return: Quantity or QuantityConst wrapping the value """ if isinstance(value, Quantity): return value @@ -459,27 +517,32 @@ def wrap(value): @staticmethod def _result_qtype(method, quantities): """ - Determine QType from evaluation with given method and first few samples from storage - :param quantities: list of Quantities - :param method: ufunc function - :return: QType + Infer the resulting QType for an operation by evaluating the operation on the first + available chunk from each input quantity. 
+ + :param method: callable that takes input chunks and returns sample chunk result + :param quantities: list of Quantity instances + :return: inferred QType (ArrayType) """ chunks_quantity_level = [] for q in quantities: quantity_storage = q.get_quantity_storage() - # QuantityConst doesn't have QuantityStorage + # If QuantityConst (no storage), use an empty default ChunkSpec if quantity_storage is None: chunk_spec = ChunkSpec() else: chunk_spec = next(quantity_storage.chunks()) chunks_quantity_level.append(q.samples(chunk_spec)) - result = method(*chunks_quantity_level) # numpy array of [M, <=10, 2] + result = method(*chunks_quantity_level) # expect shape [M, <=10, 2] qtype = qt.ArrayType(shape=result.shape[0], qtype=Quantity._get_base_qtype(quantities)) return qtype @staticmethod def QArray(quantities): + """ + Build a Quantity representing an array-of-quantities aggregated into a single QType. + """ flat_quantities = np.array(quantities).flatten() qtype = Quantity._check_same_qtype(flat_quantities) array_type = qt.ArrayType(np.array(quantities).shape, qtype) @@ -487,24 +550,40 @@ def QArray(quantities): @staticmethod def QDict(key_quantity): + """ + Build a Quantity representing a dictionary of quantities. + :param key_quantity: iterable of (key, Quantity) + """ dict_type = qt.DictType([(key, quantity.qtype) for key, quantity in key_quantity]) return Quantity._concatenate(np.array(key_quantity)[:, 1], qtype=dict_type) @staticmethod def QTimeSeries(time_quantity): + """ + Build a Quantity representing a time series constructed from (time, Quantity) pairs. + """ qtype = Quantity._check_same_qtype(np.array(time_quantity)[:, 1]) times = np.array(time_quantity)[:, 0] return Quantity._concatenate(np.array(time_quantity)[:, 1], qtype=qt.TimeSeriesType(times=times, qtype=qtype)) - @staticmethod def QField(key_quantity): + """ + Build a Quantity representing a field (mapping of locations to quantities). 
+ """ Quantity._check_same_qtype(np.array(key_quantity)[:, 1]) field_type = qt.FieldType([(key, quantity.qtype) for key, quantity in key_quantity]) return Quantity._concatenate(np.array(key_quantity)[:, 1], qtype=field_type) @staticmethod def _check_same_qtype(quantities): + """ + Validate that all provided quantities share the same QType. + + :param quantities: sequence of Quantity instances + :return: the shared QType + :raise ValueError: if a mismatch is found + """ qtype = quantities[0].qtype for quantity in quantities[1:]: if qtype != quantity.qtype: @@ -513,26 +592,28 @@ def _check_same_qtype(quantities): class QuantityConst(Quantity): + """ + Represents a constant quantity whose value is stored directly in the instance. + The samples() method returns the constant value broadcasted to the requested chunk shape. + """ + def __init__(self, quantity_type, value): """ - QuantityConst class represents constant quantity and also provides operation - that can be performed with quantity values. - The quantity is constant, meaning that this class stores the data itself - :param quantity_type: QType instance - :param value: quantity value + :param quantity_type: QType describing the const + :param value: scalar or array-like value """ self.qtype = quantity_type self._value = self._process_value(value) + # No input dependencies for a constant self._input_quantities = [] - # List of input quantities should be empty, - # but we still need this attribute due to storage_id() and level_ids() method self._selection_id = None def _process_value(self, value): """ - Reshape value if array, otherwise create array first - :param value: quantity value - :return: value with shape [M, 1, 1] which suitable for further broadcasting + Ensure the constant is stored as an array with axes [M, 1, 1] suitable for broadcasting. 
+ + :param value: scalar or array-like + :return: ndarray shaped for broadcasting into (M, chunk_size, 2) """ if isinstance(value, (int, float, bool)): value = np.array([value]) @@ -540,42 +621,50 @@ def _process_value(self, value): def selection_id(self): """ - Get storage ids of all input quantities - :return: List[int] + Constants have no selection id (they are independent of storage). """ return self._selection_id def _adjust_value(self, value, level_id=None): """ - Allows process value based on chunk_epc params (such as level_id, ...). - The custom implementation is used in Qunatity.subsample method - :param value: np.ndarray - :param level_id: int - :return: np.ndarray, particular type depends on implementation + Hook to adjust constant value per-level. By default returns the stored value unchanged. + This method gets overridden by consumers (e.g., subsample) to provide level-specific params. + + :param value: constant value array + :param level_id: int, level index (optional) + :return: possibly adjusted value """ return value @cached(custom_key_maker=Quantity.get_cache_key) def samples(self, chunk_spec): """ - Get constant values with an enlarged number of axes - :param chunk_spec: object containing chunk identifier level identifier and chunk_slice - slice() object - :return: np.ndarray + Return the constant value, optionally adjusted for the given level via _adjust_value. + + :param chunk_spec: ChunkSpec with level_id + :return: ndarray representing the constant for this chunk """ return self._adjust_value(self._value, chunk_spec.level_id) class QuantityMean: + """ + Container for aggregated mean/variance results computed by mlmc.quantity.quantity_estimate.estimate_mean. 
+ + - qtype: QType of the quantity + - _l_means: per-level mean contributions (L x M flattened) + - _l_vars: per-level variance contributions (L x M flattened) + - _n_samples: number of samples used per level + - _n_rm_samples: number of removed samples per level + """ def __init__(self, quantity_type, l_means, l_vars, n_samples, n_rm_samples): """ - QuantityMean represents results of mlmc.quantity_estimate.estimate_mean method :param quantity_type: QType - :param l_means: np.ndarray, shape: L, M - :param l_vars: np.ndarray, shape: L, M - :param n_samples: List, number of samples that were used for means at each level - :param n_rm_samples: List, number of removed samples at each level, - n_samples + n_rm_samples = all successfully collected samples + :param l_means: ndarray shape (L, M_flat) of level-wise mean contributions + :param l_vars: ndarray shape (L, M_flat) of level-wise variance contributions + :param n_samples: list/ndarray length L with number of samples used per level + :param n_rm_samples: list/ndarray length L with removed samples count per level """ self.qtype = quantity_type self._mean = None @@ -587,29 +676,43 @@ def __init__(self, quantity_type, l_means, l_vars, n_samples, n_rm_samples): def _calculate_mean_var(self): """ - Calculates the overall estimates of the mean and the variance from the means and variances at each level + Compute overall mean and variance from per-level contributions: + mean = sum_l l_means[l] + var = sum_l (l_vars[l] / n_samples[l]) """ self._mean = np.sum(self._l_means, axis=0) self._var = np.sum(self._l_vars / self._n_samples[:, None], axis=0) @property def mean(self): + """ + Reshaped overall mean according to QType. + """ if self._mean is None: self._calculate_mean_var() return self._reshape(self._mean) @property def var(self): + """ + Reshaped overall variance according to QType. 
+ """ if self._var is None: self._calculate_mean_var() return self._reshape(self._var) @property def l_means(self): + """ + Level means reshaped according to QType for each level. + """ return np.array([self._reshape(means) for means in self._l_means]) @property def l_vars(self): + """ + Level variances reshaped according to QType for each level. + """ return np.array([self._reshape(vars) for vars in self._l_vars]) @property @@ -622,74 +725,96 @@ def n_rm_samples(self): def _reshape(self, data): """ - Reshape passed data, expected means or vars - :param data: flatten np.ndarray - :return: np.ndarray, reshaped data, the final data shape depends on the particular QType - there is currently a reshape for ArrayType only + Reshape a flat data vector (flattened M) into the structure determined by qtype. + + :param data: flattened ndarray + :return: reshaped ndarray according to qtype """ return self.qtype.reshape(data) def __getitem__(self, key): """ - Get item from current QuantityMean, quantity type must support brackets access - All levels means and vars are reshaped to their QType shape and then the item is gotten, - ath the end, new QuantityMean instance is created with flatten selected means and vars - :param key: str, int, tuple - :return: QuantityMean + Index into QuantityMean similarly to Quantity.__getitem__: + reshape level-wise means/vars and select the requested key, then return a new QuantityMean. + + :param key: indexing key (int, slice, str, etc.) 
+ :return: QuantityMean restricted to the requested key """ new_qtype, start = self.qtype.get_key(key) # New quantity type if not isinstance(self.qtype, qt.ArrayType): key = slice(start, start + new_qtype.size()) - # Getting items, it performs reshape inside + # Selecting and reshaping level arrays l_means = self.l_means[:, key] l_vars = self.l_vars[:, key] - return QuantityMean(quantity_type=new_qtype, l_means=l_means.reshape((l_means.shape[0], -1)), - l_vars=l_vars.reshape((l_vars.shape[0], -1)), n_samples=self._n_samples, + return QuantityMean(quantity_type=new_qtype, + l_means=l_means.reshape((l_means.shape[0], -1)), + l_vars=l_vars.reshape((l_vars.shape[0], -1)), + n_samples=self._n_samples, n_rm_samples=self._n_rm_samples) class QuantityStorage(Quantity): + """ + Special Quantity that provides direct access to SampleStorage. + It implements the bridge between storage and the Quantity abstraction. + """ + def __init__(self, storage, qtype): """ - Special Quantity for direct access to SampleStorage - :param storage: mlmc._sample_storage.SampleStorage child - :param qtype: QType + :param storage: SampleStorage instance (in-memory or HDF5, etc.) + :param qtype: QType describing stored data structure """ + # Store underlying storage reference and QType self._storage = storage self.qtype = qtype + # No operation or inputs required for storage root self._input_quantities = [] self._operation = None def level_ids(self): """ - Number of levels + Return list of available level ids from the SampleStorage. :return: List[int] """ return self._storage.get_level_ids() def selection_id(self): """ - Identity of QuantityStorage instance + Identity of this QuantityStorage (unique by object id). :return: int """ return id(self) def get_quantity_storage(self): + """ + For QuantityStorage the storage is itself. + :return: self + """ return self def chunks(self, level_id=None): + """ + Proxy to SampleStorage.chunks which yields ChunkSpec instances describing available chunks. 
+ :param level_id: optional level id to restrict chunks + :return: generator of ChunkSpec + """ return self._storage.chunks(level_id) def samples(self, chunk_spec): """ - Get results for given level id and chunk id - :param chunk_spec: object containing chunk identifier level identifier and chunk_slice - slice() object - :return: Array[M, chunk size, 2] + Retrieve stored sample pairs for the requested level/chunk. + + :param chunk_spec: ChunkSpec describing (level, chunk slice) + :return: ndarray shaped [M, chunk_size, 2] where M is number of result quantities """ return self._storage.sample_pairs_level(chunk_spec) # Array[M, chunk size, 2] def n_collected(self): + """ + Return number of collected results per level from the underlying SampleStorage. + :return: list of ints + """ return self._storage.get_n_collected() diff --git a/mlmc/quantity/quantity_estimate.py b/mlmc/quantity/quantity_estimate.py index a1f63ddd..e2553138 100644 --- a/mlmc/quantity/quantity_estimate.py +++ b/mlmc/quantity/quantity_estimate.py @@ -5,122 +5,146 @@ def mask_nan_samples(chunk): """ - Mask out samples that contain NaN in either fine or coarse part of the result - :param chunk: np.ndarray [M, chunk_size, 2] - :return: chunk: np.ndarray, number of masked samples: int + Remove (mask out) samples containing NaN values in either the fine or coarse part of the result. + + :param chunk: np.ndarray of shape [M, chunk_size, 2] + M - quantity size (number of scalar components), + chunk_size - number of samples in the chunk, + 2 - fine and coarse parts of the result. + :return: (filtered_chunk, n_masked) + filtered_chunk: np.ndarray with invalid samples removed, + n_masked: int, number of masked (removed) samples. """ - # Fine and coarse moments_fn mask + # Identify any sample with NaNs in its fine or coarse component mask = np.any(np.isnan(chunk), axis=0).any(axis=1) return chunk[..., ~mask, :], np.count_nonzero(mask) def cache_clear(): + """ + Clear cached Quantity sample evaluations. 
+ + Used before running MLMC estimations to ensure fresh data is fetched from storage. + """ mlmc.quantity.quantity.Quantity.samples.cache_clear() mlmc.quantity.quantity.QuantityConst.samples.cache_clear() def estimate_mean(quantity, form="diff", operation_func=None, **kwargs): """ - MLMC mean estimator. - The MLMC method is used to compute the mean estimate to the Quantity dependent on the collected samples. - The squared error of the estimate (the estimator variance) is estimated using the central limit theorem. - Data is processed by chunks, so that it also supports big data processing - :param quantity: Quantity - :param form: if "diff" estimates based on difference between fine and coarse data = MLMC approach - "fine" estimates based on level's fine data - "coarse" estimates based on level's coarse data - :param operation_func: function to process level data, e.g. kurtosis estimation - :return: QuantityMean which holds both mean and variance + Estimate the MLMC mean (and variance) of a Quantity using multilevel sampling. + + The function computes per-level means and variances from simulation results. + Supports large datasets via chunked processing and handles NaN-masked samples. + + :param quantity: Quantity instance to estimate. + :param form: str, type of estimation: + - "diff": estimate based on differences (fine - coarse) → standard MLMC approach. + - "fine": estimate using fine-level data only. + - "coarse": estimate using coarse-level data only. + :param operation_func: Optional transformation applied to chunk data before accumulation + (e.g., for moment or kurtosis computation). + :param kwargs: Additional keyword arguments passed to operation_func. + :return: QuantityMean object containing mean, variance, and sample statistics per level. 
""" + # Reset cached quantity evaluations cache_clear() + quantity_vec_size = quantity.size() sums = None sums_of_squares = None - # initialization + # Initialize level-specific storage quantity_storage = quantity.get_quantity_storage() level_ids = quantity_storage.level_ids() n_levels = np.max(level_ids) + 1 n_samples = [0] * n_levels n_rm_samples = [0] * n_levels + # Iterate through data chunks for chunk_spec in quantity_storage.chunks(): samples = quantity.samples(chunk_spec) chunk, n_mask_samples = mask_nan_samples(samples) n_samples[chunk_spec.level_id] += chunk.shape[1] n_rm_samples[chunk_spec.level_id] += n_mask_samples - # No samples in chunk + # Skip empty chunks if chunk.shape[1] == 0: continue - assert (chunk.shape[0] == quantity_vec_size) + assert chunk.shape[0] == quantity_vec_size - # Set variables for level sums and sums of squares + # Allocate accumulators at first valid chunk if sums is None: sums = [np.zeros(chunk.shape[0]) for _ in range(n_levels)] sums_of_squares = [np.zeros(chunk.shape[0]) for _ in range(n_levels)] - # Estimates of level's fine data + # Select appropriate data form for the estimator if form == "fine": - if chunk_spec.level_id == 0: - chunk_diff = chunk[:, :, 0] - else: - chunk_diff = chunk[:, :, 0] - # Estimate of level's coarse data + chunk_diff = chunk[:, :, 0] elif form == "coarse": - if chunk_spec.level_id == 0: - chunk_diff = np.zeros(chunk[:, :, 0].shape) - else: - chunk_diff = chunk[:, :, 1] + chunk_diff = np.zeros_like(chunk[:, :, 0]) if chunk_spec.level_id == 0 else chunk[:, :, 1] else: - if chunk_spec.level_id == 0: - chunk_diff = chunk[:, :, 0] - else: - chunk_diff = chunk[:, :, 0] - chunk[:, :, 1] + # Default MLMC difference (fine - coarse) + chunk_diff = chunk[:, :, 0] if chunk_spec.level_id == 0 else chunk[:, :, 0] - chunk[:, :, 1] + # Optional user-defined transformation of data if operation_func is not None: chunk_diff = operation_func(chunk_diff, chunk_spec, **kwargs) + # Accumulate sums and squared sums for 
this level sums[chunk_spec.level_id] += np.sum(chunk_diff, axis=1) - sums_of_squares[chunk_spec.level_id] += np.sum(chunk_diff**2, axis=1) + sums_of_squares[chunk_spec.level_id] += np.sum(chunk_diff ** 2, axis=1) if sums is None: - raise Exception("All samples were masked") + raise Exception("All samples were masked (no valid data found).") + # Compute means and variances for each level l_means = [] l_vars = [] for s, sp, n in zip(sums, sums_of_squares, n_samples): l_means.append(s / n) if n > 1: - l_vars.append((sp - (s ** 2 / n)) / (n-1)) + l_vars.append((sp - (s ** 2 / n)) / (n - 1)) else: l_vars.append(np.full(len(s), np.inf)) - return mlmc.quantity.quantity.QuantityMean(quantity.qtype, l_means=l_means, l_vars=l_vars, n_samples=n_samples, - n_rm_samples=n_rm_samples) + # Construct QuantityMean object with level statistics + return mlmc.quantity.quantity.QuantityMean( + quantity.qtype, + l_means=l_means, + l_vars=l_vars, + n_samples=n_samples, + n_rm_samples=n_rm_samples + ) def moment(quantity, moments_fn, i=0): """ - Create quantity with operation that evaluates particular moment - :param quantity: Quantity instance - :param moments_fn: mlmc.moments.Moments child - :param i: index of moment - :return: Quantity + Construct a Quantity that represents a single statistical moment. + + :param quantity: Base Quantity instance. + :param moments_fn: Instance of mlmc.moments.Moments defining the moment computation. + :param i: Index of the moment to compute. + :return: New Quantity that computes the i-th moment. 
""" def eval_moment(x): return moments_fn.eval_single_moment(i, value=x) - return mlmc.quantity.quantity.Quantity(quantity_type=quantity.qtype, input_quantities=[quantity], operation=eval_moment) + + return mlmc.quantity.quantity.Quantity( + quantity_type=quantity.qtype, + input_quantities=[quantity], + operation=eval_moment + ) def moments(quantity, moments_fn, mom_at_bottom=True): """ - Create quantity with operation that evaluates moments_fn - :param quantity: Quantity - :param moments_fn: mlmc.moments.Moments child - :param mom_at_bottom: bool, if True moments are underneath, - a scalar is substituted with an array of moments of that scalar - :return: Quantity + Construct a Quantity representing all moments defined by a given Moments object. + + :param quantity: Base Quantity. + :param moments_fn: mlmc.moments.Moments child defining moment evaluations. + :param mom_at_bottom: bool, if True, moments are added at the lowest (scalar) level of the Quantity type. + :return: Quantity that computes all defined moments. 
""" def eval_moments(x): if mom_at_bottom: @@ -129,73 +153,85 @@ def eval_moments(x): mom = moments_fn.eval_all(x).transpose((3, 0, 1, 2)) # [R, M, N, 2] return mom.reshape((np.prod(mom.shape[:-2]), mom.shape[-2], mom.shape[-1])) # [M, N, 2] - # Create quantity type which has moments_fn at the bottom + # Define new Quantity type according to desired hierarchy if mom_at_bottom: moments_array_type = qt.ArrayType(shape=(moments_fn.size,), qtype=qt.ScalarType()) moments_qtype = quantity.qtype.replace_scalar(moments_array_type) - # Create quantity type that has moments_fn on the surface else: moments_qtype = qt.ArrayType(shape=(moments_fn.size,), qtype=quantity.qtype) - return mlmc.quantity.quantity.Quantity(quantity_type=moments_qtype, input_quantities=[quantity], operation=eval_moments) + + return mlmc.quantity.quantity.Quantity( + quantity_type=moments_qtype, + input_quantities=[quantity], + operation=eval_moments + ) def covariance(quantity, moments_fn, cov_at_bottom=True): """ - Create quantity with operation that evaluates covariance matrix - :param quantity: Quantity - :param moments_fn: mlmc.moments.Moments child - :param cov_at_bottom: bool, if True cov matrices are underneath, - a scalar is substituted with a matrix of moments of that scalar - :return: Quantity + Construct a Quantity representing covariance matrices of the given moments. + + :param quantity: Base Quantity. + :param moments_fn: mlmc.moments.Moments child defining moment evaluations. + :param cov_at_bottom: bool, if True covariance matrices are attached at the scalar level of the Quantity type. + :return: Quantity that computes covariance matrices. 
""" def eval_cov(x): + # Compute all moments (fine and coarse) moments = moments_fn.eval_all(x) mom_fine = moments[..., 0, :] cov_fine = np.einsum('...i,...j', mom_fine, mom_fine) if moments.shape[-2] == 1: + # Single level (no coarse) cov = np.array([cov_fine]) else: mom_coarse = moments[..., 1, :] cov_coarse = np.einsum('...i,...j', mom_coarse, mom_coarse) cov = np.array([cov_fine, cov_coarse]) + # Reshape covariance according to desired data layout if cov_at_bottom: - cov = cov.transpose((1, 3, 4, 2, 0)) # [M, R, R, N, 2] + cov = cov.transpose((1, 3, 4, 2, 0)) # [M, R, R, N, 2] else: - cov = cov.transpose((3, 4, 1, 2, 0)) # [R, R, M, N, 2] + cov = cov.transpose((3, 4, 1, 2, 0)) # [R, R, M, N, 2] return cov.reshape((np.prod(cov.shape[:-2]), cov.shape[-2], cov.shape[-1])) - # Create quantity type which has covariance matrices at the bottom + # Adjust Quantity type for covariance structure if cov_at_bottom: - moments_array_type = qt.ArrayType(shape=(moments_fn.size, moments_fn.size, ), qtype=qt.ScalarType()) + moments_array_type = qt.ArrayType(shape=(moments_fn.size, moments_fn.size), qtype=qt.ScalarType()) moments_qtype = quantity.qtype.replace_scalar(moments_array_type) - # Create quantity type that has covariance matrices on the surface else: - moments_qtype = qt.ArrayType(shape=(moments_fn.size, moments_fn.size, ), qtype=quantity.qtype) - return mlmc.quantity.quantity.Quantity(quantity_type=moments_qtype, input_quantities=[quantity], operation=eval_cov) + moments_qtype = qt.ArrayType(shape=(moments_fn.size, moments_fn.size), qtype=quantity.qtype) + + return mlmc.quantity.quantity.Quantity( + quantity_type=moments_qtype, + input_quantities=[quantity], + operation=eval_cov + ) def kurtosis_numerator(chunk_diff, chunk_spec, l_means): """ - Estimate sample kurtosis nominator: - E[(Y_l - E[Y_l])^4] - :param chunk_diff: np.ndarray, [quantity shape, number of samples] - :param chunk_spec: quantity_spec.ChunkSpec - :return: np.ndarray, unchanged shape + Compute the 
numerator for the sample kurtosis: + E[(Y_l - E[Y_l])^4] + :param chunk_diff: np.ndarray [quantity shape, number of samples] + :param chunk_spec: quantity_spec.ChunkSpec describing current level and chunk. + :param l_means: List of per-level means used for centering. + :return: np.ndarray of the same shape as input. """ - chunk_diff = (chunk_diff - l_means[chunk_spec.level_id]) ** 4 - return chunk_diff + return (chunk_diff - l_means[chunk_spec.level_id]) ** 4 def level_kurtosis(quantity, means_obj): """ - Estimate sample kurtosis at each level as: - E[(Y_l - E[Y_l])^4] / (Var[Y_l])^2, where Y_l = fine_l - coarse_l - :param quantity: Quantity - :param means_obj: Quantity.QuantityMean - :return: np.ndarray, kurtosis per level + Estimate the sample kurtosis for each level: + E[(Y_l - E[Y_l])^4] / (Var[Y_l])^2, where Y_l = fine_l - coarse_l + + :param quantity: Quantity instance. + :param means_obj: QuantityMean object containing level means and variances. + :return: np.ndarray of kurtosis values per level. """ numerator_means_obj = estimate_mean(quantity, operation_func=kurtosis_numerator, l_means=means_obj.l_means) - kurtosis = numerator_means_obj.l_means / (means_obj.l_vars)**2 + kurtosis = numerator_means_obj.l_means / (means_obj.l_vars) ** 2 return kurtosis diff --git a/mlmc/quantity/quantity_spec.py b/mlmc/quantity/quantity_spec.py index ea25dc66..377adad8 100644 --- a/mlmc/quantity/quantity_spec.py +++ b/mlmc/quantity/quantity_spec.py @@ -5,24 +5,56 @@ @attr.s(auto_attribs=True, eq=False) class QuantitySpec: + """ + Specification of a physical quantity for simulation or data storage. + + :param name: Name of the quantity (e.g. 'pressure', 'velocity'). + :param unit: Unit of the quantity (e.g. 'm/s', 'Pa'). + :param shape: Tuple describing the shape of the data (e.g. (64, 64)). + :param times: List of time points associated with this quantity. + :param locations: List of either string-based identifiers or 3D coordinates + (x, y, z) where the quantity is defined. 
+ """ + name: str unit: str shape: Tuple[int, int] times: List[float] locations: Union[List[str], List[Tuple[float, float, float]]] - # Note: auto generated eq raises ValueError def __eq__(self, other): - if (self.name, self.unit) == (other.name, other.unit) \ - and np.array_equal(self.shape, other.shape)\ - and np.array_equal(self.times, other.times)\ - and not (set(self.locations) - set(other.locations)): - return True - return False + """ + Compare two QuantitySpec instances for equality. + + :param other: Another QuantitySpec instance to compare with. + :return: True if both instances describe the same quantity, False otherwise. + """ + if not isinstance(other, QuantitySpec): + return False + + # Compare name, unit, shape, and times + same_basic_attrs = ( + (self.name, self.unit) == (other.name, other.unit) + and np.array_equal(self.shape, other.shape) + and np.array_equal(self.times, other.times) + ) + + # Compare locations (set difference = ∅ → same) + same_locations = not (set(self.locations) - set(other.locations)) + + return same_basic_attrs and same_locations @attr.s(auto_attribs=True) class ChunkSpec: + """ + Specification of a simulation or dataset chunk. + + :param chunk_id: Integer identifier of the chunk. + :param chunk_slice: Slice object defining the range of data indices in the chunk. + :param level_id: Identifier of the refinement or simulation level. + """ + chunk_id: int = None chunk_slice: slice = None level_id: int = None diff --git a/mlmc/quantity/quantity_types.py b/mlmc/quantity/quantity_types.py index 41d6ea7f..56b42b69 100644 --- a/mlmc/quantity/quantity_types.py +++ b/mlmc/quantity/quantity_types.py @@ -7,23 +7,37 @@ class QType(metaclass=abc.ABCMeta): + """ + Base class for quantity types. + + :param qtype: inner/contained QType or Python type + """ + def __init__(self, qtype): self._qtype = qtype def size(self) -> int: """ - Size of type + Size of the type in flattened units. 
+ :return: int """ + raise NotImplementedError def base_qtype(self): + """ + Return the base scalar/bool type for nested types. + + :return: QType + """ return self._qtype.base_qtype() def replace_scalar(self, substitute_qtype): """ - Find ScalarType and replace it with substitute_qtype - :param substitute_qtype: QType, replaces ScalarType - :return: QType + Find ScalarType and replace it with substitute_qtype. + + :param substitute_qtype: QType that replaces ScalarType + :return: QType (new instance with scalar replaced) """ inner_qtype = self._qtype.replace_scalar(substitute_qtype) new_qtype = copy.deepcopy(self) @@ -31,85 +45,121 @@ def replace_scalar(self, substitute_qtype): return new_qtype @staticmethod - def keep_dims(chunk): + def keep_dims(chunk: np.ndarray) -> np.ndarray: """ - Always keep chunk shape to be [M, chunk size, 2]! - For scalar quantities, the input block can have the shape (chunk size, 2) - Sometimes we need to 'flatten' first few shape to have desired chunk shape - :param chunk: list - :return: list + Ensure chunk has shape [M, chunk size, 2]. + + For scalar quantities the input block can have shape (chunk size, 2). + Sometimes we need to 'flatten' first few dimensions to achieve desired chunk shape. 
+ + :param chunk: numpy array + :return: numpy array with shape [M, chunk size, 2] + :raises ValueError: if chunk.ndim < 2 """ # Keep dims [M, chunk size, 2] if len(chunk.shape) == 2: chunk = chunk[np.newaxis, :] elif len(chunk.shape) > 2: - chunk = chunk.reshape((np.prod(chunk.shape[:-2]), chunk.shape[-2], chunk.shape[-1])) + chunk = chunk.reshape((int(np.prod(chunk.shape[:-2])), chunk.shape[-2], chunk.shape[-1])) else: - raise ValueError("Chunk shape not supported") + raise ValueError("Chunk shape not supported: need ndim >= 2") return chunk - def _make_getitem_op(self, chunk, key): + def _make_getitem_op(self, chunk: np.ndarray, key): """ - Operation - :param chunk: level chunk, list with shape [M, chunk size, 2] - :param key: parent QType's key, needed for ArrayType - :return: list + Extract a slice from chunk while preserving chunk dims. + + :param chunk: level chunk, numpy array with shape [M, chunk size, 2] + :param key: index/slice used by parent QType + :return: numpy array with shape [M', chunk size', 2] """ return QType.keep_dims(chunk[key]) - def reshape(self, data): + def reshape(self, data: np.ndarray) -> np.ndarray: + """ + Default reshape (identity). + + :param data: numpy array + :return: numpy array + """ return data class ScalarType(QType): + """ + Scalar quantity type (leaf type). 
+ """ + def __init__(self, qtype=float): + """ + :param qtype: Python type or nested type used as underlying scalar type + """ self._qtype = qtype def base_qtype(self): + """ + :return: base scalar QType (self or underlying BoolType base) + """ if isinstance(self._qtype, BoolType): return self._qtype.base_qtype() return self def size(self) -> int: - if hasattr(self._qtype, 'size'): + """ + :return: int size of the scalar (defaults to 1 or uses `_qtype.size()` if present) + """ + if hasattr(self._qtype, "size"): return self._qtype.size() return 1 def replace_scalar(self, substitute_qtype): """ - Find ScalarType and replace it with substitute_qtype - :param substitute_qtype: QType, replaces ScalarType - :return: QType + Replace ScalarType with substitute type. + + :param substitute_qtype: QType that replaces ScalarType + :return: substitute_qtype """ return substitute_qtype class BoolType(ScalarType): + """ + Boolean scalar type (inherits ScalarType). + """ pass class ArrayType(QType): - def __init__(self, shape, qtype: QType): + """ + Array quantity type. + :param shape: int or tuple describing array shape + :param qtype: contained QType for array elements + """ + + def __init__(self, shape, qtype: QType): if isinstance(shape, int): shape = (shape,) - self._shape = shape self._qtype = qtype def size(self) -> int: - return np.prod(self._shape) * self._qtype.size() + """ + :return: total flattened size (product of shape * inner qtype size) + """ + return int(np.prod(self._shape)) * int(self._qtype.size()) def get_key(self, key): """ - ArrayType indexing + ArrayType indexing. 
+ :param key: int, tuple of ints or slice objects - :return: QuantityType - ArrayType or self._qtype + :return: Tuple (QuantityType, offset) where offset is 0 for this implementation """ - # Get new shape + # Get new shape by applying indexing on an empty array of the target shape new_shape = np.empty(self._shape)[key].shape - # One selected item is considered to be a scalar QType + # If one selected item is considered to be a scalar QType if len(new_shape) == 1 and new_shape[0] == 1: new_shape = () @@ -121,26 +171,42 @@ def get_key(self, key): q_type = self._qtype return q_type, 0 - def _make_getitem_op(self, chunk, key): + def _make_getitem_op(self, chunk: np.ndarray, key): """ - Operation - :param chunk: list [M, chunk size, 2] - :param key: slice - :return: + Slice operation for ArrayType while restoring original shape. + + :param chunk: numpy array [M, chunk size, 2] + :param key: slice or index to apply on the array-shaped leading dims + :return: numpy array with preserved dims via QType.keep_dims """ - # Reshape M to original shape to allow access assert self._shape is not None chunk = chunk.reshape((*self._shape, chunk.shape[-2], chunk.shape[-1])) return QType.keep_dims(chunk[key]) - def reshape(self, data): + def reshape(self, data: np.ndarray) -> np.ndarray: + """ + Reshape flattened data to array shape. + + :param data: numpy array + :return: reshaped numpy array + """ if isinstance(self._qtype, ScalarType): return data.reshape(self._shape) else: - return data.reshape((*self._shape, np.prod(data.shape) // np.prod(self._shape))) + # assume trailing dimension belongs to inner types + total = np.prod(data.shape) + leading = int(np.prod(self._shape)) + return data.reshape((*self._shape, int(total // leading))) class TimeSeriesType(QType): + """ + Time-series quantity type. 
+ + :param times: iterable of time points + :param qtype: QType for each time slice + """ + def __init__(self, times, qtype): if isinstance(times, np.ndarray): times = times.tolist() @@ -148,96 +214,176 @@ def __init__(self, times, qtype): self._qtype = qtype def size(self) -> int: - return len(self._times) * self._qtype.size() + """ + :return: total size = number of time points * inner qtype.size() + """ + return len(self._times) * int(self._qtype.size()) def get_key(self, key): + """ + Get a qtype and offset corresponding to a given time key. + + :param key: time value to locate + :return: Tuple (q_type, offset) + """ q_type = self._qtype try: position = self._times.index(key) - except KeyError: - print("Item " + str(key) + " was not found in TimeSeries" + ". Available items: " + str(list(self._times))) + except ValueError: + # keep behavior similar to original: print available items + print( + "Item " + + str(key) + + " was not found in TimeSeries" + + ". Available items: " + + str(list(self._times)) + ) + # raise to make the error explicit + raise return q_type, position * q_type.size() @staticmethod def time_interpolation(quantity, value): """ - Interpolation in time - :param quantity: Quantity instance - :param value: point where to interpolate - :return: Quantity + Interpolate a time-series quantity to a single time value. 
+ + :param quantity: Quantity instance with qtype being a TimeSeriesType + :param value: float time value where to interpolate + :return: Quantity object representing interpolated value """ def interp(y): - split_indeces = np.arange(1, len(quantity.qtype._times)) * quantity.qtype._qtype.size() - y = np.split(y, split_indeces, axis=-3) + split_indices = np.arange(1, len(quantity.qtype._times)) * quantity.qtype._qtype.size() + y = np.split(y, split_indices, axis=-3) f = interpolate.interp1d(quantity.qtype._times, y, axis=0) return f(value) - return mlmc.quantity.quantity.Quantity(quantity_type=quantity.qtype._qtype, input_quantities=[quantity], operation=interp) + + return mlmc.quantity.quantity.Quantity( + quantity_type=quantity.qtype._qtype, + input_quantities=[quantity], + operation=interp + ) class FieldType(QType): + """ + Field type composed of named entries each having the same base qtype. + + :param args: List of (name, QType) pairs + """ + def __init__(self, args: List[Tuple[str, QType]]): - """ - QType must have same structure - :param args: - """ self._dict = dict(args) self._qtype = args[0][1] assert all(q_type.size() == self._qtype.size() for _, q_type in args) def size(self) -> int: - return len(self._dict.keys()) * self._qtype.size() + """ + :return: total size = number of fields * inner qtype size + """ + return len(self._dict.keys()) * int(self._qtype.size()) def get_key(self, key): + """ + Access sub-field by name. + + :param key: field name + :return: Tuple (q_type, offset) + """ q_type = self._qtype try: position = list(self._dict.keys()).index(key) - except KeyError: - print("Key " + str(key) + " was not found in FieldType" + - ". Available keys: " + str(list(self._dict.keys())[:5]) + "...") + except ValueError: + print( + "Key " + + str(key) + + " was not found in FieldType" + + ". Available keys: " + + str(list(self._dict.keys())[:5]) + + "..." 
+ ) + raise return q_type, position * q_type.size() class DictType(QType): + """ + Dictionary-like type of named QTypes which may differ in size. + + :param args: List of (name, QType) pairs + """ + def __init__(self, args: List[Tuple[str, QType]]): - self._dict = dict(args) # Be aware we it is ordered dictionary + self._dict = dict(args) # keep ordered mapping semantics self._check_base_type() def _check_base_type(self): + """ + Ensure all contained qtypes share the same base_qtype. + + :raises TypeError: if base_qtypes differ + """ qtypes = list(self._dict.values()) qtype_0_base_type = qtypes[0].base_qtype() for qtype in qtypes[1:]: if not isinstance(qtype.base_qtype(), type(qtype_0_base_type)): - raise TypeError("qtype {} has base QType {}, expecting {}. " - "All QTypes must have same base QType, either SacalarType or BoolType". - format(qtype, qtype.base_qtype(), qtype_0_base_type)) + raise TypeError( + "qtype {} has base QType {}, expecting {}. " + "All QTypes must have same base QType, either ScalarType or BoolType".format( + qtype, qtype.base_qtype(), qtype_0_base_type + ) + ) def base_qtype(self): + """ + :return: base_qtype of the first element + """ return next(iter(self._dict.values())).base_qtype() def size(self) -> int: + """ + :return: total flattened size (sum of sizes of contained qtypes) + """ return int(sum(q_type.size() for _, q_type in self._dict.items())) def get_qtypes(self): + """ + :return: iterable of contained qtypes + """ return self._dict.values() def replace_scalar(self, substitute_qtype): """ - Find ScalarType and replace it with substitute_qtype - :param substitute_qtype: QType, replaces ScalarType - :return: DictType + Replace scalar types recursively inside dict entries. 
+ + :param substitute_qtype: QType that replaces ScalarType + :return: new DictType instance """ dict_items = [] for key, qtype in self._dict.items(): new_qtype = qtype.replace_scalar(substitute_qtype) - dict_items.append((key, new_qtype)) + dict_items.append((key, new_qtype)) return DictType(dict_items) def get_key(self, key): + """ + Return the QType and starting offset for a named key. + + :param key: name of entry + :return: Tuple (q_type, start_offset) + """ try: q_type = self._dict[key] except KeyError: - print("Key " + str(key) + " was not found in DictType" + - ". Available keys: " + str(list(self._dict.keys())[:5]) + "...") + print( + "Key " + + str(key) + + " was not found in DictType" + + ". Available keys: " + + str(list(self._dict.keys())[:5]) + + "..." + ) + raise + start = 0 for k, qt in self._dict.items(): if k == key: diff --git a/mlmc/random/correlated_field.py b/mlmc/random/correlated_field.py index cebd1572..7867c7b4 100644 --- a/mlmc/random/correlated_field.py +++ b/mlmc/random/correlated_field.py @@ -12,16 +12,17 @@ def kozeny_carman(porosity, m, factor, viscosity): """ Kozeny-Carman law. Empirical relationship between porosity and conductivity. + :param porosity: Porosity value. :param m: Power. Suitable values are 1 < m < 4 - :param factor: [m^2] - E.g. 1e-7 , m = 3.48; juta fibers - 2.2e-8 , 1.46; glass fibers - 1.8e-13, 2.89; erruptive material - 1e-12 2.76; erruptive material - 1.8e-12 1.99; basalt - :param viscosity: [Pa . s], water: 8.90e-4 - :return: + :param factor: Factor [m^2]. 
Examples:
+                       1e-7    , m = 3.48; jute fibers
+                       2.2e-8  , m = 1.46; glass fibers
+                       1.8e-13, m = 2.89; eruptive material
+                       1e-12  , m = 2.76; eruptive material
+                       1.8e-12, m = 1.99; basalt
+    :param viscosity: Fluid viscosity [Pa.s], e.g., water: 8.90e-4
+    :return: Conductivity
     """
     assert np.all(viscosity > 1e-10)
     porosity = np.minimum(porosity, 1-1e-10)
@@ -33,10 +34,12 @@ def kozeny_carman(porosity, m, factor, viscosity):
 
 def positive_to_range(exp, a, b):
     """
-    Mapping a positive parameter 'exp' from the interval <0, \infty) to the interval .
+
+    :param exp: Positive parameter (e.g., lognormal variable)
+    :param a: Lower bound of target interval
+    :param b: Upper bound of target interval
+    :return: Mapped value in [a, b)
     """
     return b * (1 - (b - a) / (b + (b - a) * exp))
 
@@ -44,12 +47,12 @@ def positive_to_range(exp, a, b):
 class Field:
     def __init__(self, name, field=None, param_fields=[], regions=[]):
         """
-        :param name: Name of the field.
-        :param field: scalar (const field), or instance of SpatialCorrelatedField, or a callable
-        for evaluation of the field from its param_fields.
-        :param regions: Domain where field is sampled.
-        :param param_fields: List of names of parameter fields, dependees.
-        TODO: consider three different derived classes for: const, random and func fields.
+        Initialize a Field object.
+ + :param name: Name of the field + :param field: Scalar (const), RandomFieldBase, or callable function + :param param_fields: List of dependent parameter fields + :param regions: List of region names where the field is defined """ self.correlated_field = None self.const = None @@ -67,8 +70,6 @@ def __init__(self, name, field=None, param_fields=[], regions=[]): assert len(param_fields) == 0 else: assert len(param_fields) > 0, field - - # check callable try: params = [np.ones(2) for i in range(len(param_fields))] field(*params) @@ -81,74 +82,61 @@ def __init__(self, name, field=None, param_fields=[], regions=[]): def set_points(self, points): """ - Internal method to set evaluation points. See Fields.set_points. + Set points for field evaluation. + + :param points: Array of points where the field will be evaluated """ if self.const is not None: self._sample = self.const * np.ones(len(points)) elif self.correlated_field is not None: self.correlated_field.set_points(points) - if type(self.correlated_field) is SpatialCorrelatedField: - # TODO: make n_terms_range an optianal parmater for SpatialCorrelatedField + if type(self.correlated_field) is SpatialCorrelatedField: self.correlated_field.svd_dcmp(n_terms_range=(10, 100)) else: pass def sample(self): """ - Internal method to generate/compute new sample. - :return: + Generate or compute a new sample of the field. + + :return: Sample values of the field """ if self.const is not None: return self._sample elif self.correlated_field is not None: self._sample = self.correlated_field.sample() else: - params = [ pf._sample for pf in self.param_fields] + params = [pf._sample for pf in self.param_fields] self._sample = self._func(*params) return self._sample class Fields: - def __init__(self, fields): """ - Creates a new set of cross dependent random fields. - Currently no support for cross-correlated random fields. - A set of independent basic random fields must exist - other fields can be dependent in deterministic way. 
- - :param fields: A list of dependent fields. + Create a set of cross-dependent random fields. - Example: - rf = SpatialCorrelatedField(log=True) - Fields([ - Field('por_top', rf, regions='ground_0'), - Field('porosity_top', positive_to_range, ['por_top', 0.02, 0.1], regions='ground_0'), - Field('por_bot', rf, regions='ground_1'), - Field('porosity_bot', positive_to_range, ['por_bot', 0.01, 0.05], regions='ground_1'), - Field('conductivity_top', cf.kozeny_carman, ['porosity_top', 1, 1e-8, water_viscosity], regions='ground_0'), - Field('conductivity_bot', cf.kozeny_carman, ['porosity_bot', 1, 1e-10, water_viscosity],regions='ground_1') - ]) - - TODO: use topological sort to fix order of 'fields' - TODO: syntactic sugar for calculating with fields (like with np.arrays). + :param fields: List of Field objects """ self.fields_orig = fields self.fields_dict = {} self.fields = [] - # Have to make a copy of the fields since we want to generate the samples in them - # and the given instances of Field can be used by an independent FieldSet instance. for field in self.fields_orig: new_field = copy.copy(field) if new_field.param_fields: - new_field.param_fields = [self._get_field_obj(field, new_field.regions) for field in new_field.param_fields] + new_field.param_fields = [self._get_field_obj(f, new_field.regions) + for f in new_field.param_fields] self.fields_dict[new_field.name] = new_field self.fields.append(new_field) def _get_field_obj(self, field_name, regions): """ - Get fields by name, replace constants by constant fields for unification. + Get Field object by name or create constant field. 
+ + :param field_name: Field name or constant + :param regions: Regions of the field + :return: Field object """ if type(field_name) in [float, int]: const_field = Field("const_{}".format(field_name), field_name, regions=regions) @@ -156,56 +144,31 @@ def _get_field_obj(self, field_name, regions): self.fields_dict[const_field.name] = const_field return const_field else: - assert field_name in self.fields_dict, "name: {} dict: {}".format(field_name, self.fields_dict) + assert field_name in self.fields_dict return self.fields_dict[field_name] - @property - def names(self): - return self.fields_dict.keys() - - # def iterative_dfs(self, graph, start, path=[]): - # q = [start] - # while q: - # v = q.pop(0) - # if v not in path: - # path = path + [v] - # q = graph[v] + q - # - # return path - def set_outer_fields(self, outer): """ - Set fields that will be in a dictionary produced by FieldSet.sample() call. - :param outer: A list of names of fields that are sampled. - :return: + Set fields to be included in the sampled dictionary. + + :param outer: List of outer field names """ outer_set = set(outer) for f in self.fields: - if f.name in outer_set: - f.is_outer = True - else: - f.is_outer = False + f.is_outer = f.name in outer_set def set_points(self, points, region_ids=[], region_map={}): """ - Set mesh related data to fields. - - set points for sample evaluation - - translate region names to region ids in fields - - create maps from region constraned point sets of fields to full point set - :param points: np array of points for field evaluation - :param regions: regions of the points; - empty means no points for fields restricted to regions and all points for unrestricted fields - :return: + Assign evaluation points to each field. 
+ + :param points: Array of points for field evaluation + :param region_ids: Optional array of region ids for each point + :param region_map: Mapping from region name to region id """ self.n_elements = len(points) - print("n elements: {}, len(points): {}".format(self.n_elements, len(points))) - - #assert len(points) == len(region_ids) reg_points = {} for i, reg_id in enumerate(region_ids): - reg_list = reg_points.get(reg_id, []) - reg_list.append(i) - reg_points[reg_id] = reg_list + reg_points.setdefault(reg_id, []).append(i) for field in self.fields: point_ids = [] @@ -221,73 +184,42 @@ def set_points(self, points, region_ids=[], region_map={}): def sample(self): """ - Return dictionary of sampled fields. - :return: { 'field_name': sample, ...} + Sample all outer fields. + + :return: Dictionary with field names as keys and sampled arrays as values """ result = {} for field in self.fields: sample = field.sample() if field.is_outer: - if field.name == "cond_tn": - result[field.name] = np.zeros((self.n_elements, 3)) - else: - result[field.name] = np.zeros(self.n_elements) - #result[field.name] = np.zeros(self.n_elements) + shape = (self.n_elements, 3) if field.name == "cond_tn" else self.n_elements + result[field.name] = np.zeros(shape) result[field.name][field.full_sample_ids] = sample return result class RandomFieldBase: """ - Base class for various methods for generating random fields. - - Generating realizations of a spatially correlated random field F for a fixed set of points at X. - E[F(x)] = mu(x) - Cov_ij = Cov[x_i,x_j] = E[(F(x_i) - mu(x))(F(x_j) - mu(x))] - - We assume stationary random field with covariance matrix Cov_ij: - Cov_i,j = c(x_i - x_j) - where c(X) is the "stationary covariance" function. 
We assume: - c(X) = sigma^2 exp( -|X^t K X|^(alpha/2) ) - for spatially heterogeneous sigma(X) we consider particular non-stationary generalization:\ - Cov_i,i = sigma(x_i)*sigma(x_j) exp( -|X^t K X|^(alpha/2) ); X = x_i - x_j - - where: - - sigma(X) is the standard deviance of the single uncorrelated value - - K is a positive definite tensor with eigen vectors corresponding to - main directions and eigen values equal to (1/l_i)^2, where l_i is correlation - length in singel main direction. - - alpha is =1 for "exponential" and =2 for "Gauss" correlation - - SVD decomposition: - Considering first m vectors, such that lam(m)/lam(0) <0.1 - - Example: - ``` - field = SpatialCorrelatedField(corr_exp='exp', corr_length=1.5) - X, Y = np.mgrid[0:1:10j, 0:1:10j] - points = np.vstack([X.ravel(), Y.ravel()]) - field.set_points(points) - sample = field.sample() - - ``` + Base class for generating spatially correlated random fields. + + Random field F(x) with mean E[F(x)] = mu(x) and covariance Cov[x_i,x_j]. + Stationary covariance: Cov_ij = sigma^2 * exp(-|X^T K X|^(alpha/2)), + X = x_i - x_j. + Supports optional non-stationary variance sigma(X). """ def __init__(self, corr_exp='gauss', dim=2, corr_length=1.0, aniso_correlation=None, mu=0.0, sigma=1.0, log=False, **kwargs): """ - :param corr_exp: 'gauss', 'exp' or a float (should be >= 1) - :param dim: dimension of the domain (size of point coords) - :param corr_length: scalar, correlation length L > machine epsilon; tensor K = (1/L)^2 - :param aniso_correlation: 3x3 array; K tensor, overrides correlation length - :param mu - mu field (currently just a constant) - :param sigma - sigma field (currently just a constant) + Initialize a random field. 
- TODO: - - implement anisotropy in the base class using transformation matrix for the points - - use transformation matrix also for the corr_length - - replace corr_exp by aux classes for various correlation functions and pass them here - - more general set of correlation functions + :param corr_exp: 'gauss', 'exp', or float >=1 (correlation exponent) + :param dim: Dimension of the domain + :param corr_length: Scalar correlation length + :param aniso_correlation: Optional anisotropic 3x3 correlation tensor + :param mu: Mean (scalar or array) + :param sigma: Standard deviation (scalar or array) + :param log: If True, output field is exponentiated """ self.dim = dim self.log = log @@ -299,8 +231,6 @@ def __init__(self, corr_exp='gauss', dim=2, corr_length=1.0, else: self.correlation_exponent = float(corr_exp) - # TODO: User should prescribe scaling for main axis and their rotation. - # From this we should construct the transformation matrix for the points self._corr_length = corr_length if aniso_correlation is None: assert corr_length > np.finfo(float).eps @@ -308,31 +238,26 @@ def __init__(self, corr_exp='gauss', dim=2, corr_length=1.0, self._max_corr_length = corr_length else: self.correlation_tensor = aniso_correlation - self._max_corr_length = la.norm(aniso_correlation, ord=2) # largest eigen value + self._max_corr_length = la.norm(aniso_correlation, ord=2) - #### Attributes set through `set_points`. self.points = None - # Evaluation points of the field. self.mu = mu - # Mean in points. Or scalar. self.sigma = sigma - # Standard deviance in points. Or scalar. - - self._initialize(**kwargs) # Implementation dependent initialization. + self._initialize(**kwargs) def _initialize(self, **kwargs): + """Implementation-specific initialization. To be overridden in subclasses.""" raise NotImplementedError() def set_points(self, points, mu=None, sigma=None): """ - :param points: N x d array. Points X_i where the field will be evaluated. d is the dimension. 
- :param mu: Scalar or N array. Mean value of uncorrelated field: E( F(X_i)). - :param sigma: Scalar or N array. Standard deviance of uncorrelated field: sqrt( E ( F(X_i) - mu_i )^2 ) - :return: None + Set points for field evaluation. + + :param points: Array of points (N x dim) + :param mu: Optional mean at points + :param sigma: Optional standard deviation at points """ points = np.array(points, dtype=float) - - assert len(points.shape) >= 1 assert points.shape[1] == self.dim self.n_points, self.dimension = points.shape self.points = points @@ -345,49 +270,43 @@ def set_points(self, points, mu=None, sigma=None): if sigma is not None: self.sigma = sigma self.sigma = np.array(self.sigma, dtype=float) - assert self.sigma.shape == () or sigma.shape == (len(points),) + assert self.sigma.shape == () or self.sigma.shape == (len(points),) def _set_points(self): + """Optional internal method to update points. Can be overridden.""" pass def sample(self): """ - :param uncorelated: Random samples from standard normal distribution. - Removed as the spectral method do not support it. - :return: Random field evaluated in points given by 'set_points'. - """ - # if uncorelated is None: - # uncorelated = np.random.normal(0, 1, self.n_approx_terms) - # else: - # assert uncorelated.shape == (self.n_approx_terms,) + Generate a realization of the random field. + :return: Array of field values at set points + """ field = self._sample() field = self.sigma * field + self.mu - - if not self.log: - return field - return np.exp(field) + return np.exp(field) if self.log else field def _sample(self, uncorrelated): + """ + Implementation-specific sample generation. To be overridden. + + :param uncorrelated: Array of uncorrelated standard normal samples + :return: Field sample + """ raise NotImplementedError() class SpatialCorrelatedField(RandomFieldBase): + """ + Generate spatially correlated fields using covariance matrix and KL decomposition. 
+ """ def _initialize(self, **kwargs): - """ - Called after initialization in common constructor. - """ - - ### Attributes computed in precalculation. + """Initialization specific to SVD/KL-based spatial correlation.""" self.cov_mat = None - # Covariance matrix (dense). self._n_approx_terms = None - # Length of the sample vector, number of KL (Karhunen-Loe?ve) expansion terms. self._cov_l_factor = None - # (Reduced) L factor of the SVD decomposition of the covariance matrix. self._sqrt_ev = None - # (Reduced) square roots of singular values. def _set_points(self): self.cov_mat = None @@ -395,17 +314,16 @@ def _set_points(self): def cov_matrix(self): """ - Setup dense covariance matrix for given set of points. - :return: None. + Compute dense covariance matrix for current points. + + :return: Covariance matrix """ assert self.points is not None, "Points not set, call set_points." - - self._points_bbox = box = (np.min(self.points, axis=0), np.max(self.points, axis=0)) - diameter = np.max(np.abs(box[1] - box[0])) + self._points_bbox = (np.min(self.points, axis=0), np.max(self.points, axis=0)) + diameter = np.max(np.abs(self._points_bbox[1] - self._points_bbox[0])) self._relative_corr_length = self._max_corr_length / diameter - - # sigma_sqr_mat = np.outer(self.sigma, self.sigma.T) self._sigma_sqr_max = np.max(self.sigma) ** 2 + n_pt = len(self.points) self.cov_mat = np.empty((n_pt, n_pt)) corr_exp = self.correlation_exponent / 2.0 @@ -415,19 +333,16 @@ def cov_matrix(self): diff_row = self.points - pt len_sqr_row = np.sum(diff_row.dot(self.correlation_tensor) * diff_row, axis=-1) self.cov_mat[i_row, :] = np.exp(-len_sqr_row ** corr_exp) + return self.cov_mat def _eigen_value_estimate(self, m): """ - Estimate of the m-th eigen value of the covariance matrix. - According to paper: Schwab, Thodor: KL Approximation of Random Fields by ... - However for small gamma the asimtotics holds just for to big values of 'm'. - We rather need to find a semiempricial formula. 
- greater - :param m: - :return: + Semi-empirical estimate of the m-th eigenvalue of covariance matrix. + + :param m: Eigenvalue index + :return: Estimated eigenvalue """ - assert self.cov_mat is not None d = self.dimension alpha = self.correlation_exponent gamma = self._relative_corr_length @@ -435,24 +350,11 @@ def _eigen_value_estimate(self, m): def svd_dcmp(self, precision=0.01, n_terms_range=(1, np.inf)): """ - Does decomposition of covariance matrix defined by set of points - :param precision: Desired accuracy of the KL approximation, smaller eigen values are dropped. - :param n_terms_range: (min, max) number of terms in KL expansion to use. The number of terms estimated from - given precision is snapped to the given interval. - - truncated SVD: - cov_mat = U*diag(ev) * V, - _cov_l_factor = U[:,0:m]*sqrt(ev[0:m]) + Perform truncated SVD for Karhunen-Loeve decomposition. - Note on number of terms: - According to: C. Schwab and R. A. Todor: KL Approximation of Random Fields by Generalized Fast Multiploe Method - the eigen values should decay as (Proposition 2.18): - lambda_m ~ sigma^2 * ( 1/gamma ) **( m**(1/d) + alpha ) / Gamma(0.5 * m**(1/d) ) - where gamma = correlation length / domain diameter - ans alpha is the correlation exponent. Gamma is the gamma function. - ... 
should be checked experimantaly and generalized for sigma(X) - - :return: + :param precision: Desired accuracy + :param n_terms_range: Min/max number of KL terms + :return: (_cov_l_factor, singular values) """ if self.cov_mat is None: self.cov_matrix() @@ -461,55 +363,58 @@ def svd_dcmp(self, precision=0.01, n_terms_range=(1, np.inf)): U, ev, VT = np.linalg.svd(self.cov_mat) m = self.n_points else: - range = list(n_terms_range) - range[0] = max(1, range[0]) - range[1] = min(self.n_points, range[1]) - - prec_range = (self._eigen_value_estimate(range[0]), self._eigen_value_estimate(range[1])) + range_vals = [max(1, n_terms_range[0]), min(self.n_points, n_terms_range[1])] + prec_range = (self._eigen_value_estimate(range_vals[0]), self._eigen_value_estimate(range_vals[1])) if precision < prec_range[0]: - m = range[0] + m = range_vals[0] elif precision > prec_range[1]: - m = range[1] + m = range_vals[1] else: f = lambda m: self._eigen_value_estimate(m) - precision - m = sp.optmize.bisect(f, range[0], range[1], xtol=0.5, ) - - m = max(m, range[0]) + m = sp.optimize.bisect(f, range_vals[0], range_vals[1], xtol=0.5) + m = max(m, range_vals[0]) threshold = 2 * precision - # TODO: Test if we should cut eigen values by relative (like now) or absolute value - while threshold >= precision and m <= range[1]: - #print("treshold: {} m: {} precision: {} max_m: {}".format(threshold, m, precision, range[1])) + while threshold >= precision and m <= range_vals[1]: U, ev, VT = randomized_svd(self.cov_mat, n_components=m, n_iter=3, random_state=None) threshold = ev[-1] / ev[0] m = int(np.ceil(1.5 * m)) - m = len(ev) - m = min(m, range[1]) + m = min(len(ev), range_vals[1]) - #print("KL approximation: {} for {} points.".format(m, self.n_points)) self.n_approx_terms = m - self._sqrt_ev = np.sqrt(ev[0:m]) - self._cov_l_factor = U[:, 0:m].dot(np.diag(self._sqrt_ev)) + self._sqrt_ev = np.sqrt(ev[:m]) + self._cov_l_factor = U[:, :m].dot(np.diag(self._sqrt_ev)) self.cov_mat = None - return 
self._cov_l_factor, ev[0:m] + return self._cov_l_factor, ev[:m] def _sample(self): """ - :param uncorelated: Random samples from standard normal distribution. - :return: Random field evaluated in points given by 'set_points'. + Generate a field realization using KL decomposition. + + :return: Field sample array """ if self._cov_l_factor is None: self.svd_dcmp() - uncorelated = np.random.normal(0, 1, self.n_approx_terms) - return self._cov_l_factor.dot(uncorelated) + uncorrelated = np.random.normal(0, 1, self.n_approx_terms) + return self._cov_l_factor.dot(uncorrelated) class GSToolsSpatialCorrelatedField(RandomFieldBase): + """ + Spatially correlated random field using the GSTools library. + + Uses Fourier modes to generate spatial random fields efficiently. + """ def __init__(self, model, mode_no=1000, log=False, sigma=1, seed=None): """ - :param model: instance of covariance model class, which parent is gstools.covmodel.CovModel - :param mode_no: number of Fourier modes, default: 1000 as in gstools package + Initialize GSTools-based spatial random field. + + :param model: gstools covariance model (subclass of gstools.CovModel) + :param mode_no: Number of Fourier modes (default 1000) + :param log: If True, output field is exponentiated + :param sigma: Standard deviation + :param seed: Optional random seed for reproducibility """ self.model = model self.mode_no = mode_no @@ -521,172 +426,164 @@ def __init__(self, model, mode_no=1000, log=False, sigma=1, seed=None): def change_srf(self, seed): """ - Spatial random field with new seed - :param seed: int, random number generator seed - :return: None + Generate a new spatial random field with a different seed. + + :param seed: Random seed """ self.srf = gstools.SRF(self.model, seed=seed, mode_no=self.mode_no) def random_field(self): """ - Generate the spatial random field - :return: field, np.ndarray + Evaluate the spatial random field at the current points. 
+ + :return: Field values (np.ndarray) """ if self.dim == 1: x = self.points - x.reshape(len(x)) - field = self.srf((x,)) + x = x.reshape(len(x),) + return self.srf((x,)) elif self.dim == 2: x, y = self.points.T x = x.reshape(len(x), 1) y = y.reshape(len(y), 1) - field = self.srf((x, y)) - else: + return self.srf((x, y)) + else: # dim == 3 x, y, z = self.points.T x = x.reshape(len(x), 1) y = y.reshape(len(y), 1) z = z.reshape(len(z), 1) - field = self.srf((x, y, z)) - - return field + return self.srf((x, y, z)) def sample(self): """ - :return: Random field evaluated in points given by 'set_points' + Generate a realization of the GSTools spatial random field. + + :return: Field values (np.ndarray) """ - if not self.log: - return self.sigma * self.random_field() + self.mu - return np.exp(self.sigma * self.random_field() + self.mu) + field = self.random_field() + field = self.sigma * field + self.mu + return np.exp(field) if self.log else field class FourierSpatialCorrelatedField(RandomFieldBase): """ - Generate spatial random fields + Deprecated: Fourier-based spatial random field generator. + + Generates spatial random fields using a truncated Fourier series. + Use GSToolsSpatialCorrelatedField instead. """ def _initialize(self, **kwargs): """ - Own intialization. - :param mode_no: Number of Fourier modes + Initialization specific to Fourier-based spatial fields. 
+ + :param mode_no: Number of Fourier modes (default 1000) """ - warnings.warn("FourierSpatialCorrelatedField class is deprecated, try to use GSToolsSpatialCorrelatedField class instead", - DeprecationWarning) - self.len_scale = self._corr_length * 2*np.pi + warnings.warn( + "FourierSpatialCorrelatedField class is deprecated, use GSToolsSpatialCorrelatedField instead", + DeprecationWarning + ) + self.len_scale = self._corr_length * 2 * np.pi self.mode_no = kwargs.get("mode_no", 1000) def get_normal_distr(self): """ - Normal distributed arrays - :return: np.ndarray + Generate normal distributed random coefficients for Fourier modes. + + :return: Array of shape (2, mode_no) """ Z = np.empty((2, self.mode_no)) rng = self._get_random_stream() for i in range(2): Z[i] = rng.normal(size=self.mode_no) - return Z def _sample_sphere(self, mode_no): - """Uniform sampling on a d-dimensional sphere - Parameters - ---------- - mode_no : :class:`int`, optional - number of the Fourier modes - Returns - ------- - coord : :class:`numpy.ndarray` - x[, y[, z]] coordinates on the sphere with shape (dim, mode_no) + """ + Uniformly sample directions on the unit sphere (dim=1,2,3). 
+ + :param mode_no: Number of modes + :return: Array of unit vectors (dim, mode_no) """ coord = self._create_empty_k(mode_no) + rng = self._get_random_stream() if self.dim == 1: - rng = self._get_random_stream() ang1 = rng.random_sample(mode_no) coord[0] = 2 * np.around(ang1) - 1 elif self.dim == 2: - rng = self._get_random_stream() ang1 = rng.uniform(0.0, 2 * np.pi, mode_no) coord[0] = np.cos(ang1) coord[1] = np.sin(ang1) elif self.dim == 3: - raise NotImplementedError("For implementation see " - "https://github.com/LSchueler/GSTools/blob/randomization_revisited/gstools/field/rng.py") + raise NotImplementedError("3D implementation see GSTools repo") return coord def gau(self, mode_no=1000): """ - Compute a gaussian spectrum - :param mode_no: int, Number of Fourier modes - :return: numpy.ndarray + Gaussian Fourier spectrum. + + :param mode_no: Number of modes + :return: Array of wave vectors (dim, mode_no) """ len_scale = self.len_scale * np.sqrt(np.pi / 4) if self.dim == 1: k = self._create_empty_k(mode_no) - rng = self._get_random_stream() - k[0] = rng.normal(0., np.pi / 2.0 / len_scale ** 2, mode_no) + k[0] = self._get_random_stream().normal(0., np.pi / 2.0 / len_scale ** 2, mode_no) elif self.dim == 2: coord = self._sample_sphere(mode_no) - rng = self._get_random_stream() - rad_u = rng.random_sample(mode_no) - # weibull distribution sampling + rad_u = self._get_random_stream().random_sample(mode_no) rad = np.sqrt(np.pi) / len_scale * np.sqrt(-np.log(rad_u)) k = rad * coord elif self.dim == 3: - raise NotImplementedError("For implementation see " - "https://github.com/LSchueler/GSTools/blob/randomization_revisited/gstools/field/rng.py") + raise NotImplementedError("3D implementation see GSTools repo") return k def exp(self, mode_no=1000): """ - Compute an exponential spectrum - :param mode_no: int, Number of Fourier modes - :return: numpy.ndarray + Exponential Fourier spectrum. 
+ + :param mode_no: Number of modes + :return: Array of wave vectors (dim, mode_no) """ if self.dim == 1: k = self._create_empty_k(mode_no) - rng = self._get_random_stream() - k_u = rng.rng.uniform(-np.pi / 2.0, np.pi / 2.0, mode_no) + k_u = self._get_random_stream().uniform(-np.pi / 2.0, np.pi / 2.0, mode_no) k[0] = np.tan(k_u) / self.len_scale elif self.dim == 2: coord = self._sample_sphere(mode_no) - rng = self._get_random_stream() - rad_u = rng.random_sample(mode_no) - # sampling with ppf + rad_u = self._get_random_stream().random_sample(mode_no) rad = np.sqrt(1.0 / rad_u ** 2 - 1.0) / self.len_scale k = rad * coord elif self.dim == 3: - raise NotImplementedError("For implementation see " - "https://github.com/LSchueler/GSTools/blob/randomization_revisited/gstools/field/rng.py") + raise NotImplementedError("3D implementation see GSTools repo") return k def _create_empty_k(self, mode_no=None): - """ Create empty mode array with the correct shape. - Parameters - ---------- - mode_no : :class:`int` - number of the fourier modes - Returns - ------- - :class:`numpy.ndarray` - the empty mode array - """ - if mode_no is None: - k = np.empty(self.dim) - else: - k = np.empty((self.dim, mode_no)) + """ + Helper to create empty Fourier mode array. - return k + :param mode_no: Number of modes + :return: Empty array of shape (dim, mode_no) + """ + return np.empty((self.dim, mode_no)) if mode_no is not None else np.empty(self.dim) def _get_random_stream(self, seed=None): + """ + Return a random number generator. + + :param seed: Optional seed + """ return rand.RandomState(rand.RandomState(seed).randint(2 ** 16 - 1)) def random_field(self): """ - Calculates the random modes for the randomization method. + Generate a random field using Fourier series. 
+ + :return: Field values at points """ - y, z = None, None + # Prepare coordinates if self.dim == 1: - x = self.points - x.reshape(len(x), 1) + x = self.points.reshape(len(self.points), 1) elif self.dim == 2: x, y = self.points.T x = x.reshape(len(x), 1) @@ -698,60 +595,20 @@ def random_field(self): z = z.reshape(len(z), 1) normal_distr_values = self.get_normal_distr() + k = self.gau(self.mode_no) if self.correlation_exponent == 2 else self.exp(self.mode_no) - if self.correlation_exponent == 2: - k = self.gau(self.mode_no) - else: - k = self.exp(self.mode_no) + summed_modes = np.zeros(len(self.points)) + # Fourier summation (memory safe chunks could be implemented here) + for i in range(self.mode_no): + phase = np.sum(k[:, i] * self.points.T, axis=0) + summed_modes += normal_distr_values[0, i] * np.cos(2*np.pi*phase) + normal_distr_values[1, i] * np.sin(2*np.pi*phase) - # reshape for unstructured grid - for dim_i in range(self.dim): - k[dim_i] = np.squeeze(k[dim_i]) - k[dim_i] = np.reshape(k[dim_i], (1, len(k[dim_i]))) - - summed_modes = np.broadcast(x, y, z) - summed_modes = np.squeeze(np.zeros(summed_modes.shape)) - # Test to see if enough memory is available. 
- # In case there isn't, divide Fourier modes into smaller chunks - chunk_no = 1 - chunk_no_exp = 0 - - while True: - try: - chunk_len = int(np.ceil(self.mode_no / chunk_no)) - - for chunk in range(chunk_no): - a = chunk * chunk_len - # In case k[d,a:e] with e >= len(k[d,:]) causes errors in - # numpy, use the commented min-function below - # e = min((chunk + 1) * chunk_len, self.mode_no-1) - e = (chunk + 1) * chunk_len - - if self.dim == 1: - phase = k[0, a:e]*x - elif self.dim == 2: - phase = k[0, a:e]*x + k[1, a:e]*y - else: - phase = (k[0, a:e]*x + k[1, a:e]*y + - k[2, a:e]*z) - - summed_modes += np.squeeze( - np.sum(normal_distr_values[0, a:e] * np.cos(2.*np.pi*phase) + - normal_distr_values[1, a:e] * np.sin(2.*np.pi*phase), - axis=-1)) - except MemoryError: - chunk_no += 2**chunk_no_exp - chunk_no_exp += 1 - print('Not enough memory. Dividing Fourier modes into {} ' - 'chunks.'.format(chunk_no)) - else: - break - - field = np.sqrt(1.0 / self.mode_no) * summed_modes - return field + return np.sqrt(1.0 / self.mode_no) * summed_modes def _sample(self): """ - :return: Random field evaluated in points given by 'set_points'. + Generate a Fourier-based random field realization. + + :return: Field values """ return self.random_field() diff --git a/mlmc/random/frac_geom.py b/mlmc/random/frac_geom.py deleted file mode 100644 index 0d872646..00000000 --- a/mlmc/random/frac_geom.py +++ /dev/null @@ -1,140 +0,0 @@ -import numpy as np -import geomop.polygons as poly -import geomop.merge as merge -import geomop.polygons_io as poly_io -import geomop.format_last as lg -import geomop.layers_io -import geomop.geometry -#from geomop.plot_polygons import plot_polygon_decomposition - - - - - - - - - -def make_frac_mesh(box, mesh_step, fractures, frac_step): - """ - Make geometry and mesh for given 2d box and set of fractures. 
- :param box: [min_point, max_point]; points are np.arrays - :param fractures: Array Nx2x2, one row for every fracture given by endpoints: [p0, p1] - :return: GmshIO object with physical groups: - box: 1, - fractures: 1000 + i, i = 0, ... , N-1 - """ - regions = make_regions(mesh_step, fractures, frac_step) - decomp, reg_map = make_decomposition(box, fractures, regions) - geom = fill_lg(decomp, reg_map, regions) - return make_mesh(geom) - - -def add_reg(regions, name, dim, step=0.0, bc=False, not_used =False): - reg = lg.Region(dict(name=name, dim=dim, mesh_step=step, boundary=bc, not_used=not_used)) - reg._id = len(regions) - regions.append(reg) - -def make_regions(mesh_step, fractures, frac_step): - regions = [] - add_reg(regions, "NONE", -1, not_used=True) - add_reg(regions, "bulk_0", 2, mesh_step) - add_reg(regions, ".bc_inflow", 1, bc=True) - add_reg(regions, ".bc_outflow", 1, bc=True) - for f_id in range(len(fractures)): - add_reg(regions, "frac_{}".format(f_id), 1, frac_step) - return regions - - -def make_decomposition(box, fractures, regions): - box_pd = poly.PolygonDecomposition() - p00, p11 = box - p01 = np.array([p00[0], p11[1]]) - p10 = np.array([p11[0], p00[1]]) - box_pd.add_line(p00, p01) - seg_outflow, = box_pd.add_line(p01, p11) - box_pd.add_line(p11, p10) - seg_inflow, = box_pd.add_line(p10, p00) - - decompositions = [box_pd] - for p0, p1 in fractures: - pd = poly.PolygonDecomposition() - pd.add_line(p0, p1) - decompositions.append(pd) - - common_decomp, maps = merge.intersect_decompositions(decompositions) - #plot_polygon_decomposition(common_decomp) - #print(maps) - - # Map common_decomp objects to regions. 
- none_region_id = 0 - box_reg_id = 1 - bc_inflow_id = 2 - bc_outflow_id = 3 - frac_id_shift = 4 - decomp_shapes = [common_decomp.points, common_decomp.segments, common_decomp.polygons] - reg_map = [{key: regions[none_region_id] for key in decomp_shapes[d].keys()} for d in range(3)] - for i_frac, f_map in enumerate(maps[1:]): - for id, orig_seg_id in f_map[1].items(): - reg_map[1][id] = regions[frac_id_shift + i_frac] - - for id, orig_poly_id in maps[0][2].items(): - if orig_poly_id == 0: - continue - reg_map[2][id] = regions[box_reg_id] - - for id, orig_seg_id in maps[0][1].items(): - if orig_seg_id == seg_inflow.id: - reg_map[1][id] = regions[bc_inflow_id] - if orig_seg_id == seg_outflow.id: - reg_map[1][id] = regions[bc_outflow_id] - - - return common_decomp, reg_map - - -def fill_lg(decomp, reg_map, regions): - """ - Create LayerGeometry object. - """ - nodes, topology = poly_io.serialize(decomp) - - geom = lg.LayerGeometry() - geom.version - geom.regions = regions - - - - iface_ns = lg.InterfaceNodeSet(dict( - nodeset_id = 0, - interface_id = 0 - )) - layer = lg.FractureLayer(dict( - name = "layer", - top = iface_ns, - polygon_region_ids = [ reg_map[2][poly.id]._id for poly in decomp.polygons.values() ], - segment_region_ids = [ reg_map[1][seg.id]._id for seg in decomp.segments.values() ], - node_region_ids = [ reg_map[0][node.id]._id for node in decomp.points.values() ] - )) - geom.layers = [ layer ] - #geom.surfaces = [ClassFactory(Surface)] - - iface = lg.Interface(dict( - surface_id = None, - elevation = 0.0 - )) - geom.interfaces = [ iface ] - #geom.curves = [ClassFactory(Curve)] - geom.topologies = [ topology ] - - nodeset = lg.NodeSet(dict( - topology_id = 0, - nodes = nodes - )) - geom.node_sets = [ nodeset ] - geomop.layers_io.write_geometry("fractured_2d.json", geom) - return geom - - -def make_mesh(geometry): - return geomop.geometry.make_geometry(geometry=geometry, layers_file="fractured_2d.json", mesh_step=1.0) \ No newline at end of file diff 
--git a/mlmc/sample_storage.py b/mlmc/sample_storage.py index ec05080f..623bd9b3 100644 --- a/mlmc/sample_storage.py +++ b/mlmc/sample_storage.py @@ -1,134 +1,142 @@ import itertools import numpy as np -from abc import ABCMeta -from abc import abstractmethod -from typing import List, Dict +from abc import ABCMeta, abstractmethod +from typing import List, Dict, Any, Generator, Optional, Tuple from mlmc.quantity.quantity_spec import QuantitySpec, ChunkSpec class SampleStorage(metaclass=ABCMeta): """ - Provides methods to store and retrieve sample's data + Provides methods to store and retrieve sample data. + Abstract base class for all storage backends. """ @abstractmethod def save_samples(self, successful_samples, failed_samples): """ - Write results to storage + Write simulation results to storage. + :param successful_samples: Dict[level_id, List[Tuple[sample_id, (fine, coarse)]]] + :param failed_samples: Dict[level_id, List[Tuple[sample_id, error_message]]] """ @abstractmethod def save_result_format(self, res_spec: List[QuantitySpec]): """ - Save result format + Save result format. + :param res_spec: List of quantity specifications describing result structure. """ @abstractmethod def load_result_format(self) -> List[QuantitySpec]: """ - Load result format + Load stored result format. + :return: List[QuantitySpec] """ @abstractmethod def save_global_data(self, result_format: List[QuantitySpec], level_parameters=None): """ - Save global data, at the moment: _result_format, level_parameters + Save global metadata such as result format and level parameters. + :param result_format: List[QuantitySpec] + :param level_parameters: Optional metadata per level """ @abstractmethod def save_scheduled_samples(self, level_id, samples): """ - Save scheduled samples ids + Save scheduled sample identifiers. 
+ :param level_id: int + :param samples: List[str] """ @abstractmethod - def load_scheduled_samples(self): + def load_scheduled_samples(self) -> Dict[int, List[str]]: """ - Load scheduled samples - :return: Dict[_level_id, List[sample_id: str]] + Load scheduled sample IDs. + :return: Dict[level_id, List[sample_id]] """ @abstractmethod def sample_pairs(self): """ - Get results from storage - :return: List[Array[M, N, 2]] + Retrieve all stored fine–coarse result pairs. + :return: List[np.ndarray[M, N, 2]] """ - def chunks(self, level_id=None, n_samples=None): + def chunks(self, level_id: Optional[int] = None, n_samples: Optional[int] = None) -> Generator[ChunkSpec, None, None]: """ - Create chunks generator - :param level_id: int, if not None return chunks for a given level - :param n_samples: int, number of samples to retrieve - :return: generator + Create a generator yielding chunk specifications for collected data. + :param level_id: int, if provided, return chunks only for the given level. + :param n_samples: int, maximum number of samples to retrieve. + :return: generator of ChunkSpec objects. """ - assert isinstance(n_samples, (type(None), int)), "n_samples param must be int" - level_ids = self.get_level_ids() - if level_id is not None: - level_ids = [level_id] - return itertools.chain(*[self._level_chunks(level_id, n_samples) for level_id in level_ids]) # concatenate generators + assert isinstance(n_samples, (type(None), int)), "n_samples must be int or None" + level_ids = [level_id] if level_id is not None else self.get_level_ids() + return itertools.chain(*[self._level_chunks(lid, n_samples) for lid in level_ids]) @abstractmethod def _level_chunks(self, level_id, n_samples=None): """ - Info about chunks of level's collected data + Get chunk information for data collected at a given level. 
+ :param level_id: int + :param n_samples: int :return: generator of ChunkSpec objects """ @abstractmethod def n_finished(self): """ - Number of finished samples - :return: List + Get number of finished samples on each level. + :return: List[int] """ @abstractmethod - def save_n_ops(self, n_ops: Dict[int, List[float]]): + def save_n_ops(self, n_ops: Dict[int, Tuple[float, int]]): """ - Save number of operations (time) - :param n_ops: Dict[_level_id, List[overall time, number of valid samples]] + Save number of operations (time). + :param n_ops: Dict[level_id, Tuple[total_time, n_valid_samples]] """ @abstractmethod def get_n_ops(self): """ - Number of operations (time) per sample for each level + Get number of operations per sample for each level. :return: List[float] """ @abstractmethod def unfinished_ids(self): """ - Get unfinished sample's ids - :return: list + Get IDs of unfinished samples. + :return: List[str] """ @abstractmethod def get_level_ids(self): """ - Get number of levels - :return: int + Get list of available level IDs. + :return: List[int] """ @abstractmethod def get_n_levels(self): """ - Get number of levels + Get total number of levels. :return: int """ @abstractmethod def get_level_parameters(self): """ - Get level parameters - :return: list + Get stored level parameters. + :return: List[Any] """ @abstractmethod def get_n_collected(self): """ - Get number of collected results at each evel - :return: list + Get number of collected results at each level. + :return: List[int] """ diff --git a/mlmc/sample_storage_hdf.py b/mlmc/sample_storage_hdf.py index a53a8c64..5b7e4dbe 100644 --- a/mlmc/sample_storage_hdf.py +++ b/mlmc/sample_storage_hdf.py @@ -8,33 +8,39 @@ class SampleStorageHDF(SampleStorage): """ - Sample's data are stored in a HDF5 file + Store and manage sample data in an HDF5 file. + + This implementation of the SampleStorage interface provides efficient + persistent storage for MLMC simulation results using HDF5. 
""" def __init__(self, file_path): """ - HDF5 storage, provide method to interact with storage - :param file_path: absolute path to hdf file (which not exists at the moment) + Initialize the HDF5 storage and create or load the file structure. + + :param file_path: Absolute path to the HDF5 file. + If the file exists, it will be loaded instead of created. """ super().__init__() - # If file exists load not create new file - load_from_file = True if os.path.exists(file_path) else False + load_from_file = os.path.exists(file_path) # HDF5 interface self._hdf_object = hdf.HDF5(file_path=file_path, load_from_file=load_from_file) self._level_groups = [] - # 'Load' level groups + # Load existing level groups if file already contains data if load_from_file: - # Create level group for each level if len(self._level_groups) != len(self._hdf_object.level_parameters): for i_level in range(len(self._hdf_object.level_parameters)): self._level_groups.append(self._hdf_object.add_level_group(str(i_level))) def _hdf_result_format(self, locations, times): """ - QuantitySpec data type, necessary for hdf storage - :return: + Construct an appropriate dtype for QuantitySpec data representation in HDF5. + + :param locations: List of spatial locations (as coordinates or identifiers). + :param times: List of time steps. + :return: Numpy dtype describing the QuantitySpec data structure. 
""" if len(locations[0]) == 3: tuple_dtype = np.dtype((float, (3,))) @@ -42,41 +48,42 @@ def _hdf_result_format(self, locations, times): else: loc_dtype = np.dtype(('S50', (len(locations),))) - result_dtype = {'names': ('name', 'unit', 'shape', 'times', 'locations'), - 'formats': ('S50', - 'S50', - np.dtype((np.int32, (2,))), - np.dtype((float, (len(times),))), - loc_dtype - ) - } + result_dtype = { + 'names': ('name', 'unit', 'shape', 'times', 'locations'), + 'formats': ( + 'S50', + 'S50', + np.dtype((np.int32, (2,))), + np.dtype((float, (len(times),))), + loc_dtype + ) + } return result_dtype def save_global_data(self, level_parameters: List[float], result_format: List[QuantitySpec]): """ - Save hdf5 file global attributes - :param level_parameters: list of simulation steps - :param result_format: simulation result format + Save HDF5 global attributes including simulation parameters and result format. + + :param level_parameters: List of simulation level parameters (e.g., mesh sizes). + :param result_format: List of QuantitySpec objects describing result quantities. :return: None """ res_dtype = self._hdf_result_format(result_format[0].locations, result_format[0].times) - - # Create file structure self._hdf_object.create_file_structure(level_parameters) - # Create group for each level + # Create HDF5 groups for each simulation level if len(self._level_groups) != len(level_parameters): for i_level in range(len(level_parameters)): self._level_groups.append(self._hdf_object.add_level_group(str(i_level))) - # Save result format (QuantitySpec) self.save_result_format(result_format, res_dtype) def load_scheduled_samples(self): """ - Get scheduled samples for each level - :return: Dict[level_id, List[sample_id: str]] + Load scheduled samples from storage. 
+ + :return: Dict[level_id, List[sample_id: str]] """ scheduled = {} for level in self._level_groups: @@ -85,79 +92,116 @@ def load_scheduled_samples(self): def save_result_format(self, result_format: List[QuantitySpec], res_dtype): """ - Save result format to hdf - :param result_format: List[QuantitySpec] + Save result format metadata to HDF5. + + :param result_format: List of QuantitySpec objects defining stored quantities. + :param res_dtype: Numpy dtype for structured storage. :return: None """ try: if self.load_result_format() != result_format: - raise ValueError('You are setting a new different result format for an existing sample storage') + raise ValueError( + "Attempting to overwrite an existing result format with a new incompatible one." + ) except AttributeError: pass + self._hdf_object.save_result_format(result_format, res_dtype) def load_result_format(self) -> List[QuantitySpec]: """ - Load result format + Load and reconstruct the result format from HDF5. + + :return: List of QuantitySpec objects. """ results_format = self._hdf_object.load_result_format() quantities = [] for res_format in results_format: - spec = QuantitySpec(res_format[0].decode(), res_format[1].decode(), res_format[2], res_format[3], - [loc.decode() for loc in res_format[4]]) - + spec = QuantitySpec( + res_format[0].decode(), + res_format[1].decode(), + res_format[2], + res_format[3], + [loc.decode() for loc in res_format[4]] + ) quantities.append(spec) - return quantities def save_samples(self, successful, failed): """ - Save successful and failed samples - :param successful: List[Tuple[sample_id: str, Tuple[ndarray, ndarray]]] - :param failed: List[Tuple[sample_id: str, error_message: str]] + Save successful and failed samples to the HDF5 storage. 
+ + :param successful: Dict[level_id, List[Tuple[sample_id: str, (fine, coarse)]]] + :param failed: Dict[level_id, List[Tuple[sample_id: str, error_message: str]]] :return: None """ - self._save_succesful(successful) + self._save_successful(successful) self._save_failed(failed) - def _save_succesful(self, successful_samples): + def _save_successful(self, successful_samples): + """ + Append successful sample results to the appropriate level group. + + :param successful_samples: Dict[level_id, List[Tuple[sample_id, (fine, coarse)]]] + :return: None + """ for level, samples in successful_samples.items(): if len(samples) > 0: self._level_groups[level].append_successful(np.array(samples, dtype=object)) def _save_failed(self, failed_samples): + """ + Append failed sample identifiers and messages. + + :param failed_samples: Dict[level_id, List[Tuple[sample_id, error_message]]] + :return: None + """ for level, samples in failed_samples.items(): if len(samples) > 0: self._level_groups[level].append_failed(samples) def save_scheduled_samples(self, level_id, samples: List[str]): """ - Append scheduled samples - :param level_id: int - :param samples: list of sample identifiers + Append scheduled sample identifiers for a specific level. + + :param level_id: Integer level identifier. + :param samples: List of sample identifiers. :return: None """ self._level_groups[level_id].append_scheduled(samples) def _level_chunks(self, level_id, n_samples=None): + """ + Generate chunk specifications for a given level. + + :param level_id: Level identifier. + :param n_samples: Optional number of samples to include per chunk. + :return: Generator of ChunkSpec objects. + """ return self._level_groups[level_id].chunks(n_samples) def sample_pairs(self): """ - Load results from hdf file - :return: List[Array[M, N, 2]] + Retrieve all sample pairs from storage. + + :return: List[np.ndarray[M, N, 2]] where M = number of results, N = number of samples. 
""" if len(self._level_groups) == 0: - raise Exception("self._level_groups shouldn't be empty, save_global_data() method should have set it, " - "that method is always called from mlmc.sampler.Sampler constructor." - " In other cases, call save_global_data() directly") + raise Exception( + "Level groups are not initialized. " + "Ensure save_global_data() is called before using SampleStorageHDF." + ) levels_results = list(np.empty(len(self._level_groups))) for level in self._level_groups: - chunk_spec = next(self.chunks(level_id=int(level.level_id), - n_samples=self.get_n_collected()[int(level.level_id)])) - results = self.sample_pairs_level(chunk_spec) # return all samples no chunks + chunk_spec = next( + self.chunks( + level_id=int(level.level_id), + n_samples=self.get_n_collected()[int(level.level_id)] + ) + ) + results = self.sample_pairs_level(chunk_spec) if results is None or len(results) == 0: levels_results[int(level.level_id)] = [] continue @@ -166,13 +210,12 @@ def sample_pairs(self): def sample_pairs_level(self, chunk_spec): """ - Get result for particular level and chunk - :param chunk_spec: object containing chunk identifier level identifier and chunk_slice - slice() object - :return: np.ndarray + Retrieve samples for a specific level and chunk. + + :param chunk_spec: ChunkSpec containing level ID and slice information. + :return: np.ndarray of shape [M, chunk size, 2]. """ - level_id = chunk_spec.level_id - if chunk_spec.level_id is None: - level_id = 0 + level_id = chunk_spec.level_id or 0 chunk = self._level_groups[int(level_id)].collected(chunk_spec.chunk_slice) # Remove auxiliary zeros from level zero sample pairs @@ -183,31 +226,31 @@ def sample_pairs_level(self, chunk_spec): def n_finished(self): """ - Number of finished samples on each level - :return: List[int] + Count the number of finished samples for each level. + + :return: np.ndarray[int] containing finished sample counts per level. 
""" n_finished = np.zeros(len(self._level_groups)) for level in self._level_groups: n_finished[int(level.level_id)] += len(level.get_finished_ids()) - return n_finished def unfinished_ids(self): """ - List of unfinished ids - :return: list + Return identifiers of all unfinished samples. + + :return: List[str] """ unfinished = [] - for level in self._level_groups: unfinished.extend(level.get_unfinished_ids()) - return unfinished def failed_samples(self): """ - Dictionary of failed samples - :return: dict + Return dictionary of failed samples for each level. + + :return: Dict[str, List[str]] """ failed_samples = {} for level in self._level_groups: @@ -215,13 +258,17 @@ def failed_samples(self): return failed_samples def clear_failed(self): + """ + Clear all failed sample records from storage. + """ for level in self._level_groups: level.clear_failed_dataset() def save_n_ops(self, n_ops): """ - Save number of operations (time) of samples - :param n_ops: Dict[level_id, List[overall time, number of successful samples]] + Save the estimated number of operations (e.g., runtime) for each level. + + :param n_ops: Dict[level_id, List[total_time, num_successful_samples]] :return: None """ for level_id, (time, n_samples) in n_ops: @@ -236,8 +283,9 @@ def save_n_ops(self, n_ops): def get_n_ops(self): """ - Get number of estimated operations on each level - :return: List + Get the average number of operations per sample for each level. + + :return: List[float] """ n_ops = list(np.zeros(len(self._level_groups))) for level in self._level_groups: @@ -248,15 +296,26 @@ def get_n_ops(self): return n_ops def get_level_ids(self): + """ + Get identifiers of all levels stored in HDF5. + + :return: List[int] + """ return [int(level.level_id) for level in self._level_groups] def get_level_parameters(self): + """ + Load stored level parameters (e.g., step sizes or resolutions). 
+ + :return: List[float] + """ return self._hdf_object.load_level_parameters() def get_n_collected(self): """ - Get number of collected samples at each level - :return: List + Get the number of collected (stored) samples for each level. + + :return: List[int] """ n_collected = list(np.zeros(len(self._level_groups))) for level in self._level_groups: @@ -265,7 +324,8 @@ def get_n_collected(self): def get_n_levels(self): """ - Get number of levels + Get total number of levels present in storage. + :return: int """ return len(self._level_groups) diff --git a/mlmc/sampler.py b/mlmc/sampler.py index 17b7dce0..d1a39f29 100644 --- a/mlmc/sampler.py +++ b/mlmc/sampler.py @@ -8,7 +8,12 @@ class Sampler: """ - Manages samples scheduling, results collection, and result storage. + Manages sample scheduling, result collection, and persistent storage. + + Coordinates the sampling pool, simulation factory, and sample storage: + - schedules new samples according to target counts, + - collects finished samples and writes them to storage, + - handles failed samples and runtime (n_ops) bookkeeping. """ ADDING_SAMPLES_TIMEOUT = 1e-15 @@ -16,64 +21,74 @@ class Sampler: def __init__(self, sample_storage: SampleStorage, sampling_pool: SamplingPool, sim_factory: Simulation, level_parameters: List[List[float]], seed=1234): """ + Initialize sampler and prepare per-level simulation objects. + :param sample_storage: store scheduled samples, results and result structure - :param sampling_pool: calculate samples - :param sim_factory: generate samples - :param level_parameters: List of e.g. simulation steps, ... - :param seed: global random seed + :param sampling_pool: sampling pool responsible for executing simulations + :param sim_factory: factory that creates level Simulation instances and provides result_format() + :param level_parameters: List of per-level parameters (e.g. 
simulation steps) + :param seed: global RNG seed used to seed NumPy's RNG """ np.random.seed(seed) self.sample_storage = sample_storage self._sampling_pool = sampling_pool + # Target number of samples per level (may be updated later) self._n_target_samples = np.zeros(len(level_parameters)) - # Number of target samples + + # Create LevelSimulation objects for each level using the provided factory self._level_sim_objects = [] self._create_level_sim_objects(level_parameters, sim_factory) + # Persist global data (level parameters and result format) into storage sample_storage.save_global_data(level_parameters=level_parameters, result_format=sim_factory.result_format()) + # Load already scheduled samples (if any) from storage self._n_scheduled_samples = [len(level_scheduled) for level_id, level_scheduled in sample_storage.load_scheduled_samples().items()] - # Number of created samples + # If there are no scheduled samples yet, initialize to zeros if not self._n_scheduled_samples: self._n_scheduled_samples = np.zeros(len(level_parameters)) - # Are there any unfinished samples which have already finished? + # Check for unfinished samples and inform the sampling pool self._check_failed_samples() - # @TODO: get unfinished samples from sampler and call have permanent samples -> add results to pool's queues, - # before scheduled samples call, call get_finished - we need to know how many samples is finished + # @TODO: If sampler is restarted, collect any samples finished while offline: + # - add permanent samples into pool queues, + # - before scheduling new samples, call get_finished to know how many are already done. @property def n_levels(self): + """Return number of MLMC levels managed by this sampler.""" return len(self._level_sim_objects) @property def n_finished_samples(self): """ - Retrieve number of all finished samples - :return: + Retrieve numbers of finished samples for all levels. 
+ + :return: array-like containing finished counts per level """ return self.sample_storage.n_finished() def _create_level_sim_objects(self, level_parameters, sim_factory): """ - Create LevelSimulation object for each level, use simulation factory - :param: level_parameters: List, simulation steps, ... - :param: sim_factory: Simulation instance + Create LevelSimulation object for each level via the simulation factory. + + :param level_parameters: List of per-level parameters + :param sim_factory: Simulation factory providing level_instance and calculate methods :return: None """ n_levels = len(level_parameters) for level_id in range(n_levels): if level_id == 0: level_sim = sim_factory.level_instance(level_parameters[level_id], [0]) - else: level_sim = sim_factory.level_instance(level_parameters[level_id], level_parameters[level_id - 1]) + # Attach factory methods and metadata to the LevelSimulation level_sim._calculate = sim_factory.calculate level_sim._result_format = sim_factory.result_format level_sim._level_id = level_id @@ -81,30 +96,37 @@ def _create_level_sim_objects(self, level_parameters, sim_factory): def sample_range(self, n0, nL): """ - Geometric sequence of L elements decreasing from n0 to nL. - Useful to set number of samples explicitly. - :param n0: int - :param nL: int - :return: np.array of length L = n_levels. + Generate a geometric sequence of length L decreasing from n0 to nL. + + Useful to generate a set of target sample counts across levels. + + :param n0: int, number of samples at finest level + :param nL: int, number of samples at coarsest level + :return: np.ndarray of length self.n_levels with integer sample counts """ return np.round(np.exp2(np.linspace(np.log2(n0), np.log2(nL), self.n_levels))).astype(int) def set_initial_n_samples(self, n_samples=None): """ - Set target number of samples for each level - :param n_samples: array of number of samples + Set initial target number of samples for each level. 
+ + Accepts: + - None (defaults to [100, 10]), + - single integer (interpreted as n0, with default nL=10), + - two-element list [n0, nL] (geometric interpolation across levels). + + :param n_samples: scalar, length-2 list, or array specifying target counts :return: None """ if n_samples is None: n_samples = [100, 10] - # Num of samples to ndarray n_samples = np.atleast_1d(n_samples) - # Just maximal number of samples is set + # Single value -> treat as n0 with default nL if len(n_samples) == 1: n_samples = np.array([n_samples[0], 10]) - # Create number of samples for all levels + # Two values -> create geometric progression across levels if len(n_samples) == 2: n0, nL = n_samples n_samples = self.sample_range(n0, nL) @@ -113,75 +135,81 @@ def set_initial_n_samples(self, n_samples=None): def _get_sample_tag(self, level_id): """ - Create sample tag + Create a unique sample tag for a given level. + :param level_id: identifier of current level - :return: str + :return: str unique sample tag (e.g. 'L00_S0000123') """ return "L{:02d}_S{:07d}".format(level_id, int(self._n_scheduled_samples[level_id])) def schedule_samples(self, timeout=None, level_id=None, n_samples=None): """ - Create simulation samples, loop through "levels" and its samples (given the number of target samples): - 1) generate sample tag (same for fine and coarse simulation) - 2) get LevelSimulation instance by simulation factory - 3) schedule sample via sampling pool - 4) store scheduled samples in sample storage, separately for each level - :param timeout: int, get_finished - while break timeout in seconds + Schedule new simulation samples in the sampling pool and record them in storage. + + For each scheduled sample: + 1) generate a unique sample id shared by fine and coarse tasks, + 2) obtain the LevelSimulation instance for the level, + 3) schedule the sample with SamplingPool, + 4) store scheduled sample ids in SampleStorage. 
+ + :param timeout: float or None, passed to ask_sampling_pool_for_samples() before scheduling + :param level_id: int or None, if provided schedule only for this level (default: highest level) + :param n_samples: int or None, if provided schedule exactly this many samples for the specified level :return: None """ + # First, collect any finished samples self.ask_sampling_pool_for_samples(timeout=timeout) plan_samples = self._n_target_samples - self._n_scheduled_samples + # Default to the coarsest level if not specified if level_id is None: level_id = len(plan_samples) - 1 + + # If a specific number of samples for one level is requested if n_samples is not None: samples = [] for _ in range(int(n_samples)): - # Unique sample id sample_id = self._get_sample_tag(level_id) level_sim = self._level_sim_objects[level_id] - # Schedule current sample self._sampling_pool.schedule_sample(sample_id, level_sim) - # Increment number of created samples at current level self._n_scheduled_samples[level_id] += 1 samples.append(sample_id) - # Store scheduled samples self.sample_storage.save_scheduled_samples(level_id, samples) else: + # Iterate levels from coarsest to finest and schedule required samples for n_samples in np.flip(plan_samples): samples = [] for _ in range(int(n_samples)): - # Unique sample id sample_id = self._get_sample_tag(level_id) level_sim = self._level_sim_objects[level_id] - # Schedule current sample self._sampling_pool.schedule_sample(sample_id, level_sim) - # Increment number of created samples at current level self._n_scheduled_samples[level_id] += 1 - samples.append(sample_id) - # Store scheduled samples self.sample_storage.save_scheduled_samples(level_id, samples) level_id -= 1 def _check_failed_samples(self): """ - Get unfinished samples and check if failed samples have saved results then collect them - :return: + Query storage for unfinished sample IDs and inform the sampling pool. 
+
+        This allows the sampling pool to reattach or handle 'permanent' samples
+        that may have been started previously.
+        :return: None
        """
        unfinished_sample_ids = self.sample_storage.unfinished_ids()
        self._sampling_pool.have_permanent_samples(unfinished_sample_ids)

    def ask_sampling_pool_for_samples(self, sleep=0, timeout=None):
        """
-        Waiting for running simulations
-        :param sleep: time for doing nothing
-        :param timeout: maximum time for waiting on running simulations
-        :return: int, number of running simulations
+        Poll the sampling pool for finished simulations and store their results.
+
+        :param sleep: float, time to sleep between polls (seconds)
+        :param timeout: float or None, maximum time to wait; None or <= 0 means no time limit (wait until nothing is running)
+        :return: int, number of running simulations remaining after the call
        """
        if timeout is None:
            timeout = 0
@@ -192,7 +220,7 @@ def ask_sampling_pool_for_samples(self, sleep=0, timeout=None):
        t0 = time.perf_counter()
        while n_running > 0:
            successful_samples, failed_samples, n_running, n_ops = self._sampling_pool.get_finished()
-            # Store finished samples
+            # Persist finished samples and operation counts
            self._store_samples(successful_samples, failed_samples, n_ops)
            time.sleep(sleep)
            if 0 < timeout < (time.perf_counter() - t0):
@@ -202,10 +230,11 @@ def ask_sampling_pool_for_samples(self, sleep=0, timeout=None):

    def _store_samples(self, successful_samples, failed_samples, n_ops):
        """
-        Store finished samples
-        :param successful_samples: Dict[level_id, List[Tuple[sample_id:str, Tuple[ndarray, ndarray]]]]
-        :param failed_samples: Dict[level_id, List[Tuple[sample_id: str, error message: str]]]
-        :param n_ops: Dict[level_id: int, List[total time: float, number of success samples: int]]
+        Persist finished samples and operation time estimates to storage.
+ + :param successful_samples: Dict[level_id, List[Tuple[sample_id:str, (fine, coarse)]]] + :param failed_samples: Dict[level_id, List[Tuple[sample_id:str, error_message:str]]] + :param n_ops: Dict[level_id, Tuple[total_time:float, n_success_samples:int]] :return: None """ self.sample_storage.save_samples(successful_samples, failed_samples) @@ -213,24 +242,24 @@ def _store_samples(self, successful_samples, failed_samples, n_ops): def process_adding_samples(self, n_estimated, sleep=0, add_coeff=0.1, timeout=ADDING_SAMPLES_TIMEOUT): """ - Process adding samples - Note: n_estimated are wrong if n_ops is similar through all levels - :param n_estimated: Number of estimated samples on each level, list - :param sleep: Sample waiting time - :param add_coeff: default value 0.1, The number of scheduled samples would be 'add_coef' fraction of difference - between current number of target samples and new estimated number of target samples - :param timeout: ask sampling pool for finished samples timeout - :return: bool, if True adding samples is complete + Add newly estimated samples in batches, scheduling a fraction of the difference + between current scheduled and newly estimated targets. + + Note: n_estimated may be unreliable if per-level n_ops are similar across levels. 
+ + :param n_estimated: array-like, estimated target samples per level + :param sleep: float, time to sleep while waiting for results + :param add_coeff: float in (0,1], fraction of the difference to schedule each iteration (default 0.1) + :param timeout: float, timeout passed to ask_sampling_pool_for_samples() + :return: bool, True if scheduled counts reached the estimates for all levels """ + # Ensure storage reflects any finished work self.ask_sampling_pool_for_samples(timeout=timeout) - # Get default scheduled samples + # Currently scheduled samples per level n_scheduled = self.l_scheduled_samples() - # New scheduled sample will be 10 percent of difference - # between current number of target samples and new estimated one - # If 10 percent of estimated samples is greater than difference between estimated and scheduled samples, - # set scheduled samples to estimated samples + # Compute new scheduled values (add_coeff fraction of the remaining difference) new_scheduled = np.where((n_estimated * add_coeff) > (n_estimated - n_scheduled), n_estimated, n_scheduled + (n_estimated - n_scheduled) * add_coeff) @@ -239,41 +268,43 @@ def process_adding_samples(self, n_estimated, sleep=0, add_coeff=0.1, timeout=AD n_scheduled, new_scheduled)) - # Levels where estimated are greater than scheduled + # Levels where estimated > scheduled greater_items = np.where(np.greater(n_estimated, n_scheduled))[0] - # Scheduled samples and wait until at least half of the samples are done + # Schedule and wait until at least a fraction of newly scheduled samples finish self.set_scheduled_and_wait(n_scheduled, greater_items, sleep, timeout=timeout) return np.all(n_estimated[greater_items] == n_scheduled[greater_items]) def set_scheduled_and_wait(self, n_scheduled, greater_items, sleep, fin_sample_coef=0.5, timeout=1e-7): """ - Scheduled samples on each level and wait until at least half of the samples is done - :param n_scheduled: ndarray, number of scheduled samples on each level - :param 
greater_items: Items where n_estimated is greater than n_scheduled - :param sleep: Time waiting for samples - :param fin_sample_coef: The proportion of samples to finished for further estimate + Set scheduled sample targets and wait until a proportion of those samples finish. + + :param n_scheduled: ndarray, target number of scheduled samples per level + :param greater_items: iterable of indices where targets were increased + :param sleep: float, time to sleep between polls + :param fin_sample_coef: float in (0,1], fraction of scheduled samples that should finish before continuing + :param timeout: float, timeout passed to ask_sampling_pool_for_samples() :return: None """ - # Set scheduled samples and run simulations + # Update internal targets and schedule required samples self.set_level_target_n_samples(n_scheduled) self.schedule_samples(timeout=timeout) - # Finished level samples + # Current finished counts n_finished = self.n_finished_samples - # Wait until at least half of the scheduled samples are done on each level + # Wait until at least fin_sample_coef fraction of scheduled samples are finished for affected levels while np.any(n_finished[greater_items] < fin_sample_coef * n_scheduled[greater_items]): - # Wait a while time.sleep(sleep) self.ask_sampling_pool_for_samples(timeout=timeout) n_finished = self.n_finished_samples def set_level_target_n_samples(self, n_samples): """ - Set level number of target samples - :param n_samples: list, each level target samples + Update the per-level target sample counts to at least the provided values. + + :param n_samples: iterable of new target samples per level :return: None """ for level, n in enumerate(n_samples): @@ -281,14 +312,18 @@ def set_level_target_n_samples(self, n_samples): def l_scheduled_samples(self): """ - Get all levels number of scheduled samples - :return: list + Return the currently scheduled sample counts per level. 
+ + :return: list or array-like of scheduled sample counts """ return self._n_scheduled_samples def renew_failed_samples(self): """ - Resurrect failed samples + Reschedule previously failed samples. + + Retrieves failed sample IDs from storage, re-schedules them in the sampling pool, + and clears failed records from storage. :return: None """ failed_samples = self.sample_storage.failed_samples() @@ -298,8 +333,8 @@ def renew_failed_samples(self): level_id = int(level_id) for sample_id in sample_ids: level_sim = self._level_sim_objects[level_id] - # Schedule current sample self._sampling_pool.schedule_sample(sample_id, level_sim) samples.append(sample_id) + # Clear failed sample records after rescheduling self.sample_storage.clear_failed() diff --git a/mlmc/sampling_pool.py b/mlmc/sampling_pool.py index cbbbb360..9044246d 100644 --- a/mlmc/sampling_pool.py +++ b/mlmc/sampling_pool.py @@ -5,7 +5,7 @@ import time import hashlib import numpy as np -from typing import List +from typing import List, Tuple, Dict, Optional, Any import traceback from abc import ABC, abstractmethod from multiprocessing import Pool as ProcPool @@ -15,18 +15,22 @@ class SamplingPool(ABC): """ - Determining the runtime environment of samples, eg single process, multiple processes, running PBS, ... + Abstract base class defining the runtime environment for sample simulations. + It manages sample execution across different backends (single process, + multiprocessing, PBS, etc.). """ FAILED_DIR = 'failed' SEVERAL_SUCCESSFUL_DIR = 'several_successful' N_SUCCESSFUL = 5 - # Number of successful samples to store + # Number of successful samples to store. - def __init__(self, work_dir=None, debug=False): + def __init__(self, work_dir: Optional[str] = None, debug: bool = False): """ - :param work_dir: Path to working directory - :param debug: bool, if True keep sample directories + Initialize the sampling pool environment. + + :param work_dir: Path to the working directory where outputs are stored. 
+ :param debug: If True, keep sample directories for debugging. """ self._output_dir = None if work_dir is not None: @@ -34,14 +38,16 @@ def __init__(self, work_dir=None, debug=False): self._output_dir = os.path.join(work_dir, "output") self._debug = debug - self._create_dir() # prepare output dir - self._create_dir(SamplingPool.FAILED_DIR) # prepare failed dir - self._successful_dir = self._create_dir(SamplingPool.SEVERAL_SUCCESSFUL_DIR) # prepare several successful dir + # Prepare main output, failed, and successful directories. + self._create_dir() + self._create_dir(SamplingPool.FAILED_DIR) + self._successful_dir = self._create_dir(SamplingPool.SEVERAL_SUCCESSFUL_DIR) - def _create_dir(self, directory=""): + def _create_dir(self, directory: str = "") -> Optional[str]: """ - Create output directory, in 'debug' mode not remove existing output_dir - :return: None + Create the output directory if it does not exist. + + In debug mode, existing directories are preserved. """ if self._output_dir is not None: directory = os.path.join(self._output_dir, directory) @@ -49,289 +55,446 @@ def _create_dir(self, directory=""): shutil.rmtree(directory) os.makedirs(directory, mode=0o775, exist_ok=True) return directory + return None + + # --- Abstract methods to be implemented by subclasses --- @abstractmethod - def schedule_sample(self, sample_id, level_sim: LevelSimulation): + def schedule_sample(self, sample_id: str, level_sim: LevelSimulation): """ - Method for calculating simulation samples - :param sample_id: str - :param level_sim: level_simulation.LevelSimulation instance + Schedule a simulation sample for execution. + + :param sample_id: Unique sample identifier. + :param level_sim: LevelSimulation instance. 
:return: Tuple[str, List] """ @abstractmethod - def have_permanent_samples(self, sample_ids): + def have_permanent_samples(self, sample_ids: List[str]) -> bool: """ - Informs the Pool about sample_ids that have been scheduled but not yet finished + Inform the pool about samples that have been scheduled but not yet finished. """ @abstractmethod def get_finished(self): """ - Return finished samples - :return: list of results, number of running samples + Retrieve finished sample results. + + :return: Tuple containing (successful samples, failed samples, number of running samples) """ + # --- Utility methods shared across subclasses --- + @staticmethod - def compute_seed(sample_id): + def compute_seed(sample_id: str) -> int: """ - Calculate seed for given sample id - :param sample_id: str - :return: int + Compute a deterministic seed for a given sample ID. + + :param sample_id: Unique sample identifier. + :return: Integer seed value. """ - hash = hashlib.md5(sample_id.encode('ascii')) - seed = np.frombuffer(hash.digest(), dtype='uint32')[0] - return seed + hash_val = hashlib.md5(sample_id.encode('ascii')) + seed = np.frombuffer(hash_val.digest(), dtype='uint32')[0] + return int(seed) @staticmethod - def calculate_sample(sample_id, level_sim, work_dir=None, seed=None): + def calculate_sample(sample_id: str, level_sim: LevelSimulation, + work_dir: Optional[str] = None, + seed: Optional[int] = None) -> Tuple[str, Any, str, float]: """ - Method for calculating results - :param sample_id: str - :param level_sim: LevelSimulation - :param work_dir: working directory - :param seed: random seed - :return: sample id, sample result, error message with traceback, running time + Execute a single simulation sample. + + :param sample_id: Sample identifier. + :param level_sim: LevelSimulation instance. + :param work_dir: Working directory for the sample. + :param seed: Optional random seed (generated if not provided). 
+ :return: Tuple(sample_id, result, error_message, running_time) """ if seed is None: seed = SamplingPool.compute_seed(sample_id) + res = (None, None) err_msg = "" - running_time = 0 + running_time = 0.0 if level_sim.need_sample_workspace: SamplingPool.handle_sim_files(work_dir, sample_id, level_sim) + try: start = time.time() res = level_sim._calculate(level_sim.config_dict, seed) running_time = time.time() - start - # Check result format - if type(res[0]) is np.ndarray and type(res[1]) is np.ndarray: + # Validate result format. + if isinstance(res[0], np.ndarray) and isinstance(res[1], np.ndarray): flatten_fine_res = res[0].flatten() flatten_coarse_res = res[1].flatten() - res_expected_len = np.sum( - [np.prod(quantity_spec.shape) * len(quantity_spec.times) * len(quantity_spec.locations) - for quantity_spec in level_sim._result_format()]) + expected_len = np.sum([ + np.prod(q.shape) * len(q.times) * len(q.locations) + for q in level_sim._result_format() + ]) - assert len(flatten_fine_res) == len(flatten_coarse_res) == res_expected_len, \ - "Unexpected result format, expected length: {}, resultf length: {}".format(res_expected_len, - len(flatten_fine_res)) + assert len(flatten_fine_res) == len(flatten_coarse_res) == expected_len, \ + f"Unexpected result format. Expected length: {expected_len}, got: {len(flatten_fine_res)}" except Exception: - str_list = traceback.format_exception(*sys.exc_info()) - err_msg = "".join(str_list) - print("Error msg: ", err_msg) + err_msg = "".join(traceback.format_exception(*sys.exc_info())) + print("Error msg:", err_msg) return sample_id, res, err_msg, running_time + # --- File handling helpers --- + @staticmethod - def change_to_sample_directory(work_dir, path: str): + def change_to_sample_directory(work_dir: str, path: str) -> str: """ - Create sample directory and change working directory - :param path: str - :return: None + Create and switch to the sample-specific directory. + + :param work_dir: Base working directory. 
+ :param path: Sample subdirectory name. + :return: Absolute path to the created sample directory. """ sample_dir = os.path.join(work_dir, path) - if not os.path.isdir(sample_dir): - os.makedirs(sample_dir, mode=0o775, exist_ok=True) + os.makedirs(sample_dir, mode=0o775, exist_ok=True) return sample_dir @staticmethod - def copy_sim_files(files: List[str], sample_dir): + def copy_sim_files(files: List[str], sample_dir: str): """ - Copy simulation common files to current simulation sample directory - :param files: List of files - :return: None + Copy shared simulation files to the sample directory. + + :param files: List of file paths to copy. + :param sample_dir: Destination sample directory. """ for file in files: shutil.copy(file, sample_dir) @staticmethod - def handle_sim_files(work_dir, sample_id, level_sim): + def handle_sim_files(work_dir: str, sample_id: str, level_sim: LevelSimulation): """ - Change working directory to sample dir and copy common files - :param sample_id: str - :param level_sim: LevelSimulation - :return: None + Prepare the sample workspace (create directory, copy common files, set cwd). + + :param work_dir: Base working directory. + :param sample_id: Sample identifier. + :param level_sim: LevelSimulation instance. """ if level_sim.need_sample_workspace: sample_dir = SamplingPool.change_to_sample_directory(work_dir, sample_id) - if level_sim.common_files is not None: SamplingPool.copy_sim_files(level_sim.common_files, sample_dir) os.chdir(sample_dir) @staticmethod - def move_successful_rm(sample_id, level_sim, output_dir, dest_dir): + def move_successful_rm(sample_id: str, level_sim: LevelSimulation, + output_dir: str, dest_dir: str): + """ + Move successful sample directories and remove originals. 
+ """ if int(sample_id[-7:]) < SamplingPool.N_SUCCESSFUL: - SamplingPool.move_dir(sample_id, level_sim.need_sample_workspace, output_dir, dest_dir=dest_dir) + SamplingPool.move_dir(sample_id, level_sim.need_sample_workspace, output_dir, dest_dir) SamplingPool.remove_sample_dir(sample_id, level_sim.need_sample_workspace, output_dir) @staticmethod - def move_failed_rm(sample_id, level_sim, output_dir, dest_dir): - SamplingPool.move_dir(sample_id, level_sim.need_sample_workspace, output_dir, dest_dir=dest_dir) + def move_failed_rm(sample_id: str, level_sim: LevelSimulation, + output_dir: str, dest_dir: str): + """ + Move failed sample directories and remove originals. + """ + SamplingPool.move_dir(sample_id, level_sim.need_sample_workspace, output_dir, dest_dir) SamplingPool.remove_sample_dir(sample_id, level_sim.need_sample_workspace, output_dir) @staticmethod - def move_dir(sample_id, sample_workspace, work_dir, dest_dir): + def move_dir(sample_id: str, sample_workspace: bool, + work_dir: str, dest_dir: str): """ - Move failed sample dir to failed directory - :param sample_id: str - :param sample_workspace: bool, simulation needs workspace - :param work_dir: str - :param dest_dir: destination - :return: None + Move a sample directory to another location (e.g., failed or successful). + + :param sample_id: Sample identifier. + :param sample_workspace: Whether the sample uses its own workspace. + :param work_dir: Base working directory. + :param dest_dir: Destination subdirectory name. 
""" - if sample_workspace and work_dir is not None and dest_dir is not None: + if sample_workspace and work_dir and dest_dir: destination_dir = os.path.join(work_dir, dest_dir) sample_dir = SamplingPool.change_to_sample_directory(work_dir, sample_id) - if os.path.exists(os.path.join(destination_dir, sample_id)): - shutil.rmtree(os.path.join(destination_dir, sample_id), ignore_errors=True) - shutil.copytree(sample_dir, os.path.join(destination_dir, sample_id)) + target_dir = os.path.join(destination_dir, sample_id) + if os.path.exists(target_dir): + shutil.rmtree(target_dir, ignore_errors=True) + shutil.copytree(sample_dir, target_dir) @staticmethod - def remove_sample_dir(sample_id, sample_workspace, work_dir): + def remove_sample_dir(sample_id: str, sample_workspace: bool, work_dir: str): """ - Remove sample directory - :param sample_id: str - :param sample_workspace: bool, simulation needs workspace - :param work_dir: str - :return: None + Remove the directory for a completed or failed sample. + + :param sample_id: Sample identifier. + :param sample_workspace: Whether the sample uses its own workspace. + :param work_dir: Base working directory. """ - if sample_workspace and work_dir is not None: + if sample_workspace and work_dir: sample_dir = SamplingPool.change_to_sample_directory(work_dir, sample_id) shutil.rmtree(sample_dir, ignore_errors=True) class OneProcessPool(SamplingPool): + """ + Sampling pool implementation that executes all samples sequentially in a single process. + Used primarily for debugging or lightweight simulations. + """ def __init__(self, work_dir=None, debug=False): """ - Everything is running in one process + Initialize the one-process pool. + + Parameters + ---------- + work_dir : str, optional + Working directory for storing sample outputs. + debug : bool, default=False + If True, disables moving/removing files after successful execution. 
""" super().__init__(work_dir=work_dir, debug=debug) - self._failed_queues = {} - self._queues = {} - self._n_running = 0 - self.times = {} + self._failed_queues = {} # Stores failed sample queues per level + self._queues = {} # Stores successful sample queues per level + self._n_running = 0 # Tracks number of currently running samples + self.times = {} # Stores total runtime and count per level def schedule_sample(self, sample_id, level_sim): - self._n_running += 1 + """ + Execute a single sample synchronously (in the current process). + + Parameters + ---------- + sample_id : int + Identifier of the sample. + level_sim : LevelSimulation + Simulation instance containing configuration for the sample. + """ + self._n_running += 1 # Increment running sample counter + # Set output directory if required by simulation if self._output_dir is None and level_sim.need_sample_workspace: self._output_dir = os.getcwd() - sample_id, result, err_msg, running_time = SamplingPool.calculate_sample(sample_id, level_sim, - work_dir=self._output_dir) + # Run the sample and collect result, error message, and runtime + sample_id, result, err_msg, running_time = SamplingPool.calculate_sample( + sample_id, level_sim, work_dir=self._output_dir + ) + # Process result (successful or failed) self._process_result(sample_id, result, err_msg, running_time, level_sim) def _process_result(self, sample_id, result, err_msg, running_time, level_sim): """ - Save sample result - :param sample_id: sample identifier from calculate_sample() - :param result: sample result from calculate_sample() - :param err_msg: sample error message from calculate_sample() - :param running_time: running time for sample from calculate_sample() - :param level_sim: level_simulation instance - :return: None - """ - # Save running time for n_ops + Process result from a sample execution and store it in the appropriate queue. + + Parameters + ---------- + sample_id : int + Identifier of the executed sample. 
+ result : tuple + Pair of fine and coarse results (numpy arrays). + err_msg : str + Error message if the sample failed, empty string otherwise. + running_time : float + Runtime of the sample execution in seconds. + level_sim : LevelSimulation + Simulation instance used to produce the sample. + """ + # Record runtime for this level self._save_running_time(level_sim._level_id, running_time) + # If no error occurred, store successful result if not err_msg: - self._queues.setdefault(level_sim._level_id, queue.Queue()).put((sample_id, (result[0], result[1]))) + self._queues.setdefault(level_sim._level_id, queue.Queue()).put( + (sample_id, (result[0], result[1])) + ) + # Move successful sample to its permanent directory unless debugging if not self._debug: - SamplingPool.move_successful_rm(sample_id, level_sim, output_dir=self._output_dir, dest_dir=self._successful_dir) + SamplingPool.move_successful_rm( + sample_id, level_sim, output_dir=self._output_dir, dest_dir=self._successful_dir + ) else: + # If the simulation failed if not level_sim.need_sample_workspace: - print("Sample {} error: {}".format(sample_id, err_msg)) + print(f"Sample {sample_id} error: {err_msg}") else: - SamplingPool.move_failed_rm(sample_id, level_sim, output_dir=self._output_dir, dest_dir=SamplingPool.FAILED_DIR) + SamplingPool.move_failed_rm( + sample_id, level_sim, output_dir=self._output_dir, dest_dir=SamplingPool.FAILED_DIR + ) self._failed_queues.setdefault(level_sim._level_id, queue.Queue()).put((sample_id, err_msg)) def _save_running_time(self, level_id, running_time): """ - Save running time to dictionary, store total time and number of samples - :param level_id: int - :param running_time: float - :return: None + Save sample execution time in the tracking dictionary. + + Parameters + ---------- + level_id : int + Identifier of the simulation level. + running_time : float + Execution time of the sample. 
""" - # Save sample times [total time, number of samples] + # Initialize level entry if missing if level_id not in self.times: self.times[level_id] = [0, 0] - # Failed samples have running time equal 0 by default + # Only count successful samples with nonzero runtime if running_time != 0: - self.times[level_id][0] += running_time - self.times[level_id][1] += 1 + self.times[level_id][0] += running_time # Accumulate total runtime + self.times[level_id][1] += 1 # Increment sample count def have_permanent_samples(self, sample_ids): + """ + Return False, indicating that no samples are stored permanently. + + Parameters + ---------- + sample_ids : list + List of sample identifiers (ignored). + + Returns + ------- + bool + Always False. + """ return False def get_finished(self): """ - return results from queue - list of (sample_id, pair_of_result_vectors, error_message) + Retrieve all completed (successful and failed) samples. + + Returns + ------- + successful : dict + Dictionary of successful samples by level. + failed : dict + Dictionary of failed samples by level. + n_running : int + Number of currently running samples. + times : list + List of (level_id, [total_time, n_samples]) pairs. """ successful = self._queues_to_list(list(self._queues.items())) failed = self._queues_to_list(list(self._failed_queues.items())) - return successful, failed, self._n_running, list(self.times.items()) def _queues_to_list(self, queue_dict_list): + """ + Convert queues to lists and clear them safely. + + Parameters + ---------- + queue_dict_list : list + List of (level_id, queue.Queue) pairs. + + Returns + ------- + results : dict + Dictionary mapping level_id to list of queue entries. 
+ """ results = {} for level_id, q in queue_dict_list: queue_list = list(q.queue) if not queue_list: continue results[level_id] = queue_list - # Thread safe clear + + # Thread-safe queue clearing with q.mutex: q.queue.clear() + # Update running sample counter self._n_running -= len(results[level_id]) - return results +# ============================================================================== + class ProcessPool(OneProcessPool): """ - Suitable for local parallel sampling for simulations WITHOUT external program call + Sampling pool using multiprocessing for parallel sample execution. + Suitable for simulations without external program calls. """ def __init__(self, n_processes, work_dir=None, debug=False): - self._pool = ProcPool(n_processes) + """ + Initialize process-based parallel sampling pool. + + Parameters + ---------- + n_processes : int + Number of worker processes to use. + work_dir : str, optional + Working directory for samples. + debug : bool, default=False + If True, disables moving/removing sample outputs. + """ + self._pool = ProcPool(n_processes) # Multiprocessing pool super().__init__(work_dir=work_dir, debug=debug) def res_callback(self, result, level_sim): """ - Process simulation results - :param result: tuple - :param level_sim: LevelSimulation instance - :return: None + Callback for handling results from asynchronous execution. + + Parameters + ---------- + result : tuple + Returned result from SamplingPool.calculate_sample(). + level_sim : LevelSimulation + Simulation level instance. """ self._process_result(*result, level_sim) def schedule_sample(self, sample_id, level_sim): + """ + Schedule a sample for parallel execution in a separate process. + + Parameters + ---------- + sample_id : int + Sample identifier. + level_sim : LevelSimulation + Simulation configuration instance. 
+ """ self._n_running += 1 + # Set working directory for output files if self._output_dir is None and level_sim.need_sample_workspace: self._output_dir = os.getcwd() - self._pool.apply_async(SamplingPool.calculate_sample, args=(sample_id, level_sim, self._output_dir), - callback=lambda res: self.res_callback(res, level_sim), - error_callback=lambda res: self.res_callback(res, level_sim)) + # Submit task asynchronously to process pool + self._pool.apply_async( + SamplingPool.calculate_sample, + args=(sample_id, level_sim, self._output_dir), + callback=lambda res: self.res_callback(res, level_sim), + error_callback=lambda res: self.res_callback(res, level_sim) + ) + +# ============================================================================== class ThreadPool(ProcessPool): """ - Suitable local parallel sampling for simulations WITH external program call + Sampling pool using threading for local parallel sampling. + Suitable for simulations with external program calls (I/O-bound). """ def __init__(self, n_thread, work_dir=None, debug=False): + """ + Initialize thread-based parallel sampling pool. + + Parameters + ---------- + n_thread : int + Number of threads to use. + work_dir : str, optional + Working directory for samples. + debug : bool, default=False + If True, disables moving/removing sample outputs. + """ super().__init__(n_thread, work_dir=work_dir, debug=debug) - self._pool = pool.ThreadPool(n_thread) + self._pool = pool.ThreadPool(n_thread) # Thread-based pool instead of process-based self._failed_queues = {} self._queues = {} self._n_running = 0 diff --git a/mlmc/sim/simulation.py b/mlmc/sim/simulation.py index bd9a04d0..e5d5dd33 100644 --- a/mlmc/sim/simulation.py +++ b/mlmc/sim/simulation.py @@ -5,29 +5,49 @@ class Simulation(ABC): + """ + Abstract base class for multi-level Monte Carlo (MLMC) simulations. + + Defines the interface that all concrete simulation classes must implement. 
+ Provides methods for creating level simulations, specifying result formats, and running calculations. + """ @abstractmethod def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation: """ - Create LevelSimulation object which is farther used for calculation etc. - :param fine_level_params: - :param coarse_level_params: - :return: LevelSimulation + Create a LevelSimulation object for a given level. + + The LevelSimulation instance is used for sample generation and result extraction + at both the fine and coarse levels in MLMC. + + :param fine_level_params: List of floats defining parameters for the fine simulation level. + :param coarse_level_params: List of floats defining parameters for the coarse simulation level. + :return: LevelSimulation instance configured for the given level parameters. """ @abstractmethod def result_format(self) -> List[QuantitySpec]: """ - Define simulation result format - :return: List[QuantitySpec, ...] + Define the format of the simulation results. + + This method should return a list of QuantitySpec objects, which describe the + type, shape, and units of each quantity produced by the simulation. + + :return: List of QuantitySpec objects defining the simulation output format. """ @staticmethod @abstractmethod - def calculate(config_dict, seed): + def calculate(config_dict, seed: int): """ - Method that actually run the calculation, calculate fine and coarse sample and also extract their results - :param config_dict: dictionary containing simulation configuration, LevelSimulation.config_dict (set in level_instance) - :param seed: random seed, int - :return: List[fine result, coarse result], both flatten arrays (see mlmc.sim.synth_simulation._calculate()) + Execute a single simulation calculation. + + This method runs the simulation for both fine and coarse levels, computes + the results, and returns them in a flattened form suitable for MLMC analysis. 
+ + :param config_dict: Dictionary containing simulation configuration parameters + (usually LevelSimulation.config_dict from level_instance). + :param seed: Random seed (int) to ensure reproducibility of the stochastic simulation. + :return: List containing two elements: + [fine_result, coarse_result], both as flattened arrays. """ diff --git a/mlmc/sim/synth_simulation.py b/mlmc/sim/synth_simulation.py index 0a5176c6..2dd69701 100644 --- a/mlmc/sim/synth_simulation.py +++ b/mlmc/sim/synth_simulation.py @@ -1,3 +1,228 @@ +# import os +# import ruamel.yaml as ruyaml +# import numpy as np +# from typing import List +# import scipy.stats as stats +# from mlmc.sim.simulation import Simulation +# from mlmc.quantity.quantity_spec import QuantitySpec +# from mlmc.level_simulation import LevelSimulation +# +# +# class SynthSimulation(Simulation): +# """ +# Synthetic simulation for testing MLMC workflows. +# +# Produces artificial fine and coarse samples based on a distribution and a +# numerical step size. Can introduce NaNs to simulate failed simulations. +# """ +# +# # Class-level counters for failed samples and result bookkeeping +# n_nans = 0 +# nan_fraction = 0 +# len_results = 0 +# result_dict = {} +# +# def __init__(self, config=None): +# """ +# Initialize the synthetic simulation. 
+# +# :param config: Dictionary with keys: +# - distr: scipy.stats distribution (default: normal) +# - complexity: exponent for operation estimate (default: 2) +# - nan_fraction: fraction of failed samples to simulate +# - sim_method: unused here, placeholder for method type +# """ +# super().__init__() +# if config is None: +# config = dict(distr=stats.norm(), complexity=2) +# self.config = config +# +# # Reset class-level counters +# SynthSimulation.n_nans = 0 +# SynthSimulation.nan_fraction = config.get('nan_fraction', 0.0) +# SynthSimulation.len_results = 0 +# +# # Indicates that this simulation does not require a workspace +# self.need_workspace: bool = False +# +# @staticmethod +# def sample_fn(x, h): +# """ +# Synthetic sample function introducing a small numerical error. +# +# :param x: Random sample from distribution +# :param h: Simulation step size +# :return: Synthetic simulation output +# """ +# return x + h * np.sqrt(1e-4 + np.abs(x)) +# +# @staticmethod +# def sample_fn_no_error(x, h): +# """ +# Sample function without added error. +# +# :param x: Random sample +# :param h: Simulation step (unused) +# :return: Original sample +# """ +# return x +# +# def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation: +# """ +# Create a LevelSimulation object for the fine and coarse levels. 
+# +# :param fine_level_params: List of fine-level parameters (step size) +# :param coarse_level_params: List of coarse-level parameters (step size) +# :return: LevelSimulation object configured for MLMC +# """ +# config = dict() +# config["fine"] = {"step": fine_level_params[0]} +# config["coarse"] = {"step": coarse_level_params[0]} +# config["distr"] = self.config["distr"] +# config["res_format"] = self.result_format() +# +# return LevelSimulation(config_dict=config, task_size=self.n_ops_estimate(fine_level_params[0])) +# +# @staticmethod +# def generate_random_samples(distr, seed, size): +# """ +# Generate fine and coarse random samples from a given distribution. +# +# Optionally simulates a fraction of NaN results to represent failed simulations. +# +# :param distr: scipy.stats distribution object +# :param seed: random seed +# :param size: number of samples to generate +# :return: Tuple (fine_samples, coarse_samples) +# """ +# SynthSimulation.len_results += 1 +# distr.random_state = np.random.RandomState(seed) +# y = distr.rvs(size=size) +# +# # Simulate failed samples +# if SynthSimulation.n_nans / (1e-10 + SynthSimulation.len_results) < SynthSimulation.nan_fraction: +# SynthSimulation.n_nans += 1 +# y = [np.nan] +# +# return y, y +# +# @staticmethod +# def calculate(config, seed): +# """ +# Compute fine and coarse simulation results. 
+# +# :param config: Dictionary with LevelSimulation configuration +# :param seed: Random seed +# :return: Tuple (fine_result_flat, coarse_result_flat) +# """ +# quantity_format = config["res_format"] +# +# # Generate random samples for fine and coarse levels +# fine_random, coarse_random = SynthSimulation.generate_random_samples( +# config["distr"], seed, np.prod(quantity_format[0].shape) +# ) +# +# fine_step = config["fine"]["step"] +# coarse_step = config["coarse"]["step"] +# +# fine_result = SynthSimulation.sample_fn(fine_random, fine_step) +# coarse_result = np.zeros(len(fine_result)) if coarse_step == 0 else SynthSimulation.sample_fn(coarse_random, coarse_step) +# +# if np.any(np.isnan(fine_result)) or np.any(np.isnan(coarse_result)): +# raise Exception("Simulation produced NaN result") +# +# # Map results to quantity specifications +# results = [] +# for result in [fine_result, coarse_result]: +# quantities = [] +# for quantity in quantity_format: +# locations = np.array([result + i for i in range(len(quantity.locations))]) if coarse_step != 0 else np.array([result for _ in range(len(quantity.locations))]) +# times = np.array([locations for _ in range(len(quantity.times))]) +# quantities.append(times) +# results.append(np.array(quantities)) +# +# return results[0].flatten(), results[1].flatten() +# +# def n_ops_estimate(self, step): +# """ +# Estimate the computational cost for a given step size. +# +# :param step: simulation step size +# :return: estimated number of operations +# """ +# return (1 / step) ** self.config['complexity'] * np.log(max(1 / step, 2.0)) +# +# def result_format(self) -> List[QuantitySpec]: +# """ +# Define the result format for this synthetic simulation. 
+# +# :return: List of QuantitySpec objects +# """ +# spec1 = QuantitySpec(name="length", unit="m", shape=(2, 1), times=[1, 2, 3], locations=['10', '20']) +# spec2 = QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=['30', '40']) +# return [spec1, spec2] +# +# +# class SynthSimulationWorkspace(SynthSimulation): +# """ +# Synthetic simulation that requires a workspace. +# +# Extends SynthSimulation by supporting workspace-based execution and configuration +# read from a YAML file. +# """ +# +# CONFIG_FILE = 'synth_sim_config.yaml' +# +# def __init__(self, config): +# """ +# :param config: Dictionary containing workspace configuration: +# - config_yaml: path to YAML configuration file +# - nan_fraction: fraction of failed samples +# """ +# self.config_yaml = config["config_yaml"] +# +# # Reset counters +# SynthSimulationWorkspace.n_nans = 0 +# SynthSimulationWorkspace.nan_fraction = config.get('nan_fraction', 0.0) +# SynthSimulationWorkspace.len_results = 0 +# +# # Indicates that this simulation needs a workspace +# self.need_workspace: bool = True +# +# def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation: +# """ +# Create a LevelSimulation object using workspace configuration. +# +# :param fine_level_params: List of fine-level parameters (step size) +# :param coarse_level_params: List of coarse-level parameters (step size) +# :return: LevelSimulation object +# """ +# config = dict() +# config["fine"] = {"step": fine_level_params[0]} +# config["coarse"] = {"step": coarse_level_params[0]} +# config["res_format"] = self.result_format() +# +# job_weight = 20000 +# +# return LevelSimulation( +# config_dict=config, +# common_files=[self.config_yaml], +# task_size=1.0 / job_weight, +# need_sample_workspace=self.need_workspace +# ) +# +# @staticmethod +# def _read_config(): +# """ +# Read workspace configuration from YAML file. 
+# +# :return: Dictionary with configuration values +# """ +# with open(os.path.join(os.getcwd(), SynthSimulationWorkspace.CONFIG_FILE)) as file: +# yaml = ruyaml.YAML(typ='rt') +# config = yaml.load(file) +# return config + import os import ruamel.yaml as ruyaml import numpy as np diff --git a/mlmc/tool/context_statprof.py b/mlmc/tool/context_statprof.py deleted file mode 100644 index faf3afc4..00000000 --- a/mlmc/tool/context_statprof.py +++ /dev/null @@ -1,13 +0,0 @@ -import statprof -from contextlib import contextmanager - - - - -@contextmanager -def stat_profiler(): - statprof.start() - yield statprof - statprof.stop() - statprof.display() - diff --git a/mlmc/tool/distribution.py b/mlmc/tool/distribution.py index e377607b..1427ef2b 100644 --- a/mlmc/tool/distribution.py +++ b/mlmc/tool/distribution.py @@ -48,39 +48,6 @@ def __init__(self, moments_obj, moment_data, domain=None, force_decay=(True, Tru # Flag for monitoring convergence on stdout. self.monitor = monitor - # def choose_parameters_from_samples(self, samples): - # """ - # Determine model hyperparameters, in particular domain of the density function, - # from given samples. - # :param samples: np array of samples from the distribution or its approximation. - # :return: None - # """ - # self.domain = (np.min(samples), np.max(samples)) - # - # @staticmethod - # def choose_parameters_from_moments(mean, variance, quantile=0.9999, log=False): - # """ - # Determine model hyperparameters, in particular domain of the density function, - # from given samples. - # :param samples: np array of samples from the distribution or its approximation. 
- # :return: None - # """ - # if log: - # # approximate by log normal - # # compute mu, sigma parameters from observed mean and variance - # sigma_sq = np.log(np.exp(np.log(variance) - 2.0 * np.log(mean)) + 1.0) - # mu = np.log(mean) - sigma_sq / 2.0 - # sigma = np.sqrt(sigma_sq) - # domain = tuple(sc.stats.lognorm.ppf([1.0 - quantile, quantile], s=sigma, scale=np.exp(mu))) - # assert np.isclose(mean, sc.stats.lognorm.mean(s=sigma, scale=np.exp(mu))) - # assert np.isclose(variance, sc.stats.lognorm.var(s=sigma, scale=np.exp(mu))) - # else: - # domain = tuple(sc.stats.norm.ppf([1.0 - quantile, quantile], loc=mean, scale=np.sqrt(variance))) - # return domain - # - # def choose_parameters_from_approximation(self): - # pass - def estimate_density_minimize(self, tol=1e-5, reg_param =0.01): """ @@ -411,15 +378,9 @@ def _calculate_jacobian_matrix(self, multipliers): jacobian_matrix[np.diag_indices_from(jacobian_matrix)] += self._stab_penalty - - #e_vals = np.linalg.eigvalsh(jacobian_matrix) - - #print(multipliers) - #print("jac spectra: ", e_vals[0], e_vals[-1], e_vals[-1]/e_vals[0]) return jacobian_matrix - def compute_exact_moments(moments_fn, density, tol=1e-4): """ Compute approximation of moments using exact density. diff --git a/mlmc/tool/flow_mc.py b/mlmc/tool/flow_mc.py index 09fc12da..cdddb9c1 100644 --- a/mlmc/tool/flow_mc.py +++ b/mlmc/tool/flow_mc.py @@ -15,14 +15,21 @@ def create_corr_field(model='gauss', corr_length=0.125, dim=2, log=True, sigma=1, mode_no=1000): """ - Create random fields - :return: + Create correlated random-field provider (cf.Fields) according to selected backend. + + :param model: One of 'fourier', 'svd', 'exp', 'TPLgauss', 'TPLexp', 'TPLStable', or others (defaults to 'gauss'). + :param corr_length: Correlation length (used by GSTools or SVD implementations). + :param dim: Spatial dimension of the field (1, 2 or 3). + :param log: If True, generate log-normal field (exponentiate underlying Gaussian field). 
+ :param sigma: Standard deviation for the generated field. + :param mode_no: Number of Fourier modes + :return: cf.Fields instance that can generate random field samples. """ if model == 'fourier': return cf.Fields([ cf.Field('conductivity', cf.FourierSpatialCorrelatedField('gauss', dim=dim, - corr_length=corr_length, - log=log, sigma=sigma)), + corr_length=corr_length, + log=log, sigma=sigma)), ]) elif model == 'svd': @@ -52,13 +59,14 @@ def create_corr_field(model='gauss', corr_length=0.125, dim=2, log=True, sigma=1 ]) - def substitute_placeholders(file_in, file_out, params): """ - Substitute for placeholders of format '<name>' from the dict 'params'. - :param file_in: Template file. - :param file_out: Values substituted. - :param params: { 'name': value, ...} + Replace placeholders of form '<name>' in a template file with corresponding values. + + :param file_in: Path to the template file containing placeholders. + :param file_out: Path where the substituted output will be written. + :param params: Dictionary mapping placeholder names to replacement values, e.g. {'mesh_file': 'mesh.msh'}. + :return: List of parameter names that were actually used (replaced) in the template. """ used_params = [] with open(file_in, 'r') as src: @@ -76,10 +84,10 @@ def substitute_placeholders(file_in, file_out, params): def force_mkdir(path, force=False): """ - Make directory 'path' with all parents, - remove the leaf dir recursively if it already exists. - :param path: path to directory - :param force: if dir already exists then remove it and create new one + Create directory tree; optionally remove existing leaf directory first. + + :param path: Directory path to create (parents created as needed). + :param force: If True and the directory already exists, remove it (recursively) before creating. 
:return: None """ if force: @@ -92,55 +100,36 @@ class FlowSim(Simulation): # placeholders in YAML total_sim_id = 0 MESH_FILE_VAR = 'mesh_file' - # Timestep placeholder given as O(h), h = mesh step - TIMESTEP_H1_VAR = 'timestep_h1' - # Timestep placeholder given as O(h^2), h = mesh step - TIMESTEP_H2_VAR = 'timestep_h2' + TIMESTEP_H1_VAR = 'timestep_h1' # O(h) + TIMESTEP_H2_VAR = 'timestep_h2' # O(h^2) - # files + # filenames used in workspace and job directories GEO_FILE = 'mesh.geo' MESH_FILE = 'mesh.msh' YAML_TEMPLATE = 'flow_input.yaml.tmpl' YAML_FILE = 'flow_input.yaml' FIELDS_FILE = 'fields_sample.msh' - """ - Gather data for single flow call (coarse/fine) - - Usage: - mlmc.sampler.Sampler uses instance of FlowSim, it calls once level_instance() for each level step (The level_instance() method - is called as many times as the number of levels), it takes place in main process - - mlmc.tool.pbs_job.PbsJob uses static methods in FlowSim, it calls calculate(). That's where the calculation actually runs, - it takes place in PBS process - It also extracts results and passes them back to PbsJob, which handles the rest - - """ - def __init__(self, config=None, clean=None): """ - Simple simulation using flow123d - :param config: configuration of the simulation, processed keys: - env - Environment object. - fields - FieldSet object - yaml_file: Template for main input file. Placeholders: - - replaced by generated mesh - - for FIELD be name of any of `fields`, replaced by the FieldElementwise field with generated - field input file and the field name for the component. - geo_file: Path to the geometry file. - :param clean: bool, if True remove existing simulation files - mesh files, ... + Initialize FlowSim instance that runs flow123d simulations using generated random fields. + + :param config: Dict with keys: + - env: dict of environment executables (flow123d, gmsh, gmsh_version, etc.) 
+ - fields_params: parameters forwarded to create_corr_field + - yaml_file: base YAML template path + - geo_file: geometry (.geo) file path + - work_dir: base working directory for generated level common files + - field_template: optional template string for field definition in YAML + - time_factor: optional multiplier for timestep selection (default 1.0) + :param clean: If True, regenerate common files (mesh, yaml) for the given level. """ - self.need_workspace = True - # This simulation requires workspace + self.need_workspace = True # this simulation needs per-sample work directories self.env = config['env'] - # Environment variables, flow123d, gmsh, ... self._fields_params = config['fields_params'] self._fields = create_corr_field(**config['fields_params']) self._fields_used_params = None - # Random fields instance self.time_factor = config.get('time_factor', 1.0) - # It is used for minimal element from mesh determination (see level_instance method) - self.base_yaml_file = config['yaml_file'] self.base_geo_file = config['geo_file'] self.field_template = config.get('field_template', @@ -148,54 +137,55 @@ def __init__(self, config=None, clean=None): self.work_dir = config['work_dir'] self.clean = clean - super(Simulation, self).__init__() + super(Simulation, self).__init__() # keep compatibility with parent initialization + def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation: """ - Called from mlmc.Sampler, it creates single instance of LevelSimulation (mlmc.) - :param fine_level_params: in this version, it is just fine simulation step - :param coarse_level_params: in this version, it is just coarse simulation step - :return: mlmc.LevelSimulation object, this object is serialized in SamplingPoolPbs and deserialized in PbsJob, - so it allows pass simulation data from main process to PBS process + Create a LevelSimulation object for given fine/coarse steps. 
+ + This method is called in the main process (Sampler) and must prepare + common files (mesh, YAML) for that level. The returned LevelSimulation + is serialized and sent to PBS jobs (PbsJob) for actual execution. + + :param fine_level_params: list with single element [fine_step] (mesh step) + :param coarse_level_params: list with single element [coarse_step] (mesh step) or [0] for one-level MC + :return: LevelSimulation configured with task size and calculate method """ fine_step = fine_level_params[0] coarse_step = coarse_level_params[0] - # TODO: determine minimal element from mesh + # Set time steps used in YAML substitution (O(h) and O(h^2) placeholders) self.time_step_h1 = self.time_factor * fine_step self.time_step_h2 = self.time_factor * fine_step * fine_step - # Set fine simulation common files directory - # Files in the directory are used by each simulation at that level + # Directory to store files common to all samples at this fine level common_files_dir = os.path.join(self.work_dir, "l_step_{}_common_files".format(fine_step)) force_mkdir(common_files_dir, force=self.clean) self.mesh_file = os.path.join(common_files_dir, self.MESH_FILE) if self.clean: - # Prepare mesh + # Create computational mesh from geometry template geo_file = os.path.join(common_files_dir, self.GEO_FILE) shutil.copyfile(self.base_geo_file, geo_file) - self._make_mesh(geo_file, self.mesh_file, fine_step) # Common computational mesh for all samples. 
+ self._make_mesh(geo_file, self.mesh_file, fine_step) - # Prepare main input YAML + # Prepare main YAML by substituting placeholders yaml_template = os.path.join(common_files_dir, self.YAML_TEMPLATE) shutil.copyfile(self.base_yaml_file, yaml_template) yaml_file = os.path.join(common_files_dir, self.YAML_FILE) self._substitute_yaml(yaml_template, yaml_file) - # Mesh is extracted because we need number of mesh points to determine task_size parameter (see return value) + # Extract mesh metadata to determine task_size (number of points affects job weight) fine_mesh_data = self.extract_mesh(self.mesh_file) - # Set coarse simulation common files directory - # Files in the directory are used by each simulation at that level + # Set coarse sim common files dir if coarse level exists coarse_sim_common_files_dir = None if coarse_step != 0: coarse_sim_common_files_dir = os.path.join(self.work_dir, "l_step_{}_common_files".format(coarse_step)) - # Simulation config - # Configuration is used in mlmc.tool.pbs_job.PbsJob instance which is run from PBS process - # It is part of LevelSimulation which is serialized and then deserialized in mlmc.tool.pbs_job.PbsJob + # Prepare configuration dict that will be serialized in LevelSimulation config = dict() config["fine"] = {} config["coarse"] = {} @@ -204,71 +194,66 @@ def level_instance(self, fine_level_params: List[float], coarse_level_params: Li config["fine"]["common_files_dir"] = common_files_dir config["coarse"]["common_files_dir"] = coarse_sim_common_files_dir - config[ - "fields_used_params"] = self._fields_used_params # Params for Fields instance, which is createed in PbsJob + config["fields_used_params"] = self._fields_used_params config["gmsh"] = self.env['gmsh'] config["flow123d"] = self.env['flow123d'] config['fields_params'] = self._fields_params - # Auxiliary parameter which I use to determine task_size (should be from 0 to 1, if task_size is above 1 then pbs job is scheduled) - job_weight = 17000000 # 4000000 - 20 min, 
2000000 - cca 10 min + # job_weight is used to convert mesh size into a normalized task_size + job_weight = 17000000 return LevelSimulation(config_dict=config, task_size=len(fine_mesh_data['points']) / job_weight, calculate=FlowSim.calculate, - # method which carries out the calculation, will be called from PBS processs - need_sample_workspace=True # If True, a sample directory is created + need_sample_workspace=True ) @staticmethod def calculate(config, seed): """ - Method that actually run the calculation, it's called from mlmc.tool.pbs_job.PbsJob.calculate_samples() - Calculate fine and coarse sample and also extract their results - :param config: dictionary containing simulation configuration, LevelSimulation.config_dict (set in level_instance) - :param seed: random seed, int - :return: List[fine result, coarse result], both flatten arrays (see mlmc.sim.synth_simulation.calculate()) + Execute one MLMC sample calculation (fine and optional coarse) inside PBS job. + + :param config: Configuration dict from LevelSimulation.config_dict (contains common_files dirs, steps, fields params) + :param seed: Random seed for the sample generation (derived from sample id) + :return: Tuple (fine_result_array, coarse_result_array), both numpy arrays (coarse may be zeros for one-level MC) """ - # Init correlation field objects - fields = create_corr_field(**config['fields_params']) # correlated_field.Fields instance + # Initialize fields object in the worker process + fields = create_corr_field(**config['fields_params']) fields.set_outer_fields(config["fields_used_params"]) - coarse_step = config["coarse"]["step"] # Coarse simulation step, zero if one level MC - flow123d = config["flow123d"] # Flow123d command + coarse_step = config["coarse"]["step"] + flow123d = config["flow123d"] - # Extract fine mesh - fine_common_files_dir = config["fine"]["common_files_dir"] # Directory with fine simulation common files + # Extract fine mesh structure and optionally coarse mesh structure + 
fine_common_files_dir = config["fine"]["common_files_dir"] fine_mesh_data = FlowSim.extract_mesh(os.path.join(fine_common_files_dir, FlowSim.MESH_FILE)) - # Extract coarse mesh coarse_mesh_data = None coarse_common_files_dir = None if coarse_step != 0: - coarse_common_files_dir = config["coarse"][ - "common_files_dir"] # Directory with coarse simulation common files + coarse_common_files_dir = config["coarse"]["common_files_dir"] coarse_mesh_data = FlowSim.extract_mesh(os.path.join(coarse_common_files_dir, FlowSim.MESH_FILE)) - # Create fields both fine and coarse + # Prepare combined fields object that has points for both fine and coarse meshes fields = FlowSim.make_fields(fields, fine_mesh_data, coarse_mesh_data) - # Set random seed, seed is calculated from sample id, so it is not user defined + # Sample random field realizations reproducibly np.random.seed(seed) - # Generate random samples - fine_input_sample, coarse_input_sample = FlowSim.generate_random_sample(fields, coarse_step=coarse_step, - n_fine_elements=len( - fine_mesh_data['points'])) + fine_input_sample, coarse_input_sample = FlowSim.generate_random_sample( + fields, coarse_step=coarse_step, n_fine_elements=len(fine_mesh_data['points']) + ) - # Run fine sample + # Run fine-level simulation fields_file = os.path.join(os.getcwd(), FlowSim.FIELDS_FILE) fine_res = FlowSim._run_sample(fields_file, fine_mesh_data['ele_ids'], fine_input_sample, flow123d, fine_common_files_dir) - # Rename fields_sample.msh to fine_fields_sample.msh, we might remove it + # Move generated files to have 'fine_' prefix so they don't collide for filename in os.listdir(os.getcwd()): if not filename.startswith("fine"): shutil.move(os.path.join(os.getcwd(), filename), os.path.join(os.getcwd(), "fine_" + filename)) - # Run coarse sample + # Run coarse-level simulation if coarse sample exists coarse_res = np.zeros(len(fine_res)) if coarse_input_sample: coarse_res = FlowSim._run_sample(fields_file, coarse_mesh_data['ele_ids'], 
coarse_input_sample, flow123d, @@ -279,17 +264,19 @@ def calculate(config, seed): @staticmethod def make_fields(fields, fine_mesh_data, coarse_mesh_data): """ - Create random fields that are used by both coarse and fine simulation - :param fields: correlated_field.Fields instance - :param fine_mesh_data: Dict contains data extracted from fine mesh file (points, point_region_ids, region_map) - :param coarse_mesh_data: Dict contains data extracted from coarse mesh file (points, point_region_ids, region_map) - :return: correlated_field.Fields + Assign evaluation points to fields and return the Fields object prepared for sampling. + + :param fields: correlated_field.Fields instance (with local field definitions) + :param fine_mesh_data: Dict returned by extract_mesh() for the fine mesh + :param coarse_mesh_data: Dict returned by extract_mesh() for the coarse mesh (or None for one-level) + :return: the same cf.Fields object with points set for sampling """ - # One level MC has no coarse_mesh_data + # If no coarse mesh, just register fine mesh points if coarse_mesh_data is None: fields.set_points(fine_mesh_data['points'], fine_mesh_data['point_region_ids'], fine_mesh_data['region_map']) else: + # Concatenate fine and coarse points to compute joint fields (ensures consistent sampling) coarse_centers = coarse_mesh_data['points'] both_centers = np.concatenate((fine_mesh_data['points'], coarse_centers), axis=0) both_regions_ids = np.concatenate( @@ -302,13 +289,14 @@ def make_fields(fields, fine_mesh_data, coarse_mesh_data): @staticmethod def _run_sample(fields_file, ele_ids, fine_input_sample, flow123d, common_files_dir): """ - Create random fields file, call Flow123d and extract results - :param fields_file: Path to file with random fields - :param ele_ids: Element IDs in computational mesh - :param fine_input_sample: fields: {'field_name' : values_array, ..} - :param flow123d: Flow123d command - :param common_files_dir: Directory with simulations common files 
(flow_input.yaml, ) - :return: simulation result, ndarray + Write random fields to Gmsh file, call flow123d, and extract sample results. + + :param fields_file: Path where fields will be written (in current working directory) + :param ele_ids: Array of element ids for which field values are provided + :param fine_input_sample: Dict mapping field names to arrays of shape (n_elements, 1) + :param flow123d: Path/command to flow123d executable + :param common_files_dir: Directory containing common YAML and other input files for the level + :return: numpy.ndarray with extracted simulation result (e.g., water balance) """ gmsh_io.GmshIO().write_fields(fields_file, ele_ids, fine_input_sample) @@ -321,11 +309,16 @@ def _run_sample(fields_file, ele_ids, fine_input_sample, flow123d, common_files_ @staticmethod def generate_random_sample(fields, coarse_step, n_fine_elements): """ - Generate random field, both fine and coarse part. - Store them separeted. - :return: Dict, Dict + Generate random field samples for the fine and (optionally) coarse meshes. + + :param fields: cf.Fields object (already configured with points) + :param coarse_step: coarse-level step (0 for no coarse sample) + :param n_fine_elements: Number of elements that belong to fine mesh (used to split combined sample) + :return: Tuple (fine_input_sample: dict, coarse_input_sample: dict) + Each dict maps field name -> array shaped (n_elements, 1). """ fields_sample = fields.sample() + # Fine inputs are first n_fine_elements rows; coarse are the remainder (if any) fine_input_sample = {name: values[:n_fine_elements, None] for name, values in fields_sample.items()} coarse_input_sample = {} if coarse_step != 0: @@ -336,10 +329,12 @@ def generate_random_sample(fields, coarse_step, n_fine_elements): def _make_mesh(self, geo_file, mesh_file, fine_step): """ - Make the mesh, mesh_file: _step.msh. - Make substituted yaml: _step.yaml, - using common fields_step.msh file for generated fields. 
- :return: + Invoke Gmsh to produce a mesh with the requested geometric scale (clscale). + + :param geo_file: Path to the .geo file used to generate the mesh + :param mesh_file: Path where the .msh output will be written + :param fine_step: Mesh step (controls element size via -clscale) + :return: None """ if self.env['gmsh_version'] == 2: subprocess.call( @@ -350,9 +345,14 @@ def _make_mesh(self, geo_file, mesh_file, fine_step): @staticmethod def extract_mesh(mesh_file): """ - Extract mesh from file - :param mesh_file: Mesh file path - :return: Dict + Parse a Gmsh mesh file and extract points (element centers), element ids and region mapping. + + :param mesh_file: Path to .msh file to parse (Gmsh 2/4 depending on GmshIO implementation) + :return: Dict with keys: + - 'points': np.ndarray of shape (n_elements, dim) with element center coordinates + - 'point_region_ids': np.ndarray of region id per element + - 'ele_ids': np.ndarray of original element ids + - 'region_map': dict mapping region name -> region id """ mesh = gmsh_io.GmshIO(mesh_file) is_bc_region = {} @@ -386,7 +386,7 @@ def extract_mesh(mesh_file): diff = max_pt - min_pt min_axis = np.argmin(diff) non_zero_axes = [0, 1, 2] - # TODO: be able to use this mesh_dimension in fields + # If mesh is effectively 2D (one axis collapsed), remove that axis from point coordinates if diff[min_axis] < 1e-10: non_zero_axes.pop(min_axis) points = centers[:, non_zero_axes] @@ -395,8 +395,11 @@ def _substitute_yaml(self, yaml_tmpl, yaml_out): """ - Create substituted YAML file from the tamplate. - :return: + Build YAML input file for flow123d by substituting placeholders for mesh and fields. + + :param yaml_tmpl: Path to YAML template with placeholders like '<mesh_file>' and '<FIELD>'. + :param yaml_out: Path to output YAML file that will be used by flow123d. 
+ :return: None (also populates self._fields_used_params with names of substituted fields) """ param_dict = {} field_tmpl = self.field_template @@ -412,11 +415,12 @@ def _substitute_yaml(self, yaml_tmpl, yaml_out): @staticmethod def _extract_result(sample_dir): """ - Extract the observed value from the Flow123d output. - :param sample_dir: str, path to sample directory - :return: None, inf or water balance result (float) and overall sample time + Extract the observed quantity (e.g., water balance flux) from a flow123d run directory. + + :param sample_dir: Directory where flow123d output (water_balance.yaml) is located. + :return: numpy.ndarray with a single value [-total_flux] representing outflow (negative sign). + Raises Exception if expected data is not found or inflow at outlet is positive. """ - # extract the flux balance_file = os.path.join(sample_dir, "water_balance.yaml") with open(balance_file, "r") as f: @@ -434,44 +438,19 @@ def _extract_result(sample_dir): flux_in = float(flux_item['data'][1]) if flux_in > 1e-10: raise Exception("Possitive inflow at outlet region.") - total_flux += flux # flux field + total_flux += flux found = True - # Get flow123d computing time - # run_time = FlowSim.get_run_time(sample_dir) - if not found: - raise Exception + raise Exception("No outlet flux found in water_balance.yaml") return np.array([-total_flux]) @staticmethod def result_format() -> List[QuantitySpec]: """ - Define simulation result format - :return: List[QuantitySpec, ...] + Describe the simulation output format as a list of QuantitySpec objects. 
+ + :return: List[QuantitySpec] describing each output quantity (name, unit, shape, times, locations) """ spec1 = QuantitySpec(name="conductivity", unit="m", shape=(1, 1), times=[1], locations=['0']) - # spec2 = QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=['30', '40']) return [spec1] - - # @staticmethod - # def get_run_time(sample_dir): - # """ - # Get flow123d sample running time from profiler - # :param sample_dir: Sample directory - # :return: float - # """ - # profiler_file = os.path.join(sample_dir, "profiler_info_*.json") - # profiler = glob.glob(profiler_file)[0] - # - # try: - # with open(profiler, "r") as f: - # prof_content = json.load(f) - # - # run_time = float(prof_content['children'][0]['cumul-time-sum']) - # except: - # print("Extract run time failed") - # - # return run_time - - diff --git a/mlmc/tool/gmsh_io.py b/mlmc/tool/gmsh_io.py index c5a3ad36..0d059918 100644 --- a/mlmc/tool/gmsh_io.py +++ b/mlmc/tool/gmsh_io.py @@ -3,21 +3,8 @@ import struct import numpy as np -import enum -# class ElementType(enum.IntEnum): -# simplex_1d = 1 -# simplex_2d = 2 -# simplex_3d = 4 -# -# element_sizes = { -# 1: 1, -# 2: 2, -# 4: 3 -# } -# - class GmshIO: """This is a class for storing nodes and elements. Based on Gmsh.py @@ -28,25 +15,60 @@ class GmshIO: Methods: read([file]) -- Parse a Gmsh version 1.0 or 2.0 mesh file - write([file]) -- Output a Gmsh version 2.0 mesh file + write_ascii([file]) -- Output a Gmsh version 2.0 mesh file (ASCII) + write_binary([file]) -- Output a Gmsh version 2.0 mesh file (binary) + write_element_data(f, ele_ids, name, values) -- write $ElementData block + write_fields(msh_file, ele_ids, fields) -- convenience to write several ElementData blocks """ def __init__(self, filename=None): - """Initialise Gmsh data structure""" + """ + Initialise Gmsh data structure. + + :param filename: Optional path to a .msh file. If provided, the file is read on construction. 
+ :return: None + """ self.reset() self.filename = filename if self.filename: self.read() def reset(self): - """Reinitialise Gmsh data structure""" + """ + Reinitialise internal storage. + + Clears nodes, elements, physical names and element_data dictionaries. + + :return: None + """ self.nodes = {} self.elements = {} self.physical = {} self.element_data = {} def read_element_data_head(self, mshfile): - + """ + Read header of a $ElementData block from an open mshfile. + + The method expects the lines after '$ElementData' to match the conventional + Gmsh textual ElementData header layout: + + "" + +