diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
index e3bddcb2..d9626f48 100644
--- a/.github/workflows/pythonpackage.yml
+++ b/.github/workflows/pythonpackage.yml
@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: [3.6, 3.7, 3.8]
+ python-version: ["3.10", "3.12"]
steps:
- uses: actions/checkout@v2
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 0299153c..7cdd6911 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -1,16 +1,17 @@
-# Required
+# .readthedocs.yaml
version: 2
-# Build documentation in the docs/ directory with Sphinx
+build:
+ os: ubuntu-22.04
+ tools:
+ python: "3.10"
+
sphinx:
- configuration: docs/source/conf.py
+ configuration: docs/source/conf.py
-# Optionally build your docs in additional formats such as PDF
formats:
- - pdf
+ - pdf
-# Optionally set the version of Python and requirements required to build your docs
python:
- version: 3.8
- install:
+ install:
- requirements: docs/requirements.txt
diff --git a/README.rst b/README.rst
index 2bc32b23..2cc38ac7 100644
--- a/README.rst
+++ b/README.rst
@@ -1,6 +1,5 @@
-
MLMC
-----
+====
.. image:: https://github.com/GeoMop/MLMC/workflows/package/badge.svg
:target: https://github.com/GeoMop/MLMC/actions
@@ -8,56 +7,87 @@ MLMC
:target: https://pypi.org/project/mlmc/
.. image:: https://img.shields.io/pypi/pyversions/mlmc.svg
:target: https://pypi.org/project/mlmc/
+.. image:: https://img.shields.io/badge/License-GPLv3-blue.svg
+ :target: https://www.gnu.org/licenses/gpl-3.0.html
-MLMC provides tools for the multilevel Monte Carlo method.
-
-mlmc package includes:
-- samples scheduling
-- estimation of generalized moment functions
-- probability density function approximation
-- advanced post-processing with Quantity structure
+**MLMC** is a Python library implementing the **Multilevel Monte Carlo (MLMC)** method.
+It provides tools for sampling, moment estimation, statistical post-processing, and more.
+Originally developed as part of the `GeoMop <https://github.com/GeoMop>`_ project.
-It is meant as part of the `GeoMop `_ project in particular Analysis component.
+Features
+--------
+* Sample scheduling
+* Estimation of generalized moments
+* Advanced post-processing with the ``Quantity`` structure
+* Approximation of probability density functions using the maximum entropy method
+* Bootstrap and regression-based variance estimation
+* Diagnostic tools (e.g., consistency checks)
Installation
-----------------------------------
-Package can be installed via pip.
+------------
+
+The package is available on PyPI and can be installed with pip:
-.. code-block::
+.. code-block:: bash
pip install mlmc
+To install the latest development version:
+
+.. code-block:: bash
+
+ git clone https://github.com/GeoMop/MLMC.git
+ cd MLMC
+ pip install -e .
Documentation
-------------
-You can find the documentation including tutorials under https://mlmc.readthedocs.io/
+
+Full documentation, including tutorials, is available at:
+`https://mlmc.readthedocs.io/ <https://mlmc.readthedocs.io/>`_
+
+Topics covered include:
+
+* Basic MLMC workflow and examples
+* Definition and composition of ``Quantity`` objects
+* Moment and covariance estimation
+* Probability density function reconstruction
Development
-----------
-Provided that you want to contribute, create a pull request and make sure you run `tox` before. Tox
-installs necessary requirements as well as the developed package itself into clear virtual environment
-and call pytest to search in the `test` folder for tests to execute.
+Contributions are welcome!
+To contribute, please fork the repository and create a pull request.
+
+Before submitting, make sure all tests pass by running ``tox``:
+
+.. code-block:: bash
+ pip install tox
+ tox
+
+``tox`` creates a clean virtual environment, installs all dependencies,
+runs unit tests via ``pytest``, and checks that the package installs correctly.
Requirements
------------
-- `NumPy `_
-- `SciPy `_
-- `h5py `_
-- `attrs `_
-- `ruamel.yaml `_
-- `gstools `_
-- `memoization `_
-- `sklearn `_
+MLMC depends on the following Python packages:
-Licence
--------
-* Free software: GPL 3.0 License
+* `NumPy <https://numpy.org/>`_
+* `SciPy <https://scipy.org/>`_
+* `h5py <https://www.h5py.org/>`_
+* `attrs <https://www.attrs.org/>`_
+* `ruamel.yaml <https://yaml.readthedocs.io/>`_
+* `gstools <https://github.com/GeoStat-Framework/GSTools>`_
+* `memoization <https://github.com/lonelyenvoy/python-memoization>`_
+* `scikit-learn <https://scikit-learn.org/>`_
+License
+-------
+* Free software: **GNU General Public License v3.0**
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 83f22df7..5852805c 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,9 +1,18 @@
+# --- Documentation dependencies ---
+sphinx>=7.0
+sphinx-rtd-theme
+sphinx-autodoc-typehints
+sphinxcontrib-napoleon
+myst-parser
+nbsphinx
+sphinx_copybutton
+
+# --- MLMC runtime dependencies ---
numpy
scipy
-sklearn
+scikit-learn
h5py>=3.1.0
-ruamel.yaml
+ruamel.yaml==0.17.26
attrs
gstools
-memoization
-matplotlib
+memoization
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 55a3cec7..43609bc6 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,78 +1,68 @@
# Configuration file for the Sphinx documentation builder.
#
-# This file only contains a selection of the most common options. For a full
-# list see the documentation:
+# For a full list of configuration options see:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
-# -- Path setup --------------------------------------------------------------
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
import os
import sys
import datetime
+
+# -- Path setup --------------------------------------------------------------
+
+# Add project root to sys.path
sys.path.insert(0, os.path.abspath("../../"))
# -- Project information -----------------------------------------------------
-# General information about the project.
+
curr_year = datetime.datetime.now().year
project = "mlmc"
-copyright = "{}, Jan Březina, Martin Špetlík".format(curr_year)
-author = "Jan Březina, Martin Špetlík"
+copyright = f"{curr_year}, Martin Špetlík, Jan Březina"
+author = "Martin Špetlík, Jan Březina"
# The full version, including alpha/beta/rc tags
-release = '1.0.1'
-
+release = "1.0.3"
# -- General configuration ---------------------------------------------------
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
extensions = [
- 'sphinx.ext.autodoc',
- 'sphinx.ext.autosummary',
- 'sphinx.ext.napoleon',
- 'sphinx.ext.viewcode',
- 'sphinx.ext.doctest',
- 'sphinx.ext.autosectionlabel'
- ]
-
-# autosummaries from source-files
-autosummary_generate = True
-# dont show __init__ docstring
-autoclass_content = 'class'
-# sort class members
-autodoc_member_order = "groupwise"
-# autodoc_member_order = 'bysource'
+ "sphinx.ext.autodoc",
+ "sphinx.ext.autosummary",
+ "sphinx.ext.napoleon",
+ "sphinx.ext.viewcode",
+ "sphinx.ext.doctest",
+ "sphinx.ext.autosectionlabel",
+ "sphinx_autodoc_typehints",
+ "myst_parser",
+ "nbsphinx",
+ "sphinx_copybutton",
+]
+
+# Autodoc settings
+autosummary_generate = True # Generate autosummary files
+autoclass_content = "class" # Don't repeat __init__ docstring
+autodoc_member_order = "groupwise" # Grouped members in docs
+autodoc_typehints = "description" # Show type hints in docstring
+
+# Napoleon settings for Google-style docstrings
+napoleon_google_docstring = True
+napoleon_numpy_docstring = False
+napoleon_use_param = True
+napoleon_use_ivar = True
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = []
+templates_path = ["_templates"]
+# Exclude build files and system junk
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
# -- Options for HTML output -------------------------------------------------
-# The theme to use for HTML and HTML Help pages. See the documentation for
-# a list of builtin themes.
-#
-html_theme = "sphinx_rtd_theme" #'alabaster'
+html_theme = "sphinx_rtd_theme"
html_theme_options = {
- # 'canonical_url': '',
- # 'analytics_id': '',
"logo_only": False,
"display_version": True,
"prev_next_buttons_location": "top",
- # 'style_external_links': False,
- # 'vcs_pageview_mode': '',
- # Toc options
"collapse_navigation": False,
"sticky_navigation": True,
"navigation_depth": 4,
@@ -80,18 +70,11 @@
"titles_only": False,
}
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-#html_static_path = ['_static']
-
-# autodoc_default_options = {
-# 'members': True,
-# # The ones below should be optional but work nicely together with
-# # example_package/autodoctest/doc/source/_templates/autosummary/class.rst
-# # and other defaults in sphinx-autodoc.
-# 'show-inheritance': True,
-# 'inherited-members': True,
-# 'no-special-members': True,
-# }
+# Optional: uncomment if you have static files like custom CSS
+# html_static_path = ["_static"]
+
+# This tells Sphinx which file is the master doc (entry point)
master_doc = "contents"
+
+# -- Optional convenience: print path info on build --------------------------
+print(f"[conf.py] Using sys.path[0]: {sys.path[0]}")
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
index 23b4acd6..7c0271ad 100644
--- a/docs/source/examples.rst
+++ b/docs/source/examples.rst
@@ -4,7 +4,10 @@ Tutorials
.. automodule:: examples
-The following tutorials illustrates how to use mlmc package.
+This section provides step-by-step tutorials demonstrating how to use the **mlmc** package.
+
+Each tutorial builds upon the previous one — starting from sampler creation, through sample scheduling and quantity handling, to full postprocessing and probability density estimation.
+
.. toctree::
:includehidden:
@@ -16,4 +19,12 @@ The following tutorials illustrates how to use mlmc package.
examples_postprocessing
-You can find more complex examples in :any:`examples.shooting`
+Additional Examples
+-------------------
+
+You can find more advanced and domain-specific examples (e.g., stochastic simulations or PDE-based problems) in:
+
+:mod:`examples.shooting`
+
+These examples demonstrate how to integrate MLMC with real-world simulation workflows.
+
diff --git a/docs/source/examples_postprocessing.rst b/docs/source/examples_postprocessing.rst
index 56479b6f..cb01a45d 100644
--- a/docs/source/examples_postprocessing.rst
+++ b/docs/source/examples_postprocessing.rst
@@ -1,64 +1,105 @@
-Results postprocessing
+.. _examples results postprocessing:
+Results Postprocessing
======================
-If you already know how to create a sampler, schedule samples and handle quantities,
-postprocessing will be easy for you. Otherwise, see the previous tutorials before.
+Once you know how to **create a sampler**, **schedule samples**, and **work with quantities**,
+postprocessing becomes straightforward.
+If you haven’t gone through those steps yet, please review the earlier tutorials first.
+Estimating Moments
+------------------
-First, schedule samples and estimate moments for a particular quantity
+We start by scheduling samples and estimating moments for a specific quantity.
.. testcode::
import mlmc
- n_levels = 3 # number of MLMC levels
- step_range = [0.5, 0.005] # simulation steps at the coarsest and finest levels
- target_var = 1e-4
- n_moments = 10
+
+ n_levels = 3 # Number of MLMC levels
+ step_range = [0.5, 0.005] # Simulation steps for coarsest and finest levels
+ target_var = 1e-4 # Desired target variance
+ n_moments = 10 # Number of moment functions
+
+ # Compute level parameters (simulation steps per level)
level_parameters = mlmc.estimator.determine_level_parameters(n_levels, step_range)
- # level_parameters determine each level simulation steps
- # level_parameters can be manually prescribed as a list of lists
+ # Initialize components
simulation_factory = mlmc.SynthSimulation()
sampling_pool = mlmc.OneProcessPool()
- # Memory() storage keeps samples in the computer main memory
- sample_storage = mlmc.Memory()
+ sample_storage = mlmc.Memory() # In-memory sample storage
- sampler = mlmc.Sampler(sample_storage=sample_storage,
- sampling_pool=sampling_pool,
- sim_factory=simulation_factory,
- level_parameters=level_parameters)
+ # Create the sampler
+ sampler = mlmc.Sampler(
+ sample_storage=sample_storage,
+ sampling_pool=sampling_pool,
+ sim_factory=simulation_factory,
+ level_parameters=level_parameters
+ )
+ # Schedule and run initial samples
sampler.set_initial_n_samples()
sampler.schedule_samples()
+
running = 1
while running > 0:
running = 0
running += sampler.ask_sampling_pool_for_samples()
- # Get particular quantity
+Accessing Quantities
+--------------------
+
+Now we extract a specific **Quantity** from the results and set up the estimation.
+
+.. testcode::
+
+ # Obtain the root quantity representing all simulation outputs
root_quantity = mlmc.make_root_quantity(sampler.sample_storage, simulation_factory.result_format())
- length = root_quantity['length']
+
+ # Select a sub-quantity
+ length = root_quantity["length"]
time = length[1]
- location = time['10']
+ location = time["10"]
q_value = location[0]
+Defining the Domain and Estimator
+---------------------------------
+
+Before estimating higher-order statistics, we determine the valid domain and define the estimator.
+
+.. testcode::
+
true_domain = mlmc.Estimate.estimate_domain(q_value, sample_storage)
moments_fn = mlmc.Legendre(n_moments, true_domain)
- estimate_obj = mlmc.Estimate(q_value, sample_storage=sampler.sample_storage,
- moments_fn=moments_fn)
+
+ estimate_obj = mlmc.Estimate(
+ q_value,
+ sample_storage=sampler.sample_storage,
+ moments_fn=moments_fn
+ )
+
+
+Variance and Sample Estimation
+------------------------------
+
+We can now estimate variances and determine how many samples are required
+to achieve the target variance.
+
+.. testcode::
variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler.n_finished_samples)
from mlmc.estimator import estimate_n_samples_for_target_variance
- n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops,
- n_levels=sampler.n_levels)
+ n_estimated = estimate_n_samples_for_target_variance(
+ target_var, variances, n_ops, n_levels=sampler.n_levels
+ )
+ # Add and process samples iteratively until convergence
while not sampler.process_adding_samples(n_estimated):
- # New estimation according to already finished samples
variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler._n_scheduled_samples)
- n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops,
- n_levels=sampler.n_levels)
+ n_estimated = estimate_n_samples_for_target_variance(
+ target_var, variances, n_ops, n_levels=sampler.n_levels
+ )
running = 1
while running > 0:
@@ -66,17 +107,36 @@ First, schedule samples and estimate moments for a particular quantity
running += sampler.ask_sampling_pool_for_samples()
-Probability density function approximation
----------------------
+Probability Density Function Approximation
+------------------------------------------
+
+Finally, we can construct and visualize an approximation of the **probability density function (PDF)**.
.. testcode::
from mlmc.plot.plots import Distribution
+
distr_obj, result, _, _ = estimate_obj.construct_density()
- distr_plot = Distribution(title="distributions", error_plot=None)
+
+ distr_plot = Distribution(title="Distributions", error_plot=None)
distr_plot.add_distribution(distr_obj)
+ # For single-level simulations, add a histogram of raw samples
if n_levels == 1:
samples = estimate_obj.get_level_samples(level_id=0)[..., 0]
- distr_plot.add_raw_samples(np.squeeze(samples)) # add histogram
+ distr_plot.add_raw_samples(np.squeeze(samples))
+
distr_plot.show()
+
+
+**Summary**
+-----------
+
+In this tutorial, you learned how to:
+- Estimate statistical moments from MLMC results.
+- Automatically determine the required number of samples to reach a target variance.
+- Construct and visualize an estimated probability density function.
+
+This completes the **postprocessing** workflow of the MLMC pipeline.
+
+
diff --git a/docs/source/examples_quantity.rst b/docs/source/examples_quantity.rst
index 9ab4fa58..09812832 100644
--- a/docs/source/examples_quantity.rst
+++ b/docs/source/examples_quantity.rst
@@ -1,31 +1,39 @@
.. _examples quantity:
-Quantity tutorial
+Quantity Tutorial
=================
-An overview of basic :any:`mlmc.quantity.quantity.Quantity` operations.
-Quantity related classes and functions allow estimate mean and variance of MLMC samples results,
-derive other quantities from original ones and much more.
+This tutorial provides an overview of basic :any:`mlmc.quantity.quantity.Quantity` operations.
+
+The :mod:`mlmc.quantity` module and its related classes allow you to:
+- Estimate means and variances of MLMC sample results.
+- Derive new quantities from existing ones.
+- Perform arithmetic and NumPy-based operations on quantities.
+
+
+Setup
+-----
+
+Before exploring `Quantity` operations, we first set up a simple synthetic MLMC sampler.
.. testcode::
:hide:
import mlmc
- n_levels = 3 # number of MLMC levels
- step_range = [0.5, 0.005] # simulation steps at the coarsest and finest levels
+ n_levels = 3 # number of MLMC levels
+ step_range = [0.5, 0.005] # simulation steps at the coarsest and finest levels
level_parameters = mlmc.estimator.determine_level_parameters(n_levels, step_range)
- # level_parameters determine each level simulation steps
- # level_parameters can be manually prescribed as a list of lists
simulation_factory = mlmc.SynthSimulation()
sampling_pool = mlmc.OneProcessPool()
- # Memory() storage keeps samples in the computer main memory
sample_storage = mlmc.Memory()
- sampler = mlmc.Sampler(sample_storage=sample_storage,
- sampling_pool=sampling_pool,
- sim_factory=simulation_factory,
- level_parameters=level_parameters)
+ sampler = mlmc.Sampler(
+ sample_storage=sample_storage,
+ sampling_pool=sampling_pool,
+ sim_factory=simulation_factory,
+ level_parameters=level_parameters
+ )
n_samples = [100, 75, 50]
sampler.set_initial_n_samples(n_samples)
@@ -36,7 +44,6 @@ derive other quantities from original ones and much more.
running += sampler.ask_sampling_pool_for_samples()
-
.. testcode::
import numpy as np
@@ -44,7 +51,10 @@ derive other quantities from original ones and much more.
from examples.synthetic_quantity import create_sampler
-First, the synthetic Quantity with the following :code:`result_format` is created
+Creating a Synthetic Quantity
+-----------------------------
+
+We begin by creating a synthetic :class:`mlmc.quantity.quantity.Quantity` with a predefined ``result_format``:
.. testcode::
@@ -52,72 +62,60 @@ First, the synthetic Quantity with the following :code:`result_format` is create
# mlmc.QuantitySpec(name="length", unit="m", shape=(2, 1), times=[1, 2, 3], locations=['10', '20']),
# mlmc.QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=['30', '40']),
# ]
- # Meaning: sample results contain data on two quantities in three time steps [1, 2, 3] and in two locations,
- # each quantity can have different shape
sampler, simulation_factory, moments_fn = create_sampler()
root_quantity = mlmc.make_root_quantity(sampler.sample_storage, simulation_factory.result_format())
-:code:`root_quantity` is :py:class:`mlmc.quantity.quantity.Quantity` instance and represents the whole result data.
-According to :code:`result_format` it contains two sub-quantities named "length" and "width".
+This means the sample results contain data for two quantities (`length` and `width`) at three time steps `[1, 2, 3]` and two locations each.
+
+:code:`root_quantity` is an instance of :class:`mlmc.quantity.quantity.Quantity`, representing the full result data structure.
+
+Mean Estimates
+--------------
-Mean estimates
----------------
-To get estimated mean of a quantity:
+To compute the estimated mean of a quantity:
.. testcode::
root_quantity_mean = mlmc.quantity.quantity_estimate.estimate_mean(root_quantity)
-:code:`root_quantity_mean` is an instance of :py:class:`mlmc.quantity.quantity.QuantityMean`
+The returned object, :code:`root_quantity_mean`, is a :class:`mlmc.quantity.quantity.QuantityMean` instance.
-To get the total mean value:
+To retrieve statistical values:
.. testcode::
+ # Total mean and variance
root_quantity_mean.mean
-
-To get the total variance value:
-
-.. testcode::
-
root_quantity_mean.var
-To get means at each level:
-
-.. testcode::
-
+ # Means and variances at each level
root_quantity_mean.l_means
-
-To get variances at each level:
-
-.. testcode::
-
root_quantity_mean.l_vars
-Estimate moments and covariance matrix
---------------------------------------
+Moments and Covariance Estimation
+---------------------------------
-Create a quantity representing moments and get their estimates
+To create and estimate statistical moments:
.. testcode::
moments_quantity = mlmc.quantity.quantity_estimate.moments(root_quantity, moments_fn=moments_fn)
moments_mean = mlmc.quantity.quantity_estimate.estimate_mean(moments_quantity)
-To obtain central moments, use:
+To obtain **central moments**, first subtract the mean:
.. testcode::
central_root_quantity = root_quantity - root_quantity_mean.mean
- central_moments_quantity = mlmc.quantity.quantity_estimate.moments(central_root_quantity,
- moments_fn=moments_fn)
+ central_moments_quantity = mlmc.quantity.quantity_estimate.moments(
+ central_root_quantity, moments_fn=moments_fn
+ )
central_moments_mean = mlmc.quantity.quantity_estimate.estimate_mean(central_moments_quantity)
-
-Create a quantity representing a covariance matrix
+To estimate a **covariance matrix**:
.. testcode::
@@ -125,36 +123,31 @@ Create a quantity representing a covariance matrix
cov_mean = mlmc.quantity.quantity_estimate.estimate_mean(covariance_quantity)
-
-Quantity selection
+Quantity Selection
------------------
-According to the result_format, it is possible to select items from a quantity
+You can access and manipulate sub-quantities directly using the structure defined by `result_format`:
.. testcode::
length = root_quantity["length"] # Get quantity with name="length"
- width = root_quantity["width"] # Get quantity with name="width"
+ width = root_quantity["width"] # Get quantity with name="width"
-:code:`length` and :code:`width` are still :py:class:`mlmc.quantity.quantity.Quantity` instances
+Both are still :class:`mlmc.quantity.quantity.Quantity` instances.
-To get a quantity at particular time:
+Selecting by **time**:
.. testcode::
length_locations = length.time_interpolation(2.5)
-:code:`length_locations` represents results for all locations of quantity named "length" at the time 2.5
-
-To get quantity at particular location:
+Selecting by **location**:
.. testcode::
length_result = length_locations['10']
-:code:`length_result` represents results shape=(2, 1) of quantity named "length" at the time 2,5 and location '10'
-
-Now it is possible to slice Quantity :code:`length_result` the same way as :code:`np.ndarray`. For example:
+Now, :code:`length_result` behaves like a NumPy array:
.. testcode::
@@ -164,39 +157,41 @@ Now it is possible to slice Quantity :code:`length_result` the same way as :code
length_result[:1, :1]
length_result[:2, ...]
-Keep in mind:
- - all derived quantities such as :code:`length_locations` and :code:`length_result`, ... are still :py:class:`mlmc.quantity.quantity.Quantity` instances
- - selecting location before time is not supported!
+.. note::
+ - All derived quantities (like :code:`length_locations` or :code:`length_result`) remain `Quantity` instances.
+ - Selecting a **location before time** is not supported.
-Binary operations
+
+Binary Operations
-----------------
-Following operations are supported
- - Addition, subtraction, ... of compatible quantities
+`Quantity` supports standard arithmetic operations:
+
+**Between quantities:**
- .. testcode::
+.. testcode::
- quantity = root_quantity + root_quantity
- quantity = root_quantity + root_quantity + root_quantity
+ quantity = root_quantity + root_quantity
+ quantity = root_quantity + root_quantity + root_quantity
- - Operations with Quantity and a constant
+**With constants:**
- .. testcode::
+.. testcode::
- const = 5
- quantity_const_add = root_quantity + const
- quantity_const_sub = root_quantity - const
- quantity_const_mult = root_quantity * const
- quantity_const_div = root_quantity / const
- quantity_const_mod = root_quantity % const
- quantity_add_mult = root_quantity + root_quantity * const
+ const = 5
+ quantity_const_add = root_quantity + const
+ quantity_const_sub = root_quantity - const
+ quantity_const_mult = root_quantity * const
+ quantity_const_div = root_quantity / const
+ quantity_const_mod = root_quantity % const
+ quantity_add_mult = root_quantity + root_quantity * const
-NumPy universal functions
+NumPy Universal Functions
--------------------------
-Examples of tested NumPy universal functions:
+`Quantity` objects are compatible with many NumPy universal functions (`ufuncs`):
.. testcode::
@@ -209,41 +204,41 @@ Examples of tested NumPy universal functions:
x = np.ones(24)
quantity_np_divide_const = np.divide(x, root_quantity)
quantity_np_add_const = np.add(x, root_quantity)
- quantity_np_arctan2_cosnt = np.arctan2(x, root_quantity)
+ quantity_np_arctan2_const = np.arctan2(x, root_quantity)
-Quantity selection by conditions
----------------------------------
+Conditional Selection
+----------------------
-Method :code:`select` returns :py:class:`mlmc.quantity.quantity.Quantity` instance
+You can select parts of a quantity using logical conditions via the :code:`select()` method.
.. testcode::
selected_quantity = root_quantity.select(0 < root_quantity)
+Or using comparisons between quantities:
+
.. testcode::
quantity_add = root_quantity + root_quantity
quantity_add_select = quantity_add.select(root_quantity < quantity_add)
root_quantity_selected = root_quantity.select(-1 != root_quantity)
-Logical operation among more provided conditions is AND
+Multiple conditions are combined using logical **AND**:
.. testcode::
quantity_add.select(root_quantity < quantity_add, root_quantity < 10)
-User can use one of the logical NumPy universal functions
+Use NumPy logical functions for more complex conditions:
.. testcode::
selected_quantity_or = root_quantity.select(np.logical_or(0 < root_quantity, root_quantity < 10))
-It is possible to explicitly define the selection condition of one quantity by another quantity
+You can also explicitly define the selection mask:
.. testcode::
- mask = np.logical_and(0 < root_quantity, root_quantity < 10) # mask is Quantity instance
+ mask = np.logical_and(0 < root_quantity, root_quantity < 10)
q_bounded = root_quantity.select(mask)
-
-
diff --git a/docs/source/examples_sampler_creation.rst b/docs/source/examples_sampler_creation.rst
index b363d774..29dab5b0 100644
--- a/docs/source/examples_sampler_creation.rst
+++ b/docs/source/examples_sampler_creation.rst
@@ -1,56 +1,102 @@
-Sampler creation
+Sampler Creation
=================
-Sampler controls the execution of MLMC samples.
+The **Sampler** controls the execution and management of MLMC (Multilevel Monte Carlo) samples.
+This example demonstrates how to configure all essential components for an MLMC simulation.
-First, import mlmc package and define basic MLMC parameters.
+
+
+Basic Setup
+------------
+
+First, import the :mod:`mlmc` package and define basic MLMC parameters.
.. testcode::
import mlmc
- n_levels = 3 # number of MLMC levels
- step_range = [0.5, 0.005] # simulation steps at the coarsest and finest levels
+
+ # Define number of MLMC levels
+ n_levels = 3
+
+ # Simulation step sizes at the coarsest and finest levels
+ step_range = [0.5, 0.005]
+
+ # Compute level parameters (simulation steps per level)
level_parameters = mlmc.estimator.determine_level_parameters(n_levels, step_range)
- # level_parameters determine each level simulation steps
- # level_parameters can be manually prescribed as a list of lists
+ # Alternatively, you can specify level_parameters manually as a list of lists.
-Prepare a simulation, it must be instance of class that inherits from :any:`mlmc.sim.simulation.Simulation`.
+
+
+Simulation Definition
+----------------------
+
+Prepare a simulation instance.
+The simulation class must inherit from :class:`mlmc.sim.simulation.Simulation`.
.. testcode::
simulation_factory = mlmc.SynthSimulation()
-Create a sampling pool.
+This factory will be used by the sampler to create individual simulation runs.
+
+
+
+Sampling Pool
+--------------
+
+Next, create a sampling pool that controls how samples are executed.
.. testcode::
sampling_pool = mlmc.OneProcessPool()
+The :class:`mlmc.sampling_pool.OneProcessPool` executes samples sequentially within a single process.
+
+You can also use:
+
+- :class:`mlmc.sampling_pool.ProcessPool` — executes samples in parallel across multiple processes.
+- :class:`mlmc.sampling_pool_pbs.SamplingPoolPBS` — submits jobs to a PBS (Portable Batch System) cluster for distributed computation.
+
-You can also use :any:`mlmc.sampling_pool.ProcessPool` which supports parallel execution of MLMC samples.
-In order to use PBS (portable batch system), employ :any:`mlmc.sampling_pool_pbs.SamplingPoolPBS`.
+Sample Storage
+---------------
-Create a sample storage. It contains sample's related data e.g. simulation result.
+The **sample storage** keeps all data related to simulation results.
.. testcode::
- # Memory() storage keeps samples in the computer main memory
+ # Memory storage keeps samples in main memory
sample_storage = mlmc.Memory()
-We support also HDF5 file storage :any:`mlmc.sample_storage_hdf.SampleStorageHDF`.
+Alternatively, use persistent file-based storage:
+- :class:`mlmc.sample_storage_hdf.SampleStorageHDF` — stores results in an HDF5 file for long-term reuse and analysis.
-Finally, create a sampler that manages scheduling MLMC samples and also saves the results.
+
+
+Sampler Initialization
+-----------------------
+
+Finally, create the **Sampler** instance.
+It coordinates sample scheduling, simulation execution, and result collection.
.. testcode::
- sampler = mlmc.Sampler(sample_storage=sample_storage,
- sampling_pool=sampling_pool,
- sim_factory=simulation_factory,
- level_parameters=level_parameters)
+ sampler = mlmc.Sampler(
+ sample_storage=sample_storage,
+ sampling_pool=sampling_pool,
+ sim_factory=simulation_factory,
+ level_parameters=level_parameters
+ )
+
+The sampler is now ready to generate and manage MLMC samples.
+
+Next Steps
+-----------
-:ref:`examples samples scheduling`
\ No newline at end of file
+Proceed to the next example:
+:ref:`examples samples scheduling`
diff --git a/docs/source/examples_samples_scheduling.rst b/docs/source/examples_samples_scheduling.rst
index 135a421c..32ad7fd0 100644
--- a/docs/source/examples_samples_scheduling.rst
+++ b/docs/source/examples_samples_scheduling.rst
@@ -1,128 +1,156 @@
.. _examples samples scheduling:
-Samples scheduling
+Samples Scheduling
==================
-Once you create a sampler you can schedule samples.
+Once you have created a **Sampler**, you can schedule the execution of MLMC samples in different ways.
+This tutorial demonstrates two approaches:
+1. Scheduling with a prescribed number of samples.
+2. Scheduling to reach a target variance automatically.
-1. Prescribe the exact number of samples
-----------------------------------------------------------------
+
+1. Prescribing an Exact Number of Samples
+-----------------------------------------
+
+In this approach, you explicitly set how many samples should be created at each MLMC level.
.. testcode::
:hide:
import mlmc
- n_levels = 3 # number of MLMC levels
- step_range = [0.5, 0.005] # simulation steps at the coarsest and finest levels
+ n_levels = 3 # number of MLMC levels
+ step_range = [0.5, 0.005] # simulation steps at the coarsest and finest levels
level_parameters = mlmc.estimator.determine_level_parameters(n_levels, step_range)
- # level_parameters determine each level simulation steps
- # level_parameters can be manually prescribed as a list of lists
simulation_factory = mlmc.SynthSimulation()
sampling_pool = mlmc.OneProcessPool()
- # Memory() storage keeps samples in the computer main memory
sample_storage = mlmc.Memory()
- sampler = mlmc.Sampler(sample_storage=sample_storage,
- sampling_pool=sampling_pool,
- sim_factory=simulation_factory,
- level_parameters=level_parameters)
+ sampler = mlmc.Sampler(
+ sample_storage=sample_storage,
+ sampling_pool=sampling_pool,
+ sim_factory=simulation_factory,
+ level_parameters=level_parameters
+ )
+
+Set the number of samples for each level:
.. testcode::
n_samples = [100, 75, 50]
sampler.set_initial_n_samples(n_samples)
-Schedule set samples.
+Schedule the prescribed samples:
.. testcode::
sampler.schedule_samples()
-You can wait until all samples are finished.
+Wait until all samples have finished:
.. testcode::
- running = 1
- while running > 0:
- running = 0
- running += sampler.ask_sampling_pool_for_samples()
+ running = 1
+ while running > 0:
+ running = 0
+ running += sampler.ask_sampling_pool_for_samples()
+
+2. Prescribing a Target Variance
+--------------------------------
-2. Prescribe a target variance
--------------------------------------------------------------
+This approach automatically determines the number of samples needed to achieve a desired **target variance**.
-Set target variance and number of random variable moments that must meet this variance.
+Define the target variance and the number of **moment functions** used for estimation:
.. testcode::
- target_var = 1e-4
- n_moments = 10
+ target_var = 1e-4
+ n_moments = 10
-The first phase is the same as the first approach, but the initial samples are automatically determined
-as a sequence from 100 samples at the coarsest level to 10 samples at the finest level.
+As before, initialize and run a first batch of samples (automatically ranging from 100 samples on the coarsest level
+to 10 on the finest level):
.. testcode::
sampler.set_initial_n_samples()
sampler.schedule_samples()
+
running = 1
while running > 0:
running = 0
running += sampler.ask_sampling_pool_for_samples()
-The :py:class:`mlmc.quantity.quantity.Quantity` instance is created, for details see :ref:`examples quantity`
+Creating the Quantity and Moment Functions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Next, create a :py:class:`mlmc.quantity.quantity.Quantity` object representing the MLMC results.
.. testcode::
- root_quantity = mlmc.make_root_quantity(storage=sampler.sample_storage,
- q_specs=sampler.sample_storage.load_result_format())
+ root_quantity = mlmc.make_root_quantity(
+ storage=sampler.sample_storage,
+ q_specs=sampler.sample_storage.load_result_format()
+ )
-:code:`root_quantity` contains the structure of sample results and also allows access to their values.
+The :code:`root_quantity` contains both the data structure and the sampled values.
-In order to estimate moment values including variance, moment functions class (in this case Legendre polynomials) instance
-and :py:class:`mlmc.estimator.Estimate` instance are created.
+Now we create the **moment functions** and an :py:class:`mlmc.estimator.Estimate` instance to perform statistical estimation.
.. testcode::
true_domain = mlmc.Estimate.estimate_domain(root_quantity, sample_storage)
moments_fn = mlmc.Legendre(n_moments, true_domain)
- estimate_obj = mlmc.Estimate(root_quantity, sample_storage=sampler.sample_storage,
- moments_fn=moments_fn)
+ estimate_obj = mlmc.Estimate(
+ root_quantity,
+ sample_storage=sampler.sample_storage,
+ moments_fn=moments_fn
+ )
+
+Estimating Variances and Computational Cost
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-At first, the variance of moments and average execution time per sample at each level are estimated from already finished samples.
+From the finished samples, estimate the variance of moments and the average computational cost per sample for each level:
.. testcode::
variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler.n_finished_samples)
-Then, an initial estimate of the number of MLMC samples that should meet prescribed target variance is conducted.
+
+Estimating the Required Number of Samples
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Using the estimated variances and costs, determine the number of samples required to meet the **target variance**:
.. testcode::
from mlmc.estimator import estimate_n_samples_for_target_variance
- n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops,
- n_levels=sampler.n_levels)
+
+ n_estimated = estimate_n_samples_for_target_variance(
+ target_var, variances, n_ops, n_levels=sampler.n_levels
+ )
-Now it is time for our sampling algorithm that gradually schedules samples and refines the total number of samples
-until the number of estimated samples is greater than the number of scheduled samples.
+Iterative Sampling Process
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The sampling algorithm incrementally schedules additional samples until the estimated number of required samples is reached.
.. testcode::
while not sampler.process_adding_samples(n_estimated):
- # New estimation according to already finished samples
+ # Recalculate estimates based on completed samples
variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler._n_scheduled_samples)
- n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops,
- n_levels=sampler.n_levels)
-
+ n_estimated = estimate_n_samples_for_target_variance(
+ target_var, variances, n_ops, n_levels=sampler.n_levels
+ )
-Finally, wait until all samples are finished.
+Finally, wait until all samples are completed:
.. testcode::
@@ -131,6 +159,21 @@ Finally, wait until all samples are finished.
running = 0
running += sampler.ask_sampling_pool_for_samples()
-Since our sampling algorithm determines the number of samples according to moment variances,
-the type of moment functions (Legendre by default) might affect total number of MLMC samples.
+Notes
+-----
+
+- The sampling algorithm automatically adjusts the number of samples per level based on estimated **moment variances**.
+- The choice of moment functions (e.g. :class:`mlmc.Legendre`) influences the total number of samples needed.
+- For most cases, the default **Legendre polynomials** provide a good balance between accuracy and computational cost.
+
+
+Summary
+-------
+
+In this tutorial, you learned how to:
+- Schedule MLMC samples with a fixed number of samples per level.
+- Automatically adapt the number of samples to achieve a target variance.
+- Use moment functions and variance regression for optimal sample allocation.
+
+Continue to the next section for :ref:`examples results postprocessing`.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 27a83141..a329294f 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -1,29 +1,30 @@
-=====
-MLMC
-=====
+MLMC: Multilevel Monte Carlo method
+====================================
+
+MLMC is a Python library for efficient uncertainty quantification using the multilevel Monte Carlo method.
+
+It provides tools for:
+- Parallel sample scheduling
+- Generalized moment estimation
+- Probability density approximation
+- Advanced post-processing via the Quantity abstraction
.. image:: https://github.com/GeoMop/MLMC/workflows/package/badge.svg
- :target: https://github.com/GeoMop/MLMC/actions
+ :target: https://github.com/GeoMop/MLMC/actions
.. image:: https://img.shields.io/pypi/v/mlmc.svg
- :target: https://pypi.org/project/mlmc/
+ :target: https://pypi.org/project/mlmc/
.. image:: https://img.shields.io/pypi/pyversions/mlmc.svg
- :target: https://pypi.org/project/mlmc/
-
-MLMC provides tools for the multilevel Monte Carlo method, which is theoretically described by `M. Giles `_.
-
-mlmc package includes:
-
-- samples scheduling
-- estimation of generalized moment functions
-- probability density function approximation
-- advanced post-processing with our Quantity structure
-
+ :target: https://pypi.org/project/mlmc/
Installation
-============
-mlmc can be installed via `pip `_
+------------
+Install via pip:
+
+.. code-block:: bash
-.. code-block:: none
+ pip install mlmc
- pip install mlmc
+Documentation
+-------------
+Tutorials and API reference are available below.
diff --git a/docs/source/mlmc.plot.rst b/docs/source/mlmc.plot.rst
index 47f0bd09..f5faf6cb 100644
--- a/docs/source/mlmc.plot.rst
+++ b/docs/source/mlmc.plot.rst
@@ -1,4 +1,4 @@
-mlmc.plot
+mlmc.plot package
=================
.. automodule:: mlmc.plot
@@ -6,29 +6,31 @@ mlmc.plot
:undoc-members:
:show-inheritance:
+----
+
Submodules
----------
-mlmc.plot.plots module
-----------------------
+.. toctree::
+ :maxdepth: 1
-.. automodule:: mlmc.plot.plots
- :members:
- :undoc-members:
- :show-inheritance:
+ mlmc.plot.plots
+ mlmc.plot.violinplot
-mlmc.plot.violinplot module
----------------------------
+----
-.. automodule:: mlmc.plot.violinplot
+mlmc.plot.plots
+---------------
+
+.. automodule:: mlmc.plot.plots
:members:
:undoc-members:
:show-inheritance:
-Module contents
----------------
+mlmc.plot.violinplot
+--------------------
-.. automodule:: mlmc.plot
+.. automodule:: mlmc.plot.violinplot
:members:
:undoc-members:
:show-inheritance:
diff --git a/docs/source/mlmc.tool.rst b/docs/source/mlmc.tool.rst
index 6e0bdc51..8234e205 100644
--- a/docs/source/mlmc.tool.rst
+++ b/docs/source/mlmc.tool.rst
@@ -6,14 +6,6 @@ mlmc.tool
Submodules
----------
-mlmc.tool.context\_statprof module
-----------------------------------
-
-.. automodule:: mlmc.tool.context_statprof
- :members:
- :undoc-members:
- :show-inheritance:
-
mlmc.tool.distribution module
-----------------------------
diff --git a/examples/quantity_operations.py b/examples/quantity_operations.py
index e8cd5b5a..e4b0ecc6 100644
--- a/examples/quantity_operations.py
+++ b/examples/quantity_operations.py
@@ -1,31 +1,63 @@
+"""
+Quantity examples and quick reference.
+
+This module demonstrates basic operations with mlmc Quantity objects.
+It shows how to:
+ - create a synthetic root quantity (result format specified below),
+ - compute mean estimates,
+ - estimate moments and covariance,
+ - select sub-quantities, time-interpolate and slice,
+ - perform arithmetic and NumPy ufuncs with Quantity objects,
+ - perform selections using conditions and masks.
+
+Result format used in the synthetic example:
+
+result_format = [
+ mlmc.quantity.quantity_spec.QuantitySpec(
+ name="length", unit="m", shape=(2, 1), times=[1, 2, 3], locations=['10', '20']
+ ),
+ mlmc.quantity.quantity_spec.QuantitySpec(
+ name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=['30', '40']
+ ),
+]
+
+Meaning:
+ - sample results contain data on two quantities ("length" and "width"),
+ - each quantity is evaluated at three times [1,2,3] and two locations,
+ - each quantity can also have its own internal shape.
+"""
import numpy as np
import mlmc.quantity.quantity_spec
from mlmc.quantity.quantity import make_root_quantity
import mlmc.quantity.quantity_estimate
from examples.synthetic_quantity import create_sampler
-# An overview of basic Quantity operations
-
-######################################
-### Create synthetic quantity ###
-######################################
-# Synthetic Quantity with the following result format
-# result_format = [
-# mlmc.quantity.quantity_spec.QuantitySpec(name="length", unit="m", shape=(2, 1), times=[1, 2, 3], locations=['10', '20']),
-# mlmc.quantity.quantity_spec.QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=['30', '40']),
-# ]
-# Meaning: sample results contain data on two quantities in three time steps [1, 2, 3] and in two locations,
-# each quantity can have different shape
+# -----------------------------------------------------------------------------
+# Create synthetic quantity
+# -----------------------------------------------------------------------------
+"""
+Create a synthetic sampler + factory and produce a root Quantity instance.
+- create_sampler() returns: (sampler, simulation_factory, moments_fn)
+- make_root_quantity(storage, q_specs) builds a Quantity object that represents the
+ whole result data structure (root with named sub-quantities).
+"""
sampler, simulation_factory, moments_fn = create_sampler()
root_quantity = make_root_quantity(sampler.sample_storage, simulation_factory.result_format())
-# root_quantity is mlmc.quantity.quantity.Quantity instance and represents the whole result data,
+# root_quantity is an mlmc.quantity.quantity.Quantity instance and represents the whole result data,
# it contains two sub-quantities named "length" and "width"
-###################################
-#### Mean estimates #####
-###################################
-# To get estimated mean of a quantity:
+# -----------------------------------------------------------------------------
+# Mean estimates
+# -----------------------------------------------------------------------------
+"""
+Compute and inspect mean estimates for a Quantity.
+
+- estimate_mean(quantity) returns a QuantityMean instance that contains:
+ - .mean : mean Quantity
+ - .var : variance information
+ - .l_vars : level variances (if available)
+"""
root_quantity_mean = mlmc.quantity.quantity_estimate.estimate_mean(root_quantity)
# root_quantity_mean is an instance of mlmc.quantity.QuantityMean
# To get overall mean value:
@@ -35,55 +67,60 @@
# To get level variance value:
root_quantity_mean.l_vars
-#########################################################
-#### Estimate moments and covariance matrix #####
-#########################################################
-# Create a quantity representing moments
+# -----------------------------------------------------------------------------
+# Estimate moments and covariance matrix
+# -----------------------------------------------------------------------------
+"""
+Construct moments and covariance quantities from a root Quantity.
+
+- moments(root_quantity, moments_fn=moments_fn) returns a Quantity of moments.
+- estimate_mean(moments_quantity) computes means for those moments.
+- covariance(root_quantity, moments_fn=moments_fn) returns a Quantity describing covariance.
+"""
moments_quantity = mlmc.quantity.quantity_estimate.moments(root_quantity, moments_fn=moments_fn)
moments_mean = mlmc.quantity.quantity_estimate.estimate_mean(moments_quantity)
# Central moments:
central_root_quantity = root_quantity - root_quantity_mean.mean
-central_moments_quantity = mlmc.quantity.quantity_estimate.moments(central_root_quantity, moments_fn=moments_fn)
+central_moments_quantity = mlmc.quantity.quantity_estimate.moments(
+ central_root_quantity, moments_fn=moments_fn
+)
central_moments_mean = mlmc.quantity.quantity_estimate.estimate_mean(central_moments_quantity)
# Create a quantity representing covariance matrix
covariance_quantity = mlmc.quantity.quantity_estimate.covariance(root_quantity, moments_fn=moments_fn)
cov_mean = mlmc.quantity.quantity_estimate.estimate_mean(covariance_quantity)
-# Both moments() and covariance() calls return mlmc.quantity.quantity.Quantity instance
+# Both moments() and covariance() calls return mlmc.quantity.quantity.Quantity instances
-##################################
-### Quantity selection ####
-##################################
-# According to the result_format, tt is possible to select items from a quantity
+# -----------------------------------------------------------------------------
+# Quantity selection
+# -----------------------------------------------------------------------------
+"""
+Examples of indexing and time/location selection on a Quantity.
+
+According to the result_format you can select by name, then time, then location.
+Selecting location before time is not supported.
+"""
length = root_quantity["length"] # Get quantity with name="length"
width = root_quantity["width"] # Get quantity with name="width"
-# length and width are still mlmc.quantity.quantity.Quantity instances
-# To get a quantity at particular time:
+# To get a quantity at a particular (interpolated) time:
length_locations = length.time_interpolation(2.5)
# length_locations represents results for all locations of quantity named "length" at the time 2.5
-# To get quantity at particular location
+# To get quantity at particular location:
length_result = length_locations['10']
-# length_result represents results shape=(2, 1) of quantity named "length" at the time 2,5 and location '10'
-
-# Now it is possible to slice Quantity length_result the same way as np.ndarray
-# For example:
-# length_result[1, 0]
-# length_result[:, 0]
-# length_result[:, :]
-# length_result[:1, :1]
-# length_result[:2, ...]
-
-# Keep in mind:
-# - all derived quantities such as length_locations and length_result, ... are still mlmc.quantity.quantity.Quantity instances
-# - selecting location before time is not supported!
-
-###################################
-#### Binary operations #####
-###################################
-# Following operations are supported
-# Addition of compatible quantities
+# length_result represents shape=(2, 1) data of "length" at time 2.5 and location '10'
+
+# You can slice Quantity like an ndarray:
+# length_result[1, 0], length_result[:, 0], length_result[:2, ...], etc.
+
+# -----------------------------------------------------------------------------
+# Binary operations
+# -----------------------------------------------------------------------------
+"""
+Supported arithmetic operations between compatible Quantity objects and scalars.
+The result is a Quantity instance with the same result_format structure.
+"""
quantity = root_quantity + root_quantity
quantity = root_quantity + root_quantity + root_quantity
@@ -96,11 +133,13 @@
quantity_const_mod = root_quantity % const
quantity_add_mult = root_quantity + root_quantity * const
-
-###################################
-#### NumPy universal functions ####
-###################################
-# Examples of tested NumPy universal functions:
+# -----------------------------------------------------------------------------
+# NumPy universal functions (ufuncs)
+# -----------------------------------------------------------------------------
+"""
+Many NumPy ufuncs are supported and return Quantity instances:
+- np.add, np.max, np.sin, np.sum, np.maximum, np.divide, np.arctan2, ...
+"""
quantity_np_add = np.add(root_quantity, root_quantity)
quantity_np_max = np.max(root_quantity, axis=0, keepdims=True)
quantity_np_sin = np.sin(root_quantity)
@@ -112,10 +151,21 @@
quantity_np_add_const = np.add(x, root_quantity)
quantity_np_arctan2_cosnt = np.arctan2(x, root_quantity)
-################################################
-#### Quantity selection by a condition ####
-################################################
-# Method select returns mlmc.quantity.quantity.Quantity instance
+# -----------------------------------------------------------------------------
+# Quantity selection by a condition
+# -----------------------------------------------------------------------------
+"""
+The select(condition) method extracts elements of a Quantity according to a boolean condition;
+it returns a Quantity (masking is internal and shape-aware).
+
+Examples:
+ - selected_quantity = root_quantity.select(0 < root_quantity)
+ - quantity_add_select = quantity_add.select(root_quantity < quantity_add)
+ - root_quantity_selected = root_quantity.select(-1 != root_quantity)
+
+You can combine conditions via logical ufuncs (np.logical_or / np.logical_and), and
+you can explicitly pass a Quantity mask (mask is a Quantity instance).
+"""
selected_quantity = root_quantity.select(0 < root_quantity)
quantity_add = root_quantity + root_quantity
@@ -125,9 +175,11 @@
# Logical operation for more conditions is AND
quantity_add.select(root_quantity < quantity_add, root_quantity < 10)
-# User can use one of the logical NumPy universal functions
-selected_quantity_or = root_quantity.select(np.logical_or(0 < root_quantity, root_quantity < 10))
+# Use NumPy logical ufuncs for complex masks
+selected_quantity_or = root_quantity.select(
+ np.logical_or(0 < root_quantity, root_quantity < 10)
+)
-# It is possible to explicitly define the selection condition of one quantity by another quantity
-mask = np.logical_and(0 < root_quantity, root_quantity < 10) # mask is Quantity instance
+# Explicit mask Quantity
+mask = np.logical_and(0 < root_quantity, root_quantity < 10) # mask is a Quantity instance
q_bounded = root_quantity.select(mask)
diff --git a/examples/shooting/shooting_1D.py b/examples/shooting/shooting_1D.py
index 609d2d6a..12ba7036 100644
--- a/examples/shooting/shooting_1D.py
+++ b/examples/shooting/shooting_1D.py
@@ -16,141 +16,164 @@
# - create Quantity instance
# - approximate density
class ProcessShooting1D:
+ """
+ Example driver for a 1D shooting problem using the MLMC framework.
+
+ This tutorial class demonstrates how to:
+ - create a Sampler with storage and sampling pool,
+ - schedule and collect MLMC samples,
+ - estimate moments and build an approximate probability density function
+ of the quantity of interest.
+
+ The constructor runs a full example MLMC workflow when the module is executed.
+ """
def __init__(self):
+ """
+ Initialize parameters, create sampler, generate samples, collect them and postprocess.
+
+ The constructor sets up:
+ - MLMC level parameters,
+ - sampling and storage components,
+ - schedule and collection of samples,
+ - postprocessing including moment estimation and density approximation.
+ """
n_levels = 2
# Number of MLMC levels
+
step_range = [1, 1e-3]
# step_range [simulation step at the coarsest level, simulation step at the finest level]
+
level_parameters = ProcessShooting1D.determine_level_parameters(n_levels, step_range)
- # Determine each level parameters (in this case, simulation step at each level), level_parameters should be
- # simulation dependent
- self._sample_sleep = 0#30
- # Time to do nothing just to make sure the simulations aren't constantly checked, useful mainly for PBS run
- self._sample_timeout = 60
- # Maximum waiting time for running simulations
- self._adding_samples_coef = 0.1
- self._n_moments = 20
- # number of generalized statistical moments used for MLMC number of samples estimation
- self._quantile = 0.01
- # Setting parameters that are utilized when scheduling samples
- ###
- # MLMC run
- ###
+ # Determine each level parameters (in this case, simulation step at each level)
+
+ # Sampling control parameters
+ self._sample_sleep = 0 # seconds to sleep between checking sampling pool (was 30)
+ self._sample_timeout = 60 # maximum waiting time for running simulations (seconds)
+ self._adding_samples_coef = 0.1 # coefficient used when dynamically adding samples
+
+ # Moment / distribution settings
+ self._n_moments = 20 # number of generalized moments used for MLMC sample estimation
+ self._quantile = 0.01 # quantile used to estimate domain for moment functions
+
+ # MLMC run: create sampler and run sampling workflow
sampler = self.create_sampler(level_parameters=level_parameters)
- # Create sampler (mlmc.Sampler instance) - crucial class that controls MLMC run
self.generate_samples(sampler, n_samples=None, target_var=1e-3)
- # Generate MLMC samples, there are two ways:
- # 1) set exact number of samples at each level,
- # e.g. for 5 levels - self.generate_samples(sampler, n_samples=[1000, 500, 250, 100, 50])
- # 2) set target variance of MLMC estimates,
- # e.g. self.generate_samples(sampler, n_samples=None, target_var=1e-6)
+ # Generate MLMC samples. Two ways:
+ # 1) Provide exact n_samples per level: generate_samples(sampler, n_samples=[...])
+ # 2) Provide target variance and let algorithm decide counts: target_var=1e-6
+
self.all_collect(sampler)
- # Check if all samples are finished
- ###
- # Postprocessing
- ###
+ # Ensure all scheduled samples finished
+
+ # Postprocessing: compute moments and approximate distributions
self.process_results(sampler, n_levels)
- # Postprocessing, MLMC is finished at this point
def create_sampler(self, level_parameters):
"""
- Create:
- # sampling pool - the way sample simulations are executed
- # sample storage - stores sample results
- # sampler - controls MLMC execution
- :param level_parameters: list of lists
- :return: mlmc.sampler.Sampler instance
+ Create the Sampler with storage and sampling pool and return it.
+
+ Components created:
+ - sampling_pool: OneProcessPool (runs all samples in the same process)
+ - simulation_factory: instance of ShootingSimulation1D
+ - sample_storage: Memory() (in-memory storage)
+ - sampler: mlmc.sampler.Sampler
+
+ :param level_parameters: List of level parameter lists (simulation-dependent).
+ :return: mlmc.sampler.Sampler instance configured for this example.
"""
- # Create OneProcessPool - all run in the same process
+ # Use OneProcessPool for simplicity (sequential execution). For parallel local runs,
+ # replace with ProcessPool(n) or ThreadPool(n).
sampling_pool = OneProcessPool()
- # There is another option mlmc.sampling_pool.ProcessPool() - supports local parallel sample simulation run
- # sampling_pool = ProcessPool(n), n - number of parallel simulations, depends on computer architecture
- # Simulation configuration which is passed to simulation constructor
+ # Simulation configuration dictionary passed to simulation factory
simulation_config = {
"start_position": np.array([0, 0]),
"start_velocity": np.array([10, 0]),
"area_borders": np.array([-100, 200, -300, 400]),
"max_time": 10,
- "complexity": 2, # used for initial estimate of number of operations per sample
+ "complexity": 2, # used as prior for cost estimates
'fields_params': dict(model='gauss', dim=1, sigma=1, corr_length=0.1),
}
- # Create simulation factory, instance of class that inherits from mlmc.sim.simulation
+ # Simulation factory (constructs LevelSimulation instances internally)
simulation_factory = ShootingSimulation1D(config=simulation_config)
- # Create simple sample storage
- # Memory() storage keeps samples in computer main memory
+ # Lightweight in-memory sample storage
sample_storage = Memory()
- # We support also HDF file storage mlmc.sample_storage_hdf.SampleStorageHDF()
- # sample_storage = SampleStorageHDF(file_path=path_to_HDF5_file)
+ # Alternative: HDF storage (persistent) - sample_storage_hdf.SampleStorageHDF(...)
- # Create sampler
- # Controls the execution of MLMC
- sampler = Sampler(sample_storage=sample_storage, sampling_pool=sampling_pool, sim_factory=simulation_factory,
+ # Create and return the Sampler that orchestrates MLMC
+ sampler = Sampler(sample_storage=sample_storage,
+ sampling_pool=sampling_pool,
+ sim_factory=simulation_factory,
level_parameters=level_parameters)
return sampler
def generate_samples(self, sampler, n_samples=None, target_var=None):
"""
- Generate MLMC samples
- :param sampler: mlmc.sampler.Sampler instance
- :param n_samples: None or list, number of samples at each level
- :param target_var: target variance of MLMC estimates
- :return: None
+        Schedule and generate MLMC samples, optionally targeting a global variance.
+
+ :param sampler: mlmc.sampler.Sampler instance controlling sampling.
+ :param n_samples: None or list of exact sample counts per level.
+ :param target_var: Target variance for MLMC estimator (if not None).
+ :return: None (results are stored in sampler.sample_storage).
"""
- # The number of samples is set by user
+ # If user provided exact counts, set them; otherwise let sampler pick initial counts
if n_samples is not None:
sampler.set_initial_n_samples(n_samples)
- # The number of initial samples is determined automatically
else:
sampler.set_initial_n_samples()
- # Samples are scheduled and the program is waiting for all of them to be completed.
+
+ # Schedule initial samples and wait for them to complete
sampler.schedule_samples()
sampler.ask_sampling_pool_for_samples(sleep=self._sample_sleep, timeout=self._sample_timeout)
self.all_collect(sampler)
- # MLMC estimates target variance is set
+ # If a target variance is requested, iteratively estimate variances and add more samples
if target_var is not None:
- # The mlmc.quantity.quantity.Quantity instance is created
- # parameters 'storage' and 'q_specs' are obtained from sample_storage,
- # originally 'q_specs' is set in the simulation class
+ # Create a Quantity for the root results; needed for moments estimation
root_quantity = make_root_quantity(storage=sampler.sample_storage,
q_specs=sampler.sample_storage.load_result_format())
- # Moment functions object is created
- # The MLMC algorithm determines number of samples according to the moments variance,
- # Type of moment functions (Legendre by default) might affect the total number of MLMC samples
+ # Build moment functions (Legendre polynomials on estimated domain)
moments_fn = self.set_moments(root_quantity, sampler.sample_storage, n_moments=self._n_moments)
- estimate_obj = Estimate(root_quantity, sample_storage=sampler.sample_storage,
- moments_fn=moments_fn)
+ estimate_obj = Estimate(root_quantity, sample_storage=sampler.sample_storage, moments_fn=moments_fn)
- # Initial estimation of the number of samples at each level
+ # Initial variance and cost estimates from currently finished samples
variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler.n_finished_samples)
- # Firstly, the variance of moments and execution time of samples at each level are calculated from already finished samples
+
+ # Compute initial recommended number of samples for each level
n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops,
- n_levels=sampler.n_levels)
-
- #####
- # MLMC sampling algorithm - gradually schedules samples and refines the total number of samples
- #####
- # Loop until number of estimated samples is greater than the number of scheduled samples
- while not sampler.process_adding_samples(n_estimated, self._sample_sleep, self._adding_samples_coef,
- timeout=self._sample_timeout):
- # New estimation according to already finished samples
+ n_levels=sampler.n_levels)
+
+ # Gradually schedule additional samples until the estimate stabilizes
+ while not sampler.process_adding_samples(n_estimated,
+ self._sample_sleep,
+ self._adding_samples_coef,
+ timeout=self._sample_timeout):
+ # Re-estimate variance / ops using newly finished samples and recompute targets
variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler._n_scheduled_samples)
n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops,
n_levels=sampler.n_levels)
def set_moments(self, quantity, sample_storage, n_moments=25):
+ """
+ Create a Legendre-moments function object for the given quantity.
+
+ :param quantity: mlmc.quantity.quantity.Quantity instance (root quantity).
+ :param sample_storage: sample storage instance to estimate domain from samples.
+ :param n_moments: Number of moments (basis size) to construct.
+ :return: Legendre(n_moments, domain) instance to be used for moment estimation.
+ """
true_domain = Estimate.estimate_domain(quantity, sample_storage, quantile=self._quantile)
return Legendre(n_moments, true_domain)
def all_collect(self, sampler):
"""
- Collect samples, wait until all samples are finished
- :param sampler: mlmc.sampler.Sampler object
+ Wait until all scheduled samples finish by repeatedly collecting finished samples.
+
+ :param sampler: mlmc.sampler.Sampler instance.
:return: None
"""
running = 1
@@ -161,37 +184,44 @@ def all_collect(self, sampler):
def process_results(self, sampler, n_levels):
"""
- Process MLMC results
- :param sampler: mlmc.sampler.Sampler instance
- :param n_levels: int, number of MLMC levels
+ Postprocess completed samples: estimate moments and approximate distribution.
+
+ Steps:
+ - Load result format and build Quantity objects
+ - Choose a target item inside the quantity and check its mean
+ - Compute estimated domain, construct moment functions (Legendre)
+ - Estimate moments and their variances, compute distribution approximation
+
+ :param sampler: mlmc.sampler.Sampler instance containing sample_storage.
+ :param n_levels: int, number of MLMC levels used in the run.
:return: None
"""
sample_storage = sampler.sample_storage
- # Load result format from the sample storage
+
+ # Load result format (list of QuantitySpec) and create root Quantity
result_format = sample_storage.load_result_format()
- # Create Quantity instance representing our real quantity of interest
root_quantity = make_root_quantity(sample_storage, result_format)
- # It is possible to access items of the quantity according to the result format
+ # Access an example item from the nested quantity to demonstrate API
target = root_quantity['target']
time = target[10]
position = time['0']
q_value = position[0]
- # Compute moments, first estimate domain of moment functions
+ # Estimate domain for moment functions from sample data and build moments function
estimated_domain = Estimate.estimate_domain(q_value, sample_storage, quantile=self._quantile)
moments_fn = Legendre(self._n_moments, estimated_domain)
- # Create estimator for the quantity
+ # Create an estimator and compute moment means and variances
estimator = Estimate(quantity=q_value, sample_storage=sample_storage, moments_fn=moments_fn)
- # Estimate moment means and variances
means, vars = estimator.estimate_moments(moments_fn)
- # Generally, root quantity has different domain than its items
+
+ # For root-level moments use separate estimated domain
root_quantity_estimated_domain = Estimate.estimate_domain(root_quantity, sample_storage,
- quantile=self._quantile)
+ quantile=self._quantile)
root_quantity_moments_fn = Legendre(self._n_moments, root_quantity_estimated_domain)
- # There is another possible approach to calculating all moments at once and then select desired quantity
+ # Alternative approach: compute moments for the entire root quantity and then extract target
moments_quantity = moments(root_quantity, moments_fn=root_quantity_moments_fn, mom_at_bottom=True)
moments_mean = estimate_mean(moments_quantity)
target_mean = moments_mean['target']
@@ -199,35 +229,45 @@ def process_results(self, sampler, n_levels):
location_mean = time_mean['0'] # locations: ['0']
value_mean = location_mean[0] # result shape: (1,)
+ # Quick assertion expected for the tutorial problem (value_mean should be 1)
assert value_mean.mean[0] == 1
+
+ # Build approximate density for the selected quantity
self.approx_distribution(estimator, n_levels, tol=1e-8)
def approx_distribution(self, estimator, n_levels, tol=1.95):
"""
- Probability density function approximation
- :param estimator: mlmc.estimator.Estimate instance, it contains quantity for which the density is approximated
- :param n_levels: int, number of MLMC levels
- :param tol: Tolerance of the fitting problem, with account for variances in moments.
+ Construct and display an approximate probability density function for the quantity.
+
+ :param estimator: mlmc.estimator.Estimate instance which contains the quantity and methods
+ for constructing the density approximation.
+ :param n_levels: int, number of MLMC levels (used for optional plotting of raw samples).
+ :param tol: Tolerance parameter for the density fitting problem (default 1.95).
:return: None
"""
distr_obj, result, _, _ = estimator.construct_density(tol=tol)
distr_plot = Distribution(title="distributions", error_plot=None)
distr_plot.add_distribution(distr_obj)
+ # Optionally overlay raw samples for single-level case
if n_levels == 1:
samples = estimator.get_level_samples(level_id=0)[..., 0]
distr_plot.add_raw_samples(np.squeeze(samples))
+
distr_plot.show(None)
distr_plot.reset()
@staticmethod
def determine_level_parameters(n_levels, step_range):
"""
- Determine level parameters,
- In this case, a step of fine simulation at each level
- :param n_levels: number of MLMC levels
- :param step_range: simulation step range
- :return: list of lists
+ Determine level parameters for MLMC from the coarsest and finest step sizes.
+
+ The default strategy interpolates in log-space between step_range[0] and step_range[1]
+ across n_levels and returns a list of single-entry lists containing the step for each level.
+
+ :param n_levels: int number of MLMC levels.
+ :param step_range: Sequence [coarse_step, fine_step] where coarse_step > fine_step.
+ :return: list of lists where each inner list contains the parameter(s) for that level.
"""
assert step_range[0] > step_range[1]
level_parameters = []
diff --git a/examples/shooting/shooting_1D_mcqmc.py b/examples/shooting/shooting_1D_mcqmc.py
new file mode 100644
index 00000000..af251f06
--- /dev/null
+++ b/examples/shooting/shooting_1D_mcqmc.py
@@ -0,0 +1,274 @@
+import numpy as np
+import mlmc.estimator as est
+from mlmc.estimator import Estimate, estimate_n_samples_for_target_variance
+from mlmc.sampler import Sampler
+from mlmc.sample_storage import Memory
+from mlmc.sampling_pool import OneProcessPool
+from examples.shooting.simulation_shooting_1D import ShootingSimulation1D
+from mlmc.quantity.quantity import make_root_quantity
+from mlmc.quantity.quantity_estimate import moments, estimate_mean
+from mlmc.moments import Legendre
+from mlmc.plot.plots import Distribution
+
+
+# Tutorial class for 1D shooting simulation, includes
+# - samples scheduling
+# - process results:
+# - create Quantity instance
+# - approximate density
+class ProcessShooting1D:
+ """
+ Example driver for a 1D shooting problem using the MLMC framework.
+
+ Demonstrates:
+ - creating the Sampler, sampling_pool and sample storage,
+ - scheduling and collecting MLMC samples,
+ - estimating moments and building an approximate PDF for a quantity of interest.
+ """
+
+ def __init__(self):
+ """
+ Initialize parameters, create sampler, schedule and collect samples, and postprocess.
+
+ The constructor executes a full example run:
+ - determine level parameters,
+ - create sampler,
+ - generate samples (either user-specified or determined from a target variance),
+ - collect all results,
+ - postprocess and approximate distribution of the chosen quantity.
+ """
+ n_levels = 3
+ # Number of MLMC levels
+
+ step_range = [1, 1e-3]
+ # step_range [simulation step at the coarsest level, simulation step at the finest level]
+
+ level_parameters = ProcessShooting1D.determine_level_parameters(n_levels, step_range)
+ # Determine each level parameters (in this case, simulation step at each level)
+
+ self._sample_sleep = 0 # seconds to sleep while polling sampling pool
+ self._sample_timeout = 60 # maximum waiting time for sampling pool operations (seconds)
+ self._adding_samples_coef = 0.1 # coefficient used when adding samples adaptively
+
+ self._n_moments = 20
+ # number of generalized statistical moments used to estimate the required number of MLMC samples
+ self._quantile = 0.01
+ # quantile used to estimate domain of distribution for moment basis
+
+ # MLMC run
+ sampler = self.create_sampler(level_parameters=level_parameters)
+ # Create sampler (mlmc.Sampler instance) - controls MLMC run
+ self.generate_samples(sampler, n_samples=None, target_var=1e-3)
+ # Generate MLMC samples (either explicit counts or target variance)
+ self.all_collect(sampler)
+ # Wait for all scheduled samples to finish
+
+ # Postprocessing
+ self.process_results(sampler, n_levels)
+ # Postprocessing complete
+
+ def create_sampler(self, level_parameters):
+ """
+ Create and configure sampler components: sampling pool, simulation factory and storage.
+
+ :param level_parameters: list of per-level parameter lists (simulation-dependent).
+ :return: mlmc.sampler.Sampler instance configured with Memory storage and OneProcessPool.
+ """
+ # Use OneProcessPool for sequential runs. Replace with ProcessPool/ThreadPool for parallel runs.
+ sampling_pool = OneProcessPool()
+
+ # Simulation configuration passed into the simulation factory
+ simulation_config = {
+ "start_position": np.array([0, 0]),
+ "start_velocity": np.array([10, 0]),
+ "area_borders": np.array([-100, 200, -300, 400]),
+ "max_time": 10,
+ "complexity": 2, # used for initial estimate of operations per sample
+ "fields_params": dict(model='gauss', dim=1, sigma=1, corr_length=0.1),
+ }
+
+ # Create simulation factory (produces LevelSimulation instances)
+ simulation_factory = ShootingSimulation1D(config=simulation_config)
+
+ # In-memory sample storage (alternative: HDF-based storage)
+ sample_storage = Memory()
+
+ # Create and return the Sampler orchestrating MLMC
+ sampler = Sampler(sample_storage=sample_storage,
+ sampling_pool=sampling_pool,
+ sim_factory=simulation_factory,
+ level_parameters=level_parameters)
+ return sampler
+
+ def generate_samples(self, sampler, n_samples=None, target_var=None):
+ """
+ Schedule and generate MLMC samples. If target_var is provided, iteratively determine
+ and add samples until the MLMC variance target is reached.
+
+ :param sampler: mlmc.sampler.Sampler instance controlling sampling.
+ :param n_samples: Optional list of exact sample counts per level. If provided, used as initial counts.
+ :param target_var: Optional float target variance for MLMC estimator. If provided, algorithm estimates counts.
+ :return: None
+ """
+ # Set initial number of samples (user-specified or default)
+ if n_samples is not None:
+ sampler.set_initial_n_samples(n_samples)
+ else:
+ sampler.set_initial_n_samples()
+
+ # Schedule and start the initial batch of samples, then wait for completion
+ sampler.schedule_samples()
+ sampler.ask_sampling_pool_for_samples(sleep=self._sample_sleep, timeout=self._sample_timeout)
+ self.all_collect(sampler)
+
+ # If a target_var is provided, compute required sample counts and add samples iteratively
+ if target_var is not None:
+ # Build a root quantity (required for moments estimation)
+ root_quantity = make_root_quantity(storage=sampler.sample_storage,
+ q_specs=sampler.sample_storage.load_result_format())
+
+ # Create moment functions (Legendre) on estimated domain
+ moments_fn = self.set_moments(root_quantity, sampler.sample_storage, n_moments=self._n_moments)
+ estimate_obj = Estimate(root_quantity, sample_storage=sampler.sample_storage, moments_fn=moments_fn)
+
+ # Estimate variances and costs from finished samples
+ variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler.n_finished_samples)
+
+ # Compute estimated number of samples per level for the target variance
+ n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops,
+ n_levels=sampler.n_levels)
+
+ # Iteratively add samples until the scheduler has scheduled enough
+ while not sampler.process_adding_samples(n_estimated, self._sample_sleep, self._adding_samples_coef,
+ timeout=self._sample_timeout):
+ variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler._n_scheduled_samples)
+ n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops,
+ n_levels=sampler.n_levels)
+
+ def set_moments(self, quantity, sample_storage, n_moments=25):
+ """
+ Build Legendre moment basis on the domain estimated from samples.
+
+ :param quantity: Quantity (or root quantity) used to estimate domain.
+ :param sample_storage: Sample storage used to compute domain estimate.
+ :param n_moments: Number of Legendre basis functions / moments.
+ :return: Legendre(n_moments, domain) instance.
+ """
+ true_domain = Estimate.estimate_domain(quantity, sample_storage, quantile=self._quantile)
+ return Legendre(n_moments, true_domain)
+
+ def all_collect(self, sampler):
+ """
+ Repeatedly collect finished samples until none are running.
+
+ :param sampler: mlmc.sampler.Sampler instance to poll for finished samples.
+ :return: None
+ """
+ running = 1
+ while running > 0:
+ running = 0
+ running += sampler.ask_sampling_pool_for_samples()
+ print("N running: ", running)
+
+ def process_results(self, sampler, n_levels):
+ """
+ Postprocess completed samples:
+ - build Quantity objects,
+ - compute moment means/variances,
+ - perform checks and consistency tests,
+ - approximate distribution for the target quantity.
+
+ :param sampler: mlmc.sampler.Sampler instance (contains sample_storage).
+ :param n_levels: int, number of MLMC levels used in the run.
+ :return: None
+ """
+ sample_storage = sampler.sample_storage
+
+ # Load result format and create root quantity
+ result_format = sample_storage.load_result_format()
+ root_quantity = make_root_quantity(sample_storage, result_format)
+
+ print("N collected ", sample_storage.get_n_collected())
+
+ # Access a nested item (example of how to index Quantity)
+ target = root_quantity['target']
+ time = target[10]
+ position = time['0']
+ q_value = position[0]
+
+ # Estimate domain from samples and build moments function
+ estimated_domain = Estimate.estimate_domain(q_value, sample_storage, quantile=self._quantile)
+ moments_fn = Legendre(self._n_moments, estimated_domain)
+
+ # Estimator for the selected quantity
+ estimator = Estimate(quantity=q_value, sample_storage=sample_storage, moments_fn=moments_fn)
+
+ # Compute moment means and variances
+ means, vars = estimator.estimate_moments(moments_fn)
+
+ # Diagnostics and consistency checks
+ #est.plot_checks(quantity=q_value, sample_storage=sample_storage, moments_fn=moments_fn)
+ est.consistency_check(quantity=q_value, sample_storage=sample_storage)
+ estimator.kurtosis_check(q_value)
+
+ # Optionally compute moments for full root quantity and extract target mean
+ root_quantity_estimated_domain = Estimate.estimate_domain(root_quantity, sample_storage,
+ quantile=self._quantile)
+ root_quantity_moments_fn = Legendre(self._n_moments, root_quantity_estimated_domain)
+ moments_quantity = moments(root_quantity, moments_fn=root_quantity_moments_fn, mom_at_bottom=True)
+ moments_mean = estimate_mean(moments_quantity)
+ target_mean = moments_mean['target']
+ time_mean = target_mean[10]
+ location_mean = time_mean['0']
+ value_mean = location_mean[0]
+
+ # Example assertion for tutorial (problem-dependent)
+ assert value_mean.mean[0] == 1
+
+ # Build and show approximate density
+ self.approx_distribution(estimator, n_levels, tol=1e-8)
+
+ def approx_distribution(self, estimator, n_levels, tol=1.95):
+ """
+ Approximate and display the probability density function for the estimator's quantity.
+
+ :param estimator: mlmc.estimator.Estimate instance (contains quantity and methods for density construction).
+ :param n_levels: int, number of MLMC levels (used for optional raw-sample overlay).
+ :param tol: Tolerance for density fitting (accounts for moment variances).
+ :return: None
+ """
+ distr_obj, result, _, _ = estimator.construct_density(tol=tol)
+ distr_plot = Distribution(title="distributions", error_plot=None)
+ distr_plot.add_distribution(distr_obj)
+
+ if n_levels == 1:
+ samples = estimator.get_level_samples(level_id=0)[..., 0]
+ distr_plot.add_raw_samples(np.squeeze(samples))
+
+ distr_plot.show(None)
+ distr_plot.reset()
+
+ @staticmethod
+ def determine_level_parameters(n_levels, step_range):
+ """
+ Determine parameters for each MLMC level (here a single step size per level).
+
+ Interpolates between step_range[0] (coarse) and step_range[1] (fine) across levels.
+
+ :param n_levels: int number of MLMC levels.
+ :param step_range: [coarse_step, fine_step] with coarse_step > fine_step.
+ :return: list of lists; each inner list contains the step parameter for that level.
+ """
+ assert step_range[0] > step_range[1]
+ level_parameters = []
+ for i_level in range(n_levels):
+ if n_levels == 1:
+ level_param = 1
+ else:
+ level_param = i_level / (n_levels - 1)
+ level_parameters.append([step_range[0] ** (1 - level_param) * step_range[1] ** level_param])
+ return level_parameters
+
+
+if __name__ == "__main__":
+ ProcessShooting1D()
diff --git a/examples/shooting/shooting_2D.py b/examples/shooting/shooting_2D.py
index b58ebe73..06d3bbcb 100644
--- a/examples/shooting/shooting_2D.py
+++ b/examples/shooting/shooting_2D.py
@@ -14,55 +14,85 @@
class ProcessShooting2D:
+ """
+ Example driver for a 2D shooting simulation using the MLMC framework.
+
+ Demonstrates:
+ - building sampler, sampling pool and sample storage,
+ - scheduling and collecting MLMC samples,
+ - estimating moments and approximating probability densities for quantities of interest.
+ """
def __init__(self):
+ """
+ Initialize parameters, create sampler, schedule and collect samples, and postprocess.
+
+ The constructor runs a full example MLMC workflow:
+ - determine level parameters,
+ - create sampler and sampling pool,
+ - generate MLMC samples (either user-specified or adaptively from target variance),
+ - wait for completion and postprocess results.
+ """
n_levels = 5
# Number of MLMC levels
step_range = [0.05, 0.005]
# step_range [simulation step at the coarsest level, simulation step at the finest level]
level_parameters = ProcessShooting2D.determine_level_parameters(n_levels, step_range)
- # Determine each level parameters (in this case, simulation step at each level), level_parameters should be
- # simulation dependent
- self._sample_sleep = 0 # 30
- # Time to do nothing just to make sure the simulations aren't constantly checked, useful mainly for PBS run
+ # Determine each level parameters (in this case, simulation step at each level)
+
+ self._sample_sleep = 0
+ # Seconds to sleep while polling sampling pool
self._sample_timeout = 60
- # Maximum waiting time for running simulations
+ # Maximum waiting time for sampling pool operations (seconds)
self._adding_samples_coef = 0.1
self._n_moments = 20
# number of generalized statistical moments used for MLMC number of samples estimation
self._quantile = 0.001
- # Setting parameters that are utilized when scheduling samples
- ###
+ # quantile used to estimate domain for moment basis functions
+
# MLMC run
- ###
sampler = self.create_sampler(level_parameters=level_parameters)
- # Create sampler (mlmc.Sampler instance) - crucial class that controls MLMC run
+ # Create sampler (mlmc.Sampler instance) - controls MLMC run
self.generate_samples(sampler, n_samples=None, target_var=1e-3)
- # Generate MLMC samples, there are two ways:
- # 1) set exact number of samples at each level,
- # e.g. for 5 levels - self.generate_samples(sampler, n_samples=[1000, 500, 250, 100, 50])
- # 2) set target variance of MLMC estimates,
- # e.g. self.generate_samples(sampler, n_samples=None, target_var=1e-6)
+ # Generate MLMC samples: either explicit counts or by target variance
self.all_collect(sampler)
- # Check if all samples are finished
- ###
+ # Wait until all samples finish
+
# Postprocessing
- ###
self.process_results(sampler, n_levels)
- # Postprocessing, MLMC is finished at this point
+ # Postprocessing complete
def create_estimator(self, quantity, sample_storage):
+ """
+ Create an Estimate object for a given quantity and storage using Legendre moments.
+
+ :param quantity: Quantity object (mlmc.quantity.quantity.Quantity item) to estimate.
+ :param sample_storage: sample storage instance used to estimate domain.
+ :return: mlmc.estimator.Estimate instance configured for the quantity.
+ """
estimated_domain = Estimate.estimate_domain(quantity, sample_storage, quantile=self._quantile)
moments_fn = Legendre(self._n_moments, estimated_domain)
- # Create estimator for your quantity
- return Estimate(quantity=quantity, sample_storage=sample_storage,
- moments_fn=moments_fn)
+ return Estimate(quantity=quantity, sample_storage=sample_storage, moments_fn=moments_fn)
def process_results(self, sampler, n_levels):
+ """
+ Postprocess completed MLMC samples: form quantities, create estimators and approximate distributions.
+
+ Steps performed:
+ - load result format and create root Quantity,
+ - extract x and y components of the target item,
+ - build per-component estimators,
+ - compute root-level moments and means,
+ - approximate and display distributions for x and y.
+
+ :param sampler: mlmc.sampler.Sampler instance containing completed samples.
+ :param n_levels: int, number of MLMC levels used.
+ :return: None
+ """
sample_storage = sampler.sample_storage
# Load result format from sample storage
result_format = sample_storage.load_result_format()
- # Create quantity instance representing your real quantity of interest
+ # Create quantity instance representing the quantity of interest
root_quantity = make_root_quantity(sample_storage, result_format)
# You can access item of quantity according to result format
@@ -72,15 +102,15 @@ def process_results(self, sampler, n_levels):
x_quantity_value = position[0]
y_quantity_value = position[1]
- # Create estimator for quantities
+ # Create estimators for each coordinate component
x_estimator = self.create_estimator(x_quantity_value, sample_storage)
y_estimator = self.create_estimator(y_quantity_value, sample_storage)
+ # Optionally compute moments for full root quantity (and extract means)
root_quantity_estimated_domain = Estimate.estimate_domain(root_quantity, sample_storage,
quantile=self._quantile)
root_quantity_moments_fn = Legendre(self._n_moments, root_quantity_estimated_domain)
- # There is another possible approach to calculating all moments at once and then choose quantity
moments_quantity = moments(root_quantity, moments_fn=root_quantity_moments_fn, mom_at_bottom=True)
moments_mean = estimate_mean(moments_quantity)
target_mean = moments_mean['target']
@@ -88,15 +118,17 @@ def process_results(self, sampler, n_levels):
location_mean = time_mean['0'] # locations: ['0']
value_mean = location_mean[0] # result shape: (1,)
+ # Display approximated distributions for both coordinates
self.approx_distribution(x_estimator, n_levels, tol=1e-8)
self.approx_distribution(y_estimator, n_levels, tol=1e-8)
def approx_distribution(self, estimator, n_levels, tol=1.95):
"""
- Probability density function approximation
- :param estimator: mlmc.estimator.Estimate instance, it contains quantity for which the density is approximated
- :param n_levels: int, number of MLMC levels
- :param tol: Tolerance of the fitting problem, with account for variances in moments.
+ Approximate and display the probability density function for the estimator's quantity.
+
+ :param estimator: mlmc.estimator.Estimate instance providing construct_density().
+ :param n_levels: int, number of MLMC levels (used to optionally overlay raw samples).
+ :param tol: float, tolerance for density-fitting routine (affects regularization with moment variances).
:return: None
"""
distr_obj, result, _, _ = estimator.construct_density(tol=tol)
@@ -111,26 +143,27 @@ def approx_distribution(self, estimator, n_levels, tol=1.95):
def create_sampler(self, level_parameters):
"""
- Simulation dependent configuration
- :return: mlmc.sampler instance
+ Create sampler, sampling pool and simulation factory for the 2D shooting example.
+
+ :param level_parameters: list of per-level parameter lists.
+ :return: mlmc.sampler.Sampler instance configured with Memory storage and OneProcessPool.
"""
- # Create Pbs sampling pool
sampling_pool = OneProcessPool()
simulation_config = {
"start_position": np.array([0, 0]),
"start_velocity": np.array([10, 0]),
- "area_borders": np.array([-100, 200, -300, 400]),
+ "area_borders": np.array([-100, 200, -300, 400]),
"max_time": 10,
"complexity": 2, # used for initial estimate of number of operations per sample
- 'fields_params': dict(model='gauss', dim=1, sigma=1, corr_length=0.1),
+ "fields_params": dict(model='gauss', dim=1, sigma=1, corr_length=0.1),
}
# Create simulation factory
simulation_factory = ShootingSimulation2D(config=simulation_config)
- # Create HDF sample storage
+ # In-memory storage (Memory). Replace with HDF storage if persistence is needed.
sample_storage = Memory()
- # Create sampler, it manages sample scheduling and so on
+ # Create sampler which orchestrates scheduling and storage
sampler = Sampler(sample_storage=sample_storage, sampling_pool=sampling_pool, sim_factory=simulation_factory,
level_parameters=level_parameters)
@@ -138,59 +171,52 @@ def create_sampler(self, level_parameters):
def generate_samples(self, sampler, n_samples=None, target_var=None):
"""
- Generate MLMC samples
- :param sampler: mlmc.sampler.Sampler instance
- :param n_samples: None or list, number of samples at each level
- :param target_var: target variance of MLMC estimates
+ Schedule and generate MLMC samples. If target_var is provided, iteratively determine
+ the additional samples required to reach the variance target.
+
+ :param sampler: mlmc.sampler.Sampler instance controlling the run.
+ :param n_samples: Optional list of exact sample counts per level.
+ :param target_var: Optional float target variance for MLMC estimator.
:return: None
"""
- # The number of samples is set by user
+ # Set initial samples either from user or default logic
if n_samples is not None:
sampler.set_initial_n_samples(n_samples)
- # The number of initial samples is determined automatically
else:
sampler.set_initial_n_samples()
- # Samples are scheduled and the program is waiting for all of them to be completed.
+
+ # Schedule and wait for initial batch of samples
sampler.schedule_samples()
sampler.ask_sampling_pool_for_samples(sleep=self._sample_sleep, timeout=self._sample_timeout)
self.all_collect(sampler)
- # MLMC estimates target variance is set
+ # If a global target variance is specified, estimate and iteratively add samples
if target_var is not None:
- # The mlmc.quantity.quantity.Quantity instance is created
- # parameters 'storage' and 'q_specs' are obtained from sample_storage,
- # originally 'q_specs' is set in the simulation class
+ # Build a root quantity required for moment estimation
root_quantity = make_root_quantity(storage=sampler.sample_storage,
q_specs=sampler.sample_storage.load_result_format())
- # Moment functions object is created
- # The MLMC algorithm determines number of samples according to the moments variance,
- # Type of moment functions (Legendre by default) might affect the total number of MLMC samples
moments_fn = self.set_moments(root_quantity, sampler.sample_storage, n_moments=self._n_moments)
estimate_obj = Estimate(root_quantity, sample_storage=sampler.sample_storage,
moments_fn=moments_fn)
- # Initial estimation of the number of samples at each level
+ # Estimate per-level variances and costs from finished samples
variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler.n_finished_samples)
- # Firstly, the variance of moments and execution time of samples at each level are calculated from already finished samples
n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops,
n_levels=sampler.n_levels)
- #####
- # MLMC sampling algorithm - gradually schedules samples and refines the total number of samples
- #####
- # Loop until number of estimated samples is greater than the number of scheduled samples
+ # Loop until sampler has scheduled enough samples
while not sampler.process_adding_samples(n_estimated, self._sample_sleep, self._adding_samples_coef,
timeout=self._sample_timeout):
- # New estimation according to already finished samples
variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler._n_scheduled_samples)
n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops,
n_levels=sampler.n_levels)
def all_collect(self, sampler):
"""
- Collect samples
- :param sampler: mlmc.Sampler object
+ Repeatedly poll the sampling pool and collect finished samples until none remain running.
+
+ :param sampler: mlmc.sampler.Sampler instance.
:return: None
"""
running = 1
@@ -200,17 +226,28 @@ def all_collect(self, sampler):
print("N running: ", running)
def set_moments(self, quantity, sample_storage, n_moments=25):
+ """
+ Build Legendre moments object for a given quantity using the domain estimated from samples.
+
+ :param quantity: quantity object (used to estimate domain).
+ :param sample_storage: storage used to compute the domain estimate.
+ :param n_moments: number of Legendre polynomials to use.
+ :return: Legendre instance configured for the estimated domain.
+ """
true_domain = Estimate.estimate_domain(quantity, sample_storage, quantile=self._quantile)
return Legendre(n_moments, true_domain)
@staticmethod
def determine_level_parameters(n_levels, step_range):
"""
- Determine level parameters,
- In this case, a step of fine simulation at each level
- :param n_levels: number of MLMC levels
- :param step_range: simulation step range
- :return: List of lists
+ Determine level parameters for MLMC.
+
+ For this example a single per-level 'step' is returned in a list, interpolating
+ (geometrically) between step_range[0] and step_range[1].
+
+ :param n_levels: int number of MLMC levels.
+ :param step_range: [coarse_step, fine_step] with coarse_step > fine_step.
+ :return: List[List[float]]: per-level parameters (each inner list contains the step value).
"""
assert step_range[0] > step_range[1]
level_parameters = []
diff --git a/examples/shooting/simulation_shooting_1D.py b/examples/shooting/simulation_shooting_1D.py
index b5646a09..81615794 100644
--- a/examples/shooting/simulation_shooting_1D.py
+++ b/examples/shooting/simulation_shooting_1D.py
@@ -10,8 +10,17 @@
def create_corr_field(model='gauss', corr_length=0.1, dim=1, log=True, sigma=1, mode_no=1000):
"""
- Create correlated random field
- :return: mlmc.random.correlated_field.Field instance
+ Create a correlated random field wrapper used by the simulations.
+
+ :param model: str, covariance model name. Supported values:
+ 'gauss' (default), 'exp', 'TPLgauss', 'TPLexp', 'TPLStable'.
+ :param corr_length: float, correlation length (len_scale) for the covariance model.
+ :param dim: int, spatial dimension of the covariance model.
+ :param log: bool, whether the generated random field is treated as log-field.
+ :param sigma: float, standard deviation (sigma) for the random field.
+ :param mode_no: int, number of Fourier modes (used by GSTools SRF).
+ :return: cf.Field instance that wraps a GSToolsSpatialCorrelatedField configured
+ with the selected covariance model.
"""
if model == 'exp':
model = gstools.Exponential(dim=dim, len_scale=corr_length)
@@ -28,22 +37,42 @@ def create_corr_field(model='gauss', corr_length=0.1, dim=1, log=True, sigma=1,
class ShootingSimulation1D(Simulation):
+ """
+ Simple 1D "shooting" simulation used as example for MLMC workflow.
+
+ The class implements:
+ - level_instance(...) to create LevelSimulation configurations,
+ - calculate(...) static method to produce paired (fine, coarse) samples,
+ - result_format() describing the output QuantitySpec.
+ """
def __init__(self, config):
"""
- :param config: Dict, simulation configuration
+ Initialize simulation factory.
+
+ :param config: Dict containing simulation configuration. Expected keys include:
+ - 'start_position': np.ndarray shape (2,)
+ - 'start_velocity': np.ndarray shape (2,)
+ - 'area_borders': np.ndarray [xmin, xmax, ymin, ymax]
+ - 'max_time': float
+ - 'complexity': float used in n_ops_estimate
+ - 'fields_params': dict passed to create_corr_field()
"""
super().__init__()
self._config = config
- # This attribute is obligatory, if True workspace is created
+ # If True, a workspace for each sample will be created (not used here).
self.need_workspace: bool = False
def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation:
"""
- Called from mlmc.Sampler, it creates single instance of LevelSimulation (mlmc.level_simulation)
- :param fine_level_params: fine simulation step at particular level
- :param coarse_level_params: coarse simulation step at particular level
- :return: mlmc.LevelSimulation object
+ Create a LevelSimulation object for given fine and coarse level parameters.
+
+ This constructs the simulation configuration for one MLMC level (fine/coarse pair)
+ and returns a LevelSimulation that knows which calculate() function to call.
+
+ :param fine_level_params: List[float], typically a single-element list containing the fine step size.
+ :param coarse_level_params: List[float], typically a single-element list containing the coarse step size.
+ :return: LevelSimulation configured with the derived config dictionary and task size.
"""
config = copy.deepcopy(self._config)
config["fine"] = {}
@@ -51,6 +80,8 @@ def level_instance(self, fine_level_params: List[float], coarse_level_params: Li
config["fine"]["step"] = fine_level_params[0]
config["coarse"]["step"] = coarse_level_params[0]
config["res_format"] = self.result_format()
+
+ # compute number of elements per level from complexity and step
config["fine"]["n_elements"] = int(config["complexity"] / config["fine"]["step"])
if config["coarse"]["step"] > 0:
config["coarse"]["n_elements"] = int(config["complexity"] / config["coarse"]["step"])
@@ -63,20 +94,33 @@ def level_instance(self, fine_level_params: List[float], coarse_level_params: Li
@staticmethod
def calculate(config, seed):
"""
- Calculate fine and coarse sample and also extract their results
- :param config: dictionary containing simulation configuration
- :param seed: random number generator seed
- :return: np.ndarray, np.ndarray
+ Build random field and produce a paired (fine, coarse) simulation result.
+
+ This is the static worker function used by LevelSimulation to generate one sample pair.
+ It creates the correlated random field, samples it on the fine+coarse point set and
+ runs the deterministic integrator for fine and coarse input arrays.
+
+ :param config: dict, level-specific configuration produced by level_instance().
+ It must include 'fields_params', and 'fine'/'coarse' with 'n_elements'
+ and 'step' keys, plus other simulation settings.
+ :param seed: int, RNG seed passed by sampler (currently unused inside; RNG is handled by field implementation).
+ :return: tuple (fine_result, coarse_result), each is a scalar float (y coordinate at final time or NaN).
"""
- # Create random field structure
+ # create random field generator (GSTools wrapper)
field = create_corr_field(**config['fields_params'])
+
+ # create sample points (concatenated fine + coarse) and remember fine size
points, n_fine_points = ShootingSimulation1D.create_points(config)
+
+ # assign points to field and generate realization
field.set_points(points)
- fine_input_sample, coarse_input_sample = ShootingSimulation1D.generate_random_sample(field,
- coarse_step=config["coarse"]["step"],
- n_fine_elements=n_fine_points)
+ # sample fine and coarse inputs from the realizations
+ fine_input_sample, coarse_input_sample = ShootingSimulation1D.generate_random_sample(
+ field, coarse_step=config["coarse"]["step"], n_fine_elements=n_fine_points
+ )
+ # run simulator for fine and coarse inputs
fine_res = ShootingSimulation1D._run_sample(config, fine_input_sample)
coarse_res = ShootingSimulation1D._run_sample(config, coarse_input_sample)
@@ -85,36 +129,46 @@ def calculate(config, seed):
@staticmethod
def _run_sample(config, rnd_input_samples):
"""
- Simulation of 1D shooting
- :param config: dictionary containing simulation configuration
- :param rnd_input_samples: np.ndarray, shape: (number of elements )
+ Run the explicit Euler "shooting" simulation for a given input array.
+
+ The dynamics are:
+ X_{k+1} = X_k + dt * V_k
+ V_{k+1} = V_k + dt * F_k
+ where F_k is drawn from the correlated field sample (rnd_input_samples).
+
+ If the simulated projectile exits `area_borders`, the function returns np.nan.
+
+ :param config: dict, simulation configuration (same structure as in calculate()).
+ :param rnd_input_samples: np.ndarray-like, shape (n_elements,) with random forcing values.
+ :return: float, y-coordinate of the projectile at the end of simulation
+ (or np.nan if it left the domain).
"""
n_elements = len(rnd_input_samples)
- x, y, time, n = 0, 0, 0, 0
- X = config["start_position"]
- V = config['start_velocity']
+ X = config["start_position"].copy() # 2-vector [x, y]
+ V = config['start_velocity'].copy() # 2-vector
+ y = 0.0
- # Time step
+ # compute time step (if n_elements == 0, dt is unused and the loop is skipped)
if n_elements != 0:
dt = config['max_time'] / n_elements
- # Loop through random array F
+
+ # step through the random forcing and integrate
for i in range(n_elements):
- # New coordinates
+ # update position and velocity (explicit Euler)
X = X + dt * V
- # New vector of speed
V = V + dt * rnd_input_samples[i]
x = X[0]
y = X[1]
- if x > config['area_borders'][1] or x < config['area_borders'][0] or\
- y > config['area_borders'][3] or y < config['area_borders'][2]:
+ # check domain boundaries; if outside, record NaN and stop
+ if x > config['area_borders'][1] or x < config['area_borders'][0] or \
+ y > config['area_borders'][3] or y < config['area_borders'][2]:
y = np.nan
break
+ # current time (not used beyond stopping condition)
time = dt * (i + 1)
-
- # End simulation if time is bigger then maximum time
if time >= config['max_time']:
break
@@ -122,23 +176,43 @@ def _run_sample(config, rnd_input_samples):
@staticmethod
def create_points(config):
+ """
+ Create concatenated evaluation points for the random field for fine and coarse parts.
+
+ Points are returned as an (n_fine + n_coarse, 1) array where each row is a coordinate
+ at which the GSTools SRF will be evaluated. The first n_fine rows correspond to the fine mesh,
+ the remaining rows (if any) to the coarse mesh.
+
+ :param config: dict, level configuration that contains 'fine' and 'coarse' sub-dicts
+ with 'n_elements' integer entries and other simulation parameters.
+ :return: tuple (points, n_fine_elements)
+ - points: np.ndarray shape (n_fine + n_coarse, 1)
+ - n_fine_elements: int number of fine elements (split index)
+ """
n_fine_elements = config["fine"]["n_elements"]
n_coarse_elements = config["coarse"]["n_elements"]
assert n_fine_elements > n_coarse_elements
+ # build a 1D coordinate array for GSTools evaluation; here we use distance ~ velocity * time
points = np.empty((n_fine_elements + n_coarse_elements, 1))
- points[:, 0] = np.concatenate((np.linspace(0, config["start_velocity"][0]*config["max_time"],
- n_fine_elements),
- np.linspace(0, config["start_velocity"][0]*config["max_time"],
- n_coarse_elements)))
+ points[:, 0] = np.concatenate((
+ np.linspace(0, config["start_velocity"][0] * config["max_time"], n_fine_elements),
+ np.linspace(0, config["start_velocity"][0] * config["max_time"], n_coarse_elements)
+ ))
return points, n_fine_elements
@staticmethod
def generate_random_sample(field, coarse_step, n_fine_elements):
"""
- Generate random field, both fine and coarse part.
- :return: List, List
+ Sample a realization from the provided correlated field and split it into fine and coarse parts.
+
+ :param field: cf.Field instance created by create_corr_field(...) and already `set_points()` called.
+ :param coarse_step: float, coarse level step (if 0 then no coarse part is returned).
+ :param n_fine_elements: int, number of entries that belong to the fine-level part (split index).
+ :return: tuple (fine_input_sample, coarse_input_sample)
+ - fine_input_sample: np.ndarray shape (n_fine_elements,)
+ - coarse_input_sample: np.ndarray shape (n_coarse_elements,) or [] if coarse_step == 0
"""
field_sample = field.sample()
fine_input_sample = field_sample[:n_fine_elements]
@@ -148,13 +222,22 @@ def generate_random_sample(field, coarse_step, n_fine_elements):
return fine_input_sample, coarse_input_sample
def n_ops_estimate(self, step):
+ """
+ Estimate computational cost (# of operations) for a sample at a given step size.
+
+ :param step: float, simulation step size for the fine level
+ :return: float, heuristic estimate of cost used by MLMC to balance work across levels
+ """
return (1 / step) ** self._config['complexity'] * np.log(max(1 / step, 2.0))
def result_format(self) -> List[QuantitySpec]:
"""
- Result format
- :return:
+ Describe the output format of the simulation for MLMC.
+
+ :return: List[QuantitySpec] describing the result vector(s). Here we return a single
+ scalar quantity 'target' representing y-position at final time.
"""
spec1 = QuantitySpec(name="target", unit="m", shape=(1,), times=[10], locations=['0'])
return [spec1]
+
diff --git a/examples/shooting/simulation_shooting_2D.py b/examples/shooting/simulation_shooting_2D.py
index 79571eb1..f906f503 100644
--- a/examples/shooting/simulation_shooting_2D.py
+++ b/examples/shooting/simulation_shooting_2D.py
@@ -9,45 +9,78 @@
def create_corr_field(model='gauss', corr_length=0.1, dim=1, log=True, sigma=1, mode_no=1000):
"""
- Create random fields
- :return:
+ Create a correlated random field wrapper (GSTools model -> mlmc.random.correlated_field.Field).
+
+ :param model: str, covariance model name. Supported strings: 'gauss' (default), 'exp',
+ 'TPLgauss', 'TPLexp', 'TPLStable'.
+ :param corr_length: float, correlation length (len_scale) supplied to the GSTools model.
+ :param dim: int, spatial dimension of the covariance model.
+ :param log: bool, if True the field will be considered in log-space when sampling.
+ :param sigma: float, multiplicative sigma parameter for the SRF generator.
+ :param mode_no: int, number of Fourier modes used by GSTools SRF.
+ :return: cf.Field — an mlmc.random.correlated_field.Field wrapping a GSTools SRF.
"""
if model == 'exp':
model = gstools.Exponential(dim=dim, len_scale=corr_length)
elif model == 'TPLgauss':
- model = gstools.TPLGaussian(dim=dim, len_scale=corr_length)
+ model = gstools.TPLGaussian(dim=dim, len_scale=corr_length)
elif model == 'TPLexp':
- model = gstools.TPLExponential(dim=dim, len_scale=corr_length)
+ model = gstools.TPLExponential(dim=dim, len_scale=corr_length)
elif model == 'TPLStable':
- model = gstools.TPLStable(dim=dim, len_scale=corr_length)
+ model = gstools.TPLStable(dim=dim, len_scale=corr_length)
else:
- model = gstools.Gaussian(dim=dim, len_scale=corr_length)
+ model = gstools.Gaussian(dim=dim, len_scale=corr_length)
+
return cf.Field('conductivity', cf.GSToolsSpatialCorrelatedField(model, log=log, sigma=sigma, mode_no=mode_no))
class ShootingSimulation2D(Simulation):
+ """
+ Example 2D shooting simulation used with the MLMC sampler.
+
+ Implements:
+ - level_instance(...) to create per-level LevelSimulation config,
+ - calculate(...) static method used to produce a (fine, coarse) sample pair,
+ - result_format() describing output QuantitySpec.
+ """
def __init__(self, config):
"""
- :param config: Dict, simulation configuration
+ Initialize the simulation factory.
+
+ :param config: dict containing base simulation configuration. Expected keys:
+ - 'start_position': np.ndarray shape (2,)
+ - 'start_velocity': np.ndarray shape (2,)
+ - 'area_borders': np.ndarray [xmin, xmax, ymin, ymax]
+ - 'max_time': float
+ - 'complexity': float used by n_ops_estimate
+ - 'fields_params': dict passed to create_corr_field()
"""
super().__init__()
self.config = config
- # This attribute is obligatory
+ # If True, a sample workspace will be created (not required here)
self.need_workspace: bool = False
def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation:
"""
+ Construct LevelSimulation for given fine/coarse parameters.
+
+ This method mutates a copy of self.config into a level-specific config and
+ returns a LevelSimulation that will call ShootingSimulation2D.calculate to
+ generate samples for this level.
- :param fine_level_params:
- :param coarse_level_params:
- :return:
+ :param fine_level_params: list-like, typically [fine_step]
+ :param coarse_level_params: list-like, typically [coarse_step]
+ :return: LevelSimulation configured for this level
"""
+ # Update level-specific fields in self.config
self.config["fine"] = {}
self.config["coarse"] = {}
self.config["fine"]["step"] = fine_level_params[0]
self.config["coarse"]["step"] = coarse_level_params[0]
self.config["res_format"] = self.result_format()
+
+ # compute number of elements per level from complexity and step sizes
self.config["fine"]["n_elements"] = int(self.config["complexity"] / self.config["fine"]["step"])
if self.config["coarse"]["step"] > 0:
self.config["coarse"]["n_elements"] = int(self.config["complexity"] / self.config["coarse"]["step"])
@@ -60,84 +93,124 @@ def level_instance(self, fine_level_params: List[float], coarse_level_params: Li
@staticmethod
def calculate(config, seed):
"""
- Calculate fine and coarse sample and also extract their results
- :param config: dictionary containing simulation configuration
- :param seed: random number generator seed
- :return: np.ndarray, np.ndarray
+ Build correlated fields and produce paired fine/coarse simulation results.
+
+ This static function is intended to be executed by the LevelSimulation worker.
+ It:
+ - creates two independent correlated fields (x and y components),
+ - sets evaluation points,
+ - samples the fields and splits the realization into fine and coarse parts,
+ - simulates the projectile dynamics for fine/coarse inputs.
+
+ :param config: dict, level-specific configuration produced by level_instance().
+ :param seed: int, RNG seed (currently not used directly; GSTools RNG is internal).
+ :return: tuple (fine_result, coarse_result)
+ - fine_result: np.ndarray or scalar describing the fine-level output
+ - coarse_result: np.ndarray or scalar describing the coarse-level output
"""
- # Create random field structure
+ # Create independent correlated random fields for X and Y forcing
field_x = create_corr_field(**config['fields_params'])
field_y = create_corr_field(**config['fields_params'])
+ # create concatenated points and get number of fine points
points, n_fine_points = ShootingSimulation2D.create_points(config)
field_x.set_points(points)
field_y.set_points(points)
- fine_input_sample, coarse_input_sample = ShootingSimulation2D.generate_random_sample(field_x, field_y,
- coarse_step=config["coarse"]["step"],
- n_fine_elements=n_fine_points)
+ # sample and split into fine and coarse inputs
+ fine_input_sample, coarse_input_sample = ShootingSimulation2D.generate_random_sample(
+ field_x, field_y, coarse_step=config["coarse"]["step"], n_fine_elements=n_fine_points
+ )
+ # run dynamics for fine and coarse inputs
fine_res = ShootingSimulation2D._run_sample(config, fine_input_sample)
- coarse_res = ShootingSimulation2D._run_sample(config, fine_input_sample)
+ coarse_res = ShootingSimulation2D._run_sample(config, coarse_input_sample)
return fine_res, coarse_res
@staticmethod
def _run_sample(config, rnd_input_samples):
"""
- Simulation of 2D shooting
- :param config: dictionary containing simulation configuration
- :param rnd_input_samples: np.ndarray, shape: (number of elements )
+ Run the explicit Euler integrator for the 2D shooting model.
+
+ Dynamics:
+ X_{k+1} = X_k + dt * V_k
+ V_{k+1} = V_k + dt * F_k
+
+ The method returns the final position vector X (or [nan, nan] if the projectile exits area_borders).
+
+ :param config: dict, level-specific simulation configuration.
+ :param rnd_input_samples: array-like with shape (n_elements, 2) or (n_elements,) depending on caller.
+ :return: np.ndarray shape (2,) representing final [x, y] or [nan, nan] if out-of-bounds.
"""
n_elements = len(rnd_input_samples)
- X = config["start_position"]
- V = config['start_velocity']
+ # Use copies so we don't mutate config objects
+ X = np.array(config["start_position"], dtype=float).copy()
+ V = np.array(config['start_velocity'], dtype=float).copy()
- # Time step
+ # compute time step if there are elements
if n_elements != 0:
dt = config['max_time'] / n_elements
- # Loop through random array F
+ # integrate step-by-step
for i in range(n_elements):
- # New coordinates
+ # update position
X = X + dt * V
- # New vector of speed
+ # update velocity: rnd_input_samples[i] must be 2-vector (Fx, Fy)
V = V + dt * rnd_input_samples[i]
x = X[0]
y = X[1]
- if x > config['area_borders'][1] or x < config['area_borders'][0] or\
- y > config['area_borders'][3] or y < config['area_borders'][2]:
- X = [np.nan, np.nan]
+ # if projectile leaves the bounding box, return NaNs
+ if x > config['area_borders'][1] or x < config['area_borders'][0] or \
+ y > config['area_borders'][3] or y < config['area_borders'][2]:
+ X = np.array([np.nan, np.nan])
break
time = dt * (i + 1)
-
- # End simulation if time is bigger then maximum time
if time >= config['max_time']:
break
+
return X
@staticmethod
def create_points(config):
+ """
+ Build concatenated evaluation points for the correlated fields (fine first, then coarse).
+
+ The points are 1D coordinates used for the GSTools SRF evaluation; the first
+ n_fine rows correspond to the fine mesh, the remainder to the coarse mesh.
+
+ :param config: dict with 'fine' and 'coarse' sub-dicts containing 'n_elements'.
+ :return: tuple (points, n_fine_elements)
+ - points: np.ndarray shape (n_fine + n_coarse, 1)
+ - n_fine_elements: int number of fine-level points
+ """
n_fine_elements = config["fine"]["n_elements"]
n_coarse_elements = config["coarse"]["n_elements"]
assert n_fine_elements > n_coarse_elements
points = np.empty((n_fine_elements + n_coarse_elements, 1))
- points[:, 0] = np.concatenate((np.linspace(0, config["start_velocity"][0]*config["max_time"],
- n_fine_elements),
- np.linspace(0, config["start_velocity"][0]*config["max_time"],
- n_coarse_elements)))
+ points[:, 0] = np.concatenate((
+ np.linspace(0, config["start_velocity"][0] * config["max_time"], n_fine_elements),
+ np.linspace(0, config["start_velocity"][0] * config["max_time"], n_coarse_elements)
+ ))
return points, n_fine_elements
@staticmethod
def generate_random_sample(field_x, field_y, coarse_step, n_fine_elements):
"""
- Generate random field, both fine and coarse part.
- :return: List, List
+ Sample two correlated fields (x and y components) and split the realization into fine and coarse parts.
+
+ :param field_x: cf.Field for x-component (already had set_points called).
+ :param field_y: cf.Field for y-component (already had set_points called).
+ :param coarse_step: float, coarse-level step size (if 0, there is no coarse part).
+ :param n_fine_elements: int, how many samples belong to the fine-level portion.
+ :return: tuple (fine_input_sample, coarse_input_sample)
+ - fine_input_sample: np.ndarray shape (n_fine_elements, 2) with x and y forcing
+ - coarse_input_sample: np.ndarray shape (n_coarse_elements, 2) or empty array if coarse_step==0
"""
field_sample_x = field_x.sample()
field_sample_y = field_y.sample()
@@ -153,13 +226,19 @@ def generate_random_sample(field_x, field_y, coarse_step, n_fine_elements):
return fine_input_sample, coarse_input_sample
def n_ops_estimate(self, step):
+ """
+ Heuristic estimate of computational expense for a single fine-level sample.
+
+ :param step: float, fine-level step size.
+ :return: float, cost estimate used by MLMC scheduler.
+ """
return (1 / step) ** self.config['complexity'] * np.log(max(1 / step, 2.0))
def result_format(self) -> List[QuantitySpec]:
"""
- Result format
- :return:
+ Describe the result format (QuantitySpec) produced by calculate().
+
+ :return: list of QuantitySpec objects describing outputs. Here a single 2D target vector.
"""
spec1 = QuantitySpec(name="target", unit="m", shape=(2,), times=[10], locations=['0'])
return [spec1]
-
diff --git a/examples/synthetic_quantity.py b/examples/synthetic_quantity.py
index ad792dd0..0fcec175 100644
--- a/examples/synthetic_quantity.py
+++ b/examples/synthetic_quantity.py
@@ -19,6 +19,27 @@
def fill_sample_storage(sample_storage, result_format):
+ """
+ Populate a sample storage with synthetic scheduled and finished samples for tests.
+
+ This saves:
+ - global meta-data (result_format, level_parameters)
+ - scheduled samples per level
+ - finished (successful) samples per level
+ - number-of-operations records per level
+
+ Parameters
+ ----------
+ sample_storage : mlmc.sample_storage.* instance
+ Storage object to be filled (Memory or HDF).
+ result_format : list[QuantitySpec]
+ Result format used to determine sizes of flattened sample vectors.
+
+ Returns
+ -------
+ tuple
+ (result_format, sizes) where `sizes` is the last computed per-quantity sizes list.
+ """
np.random.seed(123)
n_levels = 3
@@ -55,7 +76,37 @@ def fill_sample_storage(sample_storage, result_format):
return result_format, sizes
+
def create_sampler(clean=True, memory=True, n_moments=5):
+ """
+ Create and prepare a Sampler with a synthetic simulation factory and initial scheduled samples.
+
+ The function:
+ - creates a temporary working directory (when clean=True),
+ - constructs a SynthSimulationForTests factory,
+ - chooses Memory or HDF sample storage,
+ - creates a OneProcessPool sampling_pool,
+ - constructs a Sampler instance,
+ - prepares a Monomial moments function,
+ - schedules an initial set of samples and triggers immediate sample execution.
+
+ Parameters
+ ----------
+ clean : bool, optional
+ If True, removes and recreates the test working directory before use (default True).
+ memory : bool, optional
+ If True use Memory() storage; otherwise use SampleStorageHDF (default True).
+ n_moments : int, optional
+ Number of monomial moments (unused in this helper besides constructing moments_fn) (default 5).
+
+ Returns
+ -------
+ tuple
+ (sampler, simulation_factory, moments_fn)
+ - sampler: mlmc.sampler.Sampler instance with initial samples scheduled/executed
+ - simulation_factory: SynthSimulationForTests instance used by the sampler
+ - moments_fn: Monomial moments object constructed for the true_domain
+ """
# Set work dir
np.random.seed(1234)
n_levels = 3
@@ -76,10 +127,6 @@ def create_sampler(clean=True, memory=True, n_moments=5):
simulation_config = dict(distr=distr, complexity=2, nan_fraction=failed_fraction, sim_method='_sample_fn')
simulation_factory = SynthSimulationForTests(simulation_config)
- # shutil.copyfile('synth_sim_config.yaml', os.path.join(work_dir, 'synth_sim_config.yaml'))
- # simulation_config = {"config_yaml": os.path.join(work_dir, 'synth_sim_config.yaml')}
- # simulation_workspace = SynthSimulationWorkspace(simulation_config)
-
# Create sample storages
if memory:
sample_storage = Memory()
@@ -87,7 +134,6 @@ def create_sampler(clean=True, memory=True, n_moments=5):
sample_storage = SampleStorageHDF(file_path=os.path.join(work_dir, "mlmc_test.hdf5"))
# Create sampling pools
sampling_pool = OneProcessPool()
- # sampling_pool_dir = OneProcessPool(work_dir=work_dir)
if clean:
if sampling_pool._output_dir is not None:
@@ -111,5 +157,3 @@ def create_sampler(clean=True, memory=True, n_moments=5):
sampler.ask_sampling_pool_for_samples()
return sampler, simulation_factory, moments_fn
-
-
diff --git a/mlmc/archive/flow123d_mock.py b/mlmc/archive/flow123d_mock.py
deleted file mode 100644
index aca6f1a0..00000000
--- a/mlmc/archive/flow123d_mock.py
+++ /dev/null
@@ -1,19 +0,0 @@
-"""
-Mockup for the Flow123d simulator.
-"""
-
-import argparse
-import ruamel.yaml as yaml
-
-parser = argparse.ArgumentParser(description='Flow123d mockup.')
-parser.add_argument('yaml_file', metavar='', type=string, nargs=1,
- help='Main input YAML file.')
-
-args = parser.parse_args()
-
-with open(args.yaml_file, 'r') as f:
- input = yaml.load(f)
- mesh_file = input.mesh.mesh_file
-
-def conductivity_field_average(gmsh):
- pass
diff --git a/mlmc/estimator.py b/mlmc/estimator.py
index af4f6e03..cc98be3b 100644
--- a/mlmc/estimator.py
+++ b/mlmc/estimator.py
@@ -3,6 +3,7 @@
import scipy.integrate as integrate
import mlmc.quantity.quantity_estimate as qe
import mlmc.tool.simple_distribution
+from mlmc.quantity.quantity_estimate import mask_nan_samples
from mlmc.quantity.quantity_types import ScalarType
from mlmc.plot import plots
from mlmc.quantity.quantity_spec import ChunkSpec
@@ -10,42 +11,88 @@
class Estimate:
"""
- Provides wrapper methods for moments estimation, pdf approximation, ...
+ A wrapper class for moment estimation, PDF approximation, and related MLMC post-processing.
+
+ Provides utility methods to:
+ - Estimate statistical moments, variances, and covariances
+ - Perform regression-based variance estimation
+ - Conduct bootstrap resampling
+ - Construct approximate probability density functions
+ - Visualize and analyze MLMC variance and sample distributions
"""
+
def __init__(self, quantity, sample_storage, moments_fn=None):
+ """
+ Initialize the Estimate instance.
+
+ :param quantity: mlmc.quantity.Quantity
+ Quantity object representing the stochastic quantity of interest.
+ :param sample_storage: mlmc.sample_storage.SampleStorage
+ Storage containing MLMC samples for each level.
+ :param moments_fn: callable, optional
+ Function defining the statistical moments to be estimated.
+ """
self._quantity = quantity
self._sample_storage = sample_storage
self._moments_fn = moments_fn
+ self._moments_mean = None
@property
def quantity(self):
+ """Return the current Quantity object."""
return self._quantity
@quantity.setter
def quantity(self, quantity):
+ """Set a new Quantity object."""
self._quantity = quantity
@property
def n_moments(self):
+ """Return the number of moment functions defined."""
return self._moments_fn.size
+ @property
+ def moments_mean_obj(self):
+ """Return the most recently computed mean of the moments."""
+ return self._moments_mean
+
+ @moments_mean_obj.setter
+ def moments_mean_obj(self, moments_mean):
+ """
+ Set the estimated mean of the moments.
+
+ :param moments_mean: mlmc.quantity.quantity.QuantityMean
+ Object containing mean and variance of the estimated moments.
+ :raises TypeError: If the object is not an instance of QuantityMean.
+ """
+ if not isinstance(moments_mean, mlmc.quantity.quantity.QuantityMean):
+ raise TypeError
+ self._moments_mean = moments_mean
+
def estimate_moments(self, moments_fn=None):
"""
- Use collected samples to estimate moments and variance of this estimate.
- :param moments_fn: moments function
- :return: estimate_of_moment_means, estimate_of_variance_of_estimate ; arrays of length n_moments
+ Estimate the mean and variance of the defined moment functions.
+
+ :param moments_fn: callable, optional
+ Function to compute statistical moments. If None, uses the stored function.
+ :return: tuple (moment_means, moment_variances)
+ Arrays of length n_moments representing estimated means and variances.
"""
if moments_fn is None:
moments_fn = self._moments_fn
moments_mean = qe.estimate_mean(qe.moments(self._quantity, moments_fn))
+ self.moments_mean_obj = moments_mean
return moments_mean.mean, moments_mean.var
def estimate_covariance(self, moments_fn=None):
"""
- Use collected samples to estimate covariance matrix and variance of this estimate.
- :param moments_fn: moments function
- :return: estimate_of_moment_means, estimate_of_variance_of_estimate ; arrays of length n_moments
+ Estimate the covariance matrix and its variance from MLMC samples.
+
+ :param moments_fn: callable, optional
+ Function defining moment evaluations. If None, uses the stored one.
+ :return: tuple (covariance_matrix, covariance_variance)
"""
if moments_fn is None:
moments_fn = self._moments_fn
@@ -55,14 +102,20 @@ def estimate_covariance(self, moments_fn=None):
def estimate_diff_vars_regression(self, n_created_samples, moments_fn=None, raw_vars=None):
"""
- Estimate variances using linear regression model.
- Assumes increasing variance with moments_fn, use only two moments_fn with highest average variance.
- :param n_created_samples: number of created samples on each level
- :param moments_fn: Moment evaluation function
- :return: array of variances, n_ops_estimate
+ Estimate variances using a linear regression model.
+
+ Assumes that variance increases with moment order. Typically, only two moments
+ with the highest average variance are used.
+
+ :param n_created_samples: array-like
+ Number of created samples on each MLMC level.
+ :param moments_fn: callable, optional
+ Moment evaluation function.
+ :param raw_vars: ndarray, optional
+ Precomputed raw variance estimates.
+ :return: tuple (variance_array, n_ops_estimate)
"""
self._n_created_samples = n_created_samples
- # vars shape L x R
if raw_vars is None:
if moments_fn is None:
moments_fn = self._moments_fn
@@ -70,21 +123,24 @@ def estimate_diff_vars_regression(self, n_created_samples, moments_fn=None, raw_
sim_steps = np.squeeze(self._sample_storage.get_level_parameters())
vars = self._all_moments_variance_regression(raw_vars, sim_steps)
- # We need to get n_ops_estimate from storage
+
return vars, self._sample_storage.get_n_ops()
def estimate_diff_vars(self, moments_fn=None):
"""
- Estimate moments_fn variance from samples
- :param moments_fn: Moment evaluation functions
- :return: (diff_variance, n_samples);
- diff_variance - shape LxR, variances of diffs of moments_fn
- n_samples - shape L, num samples for individual levels.
+ Estimate the variance of moment differences between consecutive MLMC levels.
+
+ :param moments_fn: callable, optional
+ Moment evaluation functions.
+ :return: tuple (diff_variance, n_samples)
+ diff_variance - shape (L, R): variances of differences of moments.
+ n_samples - shape (L,): number of samples per level.
"""
moments_mean = qe.estimate_mean(qe.moments(self._quantity, moments_fn))
return moments_mean.l_vars, moments_mean.n_samples
def _all_moments_variance_regression(self, raw_vars, sim_steps):
+ """Apply variance regression across all moment functions."""
reg_vars = raw_vars.copy()
n_moments = raw_vars.shape[1]
for m in range(1, n_moments):
@@ -94,53 +150,53 @@ def _all_moments_variance_regression(self, raw_vars, sim_steps):
def _moment_variance_regression(self, raw_vars, sim_steps):
"""
- Estimate level variance using separate model for every moment.
+ Perform regression-based smoothing of level variance for a single moment.
- log(var_l) = A + B * log(h_l) + C * log^2(hl),
- for l = 0, .. L-1
- :param raw_vars: moments_fn variances raws, shape (L,)
- :param sim_steps: simulation steps, shape (L,)
- :return: np.array (L, )
+ Model:
+ log(var_l) = A + B * log(h_l) + C * log^2(h_l)
+
+ :param raw_vars: ndarray, shape (L,)
+ Raw variance estimates of a single moment.
+ :param sim_steps: ndarray, shape (L,)
+ Simulation step sizes or level parameters.
+ :return: ndarray, shape (L,)
+ Smoothed variance estimates.
"""
L, = raw_vars.shape
L1 = L - 1
if L < 3 or np.allclose(raw_vars, 0):
return raw_vars
- # estimate of variances of variances, compute scaling
W = 1.0 / np.sqrt(self._variance_of_variance())
- W = W[1:] # ignore level 0
+ W = W[1:]
W = np.ones((L - 1,))
- # Use linear regresion to improve estimate of variances V1, ...
- # model log var_{r,l} = a_r + b * log step_l
- # X_(r,l), j = dirac_{r,j}
-
- K = 3 # number of parameters
+ K = 3
X = np.zeros((L1, K))
log_step = np.log(sim_steps[1:])
X[:, 0] = np.ones(L1)
X[:, 1] = np.full(L1, log_step)
X[:, 2] = np.full(L1, log_step ** 2)
- WX = X * W[:, None] # scale
- log_vars = np.log(raw_vars[1:]) # omit first variance
- log_vars = W * log_vars # scale RHS
+ WX = X * W[:, None]
+ log_vars = np.log(raw_vars[1:])
+ log_vars = W * log_vars
params, res, rank, sing_vals = np.linalg.lstsq(WX, log_vars)
new_vars = raw_vars.copy()
new_vars[1:] = np.exp(np.dot(X, params))
-
return new_vars
def _variance_of_variance(self, n_samples=None):
"""
- Approximate variance of log(X) where
- X is from ch-squared with df=n_samples - 1.
- Return array of variances for actual n_samples array.
+ Approximate the variance of log(X), where X is chi-squared distributed with df = n_samples - 1.
+
+ Used to approximate the uncertainty of variance estimates.
- :param n_samples: Optional array with n_samples.
- :return: array of variances of variance estimate.
+ :param n_samples: array-like, optional
+ Number of samples per level.
+ :return: ndarray
+ Variance of variance estimates per level.
"""
if n_samples is None:
n_samples = self._n_created_samples
@@ -169,21 +225,29 @@ def compute_moment(moment):
return np.array(vars)
def est_bootstrap(self, n_subsamples=100, sample_vector=None, moments_fn=None):
-
+ """
+ Perform bootstrap resampling to estimate uncertainty in MLMC estimators.
+
+ :param n_subsamples: int, default=100
+ Number of bootstrap subsamples.
+ :param sample_vector: ndarray, optional
+ Sampling vector for selecting subsamples.
+ :param moments_fn: callable, optional
+ Moment evaluation function.
+ """
if moments_fn is not None:
self._moments_fn = moments_fn
else:
moments_fn = self._moments_fn
- sample_vector = determine_sample_vec(n_collected_samples=self._sample_storage.get_n_collected(),
- n_levels=self._sample_storage.get_n_levels(),
- sample_vector=sample_vector)
- bs_mean = []
- bs_var = []
- bs_l_means = []
- bs_l_vars = []
+ sample_vector = determine_sample_vec(
+ n_collected_samples=self._sample_storage.get_n_collected(),
+ n_levels=self._sample_storage.get_n_levels(),
+ sample_vector=sample_vector
+ )
+ bs_mean, bs_var, bs_l_means, bs_l_vars = [], [], [], []
for i in range(n_subsamples):
- quantity_subsample = self.quantity.select(self.quantity.subsample(sample_vec=sample_vector))
+ quantity_subsample = self.quantity.subsample(sample_vec=sample_vector)
moments_quantity = qe.moments(quantity_subsample, moments_fn=moments_fn, mom_at_bottom=False)
q_mean = qe.estimate_mean(moments_quantity)
@@ -192,6 +256,9 @@ def est_bootstrap(self, n_subsamples=100, sample_vector=None, moments_fn=None):
bs_l_means.append(q_mean.l_means)
bs_l_vars.append(q_mean.l_vars)
+ self.bs_mean = bs_mean
+ self.bs_var = bs_var
+
self.mean_bs_mean = np.mean(bs_mean, axis=0)
self.mean_bs_var = np.mean(bs_var, axis=0)
self.mean_bs_l_means = np.mean(bs_l_means, axis=0)
@@ -202,51 +269,105 @@ def est_bootstrap(self, n_subsamples=100, sample_vector=None, moments_fn=None):
self.var_bs_l_means = np.var(bs_l_means, axis=0, ddof=1)
self.var_bs_l_vars = np.var(bs_l_vars, axis=0, ddof=1)
- self._bs_level_mean_variance = self.var_bs_l_means * np.array(self._sample_storage.get_n_collected())[:, None]
+ self._bs_level_mean_variance = (
+ self.var_bs_l_means * np.array(self._sample_storage.get_n_collected())[:, None]
+ )
- def bs_target_var_n_estimated(self, target_var, sample_vec=None):
- sample_vec = determine_sample_vec(n_collected_samples=self._sample_storage.get_n_collected(),
- n_levels=self._sample_storage.get_n_levels(),
- sample_vector=sample_vec)
+ def bs_target_var_n_estimated(self, target_var, sample_vec=None, n_subsamples=100):
+ """
+ Estimate the number of samples required to achieve a target variance.
+
+ :param target_var: float
+ Desired target variance for MLMC estimation.
+ :param sample_vec: ndarray, optional
+ Sampling vector specifying subsamples per level.
+ :param n_subsamples: int, default=100
+ Number of bootstrap resamplings to perform.
+ :return: ndarray
+ Estimated number of samples required at each level.
+ """
+ sample_vec = determine_sample_vec(
+ n_collected_samples=self._sample_storage.get_n_collected(),
+ n_levels=self._sample_storage.get_n_levels(),
+ sample_vector=sample_vec
+ )
- self.est_bootstrap(n_subsamples=300, sample_vector=sample_vec)
+ self.est_bootstrap(n_subsamples=n_subsamples, sample_vector=sample_vec)
- variances, n_ops = self.estimate_diff_vars_regression(sample_vec, raw_vars=self.mean_bs_l_vars)
- n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops,
- n_levels=self._sample_storage.get_n_levels())
+ variances, n_ops = self.estimate_diff_vars_regression(
+ sample_vec, raw_vars=self.mean_bs_l_vars
+ )
+
+ n_estimated = estimate_n_samples_for_target_variance(
+ target_var, variances, n_ops, n_levels=self._sample_storage.get_n_levels()
+ )
return n_estimated
def plot_variances(self, sample_vec=None):
+ """
+ Plot variance breakdown from bootstrap and regression data.
+
+ :param sample_vec: ndarray, optional
+ Sampling vector specifying subsamples per level.
+ """
var_plot = plots.VarianceBreakdown(10)
- sample_vec = determine_sample_vec(n_collected_samples=self._sample_storage.get_n_collected(),
- n_levels=self._sample_storage.get_n_levels(),
- sample_vector=sample_vec)
+ sample_vec = determine_sample_vec(
+ n_collected_samples=self._sample_storage.get_n_collected(),
+ n_levels=self._sample_storage.get_n_levels(),
+ sample_vector=sample_vec
+ )
self.est_bootstrap(n_subsamples=100, sample_vector=sample_vec)
- var_plot.add_variances(self.mean_bs_l_vars, sample_vec, ref_level_vars=self._bs_level_mean_variance)
+ var_plot.add_variances(
+ self.mean_bs_l_vars,
+ sample_vec,
+ ref_level_vars=self._bs_level_mean_variance
+ )
var_plot.show(None)
def plot_bs_var_log(self, sample_vec=None):
- sample_vec = determine_sample_vec(n_collected_samples=self._sample_storage.get_n_collected(),
- n_levels=self._sample_storage.get_n_levels(),
- sample_vector=sample_vec)
+ """
+ Generate log-scale bootstrap variance plots and variance regression fits.
- moments_quantity = qe.moments(self._quantity, moments_fn=self._moments_fn, mom_at_bottom=False)
+ :param sample_vec: ndarray, optional
+ Sampling vector specifying subsamples per level.
+ """
+ sample_vec = determine_sample_vec(
+ n_collected_samples=self._sample_storage.get_n_collected(),
+ n_levels=self._sample_storage.get_n_levels(),
+ sample_vector=sample_vec
+ )
+
+ moments_quantity = qe.moments(
+ self._quantity, moments_fn=self._moments_fn, mom_at_bottom=False
+ )
q_mean = qe.estimate_mean(moments_quantity)
- bs_plot = plots.BSplots(bs_n_samples=sample_vec, n_samples=self._sample_storage.get_n_collected(),
- n_moments=self._moments_fn.size, ref_level_var=q_mean.l_vars)
+ bs_plot = plots.BSplots(
+ bs_n_samples=sample_vec,
+ n_samples=self._sample_storage.get_n_collected(),
+ n_moments=self._moments_fn.size,
+ ref_level_var=q_mean.l_vars
+ )
- bs_plot.plot_means_and_vars(self.mean_bs_mean[1:], self.mean_bs_var[1:], n_levels=self._sample_storage.get_n_levels())
+ bs_plot.plot_means_and_vars(
+ self.mean_bs_mean[1:],
+ self.mean_bs_var[1:],
+ n_levels=self._sample_storage.get_n_levels()
+ )
bs_plot.plot_bs_variances(self.mean_bs_l_vars)
- #bs_plot.plot_bs_var_log_var()
-
+ # bs_plot.plot_bs_var_log_var()
bs_plot.plot_var_regression(self, self._sample_storage.get_n_levels(), self._moments_fn)
def fine_coarse_violinplot(self):
+ """
+ Create violin plots comparing fine and coarse samples across levels.
+
+ Uses pandas for data organization and mlmc.plot.violinplot for visualization.
+ """
import pandas as pd
from mlmc.plot import violinplot
@@ -255,14 +376,18 @@ def fine_coarse_violinplot(self):
if n_levels > 1:
for level_id in range(n_levels):
- chunk_spec = next(self._sample_storage.chunks(level_id=level_id, n_samples=self._sample_storage.get_n_collected()[level_id]))
+ chunk_spec = next(
+ self._sample_storage.chunks(
+ level_id=level_id,
+ n_samples=self._sample_storage.get_n_collected()[level_id]
+ )
+ )
samples = np.squeeze(self._quantity.samples(chunk_spec, axis=0))
if level_id == 0:
label = "{} F{} {} C".format(level_id, ' ' * label_n_spaces, level_id + 1)
data = {'samples': samples[:, 0], 'type': 'fine', 'level': label}
dframe = pd.DataFrame(data)
else:
-
data = {'samples': samples[:, 1], 'type': 'coarse', 'level': label}
dframe = pd.concat([dframe, pd.DataFrame(data)], axis=0)
@@ -270,16 +395,21 @@ def fine_coarse_violinplot(self):
label = "{} F{} {} C".format(level_id, ' ' * label_n_spaces, level_id + 1)
data = {'samples': samples[:, 0], 'type': 'fine', 'level': label}
dframe = pd.concat([dframe, pd.DataFrame(data)], axis=0)
+
violinplot.fine_coarse_violinplot(dframe)
@staticmethod
def estimate_domain(quantity, sample_storage, quantile=None):
"""
- Estimate moments domain from MLMC samples.
- :param quantity: mlmc.quantity.Quantity instance, represents the real quantity
- :param sample_storage: mlmc.sample_storage.SampleStorage instance, provides all the samples
- :param quantile: float in interval (0, 1), None means whole sample range
- :return: lower_bound, upper_bound
+ Estimate lower and upper bounds of the domain from MLMC samples.
+
+ :param quantity: mlmc.quantity.Quantity
+ Quantity object representing the stochastic quantity.
+ :param sample_storage: mlmc.sample_storage.SampleStorage
+ Storage object containing all level samples.
+ :param quantile: float, optional
+ Quantile value in (0, 1). None defaults to 0.01.
+ :return: tuple (lower_bound, upper_bound)
"""
ranges = []
if quantile is None:
@@ -289,13 +419,25 @@ def estimate_domain(quantity, sample_storage, quantile=None):
try:
sample_storage.get_n_collected()[level_id]
except AttributeError:
- print("No collected values for level {}".format(level_id))
+ print(f"No collected values for level {level_id}")
break
- chunk_spec = next(sample_storage.chunks(n_samples=sample_storage.get_n_collected()[level_id]))
- fine_samples = quantity.samples(chunk_spec)[..., 0] # Fine samples at level 0
+ # NOTE(review): removed leftover debug print of get_n_collected() element type
+
+ # NOTE(review): removed dead guard — get_n_collected() returns ints, so
+ # isinstance(..., AttributeError) was always False; the except AttributeError
+ # clause above already handles the missing-data case.
+
+ chunk_spec = next(
+ sample_storage.chunks(
+ level_id=level_id,
+ n_samples=sample_storage.get_n_collected()[level_id]
+ )
+ )
+ fine_samples = quantity.samples(chunk_spec)[..., 0]
fine_samples = np.squeeze(fine_samples)
- fine_samples = fine_samples[~np.isnan(fine_samples)] # remove NaN
+ # NOTE(review): removed leftover debug print of fine_samples
+ fine_samples = fine_samples[~np.isnan(fine_samples)]
ranges.append(np.percentile(fine_samples, [100 * quantile, 100 * (1 - quantile)]))
ranges = np.array(ranges)
@@ -303,102 +445,185 @@ def estimate_domain(quantity, sample_storage, quantile=None):
def construct_density(self, tol=1e-8, reg_param=0.0, orth_moments_tol=1e-4, exact_pdf=None):
"""
- Construct approximation of the density using given moment functions.
+ Construct an approximate probability density function using orthogonal moments.
+
+ :param tol: float, default=1e-8
+ Optimization tolerance for density estimation.
+ :param reg_param: float, default=0.0
+ Regularization parameter to stabilize estimation.
+ :param orth_moments_tol: float, default=1e-4
+ Tolerance for orthogonalization of moments.
+ :param exact_pdf: callable, optional
+ Reference exact PDF for validation or comparison.
+ :return: tuple (distribution, info, result, moments_object)
"""
if not isinstance(self._quantity.qtype, ScalarType):
- raise NotImplementedError("Currently, we only support ScalarType quantities")
+ raise NotImplementedError("Currently, only ScalarType quantities are supported.")
cov_mean = qe.estimate_mean(qe.covariance(self._quantity, self._moments_fn))
cov_mat = cov_mean.mean
- moments_obj, info = mlmc.tool.simple_distribution.construct_ortogonal_moments(self._moments_fn,
- cov_mat,
- tol=orth_moments_tol)
+ moments_obj, info = mlmc.tool.simple_distribution.construct_ortogonal_moments(
+ self._moments_fn, cov_mat, tol=orth_moments_tol
+ )
+
moments_mean = qe.estimate_mean(qe.moments(self._quantity, moments_obj))
est_moments = moments_mean.mean
est_vars = moments_mean.var
- # if exact_pdf is not None:
- # exact_moments = mlmc.tool.simple_distribution.compute_exact_moments(moments_obj, exact_pdf)
-
est_vars = np.ones(moments_obj.size)
min_var, max_var = np.min(est_vars[1:]), np.max(est_vars[1:])
- #print("min_err: {} max_err: {} ratio: {}".format(min_var, max_var, max_var / min_var))
moments_data = np.stack((est_moments, est_vars), axis=1)
- distr_obj = mlmc.tool.simple_distribution.SimpleDistribution(moments_obj, moments_data,
- domain=moments_obj.domain)
- result = distr_obj.estimate_density_minimize(tol, reg_param) # 0.95 two side quantile
+
+ distr_obj = mlmc.tool.simple_distribution.SimpleDistribution(
+ moments_obj, moments_data, domain=moments_obj.domain
+ )
+ result = distr_obj.estimate_density_minimize(tol, reg_param)
return distr_obj, info, result, moments_obj
def get_level_samples(self, level_id, n_samples=None):
"""
- Get level samples from storage
- :param level_id: int, level identifier
- :param n_samples> int, number of samples to retrieve, if None first chunk of data is retrieved
- :return: level samples, shape: (M, N, 1) for level 0, (M, N, 2) otherwise
+ Retrieve MLMC samples for a given level.
+
+ :param level_id: int
+ Level index to access.
+ :param n_samples: int, optional
+ Number of samples to retrieve. If None, retrieves all available samples.
+ :return: ndarray
+ Samples for the specified level.
"""
chunk_spec = next(self._sample_storage.chunks(level_id=level_id, n_samples=n_samples))
return self._quantity.samples(chunk_spec=chunk_spec)
+ def kurtosis_check(self, quantity=None):
+ """
+ Compute and return the kurtosis of the given or stored quantity.
+
+ :param quantity: mlmc.quantity.Quantity, optional
+ Quantity for which to compute kurtosis. Defaults to the stored quantity.
+ :return: float or ndarray
+ Computed kurtosis per level.
+ """
+ if quantity is None:
+ quantity = self._quantity
+ moments_mean_quantity = qe.estimate_mean(quantity)
+ kurtosis = qe.level_kurtosis(quantity, moments_mean_quantity)
+ return kurtosis
+
-def estimate_domain(quantity, sample_storage, quantile=None):
+def consistency_check(quantity, sample_storage=None):
"""
- Estimate moments domain from MLMC samples.
- :param quantity: mlmc.quantity.Quantity instance, represents the real quantity
- :param sample_storage: mlmc.sample_storage.SampleStorage instance, provides all the samples
- :param quantile: float in interval (0, 1), None means whole sample range
- :return: lower_bound, upper_bound
+ Check consistency between fine and coarse level samples in MLMC.
+
+ :param quantity: mlmc.quantity.Quantity instance
+ :param sample_storage: mlmc.sample_storage.SampleStorage instance
+ :return: dict mapping level_id -> consistency metric
"""
- ranges = []
- if quantile is None:
- quantile = 0.01
+ fine_samples = {}
+ coarse_samples = {}
+
+ for chunk_spec in quantity.get_quantity_storage().chunks():
+ samples = quantity.samples(chunk_spec)
+ chunk, _ = mask_nan_samples(samples)
+
+ # Skip empty chunks
+ if chunk.shape[1] == 0:
+ continue
+ fine_samples.setdefault(chunk_spec.level_id, []).extend(chunk[:, :, 0])
+ if chunk_spec.level_id > 0:
+ coarse_samples.setdefault(chunk_spec.level_id, []).extend(chunk[:, :, 1])
+
+ cons_check_val = {}
for level_id in range(sample_storage.get_n_levels()):
- fine_samples = quantity.samples(ChunkSpec(level_id=level_id, n_samples=sample_storage.get_n_collected()[0]))[..., 0]
+ if level_id > 0:
+ fine_mean = np.mean(fine_samples[level_id])
+ coarse_mean = np.mean(coarse_samples[level_id])
+ diff_mean = np.mean(np.array(fine_samples[level_id]) - np.array(coarse_samples[level_id]))
+
+ fine_var = np.var(fine_samples[level_id])
+ coarse_var = np.var(coarse_samples[level_id])
+ diff_var = np.var(np.array(fine_samples[level_id]) - np.array(coarse_samples[level_id]))
- fine_samples = np.squeeze(fine_samples)
- ranges.append(np.percentile(fine_samples, [100 * quantile, 100 * (1 - quantile)]))
+ val = np.abs(coarse_mean - fine_mean + diff_mean) / (
+ 3 * (np.sqrt(coarse_var) + np.sqrt(fine_var) + np.sqrt(diff_var))
+ )
- ranges = np.array(ranges)
- return np.min(ranges[:, 0]), np.max(ranges[:, 1])
+ assert np.isclose(coarse_mean - fine_mean + diff_mean, 0)
+ assert val < 0.9
+ cons_check_val[level_id] = val
-def estimate_n_samples_for_target_variance(target_variance, prescribe_vars, n_ops, n_levels):
+ return cons_check_val
+
+
+def coping_with_high_kurtosis(vars, costs, kurtosis, kurtosis_threshold=100):
"""
- Estimate optimal number of samples for individual levels that should provide a target variance of
- resulting moment estimate.
- This also set given moment functions to be used for further estimates if not specified otherwise.
- :param target_variance: Constrain to achieve this variance.
- :param prescribe_vars: vars[ L, M] for all levels L and moments_fn M safe the (zeroth) constant moment with zero variance.
- :param n_ops: number of operations at each level
+ Adjust variance estimates if kurtosis is unusually high to avoid underestimation.
+
+ :param vars: ndarray of shape (L, M) with level variances for moments
+ :param costs: cost of computing samples per level
+ :param kurtosis: kurtosis of each level
+ :param kurtosis_threshold: threshold above which kurtosis is considered "high"
+ :return: adjusted vars ndarray
+ """
+ for l_id in range(2, vars.shape[0]):
+ if kurtosis[l_id] > kurtosis_threshold:
+ vars[l_id] = np.maximum(vars[l_id], 0.5 * vars[l_id - 1] * costs[l_id - 1] / costs[l_id])
+ return vars
+
+
+def estimate_n_samples_for_target_variance(target_variance, prescribe_vars, n_ops, n_levels, theta=0, kurtosis=None):
+ """
+ Estimate optimal number of samples per level to reach a target variance.
+
+ :param target_variance: desired variance for MLMC estimator
+ :param prescribe_vars: ndarray of level variances (L x M)
+ :param n_ops: cost/operations per level
:param n_levels: number of levels
- :return: np.array with number of optimal samples for individual levels and moments_fn, array (LxR)
+ :param theta: safety factor (0 ≤ theta < 1)
+ :param kurtosis: optional ndarray of kurtosis per level
+ :return: ndarray of optimal number of samples per moment function
"""
vars = prescribe_vars
+
+ if kurtosis is not None and len(vars) == len(kurtosis):
+ vars = coping_with_high_kurtosis(vars, n_ops, kurtosis)
+
sqrt_var_n = np.sqrt(vars.T * n_ops) # moments_fn in rows, levels in cols
- total = np.sum(sqrt_var_n, axis=1) # sum over levels
- n_samples_estimate = np.round((sqrt_var_n / n_ops).T * total / target_variance).astype(int) # moments_fn in cols
+ total = np.sum(sqrt_var_n, axis=1)
+ n_samples_estimate = np.round((sqrt_var_n / n_ops).T * total / target_variance).astype(int)
+ n_samples_estimate = 1 / (1 - theta) * n_samples_estimate
+
# Limit maximal number of samples per level
n_samples_estimate_safe = np.maximum(
- np.minimum(n_samples_estimate, vars * n_levels / target_variance), 2)
+ np.minimum(n_samples_estimate, vars * n_levels / target_variance),
+ 2
+ )
return np.max(n_samples_estimate_safe, axis=1).astype(int)
def calc_level_params(step_range, n_levels):
+ """
+ Compute level-dependent step sizes for MLMC.
+
+ :param step_range: tuple (h_fine, h_coarse)
+ :param n_levels: number of levels
+ :return: list of step sizes per level
+ """
assert step_range[0] > step_range[1]
level_parameters = []
for i_level in range(n_levels):
- if n_levels == 1:
- level_param = 1
- else:
- level_param = i_level / (n_levels - 1)
+ level_param = 1 if n_levels == 1 else i_level / (n_levels - 1)
level_parameters.append([step_range[0] ** (1 - level_param) * step_range[1] ** level_param])
-
return level_parameters
def determine_sample_vec(n_collected_samples, n_levels, sample_vector=None):
+ """
+ Determine the sample vector for bootstrapping or MLMC calculations.
+ """
if sample_vector is None:
sample_vector = n_collected_samples
if len(sample_vector) > n_levels:
@@ -408,45 +633,34 @@ def determine_sample_vec(n_collected_samples, n_levels, sample_vector=None):
def determine_level_parameters(n_levels, step_range):
"""
- Determine level parameters,
- In this case, a step of fine simulation at each level
- :param n_levels: number of MLMC levels
- :param step_range: simulation step range
- :return: List
+ Determine level parameters (fine-simulation step size per level); note this duplicates calc_level_params.
"""
assert step_range[0] > step_range[1]
level_parameters = []
for i_level in range(n_levels):
- if n_levels == 1:
- level_param = 1
- else:
- level_param = i_level / (n_levels - 1)
+ level_param = 1 if n_levels == 1 else i_level / (n_levels - 1)
level_parameters.append([step_range[0] ** (1 - level_param) * step_range[1] ** level_param])
-
return level_parameters
def determine_n_samples(n_levels, n_samples=None):
"""
- Set target number of samples for each level
- :param n_levels: number of levels
- :param n_samples: array of number of samples
- :return: None
+ Generate an array of target sample sizes for each level.
+
+ :param n_levels: number of MLMC levels
+ :param n_samples: int or list of 2 ints to define start/end for exponential spacing
+ :return: ndarray of sample sizes for each level
"""
if n_samples is None:
n_samples = [100, 3]
- # Num of samples to ndarray
+
n_samples = np.atleast_1d(n_samples)
- # Just maximal number of samples is set
if len(n_samples) == 1:
n_samples = np.array([n_samples[0], 3])
- # Create number of samples for all levels
if len(n_samples) == 2:
n0, nL = n_samples
n_samples = np.round(np.exp2(np.linspace(np.log2(n0), np.log2(nL), n_levels))).astype(int)
return n_samples
-
-
diff --git a/mlmc/level_simulation.py b/mlmc/level_simulation.py
index 67c5c23d..593f28c8 100644
--- a/mlmc/level_simulation.py
+++ b/mlmc/level_simulation.py
@@ -1,34 +1,37 @@
import attr
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional, Callable
from mlmc.quantity.quantity_spec import QuantitySpec
@attr.s(auto_attribs=True)
class LevelSimulation:
"""
- This class is used to pass simulation data at a given level between a Sampler and a SamplingPool
- User shouldn't change this class
+ Class for passing simulation configuration and metadata for a given level between
+ a Sampler and a SamplingPool.
+
+ User shouldn't modify this class manually.
"""
+
config_dict: Dict[Any, Any]
- # Calculate configuration.
+ # Level-specific simulation configuration dictionary.
- common_files: List[str] = None
- # List of files in the level workspace to copy/symlink to the sample workspace.
+ common_files: Optional[List[str]] = None
+ # List of files in the level workspace to copy or symlink to the sample workspace.
need_sample_workspace: bool = False
- # If the simulation needs sample workspace at all.
+ # Whether the simulation requires an individual workspace for each sample.
- task_size: int = 0
- # Relative size of the simulation at this level.
- # When using PBS, keep in mind that the pbs job size is the sum of task_sizes, and if this sum is above 1,
- # the job is scheduled and PBS scheduler manages it
+ task_size: float = 0.0
+ # Relative size (or computational cost) of the simulation task at this level.
+ # When using PBS or SLURM, note that the job size is the sum of task_sizes.
+ # If this sum exceeds 1.0, the job is queued and scheduled by the system.
- ### User shouldn't modify the following attributes ###
- _calculate: Any = None
- # Calculate method
+ ### Internal attributes — users should not modify these ###
+ _calculate: Optional[Callable] = None
+ # Calculation method used internally by the sampler.
- _level_id: int = None
- # Level id is set by mlmc.sampler.Sampler. It is internal variable and user shouldn't change it.
+ _level_id: Optional[int] = None
+ # Level identifier, set automatically by mlmc.sampler.Sampler.
- _result_format: List[QuantitySpec] = None
- # Simulation result format
+ _result_format: Optional[List[QuantitySpec]] = None
+ # Format specification for simulation results (defined by QuantitySpec instances).
diff --git a/mlmc/moments.py b/mlmc/moments.py
index 2329566e..ba1f932a 100644
--- a/mlmc/moments.py
+++ b/mlmc/moments.py
@@ -5,9 +5,26 @@
class Moments:
"""
- Class for calculating moments of a random variable
+ Base class for computing moment functions of a random variable.
+
+ Provides transformation, scaling, and evaluation utilities common
+ to various types of generalized moment bases (monomial, Fourier, Legendre, etc.).
"""
+
def __init__(self, size, domain, log=False, safe_eval=True):
+ """
+ Initialize the moment function set.
+
+ :param size: int
+ Number of moment functions.
+ :param domain: tuple(float, float)
+ Domain of the input variable (min, max).
+ :param log: bool
+ If True, use logarithmic transformation of the domain.
+ :param safe_eval: bool
+ If True, clip transformed values outside the reference domain
+ and replace them with NaN.
+ """
assert size > 0
self.size = size
self.domain = domain
@@ -25,6 +42,7 @@ def __init__(self, size, domain, log=False, safe_eval=True):
self._linear_scale = (self.ref_domain[1] - self.ref_domain[0]) / diff
self._linear_shift = lin_domain[0]
+ # Define transformation and inverse transformation functions
if safe_eval and log:
self.transform = lambda val: self.clip(self.linear(np.log(val)))
self.inv_transform = lambda ref: np.exp(self.inv_linear(ref))
@@ -40,78 +58,153 @@ def __init__(self, size, domain, log=False, safe_eval=True):
def __eq__(self, other):
"""
- Compare two moment functions. Equal if they returns same values.
+ Compare two Moments objects for equality.
+
+ :param other: Moments
+ Another Moments instance.
+ :return: bool
+ True if both instances have the same parameters and configuration.
"""
- return type(self) is type(other) \
- and self.size == other.size \
- and np.all(self.domain == other.domain) \
- and self._is_log == other._is_log \
- and self._is_clip == other._is_clip
+ return (
+ type(self) is type(other)
+ and self.size == other.size
+ and np.all(self.domain == other.domain)
+ and self._is_log == other._is_log
+ and self._is_clip == other._is_clip
+ )
def change_size(self, size):
"""
- Return moment object with different size.
- :param size: int, new number of _moments_fn
+ Return a new moment object with a different number of basis functions.
+
+ :param size: int
+ New number of moment functions.
+ :return: Moments
+ New instance of the same class with updated size.
"""
return self.__class__(size, self.domain, self._is_log, self._is_clip)
def clip(self, value):
"""
- Remove outliers and replace them with NaN
- :param value: array of numbers
- :return: masked_array, out
+ Clip values to the reference domain, replacing outliers with NaN.
+
+ :param value: array-like
+ Input data to be clipped.
+ :return: ndarray
+ Array with out-of-bound values replaced by NaN.
"""
- # Masked array
out = ma.masked_outside(value, self.ref_domain[0], self.ref_domain[1])
- # Replace outliers with NaN
return ma.filled(out, np.nan)
def linear(self, value):
+ """Apply linear transformation to reference domain."""
return (value - self._linear_shift) * self._linear_scale + self.ref_domain[0]
def inv_linear(self, value):
+ """Inverse linear transformation back to the original domain."""
return (value - self.ref_domain[0]) / self._linear_scale + self._linear_shift
def __call__(self, value):
+ """Evaluate all moment functions for the given value(s)."""
return self._eval_all(value, self.size)
def eval(self, i, value):
- return self._eval_all(value, i+1)[:, -1]
+ """
+ Evaluate the i-th moment function.
+
+ :param i: int
+ Index of the moment function to evaluate (0-based).
+ :param value: float or array-like
+ Input value(s).
+ :return: ndarray
+ Values of the i-th moment function.
+ """
+ return self._eval_all(value, i + 1)[:, -1]
def eval_single_moment(self, i, value):
"""
- Be aware this implementation is inefficient for large i
- :param i: int, order of moment
- :param value: float
- :return: np.ndarray
+ Evaluate a single moment function (less efficient for large i).
+
+ :param i: int
+ Order of the moment.
+ :param value: float or array-like
+ Input value(s).
+ :return: ndarray
+ Evaluated moment values.
"""
- return self._eval_all(value, i+1)[..., i]
+ return self._eval_all(value, i + 1)[..., i]
def eval_all(self, value, size=None):
+ """
+ Evaluate all moments up to the specified size.
+
+ :param value: float or array-like
+ Input value(s).
+ :param size: int or None
+ Number of moments to evaluate. If None, use self.size.
+ :return: ndarray
+ Matrix of evaluated moments.
+ """
if size is None:
size = self.size
return self._eval_all(value, size)
def eval_all_der(self, value, size=None, degree=1):
+ """
+ Evaluate derivatives of all moment functions.
+
+ :param value: float or array-like
+ Input value(s).
+ :param size: int or None
+ Number of moments to evaluate.
+ :param degree: int
+ Derivative degree (1 for first derivative, etc.).
+ :return: ndarray
+ Matrix of evaluated derivatives.
+ """
if size is None:
size = self.size
return self._eval_all_der(value, size, degree)
def eval_diff(self, value, size=None):
+ """
+ Evaluate first derivatives of all moment functions.
+
+ :param value: float or array-like
+ Input value(s).
+ :param size: int or None
+ Number of moments to evaluate.
+ :return: ndarray
+ Matrix of first derivatives.
+ """
if size is None:
size = self.size
return self._eval_diff(value, size)
def eval_diff2(self, value, size=None):
+ """
+ Evaluate second derivatives of all moment functions.
+
+ :param value: float or array-like
+ Input value(s).
+ :param size: int or None
+ Number of moments to evaluate.
+ :return: ndarray
+ Matrix of second derivatives.
+ """
if size is None:
size = self.size
return self._eval_diff2(value, size)
+# -------------------------------------------------------------------------
+# Specific moment types
+# -------------------------------------------------------------------------
class Monomial(Moments):
"""
- Monomials generalized moments
+ Monomial basis functions for generalized moment evaluation.
"""
+
def __init__(self, size, domain=(0, 1), ref_domain=None, log=False, safe_eval=True):
if ref_domain is not None:
self.ref_domain = ref_domain
@@ -120,33 +213,49 @@ def __init__(self, size, domain=(0, 1), ref_domain=None, log=False, safe_eval=Tr
super().__init__(size, domain, log=log, safe_eval=safe_eval)
def _eval_all(self, value, size):
- # Create array from values and transform values outside the ref domain
+ """
+ Evaluate monomial basis (Vandermonde matrix).
+
+ :param value: array-like
+ Input values.
+ :param size: int
+ Number of moments to compute.
+ :return: ndarray
+ Vandermonde matrix of monomials.
+ """
t = self.transform(np.atleast_1d(value))
- # Vandermonde matrix
return np.polynomial.polynomial.polyvander(t, deg=size - 1)
def eval(self, i, value):
+ """Evaluate the i-th monomial t^i."""
t = self.transform(np.atleast_1d(value))
- return t**i
+ return t ** i
class Fourier(Moments):
"""
- Fourier functions generalized moments
+ Fourier basis functions for generalized moment evaluation.
"""
- def __init__(self, size, domain=(0, 2*np.pi), ref_domain=None, log=False, safe_eval=True):
+
+ def __init__(self, size, domain=(0, 2 * np.pi), ref_domain=None, log=False, safe_eval=True):
if ref_domain is not None:
self.ref_domain = ref_domain
else:
- self.ref_domain = (0, 2*np.pi)
-
+ self.ref_domain = (0, 2 * np.pi)
super().__init__(size, domain, log=log, safe_eval=safe_eval)
def _eval_all(self, value, size):
- # Transform values
+ """
+ Evaluate Fourier moment basis (cosine/sine terms).
+
+ :param value: array-like
+ Input values.
+ :param size: int
+ Number of moments to compute.
+ :return: ndarray
+ Matrix of evaluated Fourier functions.
+ """
t = self.transform(np.atleast_1d(value))
-
- # Half the number of moments
R = int(size / 2)
shorter_sin = 1 - int(size % 2)
k = np.arange(1, R + 1)
@@ -154,26 +263,33 @@ def _eval_all(self, value, size):
res = np.empty((len(t), size))
res[:, 0] = 1
-
- # Odd column index
res[:, 1::2] = np.cos(kx[:, :])
- # Even column index
res[:, 2::2] = np.sin(kx[:, : R - shorter_sin])
return res
def eval(self, i, value):
+ """
+ Evaluate a single Fourier basis function.
+
+ :param i: int
+ Index of the moment function.
+ :param value: float or array-like
+ Input values.
+ :return: ndarray
+ Evaluated function values.
+ """
t = self.transform(np.atleast_1d(value))
if i == 0:
return 1
elif i % 2 == 1:
- return np.sin( (i - 1) / 2 * t)
+ return np.sin((i - 1) / 2 * t)
else:
return np.cos(i / 2 * t)
class Legendre(Moments):
"""
- Legendre polynomials generalized moments
+ Legendre polynomial basis functions for generalized moments.
"""
def __init__(self, size, domain, ref_domain=None, log=False, safe_eval=True):
@@ -182,6 +298,7 @@ def __init__(self, size, domain, ref_domain=None, log=False, safe_eval=True):
else:
self.ref_domain = (-1, 1)
+ # Precompute derivative matrices
self.diff_mat = np.zeros((size, size))
for n in range(size - 1):
self.diff_mat[n, n + 1::2] = 2 * n + 1
@@ -190,19 +307,26 @@ def __init__(self, size, domain, ref_domain=None, log=False, safe_eval=True):
super().__init__(size, domain, log, safe_eval)
def _eval_value(self, x, size):
- return np.polynomial.legendre.legvander(x, deg=size-1)
+ """Evaluate Legendre polynomials up to the given order."""
+ return np.polynomial.legendre.legvander(x, deg=size - 1)
def _eval_all(self, value, size):
+ """Evaluate all Legendre polynomials."""
value = self.transform(np.atleast_1d(value))
return np.polynomial.legendre.legvander(value, deg=size - 1)
def _eval_all_der(self, value, size, degree=1):
"""
- Derivative of Legendre polynomials
- :param value: values to evaluate
- :param size: number of moments
- :param degree: degree of derivative
- :return:
+ Evaluate derivatives of Legendre polynomials.
+
+ :param value: array-like
+ Points at which to evaluate.
+ :param size: int
+ Number of moment functions.
+ :param degree: int
+ Derivative order.
+ :return: ndarray
+ Matrix of derivative values.
"""
value = self.transform(np.atleast_1d(value))
eval_values = np.empty((value.shape + (size,)))
@@ -211,7 +335,7 @@ def _eval_all_der(self, value, size, degree=1):
if s == 0:
coef = [1]
else:
- coef = np.zeros(s+1)
+ coef = np.zeros(s + 1)
coef[-1] = 1
coef = np.polynomial.legendre.legder(coef, degree)
@@ -219,26 +343,38 @@ def _eval_all_der(self, value, size, degree=1):
return eval_values
def _eval_diff(self, value, size):
+ """Evaluate first derivatives using precomputed differentiation matrix."""
t = self.transform(np.atleast_1d(value))
P_n = np.polynomial.legendre.legvander(t, deg=size - 1)
return P_n @ self.diff_mat
def _eval_diff2(self, value, size):
+ """Evaluate second derivatives using precomputed differentiation matrix."""
t = self.transform(np.atleast_1d(value))
P_n = np.polynomial.legendre.legvander(t, deg=size - 1)
return P_n @ self.diff2_mat
class TransformedMoments(Moments):
+ """
+ Linearly transformed moment basis.
+
+ Creates a new set of moment functions as linear combinations
+ of another existing set of basis functions.
+ """
+
def __init__(self, other_moments, matrix):
"""
- Set a new moment functions as linear combination of the previous.
- new_moments = matrix . old_moments
+ Initialize transformed moment functions.
+
+ :param other_moments: Moments
+ Original set of moment functions.
+ :param matrix: ndarray
+ Linear transformation matrix where:
+ new_moments = matrix @ old_moments
- We assume that new_moments[0] is still == 1. That means
- first row of the matrix must be (1, 0 , ...).
- :param other_moments: Original _moments_fn.
- :param matrix: Linear combinations of the original _moments_fn.
+ The first row must correspond to (1, 0, 0, ...),
+ ensuring that new_moments[0] = 1.
"""
n, m = matrix.shape
assert m == other_moments.size
@@ -248,27 +384,34 @@ def __init__(self, other_moments, matrix):
self._transform = matrix
def __eq__(self, other):
- return type(self) is type(other) \
- and self.size == other.size \
- and self._origin == other._origin \
- and np.all(self._transform == other._transform)
+ """Check equality with another TransformedMoments object."""
+ return (
+ type(self) is type(other)
+ and self.size == other.size
+ and self._origin == other._origin
+ and np.all(self._transform == other._transform)
+ )
def _eval_all(self, value, size):
+ """Evaluate all transformed moment functions."""
orig_moments = self._origin._eval_all(value, self._origin.size)
x1 = np.matmul(orig_moments, self._transform.T)
return x1[..., :size]
def _eval_all_der(self, value, size, degree=1):
+ """Evaluate derivatives of transformed moment functions."""
orig_moments = self._origin._eval_all_der(value, self._origin.size, degree=degree)
x1 = np.matmul(orig_moments, self._transform.T)
return x1[..., :size]
def _eval_diff(self, value, size):
+ """Evaluate first derivatives of transformed moment functions."""
orig_moments = self._origin.eval_diff(value, self._origin.size)
x1 = np.matmul(orig_moments, self._transform.T)
return x1[..., :size]
def _eval_diff2(self, value, size):
+ """Evaluate second derivatives of transformed moment functions."""
orig_moments = self._origin.eval_diff2(value, self._origin.size)
x1 = np.matmul(orig_moments, self._transform.T)
return x1[..., :size]
diff --git a/mlmc/plot/diagnostic_plots.py b/mlmc/plot/diagnostic_plots.py
new file mode 100644
index 00000000..cefd0520
--- /dev/null
+++ b/mlmc/plot/diagnostic_plots.py
@@ -0,0 +1,166 @@
+import numpy as np
+import matplotlib
+
+matplotlib.rcParams.update({'font.size': 22})
+import matplotlib.pyplot as plt
+
+
+def log_var_per_level(l_vars, levels=None, moments=[0], err_l_vars=None):
+ """
+ Plot log₂ of variance per level and fit a slope to estimate the decay rate β.
+
+ The function plots the base-2 logarithm of the variance for each level
+ and fits a linear model to estimate the convergence rate β, based on
+ the slope of log₂(variance) vs. level.
+
+ :param l_vars: Array of shape (n_levels, n_moments) representing
+ the variance of each moment at each level.
+ :param levels: Optional array of level indices (default: np.arange(n_levels)).
+ :param moments: List of moment indices to include in the plot.
+ :param err_l_vars: Optional array of errors corresponding to l_vars.
+ :return: None
+ """
+ n_levels = l_vars.shape[0]
+ if levels is None:
+ levels = np.arange(n_levels)
+
+ fig, ax = plt.subplots(figsize=(8, 5))
+
+ for m in moments:
+ y = np.log2(l_vars[:, m])
+ ax.plot(levels, y, 'o-', label=f'm={m}')
+
+ slope, intercept = np.polyfit(levels, y, 1)
+ beta = -slope
+ ax.plot(
+ levels,
+ slope * levels + intercept,
+ '--',
+ label=f'fit m={m}: slope={slope:.2f}, beta≈{beta:.2f}'
+ )
+
+ ax.set_ylabel(r'$\log_2 \, V_\ell$')
+ ax.set_xlabel('level $\ell$')
+ ax.legend()
+ ax.grid(True, which="both")
+ plt.tight_layout()
+ plt.show()
+
+
+def log_mean_per_level(l_means, err_means=0, err_l_means=0, moments=[1, 2, 3, 4]):
+ """
+ Plot log₂ of absolute mean per level for specified statistical moments.
+
+ :param l_means: Array of mean values per level and moment.
+ :param err_means: Optional array of mean estimation errors (unused).
+ :param err_l_means: Optional array of level-mean estimation errors (unused).
+ :param moments: List of moment indices to include in the plot.
+ :return: None
+ """
+ fig, ax1 = plt.subplots(figsize=(8, 5))
+ print("l means ", l_means)
+ for m in moments:
+ line2, = ax1.plot(np.log2(np.abs(l_means[:, m])), label=f"m={m}", marker="s")
+
+ ax1.set_ylabel('log' + r'$_2$' + 'mean')
+ ax1.set_xlabel('level' + r'$l$')
+ plt.legend()
+ plt.tight_layout()
+ plt.show()
+
+
+def sample_cost_per_level(costs, levels=None):
+ """
+ Plot log₂ of sample cost per level and fit a slope to estimate γ.
+
+ The slope of the linear regression line provides an estimate of the
+ cost scaling parameter γ.
+
+ :param costs: Array of computational costs per sample for each level.
+ :param levels: Optional array of level indices (default: 0, 1, ...).
+ :return: Estimated γ (float), the slope of the fitted line.
+ """
+ n_levels = len(costs)
+ if levels is None:
+ levels = np.arange(n_levels)
+
+ y = np.log2(costs)
+ slope, intercept = np.polyfit(levels, y, 1)
+ gamma = slope
+
+ fig, ax = plt.subplots(figsize=(8, 5))
+ ax.plot(levels, y, 'o-', label='log2(cost)')
+ ax.plot(
+ levels,
+ slope * levels + intercept,
+ '--',
+ label=f'fit: slope={slope:.2f}, gamma≈{gamma:.2f}'
+ )
+
+ ax.set_ylabel(r'$\log_2 \, C_\ell$')
+ ax.set_xlabel('level $\ell$')
+ ax.legend()
+ ax.grid(True, which="both")
+ plt.tight_layout()
+ plt.show()
+
+ return gamma
+
+
+def variance_to_cost_ratio(l_vars, costs, moments=[1, 2, 3, 4]):
+ """
+ Plot the log₂ of variance-to-cost ratio per level for given statistical moments.
+
+ The ratio Vₗ/Cₗ is computed for each level, and the slope of its
+ log₂-linear fit indicates the decay behavior relative to computational cost.
+
+ :param l_vars: Array of variances per level and moment (shape: n_levels × n_moments).
+ :param costs: Array of costs per sample for each level.
+ :param moments: List of moment indices to include in the plot.
+ :return: None
+ """
+ print("l_vars ", l_vars)
+ print(costs)
+ n_levels = l_vars.shape[0]
+ levels = np.arange(n_levels)
+ fig, ax1 = plt.subplots(figsize=(8, 5))
+ print('costs ', costs)
+ print("levels ", levels)
+ for m in moments:
+ line2, = ax1.plot(np.log2(l_vars[:, m] / costs), label=f"m={m}", marker="s")
+
+ print("l vars ", l_vars[:, m])
+ print("np.log2(l_vars[:, m]/costs) ", np.log2(l_vars[:, m] / costs))
+
+ # Fit a straight line: log2(V/C) ≈ a + b * level
+ coeffs = np.polyfit(levels, np.log2(l_vars[:, m] / costs), 1)
+ slope, intercept = coeffs[0], coeffs[1]
+ ax1.plot(levels, slope * levels + intercept, '--', label=f'fit: slope={slope:.2f}')
+
+ ax1.set_ylabel('log' + r'$_2$' + 'variance to cost ratio')
+ ax1.set_xlabel('level' + r'$l$')
+ plt.legend()
+ plt.tight_layout()
+ plt.show()
+
+
+def kurtosis_per_level(means, l_means, err_means=0, err_l_means=0, moments=[1, 2, 3, 4]):
+ """
+ Plot log₂ of mean values per level (often used for analyzing kurtosis trends).
+
+ :param means: Array of global mean values per moment (unused in plotting).
+ :param l_means: Array of level-wise mean values per moment.
+ :param err_means: Optional array of mean estimation errors (unused).
+ :param err_l_means: Optional array of level-mean estimation errors (unused).
+ :param moments: List of moment indices to include in the plot.
+ :return: None
+ """
+ fig, ax1 = plt.subplots(figsize=(8, 5))
+ for m in moments:
+ line2, = ax1.plot(np.log2(np.abs(l_means[:, m])), label=f"m={m}", marker="s")
+
+ ax1.set_ylabel('log ' + r'$_2$ ' + 'mean')
+ ax1.set_xlabel('level ' + r'$l$')
+ plt.legend()
+ plt.tight_layout()
+ plt.show()
diff --git a/mlmc/plot/violinplot.py b/mlmc/plot/violinplot.py
index 85e9a1b1..74dfe836 100644
--- a/mlmc/plot/violinplot.py
+++ b/mlmc/plot/violinplot.py
@@ -2,18 +2,56 @@
import seaborn
from mlmc.plot.plots import _show_and_save
import matplotlib
+
+# Set default font size for all plots
matplotlib.rcParams.update({'font.size': 22})
+
import matplotlib.pyplot as plt
class ViolinPlotter(seaborn.categorical._ViolinPlotter):
+ """
+ Custom subclass of seaborn's internal _ViolinPlotter to modify how quartiles
+ and mean lines are drawn inside a violin plot.
+
+ This class extends the default behavior by drawing the 25th, 50th, and 75th
+ percentiles as dashed lines, and the mean as a solid line across the violin body.
+ """
+
def draw_quartiles(self, ax, data, support, density, center, split=False):
+ """
+ Draw quartile and mean lines on the violin plot.
+
+ Parameters
+ ----------
+ ax : matplotlib.axes.Axes
+ The axes object to draw on.
+ data : array-like
+ Input data for a single violin.
+ support : array-like
+ Grid over which the kernel density was evaluated.
+ density : array-like
+ Corresponding kernel density values.
+ center : float
+ Position of the violin on the categorical axis.
+ split : bool, default=False
+ Whether the violin is split by hue (two sides).
+
+ Notes
+ -----
+ - The mean is drawn as a solid line.
+ - Quartiles (25%, 50%, 75%) are drawn as dashed lines.
+ - The density scaling follows seaborn’s internal behavior.
+ """
+ # Compute quartiles and mean of the data
q25, q50, q75 = np.percentile(data, [25, 50, 75])
mean = np.mean(data)
+ # Draw mean line (solid)
self.draw_to_density(ax, center, mean, support, density, split,
linewidth=self.linewidth)
+ # Draw quartile lines (dashed)
self.draw_to_density(ax, center, q25, support, density, split,
linewidth=self.linewidth,
dashes=[self.linewidth * 1.5] * 2)
@@ -33,39 +71,110 @@ def violinplot(
bw="scott", cut=2, scale="area", scale_hue=True, gridsize=100,
width=.8, inner="box", split=False, dodge=True, orient=None,
linewidth=None, color=None, palette=None, saturation=.75,
- ax=None, **kwargs,):
-
- plotter = ViolinPlotter(x, y, hue, data, order, hue_order,
- bw, cut, scale, scale_hue, gridsize,
- width, inner, split, dodge, orient, linewidth,
- color, palette, saturation)
-
+ ax=None, **kwargs,
+):
+ """
+ Wrapper around the custom ViolinPlotter class to generate a violin plot.
+
+ Parameters
+ ----------
+ x, y, hue : str, optional
+ Variable names for the categorical axis, numeric axis, and hue grouping.
+ data : DataFrame, optional
+ Dataset containing the variables.
+ order, hue_order : list, optional
+ Order of categories for x and hue variables.
+ bw : str or float, default="scott"
+ Bandwidth method or scalar for kernel density estimation.
+ cut : float, default=2
+ How far the violin extends beyond extreme data points.
+ scale : {"area", "count", "width"}, default="area"
+ Method for scaling the width of each violin.
+ scale_hue : bool, default=True
+ Whether to scale by hue levels within each category.
+ gridsize : int, default=100
+ Number of points in the KDE grid.
+ width : float, default=0.8
+ Width of each violin.
+ inner : {"box", "quartile", "point", "stick", None}, default="box"
+ Representation inside each violin.
+ split : bool, default=False
+ Draw half-violins when hue is used.
+ dodge : bool, default=True
+ Separate violins for each hue level.
+ orient : {"v", "h"}, optional
+ Plot orientation; inferred if not specified.
+ linewidth : float, optional
+ Width of the line used for drawing violins and quartiles.
+ color : matplotlib color, optional
+ Color for all violins.
+ palette : str or sequence, optional
+ Color palette for hue levels.
+ saturation : float, default=0.75
+ Saturation for colors.
+ ax : matplotlib.axes.Axes, optional
+ Axes object to draw on; created if None.
+ **kwargs :
+ Additional arguments passed to seaborn’s internal methods.
+
+ Returns
+ -------
+ ax : matplotlib.axes.Axes
+ The axes containing the drawn violin plot.
+ """
+ # Initialize a custom violin plotter instance
+ plotter = ViolinPlotter(
+ x, y, hue, data, order, hue_order,
+ bw, cut, scale, scale_hue, gridsize,
+ width, inner, split, dodge, orient, linewidth,
+ color, palette, saturation
+ )
+
+ # Create a new axes if none provided
if ax is None:
ax = plt.gca()
+ # Draw the plot using the seaborn-based custom plotter
plotter.plot(ax)
return ax
def fine_coarse_violinplot(data_frame):
+ """
+ Generate a split violin plot comparing fine and coarse simulation samples per level.
+
+ Parameters
+ ----------
+ data_frame : pandas.DataFrame
+ Must contain the columns:
+ - 'level' : int, simulation level
+ - 'samples' : float, sample values
+ - 'type' : str, either 'fine' or 'coarse'
+
+ Notes
+ -----
+ - Uses log scale on the y-axis.
+ - Calls `_show_and_save` to display and save the resulting plot.
+ - Produces a split violin plot (fine vs coarse) for each level.
+ """
+ # Create a single subplot for the violin plot
fig, axes = plt.subplots(1, 1, figsize=(22, 10))
- # mean with confidence interval
- # sns.pointplot(x='level', y='samples', hue='type', data=data_frame, estimator=np.mean,
- # palette="Set2", join=False, ax=axes)
-
- # line is not suitable for our purpose
- # sns.lineplot(x="level", y="samples", hue="type",# err_style="band", ci='sd'
- # estimator=np.median, data=data_frame, ax=axes)
-
- violinplot(x="level", y="samples", hue='type', data=data_frame, palette="Set2",
- split=True, scale="area", inner="quartile", ax=axes)
+ # Draw split violin plot for 'fine' and 'coarse' samples per level
+ violinplot(
+ x="level", y="samples", hue='type', data=data_frame,
+ palette="Set2", split=True, scale="area",
+ inner="quartile", ax=axes
+ )
+ # Use logarithmic y-scale (typical for MLMC variance/cost visualizations)
axes.set_yscale('log')
axes.set_ylabel('')
axes.set_xlabel('')
+
+ # Remove legend frame and content
axes.legend([], [], frameon=False)
+ # Display and save plot using utility function
_show_and_save(fig, "violinplot", "violinplot")
_show_and_save(fig, None, "violinplot")
-
diff --git a/mlmc/quantity/quantity.py b/mlmc/quantity/quantity.py
index bdeddb96..6a1696f4 100644
--- a/mlmc/quantity/quantity.py
+++ b/mlmc/quantity/quantity.py
@@ -13,12 +13,13 @@
def make_root_quantity(storage: SampleStorage, q_specs: List[QuantitySpec]):
"""
- Create a root quantity that has QuantityStorage as the input quantity,
+ Create a root quantity that has QuantityStorage as the input quantity.
QuantityStorage is the only class that directly accesses the stored data.
- Quantity type is created based on the q_spec parameter
- :param storage: SampleStorage
- :param q_specs: same as result format in simulation class
- :return: QuantityStorage
+ The returned QuantityStorage uses a QType built from provided QuantitySpec objects.
+
+ :param storage: SampleStorage instance that provides stored samples
+ :param q_specs: list of QuantitySpec describing the simulation result format
+ :return: QuantityStorage that wraps the provided SampleStorage with a matching QType
"""
dict_types = []
for q_spec in q_specs:
@@ -33,29 +34,37 @@ def make_root_quantity(storage: SampleStorage, q_specs: List[QuantitySpec]):
class Quantity:
+ """
+ Represents a quantity (a measurable value or expression) constructed from a QType,
+ an operation (callable) and zero-or-more input quantities. Quantities are lazy:
+ their actual data are returned by calling `samples(chunk_spec)`.
+
+ - qtype: structure description (QType)
+ - _operation: callable that takes sample-chunks from input_quantities and returns result chunks
+ - _input_quantities: dependencies (other Quantity instances)
+ """
+
def __init__(self, quantity_type, operation, input_quantities=[]):
"""
- Quantity class represents real quantity and also provides operation that can be performed with stored values.
- Each Quantity has Qtype which describes its structure.
- :param quantity_type: QType instance
- :param operation: function
- :param input_quantities: List[Quantity]
+ :param quantity_type: QType instance describing the shape/structure
+ :param operation: callable implementing the transform on input chunks
+ :param input_quantities: List[Quantity] dependencies (may be empty for constants)
"""
self.qtype = quantity_type
self._operation = operation
self._input_quantities = input_quantities
- # List of quantities on which the 'self' depends, their number have to match number of arguments
- # to the operation.
+ # Underlying QuantityStorage (inherited from one of the inputs, if present)
self._storage = self.get_quantity_storage()
- # QuantityStorage instance
+ # Selection identifier - used to tie selections together (set by select)
self._selection_id = self.set_selection_id()
- # Identifier of selection, should be set in select() method
+ # Validate that input quantities use consistent selection/storage
self._check_selection_ids()
def get_quantity_storage(self):
"""
- Get QuantityStorage instance
- :return: None, QuantityStorage
+ Find the first QuantityStorage among inputs (if any) and return it.
+
+ :return: QuantityStorage instance or None if not found
"""
if len(self._input_quantities) == 0:
return None
@@ -68,9 +77,11 @@ def get_quantity_storage(self):
def set_selection_id(self):
"""
- Set selection id
- selection id is None by default,
- but if we create new quantity from quantities that are result of selection we need to pass selection id
+ Determine the selection id for this Quantity. If inputs have a selection id
+ (created by select), inherit it; if multiple different selection ids are
+ present among inputs, raise an exception.
+
+ :return: selection id or None
"""
selection_id = None
for input_quantity in self._input_quantities:
@@ -82,12 +93,11 @@ def set_selection_id(self):
def _check_selection_ids(self):
"""
- Make sure the all input quantities come from the same QuantityStorage
+ Ensure that all input quantities that have selection ids share the same one.
+ If no QuantityStorage is present, nothing to check.
"""
- # All input quantities are QuantityConst instances
if self._storage is None:
return
- # Check selection ids otherwise
for input_quantity in self._input_quantities:
sel_id = input_quantity.selection_id()
if sel_id is None:
@@ -97,8 +107,10 @@ def _check_selection_ids(self):
def selection_id(self):
"""
- Get storage ids of all input quantities
- :return: List[int]
+ Return this Quantity's selection id. If not set, use id(self._storage) to
+ identify the underlying storage instance.
+
+ :return: selection identifier (int or None)
"""
if self._selection_id is not None:
return self._selection_id
@@ -109,71 +121,93 @@ def selection_id(self):
def size(self) -> int:
"""
- Quantity size from qtype
+ Return the number of scalar components described by the QType.
+
:return: int
"""
return self.qtype.size()
def get_cache_key(self, chunk_spec):
"""
- Create cache key
+ Create a cache key used by memoization for samples. We include:
+ - level id
+ - chunk id
+ - chunk size (derived from slice)
+ - id(self) to distinguish different quantity instances
+
+ :param chunk_spec: ChunkSpec
+ :return: tuple key
"""
chunk_size = None
if chunk_spec.chunk_slice is not None:
chunk_size = chunk_spec.chunk_slice.stop - chunk_spec.chunk_slice.start
- return (chunk_spec.level_id, chunk_spec.chunk_id, chunk_size, id(self)) # redundant parentheses needed due to py36, py37
+ return (chunk_spec.level_id, chunk_spec.chunk_id, chunk_size, id(self)) # py36/37 compatibility
@cached(custom_key_maker=get_cache_key)
def samples(self, chunk_spec):
"""
- Return list of sample chunks for individual levels.
- Possibly calls underlying quantities.
- :param chunk_spec: object containing chunk identifier level identifier and chunk_slice - slice() object
- :return: np.ndarray or None
+ Evaluate and return the data chunk for this quantity at the specified chunk_spec.
+ Calls samples(chunk_spec) recursively on inputs and passes the results to _operation.
+
+ :param chunk_spec: ChunkSpec object with level_id, chunk_id, and optional slice
+ :return: np.ndarray (M, chunk_size, 2) or None
"""
chunks_quantity_level = [q.samples(chunk_spec) for q in self._input_quantities]
return self._operation(*chunks_quantity_level)
def _reduction_op(self, quantities, operation):
"""
+ Helper for building a reduction Quantity from many inputs.
+
+ If any input is a non-constant Quantity, return a Quantity with the operation and inputs.
+ If all inputs are QuantityConst, evaluate the operation immediately and return QuantityConst.
+
:param quantities: List[Quantity]
- :param operation: function which is run with given quantities
+ :param operation: Callable to apply
:return: Quantity or QuantityConst
"""
for quantity in quantities:
if not isinstance(quantity, QuantityConst):
return Quantity(quantity.qtype, operation=operation, input_quantities=quantities)
- # Quantity from QuantityConst instances
+ # All constant -> precompute value
return QuantityConst(quantities[0].qtype, value=operation(*[q._value for q in quantities]))
def select(self, *args):
"""
- Performs sample selection based on conditions
- :param args: Quantity
- :return: Quantity
+ Apply boolean selection masks to this Quantity's samples.
+
+ :param args: One or more Quantity instances with BoolType that act as masks.
+ :return: Quantity representing the selected samples (mask applied on sample axis)
"""
- # args always has len() at least 1
+ # First mask
masks = args[0]
+ # Validate masks are BoolType
for quantity in args:
if not isinstance(quantity.qtype.base_qtype(), qt.BoolType):
raise Exception("Quantity: {} doesn't have BoolType, instead it has QType: {}"
.format(quantity, quantity.qtype.base_qtype()))
- # More conditions leads to default AND
+ # Combine multiple masks with logical AND
if len(args) > 1:
for m in args[1:]:
- masks = np.logical_and(masks, m) # method from this module
+ masks = np.logical_and(masks, m)
def op(x, mask):
- return x[..., mask, :] # [...sample size, cut number of samples, 2]
+ # Mask samples (reduce number of sample columns)
+ return x[..., mask, :] # [..., selected_samples, 2]
+
q = Quantity(quantity_type=self.qtype, input_quantities=[self, masks], operation=op)
- q._selection_id = id(q)
+ q._selection_id = id(q) # mark selection id to ensure consistency
return q
def __array_ufunc__(self, ufunc, method, *args, **kwargs):
+ """
+ Support numpy ufuncs by routing them through _method which constructs a new Quantity.
+ """
return Quantity._method(ufunc, method, *args, **kwargs)
+ # Arithmetic operator wrappers - build new Quantities or constants as needed.
def __add__(self, other):
return Quantity.create_quantity([self, Quantity.wrap(other)], Quantity.add_op)
@@ -207,19 +241,17 @@ def __rmod__(self, other):
@staticmethod
def create_quantity(quantities, operation):
"""
- Create new quantity (Quantity or QuantityConst) based on given quantities and operation.
- There are two scenarios:
- 1. At least one of quantities is Quantity instance then all quantities are considered to be input_quantities
- of new Quantity
- 2. All of quantities are QuantityConst instances then new QuantityConst is created
- :param quantities: List[Quantity]
- :param operation: function which is run with given quantities
- :return: Quantity
+ Create a new Quantity or QuantityConst. If any input is non-constant, return
+ a Quantity that will evaluate lazily. If all are constant, return QuantityConst.
+
+ :param quantities: list-like of Quantity / QuantityConst
+ :param operation: callable to combine inputs
+ :return: Quantity or QuantityConst
"""
for quantity in quantities:
if not isinstance(quantity, QuantityConst):
return Quantity(quantity.qtype, operation=operation, input_quantities=quantities)
- # Quantity from QuantityConst instances
+ # all constant -> precompute
return QuantityConst(quantities[0].qtype, value=operation(*[q._value for q in quantities]))
@staticmethod
@@ -245,35 +277,40 @@ def mod_op(x, y):
@staticmethod
def _process_mask(x, y, operator):
"""
- Create samples mask
- All values for sample must meet given condition, if any value doesn't meet the condition,
- whole sample is eliminated
- :param x: Quantity chunk
- :param y: Quantity chunk or int, float
- :param operator: operator module function
- :return: np.ndarray of bools
+ Create a boolean mask that marks full samples passing the given per-element condition.
+
+ The operator is applied elementwise; then we require that *every* element within the sample
+ passes to keep that sample. This collapses non-sample axes and returns a 1-D boolean array.
+
+ :param x: Quantity chunk (ndarray)
+ :param y: Quantity chunk or scalar
+ :param operator: operator module function like operator.lt
+ :return: 1-D boolean numpy array indexing samples
"""
mask = operator(x, y)
+ # collapse over spatial/time axes and per-sample axis, keep sample index axis
return mask.all(axis=tuple(range(mask.ndim - 2))).all(axis=1)
def _mask_quantity(self, other, op):
"""
- Create quantity that represent bool mask
- :param other: number or Quantity
- :param op: operation
- :return: Quantity
+ Helper to build a BoolType Quantity representing comparisons (>, <, ==, etc.)
+
+ :param other: Quantity or scalar to compare with
+ :param op: operation callable that builds the boolean mask from chunked arrays
+ :return: Quantity producing a boolean mask per sample
"""
bool_type = qt.BoolType()
- new_qtype = self.qtype
- new_qtype = new_qtype.replace_scalar(bool_type)
+ new_qtype = self.qtype.replace_scalar(bool_type)
other = Quantity.wrap(other)
+ # Only scalar base types support comparison
if not isinstance(self.qtype.base_qtype(), qt.ScalarType) or not isinstance(other.qtype.base_qtype(), qt.ScalarType):
raise TypeError("Quantity has base qtype {}. "
"Quantities with base qtype ScalarType are the only ones that support comparison".
format(self.qtype.base_qtype()))
return Quantity(quantity_type=new_qtype, input_quantities=[self, other], operation=op)
+ # Comparison operators returning boolean mask Quantities
def __lt__(self, other):
def lt_op(x, y):
return Quantity._process_mask(x, y, operator.lt)
@@ -281,59 +318,66 @@ def lt_op(x, y):
def __le__(self, other):
def le_op(x, y):
- return self._process_mask(x, y, operator.le)
+ return Quantity._process_mask(x, y, operator.le)
return self._mask_quantity(other, le_op)
def __gt__(self, other):
def gt_op(x, y):
- return self._process_mask(x, y, operator.gt)
+ return Quantity._process_mask(x, y, operator.gt)
return self._mask_quantity(other, gt_op)
def __ge__(self, other):
def ge_op(x, y):
- return self._process_mask(x, y, operator.ge)
+ return Quantity._process_mask(x, y, operator.ge)
return self._mask_quantity(other, ge_op)
def __eq__(self, other):
def eq_op(x, y):
- return self._process_mask(x, y, operator.eq)
+ return Quantity._process_mask(x, y, operator.eq)
return self._mask_quantity(other, eq_op)
def __ne__(self, other):
def ne_op(x, y):
- return self._process_mask(x, y, operator.ne)
+ return Quantity._process_mask(x, y, operator.ne)
return self._mask_quantity(other, ne_op)
@staticmethod
def pick_samples(chunk, subsample_params):
"""
- Pick samples some samples from chunk in order to have 'k' samples from 'n' after all chunks are processed
- Inspired by https://dl.acm.org/doi/10.1145/23002.23003 method S
+ Subsample a chunk using Method S (hypergeometric sampling) so that across chunks
+ we end up with k samples from n total.
- :param chunk: np.ndarray, shape M, N, 2, where N denotes number of samples in chunk
- :param subsample_params: instance of SubsampleParams class, it has two parameters:
- k: number of samples which we want to get from all chunks
- n: number of all samples among all chunks
- :return: np.ndarray
+ :param chunk: ndarray of shape (M, N, 2) where N is number of samples in this chunk
+ :param subsample_params: object with attributes k (remaining desired) and n (remaining available)
+ :return: selected sub-chunk array with shape (M, m, 2), where m is chosen by hypergeometric draw
"""
+ # Draw how many to pick from this chunk using hypergeometric distribution
size = scipy.stats.hypergeom(subsample_params.n, subsample_params.k, chunk.shape[1]).rvs(size=1)
out = RNG.choice(chunk, size=size, axis=1)
- subsample_params.k -= out.shape[1]
- subsample_params.n -= chunk.shape[1]
+ subsample_params.k -= out.shape[1] # reduce remaining desired
+ subsample_params.n -= chunk.shape[1] # reduce remaining available
return out
def subsample(self, sample_vec):
"""
- Subsampling
- :param sample_vec: list of number of samples at each level
- :return: Quantity
+ Build a Quantity that implements subsampling across levels to obtain a specified
+ number of samples per level (sample_vec).
+
+ Returns a Quantity whose operation will pick samples according to subsample params
+ stored per-level. Uses QuantityConst with a level-aware _adjust_value to pass
+ different parameters to each level chunk.
+
+ :param sample_vec: list-like of desired numbers of samples per level
+ :return: Quantity producing subsampled chunks
"""
class SubsampleParams:
+ """
+ Small helper to carry per-level parameters while subsampling across chunks.
+ """
def __init__(self, num_subsample, num_collected):
"""
- Auxiliary object for subsampling
- :param num_subsample: the number of samples we want to obtain from all samples
- :param num_collected: total number of samples
+ :param num_subsample: desired number of samples to pick from this level
+ :param num_collected: total available samples on this level
"""
self._orig_k = num_subsample
self._orig_n = num_collected
@@ -342,36 +386,41 @@ def __init__(self, num_subsample, num_collected):
self.n = num_collected
self.total_n = num_collected
- # SubsampleParams for each level
+ # Build params per level using level collected counts from the storage
subsample_level_params = {key: SubsampleParams(sample_vec[key], value)
for key, value in enumerate(self.get_quantity_storage().n_collected())}
- # Create a QuantityConst of dictionary in the sense of hashing dictionary items
+
+ # Wrap a hashed version of this parameters dict in a QuantityConst to feed into operation
quantity_subsample_params = Quantity.wrap(hash(frozenset(subsample_level_params.items())))
def adjust_value(values, level_id):
"""
- Custom implementation of QuantityConst.adjust_value()
- It allows us to get different parameters for different levels
+ Method assigned to QuantityConst._adjust_value so each level receives its own SubsampleParams.
+ Re-initializes k/n for repeated calls.
"""
subsample_l_params_obj = subsample_level_params[level_id]
subsample_l_params_obj.k = subsample_l_params_obj._orig_k
subsample_l_params_obj.n = subsample_l_params_obj._orig_n
subsample_l_params_obj.total_n = subsample_l_params_obj._orig_total_n
return subsample_l_params_obj
+
quantity_subsample_params._adjust_value = adjust_value
+ # Build resulting Quantity that uses pick_samples as its operation
return Quantity(quantity_type=self.qtype.replace_scalar(qt.BoolType()),
input_quantities=[self, quantity_subsample_params], operation=Quantity.pick_samples)
def __getitem__(self, key):
"""
- Get items from Quantity, quantity type must support brackets access
- :param key: str, int, tuple
- :return: Quantity
+ Create a Quantity representing indexed/sliced access into this quantity (similar to numpy slicing).
+
+ :param key: index or slice or tuple interpreted by qtype.get_key
+ :return: Quantity restricted to the requested key
"""
- new_qtype, start = self.qtype.get_key(key) # New quantity type
+ new_qtype, start = self.qtype.get_key(key) # New quantity type for selection
if not isinstance(self.qtype, qt.ArrayType):
+ # Convert key to a slice covering the sub-array if base is not ArrayType
key = slice(start, start + new_qtype.size())
def _make_getitem_op(y):
@@ -380,7 +429,11 @@ def _make_getitem_op(y):
return Quantity(quantity_type=new_qtype, input_quantities=[self], operation=_make_getitem_op)
def __getattr__(self, name):
- static_fun = getattr(self.qtype, name) # We support only static function call forwarding
+ """
+ Forward static QType methods as Quantity methods so that QType-level helpers are available
+ as operations on quantities (e.g., aggregation helpers).
+ """
+ static_fun = getattr(self.qtype, name)
def apply_on_quantity(*attr, **d_attr):
return static_fun(self, *attr, **d_attr)
@@ -389,11 +442,12 @@ def apply_on_quantity(*attr, **d_attr):
@staticmethod
def _concatenate(quantities, qtype, axis=0):
"""
- Concatenate level_chunks
- :param quantities: list of quantities
- :param qtype: QType
- :param axis: int
- :return: Quantity
+ Construct a Quantity that concatenates multiple quantities along a given axis.
+
+ :param quantities: sequence of Quantity instances
+ :param qtype: QType describing result shape
+ :param axis: axis along which concatenation happens
+ :return: Quantity that when evaluated concatenates input chunks
"""
def op_concatenate(*chunks):
y = np.concatenate(tuple(chunks), axis=axis)
@@ -403,12 +457,12 @@ def op_concatenate(*chunks):
@staticmethod
def _get_base_qtype(args_quantities):
"""
- Get quantities base Qtype
- :param args_quantities: list of quantities and other passed arguments,
- we expect at least one of the arguments is Quantity
- :return: base QType, ScalarType if any quantity has that base type, otherwise BoolType
+ Determine base QType for arithmetic/ufunc results: if any argument has a ScalarType base
+ return ScalarType(), otherwise BoolType().
+
+ :param args_quantities: iterable containing Quantity instances and possibly other values
+ :return: base QType instance
"""
- # Either all quantities are BoolType or it is considered to be ScalarType
for quantity in args_quantities:
if isinstance(quantity, Quantity):
if type(quantity.qtype.base_qtype()) == qt.ScalarType:
@@ -418,14 +472,17 @@ def _get_base_qtype(args_quantities):
@staticmethod
def _method(ufunc, method, *args, **kwargs):
"""
- Process input parameters to perform numpy ufunc.
- Get base QType of passed quantities, QuantityStorage instance, ...
- Determine the resulting QType from the first few samples
- :param ufunc: ufunc object that was called
- :param method: string, indicating which Ufunc method was called
- :param args: tuple of the input arguments to the ufunc
- :param kwargs: dictionary containing the optional input arguments of the ufunc
- :return: Quantity
+ Generic handler for numpy ufunc operations mapped to Quantities.
+
+ 1) Wrap inputs as Quantities.
+ 2) Determine the result QType by calling the ufunc on a small sample.
+ 3) Return a new Quantity that performs the ufunc at evaluation time.
+
+ :param ufunc: numpy ufunc object
+ :param method: method name to call on ufunc (e.g., '__call__' or 'reduce')
+ :param args: positional arguments passed to ufunc (may include Quantities)
+ :param kwargs: optional ufunc kwargs
+ :return: Quantity representing ufunc applied to inputs
"""
def _ufunc_call(*input_quantities_chunks):
return getattr(ufunc, method)(*input_quantities_chunks, **kwargs)
@@ -438,9 +495,10 @@ def _ufunc_call(*input_quantities_chunks):
@staticmethod
def wrap(value):
"""
- Convert flat, bool or array (list) to Quantity
- :param value: flat, bool, array (list) or Quantity
- :return: Quantity
+ Convert a primitive (int, float, bool), a numpy/list array, or an existing Quantity into a Quantity.
+
+ :param value: scalar, bool, list/ndarray, or Quantity
+ :return: Quantity or QuantityConst wrapping the value
"""
if isinstance(value, Quantity):
return value
@@ -459,27 +517,32 @@ def wrap(value):
@staticmethod
def _result_qtype(method, quantities):
"""
- Determine QType from evaluation with given method and first few samples from storage
- :param quantities: list of Quantities
- :param method: ufunc function
- :return: QType
+ Infer the resulting QType for an operation by evaluating the operation on the first
+ available chunk from each input quantity.
+
+ :param method: callable that takes input chunks and returns sample chunk result
+ :param quantities: list of Quantity instances
+ :return: inferred QType (ArrayType)
"""
chunks_quantity_level = []
for q in quantities:
quantity_storage = q.get_quantity_storage()
- # QuantityConst doesn't have QuantityStorage
+ # If QuantityConst (no storage), use an empty default ChunkSpec
if quantity_storage is None:
chunk_spec = ChunkSpec()
else:
chunk_spec = next(quantity_storage.chunks())
chunks_quantity_level.append(q.samples(chunk_spec))
- result = method(*chunks_quantity_level) # numpy array of [M, <=10, 2]
+ result = method(*chunks_quantity_level) # expect shape [M, <=10, 2]
qtype = qt.ArrayType(shape=result.shape[0], qtype=Quantity._get_base_qtype(quantities))
return qtype
@staticmethod
def QArray(quantities):
+ """
+ Build a Quantity representing an array-of-quantities aggregated into a single QType.
+ """
flat_quantities = np.array(quantities).flatten()
qtype = Quantity._check_same_qtype(flat_quantities)
array_type = qt.ArrayType(np.array(quantities).shape, qtype)
@@ -487,24 +550,40 @@ def QArray(quantities):
@staticmethod
def QDict(key_quantity):
+ """
+ Build a Quantity representing a dictionary of quantities.
+ :param key_quantity: iterable of (key, Quantity)
+ """
dict_type = qt.DictType([(key, quantity.qtype) for key, quantity in key_quantity])
return Quantity._concatenate(np.array(key_quantity)[:, 1], qtype=dict_type)
@staticmethod
def QTimeSeries(time_quantity):
+ """
+ Build a Quantity representing a time series constructed from (time, Quantity) pairs.
+ """
qtype = Quantity._check_same_qtype(np.array(time_quantity)[:, 1])
times = np.array(time_quantity)[:, 0]
return Quantity._concatenate(np.array(time_quantity)[:, 1], qtype=qt.TimeSeriesType(times=times, qtype=qtype))
-
@staticmethod
def QField(key_quantity):
+ """
+ Build a Quantity representing a field (mapping of locations to quantities).
+ """
Quantity._check_same_qtype(np.array(key_quantity)[:, 1])
field_type = qt.FieldType([(key, quantity.qtype) for key, quantity in key_quantity])
return Quantity._concatenate(np.array(key_quantity)[:, 1], qtype=field_type)
@staticmethod
def _check_same_qtype(quantities):
+ """
+ Validate that all provided quantities share the same QType.
+
+ :param quantities: sequence of Quantity instances
+ :return: the shared QType
+ :raise ValueError: if a mismatch is found
+ """
qtype = quantities[0].qtype
for quantity in quantities[1:]:
if qtype != quantity.qtype:
@@ -513,26 +592,28 @@ def _check_same_qtype(quantities):
class QuantityConst(Quantity):
+ """
+ Represents a constant quantity whose value is stored directly in the instance.
+ The samples() method returns the constant value broadcasted to the requested chunk shape.
+ """
+
def __init__(self, quantity_type, value):
"""
- QuantityConst class represents constant quantity and also provides operation
- that can be performed with quantity values.
- The quantity is constant, meaning that this class stores the data itself
- :param quantity_type: QType instance
- :param value: quantity value
+ :param quantity_type: QType describing the const
+ :param value: scalar or array-like value
"""
self.qtype = quantity_type
self._value = self._process_value(value)
+ # No input dependencies for a constant
self._input_quantities = []
- # List of input quantities should be empty,
- # but we still need this attribute due to storage_id() and level_ids() method
self._selection_id = None
def _process_value(self, value):
"""
- Reshape value if array, otherwise create array first
- :param value: quantity value
- :return: value with shape [M, 1, 1] which suitable for further broadcasting
+ Ensure the constant is stored as an array with axes [M, 1, 1] suitable for broadcasting.
+
+ :param value: scalar or array-like
+ :return: ndarray shaped for broadcasting into (M, chunk_size, 2)
"""
if isinstance(value, (int, float, bool)):
value = np.array([value])
@@ -540,42 +621,50 @@ def _process_value(self, value):
def selection_id(self):
"""
- Get storage ids of all input quantities
- :return: List[int]
+ Constants have no selection id (they are independent of storage).
"""
return self._selection_id
def _adjust_value(self, value, level_id=None):
"""
- Allows process value based on chunk_epc params (such as level_id, ...).
- The custom implementation is used in Qunatity.subsample method
- :param value: np.ndarray
- :param level_id: int
- :return: np.ndarray, particular type depends on implementation
+ Hook to adjust constant value per-level. By default returns the stored value unchanged.
+ This method gets overridden by consumers (e.g., subsample) to provide level-specific params.
+
+ :param value: constant value array
+ :param level_id: int, level index (optional)
+ :return: possibly adjusted value
"""
return value
@cached(custom_key_maker=Quantity.get_cache_key)
def samples(self, chunk_spec):
"""
- Get constant values with an enlarged number of axes
- :param chunk_spec: object containing chunk identifier level identifier and chunk_slice - slice() object
- :return: np.ndarray
+ Return the constant value, optionally adjusted for the given level via _adjust_value.
+
+ :param chunk_spec: ChunkSpec with level_id
+ :return: ndarray representing the constant for this chunk
"""
return self._adjust_value(self._value, chunk_spec.level_id)
class QuantityMean:
+ """
+ Container for aggregated mean/variance results computed by mlmc.quantity.quantity_estimate.estimate_mean.
+
+ - qtype: QType of the quantity
+ - _l_means: per-level mean contributions (L x M flattened)
+ - _l_vars: per-level variance contributions (L x M flattened)
+ - _n_samples: number of samples used per level
+ - _n_rm_samples: number of removed samples per level
+ """
def __init__(self, quantity_type, l_means, l_vars, n_samples, n_rm_samples):
"""
- QuantityMean represents results of mlmc.quantity_estimate.estimate_mean method
:param quantity_type: QType
- :param l_means: np.ndarray, shape: L, M
- :param l_vars: np.ndarray, shape: L, M
- :param n_samples: List, number of samples that were used for means at each level
- :param n_rm_samples: List, number of removed samples at each level,
- n_samples + n_rm_samples = all successfully collected samples
+ :param l_means: ndarray shape (L, M_flat) of level-wise mean contributions
+ :param l_vars: ndarray shape (L, M_flat) of level-wise variance contributions
+ :param n_samples: list/ndarray length L with number of samples used per level
+ :param n_rm_samples: list/ndarray length L with removed samples count per level
"""
self.qtype = quantity_type
self._mean = None
@@ -587,29 +676,43 @@ def __init__(self, quantity_type, l_means, l_vars, n_samples, n_rm_samples):
def _calculate_mean_var(self):
"""
- Calculates the overall estimates of the mean and the variance from the means and variances at each level
+ Compute overall mean and variance from per-level contributions:
+ mean = sum_l l_means[l]
+ var = sum_l (l_vars[l] / n_samples[l])
"""
self._mean = np.sum(self._l_means, axis=0)
self._var = np.sum(self._l_vars / self._n_samples[:, None], axis=0)
@property
def mean(self):
+ """
+ Reshaped overall mean according to QType.
+ """
if self._mean is None:
self._calculate_mean_var()
return self._reshape(self._mean)
@property
def var(self):
+ """
+ Reshaped overall variance according to QType.
+ """
if self._var is None:
self._calculate_mean_var()
return self._reshape(self._var)
@property
def l_means(self):
+ """
+ Level means reshaped according to QType for each level.
+ """
return np.array([self._reshape(means) for means in self._l_means])
@property
def l_vars(self):
+ """
+ Level variances reshaped according to QType for each level.
+ """
return np.array([self._reshape(vars) for vars in self._l_vars])
@property
@@ -622,74 +725,96 @@ def n_rm_samples(self):
def _reshape(self, data):
"""
- Reshape passed data, expected means or vars
- :param data: flatten np.ndarray
- :return: np.ndarray, reshaped data, the final data shape depends on the particular QType
- there is currently a reshape for ArrayType only
+ Reshape a flat data vector (flattened M) into the structure determined by qtype.
+
+ :param data: flattened ndarray
+ :return: reshaped ndarray according to qtype
"""
return self.qtype.reshape(data)
def __getitem__(self, key):
"""
- Get item from current QuantityMean, quantity type must support brackets access
- All levels means and vars are reshaped to their QType shape and then the item is gotten,
- ath the end, new QuantityMean instance is created with flatten selected means and vars
- :param key: str, int, tuple
- :return: QuantityMean
+ Index into QuantityMean similarly to Quantity.__getitem__:
+ reshape level-wise means/vars and select the requested key, then return a new QuantityMean.
+
+ :param key: indexing key (int, slice, str, etc.)
+ :return: QuantityMean restricted to the requested key
"""
new_qtype, start = self.qtype.get_key(key) # New quantity type
if not isinstance(self.qtype, qt.ArrayType):
key = slice(start, start + new_qtype.size())
- # Getting items, it performs reshape inside
+ # Selecting and reshaping level arrays
l_means = self.l_means[:, key]
l_vars = self.l_vars[:, key]
- return QuantityMean(quantity_type=new_qtype, l_means=l_means.reshape((l_means.shape[0], -1)),
- l_vars=l_vars.reshape((l_vars.shape[0], -1)), n_samples=self._n_samples,
+ return QuantityMean(quantity_type=new_qtype,
+ l_means=l_means.reshape((l_means.shape[0], -1)),
+ l_vars=l_vars.reshape((l_vars.shape[0], -1)),
+ n_samples=self._n_samples,
n_rm_samples=self._n_rm_samples)
class QuantityStorage(Quantity):
+ """
+ Special Quantity that provides direct access to SampleStorage.
+ It implements the bridge between storage and the Quantity abstraction.
+ """
+
def __init__(self, storage, qtype):
"""
- Special Quantity for direct access to SampleStorage
- :param storage: mlmc._sample_storage.SampleStorage child
- :param qtype: QType
+ :param storage: SampleStorage instance (in-memory or HDF5, etc.)
+ :param qtype: QType describing stored data structure
"""
+ # Store underlying storage reference and QType
self._storage = storage
self.qtype = qtype
+ # No operation or inputs required for storage root
self._input_quantities = []
self._operation = None
def level_ids(self):
"""
- Number of levels
+ Return list of available level ids from the SampleStorage.
:return: List[int]
"""
return self._storage.get_level_ids()
def selection_id(self):
"""
- Identity of QuantityStorage instance
+ Identity of this QuantityStorage (unique by object id).
:return: int
"""
return id(self)
def get_quantity_storage(self):
+ """
+ For QuantityStorage the storage is itself.
+ :return: self
+ """
return self
def chunks(self, level_id=None):
+ """
+ Proxy to SampleStorage.chunks which yields ChunkSpec instances describing available chunks.
+ :param level_id: optional level id to restrict chunks
+ :return: generator of ChunkSpec
+ """
return self._storage.chunks(level_id)
def samples(self, chunk_spec):
"""
- Get results for given level id and chunk id
- :param chunk_spec: object containing chunk identifier level identifier and chunk_slice - slice() object
- :return: Array[M, chunk size, 2]
+ Retrieve stored sample pairs for the requested level/chunk.
+
+ :param chunk_spec: ChunkSpec describing (level, chunk slice)
+ :return: ndarray shaped [M, chunk_size, 2] where M is number of result quantities
"""
return self._storage.sample_pairs_level(chunk_spec) # Array[M, chunk size, 2]
def n_collected(self):
+ """
+ Return number of collected results per level from the underlying SampleStorage.
+ :return: list of ints
+ """
return self._storage.get_n_collected()
diff --git a/mlmc/quantity/quantity_estimate.py b/mlmc/quantity/quantity_estimate.py
index 2436d7b9..e2553138 100644
--- a/mlmc/quantity/quantity_estimate.py
+++ b/mlmc/quantity/quantity_estimate.py
@@ -5,102 +5,146 @@
def mask_nan_samples(chunk):
"""
- Mask out samples that contain NaN in either fine or coarse part of the result
- :param chunk: np.ndarray [M, chunk_size, 2]
- :return: chunk: np.ndarray, number of masked samples: int
+ Remove (mask out) samples containing NaN values in either the fine or coarse part of the result.
+
+ :param chunk: np.ndarray of shape [M, chunk_size, 2]
+ M - quantity size (number of scalar components),
+ chunk_size - number of samples in the chunk,
+ 2 - fine and coarse parts of the result.
+ :return: (filtered_chunk, n_masked)
+ filtered_chunk: np.ndarray with invalid samples removed,
+ n_masked: int, number of masked (removed) samples.
"""
- # Fine and coarse moments_fn mask
+ # Identify any sample with NaNs in its fine or coarse component
mask = np.any(np.isnan(chunk), axis=0).any(axis=1)
return chunk[..., ~mask, :], np.count_nonzero(mask)
def cache_clear():
+ """
+ Clear cached Quantity sample evaluations.
+
+ Used before running MLMC estimations to ensure fresh data is fetched from storage.
+ """
mlmc.quantity.quantity.Quantity.samples.cache_clear()
mlmc.quantity.quantity.QuantityConst.samples.cache_clear()
-def estimate_mean(quantity):
+def estimate_mean(quantity, form="diff", operation_func=None, **kwargs):
"""
- MLMC mean estimator.
- The MLMC method is used to compute the mean estimate to the Quantity dependent on the collected samples.
- The squared error of the estimate (the estimator variance) is estimated using the central limit theorem.
- Data is processed by chunks, so that it also supports big data processing
- :param quantity: Quantity
- :return: QuantityMean which holds both mean and variance
+ Estimate the MLMC mean (and variance) of a Quantity using multilevel sampling.
+
+ The function computes per-level means and variances from simulation results.
+ Supports large datasets via chunked processing and handles NaN-masked samples.
+
+ :param quantity: Quantity instance to estimate.
+ :param form: str, type of estimation:
+ - "diff": estimate based on differences (fine - coarse) → standard MLMC approach.
+ - "fine": estimate using fine-level data only.
+ - "coarse": estimate using coarse-level data only.
+ :param operation_func: Optional transformation applied to chunk data before accumulation
+ (e.g., for moment or kurtosis computation).
+ :param kwargs: Additional keyword arguments passed to operation_func.
+ :return: QuantityMean object containing mean, variance, and sample statistics per level.
"""
+ # Reset cached quantity evaluations
cache_clear()
+
quantity_vec_size = quantity.size()
sums = None
sums_of_squares = None
- # initialization
+ # Initialize level-specific storage
quantity_storage = quantity.get_quantity_storage()
level_ids = quantity_storage.level_ids()
n_levels = np.max(level_ids) + 1
n_samples = [0] * n_levels
n_rm_samples = [0] * n_levels
+ # Iterate through data chunks
for chunk_spec in quantity_storage.chunks():
samples = quantity.samples(chunk_spec)
chunk, n_mask_samples = mask_nan_samples(samples)
n_samples[chunk_spec.level_id] += chunk.shape[1]
n_rm_samples[chunk_spec.level_id] += n_mask_samples
- # No samples in chunk
+ # Skip empty chunks
if chunk.shape[1] == 0:
continue
- assert (chunk.shape[0] == quantity_vec_size)
+ assert chunk.shape[0] == quantity_vec_size
- # Set variables for level sums and sums of squares
+ # Allocate accumulators at first valid chunk
if sums is None:
sums = [np.zeros(chunk.shape[0]) for _ in range(n_levels)]
sums_of_squares = [np.zeros(chunk.shape[0]) for _ in range(n_levels)]
- if chunk_spec.level_id == 0:
+ # Select appropriate data form for the estimator
+ if form == "fine":
chunk_diff = chunk[:, :, 0]
+ elif form == "coarse":
+ chunk_diff = np.zeros_like(chunk[:, :, 0]) if chunk_spec.level_id == 0 else chunk[:, :, 1]
else:
- chunk_diff = chunk[:, :, 0] - chunk[:, :, 1]
+ # Default MLMC difference (fine - coarse)
+ chunk_diff = chunk[:, :, 0] if chunk_spec.level_id == 0 else chunk[:, :, 0] - chunk[:, :, 1]
+
+ # Optional user-defined transformation of data
+ if operation_func is not None:
+ chunk_diff = operation_func(chunk_diff, chunk_spec, **kwargs)
+ # Accumulate sums and squared sums for this level
sums[chunk_spec.level_id] += np.sum(chunk_diff, axis=1)
- sums_of_squares[chunk_spec.level_id] += np.sum(chunk_diff**2, axis=1)
+ sums_of_squares[chunk_spec.level_id] += np.sum(chunk_diff ** 2, axis=1)
if sums is None:
- raise Exception("All samples were masked")
+ raise Exception("All samples were masked (no valid data found).")
+ # Compute means and variances for each level
l_means = []
l_vars = []
for s, sp, n in zip(sums, sums_of_squares, n_samples):
l_means.append(s / n)
if n > 1:
- l_vars.append((sp - (s ** 2 / n)) / (n-1))
+ l_vars.append((sp - (s ** 2 / n)) / (n - 1))
else:
l_vars.append(np.full(len(s), np.inf))
- return mlmc.quantity.quantity.QuantityMean(quantity.qtype, l_means=l_means, l_vars=l_vars, n_samples=n_samples,
- n_rm_samples=n_rm_samples)
+ # Construct QuantityMean object with level statistics
+ return mlmc.quantity.quantity.QuantityMean(
+ quantity.qtype,
+ l_means=l_means,
+ l_vars=l_vars,
+ n_samples=n_samples,
+ n_rm_samples=n_rm_samples
+ )
def moment(quantity, moments_fn, i=0):
"""
- Create quantity with operation that evaluates particular moment
- :param quantity: Quantity instance
- :param moments_fn: mlmc.moments.Moments child
- :param i: index of moment
- :return: Quantity
+ Construct a Quantity that represents a single statistical moment.
+
+ :param quantity: Base Quantity instance.
+ :param moments_fn: Instance of mlmc.moments.Moments defining the moment computation.
+ :param i: Index of the moment to compute.
+ :return: New Quantity that computes the i-th moment.
"""
def eval_moment(x):
return moments_fn.eval_single_moment(i, value=x)
- return mlmc.quantity.quantity.Quantity(quantity_type=quantity.qtype, input_quantities=[quantity], operation=eval_moment)
+
+ return mlmc.quantity.quantity.Quantity(
+ quantity_type=quantity.qtype,
+ input_quantities=[quantity],
+ operation=eval_moment
+ )
def moments(quantity, moments_fn, mom_at_bottom=True):
"""
- Create quantity with operation that evaluates moments_fn
- :param quantity: Quantity
- :param moments_fn: mlmc.moments.Moments child
- :param mom_at_bottom: bool, if True moments are underneath,
- a scalar is substituted with an array of moments of that scalar
- :return: Quantity
+ Construct a Quantity representing all moments defined by a given Moments object.
+
+ :param quantity: Base Quantity.
+ :param moments_fn: mlmc.moments.Moments child defining moment evaluations.
+ :param mom_at_bottom: bool, if True, moments are added at the lowest (scalar) level of the Quantity type.
+ :return: Quantity that computes all defined moments.
"""
def eval_moments(x):
if mom_at_bottom:
@@ -109,48 +153,85 @@ def eval_moments(x):
mom = moments_fn.eval_all(x).transpose((3, 0, 1, 2)) # [R, M, N, 2]
return mom.reshape((np.prod(mom.shape[:-2]), mom.shape[-2], mom.shape[-1])) # [M, N, 2]
- # Create quantity type which has moments_fn at the bottom
+ # Define new Quantity type according to desired hierarchy
if mom_at_bottom:
moments_array_type = qt.ArrayType(shape=(moments_fn.size,), qtype=qt.ScalarType())
moments_qtype = quantity.qtype.replace_scalar(moments_array_type)
- # Create quantity type that has moments_fn on the surface
else:
moments_qtype = qt.ArrayType(shape=(moments_fn.size,), qtype=quantity.qtype)
- return mlmc.quantity.quantity.Quantity(quantity_type=moments_qtype, input_quantities=[quantity], operation=eval_moments)
+
+ return mlmc.quantity.quantity.Quantity(
+ quantity_type=moments_qtype,
+ input_quantities=[quantity],
+ operation=eval_moments
+ )
def covariance(quantity, moments_fn, cov_at_bottom=True):
"""
- Create quantity with operation that evaluates covariance matrix
- :param quantity: Quantity
- :param moments_fn: mlmc.moments.Moments child
- :param cov_at_bottom: bool, if True cov matrices are underneath,
- a scalar is substituted with a matrix of moments of that scalar
- :return: Quantity
+ Construct a Quantity representing covariance matrices of the given moments.
+
+ :param quantity: Base Quantity.
+ :param moments_fn: mlmc.moments.Moments child defining moment evaluations.
+ :param cov_at_bottom: bool, if True covariance matrices are attached at the scalar level of the Quantity type.
+ :return: Quantity that computes covariance matrices.
"""
def eval_cov(x):
+ # Compute all moments (fine and coarse)
moments = moments_fn.eval_all(x)
mom_fine = moments[..., 0, :]
cov_fine = np.einsum('...i,...j', mom_fine, mom_fine)
if moments.shape[-2] == 1:
+ # Single level (no coarse)
cov = np.array([cov_fine])
else:
mom_coarse = moments[..., 1, :]
cov_coarse = np.einsum('...i,...j', mom_coarse, mom_coarse)
cov = np.array([cov_fine, cov_coarse])
+ # Reshape covariance according to desired data layout
if cov_at_bottom:
- cov = cov.transpose((1, 3, 4, 2, 0)) # [M, R, R, N, 2]
+ cov = cov.transpose((1, 3, 4, 2, 0)) # [M, R, R, N, 2]
else:
- cov = cov.transpose((3, 4, 1, 2, 0)) # [R, R, M, N, 2]
+ cov = cov.transpose((3, 4, 1, 2, 0)) # [R, R, M, N, 2]
return cov.reshape((np.prod(cov.shape[:-2]), cov.shape[-2], cov.shape[-1]))
- # Create quantity type which has covariance matrices at the bottom
+ # Adjust Quantity type for covariance structure
if cov_at_bottom:
- moments_array_type = qt.ArrayType(shape=(moments_fn.size, moments_fn.size, ), qtype=qt.ScalarType())
+ moments_array_type = qt.ArrayType(shape=(moments_fn.size, moments_fn.size), qtype=qt.ScalarType())
moments_qtype = quantity.qtype.replace_scalar(moments_array_type)
- # Create quantity type that has covariance matrices on the surface
else:
- moments_qtype = qt.ArrayType(shape=(moments_fn.size, moments_fn.size, ), qtype=quantity.qtype)
- return mlmc.quantity.quantity.Quantity(quantity_type=moments_qtype, input_quantities=[quantity], operation=eval_cov)
+ moments_qtype = qt.ArrayType(shape=(moments_fn.size, moments_fn.size), qtype=quantity.qtype)
+
+ return mlmc.quantity.quantity.Quantity(
+ quantity_type=moments_qtype,
+ input_quantities=[quantity],
+ operation=eval_cov
+ )
+
+
+def kurtosis_numerator(chunk_diff, chunk_spec, l_means):
+ """
+ Compute the numerator for the sample kurtosis:
+ E[(Y_l - E[Y_l])^4]
+ :param chunk_diff: np.ndarray [quantity shape, number of samples]
+ :param chunk_spec: quantity_spec.ChunkSpec describing current level and chunk.
+ :param l_means: List of per-level means used for centering.
+ :return: np.ndarray of the same shape as input.
+ """
+ return (chunk_diff - l_means[chunk_spec.level_id][:, None]) ** 4  # expand mean over the sample axis so (M,) broadcasts against (M, N)
+
+
+def level_kurtosis(quantity, means_obj):
+ """
+ Estimate the sample kurtosis for each level:
+ E[(Y_l - E[Y_l])^4] / (Var[Y_l])^2, where Y_l = fine_l - coarse_l
+
+ :param quantity: Quantity instance.
+ :param means_obj: QuantityMean object containing level means and variances.
+ :return: np.ndarray of kurtosis values per level.
+ """
+ numerator_means_obj = estimate_mean(quantity, operation_func=kurtosis_numerator, l_means=means_obj.l_means)
+ kurtosis = numerator_means_obj.l_means / (means_obj.l_vars) ** 2
+ return kurtosis
diff --git a/mlmc/quantity/quantity_spec.py b/mlmc/quantity/quantity_spec.py
index ea25dc66..377adad8 100644
--- a/mlmc/quantity/quantity_spec.py
+++ b/mlmc/quantity/quantity_spec.py
@@ -5,24 +5,56 @@
@attr.s(auto_attribs=True, eq=False)
class QuantitySpec:
+ """
+ Specification of a physical quantity for simulation or data storage.
+
+ :param name: Name of the quantity (e.g. 'pressure', 'velocity').
+ :param unit: Unit of the quantity (e.g. 'm/s', 'Pa').
+ :param shape: Tuple describing the shape of the data (e.g. (64, 64)).
+ :param times: List of time points associated with this quantity.
+ :param locations: List of either string-based identifiers or 3D coordinates
+ (x, y, z) where the quantity is defined.
+ """
+
name: str
unit: str
shape: Tuple[int, int]
times: List[float]
locations: Union[List[str], List[Tuple[float, float, float]]]
- # Note: auto generated eq raises ValueError
def __eq__(self, other):
- if (self.name, self.unit) == (other.name, other.unit) \
- and np.array_equal(self.shape, other.shape)\
- and np.array_equal(self.times, other.times)\
- and not (set(self.locations) - set(other.locations)):
- return True
- return False
+ """
+ Compare two QuantitySpec instances for equality.
+
+ :param other: Another QuantitySpec instance to compare with.
+ :return: True if both instances describe the same quantity, False otherwise.
+ """
+ if not isinstance(other, QuantitySpec):
+ return False
+
+ # Compare name, unit, shape, and times
+ same_basic_attrs = (
+ (self.name, self.unit) == (other.name, other.unit)
+ and np.array_equal(self.shape, other.shape)
+ and np.array_equal(self.times, other.times)
+ )
+
+ # Compare locations: self.locations must be a subset of other.locations (asymmetric test, preserved from the original implementation)
+ same_locations = not (set(self.locations) - set(other.locations))
+
+ return same_basic_attrs and same_locations
@attr.s(auto_attribs=True)
class ChunkSpec:
+ """
+ Specification of a simulation or dataset chunk.
+
+ :param chunk_id: Integer identifier of the chunk.
+ :param chunk_slice: Slice object defining the range of data indices in the chunk.
+ :param level_id: Identifier of the refinement or simulation level.
+ """
+
chunk_id: int = None
chunk_slice: slice = None
level_id: int = None
diff --git a/mlmc/quantity/quantity_types.py b/mlmc/quantity/quantity_types.py
index 41d6ea7f..56b42b69 100644
--- a/mlmc/quantity/quantity_types.py
+++ b/mlmc/quantity/quantity_types.py
@@ -7,23 +7,37 @@
class QType(metaclass=abc.ABCMeta):
+ """
+ Base class for quantity types.
+
+ :param qtype: inner/contained QType or Python type
+ """
+
def __init__(self, qtype):
self._qtype = qtype
def size(self) -> int:
"""
- Size of type
+ Size of the type in flattened units.
+
:return: int
"""
+ raise NotImplementedError
def base_qtype(self):
+ """
+ Return the base scalar/bool type for nested types.
+
+ :return: QType
+ """
return self._qtype.base_qtype()
def replace_scalar(self, substitute_qtype):
"""
- Find ScalarType and replace it with substitute_qtype
- :param substitute_qtype: QType, replaces ScalarType
- :return: QType
+ Find ScalarType and replace it with substitute_qtype.
+
+ :param substitute_qtype: QType that replaces ScalarType
+ :return: QType (new instance with scalar replaced)
"""
inner_qtype = self._qtype.replace_scalar(substitute_qtype)
new_qtype = copy.deepcopy(self)
@@ -31,85 +45,121 @@ def replace_scalar(self, substitute_qtype):
return new_qtype
@staticmethod
- def keep_dims(chunk):
+ def keep_dims(chunk: np.ndarray) -> np.ndarray:
"""
- Always keep chunk shape to be [M, chunk size, 2]!
- For scalar quantities, the input block can have the shape (chunk size, 2)
- Sometimes we need to 'flatten' first few shape to have desired chunk shape
- :param chunk: list
- :return: list
+ Ensure chunk has shape [M, chunk size, 2].
+
+ For scalar quantities the input block can have shape (chunk size, 2).
+ Sometimes we need to 'flatten' first few dimensions to achieve desired chunk shape.
+
+ :param chunk: numpy array
+ :return: numpy array with shape [M, chunk size, 2]
+ :raises ValueError: if chunk.ndim < 2
"""
# Keep dims [M, chunk size, 2]
if len(chunk.shape) == 2:
chunk = chunk[np.newaxis, :]
elif len(chunk.shape) > 2:
- chunk = chunk.reshape((np.prod(chunk.shape[:-2]), chunk.shape[-2], chunk.shape[-1]))
+ chunk = chunk.reshape((int(np.prod(chunk.shape[:-2])), chunk.shape[-2], chunk.shape[-1]))
else:
- raise ValueError("Chunk shape not supported")
+ raise ValueError("Chunk shape not supported: need ndim >= 2")
return chunk
- def _make_getitem_op(self, chunk, key):
+ def _make_getitem_op(self, chunk: np.ndarray, key):
"""
- Operation
- :param chunk: level chunk, list with shape [M, chunk size, 2]
- :param key: parent QType's key, needed for ArrayType
- :return: list
+ Extract a slice from chunk while preserving chunk dims.
+
+ :param chunk: level chunk, numpy array with shape [M, chunk size, 2]
+ :param key: index/slice used by parent QType
+ :return: numpy array with shape [M', chunk size', 2]
"""
return QType.keep_dims(chunk[key])
- def reshape(self, data):
+ def reshape(self, data: np.ndarray) -> np.ndarray:
+ """
+ Default reshape (identity).
+
+ :param data: numpy array
+ :return: numpy array
+ """
return data
class ScalarType(QType):
+ """
+ Scalar quantity type (leaf type).
+ """
+
def __init__(self, qtype=float):
+ """
+ :param qtype: Python type or nested type used as underlying scalar type
+ """
self._qtype = qtype
def base_qtype(self):
+ """
+ :return: base scalar QType (self or underlying BoolType base)
+ """
if isinstance(self._qtype, BoolType):
return self._qtype.base_qtype()
return self
def size(self) -> int:
- if hasattr(self._qtype, 'size'):
+ """
+ :return: int size of the scalar (defaults to 1 or uses `_qtype.size()` if present)
+ """
+ if hasattr(self._qtype, "size"):
return self._qtype.size()
return 1
def replace_scalar(self, substitute_qtype):
"""
- Find ScalarType and replace it with substitute_qtype
- :param substitute_qtype: QType, replaces ScalarType
- :return: QType
+ Replace ScalarType with substitute type.
+
+ :param substitute_qtype: QType that replaces ScalarType
+ :return: substitute_qtype
"""
return substitute_qtype
class BoolType(ScalarType):
+ """
+ Boolean scalar type (inherits ScalarType).
+ """
pass
class ArrayType(QType):
- def __init__(self, shape, qtype: QType):
+ """
+ Array quantity type.
+ :param shape: int or tuple describing array shape
+ :param qtype: contained QType for array elements
+ """
+
+ def __init__(self, shape, qtype: QType):
if isinstance(shape, int):
shape = (shape,)
-
self._shape = shape
self._qtype = qtype
def size(self) -> int:
- return np.prod(self._shape) * self._qtype.size()
+ """
+ :return: total flattened size (product of shape * inner qtype size)
+ """
+ return int(np.prod(self._shape)) * int(self._qtype.size())
def get_key(self, key):
"""
- ArrayType indexing
+ ArrayType indexing.
+
:param key: int, tuple of ints or slice objects
- :return: QuantityType - ArrayType or self._qtype
+ :return: Tuple (QuantityType, offset) where offset is 0 for this implementation
"""
- # Get new shape
+ # Get new shape by applying indexing on an empty array of the target shape
new_shape = np.empty(self._shape)[key].shape
- # One selected item is considered to be a scalar QType
+ # If one selected item is considered to be a scalar QType
if len(new_shape) == 1 and new_shape[0] == 1:
new_shape = ()
@@ -121,26 +171,42 @@ def get_key(self, key):
q_type = self._qtype
return q_type, 0
- def _make_getitem_op(self, chunk, key):
+ def _make_getitem_op(self, chunk: np.ndarray, key):
"""
- Operation
- :param chunk: list [M, chunk size, 2]
- :param key: slice
- :return:
+ Slice operation for ArrayType while restoring original shape.
+
+ :param chunk: numpy array [M, chunk size, 2]
+ :param key: slice or index to apply on the array-shaped leading dims
+ :return: numpy array with preserved dims via QType.keep_dims
"""
- # Reshape M to original shape to allow access
assert self._shape is not None
chunk = chunk.reshape((*self._shape, chunk.shape[-2], chunk.shape[-1]))
return QType.keep_dims(chunk[key])
- def reshape(self, data):
+ def reshape(self, data: np.ndarray) -> np.ndarray:
+ """
+ Reshape flattened data to array shape.
+
+ :param data: numpy array
+ :return: reshaped numpy array
+ """
if isinstance(self._qtype, ScalarType):
return data.reshape(self._shape)
else:
- return data.reshape((*self._shape, np.prod(data.shape) // np.prod(self._shape)))
+ # assume trailing dimension belongs to inner types
+ total = np.prod(data.shape)
+ leading = int(np.prod(self._shape))
+ return data.reshape((*self._shape, int(total // leading)))
class TimeSeriesType(QType):
+ """
+ Time-series quantity type.
+
+ :param times: iterable of time points
+ :param qtype: QType for each time slice
+ """
+
def __init__(self, times, qtype):
if isinstance(times, np.ndarray):
times = times.tolist()
@@ -148,96 +214,176 @@ def __init__(self, times, qtype):
self._qtype = qtype
def size(self) -> int:
- return len(self._times) * self._qtype.size()
+ """
+ :return: total size = number of time points * inner qtype.size()
+ """
+ return len(self._times) * int(self._qtype.size())
def get_key(self, key):
+ """
+ Get a qtype and offset corresponding to a given time key.
+
+ :param key: time value to locate
+ :return: Tuple (q_type, offset)
+ """
q_type = self._qtype
try:
position = self._times.index(key)
- except KeyError:
- print("Item " + str(key) + " was not found in TimeSeries" + ". Available items: " + str(list(self._times)))
+ except ValueError:
+ # keep behavior similar to original: print available items
+ print(
+ "Item "
+ + str(key)
+ + " was not found in TimeSeries"
+ + ". Available items: "
+ + str(list(self._times))
+ )
+ # raise to make the error explicit
+ raise
return q_type, position * q_type.size()
@staticmethod
def time_interpolation(quantity, value):
"""
- Interpolation in time
- :param quantity: Quantity instance
- :param value: point where to interpolate
- :return: Quantity
+ Interpolate a time-series quantity to a single time value.
+
+ :param quantity: Quantity instance with qtype being a TimeSeriesType
+ :param value: float time value where to interpolate
+ :return: Quantity object representing interpolated value
"""
def interp(y):
- split_indeces = np.arange(1, len(quantity.qtype._times)) * quantity.qtype._qtype.size()
- y = np.split(y, split_indeces, axis=-3)
+ split_indices = np.arange(1, len(quantity.qtype._times)) * quantity.qtype._qtype.size()
+ y = np.split(y, split_indices, axis=-3)
f = interpolate.interp1d(quantity.qtype._times, y, axis=0)
return f(value)
- return mlmc.quantity.quantity.Quantity(quantity_type=quantity.qtype._qtype, input_quantities=[quantity], operation=interp)
+
+ return mlmc.quantity.quantity.Quantity(
+ quantity_type=quantity.qtype._qtype,
+ input_quantities=[quantity],
+ operation=interp
+ )
class FieldType(QType):
+ """
+ Field type composed of named entries each having the same base qtype.
+
+ :param args: List of (name, QType) pairs
+ """
+
def __init__(self, args: List[Tuple[str, QType]]):
- """
- QType must have same structure
- :param args:
- """
self._dict = dict(args)
self._qtype = args[0][1]
assert all(q_type.size() == self._qtype.size() for _, q_type in args)
def size(self) -> int:
- return len(self._dict.keys()) * self._qtype.size()
+ """
+ :return: total size = number of fields * inner qtype size
+ """
+ return len(self._dict.keys()) * int(self._qtype.size())
def get_key(self, key):
+ """
+ Access sub-field by name.
+
+ :param key: field name
+ :return: Tuple (q_type, offset)
+ """
q_type = self._qtype
try:
position = list(self._dict.keys()).index(key)
- except KeyError:
- print("Key " + str(key) + " was not found in FieldType" +
- ". Available keys: " + str(list(self._dict.keys())[:5]) + "...")
+ except ValueError:
+ print(
+ "Key "
+ + str(key)
+ + " was not found in FieldType"
+ + ". Available keys: "
+ + str(list(self._dict.keys())[:5])
+ + "..."
+ )
+ raise
return q_type, position * q_type.size()
class DictType(QType):
+ """
+ Dictionary-like type of named QTypes which may differ in size.
+
+ :param args: List of (name, QType) pairs
+ """
+
def __init__(self, args: List[Tuple[str, QType]]):
- self._dict = dict(args) # Be aware we it is ordered dictionary
+ self._dict = dict(args) # keep ordered mapping semantics
self._check_base_type()
def _check_base_type(self):
+ """
+ Ensure all contained qtypes share the same base_qtype.
+
+ :raises TypeError: if base_qtypes differ
+ """
qtypes = list(self._dict.values())
qtype_0_base_type = qtypes[0].base_qtype()
for qtype in qtypes[1:]:
if not isinstance(qtype.base_qtype(), type(qtype_0_base_type)):
- raise TypeError("qtype {} has base QType {}, expecting {}. "
- "All QTypes must have same base QType, either SacalarType or BoolType".
- format(qtype, qtype.base_qtype(), qtype_0_base_type))
+ raise TypeError(
+ "qtype {} has base QType {}, expecting {}. "
+ "All QTypes must have same base QType, either ScalarType or BoolType".format(
+ qtype, qtype.base_qtype(), qtype_0_base_type
+ )
+ )
def base_qtype(self):
+ """
+ :return: base_qtype of the first element
+ """
return next(iter(self._dict.values())).base_qtype()
def size(self) -> int:
+ """
+ :return: total flattened size (sum of sizes of contained qtypes)
+ """
return int(sum(q_type.size() for _, q_type in self._dict.items()))
def get_qtypes(self):
+ """
+ :return: iterable of contained qtypes
+ """
return self._dict.values()
def replace_scalar(self, substitute_qtype):
"""
- Find ScalarType and replace it with substitute_qtype
- :param substitute_qtype: QType, replaces ScalarType
- :return: DictType
+ Replace scalar types recursively inside dict entries.
+
+ :param substitute_qtype: QType that replaces ScalarType
+ :return: new DictType instance
"""
dict_items = []
for key, qtype in self._dict.items():
new_qtype = qtype.replace_scalar(substitute_qtype)
- dict_items.append((key, new_qtype))
+ dict_items.append((key, new_qtype))
return DictType(dict_items)
def get_key(self, key):
+ """
+ Return the QType and starting offset for a named key.
+
+ :param key: name of entry
+ :return: Tuple (q_type, start_offset)
+ """
try:
q_type = self._dict[key]
except KeyError:
- print("Key " + str(key) + " was not found in DictType" +
- ". Available keys: " + str(list(self._dict.keys())[:5]) + "...")
+ print(
+ "Key "
+ + str(key)
+ + " was not found in DictType"
+ + ". Available keys: "
+ + str(list(self._dict.keys())[:5])
+ + "..."
+ )
+ raise
+
start = 0
for k, qt in self._dict.items():
if k == key:
diff --git a/mlmc/random/correlated_field.py b/mlmc/random/correlated_field.py
index ba83e15b..cfb19c51 100644
--- a/mlmc/random/correlated_field.py
+++ b/mlmc/random/correlated_field.py
@@ -12,16 +12,17 @@
def kozeny_carman(porosity, m, factor, viscosity):
"""
Kozeny-Carman law. Empirical relationship between porosity and conductivity.
+
:param porosity: Porosity value.
:param m: Power. Suitable values are 1 < m < 4
- :param factor: [m^2]
- E.g. 1e-7 , m = 3.48; juta fibers
- 2.2e-8 , 1.46; glass fibers
- 1.8e-13, 2.89; erruptive material
- 1e-12 2.76; erruptive material
- 1.8e-12 1.99; basalt
- :param viscosity: [Pa . s], water: 8.90e-4
- :return:
+ :param factor: Factor [m^2]. Examples:
+ 1e-7 , m = 3.48; jute fibers
+ 2.2e-8 , m = 1.46; glass fibers
+ 1.8e-13, m = 2.89; eruptive material
+ 1e-12 , m = 2.76; eruptive material
+ 1.8e-12, m = 1.99; basalt
+ :param viscosity: Fluid viscosity [Pa.s], e.g., water: 8.90e-4
+ :return: Conductivity
"""
assert np.all(viscosity > 1e-10)
porosity = np.minimum(porosity, 1-1e-10)
@@ -33,10 +34,12 @@ def kozeny_carman(porosity, m, factor, viscosity):
def positive_to_range(exp, a, b):
"""
- Mapping a positive parameter 'exp' from the interval <0, \infty) to the interval .
+
+ :param exp: Positive parameter (e.g., lognormal variable)
+ :param a: Lower bound of target interval
+ :param b: Upper bound of target interval
+ :return: Mapped value in [a, b)
"""
return b * (1 - (b - a) / (b + (b - a) * exp))
@@ -44,12 +47,12 @@ def positive_to_range(exp, a, b):
class Field:
def __init__(self, name, field=None, param_fields=[], regions=[]):
"""
- :param name: Name of the field.
- :param field: scalar (const field), or instance of SpatialCorrelatedField, or a callable
- for evaluation of the field from its param_fields.
- :param regions: Domain where field is sampled.
- :param param_fields: List of names of parameter fields, dependees.
- TODO: consider three different derived classes for: const, random and func fields.
+ Initialize a Field object.
+
+ :param name: Name of the field
+ :param field: Scalar (const), RandomFieldBase, or callable function
+ :param param_fields: List of dependent parameter fields
+ :param regions: List of region names where the field is defined
"""
self.correlated_field = None
self.const = None
@@ -67,8 +70,6 @@ def __init__(self, name, field=None, param_fields=[], regions=[]):
assert len(param_fields) == 0
else:
assert len(param_fields) > 0, field
-
- # check callable
try:
params = [np.ones(2) for i in range(len(param_fields))]
field(*params)
@@ -81,74 +82,61 @@ def __init__(self, name, field=None, param_fields=[], regions=[]):
def set_points(self, points):
"""
- Internal method to set evaluation points. See Fields.set_points.
+ Set points for field evaluation.
+
+ :param points: Array of points where the field will be evaluated
"""
if self.const is not None:
self._sample = self.const * np.ones(len(points))
elif self.correlated_field is not None:
self.correlated_field.set_points(points)
- if type(self.correlated_field) is SpatialCorrelatedField:
- # TODO: make n_terms_range an optianal parmater for SpatialCorrelatedField
+ if type(self.correlated_field) is SpatialCorrelatedField:
self.correlated_field.svd_dcmp(n_terms_range=(10, 100))
else:
pass
def sample(self):
"""
- Internal method to generate/compute new sample.
- :return:
+ Generate or compute a new sample of the field.
+
+ :return: Sample values of the field
"""
if self.const is not None:
return self._sample
elif self.correlated_field is not None:
self._sample = self.correlated_field.sample()
else:
- params = [ pf._sample for pf in self.param_fields]
+ params = [pf._sample for pf in self.param_fields]
self._sample = self._func(*params)
return self._sample
class Fields:
-
def __init__(self, fields):
"""
- Creates a new set of cross dependent random fields.
- Currently no support for cross-correlated random fields.
- A set of independent basic random fields must exist
- other fields can be dependent in deterministic way.
-
- :param fields: A list of dependent fields.
-
- Example:
- rf = SpatialCorrelatedField(log=True)
- Fields([
- Field('por_top', rf, regions='ground_0'),
- Field('porosity_top', positive_to_range, ['por_top', 0.02, 0.1], regions='ground_0'),
- Field('por_bot', rf, regions='ground_1'),
- Field('porosity_bot', positive_to_range, ['por_bot', 0.01, 0.05], regions='ground_1'),
- Field('conductivity_top', cf.kozeny_carman, ['porosity_top', 1, 1e-8, water_viscosity], regions='ground_0'),
- Field('conductivity_bot', cf.kozeny_carman, ['porosity_bot', 1, 1e-10, water_viscosity],regions='ground_1')
- ])
+ Create a set of cross-dependent random fields.
- TODO: use topological sort to fix order of 'fields'
- TODO: syntactic sugar for calculating with fields (like with np.arrays).
+ :param fields: List of Field objects
"""
self.fields_orig = fields
self.fields_dict = {}
self.fields = []
- # Have to make a copy of the fields since we want to generate the samples in them
- # and the given instances of Field can be used by an independent FieldSet instance.
for field in self.fields_orig:
new_field = copy.copy(field)
if new_field.param_fields:
- new_field.param_fields = [self._get_field_obj(field, new_field.regions) for field in new_field.param_fields]
+ new_field.param_fields = [self._get_field_obj(f, new_field.regions)
+ for f in new_field.param_fields]
self.fields_dict[new_field.name] = new_field
self.fields.append(new_field)
def _get_field_obj(self, field_name, regions):
"""
- Get fields by name, replace constants by constant fields for unification.
+ Get Field object by name or create constant field.
+
+ :param field_name: Field name or constant
+ :param regions: Regions of the field
+ :return: Field object
"""
if type(field_name) in [float, int]:
const_field = Field("const_{}".format(field_name), field_name, regions=regions)
@@ -156,54 +144,31 @@ def _get_field_obj(self, field_name, regions):
self.fields_dict[const_field.name] = const_field
return const_field
else:
- assert field_name in self.fields_dict, "name: {} dict: {}".format(field_name, self.fields_dict)
+ assert field_name in self.fields_dict
return self.fields_dict[field_name]
- @property
- def names(self):
- return self.fields_dict.keys()
-
- # def iterative_dfs(self, graph, start, path=[]):
- # q = [start]
- # while q:
- # v = q.pop(0)
- # if v not in path:
- # path = path + [v]
- # q = graph[v] + q
- #
- # return path
-
def set_outer_fields(self, outer):
"""
- Set fields that will be in a dictionary produced by FieldSet.sample() call.
- :param outer: A list of names of fields that are sampled.
- :return:
+ Set fields to be included in the sampled dictionary.
+
+ :param outer: List of outer field names
"""
outer_set = set(outer)
for f in self.fields:
- if f.name in outer_set:
- f.is_outer = True
- else:
- f.is_outer = False
+ f.is_outer = f.name in outer_set
def set_points(self, points, region_ids=[], region_map={}):
"""
- Set mesh related data to fields.
- - set points for sample evaluation
- - translate region names to region ids in fields
- - create maps from region constraned point sets of fields to full point set
- :param points: np array of points for field evaluation
- :param regions: regions of the points;
- empty means no points for fields restricted to regions and all points for unrestricted fields
- :return:
+ Assign evaluation points to each field.
+
+ :param points: Array of points for field evaluation
+ :param region_ids: Optional array of region ids for each point
+ :param region_map: Mapping from region name to region id
"""
self.n_elements = len(points)
- assert len(points) == len(region_ids)
reg_points = {}
for i, reg_id in enumerate(region_ids):
- reg_list = reg_points.get(reg_id, [])
- reg_list.append(i)
- reg_points[reg_id] = reg_list
+ reg_points.setdefault(reg_id, []).append(i)
for field in self.fields:
point_ids = []
@@ -219,69 +184,42 @@ def set_points(self, points, region_ids=[], region_map={}):
def sample(self):
"""
- Return dictionary of sampled fields.
- :return: { 'field_name': sample, ...}
+ Sample all outer fields.
+
+ :return: Dictionary with field names as keys and sampled arrays as values
"""
result = {}
for field in self.fields:
sample = field.sample()
if field.is_outer:
- result[field.name] = np.zeros(self.n_elements)
+ shape = (self.n_elements, 3) if field.name == "cond_tn" else self.n_elements
+ result[field.name] = np.zeros(shape)
result[field.name][field.full_sample_ids] = sample
return result
class RandomFieldBase:
"""
- Base class for various methods for generating random fields.
-
- Generating realizations of a spatially correlated random field F for a fixed set of points at X.
- E[F(x)] = mu(x)
- Cov_ij = Cov[x_i,x_j] = E[(F(x_i) - mu(x))(F(x_j) - mu(x))]
-
- We assume stationary random field with covariance matrix Cov_ij:
- Cov_i,j = c(x_i - x_j)
- where c(X) is the "stationary covariance" function. We assume:
- c(X) = sigma^2 exp( -|X^t K X|^(alpha/2) )
- for spatially heterogeneous sigma(X) we consider particular non-stationary generalization:\
- Cov_i,i = sigma(x_i)*sigma(x_j) exp( -|X^t K X|^(alpha/2) ); X = x_i - x_j
-
- where:
- - sigma(X) is the standard deviance of the single uncorrelated value
- - K is a positive definite tensor with eigen vectors corresponding to
- main directions and eigen values equal to (1/l_i)^2, where l_i is correlation
- length in singel main direction.
- - alpha is =1 for "exponential" and =2 for "Gauss" correlation
-
- SVD decomposition:
- Considering first m vectors, such that lam(m)/lam(0) <0.1
-
- Example:
- ```
- field = SpatialCorrelatedField(corr_exp='exp', corr_length=1.5)
- X, Y = np.mgrid[0:1:10j, 0:1:10j]
- points = np.vstack([X.ravel(), Y.ravel()])
- field.set_points(points)
- sample = field.sample()
-
- ```
+ Base class for generating spatially correlated random fields.
+
+ Random field F(x) with mean E[F(x)] = mu(x) and covariance Cov[x_i,x_j].
+ Stationary covariance: Cov_ij = sigma^2 * exp(-|X^T K X|^(alpha/2)),
+ X = x_i - x_j.
+ Supports optional non-stationary variance sigma(X).
"""
def __init__(self, corr_exp='gauss', dim=2, corr_length=1.0,
aniso_correlation=None, mu=0.0, sigma=1.0, log=False, **kwargs):
"""
- :param corr_exp: 'gauss', 'exp' or a float (should be >= 1)
- :param dim: dimension of the domain (size of point coords)
- :param corr_length: scalar, correlation length L > machine epsilon; tensor K = (1/L)^2
- :param aniso_correlation: 3x3 array; K tensor, overrides correlation length
- :param mu - mu field (currently just a constant)
- :param sigma - sigma field (currently just a constant)
+ Initialize a random field.
- TODO:
- - implement anisotropy in the base class using transformation matrix for the points
- - use transformation matrix also for the corr_length
- - replace corr_exp by aux classes for various correlation functions and pass them here
- - more general set of correlation functions
+ :param corr_exp: 'gauss', 'exp', or float >=1 (correlation exponent)
+ :param dim: Dimension of the domain
+ :param corr_length: Scalar correlation length
+ :param aniso_correlation: Optional anisotropic 3x3 correlation tensor
+ :param mu: Mean (scalar or array)
+ :param sigma: Standard deviation (scalar or array)
+ :param log: If True, output field is exponentiated
"""
self.dim = dim
self.log = log
@@ -293,8 +231,6 @@ def __init__(self, corr_exp='gauss', dim=2, corr_length=1.0,
else:
self.correlation_exponent = float(corr_exp)
- # TODO: User should prescribe scaling for main axis and their rotation.
- # From this we should construct the transformation matrix for the points
self._corr_length = corr_length
if aniso_correlation is None:
assert corr_length > np.finfo(float).eps
@@ -302,31 +238,26 @@ def __init__(self, corr_exp='gauss', dim=2, corr_length=1.0,
self._max_corr_length = corr_length
else:
self.correlation_tensor = aniso_correlation
- self._max_corr_length = la.norm(aniso_correlation, ord=2) # largest eigen value
+ self._max_corr_length = la.norm(aniso_correlation, ord=2)
- #### Attributes set through `set_points`.
self.points = None
- # Evaluation points of the field.
self.mu = mu
- # Mean in points. Or scalar.
self.sigma = sigma
- # Standard deviance in points. Or scalar.
-
- self._initialize(**kwargs) # Implementation dependent initialization.
+ self._initialize(**kwargs)
def _initialize(self, **kwargs):
+ """Implementation-specific initialization. To be overridden in subclasses."""
raise NotImplementedError()
def set_points(self, points, mu=None, sigma=None):
"""
- :param points: N x d array. Points X_i where the field will be evaluated. d is the dimension.
- :param mu: Scalar or N array. Mean value of uncorrelated field: E( F(X_i)).
- :param sigma: Scalar or N array. Standard deviance of uncorrelated field: sqrt( E ( F(X_i) - mu_i )^2 )
- :return: None
+ Set points for field evaluation.
+
+ :param points: Array of points (N x dim)
+ :param mu: Optional mean at points
+ :param sigma: Optional standard deviation at points
"""
points = np.array(points, dtype=float)
-
- assert len(points.shape) >= 1
assert points.shape[1] == self.dim
self.n_points, self.dimension = points.shape
self.points = points
@@ -339,49 +270,43 @@ def set_points(self, points, mu=None, sigma=None):
if sigma is not None:
self.sigma = sigma
self.sigma = np.array(self.sigma, dtype=float)
- assert self.sigma.shape == () or sigma.shape == (len(points),)
+ assert self.sigma.shape == () or self.sigma.shape == (len(points),)
def _set_points(self):
+ """Optional internal method to update points. Can be overridden."""
pass
def sample(self):
"""
- :param uncorelated: Random samples from standard normal distribution.
- Removed as the spectral method do not support it.
- :return: Random field evaluated in points given by 'set_points'.
- """
- # if uncorelated is None:
- # uncorelated = np.random.normal(0, 1, self.n_approx_terms)
- # else:
- # assert uncorelated.shape == (self.n_approx_terms,)
+ Generate a realization of the random field.
+ :return: Array of field values at set points
+ """
field = self._sample()
field = self.sigma * field + self.mu
-
- if not self.log:
- return field
- return np.exp(field)
+ return np.exp(field) if self.log else field
def _sample(self, uncorrelated):
+ """
+ Implementation-specific sample generation. To be overridden.
+
+ :param uncorrelated: Array of uncorrelated standard normal samples
+ :return: Field sample
+ """
raise NotImplementedError()
class SpatialCorrelatedField(RandomFieldBase):
+ """
+ Generate spatially correlated fields using covariance matrix and KL decomposition.
+ """
def _initialize(self, **kwargs):
- """
- Called after initialization in common constructor.
- """
-
- ### Attributes computed in precalculation.
+ """Initialization specific to SVD/KL-based spatial correlation."""
self.cov_mat = None
- # Covariance matrix (dense).
self._n_approx_terms = None
- # Length of the sample vector, number of KL (Karhunen-Loe?ve) expansion terms.
self._cov_l_factor = None
- # (Reduced) L factor of the SVD decomposition of the covariance matrix.
self._sqrt_ev = None
- # (Reduced) square roots of singular values.
def _set_points(self):
self.cov_mat = None
@@ -389,17 +314,16 @@ def _set_points(self):
def cov_matrix(self):
"""
- Setup dense covariance matrix for given set of points.
- :return: None.
+ Compute dense covariance matrix for current points.
+
+ :return: Covariance matrix
"""
assert self.points is not None, "Points not set, call set_points."
-
- self._points_bbox = box = (np.min(self.points, axis=0), np.max(self.points, axis=0))
- diameter = np.max(np.abs(box[1] - box[0]))
+ self._points_bbox = (np.min(self.points, axis=0), np.max(self.points, axis=0))
+ diameter = np.max(np.abs(self._points_bbox[1] - self._points_bbox[0]))
self._relative_corr_length = self._max_corr_length / diameter
-
- # sigma_sqr_mat = np.outer(self.sigma, self.sigma.T)
self._sigma_sqr_max = np.max(self.sigma) ** 2
+
n_pt = len(self.points)
self.cov_mat = np.empty((n_pt, n_pt))
corr_exp = self.correlation_exponent / 2.0
@@ -409,19 +333,16 @@ def cov_matrix(self):
diff_row = self.points - pt
len_sqr_row = np.sum(diff_row.dot(self.correlation_tensor) * diff_row, axis=-1)
self.cov_mat[i_row, :] = np.exp(-len_sqr_row ** corr_exp)
+
return self.cov_mat
def _eigen_value_estimate(self, m):
"""
- Estimate of the m-th eigen value of the covariance matrix.
- According to paper: Schwab, Thodor: KL Approximation of Random Fields by ...
- However for small gamma the asimtotics holds just for to big values of 'm'.
- We rather need to find a semiempricial formula.
- greater
- :param m:
- :return:
+ Semi-empirical estimate of the m-th eigenvalue of covariance matrix.
+
+ :param m: Eigenvalue index
+ :return: Estimated eigenvalue
"""
- assert self.cov_mat is not None
d = self.dimension
alpha = self.correlation_exponent
gamma = self._relative_corr_length
@@ -429,24 +350,11 @@ def _eigen_value_estimate(self, m):
def svd_dcmp(self, precision=0.01, n_terms_range=(1, np.inf)):
"""
- Does decomposition of covariance matrix defined by set of points
- :param precision: Desired accuracy of the KL approximation, smaller eigen values are dropped.
- :param n_terms_range: (min, max) number of terms in KL expansion to use. The number of terms estimated from
- given precision is snapped to the given interval.
+ Perform truncated SVD for Karhunen-Loeve decomposition.
- truncated SVD:
- cov_mat = U*diag(ev) * V,
- _cov_l_factor = U[:,0:m]*sqrt(ev[0:m])
-
- Note on number of terms:
- According to: C. Schwab and R. A. Todor: KL Approximation of Random Fields by Generalized Fast Multiploe Method
- the eigen values should decay as (Proposition 2.18):
- lambda_m ~ sigma^2 * ( 1/gamma ) **( m**(1/d) + alpha ) / Gamma(0.5 * m**(1/d) )
- where gamma = correlation length / domain diameter
- ans alpha is the correlation exponent. Gamma is the gamma function.
- ... should be checked experimantaly and generalized for sigma(X)
-
- :return:
+ :param precision: Desired accuracy
+ :param n_terms_range: Min/max number of KL terms
+ :return: (_cov_l_factor, singular values)
"""
if self.cov_mat is None:
self.cov_matrix()
@@ -455,76 +363,99 @@ def svd_dcmp(self, precision=0.01, n_terms_range=(1, np.inf)):
U, ev, VT = np.linalg.svd(self.cov_mat)
m = self.n_points
else:
- range = list(n_terms_range)
- range[0] = max(1, range[0])
- range[1] = min(self.n_points, range[1])
-
- prec_range = (self._eigen_value_estimate(range[0]), self._eigen_value_estimate(range[1]))
+ range_vals = [max(1, n_terms_range[0]), min(self.n_points, n_terms_range[1])]
+ prec_range = (self._eigen_value_estimate(range_vals[0]), self._eigen_value_estimate(range_vals[1]))
if precision < prec_range[0]:
- m = range[0]
+ m = range_vals[0]
elif precision > prec_range[1]:
- m = range[1]
+ m = range_vals[1]
else:
f = lambda m: self._eigen_value_estimate(m) - precision
- m = sp.optmize.bisect(f, range[0], range[1], xtol=0.5, )
-
- m = max(m, range[0])
+ m = sp.optimize.bisect(f, range_vals[0], range_vals[1], xtol=0.5)
+ m = max(m, range_vals[0])
threshold = 2 * precision
- # TODO: Test if we should cut eigen values by relative (like now) or absolute value
- while threshold >= precision and m <= range[1]:
- #print("treshold: {} m: {} precision: {} max_m: {}".format(threshold, m, precision, range[1]))
+ while threshold >= precision and m <= range_vals[1]:
U, ev, VT = randomized_svd(self.cov_mat, n_components=m, n_iter=3, random_state=None)
threshold = ev[-1] / ev[0]
m = int(np.ceil(1.5 * m))
- m = len(ev)
- m = min(m, range[1])
+ m = min(len(ev), range_vals[1])
- #print("KL approximation: {} for {} points.".format(m, self.n_points))
self.n_approx_terms = m
- self._sqrt_ev = np.sqrt(ev[0:m])
- self._cov_l_factor = U[:, 0:m].dot(np.diag(self._sqrt_ev))
+ self._sqrt_ev = np.sqrt(ev[:m])
+ self._cov_l_factor = U[:, :m].dot(np.diag(self._sqrt_ev))
self.cov_mat = None
- return self._cov_l_factor, ev[0:m]
+ return self._cov_l_factor, ev[:m]
def _sample(self):
"""
- :param uncorelated: Random samples from standard normal distribution.
- :return: Random field evaluated in points given by 'set_points'.
+ Generate a field realization using KL decomposition.
+
+ :return: Field sample array
"""
if self._cov_l_factor is None:
self.svd_dcmp()
- uncorelated = np.random.normal(0, 1, self.n_approx_terms)
- return self._cov_l_factor.dot(uncorelated)
+ uncorrelated = np.random.normal(0, 1, self.n_approx_terms)
+ return self._cov_l_factor.dot(uncorrelated)
class GSToolsSpatialCorrelatedField(RandomFieldBase):
+ """
+ Spatially correlated random field generator using GSTools.
+
+ This class acts as an adapter between :mod:`gstools` and the MLMC
+ random field interface (:class:`mlmc.random.random_field_base.RandomFieldBase`).
+ It supports 1D, 2D, and 3D random fields with optional logarithmic transformation,
+ and can generate fields on both structured and unstructured grids.
+ """
- def __init__(self, model, mode_no=1000, log=False, sigma=1):
+ def __init__(self, model, mode_no=1000, log=False, sigma=1, seed=None, mode=None, structured=False):
"""
- :param model: instance of covariance model class, which parent is gstools.covmodel.CovModel
- :param mode_no: number of Fourier modes, default: 1000 as in gstools package
+ Initialize a spatially correlated random field generator.
+
+ :param model: Covariance model instance (subclass of ``gstools.covmodel.CovModel``)
+ defining the spatial correlation structure.
+ :param mode_no: Number of Fourier modes used in the random field generation.
+ Default is 1000.
+ :param log: If True, applies an exponential transformation to obtain
+ a lognormal field. Default is False.
+ :param sigma: Standard deviation scaling factor applied to the generated field.
+ Default is 1.
+ :param seed: Random seed for reproducibility. Default is None.
+ :param mode: Sampling mode for GSTools SRF. Use "fft" for structured grids or
+ None for unstructured. Default is None.
+ :param structured: If True, assumes a structured grid for field evaluation.
+ Default is False.
"""
self.model = model
self.mode_no = mode_no
- self.srf = gstools.SRF(model, mode_no=mode_no)
+ if mode == "fft":
+ self.srf = gstools.SRF(model, mode="fft", seed=seed)
+ else:
+ self.srf = gstools.SRF(model, mode_no=mode_no, seed=seed)
self.mu = self.srf.mean
self.sigma = sigma
self.dim = model.dim
self.log = log
+ self.structured = structured
def change_srf(self, seed):
"""
- Spatial random field with new seed
- :param seed: int, random number generator seed
+ Reinitialize the GSTools random field with a new random seed.
+
+ :param seed: Random seed used to reinitialize the underlying
+ :class:`gstools.SRF` instance.
:return: None
"""
self.srf = gstools.SRF(self.model, seed=seed, mode_no=self.mode_no)
- def random_field(self):
+ def random_field(self, seed=None):
"""
- Generate the spatial random field
- :return: field, np.ndarray
+ Generate a raw random field realization (without scaling or transformation).
+
+ :param seed: Optional random seed for reproducibility. Default is None.
+ :return: numpy.ndarray
+ Field values evaluated at the points defined by :meth:`set_points`.
"""
if self.dim == 1:
x = self.points
@@ -540,147 +471,151 @@ def random_field(self):
x = x.reshape(len(x), 1)
y = y.reshape(len(y), 1)
z = z.reshape(len(z), 1)
- field = self.srf((x, y, z))
+ if self.structured:
+ field = self.srf([np.squeeze(x), np.squeeze(y), np.squeeze(z)], seed=seed)
+ field = field.flatten()
+ else:
+ if seed is not None:
+ field = self.srf(self.points.T, seed=seed)
+ else:
+ field = self.srf(self.points.T)
return field
- def sample(self):
+ def sample(self, seed=None):
"""
- :return: Random field evaluated in points given by 'set_points'
+ Evaluate the scaled random field at the defined points.
+
+ :param seed: Optional random seed for reproducibility. Default is None.
+ :return: numpy.ndarray
+ Field values evaluated at the defined points, scaled by ``sigma``
+ and shifted by ``mu``. If ``log=True``, returns
+ ``exp(sigma * field + mu)`` instead.
"""
if not self.log:
- return self.sigma * self.random_field() + self.mu
- return np.exp(self.sigma * self.random_field() + self.mu)
+ return self.sigma * self.random_field(seed) + self.mu
+ return np.exp(self.sigma * self.random_field(seed) + self.mu)
class FourierSpatialCorrelatedField(RandomFieldBase):
"""
- Generate spatial random fields
+ Deprecated: Fourier-based spatial random field generator.
+
+ Generates spatial random fields using a truncated Fourier series.
+ Use GSToolsSpatialCorrelatedField instead.
"""
def _initialize(self, **kwargs):
"""
- Own intialization.
- :param mode_no: Number of Fourier modes
+ Initialization specific to Fourier-based spatial fields.
+
+ :param mode_no: Number of Fourier modes (default 1000)
"""
- warnings.warn("FourierSpatialCorrelatedField class is deprecated, try to use GSToolsSpatialCorrelatedField class instead",
- DeprecationWarning)
- self.len_scale = self._corr_length * 2*np.pi
+ warnings.warn(
+ "FourierSpatialCorrelatedField class is deprecated, use GSToolsSpatialCorrelatedField instead",
+ DeprecationWarning
+ )
+ self.len_scale = self._corr_length * 2 * np.pi
self.mode_no = kwargs.get("mode_no", 1000)
def get_normal_distr(self):
"""
- Normal distributed arrays
- :return: np.ndarray
+ Generate normal distributed random coefficients for Fourier modes.
+
+ :return: Array of shape (2, mode_no)
"""
Z = np.empty((2, self.mode_no))
rng = self._get_random_stream()
for i in range(2):
Z[i] = rng.normal(size=self.mode_no)
-
return Z
def _sample_sphere(self, mode_no):
- """Uniform sampling on a d-dimensional sphere
- Parameters
- ----------
- mode_no : :class:`int`, optional
- number of the Fourier modes
- Returns
- -------
- coord : :class:`numpy.ndarray`
- x[, y[, z]] coordinates on the sphere with shape (dim, mode_no)
+ """
+        Uniformly sample directions on the unit sphere (dim = 1 or 2; dim = 3 is not implemented).
+
+ :param mode_no: Number of modes
+ :return: Array of unit vectors (dim, mode_no)
"""
coord = self._create_empty_k(mode_no)
+ rng = self._get_random_stream()
if self.dim == 1:
- rng = self._get_random_stream()
ang1 = rng.random_sample(mode_no)
coord[0] = 2 * np.around(ang1) - 1
elif self.dim == 2:
- rng = self._get_random_stream()
ang1 = rng.uniform(0.0, 2 * np.pi, mode_no)
coord[0] = np.cos(ang1)
coord[1] = np.sin(ang1)
elif self.dim == 3:
- raise NotImplementedError("For implementation see "
- "https://github.com/LSchueler/GSTools/blob/randomization_revisited/gstools/field/rng.py")
+ raise NotImplementedError("3D implementation see GSTools repo")
return coord
def gau(self, mode_no=1000):
"""
- Compute a gaussian spectrum
- :param mode_no: int, Number of Fourier modes
- :return: numpy.ndarray
+ Gaussian Fourier spectrum.
+
+ :param mode_no: Number of modes
+ :return: Array of wave vectors (dim, mode_no)
"""
len_scale = self.len_scale * np.sqrt(np.pi / 4)
if self.dim == 1:
k = self._create_empty_k(mode_no)
- rng = self._get_random_stream()
- k[0] = rng.normal(0., np.pi / 2.0 / len_scale ** 2, mode_no)
+ k[0] = self._get_random_stream().normal(0., np.pi / 2.0 / len_scale ** 2, mode_no)
elif self.dim == 2:
coord = self._sample_sphere(mode_no)
- rng = self._get_random_stream()
- rad_u = rng.random_sample(mode_no)
- # weibull distribution sampling
+ rad_u = self._get_random_stream().random_sample(mode_no)
rad = np.sqrt(np.pi) / len_scale * np.sqrt(-np.log(rad_u))
k = rad * coord
elif self.dim == 3:
- raise NotImplementedError("For implementation see "
- "https://github.com/LSchueler/GSTools/blob/randomization_revisited/gstools/field/rng.py")
+ raise NotImplementedError("3D implementation see GSTools repo")
return k
def exp(self, mode_no=1000):
"""
- Compute an exponential spectrum
- :param mode_no: int, Number of Fourier modes
- :return: numpy.ndarray
+ Exponential Fourier spectrum.
+
+ :param mode_no: Number of modes
+ :return: Array of wave vectors (dim, mode_no)
"""
if self.dim == 1:
k = self._create_empty_k(mode_no)
- rng = self._get_random_stream()
- k_u = rng.rng.uniform(-np.pi / 2.0, np.pi / 2.0, mode_no)
+ k_u = self._get_random_stream().uniform(-np.pi / 2.0, np.pi / 2.0, mode_no)
k[0] = np.tan(k_u) / self.len_scale
elif self.dim == 2:
coord = self._sample_sphere(mode_no)
- rng = self._get_random_stream()
- rad_u = rng.random_sample(mode_no)
- # sampling with ppf
+ rad_u = self._get_random_stream().random_sample(mode_no)
rad = np.sqrt(1.0 / rad_u ** 2 - 1.0) / self.len_scale
k = rad * coord
elif self.dim == 3:
- raise NotImplementedError("For implementation see "
- "https://github.com/LSchueler/GSTools/blob/randomization_revisited/gstools/field/rng.py")
+ raise NotImplementedError("3D implementation see GSTools repo")
return k
def _create_empty_k(self, mode_no=None):
- """ Create empty mode array with the correct shape.
- Parameters
- ----------
- mode_no : :class:`int`
- number of the fourier modes
- Returns
- -------
- :class:`numpy.ndarray`
- the empty mode array
- """
- if mode_no is None:
- k = np.empty(self.dim)
- else:
- k = np.empty((self.dim, mode_no))
+ """
+ Helper to create empty Fourier mode array.
- return k
+ :param mode_no: Number of modes
+ :return: Empty array of shape (dim, mode_no)
+ """
+ return np.empty((self.dim, mode_no)) if mode_no is not None else np.empty(self.dim)
def _get_random_stream(self, seed=None):
+ """
+ Return a random number generator.
+
+ :param seed: Optional seed
+ """
return rand.RandomState(rand.RandomState(seed).randint(2 ** 16 - 1))
def random_field(self):
"""
- Calculates the random modes for the randomization method.
+ Generate a random field using Fourier series.
+
+ :return: Field values at points
"""
- y, z = None, None
+ # Prepare coordinates
if self.dim == 1:
- x = self.points
- x.reshape(len(x), 1)
+ x = self.points.reshape(len(self.points), 1)
elif self.dim == 2:
x, y = self.points.T
x = x.reshape(len(x), 1)
@@ -692,64 +627,20 @@ def random_field(self):
z = z.reshape(len(z), 1)
normal_distr_values = self.get_normal_distr()
+ k = self.gau(self.mode_no) if self.correlation_exponent == 2 else self.exp(self.mode_no)
- if self.correlation_exponent == 2:
- k = self.gau(self.mode_no)
- else:
- k = self.exp(self.mode_no)
-
- # reshape for unstructured grid
- for dim_i in range(self.dim):
- k[dim_i] = np.squeeze(k[dim_i])
- k[dim_i] = np.reshape(k[dim_i], (1, len(k[dim_i])))
-
- summed_modes = np.broadcast(x, y, z)
- summed_modes = np.squeeze(np.zeros(summed_modes.shape))
- # Test to see if enough memory is available.
- # In case there isn't, divide Fourier modes into smaller chunks
- chunk_no = 1
- chunk_no_exp = 0
-
- while True:
- try:
- chunk_len = int(np.ceil(self.mode_no / chunk_no))
-
- for chunk in range(chunk_no):
- a = chunk * chunk_len
- # In case k[d,a:e] with e >= len(k[d,:]) causes errors in
- # numpy, use the commented min-function below
- # e = min((chunk + 1) * chunk_len, self.mode_no-1)
- e = (chunk + 1) * chunk_len
-
- if self.dim == 1:
- phase = k[0, a:e]*x
- elif self.dim == 2:
- phase = k[0, a:e]*x + k[1, a:e]*y
- else:
- phase = (k[0, a:e]*x + k[1, a:e]*y +
- k[2, a:e]*z)
-
- summed_modes += np.squeeze(
- np.sum(normal_distr_values[0, a:e] * np.cos(2.*np.pi*phase) +
- normal_distr_values[1, a:e] * np.sin(2.*np.pi*phase),
- axis=-1))
- except MemoryError:
- chunk_no += 2**chunk_no_exp
- chunk_no_exp += 1
- print('Not enough memory. Dividing Fourier modes into {} '
- 'chunks.'.format(chunk_no))
- else:
- break
+ summed_modes = np.zeros(len(self.points))
+    # Fourier summation; NOTE(review): memory-safe chunked summation (as in the removed code) could be reintroduced here
+ for i in range(self.mode_no):
+ phase = np.sum(k[:, i] * self.points.T, axis=0)
+ summed_modes += normal_distr_values[0, i] * np.cos(2*np.pi*phase) + normal_distr_values[1, i] * np.sin(2*np.pi*phase)
- field = np.sqrt(1.0 / self.mode_no) * summed_modes
- return field
+ return np.sqrt(1.0 / self.mode_no) * summed_modes
def _sample(self):
"""
- :return: Random field evaluated in points given by 'set_points'.
+ Generate a Fourier-based random field realization.
+
+ :return: Field values
"""
return self.random_field()
-
- if not self.log:
- return field
- return np.exp(field)
diff --git a/mlmc/random/frac_geom.py b/mlmc/random/frac_geom.py
deleted file mode 100644
index 0d872646..00000000
--- a/mlmc/random/frac_geom.py
+++ /dev/null
@@ -1,140 +0,0 @@
-import numpy as np
-import geomop.polygons as poly
-import geomop.merge as merge
-import geomop.polygons_io as poly_io
-import geomop.format_last as lg
-import geomop.layers_io
-import geomop.geometry
-#from geomop.plot_polygons import plot_polygon_decomposition
-
-
-
-
-
-
-
-
-
-def make_frac_mesh(box, mesh_step, fractures, frac_step):
- """
- Make geometry and mesh for given 2d box and set of fractures.
- :param box: [min_point, max_point]; points are np.arrays
- :param fractures: Array Nx2x2, one row for every fracture given by endpoints: [p0, p1]
- :return: GmshIO object with physical groups:
- box: 1,
- fractures: 1000 + i, i = 0, ... , N-1
- """
- regions = make_regions(mesh_step, fractures, frac_step)
- decomp, reg_map = make_decomposition(box, fractures, regions)
- geom = fill_lg(decomp, reg_map, regions)
- return make_mesh(geom)
-
-
-def add_reg(regions, name, dim, step=0.0, bc=False, not_used =False):
- reg = lg.Region(dict(name=name, dim=dim, mesh_step=step, boundary=bc, not_used=not_used))
- reg._id = len(regions)
- regions.append(reg)
-
-def make_regions(mesh_step, fractures, frac_step):
- regions = []
- add_reg(regions, "NONE", -1, not_used=True)
- add_reg(regions, "bulk_0", 2, mesh_step)
- add_reg(regions, ".bc_inflow", 1, bc=True)
- add_reg(regions, ".bc_outflow", 1, bc=True)
- for f_id in range(len(fractures)):
- add_reg(regions, "frac_{}".format(f_id), 1, frac_step)
- return regions
-
-
-def make_decomposition(box, fractures, regions):
- box_pd = poly.PolygonDecomposition()
- p00, p11 = box
- p01 = np.array([p00[0], p11[1]])
- p10 = np.array([p11[0], p00[1]])
- box_pd.add_line(p00, p01)
- seg_outflow, = box_pd.add_line(p01, p11)
- box_pd.add_line(p11, p10)
- seg_inflow, = box_pd.add_line(p10, p00)
-
- decompositions = [box_pd]
- for p0, p1 in fractures:
- pd = poly.PolygonDecomposition()
- pd.add_line(p0, p1)
- decompositions.append(pd)
-
- common_decomp, maps = merge.intersect_decompositions(decompositions)
- #plot_polygon_decomposition(common_decomp)
- #print(maps)
-
- # Map common_decomp objects to regions.
- none_region_id = 0
- box_reg_id = 1
- bc_inflow_id = 2
- bc_outflow_id = 3
- frac_id_shift = 4
- decomp_shapes = [common_decomp.points, common_decomp.segments, common_decomp.polygons]
- reg_map = [{key: regions[none_region_id] for key in decomp_shapes[d].keys()} for d in range(3)]
- for i_frac, f_map in enumerate(maps[1:]):
- for id, orig_seg_id in f_map[1].items():
- reg_map[1][id] = regions[frac_id_shift + i_frac]
-
- for id, orig_poly_id in maps[0][2].items():
- if orig_poly_id == 0:
- continue
- reg_map[2][id] = regions[box_reg_id]
-
- for id, orig_seg_id in maps[0][1].items():
- if orig_seg_id == seg_inflow.id:
- reg_map[1][id] = regions[bc_inflow_id]
- if orig_seg_id == seg_outflow.id:
- reg_map[1][id] = regions[bc_outflow_id]
-
-
- return common_decomp, reg_map
-
-
-def fill_lg(decomp, reg_map, regions):
- """
- Create LayerGeometry object.
- """
- nodes, topology = poly_io.serialize(decomp)
-
- geom = lg.LayerGeometry()
- geom.version
- geom.regions = regions
-
-
-
- iface_ns = lg.InterfaceNodeSet(dict(
- nodeset_id = 0,
- interface_id = 0
- ))
- layer = lg.FractureLayer(dict(
- name = "layer",
- top = iface_ns,
- polygon_region_ids = [ reg_map[2][poly.id]._id for poly in decomp.polygons.values() ],
- segment_region_ids = [ reg_map[1][seg.id]._id for seg in decomp.segments.values() ],
- node_region_ids = [ reg_map[0][node.id]._id for node in decomp.points.values() ]
- ))
- geom.layers = [ layer ]
- #geom.surfaces = [ClassFactory(Surface)]
-
- iface = lg.Interface(dict(
- surface_id = None,
- elevation = 0.0
- ))
- geom.interfaces = [ iface ]
- #geom.curves = [ClassFactory(Curve)]
- geom.topologies = [ topology ]
-
- nodeset = lg.NodeSet(dict(
- topology_id = 0,
- nodes = nodes
- ))
- geom.node_sets = [ nodeset ]
- geomop.layers_io.write_geometry("fractured_2d.json", geom)
- return geom
-
-
-def make_mesh(geometry):
- return geomop.geometry.make_geometry(geometry=geometry, layers_file="fractured_2d.json", mesh_step=1.0)
\ No newline at end of file
diff --git a/mlmc/sample_storage.py b/mlmc/sample_storage.py
index 01be9082..623bd9b3 100644
--- a/mlmc/sample_storage.py
+++ b/mlmc/sample_storage.py
@@ -1,134 +1,142 @@
import itertools
import numpy as np
-from abc import ABCMeta
-from abc import abstractmethod
-from typing import List, Dict
+from abc import ABCMeta, abstractmethod
+from typing import List, Dict, Any, Generator, Optional, Tuple
from mlmc.quantity.quantity_spec import QuantitySpec, ChunkSpec
class SampleStorage(metaclass=ABCMeta):
"""
- Provides methods to store and retrieve sample's data
+ Provides methods to store and retrieve sample data.
+ Abstract base class for all storage backends.
"""
@abstractmethod
def save_samples(self, successful_samples, failed_samples):
"""
- Write results to storage
+ Write simulation results to storage.
+ :param successful_samples: Dict[level_id, List[Tuple[sample_id, (fine, coarse)]]]
+ :param failed_samples: Dict[level_id, List[Tuple[sample_id, error_message]]]
"""
@abstractmethod
def save_result_format(self, res_spec: List[QuantitySpec]):
"""
- Save result format
+ Save result format.
+ :param res_spec: List of quantity specifications describing result structure.
"""
@abstractmethod
def load_result_format(self) -> List[QuantitySpec]:
"""
- Load result format
+ Load stored result format.
+ :return: List[QuantitySpec]
"""
@abstractmethod
def save_global_data(self, result_format: List[QuantitySpec], level_parameters=None):
"""
- Save global data, at the moment: _result_format, level_parameters
+ Save global metadata such as result format and level parameters.
+ :param result_format: List[QuantitySpec]
+ :param level_parameters: Optional metadata per level
"""
@abstractmethod
def save_scheduled_samples(self, level_id, samples):
"""
- Save scheduled samples ids
+ Save scheduled sample identifiers.
+ :param level_id: int
+ :param samples: List[str]
"""
@abstractmethod
- def load_scheduled_samples(self):
+ def load_scheduled_samples(self) -> Dict[int, List[str]]:
"""
- Load scheduled samples
- :return: Dict[_level_id, List[sample_id: str]]
+ Load scheduled sample IDs.
+ :return: Dict[level_id, List[sample_id]]
"""
@abstractmethod
def sample_pairs(self):
"""
- Get results from storage
- :return: List[Array[M, N, 2]]
+ Retrieve all stored fine–coarse result pairs.
+ :return: List[np.ndarray[M, N, 2]]
"""
- def chunks(self, level_id=None, n_samples=None):
+ def chunks(self, level_id: Optional[int] = None, n_samples: Optional[int] = None) -> Generator[ChunkSpec, None, None]:
"""
- Create chunks generator
- :param level_id: int, if not None return chunks for a given level
- :param n_samples: int, number of samples to retrieve
- :return: generator
+ Create a generator yielding chunk specifications for collected data.
+ :param level_id: int, if provided, return chunks only for the given level.
+ :param n_samples: int, maximum number of samples to retrieve.
+ :return: generator of ChunkSpec objects.
"""
- assert isinstance(n_samples, (type(None), int)), "n_samples param must be int"
- level_ids = self.get_level_ids()
- if level_id is not None:
- level_ids = [level_id]
- return itertools.chain(*[self._level_chunks(level_id, n_samples) for level_id in level_ids]) # concatenate generators
+ assert isinstance(n_samples, (type(None), int)), "n_samples must be int or None"
+ level_ids = [level_id] if level_id is not None else self.get_level_ids()
+ return itertools.chain(*[self._level_chunks(lid, n_samples) for lid in level_ids])
@abstractmethod
def _level_chunks(self, level_id, n_samples=None):
"""
- Info about chunks of level's collected data
+ Get chunk information for data collected at a given level.
+ :param level_id: int
+ :param n_samples: int
:return: generator of ChunkSpec objects
"""
@abstractmethod
def n_finished(self):
"""
- Number of finished samples
- :return: List
+ Get number of finished samples on each level.
+ :return: List[int]
"""
@abstractmethod
- def save_n_ops(self, n_ops: Dict[int, List[float]]):
+ def save_n_ops(self, n_ops: Dict[int, Tuple[float, int]]):
"""
- Save number of operations (time)
- :param n_ops: Dict[_level_id, List[overall time, number of valid samples]]
+ Save number of operations (time).
+ :param n_ops: Dict[level_id, Tuple[total_time, n_valid_samples]]
"""
@abstractmethod
def get_n_ops(self):
"""
- Number of operations (time) per sample for each level
+ Get number of operations per sample for each level.
:return: List[float]
"""
@abstractmethod
def unfinished_ids(self):
"""
- Get unfinished sample's ids
- :return: list
+ Get IDs of unfinished samples.
+ :return: List[str]
"""
@abstractmethod
def get_level_ids(self):
"""
- Get number of levels
- :return: int
+ Get list of available level IDs.
+ :return: List[int]
"""
@abstractmethod
def get_n_levels(self):
"""
- Get number of levels
+ Get total number of levels.
:return: int
"""
@abstractmethod
def get_level_parameters(self):
"""
- Get level parameters
- :return: list
+ Get stored level parameters.
+ :return: List[Any]
"""
@abstractmethod
def get_n_collected(self):
"""
- Get number of collected results at each evel
- :return: list
+ Get number of collected results at each level.
+ :return: List[int]
"""
@@ -168,12 +176,15 @@ def _save_successful(self, samples):
:return: None
"""
for level_id, res in samples.items():
- res = np.array(res)
+ res = np.array(res, dtype=object)
fine_coarse_res = res[:, 1]
- result_type = np.dtype((np.float, np.array(fine_coarse_res[0]).shape))
+ result_type = np.dtype((float, np.array(fine_coarse_res[0], dtype=object).shape))
results = np.empty(shape=(len(res),), dtype=result_type)
- results[:] = [val for val in fine_coarse_res]
+
+ for idx, val in enumerate(fine_coarse_res):
+ results[idx, 0] = val[0]
+ results[idx, 1] = val[1]
# Save sample ids
self._successful_sample_ids.setdefault(level_id, []).extend(res[:, 0])
diff --git a/mlmc/sample_storage_hdf.py b/mlmc/sample_storage_hdf.py
index 7e5fbef5..5b7e4dbe 100644
--- a/mlmc/sample_storage_hdf.py
+++ b/mlmc/sample_storage_hdf.py
@@ -4,81 +4,86 @@
from mlmc.sample_storage import SampleStorage
from mlmc.quantity.quantity_spec import QuantitySpec, ChunkSpec
import mlmc.tool.hdf5 as hdf
-import warnings
-warnings.simplefilter("ignore", np.VisibleDeprecationWarning)
class SampleStorageHDF(SampleStorage):
"""
- Sample's data are stored in a HDF5 file
+ Store and manage sample data in an HDF5 file.
+
+ This implementation of the SampleStorage interface provides efficient
+ persistent storage for MLMC simulation results using HDF5.
"""
def __init__(self, file_path):
"""
- HDF5 storage, provide method to interact with storage
- :param file_path: absolute path to hdf file (which not exists at the moment)
+ Initialize the HDF5 storage and create or load the file structure.
+
+ :param file_path: Absolute path to the HDF5 file.
+ If the file exists, it will be loaded instead of created.
"""
super().__init__()
- # If file exists load not create new file
- load_from_file = True if os.path.exists(file_path) else False
+ load_from_file = os.path.exists(file_path)
# HDF5 interface
self._hdf_object = hdf.HDF5(file_path=file_path, load_from_file=load_from_file)
self._level_groups = []
- # 'Load' level groups
+ # Load existing level groups if file already contains data
if load_from_file:
- # Create level group for each level
if len(self._level_groups) != len(self._hdf_object.level_parameters):
for i_level in range(len(self._hdf_object.level_parameters)):
self._level_groups.append(self._hdf_object.add_level_group(str(i_level)))
def _hdf_result_format(self, locations, times):
"""
- QuantitySpec data type, necessary for hdf storage
- :return:
+ Construct an appropriate dtype for QuantitySpec data representation in HDF5.
+
+ :param locations: List of spatial locations (as coordinates or identifiers).
+ :param times: List of time steps.
+ :return: Numpy dtype describing the QuantitySpec data structure.
"""
if len(locations[0]) == 3:
- tuple_dtype = np.dtype((np.float, (3,)))
+ tuple_dtype = np.dtype((float, (3,)))
loc_dtype = np.dtype((tuple_dtype, (len(locations),)))
else:
loc_dtype = np.dtype(('S50', (len(locations),)))
- result_dtype = {'names': ('name', 'unit', 'shape', 'times', 'locations'),
- 'formats': ('S50',
- 'S50',
- np.dtype((np.int32, (2,))),
- np.dtype((np.float, (len(times),))),
- loc_dtype
- )
- }
+ result_dtype = {
+ 'names': ('name', 'unit', 'shape', 'times', 'locations'),
+ 'formats': (
+ 'S50',
+ 'S50',
+ np.dtype((np.int32, (2,))),
+ np.dtype((float, (len(times),))),
+ loc_dtype
+ )
+ }
return result_dtype
- def save_global_data(self, level_parameters: List[np.float], result_format: List[QuantitySpec]):
+ def save_global_data(self, level_parameters: List[float], result_format: List[QuantitySpec]):
"""
- Save hdf5 file global attributes
- :param level_parameters: list of simulation steps
- :param result_format: simulation result format
+ Save HDF5 global attributes including simulation parameters and result format.
+
+ :param level_parameters: List of simulation level parameters (e.g., mesh sizes).
+ :param result_format: List of QuantitySpec objects describing result quantities.
:return: None
"""
res_dtype = self._hdf_result_format(result_format[0].locations, result_format[0].times)
-
- # Create file structure
self._hdf_object.create_file_structure(level_parameters)
- # Create group for each level
+ # Create HDF5 groups for each simulation level
if len(self._level_groups) != len(level_parameters):
for i_level in range(len(level_parameters)):
self._level_groups.append(self._hdf_object.add_level_group(str(i_level)))
- # Save result format (QuantitySpec)
self.save_result_format(result_format, res_dtype)
def load_scheduled_samples(self):
"""
- Get scheduled samples for each level
- :return: Dict[level_id, List[sample_id: str]]
+ Load scheduled samples from storage.
+
+ :return: Dict[level_id, List[sample_id: str]]
"""
scheduled = {}
for level in self._level_groups:
@@ -87,79 +92,116 @@ def load_scheduled_samples(self):
def save_result_format(self, result_format: List[QuantitySpec], res_dtype):
"""
- Save result format to hdf
- :param result_format: List[QuantitySpec]
+ Save result format metadata to HDF5.
+
+ :param result_format: List of QuantitySpec objects defining stored quantities.
+ :param res_dtype: Numpy dtype for structured storage.
:return: None
"""
try:
if self.load_result_format() != result_format:
- raise ValueError('You are setting a new different result format for an existing sample storage')
+ raise ValueError(
+ "Attempting to overwrite an existing result format with a new incompatible one."
+ )
except AttributeError:
pass
+
self._hdf_object.save_result_format(result_format, res_dtype)
def load_result_format(self) -> List[QuantitySpec]:
"""
- Load result format
+ Load and reconstruct the result format from HDF5.
+
+ :return: List of QuantitySpec objects.
"""
results_format = self._hdf_object.load_result_format()
quantities = []
for res_format in results_format:
- spec = QuantitySpec(res_format[0].decode(), res_format[1].decode(), res_format[2], res_format[3],
- [loc.decode() for loc in res_format[4]])
-
+ spec = QuantitySpec(
+ res_format[0].decode(),
+ res_format[1].decode(),
+ res_format[2],
+ res_format[3],
+ [loc.decode() for loc in res_format[4]]
+ )
quantities.append(spec)
-
return quantities
def save_samples(self, successful, failed):
"""
- Save successful and failed samples
- :param successful: List[Tuple[sample_id: str, Tuple[ndarray, ndarray]]]
- :param failed: List[Tuple[sample_id: str, error_message: str]]
+ Save successful and failed samples to the HDF5 storage.
+
+ :param successful: Dict[level_id, List[Tuple[sample_id: str, (fine, coarse)]]]
+ :param failed: Dict[level_id, List[Tuple[sample_id: str, error_message: str]]]
:return: None
"""
- self._save_succesful(successful)
+ self._save_successful(successful)
self._save_failed(failed)
- def _save_succesful(self, successful_samples):
+ def _save_successful(self, successful_samples):
+ """
+ Append successful sample results to the appropriate level group.
+
+ :param successful_samples: Dict[level_id, List[Tuple[sample_id, (fine, coarse)]]]
+ :return: None
+ """
for level, samples in successful_samples.items():
if len(samples) > 0:
- self._level_groups[level].append_successful(np.array(samples))
+ self._level_groups[level].append_successful(np.array(samples, dtype=object))
def _save_failed(self, failed_samples):
+ """
+ Append failed sample identifiers and messages.
+
+ :param failed_samples: Dict[level_id, List[Tuple[sample_id, error_message]]]
+ :return: None
+ """
for level, samples in failed_samples.items():
if len(samples) > 0:
self._level_groups[level].append_failed(samples)
def save_scheduled_samples(self, level_id, samples: List[str]):
"""
- Append scheduled samples
- :param level_id: int
- :param samples: list of sample identifiers
+ Append scheduled sample identifiers for a specific level.
+
+ :param level_id: Integer level identifier.
+ :param samples: List of sample identifiers.
:return: None
"""
self._level_groups[level_id].append_scheduled(samples)
def _level_chunks(self, level_id, n_samples=None):
+ """
+ Generate chunk specifications for a given level.
+
+ :param level_id: Level identifier.
+ :param n_samples: Optional number of samples to include per chunk.
+ :return: Generator of ChunkSpec objects.
+ """
return self._level_groups[level_id].chunks(n_samples)
def sample_pairs(self):
"""
- Load results from hdf file
- :return: List[Array[M, N, 2]]
+ Retrieve all sample pairs from storage.
+
+ :return: List[np.ndarray[M, N, 2]] where M = number of results, N = number of samples.
"""
if len(self._level_groups) == 0:
- raise Exception("self._level_groups shouldn't be empty, save_global_data() method should have set it, "
- "that method is always called from mlmc.sampler.Sampler constructor."
- " In other cases, call save_global_data() directly")
+ raise Exception(
+ "Level groups are not initialized. "
+ "Ensure save_global_data() is called before using SampleStorageHDF."
+ )
levels_results = list(np.empty(len(self._level_groups)))
for level in self._level_groups:
- chunk_spec = next(self.chunks(level_id=int(level.level_id),
- n_samples=self.get_n_collected()[int(level.level_id)]))
- results = self.sample_pairs_level(chunk_spec) # return all samples no chunks
+ chunk_spec = next(
+ self.chunks(
+ level_id=int(level.level_id),
+ n_samples=self.get_n_collected()[int(level.level_id)]
+ )
+ )
+ results = self.sample_pairs_level(chunk_spec)
if results is None or len(results) == 0:
levels_results[int(level.level_id)] = []
continue
@@ -168,13 +210,12 @@ def sample_pairs(self):
def sample_pairs_level(self, chunk_spec):
"""
- Get result for particular level and chunk
- :param chunk_spec: object containing chunk identifier level identifier and chunk_slice - slice() object
- :return: np.ndarray
+ Retrieve samples for a specific level and chunk.
+
+ :param chunk_spec: ChunkSpec containing level ID and slice information.
+ :return: np.ndarray of shape [M, chunk size, 2].
"""
- level_id = chunk_spec.level_id
- if chunk_spec.level_id is None:
- level_id = 0
+ level_id = chunk_spec.level_id or 0
chunk = self._level_groups[int(level_id)].collected(chunk_spec.chunk_slice)
# Remove auxiliary zeros from level zero sample pairs
@@ -185,31 +226,31 @@ def sample_pairs_level(self, chunk_spec):
def n_finished(self):
"""
- Number of finished samples on each level
- :return: List[int]
+ Count the number of finished samples for each level.
+
+ :return: np.ndarray[int] containing finished sample counts per level.
"""
n_finished = np.zeros(len(self._level_groups))
for level in self._level_groups:
n_finished[int(level.level_id)] += len(level.get_finished_ids())
-
return n_finished
def unfinished_ids(self):
"""
- List of unfinished ids
- :return: list
+ Return identifiers of all unfinished samples.
+
+ :return: List[str]
"""
unfinished = []
-
for level in self._level_groups:
unfinished.extend(level.get_unfinished_ids())
-
return unfinished
def failed_samples(self):
"""
- Dictionary of failed samples
- :return: dict
+ Return dictionary of failed samples for each level.
+
+ :return: Dict[str, List[str]]
"""
failed_samples = {}
for level in self._level_groups:
@@ -217,13 +258,17 @@ def failed_samples(self):
return failed_samples
def clear_failed(self):
+ """
+ Clear all failed sample records from storage.
+ """
for level in self._level_groups:
level.clear_failed_dataset()
def save_n_ops(self, n_ops):
"""
- Save number of operations (time) of samples
- :param n_ops: Dict[level_id, List[overall time, number of successful samples]]
+ Save the estimated number of operations (e.g., runtime) for each level.
+
+ :param n_ops: Dict[level_id, List[total_time, num_successful_samples]]
:return: None
"""
for level_id, (time, n_samples) in n_ops:
@@ -238,8 +283,9 @@ def save_n_ops(self, n_ops):
def get_n_ops(self):
"""
- Get number of estimated operations on each level
- :return: List
+ Get the average number of operations per sample for each level.
+
+ :return: List[float]
"""
n_ops = list(np.zeros(len(self._level_groups)))
for level in self._level_groups:
@@ -250,15 +296,26 @@ def get_n_ops(self):
return n_ops
def get_level_ids(self):
+ """
+ Get identifiers of all levels stored in HDF5.
+
+ :return: List[int]
+ """
return [int(level.level_id) for level in self._level_groups]
def get_level_parameters(self):
+ """
+ Load stored level parameters (e.g., step sizes or resolutions).
+
+ :return: List[float]
+ """
return self._hdf_object.load_level_parameters()
def get_n_collected(self):
"""
- Get number of collected samples at each level
- :return: List
+ Get the number of collected (stored) samples for each level.
+
+ :return: List[int]
"""
n_collected = list(np.zeros(len(self._level_groups)))
for level in self._level_groups:
@@ -267,7 +324,8 @@ def get_n_collected(self):
def get_n_levels(self):
"""
- Get number of levels
+ Get total number of levels present in storage.
+
:return: int
"""
return len(self._level_groups)
diff --git a/mlmc/sampler.py b/mlmc/sampler.py
index 6a550726..d1a39f29 100644
--- a/mlmc/sampler.py
+++ b/mlmc/sampler.py
@@ -8,7 +8,12 @@
class Sampler:
"""
- Manages samples scheduling, results collection, and result storage.
+ Manages sample scheduling, result collection, and persistent storage.
+
+ Coordinates the sampling pool, simulation factory, and sample storage:
+ - schedules new samples according to target counts,
+ - collects finished samples and writes them to storage,
+ - handles failed samples and runtime (n_ops) bookkeeping.
"""
ADDING_SAMPLES_TIMEOUT = 1e-15
@@ -16,64 +21,74 @@ class Sampler:
def __init__(self, sample_storage: SampleStorage, sampling_pool: SamplingPool, sim_factory: Simulation,
level_parameters: List[List[float]], seed=1234):
"""
+ Initialize sampler and prepare per-level simulation objects.
+
:param sample_storage: store scheduled samples, results and result structure
- :param sampling_pool: calculate samples
- :param sim_factory: generate samples
- :param level_parameters: List of e.g. simulation steps, ...
- :param seed: global random seed
+ :param sampling_pool: sampling pool responsible for executing simulations
+ :param sim_factory: factory that creates level Simulation instances and provides result_format()
+ :param level_parameters: List of per-level parameters (e.g. simulation steps)
+ :param seed: global RNG seed used to seed NumPy's RNG
"""
np.random.seed(seed)
self.sample_storage = sample_storage
self._sampling_pool = sampling_pool
+ # Target number of samples per level (may be updated later)
self._n_target_samples = np.zeros(len(level_parameters))
- # Number of target samples
+
+ # Create LevelSimulation objects for each level using the provided factory
self._level_sim_objects = []
self._create_level_sim_objects(level_parameters, sim_factory)
+ # Persist global data (level parameters and result format) into storage
sample_storage.save_global_data(level_parameters=level_parameters,
result_format=sim_factory.result_format())
+ # Load already scheduled samples (if any) from storage
self._n_scheduled_samples = [len(level_scheduled) for level_id, level_scheduled in
sample_storage.load_scheduled_samples().items()]
- # Number of created samples
+ # If there are no scheduled samples yet, initialize to zeros
if not self._n_scheduled_samples:
self._n_scheduled_samples = np.zeros(len(level_parameters))
- # Are there any unfinished samples which have already finished?
+ # Check for unfinished samples and inform the sampling pool
self._check_failed_samples()
- # @TODO: get unfinished samples from sampler and call have permanent samples -> add results to pool's queues,
- # before scheduled samples call, call get_finished - we need to know how many samples is finished
+ # @TODO: If sampler is restarted, collect any samples finished while offline:
+ # - add permanent samples into pool queues,
+ # - before scheduling new samples, call get_finished to know how many are already done.
@property
def n_levels(self):
+ """Return number of MLMC levels managed by this sampler."""
return len(self._level_sim_objects)
@property
def n_finished_samples(self):
"""
- Retrieve number of all finished samples
- :return:
+ Retrieve numbers of finished samples for all levels.
+
+ :return: array-like containing finished counts per level
"""
return self.sample_storage.n_finished()
def _create_level_sim_objects(self, level_parameters, sim_factory):
"""
- Create LevelSimulation object for each level, use simulation factory
- :param: level_parameters: List, simulation steps, ...
- :param: sim_factory: Simulation instance
+ Create LevelSimulation object for each level via the simulation factory.
+
+ :param level_parameters: List of per-level parameters
+ :param sim_factory: Simulation factory providing level_instance and calculate methods
:return: None
"""
n_levels = len(level_parameters)
for level_id in range(n_levels):
if level_id == 0:
level_sim = sim_factory.level_instance(level_parameters[level_id], [0])
-
else:
level_sim = sim_factory.level_instance(level_parameters[level_id], level_parameters[level_id - 1])
+ # Attach factory methods and metadata to the LevelSimulation
level_sim._calculate = sim_factory.calculate
level_sim._result_format = sim_factory.result_format
level_sim._level_id = level_id
@@ -81,30 +96,37 @@ def _create_level_sim_objects(self, level_parameters, sim_factory):
def sample_range(self, n0, nL):
"""
- Geometric sequence of L elements decreasing from n0 to nL.
- Useful to set number of samples explicitly.
- :param n0: int
- :param nL: int
- :return: np.array of length L = n_levels.
+ Generate a geometric sequence of length L decreasing from n0 to nL.
+
+ Useful to generate a set of target sample counts across levels.
+
+ :param n0: int, number of samples at the first level (level 0)
+ :param nL: int, number of samples at the last level
+ :return: np.ndarray of length self.n_levels with integer sample counts
"""
return np.round(np.exp2(np.linspace(np.log2(n0), np.log2(nL), self.n_levels))).astype(int)
def set_initial_n_samples(self, n_samples=None):
"""
- Set target number of samples for each level
- :param n_samples: array of number of samples
+ Set initial target number of samples for each level.
+
+ Accepts:
+ - None (defaults to [100, 10]),
+ - single integer (interpreted as n0, with default nL=10),
+ - two-element list [n0, nL] (geometric interpolation across levels).
+
+ :param n_samples: scalar, length-2 list, or array specifying target counts
:return: None
"""
if n_samples is None:
n_samples = [100, 10]
- # Num of samples to ndarray
n_samples = np.atleast_1d(n_samples)
- # Just maximal number of samples is set
+ # Single value -> treat as n0 with default nL
if len(n_samples) == 1:
n_samples = np.array([n_samples[0], 10])
- # Create number of samples for all levels
+ # Two values -> create geometric progression across levels
if len(n_samples) == 2:
n0, nL = n_samples
n_samples = self.sample_range(n0, nL)
@@ -113,56 +135,81 @@ def set_initial_n_samples(self, n_samples=None):
def _get_sample_tag(self, level_id):
"""
- Create sample tag
+ Create a unique sample tag for a given level.
+
:param level_id: identifier of current level
- :return: str
+ :return: str unique sample tag (e.g. 'L00_S0000123')
"""
return "L{:02d}_S{:07d}".format(level_id, int(self._n_scheduled_samples[level_id]))
- def schedule_samples(self, timeout=None):
+ def schedule_samples(self, timeout=None, level_id=None, n_samples=None):
"""
- Create simulation samples, loop through "levels" and its samples (given the number of target samples):
- 1) generate sample tag (same for fine and coarse simulation)
- 2) get LevelSimulation instance by simulation factory
- 3) schedule sample via sampling pool
- 4) store scheduled samples in sample storage, separately for each level
- :param timeout: int, get_finished - while break timeout in seconds
+ Schedule new simulation samples in the sampling pool and record them in storage.
+
+ For each scheduled sample:
+ 1) generate a unique sample id shared by fine and coarse tasks,
+ 2) obtain the LevelSimulation instance for the level,
+ 3) schedule the sample with SamplingPool,
+ 4) store scheduled sample ids in SampleStorage.
+
+ :param timeout: float or None, passed to ask_sampling_pool_for_samples() before scheduling
+ :param level_id: int or None, if provided schedule only for this level (default: highest level)
+ :param n_samples: int or None, if provided schedule exactly this many samples for the specified level
:return: None
"""
+ # First, collect any finished samples
self.ask_sampling_pool_for_samples(timeout=timeout)
plan_samples = self._n_target_samples - self._n_scheduled_samples
- for level_id, n_samples in enumerate(plan_samples):
+ # Default to the last (highest-index) level if not specified
+ if level_id is None:
+ level_id = len(plan_samples) - 1
+
+ # If a specific number of samples for one level is requested
+ if n_samples is not None:
samples = []
for _ in range(int(n_samples)):
- # Unique sample id
sample_id = self._get_sample_tag(level_id)
level_sim = self._level_sim_objects[level_id]
- # Schedule current sample
self._sampling_pool.schedule_sample(sample_id, level_sim)
- # Increment number of created samples at current level
self._n_scheduled_samples[level_id] += 1
-
samples.append(sample_id)
- # Store scheduled samples
self.sample_storage.save_scheduled_samples(level_id, samples)
+ else:
+ # Iterate levels in reverse order (last to first) and schedule required samples
+ for n_samples in np.flip(plan_samples):
+ samples = []
+ for _ in range(int(n_samples)):
+ sample_id = self._get_sample_tag(level_id)
+ level_sim = self._level_sim_objects[level_id]
+
+ self._sampling_pool.schedule_sample(sample_id, level_sim)
+ self._n_scheduled_samples[level_id] += 1
+ samples.append(sample_id)
+
+ self.sample_storage.save_scheduled_samples(level_id, samples)
+ level_id -= 1
def _check_failed_samples(self):
"""
- Get unfinished samples and check if failed samples have saved results then collect them
- :return:
+ Query storage for unfinished sample IDs and inform the sampling pool.
+
+ This allows the sampling pool to reattach or handle 'permanent' samples
+ that may have been started previously.
+ :return: None
"""
unfinished_sample_ids = self.sample_storage.unfinished_ids()
self._sampling_pool.have_permanent_samples(unfinished_sample_ids)
def ask_sampling_pool_for_samples(self, sleep=0, timeout=None):
"""
- Waiting for running simulations
- :param sleep: time for doing nothing
- :param timeout: maximum time for waiting on running simulations
- :return: int, number of running simulations
+ Poll the sampling pool for finished simulations and store their results.
+
+ :param sleep: float, time to sleep between polls (seconds)
+ :param timeout: float or None, maximum time to wait; non-positive values disable the timeout
+ :return: int, number of running simulations remaining after the call
"""
if timeout is None:
timeout = 0
@@ -173,7 +220,7 @@ def ask_sampling_pool_for_samples(self, sleep=0, timeout=None):
t0 = time.perf_counter()
while n_running > 0:
successful_samples, failed_samples, n_running, n_ops = self._sampling_pool.get_finished()
- # Store finished samples
+ # Persist finished samples and operation counts
self._store_samples(successful_samples, failed_samples, n_ops)
time.sleep(sleep)
if 0 < timeout < (time.perf_counter() - t0):
@@ -183,10 +230,11 @@ def ask_sampling_pool_for_samples(self, sleep=0, timeout=None):
def _store_samples(self, successful_samples, failed_samples, n_ops):
"""
- Store finished samples
- :param successful_samples: Dict[level_id, List[Tuple[sample_id:str, Tuple[ndarray, ndarray]]]]
- :param failed_samples: Dict[level_id, List[Tuple[sample_id: str, error message: str]]]
- :param n_ops: Dict[level_id: int, List[total time: float, number of success samples: int]]
+ Persist finished samples and operation time estimates to storage.
+
+ :param successful_samples: Dict[level_id, List[Tuple[sample_id:str, (fine, coarse)]]]
+ :param failed_samples: Dict[level_id, List[Tuple[sample_id:str, error_message:str]]]
+ :param n_ops: Dict[level_id, Tuple[total_time:float, n_success_samples:int]]
:return: None
"""
self.sample_storage.save_samples(successful_samples, failed_samples)
@@ -194,24 +242,24 @@ def _store_samples(self, successful_samples, failed_samples, n_ops):
def process_adding_samples(self, n_estimated, sleep=0, add_coeff=0.1, timeout=ADDING_SAMPLES_TIMEOUT):
"""
- Process adding samples
- Note: n_estimated are wrong if n_ops is similar through all levels
- :param n_estimated: Number of estimated samples on each level, list
- :param sleep: Sample waiting time
- :param add_coeff: default value 0.1, The number of scheduled samples would be 'add_coef' fraction of difference
- between current number of target samples and new estimated number of target samples
- :param timeout: ask sampling pool for finished samples timeout
- :return: bool, if True adding samples is complete
+ Add newly estimated samples in batches, scheduling a fraction of the difference
+ between current scheduled and newly estimated targets.
+
+ Note: n_estimated may be unreliable if per-level n_ops are similar across levels.
+
+ :param n_estimated: array-like, estimated target samples per level
+ :param sleep: float, time to sleep while waiting for results
+ :param add_coeff: float in (0,1], fraction of the difference to schedule each iteration (default 0.1)
+ :param timeout: float, timeout passed to ask_sampling_pool_for_samples()
+ :return: bool, True if scheduled counts reached the estimates for all levels
"""
+ # Ensure storage reflects any finished work
self.ask_sampling_pool_for_samples(timeout=timeout)
- # Get default scheduled samples
+ # Currently scheduled samples per level
n_scheduled = self.l_scheduled_samples()
- # New scheduled sample will be 10 percent of difference
- # between current number of target samples and new estimated one
- # If 10 percent of estimated samples is greater than difference between estimated and scheduled samples,
- # set scheduled samples to estimated samples
+ # Compute new scheduled values (add_coeff fraction of the remaining difference)
new_scheduled = np.where((n_estimated * add_coeff) > (n_estimated - n_scheduled),
n_estimated,
n_scheduled + (n_estimated - n_scheduled) * add_coeff)
@@ -220,41 +268,43 @@ def process_adding_samples(self, n_estimated, sleep=0, add_coeff=0.1, timeout=AD
n_scheduled,
new_scheduled))
- # Levels where estimated are greater than scheduled
+ # Levels where estimated > scheduled
greater_items = np.where(np.greater(n_estimated, n_scheduled))[0]
- # Scheduled samples and wait until at least half of the samples are done
+ # Schedule and wait until at least a fraction of newly scheduled samples finish
self.set_scheduled_and_wait(n_scheduled, greater_items, sleep, timeout=timeout)
return np.all(n_estimated[greater_items] == n_scheduled[greater_items])
def set_scheduled_and_wait(self, n_scheduled, greater_items, sleep, fin_sample_coef=0.5, timeout=1e-7):
"""
- Scheduled samples on each level and wait until at least half of the samples is done
- :param n_scheduled: ndarray, number of scheduled samples on each level
- :param greater_items: Items where n_estimated is greater than n_scheduled
- :param sleep: Time waiting for samples
- :param fin_sample_coef: The proportion of samples to finished for further estimate
+ Set scheduled sample targets and wait until a proportion of those samples finish.
+
+ :param n_scheduled: ndarray, target number of scheduled samples per level
+ :param greater_items: iterable of indices where targets were increased
+ :param sleep: float, time to sleep between polls
+ :param fin_sample_coef: float in (0,1], fraction of scheduled samples that should finish before continuing
+ :param timeout: float, timeout passed to ask_sampling_pool_for_samples()
:return: None
"""
- # Set scheduled samples and run simulations
+ # Update internal targets and schedule required samples
self.set_level_target_n_samples(n_scheduled)
self.schedule_samples(timeout=timeout)
- # Finished level samples
+ # Current finished counts
n_finished = self.n_finished_samples
- # Wait until at least half of the scheduled samples are done on each level
+ # Wait until at least fin_sample_coef fraction of scheduled samples are finished for affected levels
while np.any(n_finished[greater_items] < fin_sample_coef * n_scheduled[greater_items]):
- # Wait a while
time.sleep(sleep)
self.ask_sampling_pool_for_samples(timeout=timeout)
n_finished = self.n_finished_samples
def set_level_target_n_samples(self, n_samples):
"""
- Set level number of target samples
- :param n_samples: list, each level target samples
+ Update the per-level target sample counts to at least the provided values.
+
+ :param n_samples: iterable of new target samples per level
:return: None
"""
for level, n in enumerate(n_samples):
@@ -262,14 +312,18 @@ def set_level_target_n_samples(self, n_samples):
def l_scheduled_samples(self):
"""
- Get all levels number of scheduled samples
- :return: list
+ Return the currently scheduled sample counts per level.
+
+ :return: list or array-like of scheduled sample counts
"""
return self._n_scheduled_samples
def renew_failed_samples(self):
"""
- Resurrect failed samples
+ Reschedule previously failed samples.
+
+ Retrieves failed sample IDs from storage, re-schedules them in the sampling pool,
+ and clears failed records from storage.
:return: None
"""
failed_samples = self.sample_storage.failed_samples()
@@ -279,8 +333,8 @@ def renew_failed_samples(self):
level_id = int(level_id)
for sample_id in sample_ids:
level_sim = self._level_sim_objects[level_id]
- # Schedule current sample
self._sampling_pool.schedule_sample(sample_id, level_sim)
samples.append(sample_id)
+ # Clear failed sample records after rescheduling
self.sample_storage.clear_failed()
diff --git a/mlmc/sampling_pool.py b/mlmc/sampling_pool.py
index 0d402325..9044246d 100644
--- a/mlmc/sampling_pool.py
+++ b/mlmc/sampling_pool.py
@@ -5,7 +5,7 @@
import time
import hashlib
import numpy as np
-from typing import List
+from typing import List, Tuple, Dict, Optional, Any
import traceback
from abc import ABC, abstractmethod
from multiprocessing import Pool as ProcPool
@@ -15,18 +15,22 @@
class SamplingPool(ABC):
"""
- Determining the runtime environment of samples, eg single process, multiple processes, running PBS, ...
+ Abstract base class defining the runtime environment for sample simulations.
+ It manages sample execution across different backends (single process,
+ multiprocessing, PBS, etc.).
"""
FAILED_DIR = 'failed'
SEVERAL_SUCCESSFUL_DIR = 'several_successful'
N_SUCCESSFUL = 5
- # Number of successful samples to store
+ # Number of successful samples to store.
- def __init__(self, work_dir=None, debug=False):
+ def __init__(self, work_dir: Optional[str] = None, debug: bool = False):
"""
- :param work_dir: Path to working directory
- :param debug: bool, if True keep sample directories
+ Initialize the sampling pool environment.
+
+ :param work_dir: Path to the working directory where outputs are stored.
+ :param debug: If True, keep sample directories for debugging.
"""
self._output_dir = None
if work_dir is not None:
@@ -34,14 +38,16 @@ def __init__(self, work_dir=None, debug=False):
self._output_dir = os.path.join(work_dir, "output")
self._debug = debug
- self._create_dir() # prepare output dir
- self._create_dir(SamplingPool.FAILED_DIR) # prepare failed dir
- self._successful_dir = self._create_dir(SamplingPool.SEVERAL_SUCCESSFUL_DIR) # prepare several successful dir
+ # Prepare main output, failed, and successful directories.
+ self._create_dir()
+ self._create_dir(SamplingPool.FAILED_DIR)
+ self._successful_dir = self._create_dir(SamplingPool.SEVERAL_SUCCESSFUL_DIR)
- def _create_dir(self, directory=""):
+ def _create_dir(self, directory: str = "") -> Optional[str]:
"""
- Create output directory, in 'debug' mode not remove existing output_dir
- :return: None
+ Create the output directory if it does not exist.
+
+ In debug mode, existing directories are preserved.
"""
if self._output_dir is not None:
directory = os.path.join(self._output_dir, directory)
@@ -49,288 +55,446 @@ def _create_dir(self, directory=""):
shutil.rmtree(directory)
os.makedirs(directory, mode=0o775, exist_ok=True)
return directory
+ return None
+
+ # --- Abstract methods to be implemented by subclasses ---
@abstractmethod
- def schedule_sample(self, sample_id, level_sim: LevelSimulation):
+ def schedule_sample(self, sample_id: str, level_sim: LevelSimulation):
"""
- Method for calculating simulation samples
- :param sample_id: str
- :param level_sim: level_simulation.LevelSimulation instance
+ Schedule a simulation sample for execution.
+
+ :param sample_id: Unique sample identifier.
+ :param level_sim: LevelSimulation instance.
:return: Tuple[str, List]
"""
@abstractmethod
- def have_permanent_samples(self, sample_ids):
+ def have_permanent_samples(self, sample_ids: List[str]) -> bool:
"""
- Informs the Pool about sample_ids that have been scheduled but not yet finished
+ Inform the pool about samples that have been scheduled but not yet finished.
"""
@abstractmethod
def get_finished(self):
"""
- Return finished samples
- :return: list of results, number of running samples
+ Retrieve finished sample results.
+
+ :return: Tuple containing (successful samples, failed samples, number of running samples, n_ops)
"""
+ # --- Utility methods shared across subclasses ---
+
@staticmethod
- def compute_seed(sample_id):
+ def compute_seed(sample_id: str) -> int:
"""
- Calculate seed for given sample id
- :param sample_id: str
- :return: int
+ Compute a deterministic seed for a given sample ID.
+
+ :param sample_id: Unique sample identifier.
+ :return: Integer seed value.
"""
- hash = hashlib.md5(sample_id.encode('ascii'))
- seed = np.frombuffer(hash.digest(), dtype='uint32')[0]
- return seed
+ hash_val = hashlib.md5(sample_id.encode('ascii'))
+ seed = np.frombuffer(hash_val.digest(), dtype='uint32')[0]
+ return int(seed)
@staticmethod
- def calculate_sample(sample_id, level_sim, work_dir=None, seed=None):
+ def calculate_sample(sample_id: str, level_sim: LevelSimulation,
+ work_dir: Optional[str] = None,
+ seed: Optional[int] = None) -> Tuple[str, Any, str, float]:
"""
- Method for calculating results
- :param sample_id: str
- :param level_sim: LevelSimulation
- :param work_dir: working directory
- :param seed: random seed
- :return: sample id, sample result, error message with traceback, running time
+ Execute a single simulation sample.
+
+ :param sample_id: Sample identifier.
+ :param level_sim: LevelSimulation instance.
+ :param work_dir: Working directory for the sample.
+ :param seed: Optional random seed (generated if not provided).
+ :return: Tuple(sample_id, result, error_message, running_time)
"""
if seed is None:
seed = SamplingPool.compute_seed(sample_id)
+
res = (None, None)
err_msg = ""
- running_time = 0
+ running_time = 0.0
if level_sim.need_sample_workspace:
SamplingPool.handle_sim_files(work_dir, sample_id, level_sim)
+
try:
start = time.time()
res = level_sim._calculate(level_sim.config_dict, seed)
running_time = time.time() - start
- # Check result format
- if type(res[0]) is np.ndarray and type(res[1]) is np.ndarray:
+ # Validate result format.
+ if isinstance(res[0], np.ndarray) and isinstance(res[1], np.ndarray):
flatten_fine_res = res[0].flatten()
flatten_coarse_res = res[1].flatten()
- res_expected_len = np.sum(
- [np.prod(quantity_spec.shape) * len(quantity_spec.times) * len(quantity_spec.locations)
- for quantity_spec in level_sim._result_format()])
+ expected_len = np.sum([
+ np.prod(q.shape) * len(q.times) * len(q.locations)
+ for q in level_sim._result_format()
+ ])
- assert len(flatten_fine_res) == len(flatten_coarse_res) == res_expected_len, \
- "Unexpected result format, expected length: {}, resultf length: {}".format(res_expected_len,
- len(flatten_fine_res))
+ assert len(flatten_fine_res) == len(flatten_coarse_res) == expected_len, \
+ f"Unexpected result format. Expected length: {expected_len}, got: {len(flatten_fine_res)}"
except Exception:
- str_list = traceback.format_exception(*sys.exc_info())
- err_msg = "".join(str_list)
+ err_msg = "".join(traceback.format_exception(*sys.exc_info()))
+ print("Error msg:", err_msg)
return sample_id, res, err_msg, running_time
+ # --- File handling helpers ---
+
@staticmethod
- def change_to_sample_directory(work_dir, path: str):
+ def change_to_sample_directory(work_dir: str, path: str) -> str:
"""
- Create sample directory and change working directory
- :param path: str
- :return: None
+ Create and switch to the sample-specific directory.
+
+ :param work_dir: Base working directory.
+ :param path: Sample subdirectory name.
+ :return: Absolute path to the created sample directory.
"""
sample_dir = os.path.join(work_dir, path)
- if not os.path.isdir(sample_dir):
- os.makedirs(sample_dir, mode=0o775, exist_ok=True)
+ os.makedirs(sample_dir, mode=0o775, exist_ok=True)
return sample_dir
@staticmethod
- def copy_sim_files(files: List[str], sample_dir):
+ def copy_sim_files(files: List[str], sample_dir: str):
"""
- Copy simulation common files to current simulation sample directory
- :param files: List of files
- :return: None
+ Copy shared simulation files to the sample directory.
+
+ :param files: List of file paths to copy.
+ :param sample_dir: Destination sample directory.
"""
for file in files:
shutil.copy(file, sample_dir)
@staticmethod
- def handle_sim_files(work_dir, sample_id, level_sim):
+ def handle_sim_files(work_dir: str, sample_id: str, level_sim: LevelSimulation):
"""
- Change working directory to sample dir and copy common files
- :param sample_id: str
- :param level_sim: LevelSimulation
- :return: None
+ Prepare the sample workspace (create directory, copy common files, set cwd).
+
+ :param work_dir: Base working directory.
+ :param sample_id: Sample identifier.
+ :param level_sim: LevelSimulation instance.
"""
if level_sim.need_sample_workspace:
sample_dir = SamplingPool.change_to_sample_directory(work_dir, sample_id)
-
if level_sim.common_files is not None:
SamplingPool.copy_sim_files(level_sim.common_files, sample_dir)
os.chdir(sample_dir)
@staticmethod
- def move_successful_rm(sample_id, level_sim, output_dir, dest_dir):
+ def move_successful_rm(sample_id: str, level_sim: LevelSimulation,
+ output_dir: str, dest_dir: str):
+ """
+ Move successful sample directories and remove originals.
+ """
if int(sample_id[-7:]) < SamplingPool.N_SUCCESSFUL:
- SamplingPool.move_dir(sample_id, level_sim.need_sample_workspace, output_dir, dest_dir=dest_dir)
+ SamplingPool.move_dir(sample_id, level_sim.need_sample_workspace, output_dir, dest_dir)
SamplingPool.remove_sample_dir(sample_id, level_sim.need_sample_workspace, output_dir)
@staticmethod
- def move_failed_rm(sample_id, level_sim, output_dir, dest_dir):
- SamplingPool.move_dir(sample_id, level_sim.need_sample_workspace, output_dir, dest_dir=dest_dir)
+ def move_failed_rm(sample_id: str, level_sim: LevelSimulation,
+ output_dir: str, dest_dir: str):
+ """
+ Move failed sample directories and remove originals.
+ """
+ SamplingPool.move_dir(sample_id, level_sim.need_sample_workspace, output_dir, dest_dir)
SamplingPool.remove_sample_dir(sample_id, level_sim.need_sample_workspace, output_dir)
@staticmethod
- def move_dir(sample_id, sample_workspace, work_dir, dest_dir):
+ def move_dir(sample_id: str, sample_workspace: bool,
+ work_dir: str, dest_dir: str):
"""
- Move failed sample dir to failed directory
- :param sample_id: str
- :param sample_workspace: bool, simulation needs workspace
- :param work_dir: str
- :param dest_dir: destination
- :return: None
+ Move a sample directory to another location (e.g., failed or successful).
+
+ :param sample_id: Sample identifier.
+ :param sample_workspace: Whether the sample uses its own workspace.
+ :param work_dir: Base working directory.
+ :param dest_dir: Destination subdirectory name.
"""
- if sample_workspace and work_dir is not None and dest_dir is not None:
+ if sample_workspace and work_dir and dest_dir:
destination_dir = os.path.join(work_dir, dest_dir)
sample_dir = SamplingPool.change_to_sample_directory(work_dir, sample_id)
- if os.path.exists(os.path.join(destination_dir, sample_id)):
- shutil.rmtree(os.path.join(destination_dir, sample_id), ignore_errors=True)
- shutil.copytree(sample_dir, os.path.join(destination_dir, sample_id))
+ target_dir = os.path.join(destination_dir, sample_id)
+ if os.path.exists(target_dir):
+ shutil.rmtree(target_dir, ignore_errors=True)
+ shutil.copytree(sample_dir, target_dir)
@staticmethod
- def remove_sample_dir(sample_id, sample_workspace, work_dir):
+ def remove_sample_dir(sample_id: str, sample_workspace: bool, work_dir: str):
"""
- Remove sample directory
- :param sample_id: str
- :param sample_workspace: bool, simulation needs workspace
- :param work_dir: str
- :return: None
+ Remove the directory for a completed or failed sample.
+
+ :param sample_id: Sample identifier.
+ :param sample_workspace: Whether the sample uses its own workspace.
+ :param work_dir: Base working directory.
"""
- if sample_workspace and work_dir is not None:
+ if sample_workspace and work_dir:
sample_dir = SamplingPool.change_to_sample_directory(work_dir, sample_id)
shutil.rmtree(sample_dir, ignore_errors=True)
class OneProcessPool(SamplingPool):
+ """
+ Sampling pool implementation that executes all samples sequentially in a single process.
+ Used primarily for debugging or lightweight simulations.
+ """
def __init__(self, work_dir=None, debug=False):
"""
- Everything is running in one process
+ Initialize the one-process pool.
+
+ Parameters
+ ----------
+ work_dir : str, optional
+ Working directory for storing sample outputs.
+ debug : bool, default=False
+ If True, disables moving/removing files after successful execution.
"""
super().__init__(work_dir=work_dir, debug=debug)
- self._failed_queues = {}
- self._queues = {}
- self._n_running = 0
- self.times = {}
+ self._failed_queues = {} # Stores failed sample queues per level
+ self._queues = {} # Stores successful sample queues per level
+ self._n_running = 0 # Tracks number of currently running samples
+ self.times = {} # Stores total runtime and count per level
def schedule_sample(self, sample_id, level_sim):
- self._n_running += 1
+ """
+ Execute a single sample synchronously (in the current process).
+
+ Parameters
+ ----------
+ sample_id : int
+ Identifier of the sample.
+ level_sim : LevelSimulation
+ Simulation instance containing configuration for the sample.
+ """
+ self._n_running += 1 # Increment running sample counter
+ # Set output directory if required by simulation
if self._output_dir is None and level_sim.need_sample_workspace:
self._output_dir = os.getcwd()
- sample_id, result, err_msg, running_time = SamplingPool.calculate_sample(sample_id, level_sim,
- work_dir=self._output_dir)
+ # Run the sample and collect result, error message, and runtime
+ sample_id, result, err_msg, running_time = SamplingPool.calculate_sample(
+ sample_id, level_sim, work_dir=self._output_dir
+ )
+ # Process result (successful or failed)
self._process_result(sample_id, result, err_msg, running_time, level_sim)
def _process_result(self, sample_id, result, err_msg, running_time, level_sim):
"""
- Save sample result
- :param sample_id: sample identifier from calculate_sample()
- :param result: sample result from calculate_sample()
- :param err_msg: sample error message from calculate_sample()
- :param running_time: running time for sample from calculate_sample()
- :param level_sim: level_simulation instance
- :return: None
- """
- # Save running time for n_ops
+ Process result from a sample execution and store it in the appropriate queue.
+
+ Parameters
+ ----------
+ sample_id : int
+ Identifier of the executed sample.
+ result : tuple
+ Pair of fine and coarse results (numpy arrays).
+ err_msg : str
+ Error message if the sample failed, empty string otherwise.
+ running_time : float
+ Runtime of the sample execution in seconds.
+ level_sim : LevelSimulation
+ Simulation instance used to produce the sample.
+ """
+ # Record runtime for this level
self._save_running_time(level_sim._level_id, running_time)
+ # If no error occurred, store successful result
if not err_msg:
- self._queues.setdefault(level_sim._level_id, queue.Queue()).put((sample_id, (result[0], result[1])))
+ self._queues.setdefault(level_sim._level_id, queue.Queue()).put(
+ (sample_id, (result[0], result[1]))
+ )
+ # Move successful sample to its permanent directory unless debugging
if not self._debug:
- SamplingPool.move_successful_rm(sample_id, level_sim, output_dir=self._output_dir, dest_dir=self._successful_dir)
+ SamplingPool.move_successful_rm(
+ sample_id, level_sim, output_dir=self._output_dir, dest_dir=self._successful_dir
+ )
else:
+ # If the simulation failed
if not level_sim.need_sample_workspace:
- print("Sample {} error: {}".format(sample_id, err_msg))
+ print(f"Sample {sample_id} error: {err_msg}")
else:
- SamplingPool.move_failed_rm(sample_id, level_sim, output_dir=self._output_dir, dest_dir=SamplingPool.FAILED_DIR)
+ SamplingPool.move_failed_rm(
+ sample_id, level_sim, output_dir=self._output_dir, dest_dir=SamplingPool.FAILED_DIR
+ )
self._failed_queues.setdefault(level_sim._level_id, queue.Queue()).put((sample_id, err_msg))
def _save_running_time(self, level_id, running_time):
"""
- Save running time to dictionary, store total time and number of samples
- :param level_id: int
- :param running_time: float
- :return: None
+ Save sample execution time in the tracking dictionary.
+
+ Parameters
+ ----------
+ level_id : int
+ Identifier of the simulation level.
+ running_time : float
+ Execution time of the sample.
"""
- # Save sample times [total time, number of samples]
+ # Initialize level entry if missing
if level_id not in self.times:
self.times[level_id] = [0, 0]
- # Failed samples have running time equal 0 by default
+ # Samples with zero runtime (the failed-sample convention) are not counted
if running_time != 0:
- self.times[level_id][0] += running_time
- self.times[level_id][1] += 1
+ self.times[level_id][0] += running_time # Accumulate total runtime
+ self.times[level_id][1] += 1 # Increment sample count
def have_permanent_samples(self, sample_ids):
+ """
+ Return False, indicating that no samples are stored permanently.
+
+ Parameters
+ ----------
+ sample_ids : list
+ List of sample identifiers (ignored).
+
+ Returns
+ -------
+ bool
+ Always False.
+ """
return False
def get_finished(self):
"""
- return results from queue - list of (sample_id, pair_of_result_vectors, error_message)
+ Retrieve all completed (successful and failed) samples.
+
+ Returns
+ -------
+ successful : dict
+ Dictionary of successful samples by level.
+ failed : dict
+ Dictionary of failed samples by level.
+ n_running : int
+ Number of currently running samples.
+ times : list
+ List of (level_id, [total_time, n_samples]) pairs.
"""
successful = self._queues_to_list(list(self._queues.items()))
failed = self._queues_to_list(list(self._failed_queues.items()))
-
return successful, failed, self._n_running, list(self.times.items())
def _queues_to_list(self, queue_dict_list):
+ """
+ Convert queues to lists and clear them safely.
+
+ Parameters
+ ----------
+ queue_dict_list : list
+ List of (level_id, queue.Queue) pairs.
+
+ Returns
+ -------
+ results : dict
+ Dictionary mapping level_id to list of queue entries.
+ """
results = {}
for level_id, q in queue_dict_list:
queue_list = list(q.queue)
if not queue_list:
continue
results[level_id] = queue_list
- # Thread safe clear
+
+ # Thread-safe queue clearing
with q.mutex:
q.queue.clear()
+ # Update running sample counter
self._n_running -= len(results[level_id])
-
return results
+# ==============================================================================
+
class ProcessPool(OneProcessPool):
"""
- Suitable for local parallel sampling for simulations WITHOUT external program call
+ Sampling pool using multiprocessing for parallel sample execution.
+ Suitable for simulations without external program calls.
"""
def __init__(self, n_processes, work_dir=None, debug=False):
- self._pool = ProcPool(n_processes)
+ """
+ Initialize process-based parallel sampling pool.
+
+ Parameters
+ ----------
+ n_processes : int
+ Number of worker processes to use.
+ work_dir : str, optional
+ Working directory for samples.
+ debug : bool, default=False
+ If True, disables moving/removing sample outputs.
+ """
+ self._pool = ProcPool(n_processes) # Multiprocessing pool
super().__init__(work_dir=work_dir, debug=debug)
def res_callback(self, result, level_sim):
"""
- Process simulation results
- :param result: tuple
- :param level_sim: LevelSimulation instance
- :return: None
+ Callback for handling results from asynchronous execution.
+
+ Parameters
+ ----------
+ result : tuple
+ Returned result from SamplingPool.calculate_sample().
+ level_sim : LevelSimulation
+ Simulation level instance.
"""
self._process_result(*result, level_sim)
def schedule_sample(self, sample_id, level_sim):
+ """
+ Schedule a sample for parallel execution in a separate process.
+
+ Parameters
+ ----------
+ sample_id : int
+ Sample identifier.
+ level_sim : LevelSimulation
+ Simulation configuration instance.
+ """
self._n_running += 1
+ # Set working directory for output files
if self._output_dir is None and level_sim.need_sample_workspace:
self._output_dir = os.getcwd()
- self._pool.apply_async(SamplingPool.calculate_sample, args=(sample_id, level_sim, self._output_dir),
- callback=lambda res: self.res_callback(res, level_sim),
- error_callback=lambda res: self.res_callback(res, level_sim))
+ # Submit task asynchronously to process pool
+ self._pool.apply_async(
+ SamplingPool.calculate_sample,
+ args=(sample_id, level_sim, self._output_dir),
+ callback=lambda res: self.res_callback(res, level_sim),
+ error_callback=lambda res: self.res_callback(res, level_sim)
+ )
+
+# ==============================================================================
class ThreadPool(ProcessPool):
"""
- Suitable local parallel sampling for simulations WITH external program call
+ Sampling pool using threading for local parallel sampling.
+ Suitable for simulations with external program calls (I/O-bound).
"""
def __init__(self, n_thread, work_dir=None, debug=False):
+ """
+ Initialize thread-based parallel sampling pool.
+
+ Parameters
+ ----------
+ n_thread : int
+ Number of threads to use.
+ work_dir : str, optional
+ Working directory for samples.
+ debug : bool, default=False
+ If True, disables moving/removing sample outputs.
+ """
super().__init__(n_thread, work_dir=work_dir, debug=debug)
- self._pool = pool.ThreadPool(n_thread)
+ self._pool = pool.ThreadPool(n_thread) # Thread-based pool instead of process-based
self._failed_queues = {}
self._queues = {}
self._n_running = 0
diff --git a/mlmc/sampling_pool_pbs.py b/mlmc/sampling_pool_pbs.py
index 01c4128b..2142fdcd 100644
--- a/mlmc/sampling_pool_pbs.py
+++ b/mlmc/sampling_pool_pbs.py
@@ -5,6 +5,8 @@
import pickle
import json
import glob
+import time
+import numpy as np
from mlmc.level_simulation import LevelSimulation
from mlmc.sampling_pool import SamplingPool
from mlmc.tool.pbs_job import PbsJob
@@ -133,9 +135,17 @@ def pbs_common_setting(self, **kwargs):
:return: None
"""
# Script header
- select_flags_list = kwargs.get('select_flags', [])
- if select_flags_list:
- kwargs['select_flags'] = ":" + ":".join(select_flags_list)
+ select_flags_dict = kwargs.get('select_flags', {})
+
+ # Set scratch dir
+ if any(re.compile('scratch.*').match(flag) for flag in list(select_flags_dict.keys())):
+ if kwargs['scratch_dir'] is None:
+ kwargs['scratch_dir'] = "$SCRATCHDIR"
+ else:
+ kwargs['scratch_dir'] = ''
+
+ if select_flags_dict:
+ kwargs['select_flags'] = ":" + ':'.join('{}={}'.format(*item) for item in select_flags_dict.items())
else:
kwargs['select_flags'] = ""
@@ -162,7 +172,7 @@ def pbs_common_setting(self, **kwargs):
kwargs['optional_pbs_requests']) # e.g. ['#PBS -m ae'] means mail is sent when the job aborts or terminates
self._pbs_header_template.extend(('MLMC_WORKDIR=\"{}\"'.format(self._work_dir),))
self._pbs_header_template.extend(kwargs['env_setting'])
- self._pbs_header_template.extend(('{python} -m mlmc.tool.pbs_job {output_dir} {job_name} >'
+ self._pbs_header_template.extend(('{python} -m mlmc.tool.pbs_job {output_dir} {job_name} {scratch_dir} >'
'{pbs_output_dir}/{job_name}_STDOUT 2>&1',))
self._pbs_config = kwargs
@@ -221,28 +231,31 @@ def execute(self):
script_content = "\n".join(self.pbs_script)
self.write_script(script_content, job_file)
- process = subprocess.run(['qsub', job_file], stderr=subprocess.PIPE, stdout=subprocess.PIPE)
- try:
- if process.returncode != 0:
- raise Exception(process.stderr.decode('ascii'))
- # Find all finished jobs
- self._qsub_failed_n = 0
- # Write current job count
- self._job_count += 1
-
- # Get pbs_id from qsub output
- pbs_id = process.stdout.decode("ascii").split(".")[0]
- # Store pbs id for future qstat calls
- self._pbs_ids.append(pbs_id)
- pbs_process.write_pbs_id(pbs_id)
-
- self._current_job_weight = 0
- self._n_samples_in_job = 0
- self._scheduled = []
- except:
- self._qsub_failed_n += 1
- if self._qsub_failed_n > SamplingPoolPBS.QSUB_FAILED_MAX_N:
- raise Exception(process.stderr.decode("ascii"))
+ while self._qsub_failed_n <= SamplingPoolPBS.QSUB_FAILED_MAX_N:
+ process = subprocess.run(['qsub', job_file], stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+ try:
+ if process.returncode != 0:
+ raise Exception(process.stderr.decode('ascii'))
+ # Find all finished jobs
+ self._qsub_failed_n = 0
+ # Write current job count
+ self._job_count += 1
+
+ # Get pbs_id from qsub output
+ pbs_id = process.stdout.decode("ascii").split(".")[0]
+ # Store pbs id for future qstat calls
+ self._pbs_ids.append(pbs_id)
+ pbs_process.write_pbs_id(pbs_id)
+
+ self._current_job_weight = 0
+ self._n_samples_in_job = 0
+ self._scheduled = []
+ break
+ except:
+ self._qsub_failed_n += 1
+ time.sleep(30)
+ if self._qsub_failed_n > SamplingPoolPBS.QSUB_FAILED_MAX_N:
+ raise Exception(process.stderr.decode("ascii"))
def _create_script(self):
"""
@@ -277,6 +290,64 @@ def get_finished(self):
finished_pbs_jobs, unfinished_pbs_jobs = self._qstat_pbs_job()
return self._get_result_files(finished_pbs_jobs, unfinished_pbs_jobs)
+ def collect_data(self):
+ successful_results = {}
+ failed_results = {}
+ times = {}
+ sim_data_results = {}
+ # running_times = {}
+ # extract_mesh_times = {}
+ # make_field_times = {}
+ # generate_rnd_times = {}
+ # fine_flow_times = {}
+ # coarse_flow_times = {}
+ n_running = 0
+
+ os.chdir(self._jobs_dir)
+ for file in glob.glob("*_STDOUT"):
+ job_id = re.findall(r'(\d+)_STDOUT', file)[0]
+
+ successful, failed, time = PbsJob.read_results(job_id, self._jobs_dir)
+
+ # Split results to levels
+ for level_id, results in successful.items():
+ successful_results.setdefault(level_id, []).extend(results)
+ for level_id, results in failed.items():
+ failed_results.setdefault(level_id, []).extend(results)
+ for level_id, results in time.items():
+ if level_id in times:
+ times[level_id][0] += results[-1][0]
+ times[level_id][1] += results[-1][1]
+ else:
+ times[level_id] = list(results[-1])
+
+ # # Optional simulation data
+ # for level_id, results in sim_data.items():
+ # sim_data_results.setdefault(level_id, []).extend(results)
+
+ # for level_id, results in running_time.items():
+ # running_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+ #
+ # for level_id, results in extract_mesh.items():
+ # extract_mesh_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+ #
+ # for level_id, results in make_field.items():
+ # make_field_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+ #
+ # for level_id, results in generate_rnd.items():
+ # generate_rnd_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+ #
+ # for level_id, results in fine_flow.items():
+ # fine_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+ #
+ # for level_id, results in coarse_flow.items():
+ # coarse_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+
+ return successful_results, failed_results, n_running #, sim_data_results #list(times.items()), list(running_times.items()), \
+ # list(extract_mesh_times.items()), list(make_field_times.items()), list(generate_rnd_times.items()), \
+ # list(fine_flow_times.items()), \
+ # list(coarse_flow_times.items())
+
def _qstat_pbs_job(self):
"""
Parse qstat output and get all unfinished job ids
@@ -286,23 +357,36 @@ def _qstat_pbs_job(self):
if len(self._pbs_ids) > 0:
# Get PBS id's status,
# '-x' - displays status information for finished and moved jobs in addition to queued and running jobs.
- qstat_call = ["qstat", "-x"]
+ qstat_call = ["qstat", "-xs"]
qstat_call.extend(self._pbs_ids)
- # qstat call
- process = subprocess.run(qstat_call, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
- try:
- if process.returncode != 0:
- raise Exception(process.stderr.decode("ascii"))
- output = process.stdout.decode("ascii")
- # Find all finished jobs
- finished_pbs_jobs = re.findall(r"(\d+)\..*\d+ F", output)
- self._qstat_failed_n = 0
- except:
- self._qstat_failed_n += 1
- if self._qstat_failed_n > SamplingPoolPBS.QSTAT_FAILED_MAX_N:
- raise Exception(process.stderr.decode("ascii"))
- finished_pbs_jobs = []
+ while self._qstat_failed_n <= SamplingPoolPBS.QSTAT_FAILED_MAX_N:
+ # qstat call
+ unknown_job_ids = []
+ process = subprocess.run(qstat_call, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+ try:
+ if process.returncode != 0:
+ err_output = process.stderr.decode("ascii")
+ # Presumably, Job Ids are 'unknown' for PBS after some time of their inactivity
+ unknown_job_ids = re.findall(r"Unknown Job Id (\d+)\.", err_output)
+
+ if len(unknown_job_ids) == 0:
+ raise Exception(process.stderr.decode("ascii"))
+
+ output = process.stdout.decode("ascii")
+ # Find all finished jobs
+ finished_pbs_jobs = re.findall(r"(\d+)\..*\d+ F", output)
+ finished_moved_pbs_jobs = re.findall(r"(\d+)\..*\d+ M.*\n.*Job finished", output)
+ finished_pbs_jobs.extend(finished_moved_pbs_jobs)
+ finished_pbs_jobs.extend(unknown_job_ids)
+ self._qstat_failed_n = 0
+ break
+ except:
+ self._qstat_failed_n += 1
+ time.sleep(30)
+ if self._qstat_failed_n > SamplingPoolPBS.QSTAT_FAILED_MAX_N:
+ raise Exception(process.stderr.decode("ascii"))
+ finished_pbs_jobs = []
# Get unfinished as diff between planned and finished
unfinished_pbs_jobs = []
@@ -327,7 +411,7 @@ def _get_result_files(self, finished_pbs_jobs, unfinished_pbs_jobs):
:return: successful_results: Dict[level_id, List[Tuple[sample_id: str, Tuple[fine_result: np.ndarray, coarse_result: n.ndarray]]]]
failed_results: Dict[level_id, List[Tuple[sample_id: str, err_msg: str]]]
n_running: int, number of running samples
- times:
+ times:
"""
os.chdir(self._jobs_dir)
@@ -343,6 +427,14 @@ def _get_result_files(self, finished_pbs_jobs, unfinished_pbs_jobs):
successful_results = {}
failed_results = {}
times = {}
+ #sim_data_results = {}
+ # running_times = {}
+ # extract_mesh_times = {}
+ # make_field_times = {}
+ # generate_rnd_times = {}
+ # fine_flow_times = {}
+ # coarse_flow_times = {}
+
for pbs_id in finished_pbs_jobs:
reg = "*_{}".format(pbs_id) # JobID_PbsId file
file = glob.glob(reg)
@@ -359,6 +451,8 @@ def _get_result_files(self, finished_pbs_jobs, unfinished_pbs_jobs):
successful_results.setdefault(level_id, []).extend(results)
for level_id, results in failed.items():
failed_results.setdefault(level_id, []).extend(results)
+ # for level_id, results in sim_data.items():
+ # sim_data_results.setdefault(level_id, []).extend(results)
for level_id, results in time.items():
if level_id in times:
times[level_id][0] += results[-1][0]
@@ -366,14 +460,38 @@ def _get_result_files(self, finished_pbs_jobs, unfinished_pbs_jobs):
else:
times[level_id] = list(results[-1])
+ # for level_id, results in running_time.items():
+ # running_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+ #
+ # for level_id, results in extract_mesh.items():
+ # extract_mesh_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+ #
+ # for level_id, results in make_field.items():
+ # make_field_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+ #
+ # for level_id, results in generate_rnd.items():
+ # generate_rnd_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+ #
+ # for level_id, results in fine_flow.items():
+ # fine_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+ #
+ # for level_id, results in coarse_flow.items():
+ # coarse_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
# Delete pbsID file - it means job is finished
SamplingPoolPBS.delete_pbs_id_file(file)
if self._unfinished_sample_ids:
successful_results, failed_results, times = self._collect_unfinished(successful_results,
- failed_results, times)
+ failed_results, times,
+ )
+ # running_times
+ # extract_mesh_times,
+ # make_field_times,
+ # generate_rnd_times,
+ # fine_flow_times,
+ # coarse_flow_times)
- return successful_results, failed_results, n_running, list(times.items())
+ return successful_results, failed_results, n_running, list(times.items())#, sim_data_results
def _collect_unfinished(self, successful_results, failed_results, times):
"""
@@ -384,42 +502,64 @@ def _collect_unfinished(self, successful_results, failed_results, times):
:return: all input dictionaries
"""
already_collected = set()
+
for sample_id in self._unfinished_sample_ids:
if sample_id in already_collected:
continue
- job_id = PbsJob.job_id_from_sample_id(sample_id, self._jobs_dir)
+ try:
+ job_id = PbsJob.job_id_from_sample_id(sample_id, self._jobs_dir)
+ except (FileNotFoundError, KeyError) as e:
+ level_id = int(re.findall(r'L0?(\d*)', sample_id)[0])
+ failed_results.setdefault(level_id, []).append((sample_id, "{}".format(e)))
+ continue
+
successful, failed, time = PbsJob.read_results(job_id, self._jobs_dir)
# Split results to levels
for level_id, results in successful.items():
- for res in results:
- if res[0] in self._unfinished_sample_ids:
- already_collected.add(res[0])
- successful_results.setdefault(level_id, []).append(res)
-
- for level_id, results in failed_results.items():
- for res in results:
- if res[0] in self._unfinished_sample_ids:
- already_collected.add(res[0])
- failed_results.setdefault(level_id, []).append(res)
-
- for level_id, results in times.items():
- for res in results:
- if res[0] in self._unfinished_sample_ids:
- times.setdefault(level_id, []).append(res)
- times[level_id] = results
+ successful_results.setdefault(level_id, []).extend(results)
+ for level_id, results in failed.items():
+ failed_results.setdefault(level_id, []).extend(results)
+ for level_id, results in time.items():
+ times[level_id] = results[-1]
+
+ # for level_id, results in sim_data.items():
+ # sim_data_results.setdefault(level_id, []).extend(results)
+
+ # for level_id, results in running_time.items():
+ # running_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+ #
+ # for level_id, results in extract_mesh.items():
+ # extract_mesh_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+ #
+ # for level_id, results in make_field.items():
+ # make_field_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+ #
+ # for level_id, results in generate_rnd.items():
+ # generate_rnd_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+ #
+ # for level_id, results in fine_flow.items():
+ # fine_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+ #
+ # for level_id, results in coarse_flow.items():
+ # coarse_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]]
+
+ level_id_sample_id_seed = PbsJob.get_scheduled_sample_ids(job_id, self._jobs_dir)
+
+ for level_id, sample_id, _ in level_id_sample_id_seed:
+ already_collected.add(sample_id)
# Delete pbsID file - it means job is finished
# SamplingPoolPBS.delete_pbs_id_file(file)
self._unfinished_sample_ids = set()
- return successful_results, failed_results, times
+ return successful_results, failed_results, times#, sim_data_results
def have_permanent_samples(self, sample_ids):
"""
- List of unfinished sample ids, the corresponding samples are collecting in next get_finished() call .
+ List of unfinished sample ids; the corresponding samples are collected in the next get_finished() call
"""
self._unfinished_sample_ids = set(sample_ids)
diff --git a/mlmc/sim/simulation.py b/mlmc/sim/simulation.py
index bd9a04d0..e5d5dd33 100644
--- a/mlmc/sim/simulation.py
+++ b/mlmc/sim/simulation.py
@@ -5,29 +5,49 @@
class Simulation(ABC):
+ """
+ Abstract base class for multi-level Monte Carlo (MLMC) simulations.
+
+ Defines the interface that all concrete simulation classes must implement.
+ Provides methods for creating level simulations, specifying result formats, and running calculations.
+ """
@abstractmethod
def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation:
"""
- Create LevelSimulation object which is farther used for calculation etc.
- :param fine_level_params:
- :param coarse_level_params:
- :return: LevelSimulation
+ Create a LevelSimulation object for a given level.
+
+ The LevelSimulation instance is used for sample generation and result extraction
+ at both the fine and coarse levels in MLMC.
+
+ :param fine_level_params: List of floats defining parameters for the fine simulation level.
+ :param coarse_level_params: List of floats defining parameters for the coarse simulation level.
+ :return: LevelSimulation instance configured for the given level parameters.
"""
@abstractmethod
def result_format(self) -> List[QuantitySpec]:
"""
- Define simulation result format
- :return: List[QuantitySpec, ...]
+ Define the format of the simulation results.
+
+ This method should return a list of QuantitySpec objects, which describe the
+ type, shape, and units of each quantity produced by the simulation.
+
+ :return: List of QuantitySpec objects defining the simulation output format.
"""
@staticmethod
@abstractmethod
- def calculate(config_dict, seed):
+ def calculate(config_dict, seed: int):
"""
- Method that actually run the calculation, calculate fine and coarse sample and also extract their results
- :param config_dict: dictionary containing simulation configuration, LevelSimulation.config_dict (set in level_instance)
- :param seed: random seed, int
- :return: List[fine result, coarse result], both flatten arrays (see mlmc.sim.synth_simulation._calculate())
+ Execute a single simulation calculation.
+
+ This method runs the simulation for both fine and coarse levels, computes
+ the results, and returns them in a flattened form suitable for MLMC analysis.
+
+ :param config_dict: Dictionary containing simulation configuration parameters
+ (usually LevelSimulation.config_dict from level_instance).
+ :param seed: Random seed (int) to ensure reproducibility of the stochastic simulation.
+ :return: List containing two elements:
+ [fine_result, coarse_result], both as flattened arrays.
"""
diff --git a/mlmc/sim/synth_simulation.py b/mlmc/sim/synth_simulation.py
index 53417219..14a3ca61 100644
--- a/mlmc/sim/synth_simulation.py
+++ b/mlmc/sim/synth_simulation.py
@@ -1,5 +1,5 @@
import os
-import ruamel.yaml as yaml
+import ruamel.yaml as ruyaml
import numpy as np
from typing import List
import scipy.stats as stats
@@ -9,6 +9,14 @@
class SynthSimulation(Simulation):
+ """
+ Artificial (synthetic) simulation used for testing and examples.
+
+ The simulation generates random samples from a specified distribution and
+ optionally injects numerical error / NaN failures according to configuration.
+ It implements the Simulation interface: provides `level_instance`, `calculate`,
+ and `result_format` methods and a simple cost estimator `n_ops_estimate`.
+ """
n_nans = 0
nan_fraction = 0
@@ -18,49 +26,59 @@ class SynthSimulation(Simulation):
# Artificial simulation. Just random parameter + numerical error."""
def __init__(self, config=None):
"""
- :param config: Dict:
- distr= particular distribution,
- complexity=2,
- nan_fraction=fraction of failed samples
- sim_method=used method for calculating sample result
+ Initialize the synthetic simulation.
+
+ :param config: Dict, optional configuration with keys:
+ - 'distr': a scipy.stats distribution object (default: stats.norm())
+ - 'complexity': exponent used for cost estimate (default: 2)
+ - 'nan_fraction': fraction of samples that should be returned as NaN
+ If config is None, a default normal distribution is used.
"""
super().__init__()
if config is None:
config = dict(distr=stats.norm(), complexity=2)
self.config = config
+
+ # Static counters / settings used across instances
SynthSimulation.n_nans = 0
SynthSimulation.nan_fraction = config.get('nan_fraction', 0.0)
SynthSimulation.len_results = 0
- # This attribute is obligatory
+
+ # Indicates whether this simulation needs a workspace directory for samples
self.need_workspace: bool = False
@staticmethod
def sample_fn(x, h):
"""
- Calculates the simulation sample
- :param x: Distribution sample
- :param h: Simluation step
- :return: sample
+ Compute a (noisy) synthetic sample value for given distribution samples.
+
+ :param x: Distribution sample(s) (scalar or array-like).
+ :param h: Simulation step (resolution parameter). Typically small positive float.
+ :return: Computed sample(s). Introduces small h-dependent perturbation:
+ x + h * sqrt(1e-4 + |x|). This can produce outliers for certain x.
"""
- # This function can cause many outliers depending on chosen domain of moments function
return x + h * np.sqrt(1e-4 + np.abs(x))
@staticmethod
def sample_fn_no_error(x, h):
"""
- Calculates the simulation sample
- :param x: Distribution sample
- :param h: Simluation step
- :return: sample
+ Compute a synthetic sample without introducing numerical error.
+
+ :param x: Distribution sample(s) (scalar or array-like).
+ :param h: Simulation step (ignored for this function).
+ :return: The input sample(s) unchanged (identity mapping).
"""
return x
def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation:
"""
+ Create a LevelSimulation configured for a pair of fine/coarse level parameters.
- :param fine_level_params:
- :param coarse_level_params:
- :return:
+ :param fine_level_params: List-like where the first element is the fine step size.
+ :param coarse_level_params: List-like where the first element is the coarse step size.
+ :return: LevelSimulation instance initialized with:
+ - config_dict containing 'fine.step', 'coarse.step', 'distr', and 'res_format'
+ - task_size estimated by n_ops_estimate(...)
"""
config = dict()
config["fine"] = {}
@@ -75,16 +93,19 @@ def level_instance(self, fine_level_params: List[float], coarse_level_params: Li
@staticmethod
def generate_random_samples(distr, seed, size):
"""
- Generate random samples from given distribution
- :param distr: scipy distribution
- :param seed: uint32
- :param size: size of result
- :return: fine sample, coarse sample
+ Draw random samples from the provided scipy distribution reproducibly.
+
+ :param distr: scipy.stats distribution object (must support .rvs()).
+ :param seed: Integer seed used to construct a RandomState for reproducibility.
+ :param size: Number of samples to draw.
+ :return: Tuple (fine_samples, coarse_samples). For this synthetic sim both are identical.
+ May return [np.nan] to simulate a failed sample according to nan_fraction.
"""
SynthSimulation.len_results += 1
distr.random_state = np.random.RandomState(seed)
y = distr.rvs(size=size)
+ # Inject NaN failures up to configured fraction
if SynthSimulation.n_nans / (1e-10 + SynthSimulation.len_results) < SynthSimulation.nan_fraction:
SynthSimulation.n_nans += 1
y = [np.nan]
@@ -94,35 +115,51 @@ def generate_random_samples(distr, seed, size):
@staticmethod
def calculate(config, seed):
"""
- Calculate fine and coarse sample and also extract their results
- :param config: dictionary containing simulation configuration
- :param seed: random number generator seed
- :return: np.ndarray, np.ndarray
+ Calculate fine and coarse samples and convert them to the expected result format.
+
+ :param config: Dictionary containing simulation configuration (must include 'res_format',
+ 'fine.step' and 'coarse.step' keys).
+ :param seed: Integer RNG seed for reproducibility.
+ :return: Tuple (fine_flat, coarse_flat) where both are 1D numpy arrays produced by
+ flattening the per-quantity/time/location arrays constructed below.
+ :raises: Exception if any resulting sample contains NaN.
"""
quantity_format = config["res_format"]
- fine_random, coarse_random = SynthSimulation.generate_random_samples(config["distr"], seed, np.prod(quantity_format[0].shape))
+
+ # Generate base random values for fine and coarse (identical in this toy sim)
+ fine_random, coarse_random = SynthSimulation.generate_random_samples(
+ config["distr"],
+ seed,
+ np.prod(quantity_format[0].shape)
+ )
fine_step = config["fine"]["step"]
coarse_step = config["coarse"]["step"]
+ # Compute sample values for fine and coarse levels
fine_result = SynthSimulation.sample_fn(fine_random, fine_step)
if coarse_step == 0:
- coarse_result = np.zeros(len(fine_result))
+ coarse_result = np.zeros(len(fine_result)) # coarse = zero baseline if step==0
else:
coarse_result = SynthSimulation.sample_fn(coarse_random, coarse_step)
+ # Fail hard if NaNs are present
if np.any(np.isnan(fine_result)) or np.any(np.isnan(coarse_result)):
raise Exception("result is nan")
+ # Convert results into list-of-quantities × times × locations arrays and then flatten
results = []
for result in [fine_result, coarse_result]:
quantities = []
for quantity in quantity_format:
if coarse_step == 0:
+ # replicate the same result for each location (coarse step 0 special case)
locations = np.array([result for _ in range(len(quantity.locations))])
else:
+ # create simple distinct location-dependent arrays for demonstration
locations = np.array([result + i for i in range(len(quantity.locations))])
+ # repeat across times
times = np.array([locations for _ in range(len(quantity.times))])
quantities.append(times)
@@ -131,21 +168,42 @@ def calculate(config, seed):
return results[0].flatten(), results[1].flatten()
def n_ops_estimate(self, step):
+ """
+ Estimate number of operations (cost) for a sample at given step size.
+
+ :param step: Level step size (h).
+ :return: Estimated operation count (float). Uses configured complexity exponent.
+ """
return (1 / step) ** self.config['complexity'] * np.log(max(1 / step, 2.0))
def result_format(self) -> List[QuantitySpec]:
"""
- Result format
- :return:
+ Define the synthetic simulation's result format.
+
+ :return: List[QuantitySpec] describing the shape, units, times and locations
+ for each reported quantity. This informs how `calculate` arranges
+ and flattens outputs.
"""
spec1 = QuantitySpec(name="length", unit="m", shape=(2, 1), times=[1, 2, 3], locations=['10', '20'])
spec2 = QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=['30', '40'])
- # spec1 = QuantitySpec(name="length", unit="m", shape=(2, 1), times=[1, 2, 3], locations=[(1, 2, 3), (4, 5, 6)])
- # spec2 = QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=[(7, 8, 9), (10, 11, 12)])
+ # Alternative examples with numeric locations (commented out)
+ # spec1 = QuantitySpec(name="length", unit="m", shape=(2, 1), times=[1, 2, 3],
+ # locations=[(1, 2, 3), (4, 5, 6)])
+ # spec2 = QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3],
+ # locations=[(7, 8, 9), (10, 11, 12)])
return [spec1, spec2]
+
class SynthSimulationWorkspace(SynthSimulation):
+ """
+ Synthetic simulation variant that requires a workspace (reads config from YAML).
+
+ This subclass behaves like `SynthSimulation` but:
+ - Reads distribution and nan_fraction from a YAML configuration file.
+ - Declares `need_workspace = True` so sample files are written to/read from disk.
+ - Supplies `common_files` (the YAML) to LevelSimulation so workspaces get that file.
+ """
n_nans = 0
nan_fraction = 0
@@ -157,48 +215,55 @@ class SynthSimulationWorkspace(SynthSimulation):
# Artificial simulation. Just random parameter + numerical error."""
def __init__(self, config):
"""
- :param config: Dict:
- distr= particular distribution,
- complexity=2,
- nan_fraction=fraction of failed samples
- sim_method=used method for calculating sample result
+ Initialize the workspace-capable synthetic simulation.
+
+ :param config: Dict with at least:
+ - "config_yaml": path to YAML configuration file (relative or absolute)
+ Optionally may contain 'nan_fraction' as a fallback.
"""
self.config_yaml = config["config_yaml"]
+ # Reset static counters
SynthSimulationWorkspace.n_nans = 0
SynthSimulationWorkspace.nan_fraction = config.get('nan_fraction', 0.0)
SynthSimulationWorkspace.len_results = 0
- # This attribute is obligatory
+ # This simulation requires a workspace directory for sample execution
self.need_workspace: bool = True
@staticmethod
def sample_fn(x, h):
"""
- Calculates the simulation sample
- :param x: Distribution sample
- :param h: Simluation step
- :return: sample
+ Compute a (noisy) synthetic sample value for given distribution samples.
+
+ :param x: Distribution sample(s) (scalar or array-like).
+ :param h: Simulation step (resolution parameter).
+ :return: Computed sample(s): x + h * sqrt(1e-4 + |x|).
"""
- # This function can cause many outliers depending on chosen domain of moments function
return x + h * np.sqrt(1e-4 + np.abs(x))
@staticmethod
def sample_fn_no_error(x, h):
"""
- Calculates the simulation sample
- :param x: Distribution sample
- :param h: Simluation step
- :return: sample
+ Identity sampling function (no added numerical error).
+
+ :param x: Distribution sample(s).
+ :param h: Simulation step (ignored).
+ :return: x (unchanged).
"""
return x
def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation:
"""
-
- :param fine_level_params:
- :param coarse_level_params:
- :return:
+ Produce a LevelSimulation configured to use the YAML config as a common file.
+
+ :param fine_level_params: list-like where first element is fine step size.
+ :param coarse_level_params: list-like where first element is coarse step size.
+ :return: LevelSimulation configured with:
+ - config_dict: containing 'fine.step', 'coarse.step', 'res_format'
+ - common_files: list containing the YAML path (so worker/workspace has it)
+ - task_size: small constant (1/job_weight) to simulate job weighting
+ - need_sample_workspace: True (this class requires workspace)
"""
config = dict()
config["fine"] = {}
@@ -208,21 +273,30 @@ def level_instance(self, fine_level_params: List[float], coarse_level_params: Li
config["coarse"]["step"] = coarse_level_params[0]
config["res_format"] = self.result_format()
+ # Use a fixed job weight to keep task_size small (simulating many small jobs)
job_weight = 20000
- return LevelSimulation(config_dict=config,
- common_files=[self.config_yaml],
- task_size=1.0 / job_weight,
- need_sample_workspace=self.need_workspace)
+ return LevelSimulation(
+ config_dict=config,
+ common_files=[self.config_yaml],
+ task_size=1.0 / job_weight,
+ need_sample_workspace=self.need_workspace
+ )
@staticmethod
def generate_random_samples(distr, seed, size):
"""
- Generate random samples from given distribution
- :param distr: scipy distribution
- :param seed: uint32
- :param size: size of result
- :return: fine sample, coarse sample
+ Draw random samples based on YAML-specified distribution names.
+
+ This implementation currently supports only the string "norm" which
+ maps to scipy.stats.norm(loc=1, scale=2). A NotImplementedError is raised
+ for other distribution identifiers.
+
+ :param distr: Either a string identifier (e.g. "norm") or a scipy distribution.
+ :param seed: Integer RNG seed used to create a RandomState for reproducibility.
+ :param size: Integer number of samples to draw.
+ :return: Tuple (fine_samples, coarse_samples) — identical arrays for this toy sim.
+ May return [np.nan] to simulate a failed sample according to nan_fraction.
"""
SynthSimulationWorkspace.len_results += 1
@@ -234,6 +308,7 @@ def generate_random_samples(distr, seed, size):
distr.random_state = np.random.RandomState(seed)
y = distr.rvs(size=size)
+ # Inject NaN failure if configured fraction not yet reached
if SynthSimulationWorkspace.n_nans / (1e-10 + SynthSimulationWorkspace.len_results) < SynthSimulationWorkspace.nan_fraction:
SynthSimulationWorkspace.n_nans += 1
y = [np.nan]
@@ -243,18 +318,28 @@ def generate_random_samples(distr, seed, size):
@staticmethod
def calculate(config, seed):
"""
- Calculate fine and coarse sample and also extract their results
- :param config: dictionary containing simulation configuration
- :param seed: random number generator seed
- :return: np.ndarray, np.ndarray
+ Calculate fine and coarse samples (using values from the YAML config file).
+
+ Workflow:
+ 1. Read YAML configuration (via _read_config) to get distribution and nan_fraction.
+ 2. Generate base random numbers (fine_random, coarse_random).
+ 3. Compute fine_result and coarse_result via sample functions.
+ 4. Assemble results into arrays shaped by res_format and flatten them.
+
+ :param config: LevelSimulation.config_dict (must include 'res_format', 'fine.step', 'coarse.step').
+ :param seed: Integer RNG seed.
+ :return: Tuple (fine_flat, coarse_flat) — 1D numpy arrays produced by flattening quantities × times × locations.
+ :raises: Exception if any computed result contains NaN.
"""
+ # Load runtime YAML config (distribution name and nan_fraction)
config_file = SynthSimulationWorkspace._read_config()
SynthSimulationWorkspace.nan_fraction = config_file["nan_fraction"]
quantity_format = config["res_format"]
- fine_random, coarse_random = SynthSimulationWorkspace.generate_random_samples(config_file["distr"], seed,
- np.prod(quantity_format[0].shape))
+ fine_random, coarse_random = SynthSimulationWorkspace.generate_random_samples(
+ config_file["distr"], seed, np.prod(quantity_format[0].shape)
+ )
fine_step = config["fine"]["step"]
coarse_step = config["coarse"]["step"]
@@ -272,7 +357,6 @@ def calculate(config, seed):
results = []
for result in [fine_result, coarse_result]:
quantities = []
-
for quantity in quantity_format:
if coarse_step == 0:
locations = np.array([result for _ in range(len(quantity.locations))])
@@ -285,12 +369,26 @@ def calculate(config, seed):
return results[0].flatten(), results[1].flatten()
def n_ops_estimate(self, step):
- # @TODO: how to determine n ops
+ """
+ Estimate a synthetic operation count for the workspace-enabled simulation.
+ :param step: Level step size.
+ :return: Estimated operation cost (float). Uses a fixed exponent of 2 here.
+ """
return (1 / step) ** 2 * np.log(max(1 / step, 2.0))
@staticmethod
def _read_config():
+ """
+ Read the YAML configuration file (CONFIG_FILE) from the current working directory.
+
+ The YAML is parsed using ruamel.yaml and should contain keys expected by this class
+ (e.g. "distr" and "nan_fraction").
+
+ :return: Parsed configuration dictionary.
+ :raises: IOError / FileNotFoundError if the YAML file is missing.
+ """
with open(os.path.join(os.getcwd(), SynthSimulationWorkspace.CONFIG_FILE)) as file:
+ yaml = ruyaml.YAML(typ='rt')
config = yaml.load(file)
return config
diff --git a/mlmc/tool/context_statprof.py b/mlmc/tool/context_statprof.py
deleted file mode 100644
index faf3afc4..00000000
--- a/mlmc/tool/context_statprof.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import statprof
-from contextlib import contextmanager
-
-
-
-
-@contextmanager
-def stat_profiler():
- statprof.start()
- yield statprof
- statprof.stop()
- statprof.display()
-
diff --git a/mlmc/tool/distribution.py b/mlmc/tool/distribution.py
index e377607b..1427ef2b 100644
--- a/mlmc/tool/distribution.py
+++ b/mlmc/tool/distribution.py
@@ -48,39 +48,6 @@ def __init__(self, moments_obj, moment_data, domain=None, force_decay=(True, Tru
# Flag for monitoring convergence on stdout.
self.monitor = monitor
- # def choose_parameters_from_samples(self, samples):
- # """
- # Determine model hyperparameters, in particular domain of the density function,
- # from given samples.
- # :param samples: np array of samples from the distribution or its approximation.
- # :return: None
- # """
- # self.domain = (np.min(samples), np.max(samples))
- #
- # @staticmethod
- # def choose_parameters_from_moments(mean, variance, quantile=0.9999, log=False):
- # """
- # Determine model hyperparameters, in particular domain of the density function,
- # from given samples.
- # :param samples: np array of samples from the distribution or its approximation.
- # :return: None
- # """
- # if log:
- # # approximate by log normal
- # # compute mu, sigma parameters from observed mean and variance
- # sigma_sq = np.log(np.exp(np.log(variance) - 2.0 * np.log(mean)) + 1.0)
- # mu = np.log(mean) - sigma_sq / 2.0
- # sigma = np.sqrt(sigma_sq)
- # domain = tuple(sc.stats.lognorm.ppf([1.0 - quantile, quantile], s=sigma, scale=np.exp(mu)))
- # assert np.isclose(mean, sc.stats.lognorm.mean(s=sigma, scale=np.exp(mu)))
- # assert np.isclose(variance, sc.stats.lognorm.var(s=sigma, scale=np.exp(mu)))
- # else:
- # domain = tuple(sc.stats.norm.ppf([1.0 - quantile, quantile], loc=mean, scale=np.sqrt(variance)))
- # return domain
- #
- # def choose_parameters_from_approximation(self):
- # pass
-
def estimate_density_minimize(self, tol=1e-5, reg_param =0.01):
"""
@@ -411,15 +378,9 @@ def _calculate_jacobian_matrix(self, multipliers):
jacobian_matrix[np.diag_indices_from(jacobian_matrix)] += self._stab_penalty
-
- #e_vals = np.linalg.eigvalsh(jacobian_matrix)
-
- #print(multipliers)
- #print("jac spectra: ", e_vals[0], e_vals[-1], e_vals[-1]/e_vals[0])
return jacobian_matrix
-
def compute_exact_moments(moments_fn, density, tol=1e-4):
"""
Compute approximation of moments using exact density.
diff --git a/mlmc/tool/flow_mc.py b/mlmc/tool/flow_mc.py
index 09fc12da..cdddb9c1 100644
--- a/mlmc/tool/flow_mc.py
+++ b/mlmc/tool/flow_mc.py
@@ -15,14 +15,21 @@
def create_corr_field(model='gauss', corr_length=0.125, dim=2, log=True, sigma=1, mode_no=1000):
"""
- Create random fields
- :return:
+ Create correlated random-field provider (cf.Fields) according to selected backend.
+
+ :param model: One of 'fourier', 'svd', 'exp', 'TPLgauss', 'TPLexp', 'TPLStable', or others (defaults to 'gauss').
+ :param corr_length: Correlation length (used by GSTools or SVD implementations).
+ :param dim: Spatial dimension of the field (1, 2 or 3).
+ :param log: If True, generate log-normal field (exponentiate underlying Gaussian field).
+ :param sigma: Standard deviation for the generated field.
+ :param mode_no: Number of Fourier modes (used by the 'fourier' backend).
+ :return: cf.Fields instance that can generate random field samples.
"""
if model == 'fourier':
return cf.Fields([
cf.Field('conductivity', cf.FourierSpatialCorrelatedField('gauss', dim=dim,
- corr_length=corr_length,
- log=log, sigma=sigma)),
+ corr_length=corr_length,
+ log=log, sigma=sigma)),
])
elif model == 'svd':
@@ -52,13 +59,14 @@ def create_corr_field(model='gauss', corr_length=0.125, dim=2, log=True, sigma=1
])
-
def substitute_placeholders(file_in, file_out, params):
"""
- Substitute for placeholders of format '' from the dict 'params'.
- :param file_in: Template file.
- :param file_out: Values substituted.
- :param params: { 'name': value, ...}
+ Replace placeholders of the form '<name>' in a template file with corresponding values.
+
+ :param file_in: Path to the template file containing placeholders.
+ :param file_out: Path where the substituted output will be written.
+ :param params: Dictionary mapping placeholder names to replacement values, e.g. {'mesh_file': 'mesh.msh'}.
+ :return: List of parameter names that were actually used (replaced) in the template.
"""
used_params = []
with open(file_in, 'r') as src:
@@ -76,10 +84,10 @@ def substitute_placeholders(file_in, file_out, params):
def force_mkdir(path, force=False):
"""
- Make directory 'path' with all parents,
- remove the leaf dir recursively if it already exists.
- :param path: path to directory
- :param force: if dir already exists then remove it and create new one
+ Create directory tree; optionally remove existing leaf directory first.
+
+ :param path: Directory path to create (parents created as needed).
+ :param force: If True and the directory already exists, remove it (recursively) before creating.
:return: None
"""
if force:
@@ -92,55 +100,36 @@ class FlowSim(Simulation):
# placeholders in YAML
total_sim_id = 0
MESH_FILE_VAR = 'mesh_file'
- # Timestep placeholder given as O(h), h = mesh step
- TIMESTEP_H1_VAR = 'timestep_h1'
- # Timestep placeholder given as O(h^2), h = mesh step
- TIMESTEP_H2_VAR = 'timestep_h2'
+ TIMESTEP_H1_VAR = 'timestep_h1' # O(h)
+ TIMESTEP_H2_VAR = 'timestep_h2' # O(h^2)
- # files
+ # filenames used in workspace and job directories
GEO_FILE = 'mesh.geo'
MESH_FILE = 'mesh.msh'
YAML_TEMPLATE = 'flow_input.yaml.tmpl'
YAML_FILE = 'flow_input.yaml'
FIELDS_FILE = 'fields_sample.msh'
- """
- Gather data for single flow call (coarse/fine)
-
- Usage:
- mlmc.sampler.Sampler uses instance of FlowSim, it calls once level_instance() for each level step (The level_instance() method
- is called as many times as the number of levels), it takes place in main process
-
- mlmc.tool.pbs_job.PbsJob uses static methods in FlowSim, it calls calculate(). That's where the calculation actually runs,
- it takes place in PBS process
- It also extracts results and passes them back to PbsJob, which handles the rest
-
- """
-
def __init__(self, config=None, clean=None):
"""
- Simple simulation using flow123d
- :param config: configuration of the simulation, processed keys:
- env - Environment object.
- fields - FieldSet object
- yaml_file: Template for main input file. Placeholders:
- - replaced by generated mesh
- - for FIELD be name of any of `fields`, replaced by the FieldElementwise field with generated
- field input file and the field name for the component.
- geo_file: Path to the geometry file.
- :param clean: bool, if True remove existing simulation files - mesh files, ...
+ Initialize FlowSim instance that runs flow123d simulations using generated random fields.
+
+ :param config: Dict with keys:
+ - env: dict of environment executables (flow123d, gmsh, gmsh_version, etc.)
+ - fields_params: parameters forwarded to create_corr_field
+ - yaml_file: base YAML template path
+ - geo_file: geometry (.geo) file path
+ - work_dir: base working directory for generated level common files
+ - field_template: optional template string for field definition in YAML
+ - time_factor: optional multiplier for timestep selection (default 1.0)
+ :param clean: If True, regenerate common files (mesh, yaml) for the given level.
"""
- self.need_workspace = True
- # This simulation requires workspace
+ self.need_workspace = True # this simulation needs per-sample work directories
self.env = config['env']
- # Environment variables, flow123d, gmsh, ...
self._fields_params = config['fields_params']
self._fields = create_corr_field(**config['fields_params'])
self._fields_used_params = None
- # Random fields instance
self.time_factor = config.get('time_factor', 1.0)
- # It is used for minimal element from mesh determination (see level_instance method)
-
self.base_yaml_file = config['yaml_file']
self.base_geo_file = config['geo_file']
self.field_template = config.get('field_template',
@@ -148,54 +137,55 @@ def __init__(self, config=None, clean=None):
self.work_dir = config['work_dir']
self.clean = clean
- super(Simulation, self).__init__()
+ super(Simulation, self).__init__() # keep compatibility with parent initialization
+
def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation:
"""
- Called from mlmc.Sampler, it creates single instance of LevelSimulation (mlmc.)
- :param fine_level_params: in this version, it is just fine simulation step
- :param coarse_level_params: in this version, it is just coarse simulation step
- :return: mlmc.LevelSimulation object, this object is serialized in SamplingPoolPbs and deserialized in PbsJob,
- so it allows pass simulation data from main process to PBS process
+ Create a LevelSimulation object for given fine/coarse steps.
+
+ This method is called in the main process (Sampler) and must prepare
+ common files (mesh, YAML) for that level. The returned LevelSimulation
+ is serialized and sent to PBS jobs (PbsJob) for actual execution.
+
+ :param fine_level_params: list with single element [fine_step] (mesh step)
+ :param coarse_level_params: list with single element [coarse_step] (mesh step) or [0] for one-level MC
+ :return: LevelSimulation configured with task size and calculate method
"""
fine_step = fine_level_params[0]
coarse_step = coarse_level_params[0]
- # TODO: determine minimal element from mesh
+ # Set time steps used in YAML substitution (O(h) and O(h^2) placeholders)
self.time_step_h1 = self.time_factor * fine_step
self.time_step_h2 = self.time_factor * fine_step * fine_step
- # Set fine simulation common files directory
- # Files in the directory are used by each simulation at that level
+ # Directory to store files common to all samples at this fine level
common_files_dir = os.path.join(self.work_dir, "l_step_{}_common_files".format(fine_step))
force_mkdir(common_files_dir, force=self.clean)
self.mesh_file = os.path.join(common_files_dir, self.MESH_FILE)
if self.clean:
- # Prepare mesh
+ # Create computational mesh from geometry template
geo_file = os.path.join(common_files_dir, self.GEO_FILE)
shutil.copyfile(self.base_geo_file, geo_file)
- self._make_mesh(geo_file, self.mesh_file, fine_step) # Common computational mesh for all samples.
+ self._make_mesh(geo_file, self.mesh_file, fine_step)
- # Prepare main input YAML
+ # Prepare main YAML by substituting placeholders
yaml_template = os.path.join(common_files_dir, self.YAML_TEMPLATE)
shutil.copyfile(self.base_yaml_file, yaml_template)
yaml_file = os.path.join(common_files_dir, self.YAML_FILE)
self._substitute_yaml(yaml_template, yaml_file)
- # Mesh is extracted because we need number of mesh points to determine task_size parameter (see return value)
+ # Extract mesh metadata to determine task_size (number of points affects job weight)
fine_mesh_data = self.extract_mesh(self.mesh_file)
- # Set coarse simulation common files directory
- # Files in the directory are used by each simulation at that level
+ # Set coarse sim common files dir if coarse level exists
coarse_sim_common_files_dir = None
if coarse_step != 0:
coarse_sim_common_files_dir = os.path.join(self.work_dir, "l_step_{}_common_files".format(coarse_step))
- # Simulation config
- # Configuration is used in mlmc.tool.pbs_job.PbsJob instance which is run from PBS process
- # It is part of LevelSimulation which is serialized and then deserialized in mlmc.tool.pbs_job.PbsJob
+ # Prepare configuration dict that will be serialized in LevelSimulation
config = dict()
config["fine"] = {}
config["coarse"] = {}
@@ -204,71 +194,66 @@ def level_instance(self, fine_level_params: List[float], coarse_level_params: Li
config["fine"]["common_files_dir"] = common_files_dir
config["coarse"]["common_files_dir"] = coarse_sim_common_files_dir
- config[
- "fields_used_params"] = self._fields_used_params # Params for Fields instance, which is createed in PbsJob
+ config["fields_used_params"] = self._fields_used_params
config["gmsh"] = self.env['gmsh']
config["flow123d"] = self.env['flow123d']
config['fields_params'] = self._fields_params
- # Auxiliary parameter which I use to determine task_size (should be from 0 to 1, if task_size is above 1 then pbs job is scheduled)
- job_weight = 17000000 # 4000000 - 20 min, 2000000 - cca 10 min
+ # job_weight is used to convert mesh size into a normalized task_size
+ job_weight = 17000000
return LevelSimulation(config_dict=config,
task_size=len(fine_mesh_data['points']) / job_weight,
calculate=FlowSim.calculate,
- # method which carries out the calculation, will be called from PBS processs
- need_sample_workspace=True # If True, a sample directory is created
+ need_sample_workspace=True
)
@staticmethod
def calculate(config, seed):
"""
- Method that actually run the calculation, it's called from mlmc.tool.pbs_job.PbsJob.calculate_samples()
- Calculate fine and coarse sample and also extract their results
- :param config: dictionary containing simulation configuration, LevelSimulation.config_dict (set in level_instance)
- :param seed: random seed, int
- :return: List[fine result, coarse result], both flatten arrays (see mlmc.sim.synth_simulation.calculate())
+ Execute one MLMC sample calculation (fine and optional coarse) inside PBS job.
+
+ :param config: Configuration dict from LevelSimulation.config_dict (contains common_files dirs, steps, fields params)
+ :param seed: Random seed for the sample generation (derived from sample id)
+ :return: Tuple (fine_result_array, coarse_result_array), both numpy arrays (coarse may be zeros for one-level MC)
"""
- # Init correlation field objects
- fields = create_corr_field(**config['fields_params']) # correlated_field.Fields instance
+ # Initialize fields object in the worker process
+ fields = create_corr_field(**config['fields_params'])
fields.set_outer_fields(config["fields_used_params"])
- coarse_step = config["coarse"]["step"] # Coarse simulation step, zero if one level MC
- flow123d = config["flow123d"] # Flow123d command
+ coarse_step = config["coarse"]["step"]
+ flow123d = config["flow123d"]
- # Extract fine mesh
- fine_common_files_dir = config["fine"]["common_files_dir"] # Directory with fine simulation common files
+ # Extract fine mesh structure and optionally coarse mesh structure
+ fine_common_files_dir = config["fine"]["common_files_dir"]
fine_mesh_data = FlowSim.extract_mesh(os.path.join(fine_common_files_dir, FlowSim.MESH_FILE))
- # Extract coarse mesh
coarse_mesh_data = None
coarse_common_files_dir = None
if coarse_step != 0:
- coarse_common_files_dir = config["coarse"][
- "common_files_dir"] # Directory with coarse simulation common files
+ coarse_common_files_dir = config["coarse"]["common_files_dir"]
coarse_mesh_data = FlowSim.extract_mesh(os.path.join(coarse_common_files_dir, FlowSim.MESH_FILE))
- # Create fields both fine and coarse
+ # Prepare combined fields object that has points for both fine and coarse meshes
fields = FlowSim.make_fields(fields, fine_mesh_data, coarse_mesh_data)
- # Set random seed, seed is calculated from sample id, so it is not user defined
+ # Sample random field realizations reproducibly
np.random.seed(seed)
- # Generate random samples
- fine_input_sample, coarse_input_sample = FlowSim.generate_random_sample(fields, coarse_step=coarse_step,
- n_fine_elements=len(
- fine_mesh_data['points']))
+ fine_input_sample, coarse_input_sample = FlowSim.generate_random_sample(
+ fields, coarse_step=coarse_step, n_fine_elements=len(fine_mesh_data['points'])
+ )
- # Run fine sample
+ # Run fine-level simulation
fields_file = os.path.join(os.getcwd(), FlowSim.FIELDS_FILE)
fine_res = FlowSim._run_sample(fields_file, fine_mesh_data['ele_ids'], fine_input_sample, flow123d,
fine_common_files_dir)
- # Rename fields_sample.msh to fine_fields_sample.msh, we might remove it
+ # Move generated files to have 'fine_' prefix so they don't collide
for filename in os.listdir(os.getcwd()):
if not filename.startswith("fine"):
shutil.move(os.path.join(os.getcwd(), filename), os.path.join(os.getcwd(), "fine_" + filename))
- # Run coarse sample
+ # Run coarse-level simulation if coarse sample exists
coarse_res = np.zeros(len(fine_res))
if coarse_input_sample:
coarse_res = FlowSim._run_sample(fields_file, coarse_mesh_data['ele_ids'], coarse_input_sample, flow123d,
@@ -279,17 +264,19 @@ def calculate(config, seed):
@staticmethod
def make_fields(fields, fine_mesh_data, coarse_mesh_data):
"""
- Create random fields that are used by both coarse and fine simulation
- :param fields: correlated_field.Fields instance
- :param fine_mesh_data: Dict contains data extracted from fine mesh file (points, point_region_ids, region_map)
- :param coarse_mesh_data: Dict contains data extracted from coarse mesh file (points, point_region_ids, region_map)
- :return: correlated_field.Fields
+ Assign evaluation points to fields and return the Fields object prepared for sampling.
+
+ :param fields: correlated_field.Fields instance (with local field definitions)
+ :param fine_mesh_data: Dict returned by extract_mesh() for the fine mesh
+ :param coarse_mesh_data: Dict returned by extract_mesh() for the coarse mesh (or None for one-level)
+ :return: the same cf.Fields object with points set for sampling
"""
- # One level MC has no coarse_mesh_data
+ # If no coarse mesh, just register fine mesh points
if coarse_mesh_data is None:
fields.set_points(fine_mesh_data['points'], fine_mesh_data['point_region_ids'],
fine_mesh_data['region_map'])
else:
+ # Concatenate fine and coarse points to compute joint fields (ensures consistent sampling)
coarse_centers = coarse_mesh_data['points']
both_centers = np.concatenate((fine_mesh_data['points'], coarse_centers), axis=0)
both_regions_ids = np.concatenate(
@@ -302,13 +289,14 @@ def make_fields(fields, fine_mesh_data, coarse_mesh_data):
@staticmethod
def _run_sample(fields_file, ele_ids, fine_input_sample, flow123d, common_files_dir):
"""
- Create random fields file, call Flow123d and extract results
- :param fields_file: Path to file with random fields
- :param ele_ids: Element IDs in computational mesh
- :param fine_input_sample: fields: {'field_name' : values_array, ..}
- :param flow123d: Flow123d command
- :param common_files_dir: Directory with simulations common files (flow_input.yaml, )
- :return: simulation result, ndarray
+ Write random fields to Gmsh file, call flow123d, and extract sample results.
+
+ :param fields_file: Path where fields will be written (in current working directory)
+ :param ele_ids: Array of element ids for which field values are provided
+ :param fine_input_sample: Dict mapping field names to arrays of shape (n_elements, 1)
+ :param flow123d: Path/command to flow123d executable
+ :param common_files_dir: Directory containing common YAML and other input files for the level
+ :return: numpy.ndarray with extracted simulation result (e.g., water balance)
"""
gmsh_io.GmshIO().write_fields(fields_file, ele_ids, fine_input_sample)
@@ -321,11 +309,16 @@ def _run_sample(fields_file, ele_ids, fine_input_sample, flow123d, common_files_
@staticmethod
def generate_random_sample(fields, coarse_step, n_fine_elements):
"""
- Generate random field, both fine and coarse part.
- Store them separeted.
- :return: Dict, Dict
+ Generate random field samples for the fine and (optionally) coarse meshes.
+
+ :param fields: cf.Fields object (already configured with points)
+ :param coarse_step: coarse-level step (0 for no coarse sample)
+ :param n_fine_elements: Number of elements that belong to fine mesh (used to split combined sample)
+ :return: Tuple (fine_input_sample: dict, coarse_input_sample: dict)
+ Each dict maps field name -> array shaped (n_elements, 1).
"""
fields_sample = fields.sample()
+ # Fine inputs are first n_fine_elements rows; coarse are the remainder (if any)
fine_input_sample = {name: values[:n_fine_elements, None] for name, values in fields_sample.items()}
coarse_input_sample = {}
if coarse_step != 0:
@@ -336,10 +329,12 @@ def generate_random_sample(fields, coarse_step, n_fine_elements):
def _make_mesh(self, geo_file, mesh_file, fine_step):
"""
- Make the mesh, mesh_file: _step.msh.
- Make substituted yaml: _step.yaml,
- using common fields_step.msh file for generated fields.
- :return:
+ Invoke Gmsh to produce a mesh with the requested geometric scale (clscale).
+
+ :param geo_file: Path to the .geo file used to generate the mesh
+ :param mesh_file: Path where the .msh output will be written
+ :param fine_step: Mesh step (controls element size via -clscale)
+ :return: None
"""
if self.env['gmsh_version'] == 2:
subprocess.call(
@@ -350,9 +345,14 @@ def _make_mesh(self, geo_file, mesh_file, fine_step):
@staticmethod
def extract_mesh(mesh_file):
"""
- Extract mesh from file
- :param mesh_file: Mesh file path
- :return: Dict
+ Parse a Gmsh mesh file and extract points (element centers), element ids and region mapping.
+
+ :param mesh_file: Path to .msh file to parse (Gmsh 2/4 depending on GmshIO implementation)
+ :return: Dict with keys:
+ - 'points': np.ndarray of shape (n_elements, dim) with element center coordinates
+ - 'point_region_ids': np.ndarray of region id per element
+ - 'ele_ids': np.ndarray of original element ids
+ - 'region_map': dict mapping region name -> region id
"""
mesh = gmsh_io.GmshIO(mesh_file)
is_bc_region = {}
@@ -386,7 +386,7 @@ def extract_mesh(mesh_file):
diff = max_pt - min_pt
min_axis = np.argmin(diff)
non_zero_axes = [0, 1, 2]
- # TODO: be able to use this mesh_dimension in fields
+ # If mesh is effectively 2D (one axis collapsed), remove that axis from point coordinates
if diff[min_axis] < 1e-10:
non_zero_axes.pop(min_axis)
points = centers[:, non_zero_axes]
@@ -395,8 +395,11 @@ def extract_mesh(mesh_file):
def _substitute_yaml(self, yaml_tmpl, yaml_out):
"""
- Create substituted YAML file from the tamplate.
- :return:
+ Build YAML input file for flow123d by substituting placeholders for mesh and fields.
+
+ :param yaml_tmpl: Path to YAML template with placeholders like '<mesh_file>' and '<timestep_h1>'.
+ :param yaml_out: Path to output YAML file that will be used by flow123d.
+ :return: None (also populates self._fields_used_params with names of substituted fields)
"""
param_dict = {}
field_tmpl = self.field_template
@@ -412,11 +415,12 @@ def _substitute_yaml(self, yaml_tmpl, yaml_out):
@staticmethod
def _extract_result(sample_dir):
"""
- Extract the observed value from the Flow123d output.
- :param sample_dir: str, path to sample directory
- :return: None, inf or water balance result (float) and overall sample time
+ Extract the observed quantity (e.g., water balance flux) from a flow123d run directory.
+
+ :param sample_dir: Directory where flow123d output (water_balance.yaml) is located.
+ :return: numpy.ndarray with a single value [-total_flux] representing outflow (negative sign).
+ Raises Exception if expected data is not found or inflow at outlet is positive.
"""
- # extract the flux
balance_file = os.path.join(sample_dir, "water_balance.yaml")
with open(balance_file, "r") as f:
@@ -434,44 +438,19 @@ def _extract_result(sample_dir):
flux_in = float(flux_item['data'][1])
if flux_in > 1e-10:
raise Exception("Possitive inflow at outlet region.")
- total_flux += flux # flux field
+ total_flux += flux
found = True
- # Get flow123d computing time
- # run_time = FlowSim.get_run_time(sample_dir)
-
if not found:
- raise Exception
+ raise Exception("No outlet flux found in water_balance.yaml")
return np.array([-total_flux])
@staticmethod
def result_format() -> List[QuantitySpec]:
"""
- Define simulation result format
- :return: List[QuantitySpec, ...]
+ Describe the simulation output format as a list of QuantitySpec objects.
+
+ :return: List[QuantitySpec] describing each output quantity (name, unit, shape, times, locations)
"""
spec1 = QuantitySpec(name="conductivity", unit="m", shape=(1, 1), times=[1], locations=['0'])
- # spec2 = QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=['30', '40'])
return [spec1]
-
- # @staticmethod
- # def get_run_time(sample_dir):
- # """
- # Get flow123d sample running time from profiler
- # :param sample_dir: Sample directory
- # :return: float
- # """
- # profiler_file = os.path.join(sample_dir, "profiler_info_*.json")
- # profiler = glob.glob(profiler_file)[0]
- #
- # try:
- # with open(profiler, "r") as f:
- # prof_content = json.load(f)
- #
- # run_time = float(prof_content['children'][0]['cumul-time-sum'])
- # except:
- # print("Extract run time failed")
- #
- # return run_time
-
-
diff --git a/mlmc/tool/gmsh_io.py b/mlmc/tool/gmsh_io.py
index c5a3ad36..0d059918 100644
--- a/mlmc/tool/gmsh_io.py
+++ b/mlmc/tool/gmsh_io.py
@@ -3,21 +3,8 @@
import struct
import numpy as np
-import enum
-# class ElementType(enum.IntEnum):
-# simplex_1d = 1
-# simplex_2d = 2
-# simplex_3d = 4
-#
-# element_sizes = {
-# 1: 1,
-# 2: 2,
-# 4: 3
-# }
-#
-
class GmshIO:
"""This is a class for storing nodes and elements. Based on Gmsh.py
@@ -28,25 +15,60 @@ class GmshIO:
Methods:
read([file]) -- Parse a Gmsh version 1.0 or 2.0 mesh file
- write([file]) -- Output a Gmsh version 2.0 mesh file
+ write_ascii([file]) -- Output a Gmsh version 2.0 mesh file (ASCII)
+ write_binary([file]) -- Output a Gmsh version 2.0 mesh file (binary)
+ write_element_data(f, ele_ids, name, values) -- write $ElementData block
+ write_fields(msh_file, ele_ids, fields) -- convenience to write several ElementData blocks
"""
def __init__(self, filename=None):
- """Initialise Gmsh data structure"""
+ """
+ Initialise Gmsh data structure.
+
+ :param filename: Optional path to a .msh file. If provided, the file is read on construction.
+ :return: None
+ """
self.reset()
self.filename = filename
if self.filename:
self.read()
def reset(self):
- """Reinitialise Gmsh data structure"""
+ """
+ Reinitialise internal storage.
+
+ Clears nodes, elements, physical names and element_data dictionaries.
+
+ :return: None
+ """
self.nodes = {}
self.elements = {}
self.physical = {}
self.element_data = {}
def read_element_data_head(self, mshfile):
-
+ """
+ Read header of a $ElementData block from an open mshfile.
+
+ The method expects the lines after '$ElementData' to match the conventional
+ Gmsh textual ElementData header layout:
+
+ ""
+
+