diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index e3bddcb2..d9626f48 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: ["3.10", "3.12"] steps: - uses: actions/checkout@v2 diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 0299153c..7cdd6911 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,16 +1,17 @@ -# Required +# .readthedocs.yml version: 2 -# Build documentation in the docs/ directory with Sphinx +build: + os: ubuntu-22.04 + tools: + python: "3.10" + sphinx: - configuration: docs/source/conf.py + configuration: docs/source/conf.py -# Optionally build your docs in additional formats such as PDF formats: - - pdf + - pdf -# Optionally set the version of Python and requirements required to build your docs python: - version: 3.8 - install: + install: - requirements: docs/requirements.txt diff --git a/README.rst b/README.rst index 2bc32b23..2cc38ac7 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,5 @@ - MLMC ----- +==== .. image:: https://github.com/GeoMop/MLMC/workflows/package/badge.svg :target: https://github.com/GeoMop/MLMC/actions @@ -8,56 +7,87 @@ MLMC :target: https://pypi.org/project/mlmc/ .. image:: https://img.shields.io/pypi/pyversions/mlmc.svg :target: https://pypi.org/project/mlmc/ +.. image:: https://img.shields.io/badge/License-GPLv3-blue.svg + :target: https://www.gnu.org/licenses/gpl-3.0.html -MLMC provides tools for the multilevel Monte Carlo method. - -mlmc package includes: -- samples scheduling -- estimation of generalized moment functions -- probability density function approximation -- advanced post-processing with Quantity structure +**MLMC** is a Python library implementing the **Multilevel Monte Carlo (MLMC)** method. +It provides tools for sampling, moment estimation, statistical post-processing, and more. 
+Originally developed as part of the `GeoMop `_ project. -It is meant as part of the `GeoMop `_ project in particular Analysis component. +Features +-------- +* Sample scheduling +* Estimation of generalized moments +* Advanced post-processing with the ``Quantity`` structure +* Approximation of probability density functions using the maximum entropy method +* Bootstrap and regression-based variance estimation +* Diagnostic tools (e.g., consistency checks) Installation ----------------------------------- -Package can be installed via pip. +------------ + +The package is available on PyPI and can be installed with pip: -.. code-block:: +.. code-block:: bash pip install mlmc +To install the latest development version: + +.. code-block:: bash + + git clone https://github.com/GeoMop/MLMC.git + cd MLMC + pip install -e . Documentation ------------- -You can find the documentation including tutorials under https://mlmc.readthedocs.io/ + +Full documentation, including tutorials, is available at: +`https://mlmc.readthedocs.io/ `_ + +Topics covered include: + +* Basic MLMC workflow and examples +* Definition and composition of ``Quantity`` objects +* Moment and covariance estimation +* Probability density function reconstruction Development ----------- -Provided that you want to contribute, create a pull request and make sure you run `tox` before. Tox -installs necessary requirements as well as the developed package itself into clear virtual environment -and call pytest to search in the `test` folder for tests to execute. +Contributions are welcome! +To contribute, please fork the repository and create a pull request. + +Before submitting, make sure all tests pass by running ``tox``: + +.. code-block:: bash + pip install tox + tox + +``tox`` creates a clean virtual environment, installs all dependencies, +runs unit tests via ``pytest``, and checks that the package installs correctly. 
Requirements ------------ -- `NumPy `_ -- `SciPy `_ -- `h5py `_ -- `attrs `_ -- `ruamel.yaml `_ -- `gstools `_ -- `memoization `_ -- `sklearn `_ +MLMC depends on the following Python packages: -Licence -------- -* Free software: GPL 3.0 License +* `NumPy `_ +* `SciPy `_ +* `h5py `_ +* `attrs `_ +* `ruamel.yaml `_ +* `gstools `_ +* `memoization `_ +* `scikit-learn `_ +License +------- +* Free software: **GNU General Public License v3.0** diff --git a/docs/requirements.txt b/docs/requirements.txt index 83f22df7..5852805c 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,9 +1,18 @@ +# --- Documentation dependencies --- +sphinx>=7.0 +sphinx-rtd-theme +sphinx-autodoc-typehints +sphinxcontrib-napoleon +myst-parser +nbsphinx +sphinx_copybutton + +# --- MLMC runtime dependencies --- numpy scipy -sklearn +scikit-learn h5py>=3.1.0 -ruamel.yaml +ruamel.yaml==0.17.26 attrs gstools -memoization -matplotlib +memoization \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 55a3cec7..43609bc6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,78 +1,68 @@ # Configuration file for the Sphinx documentation builder. # -# This file only contains a selection of the most common options. For a full -# list see the documentation: +# For a full list of configuration options see: # https://www.sphinx-doc.org/en/master/usage/configuration.html -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
-# import os import sys import datetime + +# -- Path setup -------------------------------------------------------------- + +# Add project root to sys.path sys.path.insert(0, os.path.abspath("../../")) # -- Project information ----------------------------------------------------- -# General information about the project. + curr_year = datetime.datetime.now().year project = "mlmc" -copyright = "{}, Jan Březina, Martin Špetlík".format(curr_year) -author = "Jan Březina, Martin Špetlík" +copyright = f"{curr_year}, Martin Špetlík, Jan Březina" +author = "Martin Špetlík, Jan Březina" # The full version, including alpha/beta/rc tags -release = '1.0.1' - +release = "1.0.3" # -- General configuration --------------------------------------------------- -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode', - 'sphinx.ext.doctest', - 'sphinx.ext.autosectionlabel' - ] - -# autosummaries from source-files -autosummary_generate = True -# dont show __init__ docstring -autoclass_content = 'class' -# sort class members -autodoc_member_order = "groupwise" -# autodoc_member_order = 'bysource' + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx.ext.doctest", + "sphinx.ext.autosectionlabel", + "sphinx_autodoc_typehints", + "myst_parser", + "nbsphinx", + "sphinx_copybutton", +] + +# Autodoc settings +autosummary_generate = True # Generate autosummary files +autoclass_content = "class" # Don't repeat __init__ docstring +autodoc_member_order = "groupwise" # Grouped members in docs +autodoc_typehints = "description" # Show type hints in docstring + +# Napoleon settings for Google-style docstrings +napoleon_google_docstring = True +napoleon_numpy_docstring = False +napoleon_use_param = True +napoleon_use_ivar = True # Add 
any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] +templates_path = ["_templates"] +# Exclude build files and system junk +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # -- Options for HTML output ------------------------------------------------- -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "sphinx_rtd_theme" #'alabaster' +html_theme = "sphinx_rtd_theme" html_theme_options = { - # 'canonical_url': '', - # 'analytics_id': '', "logo_only": False, "display_version": True, "prev_next_buttons_location": "top", - # 'style_external_links': False, - # 'vcs_pageview_mode': '', - # Toc options "collapse_navigation": False, "sticky_navigation": True, "navigation_depth": 4, @@ -80,18 +70,11 @@ "titles_only": False, } -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -#html_static_path = ['_static'] - -# autodoc_default_options = { -# 'members': True, -# # The ones below should be optional but work nicely together with -# # example_package/autodoctest/doc/source/_templates/autosummary/class.rst -# # and other defaults in sphinx-autodoc. 
-# 'show-inheritance': True, -# 'inherited-members': True, -# 'no-special-members': True, -# } +# Optional: uncomment if you have static files like custom CSS +# html_static_path = ["_static"] + +# This tells Sphinx which file is the master doc (entry point) master_doc = "contents" + +# -- Optional convenience: print path info on build -------------------------- +print(f"[conf.py] Using sys.path[0]: {sys.path[0]}") diff --git a/docs/source/examples.rst b/docs/source/examples.rst index 23b4acd6..7c0271ad 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -4,7 +4,10 @@ Tutorials .. automodule:: examples -The following tutorials illustrates how to use mlmc package. +This section provides step-by-step tutorials demonstrating how to use the **mlmc** package. + +Each tutorial builds upon the previous one — starting from sampler creation, through sample scheduling and quantity handling, to full postprocessing and probability density estimation. + .. toctree:: :includehidden: @@ -16,4 +19,12 @@ The following tutorials illustrates how to use mlmc package. examples_postprocessing -You can find more complex examples in :any:`examples.shooting` +Additional Examples +------------------- + +You can find more advanced and domain-specific examples (e.g., stochastic simulations or PDE-based problems) in: + +:mod:`examples.shooting` + +These examples demonstrate how to integrate MLMC with real-world simulation workflows. + diff --git a/docs/source/examples_postprocessing.rst b/docs/source/examples_postprocessing.rst index 56479b6f..cb01a45d 100644 --- a/docs/source/examples_postprocessing.rst +++ b/docs/source/examples_postprocessing.rst @@ -1,64 +1,105 @@ -Results postprocessing +.. _examples results postprocessing: +Results Postprocessing ====================== -If you already know how to create a sampler, schedule samples and handle quantities, -postprocessing will be easy for you. Otherwise, see the previous tutorials before. 
+Once you know how to **create a sampler**, **schedule samples**, and **work with quantities**, +postprocessing becomes straightforward. +If you haven’t gone through those steps yet, please review the earlier tutorials first. +Estimating Moments +------------------ -First, schedule samples and estimate moments for a particular quantity +We start by scheduling samples and estimating moments for a specific quantity. .. testcode:: import mlmc - n_levels = 3 # number of MLMC levels - step_range = [0.5, 0.005] # simulation steps at the coarsest and finest levels - target_var = 1e-4 - n_moments = 10 + + n_levels = 3 # Number of MLMC levels + step_range = [0.5, 0.005] # Simulation steps for coarsest and finest levels + target_var = 1e-4 # Desired target variance + n_moments = 10 # Number of moment functions + + # Compute level parameters (simulation steps per level) level_parameters = mlmc.estimator.determine_level_parameters(n_levels, step_range) - # level_parameters determine each level simulation steps - # level_parameters can be manually prescribed as a list of lists + # Initialize components simulation_factory = mlmc.SynthSimulation() sampling_pool = mlmc.OneProcessPool() - # Memory() storage keeps samples in the computer main memory - sample_storage = mlmc.Memory() + sample_storage = mlmc.Memory() # In-memory sample storage - sampler = mlmc.Sampler(sample_storage=sample_storage, - sampling_pool=sampling_pool, - sim_factory=simulation_factory, - level_parameters=level_parameters) + # Create the sampler + sampler = mlmc.Sampler( + sample_storage=sample_storage, + sampling_pool=sampling_pool, + sim_factory=simulation_factory, + level_parameters=level_parameters + ) + # Schedule and run initial samples sampler.set_initial_n_samples() sampler.schedule_samples() + running = 1 while running > 0: running = 0 running += sampler.ask_sampling_pool_for_samples() - # Get particular quantity +Accessing Quantities +-------------------- + +Now we extract a specific **Quantity** 
from the results and set up the estimation. + +.. testcode:: + + # Obtain the root quantity representing all simulation outputs root_quantity = mlmc.make_root_quantity(sampler.sample_storage, simulation_factory.result_format()) - length = root_quantity['length'] + + # Select a sub-quantity + length = root_quantity["length"] time = length[1] - location = time['10'] + location = time["10"] q_value = location[0] +Defining the Domain and Estimator +--------------------------------- + +Before estimating higher-order statistics, we determine the valid domain and define the estimator. + +.. testcode:: + true_domain = mlmc.Estimate.estimate_domain(q_value, sample_storage) moments_fn = mlmc.Legendre(n_moments, true_domain) - estimate_obj = mlmc.Estimate(q_value, sample_storage=sampler.sample_storage, - moments_fn=moments_fn) + + estimate_obj = mlmc.Estimate( + q_value, + sample_storage=sampler.sample_storage, + moments_fn=moments_fn + ) + + +Variance and Sample Estimation +------------------------------ + +We can now estimate variances and determine how many samples are required +to achieve the target variance. + +.. 
testcode:: variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler.n_finished_samples) from mlmc.estimator import estimate_n_samples_for_target_variance - n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, - n_levels=sampler.n_levels) + n_estimated = estimate_n_samples_for_target_variance( + target_var, variances, n_ops, n_levels=sampler.n_levels + ) + # Add and process samples iteratively until convergence while not sampler.process_adding_samples(n_estimated): - # New estimation according to already finished samples variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler._n_scheduled_samples) - n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, - n_levels=sampler.n_levels) + n_estimated = estimate_n_samples_for_target_variance( + target_var, variances, n_ops, n_levels=sampler.n_levels + ) running = 1 while running > 0: @@ -66,17 +107,36 @@ First, schedule samples and estimate moments for a particular quantity running += sampler.ask_sampling_pool_for_samples() -Probability density function approximation ---------------------- +Probability Density Function Approximation +------------------------------------------ + +Finally, we can construct and visualize an approximation of the **probability density function (PDF)**. .. 
testcode:: from mlmc.plot.plots import Distribution + distr_obj, result, _, _ = estimate_obj.construct_density() - distr_plot = Distribution(title="distributions", error_plot=None) + + distr_plot = Distribution(title="Distributions", error_plot=None) distr_plot.add_distribution(distr_obj) + # For single-level simulations, add a histogram of raw samples if n_levels == 1: samples = estimate_obj.get_level_samples(level_id=0)[..., 0] - distr_plot.add_raw_samples(np.squeeze(samples)) # add histogram + distr_plot.add_raw_samples(np.squeeze(samples)) + distr_plot.show() + + +**Summary** +----------- + +In this tutorial, you learned how to: +- Estimate statistical moments from MLMC results. +- Automatically determine the required number of samples to reach a target variance. +- Construct and visualize an estimated probability density function. + +This completes the **postprocessing** workflow of the MLMC pipeline. + + diff --git a/docs/source/examples_quantity.rst b/docs/source/examples_quantity.rst index 9ab4fa58..09812832 100644 --- a/docs/source/examples_quantity.rst +++ b/docs/source/examples_quantity.rst @@ -1,31 +1,39 @@ .. _examples quantity: -Quantity tutorial +Quantity Tutorial ================= -An overview of basic :any:`mlmc.quantity.quantity.Quantity` operations. -Quantity related classes and functions allow estimate mean and variance of MLMC samples results, -derive other quantities from original ones and much more. +This tutorial provides an overview of basic :any:`mlmc.quantity.quantity.Quantity` operations. + +The :mod:`mlmc.quantity` module and its related classes allow you to: +- Estimate means and variances of MLMC sample results. +- Derive new quantities from existing ones. +- Perform arithmetic and NumPy-based operations on quantities. + + +Setup +----- + +Before exploring `Quantity` operations, we first set up a simple synthetic MLMC sampler. .. 
testcode:: :hide: import mlmc - n_levels = 3 # number of MLMC levels - step_range = [0.5, 0.005] # simulation steps at the coarsest and finest levels + n_levels = 3 # number of MLMC levels + step_range = [0.5, 0.005] # simulation steps at the coarsest and finest levels level_parameters = mlmc.estimator.determine_level_parameters(n_levels, step_range) - # level_parameters determine each level simulation steps - # level_parameters can be manually prescribed as a list of lists simulation_factory = mlmc.SynthSimulation() sampling_pool = mlmc.OneProcessPool() - # Memory() storage keeps samples in the computer main memory sample_storage = mlmc.Memory() - sampler = mlmc.Sampler(sample_storage=sample_storage, - sampling_pool=sampling_pool, - sim_factory=simulation_factory, - level_parameters=level_parameters) + sampler = mlmc.Sampler( + sample_storage=sample_storage, + sampling_pool=sampling_pool, + sim_factory=simulation_factory, + level_parameters=level_parameters + ) n_samples = [100, 75, 50] sampler.set_initial_n_samples(n_samples) @@ -36,7 +44,6 @@ derive other quantities from original ones and much more. running += sampler.ask_sampling_pool_for_samples() - .. testcode:: import numpy as np @@ -44,7 +51,10 @@ derive other quantities from original ones and much more. from examples.synthetic_quantity import create_sampler -First, the synthetic Quantity with the following :code:`result_format` is created +Creating a Synthetic Quantity +----------------------------- + +We begin by creating a synthetic :class:`mlmc.quantity.quantity.Quantity` with a predefined ``result_format``: .. 
testcode:: @@ -52,72 +62,60 @@ First, the synthetic Quantity with the following :code:`result_format` is create # mlmc.QuantitySpec(name="length", unit="m", shape=(2, 1), times=[1, 2, 3], locations=['10', '20']), # mlmc.QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=['30', '40']), # ] - # Meaning: sample results contain data on two quantities in three time steps [1, 2, 3] and in two locations, - # each quantity can have different shape sampler, simulation_factory, moments_fn = create_sampler() root_quantity = mlmc.make_root_quantity(sampler.sample_storage, simulation_factory.result_format()) -:code:`root_quantity` is :py:class:`mlmc.quantity.quantity.Quantity` instance and represents the whole result data. -According to :code:`result_format` it contains two sub-quantities named "length" and "width". +This means the sample results contain data for two quantities (`length` and `width`) at three time steps `[1, 2, 3]` and two locations each. + +:code:`root_quantity` is an instance of :class:`mlmc.quantity.quantity.Quantity`, representing the full result data structure. + +Mean Estimates +-------------- -Mean estimates ---------------- -To get estimated mean of a quantity: +To compute the estimated mean of a quantity: .. testcode:: root_quantity_mean = mlmc.quantity.quantity_estimate.estimate_mean(root_quantity) -:code:`root_quantity_mean` is an instance of :py:class:`mlmc.quantity.quantity.QuantityMean` +The returned object, :code:`root_quantity_mean`, is a :class:`mlmc.quantity.quantity.QuantityMean` instance. -To get the total mean value: +To retrieve statistical values: .. testcode:: + # Total mean and variance root_quantity_mean.mean - -To get the total variance value: - -.. testcode:: - root_quantity_mean.var -To get means at each level: - -.. testcode:: - + # Means and variances at each level root_quantity_mean.l_means - -To get variances at each level: - -.. 
testcode:: - root_quantity_mean.l_vars -Estimate moments and covariance matrix --------------------------------------- +Moments and Covariance Estimation +--------------------------------- -Create a quantity representing moments and get their estimates +To create and estimate statistical moments: .. testcode:: moments_quantity = mlmc.quantity.quantity_estimate.moments(root_quantity, moments_fn=moments_fn) moments_mean = mlmc.quantity.quantity_estimate.estimate_mean(moments_quantity) -To obtain central moments, use: +To obtain **central moments**, first subtract the mean: .. testcode:: central_root_quantity = root_quantity - root_quantity_mean.mean - central_moments_quantity = mlmc.quantity.quantity_estimate.moments(central_root_quantity, - moments_fn=moments_fn) + central_moments_quantity = mlmc.quantity.quantity_estimate.moments( + central_root_quantity, moments_fn=moments_fn + ) central_moments_mean = mlmc.quantity.quantity_estimate.estimate_mean(central_moments_quantity) - -Create a quantity representing a covariance matrix +To estimate a **covariance matrix**: .. testcode:: @@ -125,36 +123,31 @@ Create a quantity representing a covariance matrix cov_mean = mlmc.quantity.quantity_estimate.estimate_mean(covariance_quantity) - -Quantity selection +Quantity Selection ------------------ -According to the result_format, it is possible to select items from a quantity +You can access and manipulate sub-quantities directly using the structure defined by `result_format`: .. testcode:: length = root_quantity["length"] # Get quantity with name="length" - width = root_quantity["width"] # Get quantity with name="width" + width = root_quantity["width"] # Get quantity with name="width" -:code:`length` and :code:`width` are still :py:class:`mlmc.quantity.quantity.Quantity` instances +Both are still :class:`mlmc.quantity.quantity.Quantity` instances. -To get a quantity at particular time: +Selecting by **time**: .. 
testcode:: length_locations = length.time_interpolation(2.5) -:code:`length_locations` represents results for all locations of quantity named "length" at the time 2.5 - -To get quantity at particular location: +Selecting by **location**: .. testcode:: length_result = length_locations['10'] -:code:`length_result` represents results shape=(2, 1) of quantity named "length" at the time 2,5 and location '10' - -Now it is possible to slice Quantity :code:`length_result` the same way as :code:`np.ndarray`. For example: +Now, :code:`length_result` behaves like a NumPy array: .. testcode:: @@ -164,39 +157,41 @@ Now it is possible to slice Quantity :code:`length_result` the same way as :code length_result[:1, :1] length_result[:2, ...] -Keep in mind: - - all derived quantities such as :code:`length_locations` and :code:`length_result`, ... are still :py:class:`mlmc.quantity.quantity.Quantity` instances - - selecting location before time is not supported! +.. note:: + - All derived quantities (like :code:`length_locations` or :code:`length_result`) remain `Quantity` instances. + - Selecting a **location before time** is not supported. -Binary operations + +Binary Operations ----------------- -Following operations are supported - - Addition, subtraction, ... of compatible quantities +`Quantity` supports standard arithmetic operations: + +**Between quantities:** - .. testcode:: +.. testcode:: - quantity = root_quantity + root_quantity - quantity = root_quantity + root_quantity + root_quantity + quantity = root_quantity + root_quantity + quantity = root_quantity + root_quantity + root_quantity - - Operations with Quantity and a constant +**With constants:** - .. testcode:: +.. 
testcode:: - const = 5 - quantity_const_add = root_quantity + const - quantity_const_sub = root_quantity - const - quantity_const_mult = root_quantity * const - quantity_const_div = root_quantity / const - quantity_const_mod = root_quantity % const - quantity_add_mult = root_quantity + root_quantity * const + const = 5 + quantity_const_add = root_quantity + const + quantity_const_sub = root_quantity - const + quantity_const_mult = root_quantity * const + quantity_const_div = root_quantity / const + quantity_const_mod = root_quantity % const + quantity_add_mult = root_quantity + root_quantity * const -NumPy universal functions +NumPy Universal Functions -------------------------- -Examples of tested NumPy universal functions: +`Quantity` objects are compatible with many NumPy universal functions (`ufuncs`): .. testcode:: @@ -209,41 +204,41 @@ Examples of tested NumPy universal functions: x = np.ones(24) quantity_np_divide_const = np.divide(x, root_quantity) quantity_np_add_const = np.add(x, root_quantity) - quantity_np_arctan2_cosnt = np.arctan2(x, root_quantity) + quantity_np_arctan2_const = np.arctan2(x, root_quantity) -Quantity selection by conditions ---------------------------------- +Conditional Selection +---------------------- -Method :code:`select` returns :py:class:`mlmc.quantity.quantity.Quantity` instance +You can select parts of a quantity using logical conditions via the :code:`select()` method. .. testcode:: selected_quantity = root_quantity.select(0 < root_quantity) +Or using comparisons between quantities: + .. testcode:: quantity_add = root_quantity + root_quantity quantity_add_select = quantity_add.select(root_quantity < quantity_add) root_quantity_selected = root_quantity.select(-1 != root_quantity) -Logical operation among more provided conditions is AND +Multiple conditions are combined using logical **AND**: .. 
testcode:: quantity_add.select(root_quantity < quantity_add, root_quantity < 10) -User can use one of the logical NumPy universal functions +Use NumPy logical functions for more complex conditions: .. testcode:: selected_quantity_or = root_quantity.select(np.logical_or(0 < root_quantity, root_quantity < 10)) -It is possible to explicitly define the selection condition of one quantity by another quantity +You can also explicitly define the selection mask: .. testcode:: - mask = np.logical_and(0 < root_quantity, root_quantity < 10) # mask is Quantity instance + mask = np.logical_and(0 < root_quantity, root_quantity < 10) q_bounded = root_quantity.select(mask) - - diff --git a/docs/source/examples_sampler_creation.rst b/docs/source/examples_sampler_creation.rst index b363d774..29dab5b0 100644 --- a/docs/source/examples_sampler_creation.rst +++ b/docs/source/examples_sampler_creation.rst @@ -1,56 +1,102 @@ -Sampler creation +Sampler Creation ================= -Sampler controls the execution of MLMC samples. +The **Sampler** controls the execution and management of MLMC (Multilevel Monte Carlo) samples. +This example demonstrates how to configure all essential components for an MLMC simulation. -First, import mlmc package and define basic MLMC parameters. + + +Basic Setup +------------ + +First, import the :mod:`mlmc` package and define basic MLMC parameters. .. 
testcode:: import mlmc - n_levels = 3 # number of MLMC levels - step_range = [0.5, 0.005] # simulation steps at the coarsest and finest levels + + # Define number of MLMC levels + n_levels = 3 + + # Simulation step sizes at the coarsest and finest levels + step_range = [0.5, 0.005] + + # Compute level parameters (simulation steps per level) level_parameters = mlmc.estimator.determine_level_parameters(n_levels, step_range) - # level_parameters determine each level simulation steps - # level_parameters can be manually prescribed as a list of lists + # Alternatively, you can specify level_parameters manually as a list of lists. -Prepare a simulation, it must be instance of class that inherits from :any:`mlmc.sim.simulation.Simulation`. + + +Simulation Definition +---------------------- + +Prepare a simulation instance. +The simulation class must inherit from :class:`mlmc.sim.simulation.Simulation`. .. testcode:: simulation_factory = mlmc.SynthSimulation() -Create a sampling pool. +This factory will be used by the sampler to create individual simulation runs. + + + +Sampling Pool +-------------- + +Next, create a sampling pool that controls how samples are executed. .. testcode:: sampling_pool = mlmc.OneProcessPool() +The :class:`mlmc.sampling_pool.OneProcessPool` executes samples sequentially within a single process. + +You can also use: + +- :class:`mlmc.sampling_pool.ProcessPool` — executes samples in parallel across multiple processes. +- :class:`mlmc.sampling_pool_pbs.SamplingPoolPBS` — submits jobs to a PBS (Portable Batch System) cluster for distributed computation. + -You can also use :any:`mlmc.sampling_pool.ProcessPool` which supports parallel execution of MLMC samples. -In order to use PBS (portable batch system), employ :any:`mlmc.sampling_pool_pbs.SamplingPoolPBS`. +Sample Storage +--------------- -Create a sample storage. It contains sample's related data e.g. simulation result. +The **sample storage** keeps all data related to simulation results. .. 
testcode:: - # Memory() storage keeps samples in the computer main memory + # Memory storage keeps samples in main memory sample_storage = mlmc.Memory() -We support also HDF5 file storage :any:`mlmc.sample_storage_hdf.SampleStorageHDF`. +Alternatively, use persistent file-based storage: +- :class:`mlmc.sample_storage_hdf.SampleStorageHDF` — stores results in an HDF5 file for long-term reuse and analysis. -Finally, create a sampler that manages scheduling MLMC samples and also saves the results. + + +Sampler Initialization +----------------------- + +Finally, create the **Sampler** instance. +It coordinates sample scheduling, simulation execution, and result collection. .. testcode:: - sampler = mlmc.Sampler(sample_storage=sample_storage, - sampling_pool=sampling_pool, - sim_factory=simulation_factory, - level_parameters=level_parameters) + sampler = mlmc.Sampler( + sample_storage=sample_storage, + sampling_pool=sampling_pool, + sim_factory=simulation_factory, + level_parameters=level_parameters + ) + +The sampler is now ready to generate and manage MLMC samples. + +Next Steps +----------- -:ref:`examples samples scheduling` \ No newline at end of file +Proceed to the next example: +:ref:`examples samples scheduling` diff --git a/docs/source/examples_samples_scheduling.rst b/docs/source/examples_samples_scheduling.rst index 135a421c..32ad7fd0 100644 --- a/docs/source/examples_samples_scheduling.rst +++ b/docs/source/examples_samples_scheduling.rst @@ -1,128 +1,156 @@ .. _examples samples scheduling: -Samples scheduling +Samples Scheduling ================== -Once you create a sampler you can schedule samples. +Once you have created a **Sampler**, you can schedule the execution of MLMC samples in different ways. +This tutorial demonstrates two approaches: +1. Scheduling with a prescribed number of samples. +2. Scheduling to reach a target variance automatically. -1. 
Prescribe the exact number of samples ----------------------------------------------------------------- + +1. Prescribing an Exact Number of Samples +----------------------------------------- + +In this approach, you explicitly set how many samples should be created at each MLMC level. .. testcode:: :hide: import mlmc - n_levels = 3 # number of MLMC levels - step_range = [0.5, 0.005] # simulation steps at the coarsest and finest levels + n_levels = 3 # number of MLMC levels + step_range = [0.5, 0.005] # simulation steps at the coarsest and finest levels level_parameters = mlmc.estimator.determine_level_parameters(n_levels, step_range) - # level_parameters determine each level simulation steps - # level_parameters can be manually prescribed as a list of lists simulation_factory = mlmc.SynthSimulation() sampling_pool = mlmc.OneProcessPool() - # Memory() storage keeps samples in the computer main memory sample_storage = mlmc.Memory() - sampler = mlmc.Sampler(sample_storage=sample_storage, - sampling_pool=sampling_pool, - sim_factory=simulation_factory, - level_parameters=level_parameters) + sampler = mlmc.Sampler( + sample_storage=sample_storage, + sampling_pool=sampling_pool, + sim_factory=simulation_factory, + level_parameters=level_parameters + ) + +Set the number of samples for each level: .. testcode:: n_samples = [100, 75, 50] sampler.set_initial_n_samples(n_samples) -Schedule set samples. +Schedule the prescribed samples: .. testcode:: sampler.schedule_samples() -You can wait until all samples are finished. +Wait until all samples have finished: .. testcode:: - running = 1 - while running > 0: - running = 0 - running += sampler.ask_sampling_pool_for_samples() + running = 1 + while running > 0: + running = 0 + running += sampler.ask_sampling_pool_for_samples() + +2. Prescribing a Target Variance +-------------------------------- -2. 
Prescribe a target variance -------------------------------------------------------------- +This approach automatically determines the number of samples needed to achieve a desired **target variance**. -Set target variance and number of random variable moments that must meet this variance. +Define the target variance and the number of **moment functions** used for estimation: .. testcode:: - target_var = 1e-4 - n_moments = 10 + target_var = 1e-4 + n_moments = 10 -The first phase is the same as the first approach, but the initial samples are automatically determined -as a sequence from 100 samples at the coarsest level to 10 samples at the finest level. +As before, initialize and run a first batch of samples (automatically ranging from 100 samples on the coarsest level +to 10 on the finest level): .. testcode:: sampler.set_initial_n_samples() sampler.schedule_samples() + running = 1 while running > 0: running = 0 running += sampler.ask_sampling_pool_for_samples() -The :py:class:`mlmc.quantity.quantity.Quantity` instance is created, for details see :ref:`examples quantity` +Creating the Quantity and Moment Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Next, create a :py:class:`mlmc.quantity.quantity.Quantity` object representing the MLMC results. .. testcode:: - root_quantity = mlmc.make_root_quantity(storage=sampler.sample_storage, - q_specs=sampler.sample_storage.load_result_format()) + root_quantity = mlmc.make_root_quantity( + storage=sampler.sample_storage, + q_specs=sampler.sample_storage.load_result_format() + ) -:code:`root_quantity` contains the structure of sample results and also allows access to their values. +The :code:`root_quantity` contains both the data structure and the sampled values. -In order to estimate moment values including variance, moment functions class (in this case Legendre polynomials) instance -and :py:class:`mlmc.estimator.Estimate` instance are created. 
+Now we create the **moment functions** and an :py:class:`mlmc.estimator.Estimate` instance to perform statistical estimation. .. testcode:: true_domain = mlmc.Estimate.estimate_domain(root_quantity, sample_storage) moments_fn = mlmc.Legendre(n_moments, true_domain) - estimate_obj = mlmc.Estimate(root_quantity, sample_storage=sampler.sample_storage, - moments_fn=moments_fn) + estimate_obj = mlmc.Estimate( + root_quantity, + sample_storage=sampler.sample_storage, + moments_fn=moments_fn + ) + +Estimating Variances and Computational Cost +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -At first, the variance of moments and average execution time per sample at each level are estimated from already finished samples. +From the finished samples, estimate the variance of moments and the average computational cost per sample for each level: .. testcode:: variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler.n_finished_samples) -Then, an initial estimate of the number of MLMC samples that should meet prescribed target variance is conducted. + +Estimating the Required Number of Samples +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Using the estimated variances and costs, determine the number of samples required to meet the **target variance**: .. testcode:: from mlmc.estimator import estimate_n_samples_for_target_variance - n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, - n_levels=sampler.n_levels) + + n_estimated = estimate_n_samples_for_target_variance( + target_var, variances, n_ops, n_levels=sampler.n_levels + ) -Now it is time for our sampling algorithm that gradually schedules samples and refines the total number of samples -until the number of estimated samples is greater than the number of scheduled samples. +Iterative Sampling Process +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The sampling algorithm incrementally schedules additional samples until the estimated number of required samples is reached. .. 
testcode:: while not sampler.process_adding_samples(n_estimated): - # New estimation according to already finished samples + # Recalculate estimates based on completed samples variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler._n_scheduled_samples) - n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, - n_levels=sampler.n_levels) - + n_estimated = estimate_n_samples_for_target_variance( + target_var, variances, n_ops, n_levels=sampler.n_levels + ) -Finally, wait until all samples are finished. +Finally, wait until all samples are completed: .. testcode:: @@ -131,6 +159,21 @@ Finally, wait until all samples are finished. running = 0 running += sampler.ask_sampling_pool_for_samples() -Since our sampling algorithm determines the number of samples according to moment variances, -the type of moment functions (Legendre by default) might affect total number of MLMC samples. +Notes +----- + +- The sampling algorithm automatically adjusts the number of samples per level based on estimated **moment variances**. +- The choice of moment functions (e.g. :class:`mlmc.Legendre`) influences the total number of samples needed. +- For most cases, the default **Legendre polynomials** provide a good balance between accuracy and computational cost. + + +**Summary** +----------- + +In this tutorial, you learned how to: +- Schedule MLMC samples with a fixed number of samples per level. +- Automatically adapt the number of samples to achieve a target variance. +- Use moment functions and variance regression for optimal sample allocation. + +Continue to the next section for :ref:`examples results postprocessing`. 
diff --git a/docs/source/index.rst b/docs/source/index.rst index 27a83141..a329294f 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,29 +1,30 @@ -===== -MLMC -===== +MLMC: Multilevel Monte Carlo method +==================================== + +MLMC is a Python library for efficient uncertainty quantification using the multilevel Monte Carlo method. + +It provides tools for: +- Parallel sample scheduling +- Generalized moment estimation +- Probability density approximation +- Advanced post-processing via the Quantity abstraction .. image:: https://github.com/GeoMop/MLMC/workflows/package/badge.svg - :target: https://github.com/GeoMop/MLMC/actions + :target: https://github.com/GeoMop/MLMC/actions .. image:: https://img.shields.io/pypi/v/mlmc.svg - :target: https://pypi.org/project/mlmc/ + :target: https://pypi.org/project/mlmc/ .. image:: https://img.shields.io/pypi/pyversions/mlmc.svg - :target: https://pypi.org/project/mlmc/ - -MLMC provides tools for the multilevel Monte Carlo method, which is theoretically described by `M. Giles `_. - -mlmc package includes: - -- samples scheduling -- estimation of generalized moment functions -- probability density function approximation -- advanced post-processing with our Quantity structure - + :target: https://pypi.org/project/mlmc/ Installation -============ -mlmc can be installed via `pip `_ +------------ +Install via pip: + +.. code-block:: bash -.. code-block:: none + pip install mlmc - pip install mlmc +Documentation +------------- +Tutorials and API reference are available below. diff --git a/docs/source/mlmc.plot.rst b/docs/source/mlmc.plot.rst index 47f0bd09..f5faf6cb 100644 --- a/docs/source/mlmc.plot.rst +++ b/docs/source/mlmc.plot.rst @@ -1,4 +1,4 @@ -mlmc.plot +mlmc.plot package ================= .. automodule:: mlmc.plot @@ -6,29 +6,31 @@ mlmc.plot :undoc-members: :show-inheritance: +--- + Submodules ---------- -mlmc.plot.plots module ----------------------- +.. toctree:: + :maxdepth: 1 -.. 
automodule:: mlmc.plot.plots - :members: - :undoc-members: - :show-inheritance: + mlmc.plot.plots + mlmc.plot.violinplot -mlmc.plot.violinplot module ---------------------------- +--- -.. automodule:: mlmc.plot.violinplot +mlmc.plot.plots +--------------- + +.. automodule:: mlmc.plot.plots :members: :undoc-members: :show-inheritance: -Module contents ---------------- +mlmc.plot.violinplot +-------------------- -.. automodule:: mlmc.plot +.. automodule:: mlmc.plot.violinplot :members: :undoc-members: :show-inheritance: diff --git a/docs/source/mlmc.tool.rst b/docs/source/mlmc.tool.rst index 6e0bdc51..8234e205 100644 --- a/docs/source/mlmc.tool.rst +++ b/docs/source/mlmc.tool.rst @@ -6,14 +6,6 @@ mlmc.tool Submodules ---------- -mlmc.tool.context\_statprof module ----------------------------------- - -.. automodule:: mlmc.tool.context_statprof - :members: - :undoc-members: - :show-inheritance: - mlmc.tool.distribution module ----------------------------- diff --git a/examples/quantity_operations.py b/examples/quantity_operations.py index e8cd5b5a..e4b0ecc6 100644 --- a/examples/quantity_operations.py +++ b/examples/quantity_operations.py @@ -1,31 +1,63 @@ +""" +Quantity examples and quick reference. + +This module demonstrates basic operations with mlmc Quantity objects. +It shows how to: + - create a synthetic root quantity (result format specified below), + - compute mean estimates, + - estimate moments and covariance, + - select sub-quantities, time-interpolate and slice, + - perform arithmetic and NumPy ufuncs with Quantity objects, + - perform selections using conditions and masks. 
+ +Result format used in the synthetic example: + +result_format = [ + mlmc.quantity.quantity_spec.QuantitySpec( + name="length", unit="m", shape=(2, 1), times=[1, 2, 3], locations=['10', '20'] + ), + mlmc.quantity.quantity_spec.QuantitySpec( + name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=['30', '40'] + ), +] + +Meaning: + - sample results contain data on two quantities ("length" and "width"), + - each quantity is evaluated at three times [1,2,3] and two locations, + - each quantity can also have its own internal shape. +""" import numpy as np import mlmc.quantity.quantity_spec from mlmc.quantity.quantity import make_root_quantity import mlmc.quantity.quantity_estimate from examples.synthetic_quantity import create_sampler -# An overview of basic Quantity operations - -###################################### -### Create synthetic quantity ### -###################################### -# Synthetic Quantity with the following result format -# result_format = [ -# mlmc.quantity.quantity_spec.QuantitySpec(name="length", unit="m", shape=(2, 1), times=[1, 2, 3], locations=['10', '20']), -# mlmc.quantity.quantity_spec.QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=['30', '40']), -# ] -# Meaning: sample results contain data on two quantities in three time steps [1, 2, 3] and in two locations, -# each quantity can have different shape +# ----------------------------------------------------------------------------- +# Create synthetic quantity +# ----------------------------------------------------------------------------- +""" +Create a synthetic sampler + factory and produce a root Quantity instance. +- create_sampler() returns: (sampler, simulation_factory, moments_fn) +- make_root_quantity(storage, q_specs) builds a Quantity object that represents the + whole result data structure (root with named sub-quantities). 
+""" sampler, simulation_factory, moments_fn = create_sampler() root_quantity = make_root_quantity(sampler.sample_storage, simulation_factory.result_format()) -# root_quantity is mlmc.quantity.quantity.Quantity instance and represents the whole result data, +# root_quantity is an mlmc.quantity.quantity.Quantity instance and represents the whole result data, # it contains two sub-quantities named "length" and "width" -################################### -#### Mean estimates ##### -################################### -# To get estimated mean of a quantity: +# ----------------------------------------------------------------------------- +# Mean estimates +# ----------------------------------------------------------------------------- +""" +Compute and inspect mean estimates for a Quantity. + +- estimate_mean(quantity) returns a QuantityMean instance that contains: + - .mean : mean Quantity + - .var : variance information + - .l_vars : level variances (if available) +""" root_quantity_mean = mlmc.quantity.quantity_estimate.estimate_mean(root_quantity) # root_quantity_mean is an instance of mlmc.quantity.QuantityMean # To get overall mean value: @@ -35,55 +67,60 @@ # To get level variance value: root_quantity_mean.l_vars -######################################################### -#### Estimate moments and covariance matrix ##### -######################################################### -# Create a quantity representing moments +# ----------------------------------------------------------------------------- +# Estimate moments and covariance matrix +# ----------------------------------------------------------------------------- +""" +Construct moments and covariance quantities from a root Quantity. + +- moments(root_quantity, moments_fn=moments_fn) returns a Quantity of moments. +- estimate_mean(moments_quantity) computes means for those moments. +- covariance(root_quantity, moments_fn=moments_fn) returns a Quantity describing covariance. 
+""" moments_quantity = mlmc.quantity.quantity_estimate.moments(root_quantity, moments_fn=moments_fn) moments_mean = mlmc.quantity.quantity_estimate.estimate_mean(moments_quantity) # Central moments: central_root_quantity = root_quantity - root_quantity_mean.mean -central_moments_quantity = mlmc.quantity.quantity_estimate.moments(central_root_quantity, moments_fn=moments_fn) +central_moments_quantity = mlmc.quantity.quantity_estimate.moments( + central_root_quantity, moments_fn=moments_fn +) central_moments_mean = mlmc.quantity.quantity_estimate.estimate_mean(central_moments_quantity) # Create a quantity representing covariance matrix covariance_quantity = mlmc.quantity.quantity_estimate.covariance(root_quantity, moments_fn=moments_fn) cov_mean = mlmc.quantity.quantity_estimate.estimate_mean(covariance_quantity) -# Both moments() and covariance() calls return mlmc.quantity.quantity.Quantity instance +# Both moments() and covariance() calls return mlmc.quantity.quantity.Quantity instances -################################## -### Quantity selection #### -################################## -# According to the result_format, tt is possible to select items from a quantity +# ----------------------------------------------------------------------------- +# Quantity selection +# ----------------------------------------------------------------------------- +""" +Examples of indexing and time/ location selection on a Quantity. + +According to the result_format you can select by name, then time, then location. +Selecting location before time is not supported. 
+""" length = root_quantity["length"] # Get quantity with name="length" width = root_quantity["width"] # Get quantity with name="width" -# length and width are still mlmc.quantity.quantity.Quantity instances -# To get a quantity at particular time: +# To get a quantity at a particular (interpolated) time: length_locations = length.time_interpolation(2.5) # length_locations represents results for all locations of quantity named "length" at the time 2.5 -# To get quantity at particular location +# To get quantity at particular location: length_result = length_locations['10'] -# length_result represents results shape=(2, 1) of quantity named "length" at the time 2,5 and location '10' - -# Now it is possible to slice Quantity length_result the same way as np.ndarray -# For example: -# length_result[1, 0] -# length_result[:, 0] -# length_result[:, :] -# length_result[:1, :1] -# length_result[:2, ...] - -# Keep in mind: -# - all derived quantities such as length_locations and length_result, ... are still mlmc.quantity.quantity.Quantity instances -# - selecting location before time is not supported! - -################################### -#### Binary operations ##### -################################### -# Following operations are supported -# Addition of compatible quantities +# length_result represents shape=(2, 1) data of "length" at time 2.5 and location '10' + +# You can slice Quantity like an ndarray: +# length_result[1, 0], length_result[:, 0], length_result[:2, ...], etc. + +# ----------------------------------------------------------------------------- +# Binary operations +# ----------------------------------------------------------------------------- +""" +Supported arithmetic operations between compatible Quantity objects and scalars. +The result is a Quantity instance with the same result_format structure. 
+""" quantity = root_quantity + root_quantity quantity = root_quantity + root_quantity + root_quantity @@ -96,11 +133,13 @@ quantity_const_mod = root_quantity % const quantity_add_mult = root_quantity + root_quantity * const - -################################### -#### NumPy universal functions #### -################################### -# Examples of tested NumPy universal functions: +# ----------------------------------------------------------------------------- +# NumPy universal functions (ufuncs) +# ----------------------------------------------------------------------------- +""" +Many NumPy ufuncs are supported and return Quantity instances: +- np.add, np.max, np.sin, np.sum, np.maximum, np.divide, np.arctan2, ... +""" quantity_np_add = np.add(root_quantity, root_quantity) quantity_np_max = np.max(root_quantity, axis=0, keepdims=True) quantity_np_sin = np.sin(root_quantity) @@ -112,10 +151,21 @@ quantity_np_add_const = np.add(x, root_quantity) quantity_np_arctan2_cosnt = np.arctan2(x, root_quantity) -################################################ -#### Quantity selection by a condition #### -################################################ -# Method select returns mlmc.quantity.quantity.Quantity instance +# ----------------------------------------------------------------------------- +# Quantity selection by a condition +# ----------------------------------------------------------------------------- +""" +The select(condition) method extracts elements of a Quantity according to a boolean condition; +it returns a Quantity (masking is internal and shape-aware). + +Examples: + - selected_quantity = root_quantity.select(0 < root_quantity) + - quantity_add_select = quantity_add.select(root_quantity < quantity_add) + - root_quantity_selected = root_quantity.select(-1 != root_quantity) + +You can combine conditions via logical ufuncs (np.logical_or / np.logical_and), and +you can explicitly pass a Quantity mask (mask is a Quantity instance). 
+""" selected_quantity = root_quantity.select(0 < root_quantity) quantity_add = root_quantity + root_quantity @@ -125,9 +175,11 @@ # Logical operation for more conditions is AND quantity_add.select(root_quantity < quantity_add, root_quantity < 10) -# User can use one of the logical NumPy universal functions -selected_quantity_or = root_quantity.select(np.logical_or(0 < root_quantity, root_quantity < 10)) +# Use NumPy logical ufuncs for complex masks +selected_quantity_or = root_quantity.select( + np.logical_or(0 < root_quantity, root_quantity < 10) +) -# It is possible to explicitly define the selection condition of one quantity by another quantity -mask = np.logical_and(0 < root_quantity, root_quantity < 10) # mask is Quantity instance +# Explicit mask Quantity +mask = np.logical_and(0 < root_quantity, root_quantity < 10) # mask is a Quantity instance q_bounded = root_quantity.select(mask) diff --git a/examples/shooting/shooting_1D.py b/examples/shooting/shooting_1D.py index 609d2d6a..12ba7036 100644 --- a/examples/shooting/shooting_1D.py +++ b/examples/shooting/shooting_1D.py @@ -16,141 +16,164 @@ # - create Quantity instance # - approximate density class ProcessShooting1D: + """ + Example driver for a 1D shooting problem using the MLMC framework. + + This tutorial class demonstrates how to: + - create a Sampler with storage and sampling pool, + - schedule and collect MLMC samples, + - estimate moments and build an approximate probability density function + of the quantity of interest. + + The constructor runs a full example MLMC workflow when the module is executed. + """ def __init__(self): + """ + Initialize parameters, create sampler, generate samples, collect them and postprocess. + + The constructor sets up: + - MLMC level parameters, + - sampling and storage components, + - schedule and collection of samples, + - postprocessing including moment estimation and density approximation. 
+ """ n_levels = 2 # Number of MLMC levels + step_range = [1, 1e-3] # step_range [simulation step at the coarsest level, simulation step at the finest level] + level_parameters = ProcessShooting1D.determine_level_parameters(n_levels, step_range) - # Determine each level parameters (in this case, simulation step at each level), level_parameters should be - # simulation dependent - self._sample_sleep = 0#30 - # Time to do nothing just to make sure the simulations aren't constantly checked, useful mainly for PBS run - self._sample_timeout = 60 - # Maximum waiting time for running simulations - self._adding_samples_coef = 0.1 - self._n_moments = 20 - # number of generalized statistical moments used for MLMC number of samples estimation - self._quantile = 0.01 - # Setting parameters that are utilized when scheduling samples - ### - # MLMC run - ### + # Determine each level parameters (in this case, simulation step at each level) + + # Sampling control parameters + self._sample_sleep = 0 # seconds to sleep between checking sampling pool (was 30) + self._sample_timeout = 60 # maximum waiting time for running simulations (seconds) + self._adding_samples_coef = 0.1 # coefficient used when dynamically adding samples + + # Moment / distribution settings + self._n_moments = 20 # number of generalized moments used for MLMC sample estimation + self._quantile = 0.01 # quantile used to estimate domain for moment functions + + # MLMC run: create sampler and run sampling workflow sampler = self.create_sampler(level_parameters=level_parameters) - # Create sampler (mlmc.Sampler instance) - crucial class that controls MLMC run self.generate_samples(sampler, n_samples=None, target_var=1e-3) - # Generate MLMC samples, there are two ways: - # 1) set exact number of samples at each level, - # e.g. for 5 levels - self.generate_samples(sampler, n_samples=[1000, 500, 250, 100, 50]) - # 2) set target variance of MLMC estimates, - # e.g. 
self.generate_samples(sampler, n_samples=None, target_var=1e-6) + # Generate MLMC samples. Two ways: + # 1) Provide exact n_samples per level: generate_samples(sampler, n_samples=[...]) + # 2) Provide target variance and let algorithm decide counts: target_var=1e-6 + self.all_collect(sampler) - # Check if all samples are finished - ### - # Postprocessing - ### + # Ensure all scheduled samples finished + + # Postprocessing: compute moments and approximate distributions self.process_results(sampler, n_levels) - # Postprocessing, MLMC is finished at this point def create_sampler(self, level_parameters): """ - Create: - # sampling pool - the way sample simulations are executed - # sample storage - stores sample results - # sampler - controls MLMC execution - :param level_parameters: list of lists - :return: mlmc.sampler.Sampler instance + Create the Sampler with storage and sampling pool and return it. + + Components created: + - sampling_pool: OneProcessPool (runs all samples in the same process) + - simulation_factory: instance of ShootingSimulation1D + - sample_storage: Memory() (in-memory storage) + - sampler: mlmc.sampler.Sampler + + :param level_parameters: List of level parameter lists (simulation-dependent). + :return: mlmc.sampler.Sampler instance configured for this example. """ - # Create OneProcessPool - all run in the same process + # Use OneProcessPool for simplicity (sequential execution). For parallel local runs, + # replace with ProcessPool(n) or ThreadPool(n). 
sampling_pool = OneProcessPool() - # There is another option mlmc.sampling_pool.ProcessPool() - supports local parallel sample simulation run - # sampling_pool = ProcessPool(n), n - number of parallel simulations, depends on computer architecture - # Simulation configuration which is passed to simulation constructor + # Simulation configuration dictionary passed to simulation factory simulation_config = { "start_position": np.array([0, 0]), "start_velocity": np.array([10, 0]), "area_borders": np.array([-100, 200, -300, 400]), "max_time": 10, - "complexity": 2, # used for initial estimate of number of operations per sample + "complexity": 2, # used as prior for cost estimates 'fields_params': dict(model='gauss', dim=1, sigma=1, corr_length=0.1), } - # Create simulation factory, instance of class that inherits from mlmc.sim.simulation + # Simulation factory (constructs LevelSimulation instances internally) simulation_factory = ShootingSimulation1D(config=simulation_config) - # Create simple sample storage - # Memory() storage keeps samples in computer main memory + # Lightweight in-memory sample storage sample_storage = Memory() - # We support also HDF file storage mlmc.sample_storage_hdf.SampleStorageHDF() - # sample_storage = SampleStorageHDF(file_path=path_to_HDF5_file) + # Alternative: HDF storage (persistent) - sample_storage_hdf.SampleStorageHDF(...) 
- # Create sampler - # Controls the execution of MLMC - sampler = Sampler(sample_storage=sample_storage, sampling_pool=sampling_pool, sim_factory=simulation_factory, + # Create and return the Sampler that orchestrates MLMC + sampler = Sampler(sample_storage=sample_storage, + sampling_pool=sampling_pool, + sim_factory=simulation_factory, level_parameters=level_parameters) return sampler def generate_samples(self, sampler, n_samples=None, target_var=None): """ - Generate MLMC samples - :param sampler: mlmc.sampler.Sampler instance - :param n_samples: None or list, number of samples at each level - :param target_var: target variance of MLMC estimates - :return: None + Schedule and generate MLMC samples, optionally targeting a global variance. + + :param sampler: mlmc.sampler.Sampler instance controlling sampling. + :param n_samples: None or list of exact sample counts per level. + :param target_var: Target variance for MLMC estimator (if not None). + :return: None (results are stored in sampler.sample_storage). """ - # The number of samples is set by user + # If user provided exact counts, set them; otherwise let sampler pick initial counts if n_samples is not None: sampler.set_initial_n_samples(n_samples) - # The number of initial samples is determined automatically else: sampler.set_initial_n_samples() - # Samples are scheduled and the program is waiting for all of them to be completed. 
+ + # Schedule initial samples and wait for them to complete sampler.schedule_samples() sampler.ask_sampling_pool_for_samples(sleep=self._sample_sleep, timeout=self._sample_timeout) self.all_collect(sampler) - # MLMC estimates target variance is set + # If a target variance is requested, iteratively estimate variances and add more samples if target_var is not None: - # The mlmc.quantity.quantity.Quantity instance is created - # parameters 'storage' and 'q_specs' are obtained from sample_storage, - # originally 'q_specs' is set in the simulation class + # Create a Quantity for the root results; needed for moments estimation root_quantity = make_root_quantity(storage=sampler.sample_storage, q_specs=sampler.sample_storage.load_result_format()) - # Moment functions object is created - # The MLMC algorithm determines number of samples according to the moments variance, - # Type of moment functions (Legendre by default) might affect the total number of MLMC samples + # Build moment functions (Legendre polynomials on estimated domain) moments_fn = self.set_moments(root_quantity, sampler.sample_storage, n_moments=self._n_moments) - estimate_obj = Estimate(root_quantity, sample_storage=sampler.sample_storage, - moments_fn=moments_fn) + estimate_obj = Estimate(root_quantity, sample_storage=sampler.sample_storage, moments_fn=moments_fn) - # Initial estimation of the number of samples at each level + # Initial variance and cost estimates from currently finished samples variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler.n_finished_samples) - # Firstly, the variance of moments and execution time of samples at each level are calculated from already finished samples + + # Compute initial recommended number of samples for each level n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, - n_levels=sampler.n_levels) - - ##### - # MLMC sampling algorithm - gradually schedules samples and refines the total number of samples - ##### - # 
Loop until number of estimated samples is greater than the number of scheduled samples - while not sampler.process_adding_samples(n_estimated, self._sample_sleep, self._adding_samples_coef, - timeout=self._sample_timeout): - # New estimation according to already finished samples + n_levels=sampler.n_levels) + + # Gradually schedule additional samples until the estimate stabilizes + while not sampler.process_adding_samples(n_estimated, + self._sample_sleep, + self._adding_samples_coef, + timeout=self._sample_timeout): + # Re-estimate variance / ops using newly finished samples and recompute targets variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler._n_scheduled_samples) n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, n_levels=sampler.n_levels) def set_moments(self, quantity, sample_storage, n_moments=25): + """ + Create a Legendre-moments function object for the given quantity. + + :param quantity: mlmc.quantity.quantity.Quantity instance (root quantity). + :param sample_storage: sample storage instance to estimate domain from samples. + :param n_moments: Number of moments (basis size) to construct. + :return: Legendre(n_moments, domain) instance to be used for moment estimation. + """ true_domain = Estimate.estimate_domain(quantity, sample_storage, quantile=self._quantile) return Legendre(n_moments, true_domain) def all_collect(self, sampler): """ - Collect samples, wait until all samples are finished - :param sampler: mlmc.sampler.Sampler object + Wait until all scheduled samples finish by repeatedly collecting finished samples. + + :param sampler: mlmc.sampler.Sampler instance. :return: None """ running = 1 @@ -161,37 +184,44 @@ def all_collect(self, sampler): def process_results(self, sampler, n_levels): """ - Process MLMC results - :param sampler: mlmc.sampler.Sampler instance - :param n_levels: int, number of MLMC levels + Postprocess completed samples: estimate moments and approximate distribution. 
+ + Steps: + - Load result format and build Quantity objects + - Choose a target item inside the quantity and check its mean + - Compute estimated domain, construct moment functions (Legendre) + - Estimate moments and their variances, compute distribution approximation + + :param sampler: mlmc.sampler.Sampler instance containing sample_storage. + :param n_levels: int, number of MLMC levels used in the run. :return: None """ sample_storage = sampler.sample_storage - # Load result format from the sample storage + + # Load result format (list of QuantitySpec) and create root Quantity result_format = sample_storage.load_result_format() - # Create Quantity instance representing our real quantity of interest root_quantity = make_root_quantity(sample_storage, result_format) - # It is possible to access items of the quantity according to the result format + # Access an example item from the nested quantity to demonstrate API target = root_quantity['target'] time = target[10] position = time['0'] q_value = position[0] - # Compute moments, first estimate domain of moment functions + # Estimate domain for moment functions from sample data and build moments function estimated_domain = Estimate.estimate_domain(q_value, sample_storage, quantile=self._quantile) moments_fn = Legendre(self._n_moments, estimated_domain) - # Create estimator for the quantity + # Create an estimator and compute moment means and variances estimator = Estimate(quantity=q_value, sample_storage=sample_storage, moments_fn=moments_fn) - # Estimate moment means and variances means, vars = estimator.estimate_moments(moments_fn) - # Generally, root quantity has different domain than its items + + # For root-level moments use separate estimated domain root_quantity_estimated_domain = Estimate.estimate_domain(root_quantity, sample_storage, - quantile=self._quantile) + quantile=self._quantile) root_quantity_moments_fn = Legendre(self._n_moments, root_quantity_estimated_domain) - # There is another possible 
approach to calculating all moments at once and then select desired quantity + # Alternative approach: compute moments for the entire root quantity and then extract target moments_quantity = moments(root_quantity, moments_fn=root_quantity_moments_fn, mom_at_bottom=True) moments_mean = estimate_mean(moments_quantity) target_mean = moments_mean['target'] @@ -199,35 +229,45 @@ def process_results(self, sampler, n_levels): location_mean = time_mean['0'] # locations: ['0'] value_mean = location_mean[0] # result shape: (1,) + # Quick assertion expected for the tutorial problem (value_mean should be 1) assert value_mean.mean[0] == 1 + + # Build approximate density for the selected quantity self.approx_distribution(estimator, n_levels, tol=1e-8) def approx_distribution(self, estimator, n_levels, tol=1.95): """ - Probability density function approximation - :param estimator: mlmc.estimator.Estimate instance, it contains quantity for which the density is approximated - :param n_levels: int, number of MLMC levels - :param tol: Tolerance of the fitting problem, with account for variances in moments. + Construct and display an approximate probability density function for the quantity. + + :param estimator: mlmc.estimator.Estimate instance which contains the quantity and methods + for constructing the density approximation. + :param n_levels: int, number of MLMC levels (used for optional plotting of raw samples). + :param tol: Tolerance parameter for the density fitting problem (default 1.95). 
:return: None """ distr_obj, result, _, _ = estimator.construct_density(tol=tol) distr_plot = Distribution(title="distributions", error_plot=None) distr_plot.add_distribution(distr_obj) + # Optionally overlay raw samples for single-level case if n_levels == 1: samples = estimator.get_level_samples(level_id=0)[..., 0] distr_plot.add_raw_samples(np.squeeze(samples)) + distr_plot.show(None) distr_plot.reset() @staticmethod def determine_level_parameters(n_levels, step_range): """ - Determine level parameters, - In this case, a step of fine simulation at each level - :param n_levels: number of MLMC levels - :param step_range: simulation step range - :return: list of lists + Determine level parameters for MLMC from the coarsest and finest step sizes. + + The default strategy interpolates in log-space between step_range[0] and step_range[1] + across n_levels and returns a list of single-entry lists containing the step for each level. + + :param n_levels: int number of MLMC levels. + :param step_range: Sequence [coarse_step, fine_step] where coarse_step > fine_step. + :return: list of lists where each inner list contains the parameter(s) for that level. 
""" assert step_range[0] > step_range[1] level_parameters = [] diff --git a/examples/shooting/shooting_1D_mcqmc.py b/examples/shooting/shooting_1D_mcqmc.py new file mode 100644 index 00000000..af251f06 --- /dev/null +++ b/examples/shooting/shooting_1D_mcqmc.py @@ -0,0 +1,274 @@ +import numpy as np +import mlmc.estimator as est +from mlmc.estimator import Estimate, estimate_n_samples_for_target_variance +from mlmc.sampler import Sampler +from mlmc.sample_storage import Memory +from mlmc.sampling_pool import OneProcessPool +from examples.shooting.simulation_shooting_1D import ShootingSimulation1D +from mlmc.quantity.quantity import make_root_quantity +from mlmc.quantity.quantity_estimate import moments, estimate_mean +from mlmc.moments import Legendre +from mlmc.plot.plots import Distribution + + +# Tutorial class for 1D shooting simulation, includes +# - samples scheduling +# - process results: +# - create Quantity instance +# - approximate density +class ProcessShooting1D: + """ + Example driver for a 1D shooting problem using the MLMC framework. + + Demonstrates: + - creating the Sampler, sampling_pool and sample storage, + - scheduling and collecting MLMC samples, + - estimating moments and building an approximate PDF for a quantity of interest. + """ + + def __init__(self): + """ + Initialize parameters, create sampler, schedule and collect samples, and postprocess. + + The constructor executes a full example run: + - determine level parameters, + - create sampler, + - generate samples (either user-specified or determined from a target variance), + - collect all results, + - postprocess and approximate distribution of the chosen quantity. 
+ """ + n_levels = 3 + # Number of MLMC levels + + step_range = [1, 1e-3] + # step_range [simulation step at the coarsest level, simulation step at the finest level] + + level_parameters = ProcessShooting1D.determine_level_parameters(n_levels, step_range) + # Determine each level parameters (in this case, simulation step at each level) + + self._sample_sleep = 0 # seconds to sleep while polling sampling pool + self._sample_timeout = 60 # maximum waiting time for sampling pool operations (seconds) + self._adding_samples_coef = 0.1 # coefficient used when adding samples adaptively + + self._n_moments = 20 + # number of generalized statistical moments used for MLMC sample estimation + self._quantile = 0.01 + # quantile used to estimate domain of distribution for moment basis + + # MLMC run + sampler = self.create_sampler(level_parameters=level_parameters) + # Create sampler (mlmc.Sampler instance) - controls MLMC run + self.generate_samples(sampler, n_samples=None, target_var=1e-3) + # Generate MLMC samples (either explicit counts or target variance) + self.all_collect(sampler) + # Wait for all scheduled samples to finish + + # Postprocessing + self.process_results(sampler, n_levels) + # Postprocessing complete + + def create_sampler(self, level_parameters): + """ + Create and configure sampler components: sampling pool, simulation factory and storage. + + :param level_parameters: list of per-level parameter lists (simulation-dependent). + :return: mlmc.sampler.Sampler instance configured with Memory storage and OneProcessPool. + """ + # Use OneProcessPool for sequential runs. Replace with ProcessPool/ThreadPool for parallel runs. 
+ sampling_pool = OneProcessPool() + + # Simulation configuration passed into the simulation factory + simulation_config = { + "start_position": np.array([0, 0]), + "start_velocity": np.array([10, 0]), + "area_borders": np.array([-100, 200, -300, 400]), + "max_time": 10, + "complexity": 2, # used for initial estimate of operations per sample + "fields_params": dict(model='gauss', dim=1, sigma=1, corr_length=0.1), + } + + # Create simulation factory (produces LevelSimulation instances) + simulation_factory = ShootingSimulation1D(config=simulation_config) + + # In-memory sample storage (alternative: HDF-based storage) + sample_storage = Memory() + + # Create and return the Sampler orchestrating MLMC + sampler = Sampler(sample_storage=sample_storage, + sampling_pool=sampling_pool, + sim_factory=simulation_factory, + level_parameters=level_parameters) + return sampler + + def generate_samples(self, sampler, n_samples=None, target_var=None): + """ + Schedule and generate MLMC samples. If target_var is provided, iteratively determine + and add samples until the MLMC variance target is reached. + + :param sampler: mlmc.sampler.Sampler instance controlling sampling. + :param n_samples: Optional list of exact sample counts per level. If provided, used as initial counts. + :param target_var: Optional float target variance for MLMC estimator. If provided, algorithm estimates counts. 
+ :return: None + """ + # Set initial number of samples (user-specified or default) + if n_samples is not None: + sampler.set_initial_n_samples(n_samples) + else: + sampler.set_initial_n_samples() + + # Schedule and start the initial batch of samples, then wait for completion + sampler.schedule_samples() + sampler.ask_sampling_pool_for_samples(sleep=self._sample_sleep, timeout=self._sample_timeout) + self.all_collect(sampler) + + # If a target_var is provided, compute required sample counts and add samples iteratively + if target_var is not None: + # Build a root quantity (required for moments estimation) + root_quantity = make_root_quantity(storage=sampler.sample_storage, + q_specs=sampler.sample_storage.load_result_format()) + + # Create moment functions (Legendre) on estimated domain + moments_fn = self.set_moments(root_quantity, sampler.sample_storage, n_moments=self._n_moments) + estimate_obj = Estimate(root_quantity, sample_storage=sampler.sample_storage, moments_fn=moments_fn) + + # Estimate variances and costs from finished samples + variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler.n_finished_samples) + + # Compute estimated number of samples per level for the target variance + n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, + n_levels=sampler.n_levels) + + # Iteratively add samples until the scheduler has scheduled enough + while not sampler.process_adding_samples(n_estimated, self._sample_sleep, self._adding_samples_coef, + timeout=self._sample_timeout): + variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler._n_scheduled_samples) + n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, + n_levels=sampler.n_levels) + + def set_moments(self, quantity, sample_storage, n_moments=25): + """ + Build Legendre moment basis on the domain estimated from samples. + + :param quantity: Quantity (or root quantity) used to estimate domain. 
+ :param sample_storage: Sample storage used to compute domain estimate. + :param n_moments: Number of Legendre basis functions / moments. + :return: Legendre(n_moments, domain) instance. + """ + true_domain = Estimate.estimate_domain(quantity, sample_storage, quantile=self._quantile) + return Legendre(n_moments, true_domain) + + def all_collect(self, sampler): + """ + Repeatedly collect finished samples until none are running. + + :param sampler: mlmc.sampler.Sampler instance to poll for finished samples. + :return: None + """ + running = 1 + while running > 0: + running = 0 + running += sampler.ask_sampling_pool_for_samples() + print("N running: ", running) + + def process_results(self, sampler, n_levels): + """ + Postprocess completed samples: + - build Quantity objects, + - compute moment means/variances, + - perform checks and consistency tests, + - approximate distribution for the target quantity. + + :param sampler: mlmc.sampler.Sampler instance (contains sample_storage). + :param n_levels: int, number of MLMC levels used in the run. 
+ :return: None + """ + sample_storage = sampler.sample_storage + + # Load result format and create root quantity + result_format = sample_storage.load_result_format() + root_quantity = make_root_quantity(sample_storage, result_format) + + print("N collected ", sample_storage.get_n_collected()) + + # Access a nested item (example of how to index Quantity) + target = root_quantity['target'] + time = target[10] + position = time['0'] + q_value = position[0] + + # Estimate domain from samples and build moments function + estimated_domain = Estimate.estimate_domain(q_value, sample_storage, quantile=self._quantile) + moments_fn = Legendre(self._n_moments, estimated_domain) + + # Estimator for the selected quantity + estimator = Estimate(quantity=q_value, sample_storage=sample_storage, moments_fn=moments_fn) + + # Compute moment means and variances + means, vars = estimator.estimate_moments(moments_fn) + + # Diagnostics and consistency checks + #est.plot_checks(quantity=q_value, sample_storage=sample_storage, moments_fn=moments_fn) + est.consistency_check(quantity=q_value, sample_storage=sample_storage) + estimator.kurtosis_check(q_value) + + # Optionally compute moments for full root quantity and extract target mean + root_quantity_estimated_domain = Estimate.estimate_domain(root_quantity, sample_storage, + quantile=self._quantile) + root_quantity_moments_fn = Legendre(self._n_moments, root_quantity_estimated_domain) + moments_quantity = moments(root_quantity, moments_fn=root_quantity_moments_fn, mom_at_bottom=True) + moments_mean = estimate_mean(moments_quantity) + target_mean = moments_mean['target'] + time_mean = target_mean[10] + location_mean = time_mean['0'] + value_mean = location_mean[0] + + # Example assertion for tutorial (problem-dependent) + assert value_mean.mean[0] == 1 + + # Build and show approximate density + self.approx_distribution(estimator, n_levels, tol=1e-8) + + def approx_distribution(self, estimator, n_levels, tol=1.95): + """ + Approximate and 
display the probability density function for the estimator's quantity. + + :param estimator: mlmc.estimator.Estimate instance (contains quantity and methods for density construction). + :param n_levels: int, number of MLMC levels (used for optional raw-sample overlay). + :param tol: Tolerance for density fitting (accounts for moment variances). + :return: None + """ + distr_obj, result, _, _ = estimator.construct_density(tol=tol) + distr_plot = Distribution(title="distributions", error_plot=None) + distr_plot.add_distribution(distr_obj) + + if n_levels == 1: + samples = estimator.get_level_samples(level_id=0)[..., 0] + distr_plot.add_raw_samples(np.squeeze(samples)) + + distr_plot.show(None) + distr_plot.reset() + + @staticmethod + def determine_level_parameters(n_levels, step_range): + """ + Determine parameters for each MLMC level (here a single step size per level). + + Interpolates between step_range[0] (coarse) and step_range[1] (fine) across levels. + + :param n_levels: int number of MLMC levels. + :param step_range: [coarse_step, fine_step] with coarse_step > fine_step. + :return: list of lists; each inner list contains the step parameter for that level. + """ + assert step_range[0] > step_range[1] + level_parameters = [] + for i_level in range(n_levels): + if n_levels == 1: + level_param = 1 + else: + level_param = i_level / (n_levels - 1) + level_parameters.append([step_range[0] ** (1 - level_param) * step_range[1] ** level_param]) + return level_parameters + + +if __name__ == "__main__": + ProcessShooting1D() diff --git a/examples/shooting/shooting_2D.py b/examples/shooting/shooting_2D.py index b58ebe73..06d3bbcb 100644 --- a/examples/shooting/shooting_2D.py +++ b/examples/shooting/shooting_2D.py @@ -14,55 +14,85 @@ class ProcessShooting2D: + """ + Example driver for a 2D shooting simulation using the MLMC framework. 
+ + Demonstrates: + - building sampler, sampling pool and sample storage, + - scheduling and collecting MLMC samples, + - estimating moments and approximating probability densities for quantities of interest. + """ def __init__(self): + """ + Initialize parameters, create sampler, schedule and collect samples, and postprocess. + + The constructor runs a full example MLMC workflow: + - determine level parameters, + - create sampler and sampling pool, + - generate MLMC samples (either user-specified or adaptively from target variance), + - wait for completion and postprocess results. + """ n_levels = 5 # Number of MLMC levels step_range = [0.05, 0.005] # step_range [simulation step at the coarsest level, simulation step at the finest level] level_parameters = ProcessShooting2D.determine_level_parameters(n_levels, step_range) - # Determine each level parameters (in this case, simulation step at each level), level_parameters should be - # simulation dependent - self._sample_sleep = 0 # 30 - # Time to do nothing just to make sure the simulations aren't constantly checked, useful mainly for PBS run + # Determine each level parameters (in this case, simulation step at each level) + + self._sample_sleep = 0 + # Seconds to sleep while polling sampling pool self._sample_timeout = 60 - # Maximum waiting time for running simulations + # Maximum waiting time for sampling pool operations (seconds) self._adding_samples_coef = 0.1 self._n_moments = 20 # number of generalized statistical moments used for MLMC number of samples estimation self._quantile = 0.001 - # Setting parameters that are utilized when scheduling samples - ### + # quantile used to estimate domain for moment basis functions + # MLMC run - ### sampler = self.create_sampler(level_parameters=level_parameters) - # Create sampler (mlmc.Sampler instance) - crucial class that controls MLMC run + # Create sampler (mlmc.Sampler instance) - controls MLMC run self.generate_samples(sampler, n_samples=None, target_var=1e-3) - 
# Generate MLMC samples, there are two ways: - # 1) set exact number of samples at each level, - # e.g. for 5 levels - self.generate_samples(sampler, n_samples=[1000, 500, 250, 100, 50]) - # 2) set target variance of MLMC estimates, - # e.g. self.generate_samples(sampler, n_samples=None, target_var=1e-6) + # Generate MLMC samples: either explicit counts or by target variance self.all_collect(sampler) - # Check if all samples are finished - ### + # Wait until all samples finish + # Postprocessing - ### self.process_results(sampler, n_levels) - # Postprocessing, MLMC is finished at this point + # Postprocessing complete def create_estimator(self, quantity, sample_storage): + """ + Create an Estimate object for a given quantity and storage using Legendre moments. + + :param quantity: Quantity object (mlmc.quantity.quantity.Quantity item) to estimate. + :param sample_storage: sample storage instance used to estimate domain. + :return: mlmc.estimator.Estimate instance configured for the quantity. + """ estimated_domain = Estimate.estimate_domain(quantity, sample_storage, quantile=self._quantile) moments_fn = Legendre(self._n_moments, estimated_domain) - # Create estimator for your quantity - return Estimate(quantity=quantity, sample_storage=sample_storage, - moments_fn=moments_fn) + return Estimate(quantity=quantity, sample_storage=sample_storage, moments_fn=moments_fn) def process_results(self, sampler, n_levels): + """ + Postprocess completed MLMC samples: form quantities, create estimators and approximate distributions. + + Steps performed: + - load result format and create root Quantity, + - extract x and y components of the target item, + - build per-component estimators, + - compute root-level moments and means, + - approximate and display distributions for x and y. + + :param sampler: mlmc.sampler.Sampler instance containing completed samples. + :param n_levels: int, number of MLMC levels used. 
+ :return: None + """ sample_storage = sampler.sample_storage # Load result format from sample storage result_format = sample_storage.load_result_format() - # Create quantity instance representing your real quantity of interest + # Create quantity instance representing the quantity of interest root_quantity = make_root_quantity(sample_storage, result_format) # You can access item of quantity according to result format @@ -72,15 +102,15 @@ def process_results(self, sampler, n_levels): x_quantity_value = position[0] y_quantity_value = position[1] - # Create estimator for quantities + # Create estimators for each coordinate component x_estimator = self.create_estimator(x_quantity_value, sample_storage) y_estimator = self.create_estimator(y_quantity_value, sample_storage) + # Optionally compute moments for full root quantity (and extract means) root_quantity_estimated_domain = Estimate.estimate_domain(root_quantity, sample_storage, quantile=self._quantile) root_quantity_moments_fn = Legendre(self._n_moments, root_quantity_estimated_domain) - # There is another possible approach to calculating all moments at once and then choose quantity moments_quantity = moments(root_quantity, moments_fn=root_quantity_moments_fn, mom_at_bottom=True) moments_mean = estimate_mean(moments_quantity) target_mean = moments_mean['target'] @@ -88,15 +118,17 @@ def process_results(self, sampler, n_levels): location_mean = time_mean['0'] # locations: ['0'] value_mean = location_mean[0] # result shape: (1,) + # Display approximated distributions for both coordinates self.approx_distribution(x_estimator, n_levels, tol=1e-8) self.approx_distribution(y_estimator, n_levels, tol=1e-8) def approx_distribution(self, estimator, n_levels, tol=1.95): """ - Probability density function approximation - :param estimator: mlmc.estimator.Estimate instance, it contains quantity for which the density is approximated - :param n_levels: int, number of MLMC levels - :param tol: Tolerance of the fitting problem, 
with account for variances in moments. + Approximate and display the probability density function for the estimator's quantity. + + :param estimator: mlmc.estimator.Estimate instance providing construct_density(). + :param n_levels: int, number of MLMC levels (used to optionally overlay raw samples). + :param tol: float, tolerance for density-fitting routine (affects regularization with moment variances). :return: None """ distr_obj, result, _, _ = estimator.construct_density(tol=tol) @@ -111,26 +143,27 @@ def approx_distribution(self, estimator, n_levels, tol=1.95): def create_sampler(self, level_parameters): """ - Simulation dependent configuration - :return: mlmc.sampler instance + Create sampler, sampling pool and simulation factory for the 2D shooting example. + + :param level_parameters: list of per-level parameter lists. + :return: mlmc.sampler.Sampler instance configured with Memory storage and OneProcessPool. """ - # Create Pbs sampling pool sampling_pool = OneProcessPool() simulation_config = { "start_position": np.array([0, 0]), "start_velocity": np.array([10, 0]), - "area_borders": np.array([-100, 200, -300, 400]), + "area_borders": np.array([-100, 200, -300, 400]), "max_time": 10, "complexity": 2, # used for initial estimate of number of operations per sample - 'fields_params': dict(model='gauss', dim=1, sigma=1, corr_length=0.1), + "fields_params": dict(model='gauss', dim=1, sigma=1, corr_length=0.1), } # Create simulation factory simulation_factory = ShootingSimulation2D(config=simulation_config) - # Create HDF sample storage + # In-memory storage (Memory). Replace with HDF storage if persistence is needed. 
sample_storage = Memory() - # Create sampler, it manages sample scheduling and so on + # Create sampler which orchestrates scheduling and storage sampler = Sampler(sample_storage=sample_storage, sampling_pool=sampling_pool, sim_factory=simulation_factory, level_parameters=level_parameters) @@ -138,59 +171,52 @@ def create_sampler(self, level_parameters): def generate_samples(self, sampler, n_samples=None, target_var=None): """ - Generate MLMC samples - :param sampler: mlmc.sampler.Sampler instance - :param n_samples: None or list, number of samples at each level - :param target_var: target variance of MLMC estimates + Schedule and generate MLMC samples. If target_var is provided, iteratively determine + additional samples required to reach the variance target. + + :param sampler: mlmc.sampler.Sampler instance controlling the run. + :param n_samples: Optional list of exact sample counts per level. + :param target_var: Optional float target variance for MLMC estimator. :return: None """ - # The number of samples is set by user + # Set initial samples either from user or default logic if n_samples is not None: sampler.set_initial_n_samples(n_samples) - # The number of initial samples is determined automatically else: sampler.set_initial_n_samples() - # Samples are scheduled and the program is waiting for all of them to be completed. 
+ + # Schedule and wait for initial batch of samples sampler.schedule_samples() sampler.ask_sampling_pool_for_samples(sleep=self._sample_sleep, timeout=self._sample_timeout) self.all_collect(sampler) - # MLMC estimates target variance is set + # If a global target variance is specified, estimate and iteratively add samples if target_var is not None: - # The mlmc.quantity.quantity.Quantity instance is created - # parameters 'storage' and 'q_specs' are obtained from sample_storage, - # originally 'q_specs' is set in the simulation class + # Build a root quantity required for moment estimation root_quantity = make_root_quantity(storage=sampler.sample_storage, q_specs=sampler.sample_storage.load_result_format()) - # Moment functions object is created - # The MLMC algorithm determines number of samples according to the moments variance, - # Type of moment functions (Legendre by default) might affect the total number of MLMC samples moments_fn = self.set_moments(root_quantity, sampler.sample_storage, n_moments=self._n_moments) estimate_obj = Estimate(root_quantity, sample_storage=sampler.sample_storage, moments_fn=moments_fn) - # Initial estimation of the number of samples at each level + # Estimate per-level variances and costs from finished samples variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler.n_finished_samples) - # Firstly, the variance of moments and execution time of samples at each level are calculated from already finished samples n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, n_levels=sampler.n_levels) - ##### - # MLMC sampling algorithm - gradually schedules samples and refines the total number of samples - ##### - # Loop until number of estimated samples is greater than the number of scheduled samples + # Loop until sampler has scheduled enough samples while not sampler.process_adding_samples(n_estimated, self._sample_sleep, self._adding_samples_coef, timeout=self._sample_timeout): - # New estimation 
according to already finished samples variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler._n_scheduled_samples) n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, n_levels=sampler.n_levels) def all_collect(self, sampler): """ - Collect samples - :param sampler: mlmc.Sampler object + Repeatedly poll the sampling pool and collect finished samples until none remain running. + + :param sampler: mlmc.sampler.Sampler instance. :return: None """ running = 1 @@ -200,17 +226,28 @@ def all_collect(self, sampler): print("N running: ", running) def set_moments(self, quantity, sample_storage, n_moments=25): + """ + Build Legendre moments object for a given quantity using the domain estimated from samples. + + :param quantity: quantity object (used to estimate domain). + :param sample_storage: storage used to compute the domain estimate. + :param n_moments: number of Legendre polynomials to use. + :return: Legendre instance configured for the estimated domain. + """ true_domain = Estimate.estimate_domain(quantity, sample_storage, quantile=self._quantile) return Legendre(n_moments, true_domain) @staticmethod def determine_level_parameters(n_levels, step_range): """ - Determine level parameters, - In this case, a step of fine simulation at each level - :param n_levels: number of MLMC levels - :param step_range: simulation step range - :return: List of lists + Determine level parameters for MLMC. + + For this example a single per-level 'step' is returned in a list, interpolating + (geometrically) between step_range[0] and step_range[1]. + + :param n_levels: int number of MLMC levels. + :param step_range: [coarse_step, fine_step] with coarse_step > fine_step. + :return: List[List[float]]: per-level parameters (each inner list contains the step value). 
""" assert step_range[0] > step_range[1] level_parameters = [] diff --git a/examples/shooting/simulation_shooting_1D.py b/examples/shooting/simulation_shooting_1D.py index b5646a09..81615794 100644 --- a/examples/shooting/simulation_shooting_1D.py +++ b/examples/shooting/simulation_shooting_1D.py @@ -10,8 +10,17 @@ def create_corr_field(model='gauss', corr_length=0.1, dim=1, log=True, sigma=1, mode_no=1000): """ - Create correlated random field - :return: mlmc.random.correlated_field.Field instance + Create a correlated random field wrapper used by the simulations. + + :param model: str, covariance model name. Supported values: + 'gauss' (default), 'exp', 'TPLgauss', 'TPLexp', 'TPLStable'. + :param corr_length: float, correlation length (len_scale) for the covariance model. + :param dim: int, spatial dimension of the covariance model. + :param log: bool, whether the generated random field is treated as log-field. + :param sigma: float, standard deviation (sigma) for the random field. + :param mode_no: int, number of Fourier modes (used by GSTools SRF). + :return: cf.Field instance that wraps a GSToolsSpatialCorrelatedField configured + with the selected covariance model. """ if model == 'exp': model = gstools.Exponential(dim=dim, len_scale=corr_length) @@ -28,22 +37,42 @@ def create_corr_field(model='gauss', corr_length=0.1, dim=1, log=True, sigma=1, class ShootingSimulation1D(Simulation): + """ + Simple 1D "shooting" simulation used as example for MLMC workflow. + + The class implements: + - level_instance(...) to create LevelSimulation configurations, + - calculate(...) static method to produce paired (fine, coarse) samples, + - result_format() describing the output QuantitySpec. + """ def __init__(self, config): """ - :param config: Dict, simulation configuration + Initialize simulation factory. + + :param config: Dict containing simulation configuration. 
Expected keys include: + - 'start_position': np.ndarray shape (2,) + - 'start_velocity': np.ndarray shape (2,) + - 'area_borders': np.ndarray [xmin, xmax, ymin, ymax] + - 'max_time': float + - 'complexity': float used in n_ops_estimate + - 'fields_params': dict passed to create_corr_field() """ super().__init__() self._config = config - # This attribute is obligatory, if True workspace is created + # If True, a workspace for each sample will be created (not used here). self.need_workspace: bool = False def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation: """ - Called from mlmc.Sampler, it creates single instance of LevelSimulation (mlmc.level_simulation) - :param fine_level_params: fine simulation step at particular level - :param coarse_level_params: coarse simulation step at particular level - :return: mlmc.LevelSimulation object + Create a LevelSimulation object for given fine and coarse level parameters. + + This constructs the simulation configuration for one MLMC level (fine/coarse pair) + and returns a LevelSimulation that knows which calculate() function to call. + + :param fine_level_params: List[float], typically a single-element list containing the fine step size. + :param coarse_level_params: List[float], typically a single-element list containing the coarse step size. + :return: LevelSimulation configured with the derived config dictionary and task size. 
""" config = copy.deepcopy(self._config) config["fine"] = {} @@ -51,6 +80,8 @@ def level_instance(self, fine_level_params: List[float], coarse_level_params: Li config["fine"]["step"] = fine_level_params[0] config["coarse"]["step"] = coarse_level_params[0] config["res_format"] = self.result_format() + + # compute number of elements per level from complexity and step config["fine"]["n_elements"] = int(config["complexity"] / config["fine"]["step"]) if config["coarse"]["step"] > 0: config["coarse"]["n_elements"] = int(config["complexity"] / config["coarse"]["step"]) @@ -63,20 +94,33 @@ def level_instance(self, fine_level_params: List[float], coarse_level_params: Li @staticmethod def calculate(config, seed): """ - Calculate fine and coarse sample and also extract their results - :param config: dictionary containing simulation configuration - :param seed: random number generator seed - :return: np.ndarray, np.ndarray + Build random field and produce a paired (fine, coarse) simulation result. + + This is the static worker function used by LevelSimulation to generate one sample pair. + It creates the correlated random field, samples it on the fine+coarse point set and + runs the deterministic integrator for fine and coarse input arrays. + + :param config: dict, level-specific configuration produced by level_instance(). + It must include 'fields_params', and 'fine'/'coarse' with 'n_elements' + and 'step' keys, plus other simulation settings. + :param seed: int, RNG seed passed by sampler (currently unused inside; RNG is handled by field implementation). + :return: tuple (fine_result, coarse_result), each is a scalar float (y coordinate at final time or NaN). 
""" - # Create random field structure + # create random field generator (GSTools wrapper) field = create_corr_field(**config['fields_params']) + + # create sample points (concatenated fine + coarse) and remember fine size points, n_fine_points = ShootingSimulation1D.create_points(config) + + # assign points to field and generate realization field.set_points(points) - fine_input_sample, coarse_input_sample = ShootingSimulation1D.generate_random_sample(field, - coarse_step=config["coarse"]["step"], - n_fine_elements=n_fine_points) + # sample fine and coarse inputs from the realizations + fine_input_sample, coarse_input_sample = ShootingSimulation1D.generate_random_sample( + field, coarse_step=config["coarse"]["step"], n_fine_elements=n_fine_points + ) + # run simulator for fine and coarse inputs fine_res = ShootingSimulation1D._run_sample(config, fine_input_sample) coarse_res = ShootingSimulation1D._run_sample(config, coarse_input_sample) @@ -85,36 +129,46 @@ def calculate(config, seed): @staticmethod def _run_sample(config, rnd_input_samples): """ - Simulation of 1D shooting - :param config: dictionary containing simulation configuration - :param rnd_input_samples: np.ndarray, shape: (number of elements ) + Run the explicit Euler "shooting" simulation for a given input array. + + The dynamics are: + X_{k+1} = X_k + dt * V_k + V_{k+1} = V_k + dt * F_k + where F_k is drawn from the correlated field sample (rnd_input_samples). + + If the simulated projectile exits `area_borders`, the function returns np.nan. + + :param config: dict, simulation configuration (same structure as in calculate()). + :param rnd_input_samples: np.ndarray-like, shape (n_elements,) with random forcing values. + :return: float, y-coordinate of the projectile at the end of simulation + (or np.nan if it left the domain). 
""" n_elements = len(rnd_input_samples) - x, y, time, n = 0, 0, 0, 0 - X = config["start_position"] - V = config['start_velocity'] + X = config["start_position"].copy() # 2-vector [x, y] + V = config['start_velocity'].copy() # 2-vector + y = 0.0 - # Time step + # compute time step (if n_elements == 0 dt is unused and loop is skipped) if n_elements != 0: dt = config['max_time'] / n_elements - # Loop through random array F + + # step through the random forcing and integrate for i in range(n_elements): - # New coordinates + # update position and velocity (explicit Euler) X = X + dt * V - # New vector of speed V = V + dt * rnd_input_samples[i] x = X[0] y = X[1] - if x > config['area_borders'][1] or x < config['area_borders'][0] or\ - y > config['area_borders'][3] or y < config['area_borders'][2]: + # check domain boundaries; if outside, record NaN and stop + if x > config['area_borders'][1] or x < config['area_borders'][0] or \ + y > config['area_borders'][3] or y < config['area_borders'][2]: y = np.nan break + # current time (not used beyond stopping condition) time = dt * (i + 1) - - # End simulation if time is bigger then maximum time if time >= config['max_time']: break @@ -122,23 +176,43 @@ def _run_sample(config, rnd_input_samples): @staticmethod def create_points(config): + """ + Create concatenated evaluation points for the random field for fine and coarse parts. + + Points are returned as an (n_fine + n_coarse, 1) array where each row is a coordinate + at which the GSTools SRF will be evaluated. The first n_fine rows correspond to the fine mesh, + the remaining rows (if any) to the coarse mesh. + + :param config: dict, level configuration that contains 'fine' and 'coarse' sub-dicts + with 'n_elements' integer entries and other simulation parameters. 
+ :return: tuple (points, n_fine_elements) + - points: np.ndarray shape (n_fine + n_coarse, 1) + - n_fine_elements: int number of fine elements (split index) + """ n_fine_elements = config["fine"]["n_elements"] n_coarse_elements = config["coarse"]["n_elements"] assert n_fine_elements > n_coarse_elements + # build a 1D coordinate array for GSTools evaluation; here we use distance ~ velocity * time points = np.empty((n_fine_elements + n_coarse_elements, 1)) - points[:, 0] = np.concatenate((np.linspace(0, config["start_velocity"][0]*config["max_time"], - n_fine_elements), - np.linspace(0, config["start_velocity"][0]*config["max_time"], - n_coarse_elements))) + points[:, 0] = np.concatenate(( + np.linspace(0, config["start_velocity"][0] * config["max_time"], n_fine_elements), + np.linspace(0, config["start_velocity"][0] * config["max_time"], n_coarse_elements) + )) return points, n_fine_elements @staticmethod def generate_random_sample(field, coarse_step, n_fine_elements): """ - Generate random field, both fine and coarse part. - :return: List, List + Sample a realization from the provided correlated field and split it into fine and coarse parts. + + :param field: cf.Field instance created by create_corr_field(...) and already `set_points()` called. + :param coarse_step: float, coarse level step (if 0 then no coarse part is returned). + :param n_fine_elements: int, number of entries that belong to the fine-level part (split index). + :return: tuple (fine_input_sample, coarse_input_sample) + - fine_input_sample: np.ndarray shape (n_fine_elements,) + - coarse_input_sample: np.ndarray shape (n_coarse_elements,) or [] if coarse_step == 0 """ field_sample = field.sample() fine_input_sample = field_sample[:n_fine_elements] @@ -148,13 +222,22 @@ def generate_random_sample(field, coarse_step, n_fine_elements): return fine_input_sample, coarse_input_sample def n_ops_estimate(self, step): + """ + Estimate computational cost (# of operations) for a sample at a given step size. 
+ + :param step: float, simulation step size for the fine level + :return: float, heuristic estimate of cost used by MLMC to balance work across levels + """ return (1 / step) ** self._config['complexity'] * np.log(max(1 / step, 2.0)) def result_format(self) -> List[QuantitySpec]: """ - Result format - :return: + Describe the output format of the simulation for MLMC. + + :return: List[QuantitySpec] describing the result vector(s). Here we return a single + scalar quantity 'target' representing y-position at final time. """ spec1 = QuantitySpec(name="target", unit="m", shape=(1,), times=[10], locations=['0']) return [spec1] + diff --git a/examples/shooting/simulation_shooting_2D.py b/examples/shooting/simulation_shooting_2D.py index 79571eb1..f906f503 100644 --- a/examples/shooting/simulation_shooting_2D.py +++ b/examples/shooting/simulation_shooting_2D.py @@ -9,45 +9,78 @@ def create_corr_field(model='gauss', corr_length=0.1, dim=1, log=True, sigma=1, mode_no=1000): """ - Create random fields - :return: + Create a correlated random field wrapper (GSTools model -> mlmc.random.correlated_field.Field). + + :param model: str, covariance model name. Supported strings: 'gauss' (default), 'exp', + 'TPLgauss', 'TPLexp', 'TPLStable'. + :param corr_length: float, correlation length (len_scale) supplied to the GSTools model. + :param dim: int, spatial dimension of the covariance model. + :param log: bool, if True the field will be considered in log-space when sampling. + :param sigma: float, multiplicative sigma parameter for the SRF generator. + :param mode_no: int, number of Fourier modes used by GSTools SRF. + :return: cf.Field — an mlmc.random.correlated_field.Field wrapping a GSTools SRF. 
""" if model == 'exp': model = gstools.Exponential(dim=dim, len_scale=corr_length) elif model == 'TPLgauss': - model = gstools.TPLGaussian(dim=dim, len_scale=corr_length) + model = gstools.TPLGaussian(dim=dim, len_scale=corr_length) elif model == 'TPLexp': - model = gstools.TPLExponential(dim=dim, len_scale=corr_length) + model = gstools.TPLExponential(dim=dim, len_scale=corr_length) elif model == 'TPLStable': - model = gstools.TPLStable(dim=dim, len_scale=corr_length) + model = gstools.TPLStable(dim=dim, len_scale=corr_length) else: - model = gstools.Gaussian(dim=dim, len_scale=corr_length) + model = gstools.Gaussian(dim=dim, len_scale=corr_length) + return cf.Field('conductivity', cf.GSToolsSpatialCorrelatedField(model, log=log, sigma=sigma, mode_no=mode_no)) class ShootingSimulation2D(Simulation): + """ + Example 2D shooting simulation used with MLMC sampler. + + Implements: + - level_instance(...) to create per-level LevelSimulation config, + - calculate(...) static method used to produce a (fine, coarse) sample pair, + - result_format() describing output QuantitySpec. + """ def __init__(self, config): """ - :param config: Dict, simulation configuration + Initialize the simulation factory. + + :param config: dict containing base simulation configuration. Expected keys: + - 'start_position': np.ndarray shape (2,) + - 'start_velocity': np.ndarray shape (2,) + - 'area_borders': np.ndarray [xmin, xmax, ymin, ymax] + - 'max_time': float + - 'complexity': float used by n_ops_estimate + - 'fields_params': dict passed to create_corr_field() """ super().__init__() self.config = config - # This attribute is obligatory + # If True, a sample workspace will be created (not required here) self.need_workspace: bool = False def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation: """ + Construct LevelSimulation for given fine/coarse parameters. 
+ + This method mutates a copy of self.config into a level-specific config and + returns a LevelSimulation that will call ShootingSimulation2D.calculate to + generate samples for this level. - :param fine_level_params: - :param coarse_level_params: - :return: + :param fine_level_params: list-like, typically [fine_step] + :param coarse_level_params: list-like, typically [coarse_step] + :return: LevelSimulation configured for this level """ + # Update level-specific fields in self.config self.config["fine"] = {} self.config["coarse"] = {} self.config["fine"]["step"] = fine_level_params[0] self.config["coarse"]["step"] = coarse_level_params[0] self.config["res_format"] = self.result_format() + + # compute number of elements per level from complexity and step sizes self.config["fine"]["n_elements"] = int(self.config["complexity"] / self.config["fine"]["step"]) if self.config["coarse"]["step"] > 0: self.config["coarse"]["n_elements"] = int(self.config["complexity"] / self.config["coarse"]["step"]) @@ -60,84 +93,124 @@ def level_instance(self, fine_level_params: List[float], coarse_level_params: Li @staticmethod def calculate(config, seed): """ - Calculate fine and coarse sample and also extract their results - :param config: dictionary containing simulation configuration - :param seed: random number generator seed - :return: np.ndarray, np.ndarray + Build correlated fields and produce paired fine/coarse simulation results. + + This static function is intended to be executed by the LevelSimulation worker. + It: + - creates two independent correlated fields (x and y components), + - sets evaluation points, + - samples the fields and splits the realization into fine and coarse parts, + - simulates the projectile dynamics for fine/coarse inputs. + + :param config: dict, level-specific configuration produced by level_instance(). + :param seed: int, RNG seed (currently not used directly; GSTools RNG is internal). 
+ :return: tuple (fine_result, coarse_result) + - fine_result: np.ndarray or scalar describing the fine-level output + - coarse_result: np.ndarray or scalar describing the coarse-level output """ - # Create random field structure + # Create independent correlated random fields for X and Y forcing field_x = create_corr_field(**config['fields_params']) field_y = create_corr_field(**config['fields_params']) + # create concatenated points and get number of fine points points, n_fine_points = ShootingSimulation2D.create_points(config) field_x.set_points(points) field_y.set_points(points) - fine_input_sample, coarse_input_sample = ShootingSimulation2D.generate_random_sample(field_x, field_y, - coarse_step=config["coarse"]["step"], - n_fine_elements=n_fine_points) + # sample and split into fine and coarse inputs + fine_input_sample, coarse_input_sample = ShootingSimulation2D.generate_random_sample( + field_x, field_y, coarse_step=config["coarse"]["step"], n_fine_elements=n_fine_points + ) + # run dynamics for fine and coarse inputs fine_res = ShootingSimulation2D._run_sample(config, fine_input_sample) - coarse_res = ShootingSimulation2D._run_sample(config, fine_input_sample) + coarse_res = ShootingSimulation2D._run_sample(config, coarse_input_sample) return fine_res, coarse_res @staticmethod def _run_sample(config, rnd_input_samples): """ - Simulation of 2D shooting - :param config: dictionary containing simulation configuration - :param rnd_input_samples: np.ndarray, shape: (number of elements ) + Run the explicit Euler integrator for the 2D shooting model. + + Dynamics: + X_{k+1} = X_k + dt * V_k + V_{k+1} = V_k + dt * F_k + + The method returns the final position vector X (or [nan, nan] if the projectile exits area_borders). + + :param config: dict, level-specific simulation configuration. + :param rnd_input_samples: array-like with shape (n_elements, 2) or (n_elements,) depending on caller. 
+ :return: np.ndarray shape (2,) representing final [x, y] or [nan, nan] if out-of-bounds. """ n_elements = len(rnd_input_samples) - X = config["start_position"] - V = config['start_velocity'] + # Use copies so we don't mutate config objects + X = np.array(config["start_position"], dtype=float).copy() + V = np.array(config['start_velocity'], dtype=float).copy() - # Time step + # compute time step if there are elements if n_elements != 0: dt = config['max_time'] / n_elements - # Loop through random array F + # integrate step-by-step for i in range(n_elements): - # New coordinates + # update position X = X + dt * V - # New vector of speed + # update velocity: rnd_input_samples[i] must be 2-vector (Fx, Fy) V = V + dt * rnd_input_samples[i] x = X[0] y = X[1] - if x > config['area_borders'][1] or x < config['area_borders'][0] or\ - y > config['area_borders'][3] or y < config['area_borders'][2]: - X = [np.nan, np.nan] + # if projectile leaves the bounding box, return NaNs + if x > config['area_borders'][1] or x < config['area_borders'][0] or \ + y > config['area_borders'][3] or y < config['area_borders'][2]: + X = np.array([np.nan, np.nan]) break time = dt * (i + 1) - - # End simulation if time is bigger then maximum time if time >= config['max_time']: break + return X @staticmethod def create_points(config): + """ + Build concatenated evaluation points for the correlated fields (fine first, then coarse). + + The points are 1D coordinates used for the GSTools SRF evaluation; the first + n_fine rows correspond to the fine mesh, the remainder to the coarse mesh. + + :param config: dict with 'fine' and 'coarse' sub-dicts containing 'n_elements'. 
+ :return: tuple (points, n_fine_elements) + - points: np.ndarray shape (n_fine + n_coarse, 1) + - n_fine_elements: int number of fine-level points + """ n_fine_elements = config["fine"]["n_elements"] n_coarse_elements = config["coarse"]["n_elements"] assert n_fine_elements > n_coarse_elements points = np.empty((n_fine_elements + n_coarse_elements, 1)) - points[:, 0] = np.concatenate((np.linspace(0, config["start_velocity"][0]*config["max_time"], - n_fine_elements), - np.linspace(0, config["start_velocity"][0]*config["max_time"], - n_coarse_elements))) + points[:, 0] = np.concatenate(( + np.linspace(0, config["start_velocity"][0] * config["max_time"], n_fine_elements), + np.linspace(0, config["start_velocity"][0] * config["max_time"], n_coarse_elements) + )) return points, n_fine_elements @staticmethod def generate_random_sample(field_x, field_y, coarse_step, n_fine_elements): """ - Generate random field, both fine and coarse part. - :return: List, List + Sample two correlated fields (x and y components) and split the realization into fine and coarse parts. + + :param field_x: cf.Field for x-component (already had set_points called). + :param field_y: cf.Field for y-component (already had set_points called). + :param coarse_step: float, coarse-level step size (if 0 there is no coarse part). + :param n_fine_elements: int, how many samples belong to the fine-level portion. + :return: tuple (fine_input_sample, coarse_input_sample) + - fine_input_sample: np.ndarray shape (n_fine_elements, 2) with x and y forcing + - coarse_input_sample: np.ndarray shape (n_coarse_elements, 2) or empty array if coarse_step==0 """ field_sample_x = field_x.sample() field_sample_y = field_y.sample() @@ -153,13 +226,19 @@ def generate_random_sample(field_x, field_y, coarse_step, n_fine_elements): return fine_input_sample, coarse_input_sample def n_ops_estimate(self, step): + """ + Heuristic estimate of computational expense for a single fine-level sample. 
+ + :param step: float, fine-level step size. + :return: float, cost estimate used by MLMC scheduler. + """ return (1 / step) ** self.config['complexity'] * np.log(max(1 / step, 2.0)) def result_format(self) -> List[QuantitySpec]: """ - Result format - :return: + Describe the result format (QuantitySpec) produced by calculate(). + + :return: list of QuantitySpec objects describing outputs. Here a single 2D target vector. """ spec1 = QuantitySpec(name="target", unit="m", shape=(2,), times=[10], locations=['0']) return [spec1] - diff --git a/examples/synthetic_quantity.py b/examples/synthetic_quantity.py index ad792dd0..0fcec175 100644 --- a/examples/synthetic_quantity.py +++ b/examples/synthetic_quantity.py @@ -19,6 +19,27 @@ def fill_sample_storage(sample_storage, result_format): + """ + Populate a sample storage with synthetic scheduled and finished samples for tests. + + This saves: + - global meta-data (result_format, level_parameters) + - scheduled samples per level + - finished (successful) samples per level + - number-of-operations records per level + + Parameters + ---------- + sample_storage : mlmc.sample_storage.* instance + Storage object to be filled (Memory or HDF). + result_format : list[QuantitySpec] + Result format used to determine sizes of flattened sample vectors. + + Returns + ------- + tuple + (result_format, sizes) where `sizes` is the last computed per-quantity sizes list. + """ np.random.seed(123) n_levels = 3 @@ -55,7 +76,37 @@ def fill_sample_storage(sample_storage, result_format): return result_format, sizes + def create_sampler(clean=True, memory=True, n_moments=5): + """ + Create and prepare a Sampler with synthetic simulation factory and initial scheduled samples. 
+ + The function: + - creates a temporary working directory (when clean=True), + - constructs a SynthSimulationForTests factory, + - chooses Memory or HDF sample storage, + - creates a OneProcessPool sampling_pool, + - constructs a Sampler instance, + - prepares a Monomial moments function, + - schedules an initial set of samples and triggers immediate sample execution. + + Parameters + ---------- + clean : bool, optional + If True, removes and recreates the test working directory before use (default True). + memory : bool, optional + If True use Memory() storage; otherwise use SampleStorageHDF (default True). + n_moments : int, optional + Number of monomial moments (unused in this helper besides constructing moments_fn) (default 5). + + Returns + ------- + tuple + (sampler, simulation_factory, moments_fn) + - sampler: mlmc.sampler.Sampler instance with initial samples scheduled/executed + - simulation_factory: SynthSimulationForTests instance used by the sampler + - moments_fn: Monomial moments object constructed for the true_domain + """ # Set work dir np.random.seed(1234) n_levels = 3 @@ -76,10 +127,6 @@ def create_sampler(clean=True, memory=True, n_moments=5): simulation_config = dict(distr=distr, complexity=2, nan_fraction=failed_fraction, sim_method='_sample_fn') simulation_factory = SynthSimulationForTests(simulation_config) - # shutil.copyfile('synth_sim_config.yaml', os.path.join(work_dir, 'synth_sim_config.yaml')) - # simulation_config = {"config_yaml": os.path.join(work_dir, 'synth_sim_config.yaml')} - # simulation_workspace = SynthSimulationWorkspace(simulation_config) - # Create sample storages if memory: sample_storage = Memory() @@ -87,7 +134,6 @@ def create_sampler(clean=True, memory=True, n_moments=5): sample_storage = SampleStorageHDF(file_path=os.path.join(work_dir, "mlmc_test.hdf5")) # Create sampling pools sampling_pool = OneProcessPool() - # sampling_pool_dir = OneProcessPool(work_dir=work_dir) if clean: if sampling_pool._output_dir is not 
None: @@ -111,5 +157,3 @@ def create_sampler(clean=True, memory=True, n_moments=5): sampler.ask_sampling_pool_for_samples() return sampler, simulation_factory, moments_fn - - diff --git a/mlmc/archive/flow123d_mock.py b/mlmc/archive/flow123d_mock.py deleted file mode 100644 index aca6f1a0..00000000 --- a/mlmc/archive/flow123d_mock.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -Mockup for the Flow123d simulator. -""" - -import argparse -import ruamel.yaml as yaml - -parser = argparse.ArgumentParser(description='Flow123d mockup.') -parser.add_argument('yaml_file', metavar='
', type=string, nargs=1, - help='Main input YAML file.') - -args = parser.parse_args() - -with open(args.yaml_file, 'r') as f: - input = yaml.load(f) - mesh_file = input.mesh.mesh_file - -def conductivity_field_average(gmsh): - pass diff --git a/mlmc/estimator.py b/mlmc/estimator.py index af4f6e03..cc98be3b 100644 --- a/mlmc/estimator.py +++ b/mlmc/estimator.py @@ -3,6 +3,7 @@ import scipy.integrate as integrate import mlmc.quantity.quantity_estimate as qe import mlmc.tool.simple_distribution +from mlmc.quantity.quantity_estimate import mask_nan_samples from mlmc.quantity.quantity_types import ScalarType from mlmc.plot import plots from mlmc.quantity.quantity_spec import ChunkSpec @@ -10,42 +11,88 @@ class Estimate: """ - Provides wrapper methods for moments estimation, pdf approximation, ... + A wrapper class for moment estimation, PDF approximation, and related MLMC post-processing. + + Provides utility methods to: + - Estimate statistical moments, variances, and covariances + - Perform regression-based variance estimation + - Conduct bootstrap resampling + - Construct approximate probability density functions + - Visualize and analyze MLMC variance and sample distributions """ + def __init__(self, quantity, sample_storage, moments_fn=None): + """ + Initialize the Estimate instance. + + :param quantity: mlmc.quantity.Quantity + Quantity object representing the stochastic quantity of interest. + :param sample_storage: mlmc.sample_storage.SampleStorage + Storage containing MLMC samples for each level. + :param moments_fn: callable, optional + Function defining the statistical moments to be estimated. 
+ """ self._quantity = quantity self._sample_storage = sample_storage self._moments_fn = moments_fn + self._moments_mean = None @property def quantity(self): + """Return the current Quantity object.""" return self._quantity @quantity.setter def quantity(self, quantity): + """Set a new Quantity object.""" self._quantity = quantity @property def n_moments(self): + """Return the number of moment functions defined.""" return self._moments_fn.size + @property + def moments_mean_obj(self): + """Return the most recently computed mean of the moments.""" + return self._moments_mean + + @moments_mean_obj.setter + def moments_mean_obj(self, moments_mean): + """ + Set the estimated mean of the moments. + + :param moments_mean: mlmc.quantity.quantity.QuantityMean + Object containing mean and variance of the estimated moments. + :raises TypeError: If the object is not an instance of QuantityMean. + """ + if not isinstance(moments_mean, mlmc.quantity.quantity.QuantityMean): + raise TypeError + self._moments_mean = moments_mean + def estimate_moments(self, moments_fn=None): """ - Use collected samples to estimate moments and variance of this estimate. - :param moments_fn: moments function - :return: estimate_of_moment_means, estimate_of_variance_of_estimate ; arrays of length n_moments + Estimate the mean and variance of the defined moment functions. + + :param moments_fn: callable, optional + Function to compute statistical moments. If None, uses the stored function. + :return: tuple (moment_means, moment_variances) + Arrays of length n_moments representing estimated means and variances. """ if moments_fn is None: moments_fn = self._moments_fn moments_mean = qe.estimate_mean(qe.moments(self._quantity, moments_fn)) + self.moments_mean_obj = moments_mean return moments_mean.mean, moments_mean.var def estimate_covariance(self, moments_fn=None): """ - Use collected samples to estimate covariance matrix and variance of this estimate. 
- :param moments_fn: moments function - :return: estimate_of_moment_means, estimate_of_variance_of_estimate ; arrays of length n_moments + Estimate the covariance matrix and its variance from MLMC samples. + + :param moments_fn: callable, optional + Function defining moment evaluations. If None, uses the stored one. + :return: tuple (covariance_matrix, covariance_variance) """ if moments_fn is None: moments_fn = self._moments_fn @@ -55,14 +102,20 @@ def estimate_covariance(self, moments_fn=None): def estimate_diff_vars_regression(self, n_created_samples, moments_fn=None, raw_vars=None): """ - Estimate variances using linear regression model. - Assumes increasing variance with moments_fn, use only two moments_fn with highest average variance. - :param n_created_samples: number of created samples on each level - :param moments_fn: Moment evaluation function - :return: array of variances, n_ops_estimate + Estimate variances using a linear regression model. + + Assumes that variance increases with moment order. Typically, only two moments + with the highest average variance are used. + + :param n_created_samples: array-like + Number of created samples on each MLMC level. + :param moments_fn: callable, optional + Moment evaluation function. + :param raw_vars: ndarray, optional + Precomputed raw variance estimates. 
+ :return: tuple (variance_array, n_ops_estimate) """ self._n_created_samples = n_created_samples - # vars shape L x R if raw_vars is None: if moments_fn is None: moments_fn = self._moments_fn @@ -70,21 +123,24 @@ def estimate_diff_vars_regression(self, n_created_samples, moments_fn=None, raw_ sim_steps = np.squeeze(self._sample_storage.get_level_parameters()) vars = self._all_moments_variance_regression(raw_vars, sim_steps) - # We need to get n_ops_estimate from storage + return vars, self._sample_storage.get_n_ops() def estimate_diff_vars(self, moments_fn=None): """ - Estimate moments_fn variance from samples - :param moments_fn: Moment evaluation functions - :return: (diff_variance, n_samples); - diff_variance - shape LxR, variances of diffs of moments_fn - n_samples - shape L, num samples for individual levels. + Estimate the variance of moment differences between consecutive MLMC levels. + + :param moments_fn: callable, optional + Moment evaluation functions. + :return: tuple (diff_variance, n_samples) + diff_variance - shape (L, R): variances of differences of moments. + n_samples - shape (L,): number of samples per level. """ moments_mean = qe.estimate_mean(qe.moments(self._quantity, moments_fn)) return moments_mean.l_vars, moments_mean.n_samples def _all_moments_variance_regression(self, raw_vars, sim_steps): + """Apply variance regression across all moment functions.""" reg_vars = raw_vars.copy() n_moments = raw_vars.shape[1] for m in range(1, n_moments): @@ -94,53 +150,53 @@ def _all_moments_variance_regression(self, raw_vars, sim_steps): def _moment_variance_regression(self, raw_vars, sim_steps): """ - Estimate level variance using separate model for every moment. + Perform regression-based smoothing of level variance for a single moment. - log(var_l) = A + B * log(h_l) + C * log^2(hl), - for l = 0, .. 
L-1 - :param raw_vars: moments_fn variances raws, shape (L,) - :param sim_steps: simulation steps, shape (L,) - :return: np.array (L, ) + Model: + log(var_l) = A + B * log(h_l) + C * log^2(h_l) + + :param raw_vars: ndarray, shape (L,) + Raw variance estimates of a single moment. + :param sim_steps: ndarray, shape (L,) + Simulation step sizes or level parameters. + :return: ndarray, shape (L,) + Smoothed variance estimates. """ L, = raw_vars.shape L1 = L - 1 if L < 3 or np.allclose(raw_vars, 0): return raw_vars - # estimate of variances of variances, compute scaling W = 1.0 / np.sqrt(self._variance_of_variance()) - W = W[1:] # ignore level 0 + W = W[1:] W = np.ones((L - 1,)) - # Use linear regresion to improve estimate of variances V1, ... - # model log var_{r,l} = a_r + b * log step_l - # X_(r,l), j = dirac_{r,j} - - K = 3 # number of parameters + K = 3 X = np.zeros((L1, K)) log_step = np.log(sim_steps[1:]) X[:, 0] = np.ones(L1) X[:, 1] = np.full(L1, log_step) X[:, 2] = np.full(L1, log_step ** 2) - WX = X * W[:, None] # scale - log_vars = np.log(raw_vars[1:]) # omit first variance - log_vars = W * log_vars # scale RHS + WX = X * W[:, None] + log_vars = np.log(raw_vars[1:]) + log_vars = W * log_vars params, res, rank, sing_vals = np.linalg.lstsq(WX, log_vars) new_vars = raw_vars.copy() new_vars[1:] = np.exp(np.dot(X, params)) - return new_vars def _variance_of_variance(self, n_samples=None): """ - Approximate variance of log(X) where - X is from ch-squared with df=n_samples - 1. - Return array of variances for actual n_samples array. + Approximate the variance of log(X), where X follows a chi-squared distribution. + + Used to approximate the uncertainty of variance estimates. - :param n_samples: Optional array with n_samples. - :return: array of variances of variance estimate. + :param n_samples: array-like, optional + Number of samples per level. + :return: ndarray + Variance of variance estimates per level. 
""" if n_samples is None: n_samples = self._n_created_samples @@ -169,21 +225,29 @@ def compute_moment(moment): return np.array(vars) def est_bootstrap(self, n_subsamples=100, sample_vector=None, moments_fn=None): - + """ + Perform bootstrap resampling to estimate uncertainty in MLMC estimators. + + :param n_subsamples: int, default=100 + Number of bootstrap subsamples. + :param sample_vector: ndarray, optional + Sampling vector for selecting subsamples. + :param moments_fn: callable, optional + Moment evaluation function. + """ if moments_fn is not None: self._moments_fn = moments_fn else: moments_fn = self._moments_fn - sample_vector = determine_sample_vec(n_collected_samples=self._sample_storage.get_n_collected(), - n_levels=self._sample_storage.get_n_levels(), - sample_vector=sample_vector) - bs_mean = [] - bs_var = [] - bs_l_means = [] - bs_l_vars = [] + sample_vector = determine_sample_vec( + n_collected_samples=self._sample_storage.get_n_collected(), + n_levels=self._sample_storage.get_n_levels(), + sample_vector=sample_vector + ) + bs_mean, bs_var, bs_l_means, bs_l_vars = [], [], [], [] for i in range(n_subsamples): - quantity_subsample = self.quantity.select(self.quantity.subsample(sample_vec=sample_vector)) + quantity_subsample = self.quantity.subsample(sample_vec=sample_vector) moments_quantity = qe.moments(quantity_subsample, moments_fn=moments_fn, mom_at_bottom=False) q_mean = qe.estimate_mean(moments_quantity) @@ -192,6 +256,9 @@ def est_bootstrap(self, n_subsamples=100, sample_vector=None, moments_fn=None): bs_l_means.append(q_mean.l_means) bs_l_vars.append(q_mean.l_vars) + self.bs_mean = bs_mean + self.bs_var = bs_var + self.mean_bs_mean = np.mean(bs_mean, axis=0) self.mean_bs_var = np.mean(bs_var, axis=0) self.mean_bs_l_means = np.mean(bs_l_means, axis=0) @@ -202,51 +269,105 @@ def est_bootstrap(self, n_subsamples=100, sample_vector=None, moments_fn=None): self.var_bs_l_means = np.var(bs_l_means, axis=0, ddof=1) self.var_bs_l_vars = 
np.var(bs_l_vars, axis=0, ddof=1) - self._bs_level_mean_variance = self.var_bs_l_means * np.array(self._sample_storage.get_n_collected())[:, None] + self._bs_level_mean_variance = ( + self.var_bs_l_means * np.array(self._sample_storage.get_n_collected())[:, None] + ) - def bs_target_var_n_estimated(self, target_var, sample_vec=None): - sample_vec = determine_sample_vec(n_collected_samples=self._sample_storage.get_n_collected(), - n_levels=self._sample_storage.get_n_levels(), - sample_vector=sample_vec) + def bs_target_var_n_estimated(self, target_var, sample_vec=None, n_subsamples=100): + """ + Estimate the number of samples required to achieve a target variance. + + :param target_var: float + Desired target variance for MLMC estimation. + :param sample_vec: ndarray, optional + Sampling vector specifying subsamples per level. + :param n_subsamples: int, default=100 + Number of bootstrap resamplings to perform. + :return: ndarray + Estimated number of samples required at each level. + """ + sample_vec = determine_sample_vec( + n_collected_samples=self._sample_storage.get_n_collected(), + n_levels=self._sample_storage.get_n_levels(), + sample_vector=sample_vec + ) - self.est_bootstrap(n_subsamples=300, sample_vector=sample_vec) + self.est_bootstrap(n_subsamples=n_subsamples, sample_vector=sample_vec) - variances, n_ops = self.estimate_diff_vars_regression(sample_vec, raw_vars=self.mean_bs_l_vars) - n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, - n_levels=self._sample_storage.get_n_levels()) + variances, n_ops = self.estimate_diff_vars_regression( + sample_vec, raw_vars=self.mean_bs_l_vars + ) + + n_estimated = estimate_n_samples_for_target_variance( + target_var, variances, n_ops, n_levels=self._sample_storage.get_n_levels() + ) return n_estimated def plot_variances(self, sample_vec=None): + """ + Plot variance breakdown from bootstrap and regression data. 
+ + :param sample_vec: ndarray, optional + Sampling vector specifying subsamples per level. + """ var_plot = plots.VarianceBreakdown(10) - sample_vec = determine_sample_vec(n_collected_samples=self._sample_storage.get_n_collected(), - n_levels=self._sample_storage.get_n_levels(), - sample_vector=sample_vec) + sample_vec = determine_sample_vec( + n_collected_samples=self._sample_storage.get_n_collected(), + n_levels=self._sample_storage.get_n_levels(), + sample_vector=sample_vec + ) self.est_bootstrap(n_subsamples=100, sample_vector=sample_vec) - var_plot.add_variances(self.mean_bs_l_vars, sample_vec, ref_level_vars=self._bs_level_mean_variance) + var_plot.add_variances( + self.mean_bs_l_vars, + sample_vec, + ref_level_vars=self._bs_level_mean_variance + ) var_plot.show(None) def plot_bs_var_log(self, sample_vec=None): - sample_vec = determine_sample_vec(n_collected_samples=self._sample_storage.get_n_collected(), - n_levels=self._sample_storage.get_n_levels(), - sample_vector=sample_vec) + """ + Generate log-scale bootstrap variance plots and variance regression fits. - moments_quantity = qe.moments(self._quantity, moments_fn=self._moments_fn, mom_at_bottom=False) + :param sample_vec: ndarray, optional + Sampling vector specifying subsamples per level. 
+ """ + sample_vec = determine_sample_vec( + n_collected_samples=self._sample_storage.get_n_collected(), + n_levels=self._sample_storage.get_n_levels(), + sample_vector=sample_vec + ) + + moments_quantity = qe.moments( + self._quantity, moments_fn=self._moments_fn, mom_at_bottom=False + ) q_mean = qe.estimate_mean(moments_quantity) - bs_plot = plots.BSplots(bs_n_samples=sample_vec, n_samples=self._sample_storage.get_n_collected(), - n_moments=self._moments_fn.size, ref_level_var=q_mean.l_vars) + bs_plot = plots.BSplots( + bs_n_samples=sample_vec, + n_samples=self._sample_storage.get_n_collected(), + n_moments=self._moments_fn.size, + ref_level_var=q_mean.l_vars + ) - bs_plot.plot_means_and_vars(self.mean_bs_mean[1:], self.mean_bs_var[1:], n_levels=self._sample_storage.get_n_levels()) + bs_plot.plot_means_and_vars( + self.mean_bs_mean[1:], + self.mean_bs_var[1:], + n_levels=self._sample_storage.get_n_levels() + ) bs_plot.plot_bs_variances(self.mean_bs_l_vars) - #bs_plot.plot_bs_var_log_var() - + # bs_plot.plot_bs_var_log_var() bs_plot.plot_var_regression(self, self._sample_storage.get_n_levels(), self._moments_fn) def fine_coarse_violinplot(self): + """ + Create violin plots comparing fine and coarse samples across levels. + + Uses pandas for data organization and mlmc.plot.violinplot for visualization. 
+ """ import pandas as pd from mlmc.plot import violinplot @@ -255,14 +376,18 @@ def fine_coarse_violinplot(self): if n_levels > 1: for level_id in range(n_levels): - chunk_spec = next(self._sample_storage.chunks(level_id=level_id, n_samples=self._sample_storage.get_n_collected()[level_id])) + chunk_spec = next( + self._sample_storage.chunks( + level_id=level_id, + n_samples=self._sample_storage.get_n_collected()[level_id] + ) + ) samples = np.squeeze(self._quantity.samples(chunk_spec, axis=0)) if level_id == 0: label = "{} F{} {} C".format(level_id, ' ' * label_n_spaces, level_id + 1) data = {'samples': samples[:, 0], 'type': 'fine', 'level': label} dframe = pd.DataFrame(data) else: - data = {'samples': samples[:, 1], 'type': 'coarse', 'level': label} dframe = pd.concat([dframe, pd.DataFrame(data)], axis=0) @@ -270,16 +395,21 @@ def fine_coarse_violinplot(self): label = "{} F{} {} C".format(level_id, ' ' * label_n_spaces, level_id + 1) data = {'samples': samples[:, 0], 'type': 'fine', 'level': label} dframe = pd.concat([dframe, pd.DataFrame(data)], axis=0) + violinplot.fine_coarse_violinplot(dframe) @staticmethod def estimate_domain(quantity, sample_storage, quantile=None): """ - Estimate moments domain from MLMC samples. - :param quantity: mlmc.quantity.Quantity instance, represents the real quantity - :param sample_storage: mlmc.sample_storage.SampleStorage instance, provides all the samples - :param quantile: float in interval (0, 1), None means whole sample range - :return: lower_bound, upper_bound + Estimate lower and upper bounds of the domain from MLMC samples. + + :param quantity: mlmc.quantity.Quantity + Quantity object representing the stochastic quantity. + :param sample_storage: mlmc.sample_storage.SampleStorage + Storage object containing all level samples. + :param quantile: float, optional + Quantile value in (0, 1). None defaults to 0.01. 
+ :return: tuple (lower_bound, upper_bound) """ ranges = [] if quantile is None: @@ -289,13 +419,25 @@ def estimate_domain(quantity, sample_storage, quantile=None): try: sample_storage.get_n_collected()[level_id] except AttributeError: - print("No collected values for level {}".format(level_id)) + print(f"No collected values for level {level_id}") break - chunk_spec = next(sample_storage.chunks(n_samples=sample_storage.get_n_collected()[level_id])) - fine_samples = quantity.samples(chunk_spec)[..., 0] # Fine samples at level 0 + print("sample_storage.get_n_collected() ", type(sample_storage.get_n_collected()[0])) + + if isinstance(sample_storage.get_n_collected()[level_id], AttributeError): + print("continue") + continue + + chunk_spec = next( + sample_storage.chunks( + level_id=level_id, + n_samples=sample_storage.get_n_collected()[level_id] + ) + ) + fine_samples = quantity.samples(chunk_spec)[..., 0] fine_samples = np.squeeze(fine_samples) - fine_samples = fine_samples[~np.isnan(fine_samples)] # remove NaN + print("fine samples ", fine_samples) + fine_samples = fine_samples[~np.isnan(fine_samples)] ranges.append(np.percentile(fine_samples, [100 * quantile, 100 * (1 - quantile)])) ranges = np.array(ranges) @@ -303,102 +445,185 @@ def estimate_domain(quantity, sample_storage, quantile=None): def construct_density(self, tol=1e-8, reg_param=0.0, orth_moments_tol=1e-4, exact_pdf=None): """ - Construct approximation of the density using given moment functions. + Construct an approximate probability density function using orthogonal moments. + + :param tol: float, default=1e-8 + Optimization tolerance for density estimation. + :param reg_param: float, default=0.0 + Regularization parameter to stabilize estimation. + :param orth_moments_tol: float, default=1e-4 + Tolerance for orthogonalization of moments. + :param exact_pdf: callable, optional + Reference exact PDF for validation or comparison. 
+ :return: tuple (distribution, info, result, moments_object) """ if not isinstance(self._quantity.qtype, ScalarType): - raise NotImplementedError("Currently, we only support ScalarType quantities") + raise NotImplementedError("Currently, only ScalarType quantities are supported.") cov_mean = qe.estimate_mean(qe.covariance(self._quantity, self._moments_fn)) cov_mat = cov_mean.mean - moments_obj, info = mlmc.tool.simple_distribution.construct_ortogonal_moments(self._moments_fn, - cov_mat, - tol=orth_moments_tol) + moments_obj, info = mlmc.tool.simple_distribution.construct_ortogonal_moments( + self._moments_fn, cov_mat, tol=orth_moments_tol + ) + moments_mean = qe.estimate_mean(qe.moments(self._quantity, moments_obj)) est_moments = moments_mean.mean est_vars = moments_mean.var - # if exact_pdf is not None: - # exact_moments = mlmc.tool.simple_distribution.compute_exact_moments(moments_obj, exact_pdf) - est_vars = np.ones(moments_obj.size) min_var, max_var = np.min(est_vars[1:]), np.max(est_vars[1:]) - #print("min_err: {} max_err: {} ratio: {}".format(min_var, max_var, max_var / min_var)) moments_data = np.stack((est_moments, est_vars), axis=1) - distr_obj = mlmc.tool.simple_distribution.SimpleDistribution(moments_obj, moments_data, - domain=moments_obj.domain) - result = distr_obj.estimate_density_minimize(tol, reg_param) # 0.95 two side quantile + + distr_obj = mlmc.tool.simple_distribution.SimpleDistribution( + moments_obj, moments_data, domain=moments_obj.domain + ) + result = distr_obj.estimate_density_minimize(tol, reg_param) return distr_obj, info, result, moments_obj def get_level_samples(self, level_id, n_samples=None): """ - Get level samples from storage - :param level_id: int, level identifier - :param n_samples> int, number of samples to retrieve, if None first chunk of data is retrieved - :return: level samples, shape: (M, N, 1) for level 0, (M, N, 2) otherwise + Retrieve MLMC samples for a given level. + + :param level_id: int + Level index to access. 
+ :param n_samples: int, optional + Number of samples to retrieve. If None, retrieves all available samples. + :return: ndarray + Samples for the specified level. """ chunk_spec = next(self._sample_storage.chunks(level_id=level_id, n_samples=n_samples)) return self._quantity.samples(chunk_spec=chunk_spec) + def kurtosis_check(self, quantity=None): + """ + Compute and return the kurtosis of the given or stored quantity. + + :param quantity: mlmc.quantity.Quantity, optional + Quantity for which to compute kurtosis. Defaults to the stored quantity. + :return: float or ndarray + Computed kurtosis per level. + """ + if quantity is None: + quantity = self._quantity + moments_mean_quantity = qe.estimate_mean(quantity) + kurtosis = qe.level_kurtosis(quantity, moments_mean_quantity) + return kurtosis + -def estimate_domain(quantity, sample_storage, quantile=None): +def consistency_check(quantity, sample_storage=None): """ - Estimate moments domain from MLMC samples. - :param quantity: mlmc.quantity.Quantity instance, represents the real quantity - :param sample_storage: mlmc.sample_storage.SampleStorage instance, provides all the samples - :param quantile: float in interval (0, 1), None means whole sample range - :return: lower_bound, upper_bound + Check consistency between fine and coarse level samples in MLMC. 
+ + :param quantity: mlmc.quantity.Quantity instance + :param sample_storage: mlmc.sample_storage.SampleStorage instance + :return: dict mapping level_id -> consistency metric """ - ranges = [] - if quantile is None: - quantile = 0.01 + fine_samples = {} + coarse_samples = {} + + for chunk_spec in quantity.get_quantity_storage().chunks(): + samples = quantity.samples(chunk_spec) + chunk, _ = mask_nan_samples(samples) + + # Skip empty chunks + if chunk.shape[1] == 0: + continue + fine_samples.setdefault(chunk_spec.level_id, []).extend(chunk[:, :, 0]) + if chunk_spec.level_id > 0: + coarse_samples.setdefault(chunk_spec.level_id, []).extend(chunk[:, :, 1]) + + cons_check_val = {} for level_id in range(sample_storage.get_n_levels()): - fine_samples = quantity.samples(ChunkSpec(level_id=level_id, n_samples=sample_storage.get_n_collected()[0]))[..., 0] + if level_id > 0: + fine_mean = np.mean(fine_samples[level_id]) + coarse_mean = np.mean(coarse_samples[level_id]) + diff_mean = np.mean(np.array(fine_samples[level_id]) - np.array(coarse_samples[level_id])) + + fine_var = np.var(fine_samples[level_id]) + coarse_var = np.var(fine_samples[level_id]) + diff_var = np.var(np.array(fine_samples[level_id]) - np.array(coarse_samples[level_id])) - fine_samples = np.squeeze(fine_samples) - ranges.append(np.percentile(fine_samples, [100 * quantile, 100 * (1 - quantile)])) + val = np.abs(coarse_mean - fine_mean + diff_mean) / ( + 3 * (np.sqrt(coarse_var) + np.sqrt(fine_var) + np.sqrt(diff_var)) + ) - ranges = np.array(ranges) - return np.min(ranges[:, 0]), np.max(ranges[:, 1]) + assert np.isclose(coarse_mean - fine_mean + diff_mean, 0) + assert val < 0.9 + cons_check_val[level_id] = val -def estimate_n_samples_for_target_variance(target_variance, prescribe_vars, n_ops, n_levels): + return cons_check_val + + +def coping_with_high_kurtosis(vars, costs, kurtosis, kurtosis_threshold=100): """ - Estimate optimal number of samples for individual levels that should provide a target variance 
of - resulting moment estimate. - This also set given moment functions to be used for further estimates if not specified otherwise. - :param target_variance: Constrain to achieve this variance. - :param prescribe_vars: vars[ L, M] for all levels L and moments_fn M safe the (zeroth) constant moment with zero variance. - :param n_ops: number of operations at each level + Adjust variance estimates if kurtosis is unusually high to avoid underestimation. + + :param vars: ndarray of shape (L, M) with level variances for moments + :param costs: cost of computing samples per level + :param kurtosis: kurtosis of each level + :param kurtosis_threshold: threshold above which kurtosis is considered "high" + :return: adjusted vars ndarray + """ + for l_id in range(2, vars.shape[0]): + if kurtosis[l_id] > kurtosis_threshold: + vars[l_id] = np.maximum(vars[l_id], 0.5 * vars[l_id - 1] * costs[l_id - 1] / costs[l_id]) + return vars + + +def estimate_n_samples_for_target_variance(target_variance, prescribe_vars, n_ops, n_levels, theta=0, kurtosis=None): + """ + Estimate optimal number of samples per level to reach a target variance. 
+ + :param target_variance: desired variance for MLMC estimator + :param prescribe_vars: ndarray of level variances (L x M) + :param n_ops: cost/operations per level :param n_levels: number of levels - :return: np.array with number of optimal samples for individual levels and moments_fn, array (LxR) + :param theta: safety factor (0 ≤ theta < 1) + :param kurtosis: optional ndarray of kurtosis per level + :return: ndarray of optimal number of samples per moment function """ vars = prescribe_vars + + if kurtosis is not None and len(vars) == len(kurtosis): + vars = coping_with_high_kurtosis(vars, n_ops, kurtosis) + sqrt_var_n = np.sqrt(vars.T * n_ops) # moments_fn in rows, levels in cols - total = np.sum(sqrt_var_n, axis=1) # sum over levels - n_samples_estimate = np.round((sqrt_var_n / n_ops).T * total / target_variance).astype(int) # moments_fn in cols + total = np.sum(sqrt_var_n, axis=1) + n_samples_estimate = np.round((sqrt_var_n / n_ops).T * total / target_variance).astype(int) + n_samples_estimate = 1 / (1 - theta) * n_samples_estimate + # Limit maximal number of samples per level n_samples_estimate_safe = np.maximum( - np.minimum(n_samples_estimate, vars * n_levels / target_variance), 2) + np.minimum(n_samples_estimate, vars * n_levels / target_variance), + 2 + ) return np.max(n_samples_estimate_safe, axis=1).astype(int) def calc_level_params(step_range, n_levels): + """ + Compute level-dependent step sizes for MLMC. 
+ + :param step_range: tuple (h_fine, h_coarse) + :param n_levels: number of levels + :return: list of step sizes per level + """ assert step_range[0] > step_range[1] level_parameters = [] for i_level in range(n_levels): - if n_levels == 1: - level_param = 1 - else: - level_param = i_level / (n_levels - 1) + level_param = 1 if n_levels == 1 else i_level / (n_levels - 1) level_parameters.append([step_range[0] ** (1 - level_param) * step_range[1] ** level_param]) - return level_parameters def determine_sample_vec(n_collected_samples, n_levels, sample_vector=None): + """ + Determine the sample vector for bootstrapping or MLMC calculations. + """ if sample_vector is None: sample_vector = n_collected_samples if len(sample_vector) > n_levels: @@ -408,45 +633,34 @@ def determine_sample_vec(n_collected_samples, n_levels, sample_vector=None): def determine_level_parameters(n_levels, step_range): """ - Determine level parameters, - In this case, a step of fine simulation at each level - :param n_levels: number of MLMC levels - :param step_range: simulation step range - :return: List + Wrapper to calculate level parameters (simulation step sizes). """ assert step_range[0] > step_range[1] level_parameters = [] for i_level in range(n_levels): - if n_levels == 1: - level_param = 1 - else: - level_param = i_level / (n_levels - 1) + level_param = 1 if n_levels == 1 else i_level / (n_levels - 1) level_parameters.append([step_range[0] ** (1 - level_param) * step_range[1] ** level_param]) - return level_parameters def determine_n_samples(n_levels, n_samples=None): """ - Set target number of samples for each level - :param n_levels: number of levels - :param n_samples: array of number of samples - :return: None + Generate an array of target sample sizes for each level. 
+ + :param n_levels: number of MLMC levels + :param n_samples: int or list of 2 ints to define start/end for exponential spacing + :return: ndarray of sample sizes for each level """ if n_samples is None: n_samples = [100, 3] - # Num of samples to ndarray + n_samples = np.atleast_1d(n_samples) - # Just maximal number of samples is set if len(n_samples) == 1: n_samples = np.array([n_samples[0], 3]) - # Create number of samples for all levels if len(n_samples) == 2: n0, nL = n_samples n_samples = np.round(np.exp2(np.linspace(np.log2(n0), np.log2(nL), n_levels))).astype(int) return n_samples - - diff --git a/mlmc/level_simulation.py b/mlmc/level_simulation.py index 67c5c23d..593f28c8 100644 --- a/mlmc/level_simulation.py +++ b/mlmc/level_simulation.py @@ -1,34 +1,37 @@ import attr -from typing import List, Dict, Any +from typing import List, Dict, Any, Optional, Callable from mlmc.quantity.quantity_spec import QuantitySpec @attr.s(auto_attribs=True) class LevelSimulation: """ - This class is used to pass simulation data at a given level between a Sampler and a SamplingPool - User shouldn't change this class + Class for passing simulation configuration and metadata for a given level between + a Sampler and a SamplingPool. + + User shouldn't modify this class manually. """ + config_dict: Dict[Any, Any] - # Calculate configuration. + # Level-specific simulation configuration dictionary. - common_files: List[str] = None - # List of files in the level workspace to copy/symlink to the sample workspace. + common_files: Optional[List[str]] = None + # List of files in the level workspace to copy or symlink to the sample workspace. need_sample_workspace: bool = False - # If the simulation needs sample workspace at all. + # Whether the simulation requires an individual workspace for each sample. - task_size: int = 0 - # Relative size of the simulation at this level. 
- # When using PBS, keep in mind that the pbs job size is the sum of task_sizes, and if this sum is above 1, - # the job is scheduled and PBS scheduler manages it + task_size: float = 0.0 + # Relative size (or computational cost) of the simulation task at this level. + # When using PBS or SLURM, note that the job size is the sum of task_sizes. + # If this sum exceeds 1.0, the job is queued and scheduled by the system. - ### User shouldn't modify the following attributes ### - _calculate: Any = None - # Calculate method + ### Internal attributes — users should not modify these ### + _calculate: Optional[Callable] = None + # Calculation method used internally by the sampler. - _level_id: int = None - # Level id is set by mlmc.sampler.Sampler. It is internal variable and user shouldn't change it. + _level_id: Optional[int] = None + # Level identifier, set automatically by mlmc.sampler.Sampler. - _result_format: List[QuantitySpec] = None - # Simulation result format + _result_format: Optional[List[QuantitySpec]] = None + # Format specification for simulation results (defined by QuantitySpec instances). diff --git a/mlmc/moments.py b/mlmc/moments.py index 2329566e..ba1f932a 100644 --- a/mlmc/moments.py +++ b/mlmc/moments.py @@ -5,9 +5,26 @@ class Moments: """ - Class for calculating moments of a random variable + Base class for computing moment functions of a random variable. + + Provides transformation, scaling, and evaluation utilities common + to various types of generalized moment bases (monomial, Fourier, Legendre, etc.). """ + def __init__(self, size, domain, log=False, safe_eval=True): + """ + Initialize the moment function set. + + :param size: int + Number of moment functions. + :param domain: tuple(float, float) + Domain of the input variable (min, max). + :param log: bool + If True, use logarithmic transformation of the domain. + :param safe_eval: bool + If True, clip transformed values outside the reference domain + and replace them with NaN. 
+ """ assert size > 0 self.size = size self.domain = domain @@ -25,6 +42,7 @@ def __init__(self, size, domain, log=False, safe_eval=True): self._linear_scale = (self.ref_domain[1] - self.ref_domain[0]) / diff self._linear_shift = lin_domain[0] + # Define transformation and inverse transformation functions if safe_eval and log: self.transform = lambda val: self.clip(self.linear(np.log(val))) self.inv_transform = lambda ref: np.exp(self.inv_linear(ref)) @@ -40,78 +58,153 @@ def __init__(self, size, domain, log=False, safe_eval=True): def __eq__(self, other): """ - Compare two moment functions. Equal if they returns same values. + Compare two Moments objects for equality. + + :param other: Moments + Another Moments instance. + :return: bool + True if both instances have the same parameters and configuration. """ - return type(self) is type(other) \ - and self.size == other.size \ - and np.all(self.domain == other.domain) \ - and self._is_log == other._is_log \ - and self._is_clip == other._is_clip + return ( + type(self) is type(other) + and self.size == other.size + and np.all(self.domain == other.domain) + and self._is_log == other._is_log + and self._is_clip == other._is_clip + ) def change_size(self, size): """ - Return moment object with different size. - :param size: int, new number of _moments_fn + Return a new moment object with a different number of basis functions. + + :param size: int + New number of moment functions. + :return: Moments + New instance of the same class with updated size. """ return self.__class__(size, self.domain, self._is_log, self._is_clip) def clip(self, value): """ - Remove outliers and replace them with NaN - :param value: array of numbers - :return: masked_array, out + Clip values to the reference domain, replacing outliers with NaN. + + :param value: array-like + Input data to be clipped. + :return: ndarray + Array with out-of-bound values replaced by NaN. 
""" - # Masked array out = ma.masked_outside(value, self.ref_domain[0], self.ref_domain[1]) - # Replace outliers with NaN return ma.filled(out, np.nan) def linear(self, value): + """Apply linear transformation to reference domain.""" return (value - self._linear_shift) * self._linear_scale + self.ref_domain[0] def inv_linear(self, value): + """Inverse linear transformation back to the original domain.""" return (value - self.ref_domain[0]) / self._linear_scale + self._linear_shift def __call__(self, value): + """Evaluate all moment functions for the given value(s).""" return self._eval_all(value, self.size) def eval(self, i, value): - return self._eval_all(value, i+1)[:, -1] + """ + Evaluate the i-th moment function. + + :param i: int + Index of the moment function to evaluate (0-based). + :param value: float or array-like + Input value(s). + :return: ndarray + Values of the i-th moment function. + """ + return self._eval_all(value, i + 1)[:, -1] def eval_single_moment(self, i, value): """ - Be aware this implementation is inefficient for large i - :param i: int, order of moment - :param value: float - :return: np.ndarray + Evaluate a single moment function (less efficient for large i). + + :param i: int + Order of the moment. + :param value: float or array-like + Input value(s). + :return: ndarray + Evaluated moment values. """ - return self._eval_all(value, i+1)[..., i] + return self._eval_all(value, i + 1)[..., i] def eval_all(self, value, size=None): + """ + Evaluate all moments up to the specified size. + + :param value: float or array-like + Input value(s). + :param size: int or None + Number of moments to evaluate. If None, use self.size. + :return: ndarray + Matrix of evaluated moments. + """ if size is None: size = self.size return self._eval_all(value, size) def eval_all_der(self, value, size=None, degree=1): + """ + Evaluate derivatives of all moment functions. + + :param value: float or array-like + Input value(s). 
+ :param size: int or None + Number of moments to evaluate. + :param degree: int + Derivative degree (1 for first derivative, etc.). + :return: ndarray + Matrix of evaluated derivatives. + """ if size is None: size = self.size return self._eval_all_der(value, size, degree) def eval_diff(self, value, size=None): + """ + Evaluate first derivatives of all moment functions. + + :param value: float or array-like + Input value(s). + :param size: int or None + Number of moments to evaluate. + :return: ndarray + Matrix of first derivatives. + """ if size is None: size = self.size return self._eval_diff(value, size) def eval_diff2(self, value, size=None): + """ + Evaluate second derivatives of all moment functions. + + :param value: float or array-like + Input value(s). + :param size: int or None + Number of moments to evaluate. + :return: ndarray + Matrix of second derivatives. + """ if size is None: size = self.size return self._eval_diff2(value, size) +# ------------------------------------------------------------------------- +# Specific moment types +# ------------------------------------------------------------------------- class Monomial(Moments): """ - Monomials generalized moments + Monomial basis functions for generalized moment evaluation. """ + def __init__(self, size, domain=(0, 1), ref_domain=None, log=False, safe_eval=True): if ref_domain is not None: self.ref_domain = ref_domain @@ -120,33 +213,49 @@ def __init__(self, size, domain=(0, 1), ref_domain=None, log=False, safe_eval=Tr super().__init__(size, domain, log=log, safe_eval=safe_eval) def _eval_all(self, value, size): - # Create array from values and transform values outside the ref domain + """ + Evaluate monomial basis (Vandermonde matrix). + + :param value: array-like + Input values. + :param size: int + Number of moments to compute. + :return: ndarray + Vandermonde matrix of monomials. 
+ """ t = self.transform(np.atleast_1d(value)) - # Vandermonde matrix return np.polynomial.polynomial.polyvander(t, deg=size - 1) def eval(self, i, value): + """Evaluate the i-th monomial t^i.""" t = self.transform(np.atleast_1d(value)) - return t**i + return t ** i class Fourier(Moments): """ - Fourier functions generalized moments + Fourier basis functions for generalized moment evaluation. """ - def __init__(self, size, domain=(0, 2*np.pi), ref_domain=None, log=False, safe_eval=True): + + def __init__(self, size, domain=(0, 2 * np.pi), ref_domain=None, log=False, safe_eval=True): if ref_domain is not None: self.ref_domain = ref_domain else: - self.ref_domain = (0, 2*np.pi) - + self.ref_domain = (0, 2 * np.pi) super().__init__(size, domain, log=log, safe_eval=safe_eval) def _eval_all(self, value, size): - # Transform values + """ + Evaluate Fourier moment basis (cosine/sine terms). + + :param value: array-like + Input values. + :param size: int + Number of moments to compute. + :return: ndarray + Matrix of evaluated Fourier functions. + """ t = self.transform(np.atleast_1d(value)) - - # Half the number of moments R = int(size / 2) shorter_sin = 1 - int(size % 2) k = np.arange(1, R + 1) @@ -154,26 +263,33 @@ def _eval_all(self, value, size): res = np.empty((len(t), size)) res[:, 0] = 1 - - # Odd column index res[:, 1::2] = np.cos(kx[:, :]) - # Even column index res[:, 2::2] = np.sin(kx[:, : R - shorter_sin]) return res def eval(self, i, value): + """ + Evaluate a single Fourier basis function. + + :param i: int + Index of the moment function. + :param value: float or array-like + Input values. + :return: ndarray + Evaluated function values. + """ t = self.transform(np.atleast_1d(value)) if i == 0: return 1 elif i % 2 == 1: - return np.sin( (i - 1) / 2 * t) + return np.sin((i - 1) / 2 * t) else: return np.cos(i / 2 * t) class Legendre(Moments): """ - Legendre polynomials generalized moments + Legendre polynomial basis functions for generalized moments. 
""" def __init__(self, size, domain, ref_domain=None, log=False, safe_eval=True): @@ -182,6 +298,7 @@ def __init__(self, size, domain, ref_domain=None, log=False, safe_eval=True): else: self.ref_domain = (-1, 1) + # Precompute derivative matrices self.diff_mat = np.zeros((size, size)) for n in range(size - 1): self.diff_mat[n, n + 1::2] = 2 * n + 1 @@ -190,19 +307,26 @@ def __init__(self, size, domain, ref_domain=None, log=False, safe_eval=True): super().__init__(size, domain, log, safe_eval) def _eval_value(self, x, size): - return np.polynomial.legendre.legvander(x, deg=size-1) + """Evaluate Legendre polynomials up to the given order.""" + return np.polynomial.legendre.legvander(x, deg=size - 1) def _eval_all(self, value, size): + """Evaluate all Legendre polynomials.""" value = self.transform(np.atleast_1d(value)) return np.polynomial.legendre.legvander(value, deg=size - 1) def _eval_all_der(self, value, size, degree=1): """ - Derivative of Legendre polynomials - :param value: values to evaluate - :param size: number of moments - :param degree: degree of derivative - :return: + Evaluate derivatives of Legendre polynomials. + + :param value: array-like + Points at which to evaluate. + :param size: int + Number of moment functions. + :param degree: int + Derivative order. + :return: ndarray + Matrix of derivative values. 
""" value = self.transform(np.atleast_1d(value)) eval_values = np.empty((value.shape + (size,))) @@ -211,7 +335,7 @@ def _eval_all_der(self, value, size, degree=1): if s == 0: coef = [1] else: - coef = np.zeros(s+1) + coef = np.zeros(s + 1) coef[-1] = 1 coef = np.polynomial.legendre.legder(coef, degree) @@ -219,26 +343,38 @@ def _eval_all_der(self, value, size, degree=1): return eval_values def _eval_diff(self, value, size): + """Evaluate first derivatives using precomputed differentiation matrix.""" t = self.transform(np.atleast_1d(value)) P_n = np.polynomial.legendre.legvander(t, deg=size - 1) return P_n @ self.diff_mat def _eval_diff2(self, value, size): + """Evaluate second derivatives using precomputed differentiation matrix.""" t = self.transform(np.atleast_1d(value)) P_n = np.polynomial.legendre.legvander(t, deg=size - 1) return P_n @ self.diff2_mat class TransformedMoments(Moments): + """ + Linearly transformed moment basis. + + Creates a new set of moment functions as linear combinations + of another existing set of basis functions. + """ + def __init__(self, other_moments, matrix): """ - Set a new moment functions as linear combination of the previous. - new_moments = matrix . old_moments + Initialize transformed moment functions. + + :param other_moments: Moments + Original set of moment functions. + :param matrix: ndarray + Linear transformation matrix where: + new_moments = matrix @ old_moments - We assume that new_moments[0] is still == 1. That means - first row of the matrix must be (1, 0 , ...). - :param other_moments: Original _moments_fn. - :param matrix: Linear combinations of the original _moments_fn. + The first row must correspond to (1, 0, 0, ...), + ensuring that new_moments[0] = 1. 
""" n, m = matrix.shape assert m == other_moments.size @@ -248,27 +384,34 @@ def __init__(self, other_moments, matrix): self._transform = matrix def __eq__(self, other): - return type(self) is type(other) \ - and self.size == other.size \ - and self._origin == other._origin \ - and np.all(self._transform == other._transform) + """Check equality with another TransformedMoments object.""" + return ( + type(self) is type(other) + and self.size == other.size + and self._origin == other._origin + and np.all(self._transform == other._transform) + ) def _eval_all(self, value, size): + """Evaluate all transformed moment functions.""" orig_moments = self._origin._eval_all(value, self._origin.size) x1 = np.matmul(orig_moments, self._transform.T) return x1[..., :size] def _eval_all_der(self, value, size, degree=1): + """Evaluate derivatives of transformed moment functions.""" orig_moments = self._origin._eval_all_der(value, self._origin.size, degree=degree) x1 = np.matmul(orig_moments, self._transform.T) return x1[..., :size] def _eval_diff(self, value, size): + """Evaluate first derivatives of transformed moment functions.""" orig_moments = self._origin.eval_diff(value, self._origin.size) x1 = np.matmul(orig_moments, self._transform.T) return x1[..., :size] def _eval_diff2(self, value, size): + """Evaluate second derivatives of transformed moment functions.""" orig_moments = self._origin.eval_diff2(value, self._origin.size) x1 = np.matmul(orig_moments, self._transform.T) return x1[..., :size] diff --git a/mlmc/plot/diagnostic_plots.py b/mlmc/plot/diagnostic_plots.py new file mode 100644 index 00000000..cefd0520 --- /dev/null +++ b/mlmc/plot/diagnostic_plots.py @@ -0,0 +1,166 @@ +import numpy as np +import matplotlib + +matplotlib.rcParams.update({'font.size': 22}) +import matplotlib.pyplot as plt + + +def log_var_per_level(l_vars, levels=None, moments=[0], err_l_vars=None): + """ + Plot log₂ of variance per level and fit a slope to estimate the decay rate β. 
+ + The function plots the base-2 logarithm of the variance for each level + and fits a linear model to estimate the convergence rate β, based on + the slope of log₂(variance) vs. level. + + :param l_vars: Array of shape (n_levels, n_moments) representing + the variance of each moment at each level. + :param levels: Optional array of level indices (default: np.arange(n_levels)). + :param moments: List of moment indices to include in the plot. + :param err_l_vars: Optional array of errors corresponding to l_vars. + :return: None + """ + n_levels = l_vars.shape[0] + if levels is None: + levels = np.arange(n_levels) + + fig, ax = plt.subplots(figsize=(8, 5)) + + for m in moments: + y = np.log2(l_vars[:, m]) + ax.plot(levels, y, 'o-', label=f'm={m}') + + slope, intercept = np.polyfit(levels, y, 1) + beta = -slope + ax.plot( + levels, + slope * levels + intercept, + '--', + label=f'fit m={m}: slope={slope:.2f}, beta≈{beta:.2f}' + ) + + ax.set_ylabel(r'$\log_2 \, V_\ell$') + ax.set_xlabel('level $\ell$') + ax.legend() + ax.grid(True, which="both") + plt.tight_layout() + plt.show() + + +def log_mean_per_level(l_means, err_means=0, err_l_means=0, moments=[1, 2, 3, 4]): + """ + Plot log₂ of absolute mean per level for specified statistical moments. + + :param l_means: Array of mean values per level and moment. + :param err_means: Optional array of mean estimation errors (unused). + :param err_l_means: Optional array of level-mean estimation errors (unused). + :param moments: List of moment indices to include in the plot. + :return: None + """ + fig, ax1 = plt.subplots(figsize=(8, 5)) + print("l means ", l_means) + for m in moments: + line2, = ax1.plot(np.log2(np.abs(l_means[:, m])), label=f"m={m}", marker="s") + + ax1.set_ylabel('log' + r'$_2$' + 'mean') + ax1.set_xlabel('level' + r'$l$') + plt.legend() + plt.tight_layout() + plt.show() + + +def sample_cost_per_level(costs, levels=None): + """ + Plot log₂ of sample cost per level and fit a slope to estimate γ. 
+ + The slope of the linear regression line provides an estimate of the + cost scaling parameter γ. + + :param costs: Array of computational costs per sample for each level. + :param levels: Optional array of level indices (default: 0, 1, ...). + :return: Estimated γ (float), the slope of the fitted line. + """ + n_levels = len(costs) + if levels is None: + levels = np.arange(n_levels) + + y = np.log2(costs) + slope, intercept = np.polyfit(levels, y, 1) + gamma = slope + + fig, ax = plt.subplots(figsize=(8, 5)) + ax.plot(levels, y, 'o-', label='log2(cost)') + ax.plot( + levels, + slope * levels + intercept, + '--', + label=f'fit: slope={slope:.2f}, gamma≈{gamma:.2f}' + ) + + ax.set_ylabel(r'$\log_2 \, C_\ell$') + ax.set_xlabel('level $\ell$') + ax.legend() + ax.grid(True, which="both") + plt.tight_layout() + plt.show() + + return gamma + + +def variance_to_cost_ratio(l_vars, costs, moments=[1, 2, 3, 4]): + """ + Plot the log₂ of variance-to-cost ratio per level for given statistical moments. + + The ratio Vₗ/Cₗ is computed for each level, and the slope of its + log₂-linear fit indicates the decay behavior relative to computational cost. + + :param l_vars: Array of variances per level and moment (shape: n_levels × n_moments). + :param costs: Array of costs per sample for each level. + :param moments: List of moment indices to include in the plot. 
+ :return: None + """ + print("l_vars ", l_vars) + print(costs) + n_levels = l_vars.shape[0] + levels = np.arange(n_levels) + fig, ax1 = plt.subplots(figsize=(8, 5)) + print('costs ', costs) + print("levels ", levels) + for m in moments: + line2, = ax1.plot(np.log2(l_vars[:, m] / costs), label=f"m={m}", marker="s") + + print("l vars ", l_vars[:, m]) + print("np.log2(l_vars[:, m]/costs) ", np.log2(l_vars[:, m] / costs)) + + # Fit a straight line: log2(V/C) ≈ a + b * level + coeffs = np.polyfit(levels, np.log2(l_vars[:, m] / costs), 1) + slope, intercept = coeffs[0], coeffs[1] + ax1.plot(levels, slope * levels + intercept, '--', label=f'fit: slope={slope:.2f}') + + ax1.set_ylabel('log' + r'$_2$' + 'variance to cost ratio') + ax1.set_xlabel('level' + r'$l$') + plt.legend() + plt.tight_layout() + plt.show() + + +def kurtosis_per_level(means, l_means, err_means=0, err_l_means=0, moments=[1, 2, 3, 4]): + """ + Plot log₂ of mean values per level (often used for analyzing kurtosis trends). + + :param means: Array of global mean values per moment (unused in plotting). + :param l_means: Array of level-wise mean values per moment. + :param err_means: Optional array of mean estimation errors (unused). + :param err_l_means: Optional array of level-mean estimation errors (unused). + :param moments: List of moment indices to include in the plot. 
+ :return: None + """ + fig, ax1 = plt.subplots(figsize=(8, 5)) + for m in moments: + line2, = ax1.plot(np.log2(np.abs(l_means[:, m])), label=f"m={m}", marker="s") + + ax1.set_ylabel('log ' + r'$_2$ ' + 'mean') + ax1.set_xlabel('level ' + r'$l$') + plt.legend() + plt.tight_layout() + plt.show() diff --git a/mlmc/plot/violinplot.py b/mlmc/plot/violinplot.py index 85e9a1b1..74dfe836 100644 --- a/mlmc/plot/violinplot.py +++ b/mlmc/plot/violinplot.py @@ -2,18 +2,56 @@ import seaborn from mlmc.plot.plots import _show_and_save import matplotlib + +# Set default font size for all plots matplotlib.rcParams.update({'font.size': 22}) + import matplotlib.pyplot as plt class ViolinPlotter(seaborn.categorical._ViolinPlotter): + """ + Custom subclass of seaborn's internal _ViolinPlotter to modify how quartiles + and mean lines are drawn inside a violin plot. + + This class extends the default behavior by drawing the 25th, 50th, and 75th + percentiles as dashed lines, and the mean as a solid line across the violin body. + """ + def draw_quartiles(self, ax, data, support, density, center, split=False): + """ + Draw quartile and mean lines on the violin plot. + + Parameters + ---------- + ax : matplotlib.axes.Axes + The axes object to draw on. + data : array-like + Input data for a single violin. + support : array-like + Grid over which the kernel density was evaluated. + density : array-like + Corresponding kernel density values. + center : float + Position of the violin on the categorical axis. + split : bool, default=False + Whether the violin is split by hue (two sides). + + Notes + ----- + - The mean is drawn as a solid line. + - Quartiles (25%, 50%, 75%) are drawn as dashed lines. + - The density scaling follows seaborn’s internal behavior. 
+ """ + # Compute quartiles and mean of the data q25, q50, q75 = np.percentile(data, [25, 50, 75]) mean = np.mean(data) + # Draw mean line (solid) self.draw_to_density(ax, center, mean, support, density, split, linewidth=self.linewidth) + # Draw quartile lines (dashed) self.draw_to_density(ax, center, q25, support, density, split, linewidth=self.linewidth, dashes=[self.linewidth * 1.5] * 2) @@ -33,39 +71,110 @@ def violinplot( bw="scott", cut=2, scale="area", scale_hue=True, gridsize=100, width=.8, inner="box", split=False, dodge=True, orient=None, linewidth=None, color=None, palette=None, saturation=.75, - ax=None, **kwargs,): - - plotter = ViolinPlotter(x, y, hue, data, order, hue_order, - bw, cut, scale, scale_hue, gridsize, - width, inner, split, dodge, orient, linewidth, - color, palette, saturation) - + ax=None, **kwargs, +): + """ + Wrapper around the custom ViolinPlotter class to generate a violin plot. + + Parameters + ---------- + x, y, hue : str, optional + Variable names for the categorical axis, numeric axis, and hue grouping. + data : DataFrame, optional + Dataset containing the variables. + order, hue_order : list, optional + Order of categories for x and hue variables. + bw : str or float, default="scott" + Bandwidth method or scalar for kernel density estimation. + cut : float, default=2 + How far the violin extends beyond extreme data points. + scale : {"area", "count", "width"}, default="area" + Method for scaling the width of each violin. + scale_hue : bool, default=True + Whether to scale by hue levels within each category. + gridsize : int, default=100 + Number of points in the KDE grid. + width : float, default=0.8 + Width of each violin. + inner : {"box", "quartile", "point", "stick", None}, default="box" + Representation inside each violin. + split : bool, default=False + Draw half-violins when hue is used. + dodge : bool, default=True + Separate violins for each hue level. 
+ orient : {"v", "h"}, optional + Plot orientation; inferred if not specified. + linewidth : float, optional + Width of the line used for drawing violins and quartiles. + color : matplotlib color, optional + Color for all violins. + palette : str or sequence, optional + Color palette for hue levels. + saturation : float, default=0.75 + Saturation for colors. + ax : matplotlib.axes.Axes, optional + Axes object to draw on; created if None. + **kwargs : + Additional arguments passed to seaborn’s internal methods. + + Returns + ------- + ax : matplotlib.axes.Axes + The axes containing the drawn violin plot. + """ + # Initialize a custom violin plotter instance + plotter = ViolinPlotter( + x, y, hue, data, order, hue_order, + bw, cut, scale, scale_hue, gridsize, + width, inner, split, dodge, orient, linewidth, + color, palette, saturation + ) + + # Create a new axes if none provided if ax is None: ax = plt.gca() + # Draw the plot using the seaborn-based custom plotter plotter.plot(ax) return ax def fine_coarse_violinplot(data_frame): + """ + Generate a split violin plot comparing fine and coarse simulation samples per level. + + Parameters + ---------- + data_frame : pandas.DataFrame + Must contain the columns: + - 'level' : int, simulation level + - 'samples' : float, sample values + - 'type' : str, either 'fine' or 'coarse' + + Notes + ----- + - Uses log scale on the y-axis. + - Calls `_show_and_save` to display and save the resulting plot. + - Produces a split violin plot (fine vs coarse) for each level. 
+ """ + # Create a single subplot for the violin plot fig, axes = plt.subplots(1, 1, figsize=(22, 10)) - # mean with confidence interval - # sns.pointplot(x='level', y='samples', hue='type', data=data_frame, estimator=np.mean, - # palette="Set2", join=False, ax=axes) - - # line is not suitable for our purpose - # sns.lineplot(x="level", y="samples", hue="type",# err_style="band", ci='sd' - # estimator=np.median, data=data_frame, ax=axes) - - violinplot(x="level", y="samples", hue='type', data=data_frame, palette="Set2", - split=True, scale="area", inner="quartile", ax=axes) + # Draw split violin plot for 'fine' and 'coarse' samples per level + violinplot( + x="level", y="samples", hue='type', data=data_frame, + palette="Set2", split=True, scale="area", + inner="quartile", ax=axes + ) + # Use logarithmic y-scale (typical for MLMC variance/cost visualizations) axes.set_yscale('log') axes.set_ylabel('') axes.set_xlabel('') + + # Remove legend frame and content axes.legend([], [], frameon=False) + # Display and save plot using utility function _show_and_save(fig, "violinplot", "violinplot") _show_and_save(fig, None, "violinplot") - diff --git a/mlmc/quantity/quantity.py b/mlmc/quantity/quantity.py index bdeddb96..6a1696f4 100644 --- a/mlmc/quantity/quantity.py +++ b/mlmc/quantity/quantity.py @@ -13,12 +13,13 @@ def make_root_quantity(storage: SampleStorage, q_specs: List[QuantitySpec]): """ - Create a root quantity that has QuantityStorage as the input quantity, + Create a root quantity that has QuantityStorage as the input quantity. QuantityStorage is the only class that directly accesses the stored data. - Quantity type is created based on the q_spec parameter - :param storage: SampleStorage - :param q_specs: same as result format in simulation class - :return: QuantityStorage + The returned QuantityStorage uses a QType built from provided QuantitySpec objects. 
+ + :param storage: SampleStorage instance that provides stored samples + :param q_specs: list of QuantitySpec describing the simulation result format + :return: QuantityStorage that wraps the provided SampleStorage with a matching QType """ dict_types = [] for q_spec in q_specs: @@ -33,29 +34,37 @@ def make_root_quantity(storage: SampleStorage, q_specs: List[QuantitySpec]): class Quantity: + """ + Represents a quantity (a measurable value or expression) constructed from a QType, + an operation (callable) and zero-or-more input quantities. Quantities are lazy: + their actual data are returned by calling `samples(chunk_spec)`. + + - qtype: structure description (QType) + - _operation: callable that takes sample-chunks from input_quantities and returns result chunks + - _input_quantities: dependencies (other Quantity instances) + """ + def __init__(self, quantity_type, operation, input_quantities=[]): """ - Quantity class represents real quantity and also provides operation that can be performed with stored values. - Each Quantity has Qtype which describes its structure. - :param quantity_type: QType instance - :param operation: function - :param input_quantities: List[Quantity] + :param quantity_type: QType instance describing the shape/structure + :param operation: callable implementing the transform on input chunks + :param input_quantities: List[Quantity] dependencies (may be empty for constants) """ self.qtype = quantity_type self._operation = operation self._input_quantities = input_quantities - # List of quantities on which the 'self' depends, their number have to match number of arguments - # to the operation. 
+ # Underlying QuantityStorage (inherited from one of the inputs, if present) self._storage = self.get_quantity_storage() - # QuantityStorage instance + # Selection identifier - used to tie selections together (set by select) self._selection_id = self.set_selection_id() - # Identifier of selection, should be set in select() method + # Validate that input quantities use consistent selection/storage self._check_selection_ids() def get_quantity_storage(self): """ - Get QuantityStorage instance - :return: None, QuantityStorage + Find the first QuantityStorage among inputs (if any) and return it. + + :return: QuantityStorage instance or None if not found """ if len(self._input_quantities) == 0: return None @@ -68,9 +77,11 @@ def get_quantity_storage(self): def set_selection_id(self): """ - Set selection id - selection id is None by default, - but if we create new quantity from quantities that are result of selection we need to pass selection id + Determine the selection id for this Quantity. If inputs have a selection id + (created by select), inherit it; if multiple different selection ids are + present among inputs, raise an exception. + + :return: selection id or None """ selection_id = None for input_quantity in self._input_quantities: @@ -82,12 +93,11 @@ def set_selection_id(self): def _check_selection_ids(self): """ - Make sure the all input quantities come from the same QuantityStorage + Ensure that all input quantities that have selection ids share the same one. + If no QuantityStorage is present, nothing to check. """ - # All input quantities are QuantityConst instances if self._storage is None: return - # Check selection ids otherwise for input_quantity in self._input_quantities: sel_id = input_quantity.selection_id() if sel_id is None: @@ -97,8 +107,10 @@ def _check_selection_ids(self): def selection_id(self): """ - Get storage ids of all input quantities - :return: List[int] + Return this Quantity's selection id. 
If not set, use id(self._storage) to + identify the underlying storage instance. + + :return: selection identifier (int or None) """ if self._selection_id is not None: return self._selection_id @@ -109,71 +121,93 @@ def selection_id(self): def size(self) -> int: """ - Quantity size from qtype + Return the number of scalar components described by the QType. + :return: int """ return self.qtype.size() def get_cache_key(self, chunk_spec): """ - Create cache key + Create a cache key used by memoization for samples. We include: + - level id + - chunk id + - chunk size (derived from slice) + - id(self) to distinguish different quantity instances + + :param chunk_spec: ChunkSpec + :return: tuple key """ chunk_size = None if chunk_spec.chunk_slice is not None: chunk_size = chunk_spec.chunk_slice.stop - chunk_spec.chunk_slice.start - return (chunk_spec.level_id, chunk_spec.chunk_id, chunk_size, id(self)) # redundant parentheses needed due to py36, py37 + return (chunk_spec.level_id, chunk_spec.chunk_id, chunk_size, id(self)) # py36/37 compatibility @cached(custom_key_maker=get_cache_key) def samples(self, chunk_spec): """ - Return list of sample chunks for individual levels. - Possibly calls underlying quantities. - :param chunk_spec: object containing chunk identifier level identifier and chunk_slice - slice() object - :return: np.ndarray or None + Evaluate and return the data chunk for this quantity at the specified chunk_spec. + Calls samples(chunk_spec) recursively on inputs and passes the results to _operation. + + :param chunk_spec: ChunkSpec object with level_id, chunk_id, and optional slice + :return: np.ndarray (M, chunk_size, 2) or None """ chunks_quantity_level = [q.samples(chunk_spec) for q in self._input_quantities] return self._operation(*chunks_quantity_level) def _reduction_op(self, quantities, operation): """ + Helper for building a reduction Quantity from many inputs. 
+ + If any input is a non-constant Quantity, return a Quantity with the operation and inputs. + If all inputs are QuantityConst, evaluate the operation immediately and return QuantityConst. + :param quantities: List[Quantity] - :param operation: function which is run with given quantities + :param operation: Callable to apply :return: Quantity or QuantityConst """ for quantity in quantities: if not isinstance(quantity, QuantityConst): return Quantity(quantity.qtype, operation=operation, input_quantities=quantities) - # Quantity from QuantityConst instances + # All constant -> precompute value return QuantityConst(quantities[0].qtype, value=operation(*[q._value for q in quantities])) def select(self, *args): """ - Performs sample selection based on conditions - :param args: Quantity - :return: Quantity + Apply boolean selection masks to this Quantity's samples. + + :param args: One or more Quantity instances with BoolType that act as masks. + :return: Quantity representing the selected samples (mask applied on sample axis) """ - # args always has len() at least 1 + # First mask masks = args[0] + # Validate masks are BoolType for quantity in args: if not isinstance(quantity.qtype.base_qtype(), qt.BoolType): raise Exception("Quantity: {} doesn't have BoolType, instead it has QType: {}" .format(quantity, quantity.qtype.base_qtype())) - # More conditions leads to default AND + # Combine multiple masks with logical AND if len(args) > 1: for m in args[1:]: - masks = np.logical_and(masks, m) # method from this module + masks = np.logical_and(masks, m) def op(x, mask): - return x[..., mask, :] # [...sample size, cut number of samples, 2] + # Mask samples (reduce number of sample columns) + return x[..., mask, :] # [..., selected_samples, 2] + q = Quantity(quantity_type=self.qtype, input_quantities=[self, masks], operation=op) - q._selection_id = id(q) + q._selection_id = id(q) # mark selection id to ensure consistency return q def __array_ufunc__(self, ufunc, method, *args, 
**kwargs): + """ + Support numpy ufuncs by routing them through _method which constructs a new Quantity. + """ return Quantity._method(ufunc, method, *args, **kwargs) + # Arithmetic operator wrappers - build new Quantities or constants as needed. def __add__(self, other): return Quantity.create_quantity([self, Quantity.wrap(other)], Quantity.add_op) @@ -207,19 +241,17 @@ def __rmod__(self, other): @staticmethod def create_quantity(quantities, operation): """ - Create new quantity (Quantity or QuantityConst) based on given quantities and operation. - There are two scenarios: - 1. At least one of quantities is Quantity instance then all quantities are considered to be input_quantities - of new Quantity - 2. All of quantities are QuantityConst instances then new QuantityConst is created - :param quantities: List[Quantity] - :param operation: function which is run with given quantities - :return: Quantity + Create a new Quantity or QuantityConst. If any input is non-constant, return + a Quantity that will evaluate lazily. If all are constant, return QuantityConst. 
+ + :param quantities: list-like of Quantity / QuantityConst + :param operation: callable to combine inputs + :return: Quantity or QuantityConst """ for quantity in quantities: if not isinstance(quantity, QuantityConst): return Quantity(quantity.qtype, operation=operation, input_quantities=quantities) - # Quantity from QuantityConst instances + # all constant -> precompute return QuantityConst(quantities[0].qtype, value=operation(*[q._value for q in quantities])) @staticmethod @@ -245,35 +277,40 @@ def mod_op(x, y): @staticmethod def _process_mask(x, y, operator): """ - Create samples mask - All values for sample must meet given condition, if any value doesn't meet the condition, - whole sample is eliminated - :param x: Quantity chunk - :param y: Quantity chunk or int, float - :param operator: operator module function - :return: np.ndarray of bools + Create a boolean mask that marks full samples passing the given per-element condition. + + The operator is applied elementwise; then we require that *every* element within the sample + passes to keep that sample. This collapses non-sample axes and returns a 1-D boolean array. + + :param x: Quantity chunk (ndarray) + :param y: Quantity chunk or scalar + :param operator: operator module function like operator.lt + :return: 1-D boolean numpy array indexing samples """ mask = operator(x, y) + # collapse over spatial/time axes and per-sample axis, keep sample index axis return mask.all(axis=tuple(range(mask.ndim - 2))).all(axis=1) def _mask_quantity(self, other, op): """ - Create quantity that represent bool mask - :param other: number or Quantity - :param op: operation - :return: Quantity + Helper to build a BoolType Quantity representing comparisons (>, <, ==, etc.) 
+ + :param other: Quantity or scalar to compare with + :param op: operation callable that builds the boolean mask from chunked arrays + :return: Quantity producing a boolean mask per sample """ bool_type = qt.BoolType() - new_qtype = self.qtype - new_qtype = new_qtype.replace_scalar(bool_type) + new_qtype = self.qtype.replace_scalar(bool_type) other = Quantity.wrap(other) + # Only scalar base types support comparison if not isinstance(self.qtype.base_qtype(), qt.ScalarType) or not isinstance(other.qtype.base_qtype(), qt.ScalarType): raise TypeError("Quantity has base qtype {}. " "Quantities with base qtype ScalarType are the only ones that support comparison". format(self.qtype.base_qtype())) return Quantity(quantity_type=new_qtype, input_quantities=[self, other], operation=op) + # Comparison operators returning boolean mask Quantities def __lt__(self, other): def lt_op(x, y): return Quantity._process_mask(x, y, operator.lt) @@ -281,59 +318,66 @@ def lt_op(x, y): def __le__(self, other): def le_op(x, y): - return self._process_mask(x, y, operator.le) + return Quantity._process_mask(x, y, operator.le) return self._mask_quantity(other, le_op) def __gt__(self, other): def gt_op(x, y): - return self._process_mask(x, y, operator.gt) + return Quantity._process_mask(x, y, operator.gt) return self._mask_quantity(other, gt_op) def __ge__(self, other): def ge_op(x, y): - return self._process_mask(x, y, operator.ge) + return Quantity._process_mask(x, y, operator.ge) return self._mask_quantity(other, ge_op) def __eq__(self, other): def eq_op(x, y): - return self._process_mask(x, y, operator.eq) + return Quantity._process_mask(x, y, operator.eq) return self._mask_quantity(other, eq_op) def __ne__(self, other): def ne_op(x, y): - return self._process_mask(x, y, operator.ne) + return Quantity._process_mask(x, y, operator.ne) return self._mask_quantity(other, ne_op) @staticmethod def pick_samples(chunk, subsample_params): """ - Pick samples some samples from chunk in order to have 
'k' samples from 'n' after all chunks are processed - Inspired by https://dl.acm.org/doi/10.1145/23002.23003 method S + Subsample a chunk using Method S (hypergeometric sampling) so that across chunks + we end up with k samples from n total. - :param chunk: np.ndarray, shape M, N, 2, where N denotes number of samples in chunk - :param subsample_params: instance of SubsampleParams class, it has two parameters: - k: number of samples which we want to get from all chunks - n: number of all samples among all chunks - :return: np.ndarray + :param chunk: ndarray of shape (M, N, 2) where N is number of samples in this chunk + :param subsample_params: object with attributes k (remaining desired) and n (remaining available) + :return: selected sub-chunk array with shape (M, m, 2), where m is chosen by hypergeometric draw """ + # Draw how many to pick from this chunk using hypergeometric distribution size = scipy.stats.hypergeom(subsample_params.n, subsample_params.k, chunk.shape[1]).rvs(size=1) out = RNG.choice(chunk, size=size, axis=1) - subsample_params.k -= out.shape[1] - subsample_params.n -= chunk.shape[1] + subsample_params.k -= out.shape[1] # reduce remaining desired + subsample_params.n -= chunk.shape[1] # reduce remaining available return out def subsample(self, sample_vec): """ - Subsampling - :param sample_vec: list of number of samples at each level - :return: Quantity + Build a Quantity that implements subsampling across levels to obtain a specified + number of samples per level (sample_vec). + + Returns a Quantity whose operation will pick samples according to subsample params + stored per-level. Uses QuantityConst with a level-aware _adjust_value to pass + different parameters to each level chunk. + + :param sample_vec: list-like of desired numbers of samples per level + :return: Quantity producing subsampled chunks """ class SubsampleParams: + """ + Small helper to carry per-level parameters while subsampling across chunks. 
+ """ def __init__(self, num_subsample, num_collected): """ - Auxiliary object for subsampling - :param num_subsample: the number of samples we want to obtain from all samples - :param num_collected: total number of samples + :param num_subsample: desired number of samples to pick from this level + :param num_collected: total available samples on this level """ self._orig_k = num_subsample self._orig_n = num_collected @@ -342,36 +386,41 @@ def __init__(self, num_subsample, num_collected): self.n = num_collected self.total_n = num_collected - # SubsampleParams for each level + # Build params per level using level collected counts from the storage subsample_level_params = {key: SubsampleParams(sample_vec[key], value) for key, value in enumerate(self.get_quantity_storage().n_collected())} - # Create a QuantityConst of dictionary in the sense of hashing dictionary items + + # Wrap a hashed version of this parameters dict in a QuantityConst to feed into operation quantity_subsample_params = Quantity.wrap(hash(frozenset(subsample_level_params.items()))) def adjust_value(values, level_id): """ - Custom implementation of QuantityConst.adjust_value() - It allows us to get different parameters for different levels + Method assigned to QuantityConst._adjust_value so each level receives its own SubsampleParams. + Re-initializes k/n for repeated calls. 
""" subsample_l_params_obj = subsample_level_params[level_id] subsample_l_params_obj.k = subsample_l_params_obj._orig_k subsample_l_params_obj.n = subsample_l_params_obj._orig_n subsample_l_params_obj.total_n = subsample_l_params_obj._orig_total_n return subsample_l_params_obj + quantity_subsample_params._adjust_value = adjust_value + # Build resulting Quantity that uses pick_samples as its operation return Quantity(quantity_type=self.qtype.replace_scalar(qt.BoolType()), input_quantities=[self, quantity_subsample_params], operation=Quantity.pick_samples) def __getitem__(self, key): """ - Get items from Quantity, quantity type must support brackets access - :param key: str, int, tuple - :return: Quantity + Create a Quantity representing indexed/ sliced access into this quantity (similar to numpy slicing). + + :param key: index or slice or tuple interpreted by qtype.get_key + :return: Quantity restricted to the requested key """ - new_qtype, start = self.qtype.get_key(key) # New quantity type + new_qtype, start = self.qtype.get_key(key) # New quantity type for selection if not isinstance(self.qtype, qt.ArrayType): + # Convert key to a slice covering the sub-array if base is not ArrayType key = slice(start, start + new_qtype.size()) def _make_getitem_op(y): @@ -380,7 +429,11 @@ def _make_getitem_op(y): return Quantity(quantity_type=new_qtype, input_quantities=[self], operation=_make_getitem_op) def __getattr__(self, name): - static_fun = getattr(self.qtype, name) # We support only static function call forwarding + """ + Forward static QType methods as Quantity methods so that QType-level helpers are available + as operations on quantities (e.g., aggregation helpers). 
+ """ + static_fun = getattr(self.qtype, name) def apply_on_quantity(*attr, **d_attr): return static_fun(self, *attr, **d_attr) @@ -389,11 +442,12 @@ def apply_on_quantity(*attr, **d_attr): @staticmethod def _concatenate(quantities, qtype, axis=0): """ - Concatenate level_chunks - :param quantities: list of quantities - :param qtype: QType - :param axis: int - :return: Quantity + Construct a Quantity that concatenates multiple quantities along a given axis. + + :param quantities: sequence of Quantity instances + :param qtype: QType describing result shape + :param axis: axis along which concatenation happens + :return: Quantity that when evaluated concatenates input chunks """ def op_concatenate(*chunks): y = np.concatenate(tuple(chunks), axis=axis) @@ -403,12 +457,12 @@ def op_concatenate(*chunks): @staticmethod def _get_base_qtype(args_quantities): """ - Get quantities base Qtype - :param args_quantities: list of quantities and other passed arguments, - we expect at least one of the arguments is Quantity - :return: base QType, ScalarType if any quantity has that base type, otherwise BoolType + Determine base QType for arithmetic/ufunc results: if any argument has a ScalarType base + return ScalarType(), otherwise BoolType(). + + :param args_quantities: iterable containing Quantity instances and possibly other values + :return: base QType instance """ - # Either all quantities are BoolType or it is considered to be ScalarType for quantity in args_quantities: if isinstance(quantity, Quantity): if type(quantity.qtype.base_qtype()) == qt.ScalarType: @@ -418,14 +472,17 @@ def _get_base_qtype(args_quantities): @staticmethod def _method(ufunc, method, *args, **kwargs): """ - Process input parameters to perform numpy ufunc. - Get base QType of passed quantities, QuantityStorage instance, ... 
- Determine the resulting QType from the first few samples - :param ufunc: ufunc object that was called - :param method: string, indicating which Ufunc method was called - :param args: tuple of the input arguments to the ufunc - :param kwargs: dictionary containing the optional input arguments of the ufunc - :return: Quantity + Generic handler for numpy ufunc operations mapped to Quantities. + + 1) Wrap inputs as Quantities. + 2) Determine the result QType by calling the ufunc on a small sample. + 3) Return a new Quantity that performs the ufunc at evaluation time. + + :param ufunc: numpy ufunc object + :param method: method name to call on ufunc (e.g., '__call__' or 'reduce') + :param args: positional arguments passed to ufunc (may include Quantities) + :param kwargs: optional ufunc kwargs + :return: Quantity representing ufunc applied to inputs """ def _ufunc_call(*input_quantities_chunks): return getattr(ufunc, method)(*input_quantities_chunks, **kwargs) @@ -438,9 +495,10 @@ def _ufunc_call(*input_quantities_chunks): @staticmethod def wrap(value): """ - Convert flat, bool or array (list) to Quantity - :param value: flat, bool, array (list) or Quantity - :return: Quantity + Convert a primitive (int, float, bool), a numpy/list array, or an existing Quantity into a Quantity. + + :param value: scalar, bool, list/ndarray, or Quantity + :return: Quantity or QuantityConst wrapping the value """ if isinstance(value, Quantity): return value @@ -459,27 +517,32 @@ def wrap(value): @staticmethod def _result_qtype(method, quantities): """ - Determine QType from evaluation with given method and first few samples from storage - :param quantities: list of Quantities - :param method: ufunc function - :return: QType + Infer the resulting QType for an operation by evaluating the operation on the first + available chunk from each input quantity. 
+ + :param method: callable that takes input chunks and returns sample chunk result + :param quantities: list of Quantity instances + :return: inferred QType (ArrayType) """ chunks_quantity_level = [] for q in quantities: quantity_storage = q.get_quantity_storage() - # QuantityConst doesn't have QuantityStorage + # If QuantityConst (no storage), use an empty default ChunkSpec if quantity_storage is None: chunk_spec = ChunkSpec() else: chunk_spec = next(quantity_storage.chunks()) chunks_quantity_level.append(q.samples(chunk_spec)) - result = method(*chunks_quantity_level) # numpy array of [M, <=10, 2] + result = method(*chunks_quantity_level) # expect shape [M, <=10, 2] qtype = qt.ArrayType(shape=result.shape[0], qtype=Quantity._get_base_qtype(quantities)) return qtype @staticmethod def QArray(quantities): + """ + Build a Quantity representing an array-of-quantities aggregated into a single QType. + """ flat_quantities = np.array(quantities).flatten() qtype = Quantity._check_same_qtype(flat_quantities) array_type = qt.ArrayType(np.array(quantities).shape, qtype) @@ -487,24 +550,40 @@ def QArray(quantities): @staticmethod def QDict(key_quantity): + """ + Build a Quantity representing a dictionary of quantities. + :param key_quantity: iterable of (key, Quantity) + """ dict_type = qt.DictType([(key, quantity.qtype) for key, quantity in key_quantity]) return Quantity._concatenate(np.array(key_quantity)[:, 1], qtype=dict_type) @staticmethod def QTimeSeries(time_quantity): + """ + Build a Quantity representing a time series constructed from (time, Quantity) pairs. + """ qtype = Quantity._check_same_qtype(np.array(time_quantity)[:, 1]) times = np.array(time_quantity)[:, 0] return Quantity._concatenate(np.array(time_quantity)[:, 1], qtype=qt.TimeSeriesType(times=times, qtype=qtype)) - @staticmethod def QField(key_quantity): + """ + Build a Quantity representing a field (mapping of locations to quantities). 
+ """ Quantity._check_same_qtype(np.array(key_quantity)[:, 1]) field_type = qt.FieldType([(key, quantity.qtype) for key, quantity in key_quantity]) return Quantity._concatenate(np.array(key_quantity)[:, 1], qtype=field_type) @staticmethod def _check_same_qtype(quantities): + """ + Validate that all provided quantities share the same QType. + + :param quantities: sequence of Quantity instances + :return: the shared QType + :raise ValueError: if a mismatch is found + """ qtype = quantities[0].qtype for quantity in quantities[1:]: if qtype != quantity.qtype: @@ -513,26 +592,28 @@ def _check_same_qtype(quantities): class QuantityConst(Quantity): + """ + Represents a constant quantity whose value is stored directly in the instance. + The samples() method returns the constant value broadcasted to the requested chunk shape. + """ + def __init__(self, quantity_type, value): """ - QuantityConst class represents constant quantity and also provides operation - that can be performed with quantity values. - The quantity is constant, meaning that this class stores the data itself - :param quantity_type: QType instance - :param value: quantity value + :param quantity_type: QType describing the const + :param value: scalar or array-like value """ self.qtype = quantity_type self._value = self._process_value(value) + # No input dependencies for a constant self._input_quantities = [] - # List of input quantities should be empty, - # but we still need this attribute due to storage_id() and level_ids() method self._selection_id = None def _process_value(self, value): """ - Reshape value if array, otherwise create array first - :param value: quantity value - :return: value with shape [M, 1, 1] which suitable for further broadcasting + Ensure the constant is stored as an array with axes [M, 1, 1] suitable for broadcasting. 
+ + :param value: scalar or array-like + :return: ndarray shaped for broadcasting into (M, chunk_size, 2) """ if isinstance(value, (int, float, bool)): value = np.array([value]) @@ -540,42 +621,50 @@ def _process_value(self, value): def selection_id(self): """ - Get storage ids of all input quantities - :return: List[int] + Constants have no selection id (they are independent of storage). """ return self._selection_id def _adjust_value(self, value, level_id=None): """ - Allows process value based on chunk_epc params (such as level_id, ...). - The custom implementation is used in Qunatity.subsample method - :param value: np.ndarray - :param level_id: int - :return: np.ndarray, particular type depends on implementation + Hook to adjust constant value per-level. By default returns the stored value unchanged. + This method gets overridden by consumers (e.g., subsample) to provide level-specific params. + + :param value: constant value array + :param level_id: int, level index (optional) + :return: possibly adjusted value """ return value @cached(custom_key_maker=Quantity.get_cache_key) def samples(self, chunk_spec): """ - Get constant values with an enlarged number of axes - :param chunk_spec: object containing chunk identifier level identifier and chunk_slice - slice() object - :return: np.ndarray + Return the constant value, optionally adjusted for the given level via _adjust_value. + + :param chunk_spec: ChunkSpec with level_id + :return: ndarray representing the constant for this chunk """ return self._adjust_value(self._value, chunk_spec.level_id) class QuantityMean: + """ + Container for aggregated mean/variance results computed by mlmc.quantity.quantity_estimate.estimate_mean. 
+ + - qtype: QType of the quantity + - _l_means: per-level mean contributions (L x M flattened) + - _l_vars: per-level variance contributions (L x M flattened) + - _n_samples: number of samples used per level + - _n_rm_samples: number of removed samples per level + """ def __init__(self, quantity_type, l_means, l_vars, n_samples, n_rm_samples): """ - QuantityMean represents results of mlmc.quantity_estimate.estimate_mean method :param quantity_type: QType - :param l_means: np.ndarray, shape: L, M - :param l_vars: np.ndarray, shape: L, M - :param n_samples: List, number of samples that were used for means at each level - :param n_rm_samples: List, number of removed samples at each level, - n_samples + n_rm_samples = all successfully collected samples + :param l_means: ndarray shape (L, M_flat) of level-wise mean contributions + :param l_vars: ndarray shape (L, M_flat) of level-wise variance contributions + :param n_samples: list/ndarray length L with number of samples used per level + :param n_rm_samples: list/ndarray length L with removed samples count per level """ self.qtype = quantity_type self._mean = None @@ -587,29 +676,43 @@ def __init__(self, quantity_type, l_means, l_vars, n_samples, n_rm_samples): def _calculate_mean_var(self): """ - Calculates the overall estimates of the mean and the variance from the means and variances at each level + Compute overall mean and variance from per-level contributions: + mean = sum_l l_means[l] + var = sum_l (l_vars[l] / n_samples[l]) """ self._mean = np.sum(self._l_means, axis=0) self._var = np.sum(self._l_vars / self._n_samples[:, None], axis=0) @property def mean(self): + """ + Reshaped overall mean according to QType. + """ if self._mean is None: self._calculate_mean_var() return self._reshape(self._mean) @property def var(self): + """ + Reshaped overall variance according to QType. 
+ """ if self._var is None: self._calculate_mean_var() return self._reshape(self._var) @property def l_means(self): + """ + Level means reshaped according to QType for each level. + """ return np.array([self._reshape(means) for means in self._l_means]) @property def l_vars(self): + """ + Level variances reshaped according to QType for each level. + """ return np.array([self._reshape(vars) for vars in self._l_vars]) @property @@ -622,74 +725,96 @@ def n_rm_samples(self): def _reshape(self, data): """ - Reshape passed data, expected means or vars - :param data: flatten np.ndarray - :return: np.ndarray, reshaped data, the final data shape depends on the particular QType - there is currently a reshape for ArrayType only + Reshape a flat data vector (flattened M) into the structure determined by qtype. + + :param data: flattened ndarray + :return: reshaped ndarray according to qtype """ return self.qtype.reshape(data) def __getitem__(self, key): """ - Get item from current QuantityMean, quantity type must support brackets access - All levels means and vars are reshaped to their QType shape and then the item is gotten, - ath the end, new QuantityMean instance is created with flatten selected means and vars - :param key: str, int, tuple - :return: QuantityMean + Index into QuantityMean similarly to Quantity.__getitem__: + reshape level-wise means/vars and select the requested key, then return a new QuantityMean. + + :param key: indexing key (int, slice, str, etc.) 
+ :return: QuantityMean restricted to the requested key """ new_qtype, start = self.qtype.get_key(key) # New quantity type if not isinstance(self.qtype, qt.ArrayType): key = slice(start, start + new_qtype.size()) - # Getting items, it performs reshape inside + # Selecting and reshaping level arrays l_means = self.l_means[:, key] l_vars = self.l_vars[:, key] - return QuantityMean(quantity_type=new_qtype, l_means=l_means.reshape((l_means.shape[0], -1)), - l_vars=l_vars.reshape((l_vars.shape[0], -1)), n_samples=self._n_samples, + return QuantityMean(quantity_type=new_qtype, + l_means=l_means.reshape((l_means.shape[0], -1)), + l_vars=l_vars.reshape((l_vars.shape[0], -1)), + n_samples=self._n_samples, n_rm_samples=self._n_rm_samples) class QuantityStorage(Quantity): + """ + Special Quantity that provides direct access to SampleStorage. + It implements the bridge between storage and the Quantity abstraction. + """ + def __init__(self, storage, qtype): """ - Special Quantity for direct access to SampleStorage - :param storage: mlmc._sample_storage.SampleStorage child - :param qtype: QType + :param storage: SampleStorage instance (in-memory or HDF5, etc.) + :param qtype: QType describing stored data structure """ + # Store underlying storage reference and QType self._storage = storage self.qtype = qtype + # No operation or inputs required for storage root self._input_quantities = [] self._operation = None def level_ids(self): """ - Number of levels + Return list of available level ids from the SampleStorage. :return: List[int] """ return self._storage.get_level_ids() def selection_id(self): """ - Identity of QuantityStorage instance + Identity of this QuantityStorage (unique by object id). :return: int """ return id(self) def get_quantity_storage(self): + """ + For QuantityStorage the storage is itself. + :return: self + """ return self def chunks(self, level_id=None): + """ + Proxy to SampleStorage.chunks which yields ChunkSpec instances describing available chunks. 
+ :param level_id: optional level id to restrict chunks + :return: generator of ChunkSpec + """ return self._storage.chunks(level_id) def samples(self, chunk_spec): """ - Get results for given level id and chunk id - :param chunk_spec: object containing chunk identifier level identifier and chunk_slice - slice() object - :return: Array[M, chunk size, 2] + Retrieve stored sample pairs for the requested level/chunk. + + :param chunk_spec: ChunkSpec describing (level, chunk slice) + :return: ndarray shaped [M, chunk_size, 2] where M is number of result quantities """ return self._storage.sample_pairs_level(chunk_spec) # Array[M, chunk size, 2] def n_collected(self): + """ + Return number of collected results per level from the underlying SampleStorage. + :return: list of ints + """ return self._storage.get_n_collected() diff --git a/mlmc/quantity/quantity_estimate.py b/mlmc/quantity/quantity_estimate.py index 2436d7b9..e2553138 100644 --- a/mlmc/quantity/quantity_estimate.py +++ b/mlmc/quantity/quantity_estimate.py @@ -5,102 +5,146 @@ def mask_nan_samples(chunk): """ - Mask out samples that contain NaN in either fine or coarse part of the result - :param chunk: np.ndarray [M, chunk_size, 2] - :return: chunk: np.ndarray, number of masked samples: int + Remove (mask out) samples containing NaN values in either the fine or coarse part of the result. + + :param chunk: np.ndarray of shape [M, chunk_size, 2] + M - quantity size (number of scalar components), + chunk_size - number of samples in the chunk, + 2 - fine and coarse parts of the result. + :return: (filtered_chunk, n_masked) + filtered_chunk: np.ndarray with invalid samples removed, + n_masked: int, number of masked (removed) samples. """ - # Fine and coarse moments_fn mask + # Identify any sample with NaNs in its fine or coarse component mask = np.any(np.isnan(chunk), axis=0).any(axis=1) return chunk[..., ~mask, :], np.count_nonzero(mask) def cache_clear(): + """ + Clear cached Quantity sample evaluations. 
+ + Used before running MLMC estimations to ensure fresh data is fetched from storage. + """ mlmc.quantity.quantity.Quantity.samples.cache_clear() mlmc.quantity.quantity.QuantityConst.samples.cache_clear() -def estimate_mean(quantity): +def estimate_mean(quantity, form="diff", operation_func=None, **kwargs): """ - MLMC mean estimator. - The MLMC method is used to compute the mean estimate to the Quantity dependent on the collected samples. - The squared error of the estimate (the estimator variance) is estimated using the central limit theorem. - Data is processed by chunks, so that it also supports big data processing - :param quantity: Quantity - :return: QuantityMean which holds both mean and variance + Estimate the MLMC mean (and variance) of a Quantity using multilevel sampling. + + The function computes per-level means and variances from simulation results. + Supports large datasets via chunked processing and handles NaN-masked samples. + + :param quantity: Quantity instance to estimate. + :param form: str, type of estimation: + - "diff": estimate based on differences (fine - coarse) → standard MLMC approach. + - "fine": estimate using fine-level data only. + - "coarse": estimate using coarse-level data only. + :param operation_func: Optional transformation applied to chunk data before accumulation + (e.g., for moment or kurtosis computation). + :param kwargs: Additional keyword arguments passed to operation_func. + :return: QuantityMean object containing mean, variance, and sample statistics per level. 
""" + # Reset cached quantity evaluations cache_clear() + quantity_vec_size = quantity.size() sums = None sums_of_squares = None - # initialization + # Initialize level-specific storage quantity_storage = quantity.get_quantity_storage() level_ids = quantity_storage.level_ids() n_levels = np.max(level_ids) + 1 n_samples = [0] * n_levels n_rm_samples = [0] * n_levels + # Iterate through data chunks for chunk_spec in quantity_storage.chunks(): samples = quantity.samples(chunk_spec) chunk, n_mask_samples = mask_nan_samples(samples) n_samples[chunk_spec.level_id] += chunk.shape[1] n_rm_samples[chunk_spec.level_id] += n_mask_samples - # No samples in chunk + # Skip empty chunks if chunk.shape[1] == 0: continue - assert (chunk.shape[0] == quantity_vec_size) + assert chunk.shape[0] == quantity_vec_size - # Set variables for level sums and sums of squares + # Allocate accumulators at first valid chunk if sums is None: sums = [np.zeros(chunk.shape[0]) for _ in range(n_levels)] sums_of_squares = [np.zeros(chunk.shape[0]) for _ in range(n_levels)] - if chunk_spec.level_id == 0: + # Select appropriate data form for the estimator + if form == "fine": chunk_diff = chunk[:, :, 0] + elif form == "coarse": + chunk_diff = np.zeros_like(chunk[:, :, 0]) if chunk_spec.level_id == 0 else chunk[:, :, 1] else: - chunk_diff = chunk[:, :, 0] - chunk[:, :, 1] + # Default MLMC difference (fine - coarse) + chunk_diff = chunk[:, :, 0] if chunk_spec.level_id == 0 else chunk[:, :, 0] - chunk[:, :, 1] + + # Optional user-defined transformation of data + if operation_func is not None: + chunk_diff = operation_func(chunk_diff, chunk_spec, **kwargs) + # Accumulate sums and squared sums for this level sums[chunk_spec.level_id] += np.sum(chunk_diff, axis=1) - sums_of_squares[chunk_spec.level_id] += np.sum(chunk_diff**2, axis=1) + sums_of_squares[chunk_spec.level_id] += np.sum(chunk_diff ** 2, axis=1) if sums is None: - raise Exception("All samples were masked") + raise Exception("All samples were masked 
(no valid data found).") + # Compute means and variances for each level l_means = [] l_vars = [] for s, sp, n in zip(sums, sums_of_squares, n_samples): l_means.append(s / n) if n > 1: - l_vars.append((sp - (s ** 2 / n)) / (n-1)) + l_vars.append((sp - (s ** 2 / n)) / (n - 1)) else: l_vars.append(np.full(len(s), np.inf)) - return mlmc.quantity.quantity.QuantityMean(quantity.qtype, l_means=l_means, l_vars=l_vars, n_samples=n_samples, - n_rm_samples=n_rm_samples) + # Construct QuantityMean object with level statistics + return mlmc.quantity.quantity.QuantityMean( + quantity.qtype, + l_means=l_means, + l_vars=l_vars, + n_samples=n_samples, + n_rm_samples=n_rm_samples + ) def moment(quantity, moments_fn, i=0): """ - Create quantity with operation that evaluates particular moment - :param quantity: Quantity instance - :param moments_fn: mlmc.moments.Moments child - :param i: index of moment - :return: Quantity + Construct a Quantity that represents a single statistical moment. + + :param quantity: Base Quantity instance. + :param moments_fn: Instance of mlmc.moments.Moments defining the moment computation. + :param i: Index of the moment to compute. + :return: New Quantity that computes the i-th moment. """ def eval_moment(x): return moments_fn.eval_single_moment(i, value=x) - return mlmc.quantity.quantity.Quantity(quantity_type=quantity.qtype, input_quantities=[quantity], operation=eval_moment) + + return mlmc.quantity.quantity.Quantity( + quantity_type=quantity.qtype, + input_quantities=[quantity], + operation=eval_moment + ) def moments(quantity, moments_fn, mom_at_bottom=True): """ - Create quantity with operation that evaluates moments_fn - :param quantity: Quantity - :param moments_fn: mlmc.moments.Moments child - :param mom_at_bottom: bool, if True moments are underneath, - a scalar is substituted with an array of moments of that scalar - :return: Quantity + Construct a Quantity representing all moments defined by a given Moments object. 
+ + :param quantity: Base Quantity. + :param moments_fn: mlmc.moments.Moments child defining moment evaluations. + :param mom_at_bottom: bool, if True, moments are added at the lowest (scalar) level of the Quantity type. + :return: Quantity that computes all defined moments. """ def eval_moments(x): if mom_at_bottom: @@ -109,48 +153,85 @@ def eval_moments(x): mom = moments_fn.eval_all(x).transpose((3, 0, 1, 2)) # [R, M, N, 2] return mom.reshape((np.prod(mom.shape[:-2]), mom.shape[-2], mom.shape[-1])) # [M, N, 2] - # Create quantity type which has moments_fn at the bottom + # Define new Quantity type according to desired hierarchy if mom_at_bottom: moments_array_type = qt.ArrayType(shape=(moments_fn.size,), qtype=qt.ScalarType()) moments_qtype = quantity.qtype.replace_scalar(moments_array_type) - # Create quantity type that has moments_fn on the surface else: moments_qtype = qt.ArrayType(shape=(moments_fn.size,), qtype=quantity.qtype) - return mlmc.quantity.quantity.Quantity(quantity_type=moments_qtype, input_quantities=[quantity], operation=eval_moments) + + return mlmc.quantity.quantity.Quantity( + quantity_type=moments_qtype, + input_quantities=[quantity], + operation=eval_moments + ) def covariance(quantity, moments_fn, cov_at_bottom=True): """ - Create quantity with operation that evaluates covariance matrix - :param quantity: Quantity - :param moments_fn: mlmc.moments.Moments child - :param cov_at_bottom: bool, if True cov matrices are underneath, - a scalar is substituted with a matrix of moments of that scalar - :return: Quantity + Construct a Quantity representing covariance matrices of the given moments. + + :param quantity: Base Quantity. + :param moments_fn: mlmc.moments.Moments child defining moment evaluations. + :param cov_at_bottom: bool, if True covariance matrices are attached at the scalar level of the Quantity type. + :return: Quantity that computes covariance matrices. 
""" def eval_cov(x): + # Compute all moments (fine and coarse) moments = moments_fn.eval_all(x) mom_fine = moments[..., 0, :] cov_fine = np.einsum('...i,...j', mom_fine, mom_fine) if moments.shape[-2] == 1: + # Single level (no coarse) cov = np.array([cov_fine]) else: mom_coarse = moments[..., 1, :] cov_coarse = np.einsum('...i,...j', mom_coarse, mom_coarse) cov = np.array([cov_fine, cov_coarse]) + # Reshape covariance according to desired data layout if cov_at_bottom: - cov = cov.transpose((1, 3, 4, 2, 0)) # [M, R, R, N, 2] + cov = cov.transpose((1, 3, 4, 2, 0)) # [M, R, R, N, 2] else: - cov = cov.transpose((3, 4, 1, 2, 0)) # [R, R, M, N, 2] + cov = cov.transpose((3, 4, 1, 2, 0)) # [R, R, M, N, 2] return cov.reshape((np.prod(cov.shape[:-2]), cov.shape[-2], cov.shape[-1])) - # Create quantity type which has covariance matrices at the bottom + # Adjust Quantity type for covariance structure if cov_at_bottom: - moments_array_type = qt.ArrayType(shape=(moments_fn.size, moments_fn.size, ), qtype=qt.ScalarType()) + moments_array_type = qt.ArrayType(shape=(moments_fn.size, moments_fn.size), qtype=qt.ScalarType()) moments_qtype = quantity.qtype.replace_scalar(moments_array_type) - # Create quantity type that has covariance matrices on the surface else: - moments_qtype = qt.ArrayType(shape=(moments_fn.size, moments_fn.size, ), qtype=quantity.qtype) - return mlmc.quantity.quantity.Quantity(quantity_type=moments_qtype, input_quantities=[quantity], operation=eval_cov) + moments_qtype = qt.ArrayType(shape=(moments_fn.size, moments_fn.size), qtype=quantity.qtype) + + return mlmc.quantity.quantity.Quantity( + quantity_type=moments_qtype, + input_quantities=[quantity], + operation=eval_cov + ) + + +def kurtosis_numerator(chunk_diff, chunk_spec, l_means): + """ + Compute the numerator for the sample kurtosis: + E[(Y_l - E[Y_l])^4] + :param chunk_diff: np.ndarray [quantity shape, number of samples] + :param chunk_spec: quantity_spec.ChunkSpec describing current level and chunk. 
+ :param l_means: List of per-level means used for centering. + :return: np.ndarray of the same shape as input. + """ + return (chunk_diff - l_means[chunk_spec.level_id]) ** 4 + + +def level_kurtosis(quantity, means_obj): + """ + Estimate the sample kurtosis for each level: + E[(Y_l - E[Y_l])^4] / (Var[Y_l])^2, where Y_l = fine_l - coarse_l + + :param quantity: Quantity instance. + :param means_obj: QuantityMean object containing level means and variances. + :return: np.ndarray of kurtosis values per level. + """ + numerator_means_obj = estimate_mean(quantity, operation_func=kurtosis_numerator, l_means=means_obj.l_means) + kurtosis = numerator_means_obj.l_means / (means_obj.l_vars) ** 2 + return kurtosis diff --git a/mlmc/quantity/quantity_spec.py b/mlmc/quantity/quantity_spec.py index ea25dc66..377adad8 100644 --- a/mlmc/quantity/quantity_spec.py +++ b/mlmc/quantity/quantity_spec.py @@ -5,24 +5,56 @@ @attr.s(auto_attribs=True, eq=False) class QuantitySpec: + """ + Specification of a physical quantity for simulation or data storage. + + :param name: Name of the quantity (e.g. 'pressure', 'velocity'). + :param unit: Unit of the quantity (e.g. 'm/s', 'Pa'). + :param shape: Tuple describing the shape of the data (e.g. (64, 64)). + :param times: List of time points associated with this quantity. + :param locations: List of either string-based identifiers or 3D coordinates + (x, y, z) where the quantity is defined. + """ + name: str unit: str shape: Tuple[int, int] times: List[float] locations: Union[List[str], List[Tuple[float, float, float]]] - # Note: auto generated eq raises ValueError def __eq__(self, other): - if (self.name, self.unit) == (other.name, other.unit) \ - and np.array_equal(self.shape, other.shape)\ - and np.array_equal(self.times, other.times)\ - and not (set(self.locations) - set(other.locations)): - return True - return False + """ + Compare two QuantitySpec instances for equality. + + :param other: Another QuantitySpec instance to compare with. 
+ :return: True if both instances describe the same quantity, False otherwise. + """ + if not isinstance(other, QuantitySpec): + return False + + # Compare name, unit, shape, and times + same_basic_attrs = ( + (self.name, self.unit) == (other.name, other.unit) + and np.array_equal(self.shape, other.shape) + and np.array_equal(self.times, other.times) + ) + + # Compare locations (set difference = ∅ → same) + same_locations = not (set(self.locations) - set(other.locations)) + + return same_basic_attrs and same_locations @attr.s(auto_attribs=True) class ChunkSpec: + """ + Specification of a simulation or dataset chunk. + + :param chunk_id: Integer identifier of the chunk. + :param chunk_slice: Slice object defining the range of data indices in the chunk. + :param level_id: Identifier of the refinement or simulation level. + """ + chunk_id: int = None chunk_slice: slice = None level_id: int = None diff --git a/mlmc/quantity/quantity_types.py b/mlmc/quantity/quantity_types.py index 41d6ea7f..56b42b69 100644 --- a/mlmc/quantity/quantity_types.py +++ b/mlmc/quantity/quantity_types.py @@ -7,23 +7,37 @@ class QType(metaclass=abc.ABCMeta): + """ + Base class for quantity types. + + :param qtype: inner/contained QType or Python type + """ + def __init__(self, qtype): self._qtype = qtype def size(self) -> int: """ - Size of type + Size of the type in flattened units. + :return: int """ + raise NotImplementedError def base_qtype(self): + """ + Return the base scalar/bool type for nested types. + + :return: QType + """ return self._qtype.base_qtype() def replace_scalar(self, substitute_qtype): """ - Find ScalarType and replace it with substitute_qtype - :param substitute_qtype: QType, replaces ScalarType - :return: QType + Find ScalarType and replace it with substitute_qtype. 
+ + :param substitute_qtype: QType that replaces ScalarType + :return: QType (new instance with scalar replaced) """ inner_qtype = self._qtype.replace_scalar(substitute_qtype) new_qtype = copy.deepcopy(self) @@ -31,85 +45,121 @@ def replace_scalar(self, substitute_qtype): return new_qtype @staticmethod - def keep_dims(chunk): + def keep_dims(chunk: np.ndarray) -> np.ndarray: """ - Always keep chunk shape to be [M, chunk size, 2]! - For scalar quantities, the input block can have the shape (chunk size, 2) - Sometimes we need to 'flatten' first few shape to have desired chunk shape - :param chunk: list - :return: list + Ensure chunk has shape [M, chunk size, 2]. + + For scalar quantities the input block can have shape (chunk size, 2). + Sometimes we need to 'flatten' first few dimensions to achieve desired chunk shape. + + :param chunk: numpy array + :return: numpy array with shape [M, chunk size, 2] + :raises ValueError: if chunk.ndim < 2 """ # Keep dims [M, chunk size, 2] if len(chunk.shape) == 2: chunk = chunk[np.newaxis, :] elif len(chunk.shape) > 2: - chunk = chunk.reshape((np.prod(chunk.shape[:-2]), chunk.shape[-2], chunk.shape[-1])) + chunk = chunk.reshape((int(np.prod(chunk.shape[:-2])), chunk.shape[-2], chunk.shape[-1])) else: - raise ValueError("Chunk shape not supported") + raise ValueError("Chunk shape not supported: need ndim >= 2") return chunk - def _make_getitem_op(self, chunk, key): + def _make_getitem_op(self, chunk: np.ndarray, key): """ - Operation - :param chunk: level chunk, list with shape [M, chunk size, 2] - :param key: parent QType's key, needed for ArrayType - :return: list + Extract a slice from chunk while preserving chunk dims. 
+ + :param chunk: level chunk, numpy array with shape [M, chunk size, 2] + :param key: index/slice used by parent QType + :return: numpy array with shape [M', chunk size', 2] """ return QType.keep_dims(chunk[key]) - def reshape(self, data): + def reshape(self, data: np.ndarray) -> np.ndarray: + """ + Default reshape (identity). + + :param data: numpy array + :return: numpy array + """ return data class ScalarType(QType): + """ + Scalar quantity type (leaf type). + """ + def __init__(self, qtype=float): + """ + :param qtype: Python type or nested type used as underlying scalar type + """ self._qtype = qtype def base_qtype(self): + """ + :return: base scalar QType (self or underlying BoolType base) + """ if isinstance(self._qtype, BoolType): return self._qtype.base_qtype() return self def size(self) -> int: - if hasattr(self._qtype, 'size'): + """ + :return: int size of the scalar (defaults to 1 or uses `_qtype.size()` if present) + """ + if hasattr(self._qtype, "size"): return self._qtype.size() return 1 def replace_scalar(self, substitute_qtype): """ - Find ScalarType and replace it with substitute_qtype - :param substitute_qtype: QType, replaces ScalarType - :return: QType + Replace ScalarType with substitute type. + + :param substitute_qtype: QType that replaces ScalarType + :return: substitute_qtype """ return substitute_qtype class BoolType(ScalarType): + """ + Boolean scalar type (inherits ScalarType). + """ pass class ArrayType(QType): - def __init__(self, shape, qtype: QType): + """ + Array quantity type. 
+ :param shape: int or tuple describing array shape + :param qtype: contained QType for array elements + """ + + def __init__(self, shape, qtype: QType): if isinstance(shape, int): shape = (shape,) - self._shape = shape self._qtype = qtype def size(self) -> int: - return np.prod(self._shape) * self._qtype.size() + """ + :return: total flattened size (product of shape * inner qtype size) + """ + return int(np.prod(self._shape)) * int(self._qtype.size()) def get_key(self, key): """ - ArrayType indexing + ArrayType indexing. + :param key: int, tuple of ints or slice objects - :return: QuantityType - ArrayType or self._qtype + :return: Tuple (QuantityType, offset) where offset is 0 for this implementation """ - # Get new shape + # Get new shape by applying indexing on an empty array of the target shape new_shape = np.empty(self._shape)[key].shape - # One selected item is considered to be a scalar QType + # If one selected item is considered to be a scalar QType if len(new_shape) == 1 and new_shape[0] == 1: new_shape = () @@ -121,26 +171,42 @@ def get_key(self, key): q_type = self._qtype return q_type, 0 - def _make_getitem_op(self, chunk, key): + def _make_getitem_op(self, chunk: np.ndarray, key): """ - Operation - :param chunk: list [M, chunk size, 2] - :param key: slice - :return: + Slice operation for ArrayType while restoring original shape. + + :param chunk: numpy array [M, chunk size, 2] + :param key: slice or index to apply on the array-shaped leading dims + :return: numpy array with preserved dims via QType.keep_dims """ - # Reshape M to original shape to allow access assert self._shape is not None chunk = chunk.reshape((*self._shape, chunk.shape[-2], chunk.shape[-1])) return QType.keep_dims(chunk[key]) - def reshape(self, data): + def reshape(self, data: np.ndarray) -> np.ndarray: + """ + Reshape flattened data to array shape. 
+ + :param data: numpy array + :return: reshaped numpy array + """ if isinstance(self._qtype, ScalarType): return data.reshape(self._shape) else: - return data.reshape((*self._shape, np.prod(data.shape) // np.prod(self._shape))) + # assume trailing dimension belongs to inner types + total = np.prod(data.shape) + leading = int(np.prod(self._shape)) + return data.reshape((*self._shape, int(total // leading))) class TimeSeriesType(QType): + """ + Time-series quantity type. + + :param times: iterable of time points + :param qtype: QType for each time slice + """ + def __init__(self, times, qtype): if isinstance(times, np.ndarray): times = times.tolist() @@ -148,96 +214,176 @@ def __init__(self, times, qtype): self._qtype = qtype def size(self) -> int: - return len(self._times) * self._qtype.size() + """ + :return: total size = number of time points * inner qtype.size() + """ + return len(self._times) * int(self._qtype.size()) def get_key(self, key): + """ + Get a qtype and offset corresponding to a given time key. + + :param key: time value to locate + :return: Tuple (q_type, offset) + """ q_type = self._qtype try: position = self._times.index(key) - except KeyError: - print("Item " + str(key) + " was not found in TimeSeries" + ". Available items: " + str(list(self._times))) + except ValueError: + # keep behavior similar to original: print available items + print( + "Item " + + str(key) + + " was not found in TimeSeries" + + ". Available items: " + + str(list(self._times)) + ) + # raise to make the error explicit + raise return q_type, position * q_type.size() @staticmethod def time_interpolation(quantity, value): """ - Interpolation in time - :param quantity: Quantity instance - :param value: point where to interpolate - :return: Quantity + Interpolate a time-series quantity to a single time value. 
+ + :param quantity: Quantity instance with qtype being a TimeSeriesType + :param value: float time value where to interpolate + :return: Quantity object representing interpolated value """ def interp(y): - split_indeces = np.arange(1, len(quantity.qtype._times)) * quantity.qtype._qtype.size() - y = np.split(y, split_indeces, axis=-3) + split_indices = np.arange(1, len(quantity.qtype._times)) * quantity.qtype._qtype.size() + y = np.split(y, split_indices, axis=-3) f = interpolate.interp1d(quantity.qtype._times, y, axis=0) return f(value) - return mlmc.quantity.quantity.Quantity(quantity_type=quantity.qtype._qtype, input_quantities=[quantity], operation=interp) + + return mlmc.quantity.quantity.Quantity( + quantity_type=quantity.qtype._qtype, + input_quantities=[quantity], + operation=interp + ) class FieldType(QType): + """ + Field type composed of named entries each having the same base qtype. + + :param args: List of (name, QType) pairs + """ + def __init__(self, args: List[Tuple[str, QType]]): - """ - QType must have same structure - :param args: - """ self._dict = dict(args) self._qtype = args[0][1] assert all(q_type.size() == self._qtype.size() for _, q_type in args) def size(self) -> int: - return len(self._dict.keys()) * self._qtype.size() + """ + :return: total size = number of fields * inner qtype size + """ + return len(self._dict.keys()) * int(self._qtype.size()) def get_key(self, key): + """ + Access sub-field by name. + + :param key: field name + :return: Tuple (q_type, offset) + """ q_type = self._qtype try: position = list(self._dict.keys()).index(key) - except KeyError: - print("Key " + str(key) + " was not found in FieldType" + - ". Available keys: " + str(list(self._dict.keys())[:5]) + "...") + except ValueError: + print( + "Key " + + str(key) + + " was not found in FieldType" + + ". Available keys: " + + str(list(self._dict.keys())[:5]) + + "..." 
+ ) + raise return q_type, position * q_type.size() class DictType(QType): + """ + Dictionary-like type of named QTypes which may differ in size. + + :param args: List of (name, QType) pairs + """ + def __init__(self, args: List[Tuple[str, QType]]): - self._dict = dict(args) # Be aware we it is ordered dictionary + self._dict = dict(args) # keep ordered mapping semantics self._check_base_type() def _check_base_type(self): + """ + Ensure all contained qtypes share the same base_qtype. + + :raises TypeError: if base_qtypes differ + """ qtypes = list(self._dict.values()) qtype_0_base_type = qtypes[0].base_qtype() for qtype in qtypes[1:]: if not isinstance(qtype.base_qtype(), type(qtype_0_base_type)): - raise TypeError("qtype {} has base QType {}, expecting {}. " - "All QTypes must have same base QType, either SacalarType or BoolType". - format(qtype, qtype.base_qtype(), qtype_0_base_type)) + raise TypeError( + "qtype {} has base QType {}, expecting {}. " + "All QTypes must have same base QType, either ScalarType or BoolType".format( + qtype, qtype.base_qtype(), qtype_0_base_type + ) + ) def base_qtype(self): + """ + :return: base_qtype of the first element + """ return next(iter(self._dict.values())).base_qtype() def size(self) -> int: + """ + :return: total flattened size (sum of sizes of contained qtypes) + """ return int(sum(q_type.size() for _, q_type in self._dict.items())) def get_qtypes(self): + """ + :return: iterable of contained qtypes + """ return self._dict.values() def replace_scalar(self, substitute_qtype): """ - Find ScalarType and replace it with substitute_qtype - :param substitute_qtype: QType, replaces ScalarType - :return: DictType + Replace scalar types recursively inside dict entries. 
+ + :param substitute_qtype: QType that replaces ScalarType + :return: new DictType instance """ dict_items = [] for key, qtype in self._dict.items(): new_qtype = qtype.replace_scalar(substitute_qtype) - dict_items.append((key, new_qtype)) + dict_items.append((key, new_qtype)) return DictType(dict_items) def get_key(self, key): + """ + Return the QType and starting offset for a named key. + + :param key: name of entry + :return: Tuple (q_type, start_offset) + """ try: q_type = self._dict[key] except KeyError: - print("Key " + str(key) + " was not found in DictType" + - ". Available keys: " + str(list(self._dict.keys())[:5]) + "...") + print( + "Key " + + str(key) + + " was not found in DictType" + + ". Available keys: " + + str(list(self._dict.keys())[:5]) + + "..." + ) + raise + start = 0 for k, qt in self._dict.items(): if k == key: diff --git a/mlmc/random/correlated_field.py b/mlmc/random/correlated_field.py index ba83e15b..cfb19c51 100644 --- a/mlmc/random/correlated_field.py +++ b/mlmc/random/correlated_field.py @@ -12,16 +12,17 @@ def kozeny_carman(porosity, m, factor, viscosity): """ Kozeny-Carman law. Empirical relationship between porosity and conductivity. + :param porosity: Porosity value. :param m: Power. Suitable values are 1 < m < 4 - :param factor: [m^2] - E.g. 1e-7 , m = 3.48; juta fibers - 2.2e-8 , 1.46; glass fibers - 1.8e-13, 2.89; erruptive material - 1e-12 2.76; erruptive material - 1.8e-12 1.99; basalt - :param viscosity: [Pa . s], water: 8.90e-4 - :return: + :param factor: Factor [m^2]. 
Examples: + 1e-7 , m = 3.48; juta fibers + 2.2e-8 , m = 1.46; glass fibers + 1.8e-13, m = 2.89; erruptive material + 1e-12 , m = 2.76; erruptive material + 1.8e-12, m = 1.99; basalt + :param viscosity: Fluid viscosity [Pa.s], e.g., water: 8.90e-4 + :return: Conductivity """ assert np.all(viscosity > 1e-10) porosity = np.minimum(porosity, 1-1e-10) @@ -33,10 +34,12 @@ def kozeny_carman(porosity, m, factor, viscosity): def positive_to_range(exp, a, b): """ - Mapping a positive parameter 'exp' from the interval <0, \infty) to the interval . + + :param exp: Positive parameter (e.g., lognormal variable) + :param a: Lower bound of target interval + :param b: Upper bound of target interval + :return: Mapped value in [a, b) """ return b * (1 - (b - a) / (b + (b - a) * exp)) @@ -44,12 +47,12 @@ def positive_to_range(exp, a, b): class Field: def __init__(self, name, field=None, param_fields=[], regions=[]): """ - :param name: Name of the field. - :param field: scalar (const field), or instance of SpatialCorrelatedField, or a callable - for evaluation of the field from its param_fields. - :param regions: Domain where field is sampled. - :param param_fields: List of names of parameter fields, dependees. - TODO: consider three different derived classes for: const, random and func fields. + Initialize a Field object. 
+ + :param name: Name of the field + :param field: Scalar (const), RandomFieldBase, or callable function + :param param_fields: List of dependent parameter fields + :param regions: List of region names where the field is defined """ self.correlated_field = None self.const = None @@ -67,8 +70,6 @@ def __init__(self, name, field=None, param_fields=[], regions=[]): assert len(param_fields) == 0 else: assert len(param_fields) > 0, field - - # check callable try: params = [np.ones(2) for i in range(len(param_fields))] field(*params) @@ -81,74 +82,61 @@ def __init__(self, name, field=None, param_fields=[], regions=[]): def set_points(self, points): """ - Internal method to set evaluation points. See Fields.set_points. + Set points for field evaluation. + + :param points: Array of points where the field will be evaluated """ if self.const is not None: self._sample = self.const * np.ones(len(points)) elif self.correlated_field is not None: self.correlated_field.set_points(points) - if type(self.correlated_field) is SpatialCorrelatedField: - # TODO: make n_terms_range an optianal parmater for SpatialCorrelatedField + if type(self.correlated_field) is SpatialCorrelatedField: self.correlated_field.svd_dcmp(n_terms_range=(10, 100)) else: pass def sample(self): """ - Internal method to generate/compute new sample. - :return: + Generate or compute a new sample of the field. + + :return: Sample values of the field """ if self.const is not None: return self._sample elif self.correlated_field is not None: self._sample = self.correlated_field.sample() else: - params = [ pf._sample for pf in self.param_fields] + params = [pf._sample for pf in self.param_fields] self._sample = self._func(*params) return self._sample class Fields: - def __init__(self, fields): """ - Creates a new set of cross dependent random fields. - Currently no support for cross-correlated random fields. - A set of independent basic random fields must exist - other fields can be dependent in deterministic way. 
- - :param fields: A list of dependent fields. - - Example: - rf = SpatialCorrelatedField(log=True) - Fields([ - Field('por_top', rf, regions='ground_0'), - Field('porosity_top', positive_to_range, ['por_top', 0.02, 0.1], regions='ground_0'), - Field('por_bot', rf, regions='ground_1'), - Field('porosity_bot', positive_to_range, ['por_bot', 0.01, 0.05], regions='ground_1'), - Field('conductivity_top', cf.kozeny_carman, ['porosity_top', 1, 1e-8, water_viscosity], regions='ground_0'), - Field('conductivity_bot', cf.kozeny_carman, ['porosity_bot', 1, 1e-10, water_viscosity],regions='ground_1') - ]) + Create a set of cross-dependent random fields. - TODO: use topological sort to fix order of 'fields' - TODO: syntactic sugar for calculating with fields (like with np.arrays). + :param fields: List of Field objects """ self.fields_orig = fields self.fields_dict = {} self.fields = [] - # Have to make a copy of the fields since we want to generate the samples in them - # and the given instances of Field can be used by an independent FieldSet instance. for field in self.fields_orig: new_field = copy.copy(field) if new_field.param_fields: - new_field.param_fields = [self._get_field_obj(field, new_field.regions) for field in new_field.param_fields] + new_field.param_fields = [self._get_field_obj(f, new_field.regions) + for f in new_field.param_fields] self.fields_dict[new_field.name] = new_field self.fields.append(new_field) def _get_field_obj(self, field_name, regions): """ - Get fields by name, replace constants by constant fields for unification. + Get Field object by name or create constant field. 
+ + :param field_name: Field name or constant + :param regions: Regions of the field + :return: Field object """ if type(field_name) in [float, int]: const_field = Field("const_{}".format(field_name), field_name, regions=regions) @@ -156,54 +144,31 @@ def _get_field_obj(self, field_name, regions): self.fields_dict[const_field.name] = const_field return const_field else: - assert field_name in self.fields_dict, "name: {} dict: {}".format(field_name, self.fields_dict) + assert field_name in self.fields_dict return self.fields_dict[field_name] - @property - def names(self): - return self.fields_dict.keys() - - # def iterative_dfs(self, graph, start, path=[]): - # q = [start] - # while q: - # v = q.pop(0) - # if v not in path: - # path = path + [v] - # q = graph[v] + q - # - # return path - def set_outer_fields(self, outer): """ - Set fields that will be in a dictionary produced by FieldSet.sample() call. - :param outer: A list of names of fields that are sampled. - :return: + Set fields to be included in the sampled dictionary. + + :param outer: List of outer field names """ outer_set = set(outer) for f in self.fields: - if f.name in outer_set: - f.is_outer = True - else: - f.is_outer = False + f.is_outer = f.name in outer_set def set_points(self, points, region_ids=[], region_map={}): """ - Set mesh related data to fields. - - set points for sample evaluation - - translate region names to region ids in fields - - create maps from region constraned point sets of fields to full point set - :param points: np array of points for field evaluation - :param regions: regions of the points; - empty means no points for fields restricted to regions and all points for unrestricted fields - :return: + Assign evaluation points to each field. 
+ + :param points: Array of points for field evaluation + :param region_ids: Optional array of region ids for each point + :param region_map: Mapping from region name to region id """ self.n_elements = len(points) - assert len(points) == len(region_ids) reg_points = {} for i, reg_id in enumerate(region_ids): - reg_list = reg_points.get(reg_id, []) - reg_list.append(i) - reg_points[reg_id] = reg_list + reg_points.setdefault(reg_id, []).append(i) for field in self.fields: point_ids = [] @@ -219,69 +184,42 @@ def set_points(self, points, region_ids=[], region_map={}): def sample(self): """ - Return dictionary of sampled fields. - :return: { 'field_name': sample, ...} + Sample all outer fields. + + :return: Dictionary with field names as keys and sampled arrays as values """ result = {} for field in self.fields: sample = field.sample() if field.is_outer: - result[field.name] = np.zeros(self.n_elements) + shape = (self.n_elements, 3) if field.name == "cond_tn" else self.n_elements + result[field.name] = np.zeros(shape) result[field.name][field.full_sample_ids] = sample return result class RandomFieldBase: """ - Base class for various methods for generating random fields. - - Generating realizations of a spatially correlated random field F for a fixed set of points at X. - E[F(x)] = mu(x) - Cov_ij = Cov[x_i,x_j] = E[(F(x_i) - mu(x))(F(x_j) - mu(x))] - - We assume stationary random field with covariance matrix Cov_ij: - Cov_i,j = c(x_i - x_j) - where c(X) is the "stationary covariance" function. 
We assume: - c(X) = sigma^2 exp( -|X^t K X|^(alpha/2) ) - for spatially heterogeneous sigma(X) we consider particular non-stationary generalization:\ - Cov_i,i = sigma(x_i)*sigma(x_j) exp( -|X^t K X|^(alpha/2) ); X = x_i - x_j - - where: - - sigma(X) is the standard deviance of the single uncorrelated value - - K is a positive definite tensor with eigen vectors corresponding to - main directions and eigen values equal to (1/l_i)^2, where l_i is correlation - length in singel main direction. - - alpha is =1 for "exponential" and =2 for "Gauss" correlation - - SVD decomposition: - Considering first m vectors, such that lam(m)/lam(0) <0.1 - - Example: - ``` - field = SpatialCorrelatedField(corr_exp='exp', corr_length=1.5) - X, Y = np.mgrid[0:1:10j, 0:1:10j] - points = np.vstack([X.ravel(), Y.ravel()]) - field.set_points(points) - sample = field.sample() - - ``` + Base class for generating spatially correlated random fields. + + Random field F(x) with mean E[F(x)] = mu(x) and covariance Cov[x_i,x_j]. + Stationary covariance: Cov_ij = sigma^2 * exp(-|X^T K X|^(alpha/2)), + X = x_i - x_j. + Supports optional non-stationary variance sigma(X). """ def __init__(self, corr_exp='gauss', dim=2, corr_length=1.0, aniso_correlation=None, mu=0.0, sigma=1.0, log=False, **kwargs): """ - :param corr_exp: 'gauss', 'exp' or a float (should be >= 1) - :param dim: dimension of the domain (size of point coords) - :param corr_length: scalar, correlation length L > machine epsilon; tensor K = (1/L)^2 - :param aniso_correlation: 3x3 array; K tensor, overrides correlation length - :param mu - mu field (currently just a constant) - :param sigma - sigma field (currently just a constant) + Initialize a random field. 
- TODO: - - implement anisotropy in the base class using transformation matrix for the points - - use transformation matrix also for the corr_length - - replace corr_exp by aux classes for various correlation functions and pass them here - - more general set of correlation functions + :param corr_exp: 'gauss', 'exp', or float >=1 (correlation exponent) + :param dim: Dimension of the domain + :param corr_length: Scalar correlation length + :param aniso_correlation: Optional anisotropic 3x3 correlation tensor + :param mu: Mean (scalar or array) + :param sigma: Standard deviation (scalar or array) + :param log: If True, output field is exponentiated """ self.dim = dim self.log = log @@ -293,8 +231,6 @@ def __init__(self, corr_exp='gauss', dim=2, corr_length=1.0, else: self.correlation_exponent = float(corr_exp) - # TODO: User should prescribe scaling for main axis and their rotation. - # From this we should construct the transformation matrix for the points self._corr_length = corr_length if aniso_correlation is None: assert corr_length > np.finfo(float).eps @@ -302,31 +238,26 @@ def __init__(self, corr_exp='gauss', dim=2, corr_length=1.0, self._max_corr_length = corr_length else: self.correlation_tensor = aniso_correlation - self._max_corr_length = la.norm(aniso_correlation, ord=2) # largest eigen value + self._max_corr_length = la.norm(aniso_correlation, ord=2) - #### Attributes set through `set_points`. self.points = None - # Evaluation points of the field. self.mu = mu - # Mean in points. Or scalar. self.sigma = sigma - # Standard deviance in points. Or scalar. - - self._initialize(**kwargs) # Implementation dependent initialization. + self._initialize(**kwargs) def _initialize(self, **kwargs): + """Implementation-specific initialization. To be overridden in subclasses.""" raise NotImplementedError() def set_points(self, points, mu=None, sigma=None): """ - :param points: N x d array. Points X_i where the field will be evaluated. d is the dimension. 
- :param mu: Scalar or N array. Mean value of uncorrelated field: E( F(X_i)). - :param sigma: Scalar or N array. Standard deviance of uncorrelated field: sqrt( E ( F(X_i) - mu_i )^2 ) - :return: None + Set points for field evaluation. + + :param points: Array of points (N x dim) + :param mu: Optional mean at points + :param sigma: Optional standard deviation at points """ points = np.array(points, dtype=float) - - assert len(points.shape) >= 1 assert points.shape[1] == self.dim self.n_points, self.dimension = points.shape self.points = points @@ -339,49 +270,43 @@ def set_points(self, points, mu=None, sigma=None): if sigma is not None: self.sigma = sigma self.sigma = np.array(self.sigma, dtype=float) - assert self.sigma.shape == () or sigma.shape == (len(points),) + assert self.sigma.shape == () or self.sigma.shape == (len(points),) def _set_points(self): + """Optional internal method to update points. Can be overridden.""" pass def sample(self): """ - :param uncorelated: Random samples from standard normal distribution. - Removed as the spectral method do not support it. - :return: Random field evaluated in points given by 'set_points'. - """ - # if uncorelated is None: - # uncorelated = np.random.normal(0, 1, self.n_approx_terms) - # else: - # assert uncorelated.shape == (self.n_approx_terms,) + Generate a realization of the random field. + :return: Array of field values at set points + """ field = self._sample() field = self.sigma * field + self.mu - - if not self.log: - return field - return np.exp(field) + return np.exp(field) if self.log else field def _sample(self, uncorrelated): + """ + Implementation-specific sample generation. To be overridden. + + :param uncorrelated: Array of uncorrelated standard normal samples + :return: Field sample + """ raise NotImplementedError() class SpatialCorrelatedField(RandomFieldBase): + """ + Generate spatially correlated fields using covariance matrix and KL decomposition. 
+ """ def _initialize(self, **kwargs): - """ - Called after initialization in common constructor. - """ - - ### Attributes computed in precalculation. + """Initialization specific to SVD/KL-based spatial correlation.""" self.cov_mat = None - # Covariance matrix (dense). self._n_approx_terms = None - # Length of the sample vector, number of KL (Karhunen-Loe?ve) expansion terms. self._cov_l_factor = None - # (Reduced) L factor of the SVD decomposition of the covariance matrix. self._sqrt_ev = None - # (Reduced) square roots of singular values. def _set_points(self): self.cov_mat = None @@ -389,17 +314,16 @@ def _set_points(self): def cov_matrix(self): """ - Setup dense covariance matrix for given set of points. - :return: None. + Compute dense covariance matrix for current points. + + :return: Covariance matrix """ assert self.points is not None, "Points not set, call set_points." - - self._points_bbox = box = (np.min(self.points, axis=0), np.max(self.points, axis=0)) - diameter = np.max(np.abs(box[1] - box[0])) + self._points_bbox = (np.min(self.points, axis=0), np.max(self.points, axis=0)) + diameter = np.max(np.abs(self._points_bbox[1] - self._points_bbox[0])) self._relative_corr_length = self._max_corr_length / diameter - - # sigma_sqr_mat = np.outer(self.sigma, self.sigma.T) self._sigma_sqr_max = np.max(self.sigma) ** 2 + n_pt = len(self.points) self.cov_mat = np.empty((n_pt, n_pt)) corr_exp = self.correlation_exponent / 2.0 @@ -409,19 +333,16 @@ def cov_matrix(self): diff_row = self.points - pt len_sqr_row = np.sum(diff_row.dot(self.correlation_tensor) * diff_row, axis=-1) self.cov_mat[i_row, :] = np.exp(-len_sqr_row ** corr_exp) + return self.cov_mat def _eigen_value_estimate(self, m): """ - Estimate of the m-th eigen value of the covariance matrix. - According to paper: Schwab, Thodor: KL Approximation of Random Fields by ... - However for small gamma the asimtotics holds just for to big values of 'm'. - We rather need to find a semiempricial formula. 
- greater - :param m: - :return: + Semi-empirical estimate of the m-th eigenvalue of covariance matrix. + + :param m: Eigenvalue index + :return: Estimated eigenvalue """ - assert self.cov_mat is not None d = self.dimension alpha = self.correlation_exponent gamma = self._relative_corr_length @@ -429,24 +350,11 @@ def _eigen_value_estimate(self, m): def svd_dcmp(self, precision=0.01, n_terms_range=(1, np.inf)): """ - Does decomposition of covariance matrix defined by set of points - :param precision: Desired accuracy of the KL approximation, smaller eigen values are dropped. - :param n_terms_range: (min, max) number of terms in KL expansion to use. The number of terms estimated from - given precision is snapped to the given interval. + Perform truncated SVD for Karhunen-Loeve decomposition. - truncated SVD: - cov_mat = U*diag(ev) * V, - _cov_l_factor = U[:,0:m]*sqrt(ev[0:m]) - - Note on number of terms: - According to: C. Schwab and R. A. Todor: KL Approximation of Random Fields by Generalized Fast Multiploe Method - the eigen values should decay as (Proposition 2.18): - lambda_m ~ sigma^2 * ( 1/gamma ) **( m**(1/d) + alpha ) / Gamma(0.5 * m**(1/d) ) - where gamma = correlation length / domain diameter - ans alpha is the correlation exponent. Gamma is the gamma function. - ... 
should be checked experimantaly and generalized for sigma(X) - - :return: + :param precision: Desired accuracy + :param n_terms_range: Min/max number of KL terms + :return: (_cov_l_factor, singular values) """ if self.cov_mat is None: self.cov_matrix() @@ -455,76 +363,99 @@ def svd_dcmp(self, precision=0.01, n_terms_range=(1, np.inf)): U, ev, VT = np.linalg.svd(self.cov_mat) m = self.n_points else: - range = list(n_terms_range) - range[0] = max(1, range[0]) - range[1] = min(self.n_points, range[1]) - - prec_range = (self._eigen_value_estimate(range[0]), self._eigen_value_estimate(range[1])) + range_vals = [max(1, n_terms_range[0]), min(self.n_points, n_terms_range[1])] + prec_range = (self._eigen_value_estimate(range_vals[0]), self._eigen_value_estimate(range_vals[1])) if precision < prec_range[0]: - m = range[0] + m = range_vals[0] elif precision > prec_range[1]: - m = range[1] + m = range_vals[1] else: f = lambda m: self._eigen_value_estimate(m) - precision - m = sp.optmize.bisect(f, range[0], range[1], xtol=0.5, ) - - m = max(m, range[0]) + m = sp.optimize.bisect(f, range_vals[0], range_vals[1], xtol=0.5) + m = max(m, range_vals[0]) threshold = 2 * precision - # TODO: Test if we should cut eigen values by relative (like now) or absolute value - while threshold >= precision and m <= range[1]: - #print("treshold: {} m: {} precision: {} max_m: {}".format(threshold, m, precision, range[1])) + while threshold >= precision and m <= range_vals[1]: U, ev, VT = randomized_svd(self.cov_mat, n_components=m, n_iter=3, random_state=None) threshold = ev[-1] / ev[0] m = int(np.ceil(1.5 * m)) - m = len(ev) - m = min(m, range[1]) + m = min(len(ev), range_vals[1]) - #print("KL approximation: {} for {} points.".format(m, self.n_points)) self.n_approx_terms = m - self._sqrt_ev = np.sqrt(ev[0:m]) - self._cov_l_factor = U[:, 0:m].dot(np.diag(self._sqrt_ev)) + self._sqrt_ev = np.sqrt(ev[:m]) + self._cov_l_factor = U[:, :m].dot(np.diag(self._sqrt_ev)) self.cov_mat = None - return 
self._cov_l_factor, ev[0:m] + return self._cov_l_factor, ev[:m] def _sample(self): """ - :param uncorelated: Random samples from standard normal distribution. - :return: Random field evaluated in points given by 'set_points'. + Generate a field realization using KL decomposition. + + :return: Field sample array """ if self._cov_l_factor is None: self.svd_dcmp() - uncorelated = np.random.normal(0, 1, self.n_approx_terms) - return self._cov_l_factor.dot(uncorelated) + uncorrelated = np.random.normal(0, 1, self.n_approx_terms) + return self._cov_l_factor.dot(uncorrelated) class GSToolsSpatialCorrelatedField(RandomFieldBase): + """ + Spatially correlated random field generator using GSTools. + + This class acts as an adapter between :mod:`gstools` and the MLMC + random field interface (:class:`mlmc.random.random_field_base.RandomFieldBase`). + It supports 1D, 2D, and 3D random fields with optional logarithmic transformation, + and can generate fields on both structured and unstructured grids. + """ - def __init__(self, model, mode_no=1000, log=False, sigma=1): + def __init__(self, model, mode_no=1000, log=False, sigma=1, seed=None, mode=None, structured=False): """ - :param model: instance of covariance model class, which parent is gstools.covmodel.CovModel - :param mode_no: number of Fourier modes, default: 1000 as in gstools package + Initialize a spatially correlated random field generator. + + :param model: Covariance model instance (subclass of ``gstools.covmodel.CovModel``) + defining the spatial correlation structure. + :param mode_no: Number of Fourier modes used in the random field generation. + Default is 1000. + :param log: If True, applies an exponential transformation to obtain + a lognormal field. Default is False. + :param sigma: Standard deviation scaling factor applied to the generated field. + Default is 1. + :param seed: Random seed for reproducibility. Default is None. + :param mode: Sampling mode for GSTools SRF. 
Use "fft" for structured grids or + None for unstructured. Default is None. + :param structured: If True, assumes a structured grid for field evaluation. + Default is False. """ self.model = model self.mode_no = mode_no - self.srf = gstools.SRF(model, mode_no=mode_no) + if mode == "fft": + self.srf = gstools.SRF(model, mode="fft", seed=seed) + else: + self.srf = gstools.SRF(model, mode_no=mode_no, seed=seed) self.mu = self.srf.mean self.sigma = sigma self.dim = model.dim self.log = log + self.structured = structured def change_srf(self, seed): """ - Spatial random field with new seed - :param seed: int, random number generator seed + Reinitialize the GSTools random field with a new random seed. + + :param seed: Random seed used to reinitialize the underlying + :class:`gstools.SRF` instance. :return: None """ self.srf = gstools.SRF(self.model, seed=seed, mode_no=self.mode_no) - def random_field(self): + def random_field(self, seed=None): """ - Generate the spatial random field - :return: field, np.ndarray + Generate a raw random field realization (without scaling or transformation). + + :param seed: Optional random seed for reproducibility. Default is None. + :return: numpy.ndarray + Field values evaluated at the points defined by :meth:`set_points`. """ if self.dim == 1: x = self.points @@ -540,147 +471,151 @@ def random_field(self): x = x.reshape(len(x), 1) y = y.reshape(len(y), 1) z = z.reshape(len(z), 1) - field = self.srf((x, y, z)) + if self.structured: + field = self.srf([np.squeeze(x), np.squeeze(y), np.squeeze(z)], seed=seed) + field = field.flatten() + else: + if seed is not None: + field = self.srf(self.points.T, seed=seed) + else: + field = self.srf(self.points.T) return field - def sample(self): + def sample(self, seed=None): """ - :return: Random field evaluated in points given by 'set_points' + Evaluate the scaled random field at the defined points. + + :param seed: Optional random seed for reproducibility. Default is None. 
+ :return: numpy.ndarray + Field values evaluated at the defined points, scaled by ``sigma`` + and shifted by ``mu``. If ``log=True``, returns + ``exp(sigma * field + mu)`` instead. """ if not self.log: - return self.sigma * self.random_field() + self.mu - return np.exp(self.sigma * self.random_field() + self.mu) + return self.sigma * self.random_field(seed) + self.mu + return np.exp(self.sigma * self.random_field(seed) + self.mu) class FourierSpatialCorrelatedField(RandomFieldBase): """ - Generate spatial random fields + Deprecated: Fourier-based spatial random field generator. + + Generates spatial random fields using a truncated Fourier series. + Use GSToolsSpatialCorrelatedField instead. """ def _initialize(self, **kwargs): """ - Own intialization. - :param mode_no: Number of Fourier modes + Initialization specific to Fourier-based spatial fields. + + :param mode_no: Number of Fourier modes (default 1000) """ - warnings.warn("FourierSpatialCorrelatedField class is deprecated, try to use GSToolsSpatialCorrelatedField class instead", - DeprecationWarning) - self.len_scale = self._corr_length * 2*np.pi + warnings.warn( + "FourierSpatialCorrelatedField class is deprecated, use GSToolsSpatialCorrelatedField instead", + DeprecationWarning + ) + self.len_scale = self._corr_length * 2 * np.pi self.mode_no = kwargs.get("mode_no", 1000) def get_normal_distr(self): """ - Normal distributed arrays - :return: np.ndarray + Generate normal distributed random coefficients for Fourier modes. 
+ + :return: Array of shape (2, mode_no) """ Z = np.empty((2, self.mode_no)) rng = self._get_random_stream() for i in range(2): Z[i] = rng.normal(size=self.mode_no) - return Z def _sample_sphere(self, mode_no): - """Uniform sampling on a d-dimensional sphere - Parameters - ---------- - mode_no : :class:`int`, optional - number of the Fourier modes - Returns - ------- - coord : :class:`numpy.ndarray` - x[, y[, z]] coordinates on the sphere with shape (dim, mode_no) + """ + Uniformly sample directions on the unit sphere (dim=1,2,3). + + :param mode_no: Number of modes + :return: Array of unit vectors (dim, mode_no) """ coord = self._create_empty_k(mode_no) + rng = self._get_random_stream() if self.dim == 1: - rng = self._get_random_stream() ang1 = rng.random_sample(mode_no) coord[0] = 2 * np.around(ang1) - 1 elif self.dim == 2: - rng = self._get_random_stream() ang1 = rng.uniform(0.0, 2 * np.pi, mode_no) coord[0] = np.cos(ang1) coord[1] = np.sin(ang1) elif self.dim == 3: - raise NotImplementedError("For implementation see " - "https://github.com/LSchueler/GSTools/blob/randomization_revisited/gstools/field/rng.py") + raise NotImplementedError("3D implementation see GSTools repo") return coord def gau(self, mode_no=1000): """ - Compute a gaussian spectrum - :param mode_no: int, Number of Fourier modes - :return: numpy.ndarray + Gaussian Fourier spectrum. 
+ + :param mode_no: Number of modes + :return: Array of wave vectors (dim, mode_no) """ len_scale = self.len_scale * np.sqrt(np.pi / 4) if self.dim == 1: k = self._create_empty_k(mode_no) - rng = self._get_random_stream() - k[0] = rng.normal(0., np.pi / 2.0 / len_scale ** 2, mode_no) + k[0] = self._get_random_stream().normal(0., np.pi / 2.0 / len_scale ** 2, mode_no) elif self.dim == 2: coord = self._sample_sphere(mode_no) - rng = self._get_random_stream() - rad_u = rng.random_sample(mode_no) - # weibull distribution sampling + rad_u = self._get_random_stream().random_sample(mode_no) rad = np.sqrt(np.pi) / len_scale * np.sqrt(-np.log(rad_u)) k = rad * coord elif self.dim == 3: - raise NotImplementedError("For implementation see " - "https://github.com/LSchueler/GSTools/blob/randomization_revisited/gstools/field/rng.py") + raise NotImplementedError("3D implementation see GSTools repo") return k def exp(self, mode_no=1000): """ - Compute an exponential spectrum - :param mode_no: int, Number of Fourier modes - :return: numpy.ndarray + Exponential Fourier spectrum. 
+ + :param mode_no: Number of modes + :return: Array of wave vectors (dim, mode_no) """ if self.dim == 1: k = self._create_empty_k(mode_no) - rng = self._get_random_stream() - k_u = rng.rng.uniform(-np.pi / 2.0, np.pi / 2.0, mode_no) + k_u = self._get_random_stream().uniform(-np.pi / 2.0, np.pi / 2.0, mode_no) k[0] = np.tan(k_u) / self.len_scale elif self.dim == 2: coord = self._sample_sphere(mode_no) - rng = self._get_random_stream() - rad_u = rng.random_sample(mode_no) - # sampling with ppf + rad_u = self._get_random_stream().random_sample(mode_no) rad = np.sqrt(1.0 / rad_u ** 2 - 1.0) / self.len_scale k = rad * coord elif self.dim == 3: - raise NotImplementedError("For implementation see " - "https://github.com/LSchueler/GSTools/blob/randomization_revisited/gstools/field/rng.py") + raise NotImplementedError("3D implementation see GSTools repo") return k def _create_empty_k(self, mode_no=None): - """ Create empty mode array with the correct shape. - Parameters - ---------- - mode_no : :class:`int` - number of the fourier modes - Returns - ------- - :class:`numpy.ndarray` - the empty mode array - """ - if mode_no is None: - k = np.empty(self.dim) - else: - k = np.empty((self.dim, mode_no)) + """ + Helper to create empty Fourier mode array. - return k + :param mode_no: Number of modes + :return: Empty array of shape (dim, mode_no) + """ + return np.empty((self.dim, mode_no)) if mode_no is not None else np.empty(self.dim) def _get_random_stream(self, seed=None): + """ + Return a random number generator. + + :param seed: Optional seed + """ return rand.RandomState(rand.RandomState(seed).randint(2 ** 16 - 1)) def random_field(self): """ - Calculates the random modes for the randomization method. + Generate a random field using Fourier series. 
+ + :return: Field values at points """ - y, z = None, None + # Prepare coordinates if self.dim == 1: - x = self.points - x.reshape(len(x), 1) + x = self.points.reshape(len(self.points), 1) elif self.dim == 2: x, y = self.points.T x = x.reshape(len(x), 1) @@ -692,64 +627,20 @@ def random_field(self): z = z.reshape(len(z), 1) normal_distr_values = self.get_normal_distr() + k = self.gau(self.mode_no) if self.correlation_exponent == 2 else self.exp(self.mode_no) - if self.correlation_exponent == 2: - k = self.gau(self.mode_no) - else: - k = self.exp(self.mode_no) - - # reshape for unstructured grid - for dim_i in range(self.dim): - k[dim_i] = np.squeeze(k[dim_i]) - k[dim_i] = np.reshape(k[dim_i], (1, len(k[dim_i]))) - - summed_modes = np.broadcast(x, y, z) - summed_modes = np.squeeze(np.zeros(summed_modes.shape)) - # Test to see if enough memory is available. - # In case there isn't, divide Fourier modes into smaller chunks - chunk_no = 1 - chunk_no_exp = 0 - - while True: - try: - chunk_len = int(np.ceil(self.mode_no / chunk_no)) - - for chunk in range(chunk_no): - a = chunk * chunk_len - # In case k[d,a:e] with e >= len(k[d,:]) causes errors in - # numpy, use the commented min-function below - # e = min((chunk + 1) * chunk_len, self.mode_no-1) - e = (chunk + 1) * chunk_len - - if self.dim == 1: - phase = k[0, a:e]*x - elif self.dim == 2: - phase = k[0, a:e]*x + k[1, a:e]*y - else: - phase = (k[0, a:e]*x + k[1, a:e]*y + - k[2, a:e]*z) - - summed_modes += np.squeeze( - np.sum(normal_distr_values[0, a:e] * np.cos(2.*np.pi*phase) + - normal_distr_values[1, a:e] * np.sin(2.*np.pi*phase), - axis=-1)) - except MemoryError: - chunk_no += 2**chunk_no_exp - chunk_no_exp += 1 - print('Not enough memory. 
Dividing Fourier modes into {} ' - 'chunks.'.format(chunk_no)) - else: - break + summed_modes = np.zeros(len(self.points)) + # Fourier summation (memory safe chunks could be implemented here) + for i in range(self.mode_no): + phase = np.sum(k[:, i] * self.points.T, axis=0) + summed_modes += normal_distr_values[0, i] * np.cos(2*np.pi*phase) + normal_distr_values[1, i] * np.sin(2*np.pi*phase) - field = np.sqrt(1.0 / self.mode_no) * summed_modes - return field + return np.sqrt(1.0 / self.mode_no) * summed_modes def _sample(self): """ - :return: Random field evaluated in points given by 'set_points'. + Generate a Fourier-based random field realization. + + :return: Field values """ return self.random_field() - - if not self.log: - return field - return np.exp(field) diff --git a/mlmc/random/frac_geom.py b/mlmc/random/frac_geom.py deleted file mode 100644 index 0d872646..00000000 --- a/mlmc/random/frac_geom.py +++ /dev/null @@ -1,140 +0,0 @@ -import numpy as np -import geomop.polygons as poly -import geomop.merge as merge -import geomop.polygons_io as poly_io -import geomop.format_last as lg -import geomop.layers_io -import geomop.geometry -#from geomop.plot_polygons import plot_polygon_decomposition - - - - - - - - - -def make_frac_mesh(box, mesh_step, fractures, frac_step): - """ - Make geometry and mesh for given 2d box and set of fractures. - :param box: [min_point, max_point]; points are np.arrays - :param fractures: Array Nx2x2, one row for every fracture given by endpoints: [p0, p1] - :return: GmshIO object with physical groups: - box: 1, - fractures: 1000 + i, i = 0, ... 
, N-1 - """ - regions = make_regions(mesh_step, fractures, frac_step) - decomp, reg_map = make_decomposition(box, fractures, regions) - geom = fill_lg(decomp, reg_map, regions) - return make_mesh(geom) - - -def add_reg(regions, name, dim, step=0.0, bc=False, not_used =False): - reg = lg.Region(dict(name=name, dim=dim, mesh_step=step, boundary=bc, not_used=not_used)) - reg._id = len(regions) - regions.append(reg) - -def make_regions(mesh_step, fractures, frac_step): - regions = [] - add_reg(regions, "NONE", -1, not_used=True) - add_reg(regions, "bulk_0", 2, mesh_step) - add_reg(regions, ".bc_inflow", 1, bc=True) - add_reg(regions, ".bc_outflow", 1, bc=True) - for f_id in range(len(fractures)): - add_reg(regions, "frac_{}".format(f_id), 1, frac_step) - return regions - - -def make_decomposition(box, fractures, regions): - box_pd = poly.PolygonDecomposition() - p00, p11 = box - p01 = np.array([p00[0], p11[1]]) - p10 = np.array([p11[0], p00[1]]) - box_pd.add_line(p00, p01) - seg_outflow, = box_pd.add_line(p01, p11) - box_pd.add_line(p11, p10) - seg_inflow, = box_pd.add_line(p10, p00) - - decompositions = [box_pd] - for p0, p1 in fractures: - pd = poly.PolygonDecomposition() - pd.add_line(p0, p1) - decompositions.append(pd) - - common_decomp, maps = merge.intersect_decompositions(decompositions) - #plot_polygon_decomposition(common_decomp) - #print(maps) - - # Map common_decomp objects to regions. 
- none_region_id = 0 - box_reg_id = 1 - bc_inflow_id = 2 - bc_outflow_id = 3 - frac_id_shift = 4 - decomp_shapes = [common_decomp.points, common_decomp.segments, common_decomp.polygons] - reg_map = [{key: regions[none_region_id] for key in decomp_shapes[d].keys()} for d in range(3)] - for i_frac, f_map in enumerate(maps[1:]): - for id, orig_seg_id in f_map[1].items(): - reg_map[1][id] = regions[frac_id_shift + i_frac] - - for id, orig_poly_id in maps[0][2].items(): - if orig_poly_id == 0: - continue - reg_map[2][id] = regions[box_reg_id] - - for id, orig_seg_id in maps[0][1].items(): - if orig_seg_id == seg_inflow.id: - reg_map[1][id] = regions[bc_inflow_id] - if orig_seg_id == seg_outflow.id: - reg_map[1][id] = regions[bc_outflow_id] - - - return common_decomp, reg_map - - -def fill_lg(decomp, reg_map, regions): - """ - Create LayerGeometry object. - """ - nodes, topology = poly_io.serialize(decomp) - - geom = lg.LayerGeometry() - geom.version - geom.regions = regions - - - - iface_ns = lg.InterfaceNodeSet(dict( - nodeset_id = 0, - interface_id = 0 - )) - layer = lg.FractureLayer(dict( - name = "layer", - top = iface_ns, - polygon_region_ids = [ reg_map[2][poly.id]._id for poly in decomp.polygons.values() ], - segment_region_ids = [ reg_map[1][seg.id]._id for seg in decomp.segments.values() ], - node_region_ids = [ reg_map[0][node.id]._id for node in decomp.points.values() ] - )) - geom.layers = [ layer ] - #geom.surfaces = [ClassFactory(Surface)] - - iface = lg.Interface(dict( - surface_id = None, - elevation = 0.0 - )) - geom.interfaces = [ iface ] - #geom.curves = [ClassFactory(Curve)] - geom.topologies = [ topology ] - - nodeset = lg.NodeSet(dict( - topology_id = 0, - nodes = nodes - )) - geom.node_sets = [ nodeset ] - geomop.layers_io.write_geometry("fractured_2d.json", geom) - return geom - - -def make_mesh(geometry): - return geomop.geometry.make_geometry(geometry=geometry, layers_file="fractured_2d.json", mesh_step=1.0) \ No newline at end of file diff 
--git a/mlmc/sample_storage.py b/mlmc/sample_storage.py index 01be9082..623bd9b3 100644 --- a/mlmc/sample_storage.py +++ b/mlmc/sample_storage.py @@ -1,134 +1,142 @@ import itertools import numpy as np -from abc import ABCMeta -from abc import abstractmethod -from typing import List, Dict +from abc import ABCMeta, abstractmethod +from typing import List, Dict, Any, Generator, Optional, Tuple from mlmc.quantity.quantity_spec import QuantitySpec, ChunkSpec class SampleStorage(metaclass=ABCMeta): """ - Provides methods to store and retrieve sample's data + Provides methods to store and retrieve sample data. + Abstract base class for all storage backends. """ @abstractmethod def save_samples(self, successful_samples, failed_samples): """ - Write results to storage + Write simulation results to storage. + :param successful_samples: Dict[level_id, List[Tuple[sample_id, (fine, coarse)]]] + :param failed_samples: Dict[level_id, List[Tuple[sample_id, error_message]]] """ @abstractmethod def save_result_format(self, res_spec: List[QuantitySpec]): """ - Save result format + Save result format. + :param res_spec: List of quantity specifications describing result structure. """ @abstractmethod def load_result_format(self) -> List[QuantitySpec]: """ - Load result format + Load stored result format. + :return: List[QuantitySpec] """ @abstractmethod def save_global_data(self, result_format: List[QuantitySpec], level_parameters=None): """ - Save global data, at the moment: _result_format, level_parameters + Save global metadata such as result format and level parameters. + :param result_format: List[QuantitySpec] + :param level_parameters: Optional metadata per level """ @abstractmethod def save_scheduled_samples(self, level_id, samples): """ - Save scheduled samples ids + Save scheduled sample identifiers. 
+ :param level_id: int + :param samples: List[str] """ @abstractmethod - def load_scheduled_samples(self): + def load_scheduled_samples(self) -> Dict[int, List[str]]: """ - Load scheduled samples - :return: Dict[_level_id, List[sample_id: str]] + Load scheduled sample IDs. + :return: Dict[level_id, List[sample_id]] """ @abstractmethod def sample_pairs(self): """ - Get results from storage - :return: List[Array[M, N, 2]] + Retrieve all stored fine–coarse result pairs. + :return: List[np.ndarray[M, N, 2]] """ - def chunks(self, level_id=None, n_samples=None): + def chunks(self, level_id: Optional[int] = None, n_samples: Optional[int] = None) -> Generator[ChunkSpec, None, None]: """ - Create chunks generator - :param level_id: int, if not None return chunks for a given level - :param n_samples: int, number of samples to retrieve - :return: generator + Create a generator yielding chunk specifications for collected data. + :param level_id: int, if provided, return chunks only for the given level. + :param n_samples: int, maximum number of samples to retrieve. + :return: generator of ChunkSpec objects. """ - assert isinstance(n_samples, (type(None), int)), "n_samples param must be int" - level_ids = self.get_level_ids() - if level_id is not None: - level_ids = [level_id] - return itertools.chain(*[self._level_chunks(level_id, n_samples) for level_id in level_ids]) # concatenate generators + assert isinstance(n_samples, (type(None), int)), "n_samples must be int or None" + level_ids = [level_id] if level_id is not None else self.get_level_ids() + return itertools.chain(*[self._level_chunks(lid, n_samples) for lid in level_ids]) @abstractmethod def _level_chunks(self, level_id, n_samples=None): """ - Info about chunks of level's collected data + Get chunk information for data collected at a given level. 
+ :param level_id: int + :param n_samples: int :return: generator of ChunkSpec objects """ @abstractmethod def n_finished(self): """ - Number of finished samples - :return: List + Get number of finished samples on each level. + :return: List[int] """ @abstractmethod - def save_n_ops(self, n_ops: Dict[int, List[float]]): + def save_n_ops(self, n_ops: Dict[int, Tuple[float, int]]): """ - Save number of operations (time) - :param n_ops: Dict[_level_id, List[overall time, number of valid samples]] + Save number of operations (time). + :param n_ops: Dict[level_id, Tuple[total_time, n_valid_samples]] """ @abstractmethod def get_n_ops(self): """ - Number of operations (time) per sample for each level + Get number of operations per sample for each level. :return: List[float] """ @abstractmethod def unfinished_ids(self): """ - Get unfinished sample's ids - :return: list + Get IDs of unfinished samples. + :return: List[str] """ @abstractmethod def get_level_ids(self): """ - Get number of levels - :return: int + Get list of available level IDs. + :return: List[int] """ @abstractmethod def get_n_levels(self): """ - Get number of levels + Get total number of levels. :return: int """ @abstractmethod def get_level_parameters(self): """ - Get level parameters - :return: list + Get stored level parameters. + :return: List[Any] """ @abstractmethod def get_n_collected(self): """ - Get number of collected results at each evel - :return: list + Get number of collected results at each level. 
+ :return: List[int] """ @@ -168,12 +176,15 @@ def _save_successful(self, samples): :return: None """ for level_id, res in samples.items(): - res = np.array(res) + res = np.array(res, dtype=object) fine_coarse_res = res[:, 1] - result_type = np.dtype((np.float, np.array(fine_coarse_res[0]).shape)) + result_type = np.dtype((float, np.array(fine_coarse_res[0], dtype=object).shape)) results = np.empty(shape=(len(res),), dtype=result_type) - results[:] = [val for val in fine_coarse_res] + + for idx, val in enumerate(fine_coarse_res): + results[idx, 0] = val[0] + results[idx, 1] = val[1] # Save sample ids self._successful_sample_ids.setdefault(level_id, []).extend(res[:, 0]) diff --git a/mlmc/sample_storage_hdf.py b/mlmc/sample_storage_hdf.py index 7e5fbef5..5b7e4dbe 100644 --- a/mlmc/sample_storage_hdf.py +++ b/mlmc/sample_storage_hdf.py @@ -4,81 +4,86 @@ from mlmc.sample_storage import SampleStorage from mlmc.quantity.quantity_spec import QuantitySpec, ChunkSpec import mlmc.tool.hdf5 as hdf -import warnings -warnings.simplefilter("ignore", np.VisibleDeprecationWarning) class SampleStorageHDF(SampleStorage): """ - Sample's data are stored in a HDF5 file + Store and manage sample data in an HDF5 file. + + This implementation of the SampleStorage interface provides efficient + persistent storage for MLMC simulation results using HDF5. """ def __init__(self, file_path): """ - HDF5 storage, provide method to interact with storage - :param file_path: absolute path to hdf file (which not exists at the moment) + Initialize the HDF5 storage and create or load the file structure. + + :param file_path: Absolute path to the HDF5 file. + If the file exists, it will be loaded instead of created. 
""" super().__init__() - # If file exists load not create new file - load_from_file = True if os.path.exists(file_path) else False + load_from_file = os.path.exists(file_path) # HDF5 interface self._hdf_object = hdf.HDF5(file_path=file_path, load_from_file=load_from_file) self._level_groups = [] - # 'Load' level groups + # Load existing level groups if file already contains data if load_from_file: - # Create level group for each level if len(self._level_groups) != len(self._hdf_object.level_parameters): for i_level in range(len(self._hdf_object.level_parameters)): self._level_groups.append(self._hdf_object.add_level_group(str(i_level))) def _hdf_result_format(self, locations, times): """ - QuantitySpec data type, necessary for hdf storage - :return: + Construct an appropriate dtype for QuantitySpec data representation in HDF5. + + :param locations: List of spatial locations (as coordinates or identifiers). + :param times: List of time steps. + :return: Numpy dtype describing the QuantitySpec data structure. 
""" if len(locations[0]) == 3: - tuple_dtype = np.dtype((np.float, (3,))) + tuple_dtype = np.dtype((float, (3,))) loc_dtype = np.dtype((tuple_dtype, (len(locations),))) else: loc_dtype = np.dtype(('S50', (len(locations),))) - result_dtype = {'names': ('name', 'unit', 'shape', 'times', 'locations'), - 'formats': ('S50', - 'S50', - np.dtype((np.int32, (2,))), - np.dtype((np.float, (len(times),))), - loc_dtype - ) - } + result_dtype = { + 'names': ('name', 'unit', 'shape', 'times', 'locations'), + 'formats': ( + 'S50', + 'S50', + np.dtype((np.int32, (2,))), + np.dtype((float, (len(times),))), + loc_dtype + ) + } return result_dtype - def save_global_data(self, level_parameters: List[np.float], result_format: List[QuantitySpec]): + def save_global_data(self, level_parameters: List[float], result_format: List[QuantitySpec]): """ - Save hdf5 file global attributes - :param level_parameters: list of simulation steps - :param result_format: simulation result format + Save HDF5 global attributes including simulation parameters and result format. + + :param level_parameters: List of simulation level parameters (e.g., mesh sizes). + :param result_format: List of QuantitySpec objects describing result quantities. :return: None """ res_dtype = self._hdf_result_format(result_format[0].locations, result_format[0].times) - - # Create file structure self._hdf_object.create_file_structure(level_parameters) - # Create group for each level + # Create HDF5 groups for each simulation level if len(self._level_groups) != len(level_parameters): for i_level in range(len(level_parameters)): self._level_groups.append(self._hdf_object.add_level_group(str(i_level))) - # Save result format (QuantitySpec) self.save_result_format(result_format, res_dtype) def load_scheduled_samples(self): """ - Get scheduled samples for each level - :return: Dict[level_id, List[sample_id: str]] + Load scheduled samples from storage. 
+ + :return: Dict[level_id, List[sample_id: str]] """ scheduled = {} for level in self._level_groups: @@ -87,79 +92,116 @@ def load_scheduled_samples(self): def save_result_format(self, result_format: List[QuantitySpec], res_dtype): """ - Save result format to hdf - :param result_format: List[QuantitySpec] + Save result format metadata to HDF5. + + :param result_format: List of QuantitySpec objects defining stored quantities. + :param res_dtype: Numpy dtype for structured storage. :return: None """ try: if self.load_result_format() != result_format: - raise ValueError('You are setting a new different result format for an existing sample storage') + raise ValueError( + "Attempting to overwrite an existing result format with a new incompatible one." + ) except AttributeError: pass + self._hdf_object.save_result_format(result_format, res_dtype) def load_result_format(self) -> List[QuantitySpec]: """ - Load result format + Load and reconstruct the result format from HDF5. + + :return: List of QuantitySpec objects. """ results_format = self._hdf_object.load_result_format() quantities = [] for res_format in results_format: - spec = QuantitySpec(res_format[0].decode(), res_format[1].decode(), res_format[2], res_format[3], - [loc.decode() for loc in res_format[4]]) - + spec = QuantitySpec( + res_format[0].decode(), + res_format[1].decode(), + res_format[2], + res_format[3], + [loc.decode() for loc in res_format[4]] + ) quantities.append(spec) - return quantities def save_samples(self, successful, failed): """ - Save successful and failed samples - :param successful: List[Tuple[sample_id: str, Tuple[ndarray, ndarray]]] - :param failed: List[Tuple[sample_id: str, error_message: str]] + Save successful and failed samples to the HDF5 storage. 
+ + :param successful: Dict[level_id, List[Tuple[sample_id: str, (fine, coarse)]]] + :param failed: Dict[level_id, List[Tuple[sample_id: str, error_message: str]]] :return: None """ - self._save_succesful(successful) + self._save_successful(successful) self._save_failed(failed) - def _save_succesful(self, successful_samples): + def _save_successful(self, successful_samples): + """ + Append successful sample results to the appropriate level group. + + :param successful_samples: Dict[level_id, List[Tuple[sample_id, (fine, coarse)]]] + :return: None + """ for level, samples in successful_samples.items(): if len(samples) > 0: - self._level_groups[level].append_successful(np.array(samples)) + self._level_groups[level].append_successful(np.array(samples, dtype=object)) def _save_failed(self, failed_samples): + """ + Append failed sample identifiers and messages. + + :param failed_samples: Dict[level_id, List[Tuple[sample_id, error_message]]] + :return: None + """ for level, samples in failed_samples.items(): if len(samples) > 0: self._level_groups[level].append_failed(samples) def save_scheduled_samples(self, level_id, samples: List[str]): """ - Append scheduled samples - :param level_id: int - :param samples: list of sample identifiers + Append scheduled sample identifiers for a specific level. + + :param level_id: Integer level identifier. + :param samples: List of sample identifiers. :return: None """ self._level_groups[level_id].append_scheduled(samples) def _level_chunks(self, level_id, n_samples=None): + """ + Generate chunk specifications for a given level. + + :param level_id: Level identifier. + :param n_samples: Optional number of samples to include per chunk. + :return: Generator of ChunkSpec objects. + """ return self._level_groups[level_id].chunks(n_samples) def sample_pairs(self): """ - Load results from hdf file - :return: List[Array[M, N, 2]] + Retrieve all sample pairs from storage. 
+ + :return: List[np.ndarray[M, N, 2]] where M = number of results, N = number of samples. """ if len(self._level_groups) == 0: - raise Exception("self._level_groups shouldn't be empty, save_global_data() method should have set it, " - "that method is always called from mlmc.sampler.Sampler constructor." - " In other cases, call save_global_data() directly") + raise Exception( + "Level groups are not initialized. " + "Ensure save_global_data() is called before using SampleStorageHDF." + ) levels_results = list(np.empty(len(self._level_groups))) for level in self._level_groups: - chunk_spec = next(self.chunks(level_id=int(level.level_id), - n_samples=self.get_n_collected()[int(level.level_id)])) - results = self.sample_pairs_level(chunk_spec) # return all samples no chunks + chunk_spec = next( + self.chunks( + level_id=int(level.level_id), + n_samples=self.get_n_collected()[int(level.level_id)] + ) + ) + results = self.sample_pairs_level(chunk_spec) if results is None or len(results) == 0: levels_results[int(level.level_id)] = [] continue @@ -168,13 +210,12 @@ def sample_pairs(self): def sample_pairs_level(self, chunk_spec): """ - Get result for particular level and chunk - :param chunk_spec: object containing chunk identifier level identifier and chunk_slice - slice() object - :return: np.ndarray + Retrieve samples for a specific level and chunk. + + :param chunk_spec: ChunkSpec containing level ID and slice information. + :return: np.ndarray of shape [M, chunk size, 2]. """ - level_id = chunk_spec.level_id - if chunk_spec.level_id is None: - level_id = 0 + level_id = chunk_spec.level_id or 0 chunk = self._level_groups[int(level_id)].collected(chunk_spec.chunk_slice) # Remove auxiliary zeros from level zero sample pairs @@ -185,31 +226,31 @@ def sample_pairs_level(self, chunk_spec): def n_finished(self): """ - Number of finished samples on each level - :return: List[int] + Count the number of finished samples for each level. 
+ + :return: np.ndarray[int] containing finished sample counts per level. """ n_finished = np.zeros(len(self._level_groups)) for level in self._level_groups: n_finished[int(level.level_id)] += len(level.get_finished_ids()) - return n_finished def unfinished_ids(self): """ - List of unfinished ids - :return: list + Return identifiers of all unfinished samples. + + :return: List[str] """ unfinished = [] - for level in self._level_groups: unfinished.extend(level.get_unfinished_ids()) - return unfinished def failed_samples(self): """ - Dictionary of failed samples - :return: dict + Return dictionary of failed samples for each level. + + :return: Dict[str, List[str]] """ failed_samples = {} for level in self._level_groups: @@ -217,13 +258,17 @@ def failed_samples(self): return failed_samples def clear_failed(self): + """ + Clear all failed sample records from storage. + """ for level in self._level_groups: level.clear_failed_dataset() def save_n_ops(self, n_ops): """ - Save number of operations (time) of samples - :param n_ops: Dict[level_id, List[overall time, number of successful samples]] + Save the estimated number of operations (e.g., runtime) for each level. + + :param n_ops: Dict[level_id, List[total_time, num_successful_samples]] :return: None """ for level_id, (time, n_samples) in n_ops: @@ -238,8 +283,9 @@ def save_n_ops(self, n_ops): def get_n_ops(self): """ - Get number of estimated operations on each level - :return: List + Get the average number of operations per sample for each level. + + :return: List[float] """ n_ops = list(np.zeros(len(self._level_groups))) for level in self._level_groups: @@ -250,15 +296,26 @@ def get_n_ops(self): return n_ops def get_level_ids(self): + """ + Get identifiers of all levels stored in HDF5. + + :return: List[int] + """ return [int(level.level_id) for level in self._level_groups] def get_level_parameters(self): + """ + Load stored level parameters (e.g., step sizes or resolutions). 
+ + :return: List[float] + """ return self._hdf_object.load_level_parameters() def get_n_collected(self): """ - Get number of collected samples at each level - :return: List + Get the number of collected (stored) samples for each level. + + :return: List[int] """ n_collected = list(np.zeros(len(self._level_groups))) for level in self._level_groups: @@ -267,7 +324,8 @@ def get_n_collected(self): def get_n_levels(self): """ - Get number of levels + Get total number of levels present in storage. + :return: int """ return len(self._level_groups) diff --git a/mlmc/sampler.py b/mlmc/sampler.py index 6a550726..d1a39f29 100644 --- a/mlmc/sampler.py +++ b/mlmc/sampler.py @@ -8,7 +8,12 @@ class Sampler: """ - Manages samples scheduling, results collection, and result storage. + Manages sample scheduling, result collection, and persistent storage. + + Coordinates the sampling pool, simulation factory, and sample storage: + - schedules new samples according to target counts, + - collects finished samples and writes them to storage, + - handles failed samples and runtime (n_ops) bookkeeping. """ ADDING_SAMPLES_TIMEOUT = 1e-15 @@ -16,64 +21,74 @@ class Sampler: def __init__(self, sample_storage: SampleStorage, sampling_pool: SamplingPool, sim_factory: Simulation, level_parameters: List[List[float]], seed=1234): """ + Initialize sampler and prepare per-level simulation objects. + :param sample_storage: store scheduled samples, results and result structure - :param sampling_pool: calculate samples - :param sim_factory: generate samples - :param level_parameters: List of e.g. simulation steps, ... - :param seed: global random seed + :param sampling_pool: sampling pool responsible for executing simulations + :param sim_factory: factory that creates level Simulation instances and provides result_format() + :param level_parameters: List of per-level parameters (e.g. 
simulation steps) + :param seed: global RNG seed used to seed NumPy's RNG """ np.random.seed(seed) self.sample_storage = sample_storage self._sampling_pool = sampling_pool + # Target number of samples per level (may be updated later) self._n_target_samples = np.zeros(len(level_parameters)) - # Number of target samples + + # Create LevelSimulation objects for each level using the provided factory self._level_sim_objects = [] self._create_level_sim_objects(level_parameters, sim_factory) + # Persist global data (level parameters and result format) into storage sample_storage.save_global_data(level_parameters=level_parameters, result_format=sim_factory.result_format()) + # Load already scheduled samples (if any) from storage self._n_scheduled_samples = [len(level_scheduled) for level_id, level_scheduled in sample_storage.load_scheduled_samples().items()] - # Number of created samples + # If there are no scheduled samples yet, initialize to zeros if not self._n_scheduled_samples: self._n_scheduled_samples = np.zeros(len(level_parameters)) - # Are there any unfinished samples which have already finished? + # Check for unfinished samples and inform the sampling pool self._check_failed_samples() - # @TODO: get unfinished samples from sampler and call have permanent samples -> add results to pool's queues, - # before scheduled samples call, call get_finished - we need to know how many samples is finished + # @TODO: If sampler is restarted, collect any samples finished while offline: + # - add permanent samples into pool queues, + # - before scheduling new samples, call get_finished to know how many are already done. @property def n_levels(self): + """Return number of MLMC levels managed by this sampler.""" return len(self._level_sim_objects) @property def n_finished_samples(self): """ - Retrieve number of all finished samples - :return: + Retrieve numbers of finished samples for all levels. 
+ + :return: array-like containing finished counts per level """ return self.sample_storage.n_finished() def _create_level_sim_objects(self, level_parameters, sim_factory): """ - Create LevelSimulation object for each level, use simulation factory - :param: level_parameters: List, simulation steps, ... - :param: sim_factory: Simulation instance + Create LevelSimulation object for each level via the simulation factory. + + :param level_parameters: List of per-level parameters + :param sim_factory: Simulation factory providing level_instance and calculate methods :return: None """ n_levels = len(level_parameters) for level_id in range(n_levels): if level_id == 0: level_sim = sim_factory.level_instance(level_parameters[level_id], [0]) - else: level_sim = sim_factory.level_instance(level_parameters[level_id], level_parameters[level_id - 1]) + # Attach factory methods and metadata to the LevelSimulation level_sim._calculate = sim_factory.calculate level_sim._result_format = sim_factory.result_format level_sim._level_id = level_id @@ -81,30 +96,37 @@ def _create_level_sim_objects(self, level_parameters, sim_factory): def sample_range(self, n0, nL): """ - Geometric sequence of L elements decreasing from n0 to nL. - Useful to set number of samples explicitly. - :param n0: int - :param nL: int - :return: np.array of length L = n_levels. + Generate a geometric sequence of length L decreasing from n0 to nL. + + Useful to generate a set of target sample counts across levels. + + :param n0: int, number of samples at finest level + :param nL: int, number of samples at coarsest level + :return: np.ndarray of length self.n_levels with integer sample counts """ return np.round(np.exp2(np.linspace(np.log2(n0), np.log2(nL), self.n_levels))).astype(int) def set_initial_n_samples(self, n_samples=None): """ - Set target number of samples for each level - :param n_samples: array of number of samples + Set initial target number of samples for each level. 
+ + Accepts: + - None (defaults to [100, 10]), + - single integer (interpreted as n0, with default nL=10), + - two-element list [n0, nL] (geometric interpolation across levels). + + :param n_samples: scalar, length-2 list, or array specifying target counts :return: None """ if n_samples is None: n_samples = [100, 10] - # Num of samples to ndarray n_samples = np.atleast_1d(n_samples) - # Just maximal number of samples is set + # Single value -> treat as n0 with default nL if len(n_samples) == 1: n_samples = np.array([n_samples[0], 10]) - # Create number of samples for all levels + # Two values -> create geometric progression across levels if len(n_samples) == 2: n0, nL = n_samples n_samples = self.sample_range(n0, nL) @@ -113,56 +135,81 @@ def set_initial_n_samples(self, n_samples=None): def _get_sample_tag(self, level_id): """ - Create sample tag + Create a unique sample tag for a given level. + :param level_id: identifier of current level - :return: str + :return: str unique sample tag (e.g. 'L00_S0000123') """ return "L{:02d}_S{:07d}".format(level_id, int(self._n_scheduled_samples[level_id])) - def schedule_samples(self, timeout=None): + def schedule_samples(self, timeout=None, level_id=None, n_samples=None): """ - Create simulation samples, loop through "levels" and its samples (given the number of target samples): - 1) generate sample tag (same for fine and coarse simulation) - 2) get LevelSimulation instance by simulation factory - 3) schedule sample via sampling pool - 4) store scheduled samples in sample storage, separately for each level - :param timeout: int, get_finished - while break timeout in seconds + Schedule new simulation samples in the sampling pool and record them in storage. + + For each scheduled sample: + 1) generate a unique sample id shared by fine and coarse tasks, + 2) obtain the LevelSimulation instance for the level, + 3) schedule the sample with SamplingPool, + 4) store scheduled sample ids in SampleStorage. 
+ + :param timeout: float or None, passed to ask_sampling_pool_for_samples() before scheduling + :param level_id: int or None, if provided schedule only for this level (default: highest level) + :param n_samples: int or None, if provided schedule exactly this many samples for the specified level :return: None """ + # First, collect any finished samples self.ask_sampling_pool_for_samples(timeout=timeout) plan_samples = self._n_target_samples - self._n_scheduled_samples - for level_id, n_samples in enumerate(plan_samples): + # Default to the coarsest level if not specified + if level_id is None: + level_id = len(plan_samples) - 1 + + # If a specific number of samples for one level is requested + if n_samples is not None: samples = [] for _ in range(int(n_samples)): - # Unique sample id sample_id = self._get_sample_tag(level_id) level_sim = self._level_sim_objects[level_id] - # Schedule current sample self._sampling_pool.schedule_sample(sample_id, level_sim) - # Increment number of created samples at current level self._n_scheduled_samples[level_id] += 1 - samples.append(sample_id) - # Store scheduled samples self.sample_storage.save_scheduled_samples(level_id, samples) + else: + # Iterate levels from coarsest to finest and schedule required samples + for n_samples in np.flip(plan_samples): + samples = [] + for _ in range(int(n_samples)): + sample_id = self._get_sample_tag(level_id) + level_sim = self._level_sim_objects[level_id] + + self._sampling_pool.schedule_sample(sample_id, level_sim) + self._n_scheduled_samples[level_id] += 1 + samples.append(sample_id) + + self.sample_storage.save_scheduled_samples(level_id, samples) + level_id -= 1 def _check_failed_samples(self): """ - Get unfinished samples and check if failed samples have saved results then collect them - :return: + Query storage for unfinished sample IDs and inform the sampling pool. + + This allows the sampling pool to reattach or handle 'permanent' samples + that may have been started previously. 
+        :return: None
        """
        unfinished_sample_ids = self.sample_storage.unfinished_ids()
        self._sampling_pool.have_permanent_samples(unfinished_sample_ids)

    def ask_sampling_pool_for_samples(self, sleep=0, timeout=None):
        """
-        Waiting for running simulations
-        :param sleep: time for doing nothing
-        :param timeout: maximum time for waiting on running simulations
-        :return: int, number of running simulations
+        Poll the sampling pool for finished simulations and store their results.
+
+        :param sleep: float, time to sleep between polls (seconds)
+        :param timeout: float or None, maximum waiting time; None or <= 0 means no time limit (wait until all samples finish)
+        :return: int, number of running simulations remaining after the call
        """
        if timeout is None:
            timeout = 0
@@ -173,7 +220,7 @@ def ask_sampling_pool_for_samples(self, sleep=0, timeout=None):
        t0 = time.perf_counter()
        while n_running > 0:
            successful_samples, failed_samples, n_running, n_ops = self._sampling_pool.get_finished()
-            # Store finished samples
+            # Persist finished samples and operation counts
            self._store_samples(successful_samples, failed_samples, n_ops)
            time.sleep(sleep)
            if 0 < timeout < (time.perf_counter() - t0):
+ + :param successful_samples: Dict[level_id, List[Tuple[sample_id:str, (fine, coarse)]]] + :param failed_samples: Dict[level_id, List[Tuple[sample_id:str, error_message:str]]] + :param n_ops: Dict[level_id, Tuple[total_time:float, n_success_samples:int]] :return: None """ self.sample_storage.save_samples(successful_samples, failed_samples) @@ -194,24 +242,24 @@ def _store_samples(self, successful_samples, failed_samples, n_ops): def process_adding_samples(self, n_estimated, sleep=0, add_coeff=0.1, timeout=ADDING_SAMPLES_TIMEOUT): """ - Process adding samples - Note: n_estimated are wrong if n_ops is similar through all levels - :param n_estimated: Number of estimated samples on each level, list - :param sleep: Sample waiting time - :param add_coeff: default value 0.1, The number of scheduled samples would be 'add_coef' fraction of difference - between current number of target samples and new estimated number of target samples - :param timeout: ask sampling pool for finished samples timeout - :return: bool, if True adding samples is complete + Add newly estimated samples in batches, scheduling a fraction of the difference + between current scheduled and newly estimated targets. + + Note: n_estimated may be unreliable if per-level n_ops are similar across levels. 
+ + :param n_estimated: array-like, estimated target samples per level + :param sleep: float, time to sleep while waiting for results + :param add_coeff: float in (0,1], fraction of the difference to schedule each iteration (default 0.1) + :param timeout: float, timeout passed to ask_sampling_pool_for_samples() + :return: bool, True if scheduled counts reached the estimates for all levels """ + # Ensure storage reflects any finished work self.ask_sampling_pool_for_samples(timeout=timeout) - # Get default scheduled samples + # Currently scheduled samples per level n_scheduled = self.l_scheduled_samples() - # New scheduled sample will be 10 percent of difference - # between current number of target samples and new estimated one - # If 10 percent of estimated samples is greater than difference between estimated and scheduled samples, - # set scheduled samples to estimated samples + # Compute new scheduled values (add_coeff fraction of the remaining difference) new_scheduled = np.where((n_estimated * add_coeff) > (n_estimated - n_scheduled), n_estimated, n_scheduled + (n_estimated - n_scheduled) * add_coeff) @@ -220,41 +268,43 @@ def process_adding_samples(self, n_estimated, sleep=0, add_coeff=0.1, timeout=AD n_scheduled, new_scheduled)) - # Levels where estimated are greater than scheduled + # Levels where estimated > scheduled greater_items = np.where(np.greater(n_estimated, n_scheduled))[0] - # Scheduled samples and wait until at least half of the samples are done + # Schedule and wait until at least a fraction of newly scheduled samples finish self.set_scheduled_and_wait(n_scheduled, greater_items, sleep, timeout=timeout) return np.all(n_estimated[greater_items] == n_scheduled[greater_items]) def set_scheduled_and_wait(self, n_scheduled, greater_items, sleep, fin_sample_coef=0.5, timeout=1e-7): """ - Scheduled samples on each level and wait until at least half of the samples is done - :param n_scheduled: ndarray, number of scheduled samples on each level - :param 
greater_items: Items where n_estimated is greater than n_scheduled - :param sleep: Time waiting for samples - :param fin_sample_coef: The proportion of samples to finished for further estimate + Set scheduled sample targets and wait until a proportion of those samples finish. + + :param n_scheduled: ndarray, target number of scheduled samples per level + :param greater_items: iterable of indices where targets were increased + :param sleep: float, time to sleep between polls + :param fin_sample_coef: float in (0,1], fraction of scheduled samples that should finish before continuing + :param timeout: float, timeout passed to ask_sampling_pool_for_samples() :return: None """ - # Set scheduled samples and run simulations + # Update internal targets and schedule required samples self.set_level_target_n_samples(n_scheduled) self.schedule_samples(timeout=timeout) - # Finished level samples + # Current finished counts n_finished = self.n_finished_samples - # Wait until at least half of the scheduled samples are done on each level + # Wait until at least fin_sample_coef fraction of scheduled samples are finished for affected levels while np.any(n_finished[greater_items] < fin_sample_coef * n_scheduled[greater_items]): - # Wait a while time.sleep(sleep) self.ask_sampling_pool_for_samples(timeout=timeout) n_finished = self.n_finished_samples def set_level_target_n_samples(self, n_samples): """ - Set level number of target samples - :param n_samples: list, each level target samples + Update the per-level target sample counts to at least the provided values. + + :param n_samples: iterable of new target samples per level :return: None """ for level, n in enumerate(n_samples): @@ -262,14 +312,18 @@ def set_level_target_n_samples(self, n_samples): def l_scheduled_samples(self): """ - Get all levels number of scheduled samples - :return: list + Return the currently scheduled sample counts per level. 
+ + :return: list or array-like of scheduled sample counts """ return self._n_scheduled_samples def renew_failed_samples(self): """ - Resurrect failed samples + Reschedule previously failed samples. + + Retrieves failed sample IDs from storage, re-schedules them in the sampling pool, + and clears failed records from storage. :return: None """ failed_samples = self.sample_storage.failed_samples() @@ -279,8 +333,8 @@ def renew_failed_samples(self): level_id = int(level_id) for sample_id in sample_ids: level_sim = self._level_sim_objects[level_id] - # Schedule current sample self._sampling_pool.schedule_sample(sample_id, level_sim) samples.append(sample_id) + # Clear failed sample records after rescheduling self.sample_storage.clear_failed() diff --git a/mlmc/sampling_pool.py b/mlmc/sampling_pool.py index 0d402325..9044246d 100644 --- a/mlmc/sampling_pool.py +++ b/mlmc/sampling_pool.py @@ -5,7 +5,7 @@ import time import hashlib import numpy as np -from typing import List +from typing import List, Tuple, Dict, Optional, Any import traceback from abc import ABC, abstractmethod from multiprocessing import Pool as ProcPool @@ -15,18 +15,22 @@ class SamplingPool(ABC): """ - Determining the runtime environment of samples, eg single process, multiple processes, running PBS, ... + Abstract base class defining the runtime environment for sample simulations. + It manages sample execution across different backends (single process, + multiprocessing, PBS, etc.). """ FAILED_DIR = 'failed' SEVERAL_SUCCESSFUL_DIR = 'several_successful' N_SUCCESSFUL = 5 - # Number of successful samples to store + # Number of successful samples to store. - def __init__(self, work_dir=None, debug=False): + def __init__(self, work_dir: Optional[str] = None, debug: bool = False): """ - :param work_dir: Path to working directory - :param debug: bool, if True keep sample directories + Initialize the sampling pool environment. + + :param work_dir: Path to the working directory where outputs are stored. 
+ :param debug: If True, keep sample directories for debugging. """ self._output_dir = None if work_dir is not None: @@ -34,14 +38,16 @@ def __init__(self, work_dir=None, debug=False): self._output_dir = os.path.join(work_dir, "output") self._debug = debug - self._create_dir() # prepare output dir - self._create_dir(SamplingPool.FAILED_DIR) # prepare failed dir - self._successful_dir = self._create_dir(SamplingPool.SEVERAL_SUCCESSFUL_DIR) # prepare several successful dir + # Prepare main output, failed, and successful directories. + self._create_dir() + self._create_dir(SamplingPool.FAILED_DIR) + self._successful_dir = self._create_dir(SamplingPool.SEVERAL_SUCCESSFUL_DIR) - def _create_dir(self, directory=""): + def _create_dir(self, directory: str = "") -> Optional[str]: """ - Create output directory, in 'debug' mode not remove existing output_dir - :return: None + Create the output directory if it does not exist. + + In debug mode, existing directories are preserved. """ if self._output_dir is not None: directory = os.path.join(self._output_dir, directory) @@ -49,288 +55,446 @@ def _create_dir(self, directory=""): shutil.rmtree(directory) os.makedirs(directory, mode=0o775, exist_ok=True) return directory + return None + + # --- Abstract methods to be implemented by subclasses --- @abstractmethod - def schedule_sample(self, sample_id, level_sim: LevelSimulation): + def schedule_sample(self, sample_id: str, level_sim: LevelSimulation): """ - Method for calculating simulation samples - :param sample_id: str - :param level_sim: level_simulation.LevelSimulation instance + Schedule a simulation sample for execution. + + :param sample_id: Unique sample identifier. + :param level_sim: LevelSimulation instance. 
:return: Tuple[str, List] """ @abstractmethod - def have_permanent_samples(self, sample_ids): + def have_permanent_samples(self, sample_ids: List[str]) -> bool: """ - Informs the Pool about sample_ids that have been scheduled but not yet finished + Inform the pool about samples that have been scheduled but not yet finished. """ @abstractmethod def get_finished(self): """ - Return finished samples - :return: list of results, number of running samples + Retrieve finished sample results. + + :return: Tuple containing (successful samples, failed samples, number of running samples) """ + # --- Utility methods shared across subclasses --- + @staticmethod - def compute_seed(sample_id): + def compute_seed(sample_id: str) -> int: """ - Calculate seed for given sample id - :param sample_id: str - :return: int + Compute a deterministic seed for a given sample ID. + + :param sample_id: Unique sample identifier. + :return: Integer seed value. """ - hash = hashlib.md5(sample_id.encode('ascii')) - seed = np.frombuffer(hash.digest(), dtype='uint32')[0] - return seed + hash_val = hashlib.md5(sample_id.encode('ascii')) + seed = np.frombuffer(hash_val.digest(), dtype='uint32')[0] + return int(seed) @staticmethod - def calculate_sample(sample_id, level_sim, work_dir=None, seed=None): + def calculate_sample(sample_id: str, level_sim: LevelSimulation, + work_dir: Optional[str] = None, + seed: Optional[int] = None) -> Tuple[str, Any, str, float]: """ - Method for calculating results - :param sample_id: str - :param level_sim: LevelSimulation - :param work_dir: working directory - :param seed: random seed - :return: sample id, sample result, error message with traceback, running time + Execute a single simulation sample. + + :param sample_id: Sample identifier. + :param level_sim: LevelSimulation instance. + :param work_dir: Working directory for the sample. + :param seed: Optional random seed (generated if not provided). 
+ :return: Tuple(sample_id, result, error_message, running_time) """ if seed is None: seed = SamplingPool.compute_seed(sample_id) + res = (None, None) err_msg = "" - running_time = 0 + running_time = 0.0 if level_sim.need_sample_workspace: SamplingPool.handle_sim_files(work_dir, sample_id, level_sim) + try: start = time.time() res = level_sim._calculate(level_sim.config_dict, seed) running_time = time.time() - start - # Check result format - if type(res[0]) is np.ndarray and type(res[1]) is np.ndarray: + # Validate result format. + if isinstance(res[0], np.ndarray) and isinstance(res[1], np.ndarray): flatten_fine_res = res[0].flatten() flatten_coarse_res = res[1].flatten() - res_expected_len = np.sum( - [np.prod(quantity_spec.shape) * len(quantity_spec.times) * len(quantity_spec.locations) - for quantity_spec in level_sim._result_format()]) + expected_len = np.sum([ + np.prod(q.shape) * len(q.times) * len(q.locations) + for q in level_sim._result_format() + ]) - assert len(flatten_fine_res) == len(flatten_coarse_res) == res_expected_len, \ - "Unexpected result format, expected length: {}, resultf length: {}".format(res_expected_len, - len(flatten_fine_res)) + assert len(flatten_fine_res) == len(flatten_coarse_res) == expected_len, \ + f"Unexpected result format. Expected length: {expected_len}, got: {len(flatten_fine_res)}" except Exception: - str_list = traceback.format_exception(*sys.exc_info()) - err_msg = "".join(str_list) + err_msg = "".join(traceback.format_exception(*sys.exc_info())) + print("Error msg:", err_msg) return sample_id, res, err_msg, running_time + # --- File handling helpers --- + @staticmethod - def change_to_sample_directory(work_dir, path: str): + def change_to_sample_directory(work_dir: str, path: str) -> str: """ - Create sample directory and change working directory - :param path: str - :return: None + Create and switch to the sample-specific directory. + + :param work_dir: Base working directory. + :param path: Sample subdirectory name. 
+ :return: Absolute path to the created sample directory. """ sample_dir = os.path.join(work_dir, path) - if not os.path.isdir(sample_dir): - os.makedirs(sample_dir, mode=0o775, exist_ok=True) + os.makedirs(sample_dir, mode=0o775, exist_ok=True) return sample_dir @staticmethod - def copy_sim_files(files: List[str], sample_dir): + def copy_sim_files(files: List[str], sample_dir: str): """ - Copy simulation common files to current simulation sample directory - :param files: List of files - :return: None + Copy shared simulation files to the sample directory. + + :param files: List of file paths to copy. + :param sample_dir: Destination sample directory. """ for file in files: shutil.copy(file, sample_dir) @staticmethod - def handle_sim_files(work_dir, sample_id, level_sim): + def handle_sim_files(work_dir: str, sample_id: str, level_sim: LevelSimulation): """ - Change working directory to sample dir and copy common files - :param sample_id: str - :param level_sim: LevelSimulation - :return: None + Prepare the sample workspace (create directory, copy common files, set cwd). + + :param work_dir: Base working directory. + :param sample_id: Sample identifier. + :param level_sim: LevelSimulation instance. """ if level_sim.need_sample_workspace: sample_dir = SamplingPool.change_to_sample_directory(work_dir, sample_id) - if level_sim.common_files is not None: SamplingPool.copy_sim_files(level_sim.common_files, sample_dir) os.chdir(sample_dir) @staticmethod - def move_successful_rm(sample_id, level_sim, output_dir, dest_dir): + def move_successful_rm(sample_id: str, level_sim: LevelSimulation, + output_dir: str, dest_dir: str): + """ + Move successful sample directories and remove originals. 
+ """ if int(sample_id[-7:]) < SamplingPool.N_SUCCESSFUL: - SamplingPool.move_dir(sample_id, level_sim.need_sample_workspace, output_dir, dest_dir=dest_dir) + SamplingPool.move_dir(sample_id, level_sim.need_sample_workspace, output_dir, dest_dir) SamplingPool.remove_sample_dir(sample_id, level_sim.need_sample_workspace, output_dir) @staticmethod - def move_failed_rm(sample_id, level_sim, output_dir, dest_dir): - SamplingPool.move_dir(sample_id, level_sim.need_sample_workspace, output_dir, dest_dir=dest_dir) + def move_failed_rm(sample_id: str, level_sim: LevelSimulation, + output_dir: str, dest_dir: str): + """ + Move failed sample directories and remove originals. + """ + SamplingPool.move_dir(sample_id, level_sim.need_sample_workspace, output_dir, dest_dir) SamplingPool.remove_sample_dir(sample_id, level_sim.need_sample_workspace, output_dir) @staticmethod - def move_dir(sample_id, sample_workspace, work_dir, dest_dir): + def move_dir(sample_id: str, sample_workspace: bool, + work_dir: str, dest_dir: str): """ - Move failed sample dir to failed directory - :param sample_id: str - :param sample_workspace: bool, simulation needs workspace - :param work_dir: str - :param dest_dir: destination - :return: None + Move a sample directory to another location (e.g., failed or successful). + + :param sample_id: Sample identifier. + :param sample_workspace: Whether the sample uses its own workspace. + :param work_dir: Base working directory. + :param dest_dir: Destination subdirectory name. 
""" - if sample_workspace and work_dir is not None and dest_dir is not None: + if sample_workspace and work_dir and dest_dir: destination_dir = os.path.join(work_dir, dest_dir) sample_dir = SamplingPool.change_to_sample_directory(work_dir, sample_id) - if os.path.exists(os.path.join(destination_dir, sample_id)): - shutil.rmtree(os.path.join(destination_dir, sample_id), ignore_errors=True) - shutil.copytree(sample_dir, os.path.join(destination_dir, sample_id)) + target_dir = os.path.join(destination_dir, sample_id) + if os.path.exists(target_dir): + shutil.rmtree(target_dir, ignore_errors=True) + shutil.copytree(sample_dir, target_dir) @staticmethod - def remove_sample_dir(sample_id, sample_workspace, work_dir): + def remove_sample_dir(sample_id: str, sample_workspace: bool, work_dir: str): """ - Remove sample directory - :param sample_id: str - :param sample_workspace: bool, simulation needs workspace - :param work_dir: str - :return: None + Remove the directory for a completed or failed sample. + + :param sample_id: Sample identifier. + :param sample_workspace: Whether the sample uses its own workspace. + :param work_dir: Base working directory. """ - if sample_workspace and work_dir is not None: + if sample_workspace and work_dir: sample_dir = SamplingPool.change_to_sample_directory(work_dir, sample_id) shutil.rmtree(sample_dir, ignore_errors=True) class OneProcessPool(SamplingPool): + """ + Sampling pool implementation that executes all samples sequentially in a single process. + Used primarily for debugging or lightweight simulations. + """ def __init__(self, work_dir=None, debug=False): """ - Everything is running in one process + Initialize the one-process pool. + + Parameters + ---------- + work_dir : str, optional + Working directory for storing sample outputs. + debug : bool, default=False + If True, disables moving/removing files after successful execution. 
""" super().__init__(work_dir=work_dir, debug=debug) - self._failed_queues = {} - self._queues = {} - self._n_running = 0 - self.times = {} + self._failed_queues = {} # Stores failed sample queues per level + self._queues = {} # Stores successful sample queues per level + self._n_running = 0 # Tracks number of currently running samples + self.times = {} # Stores total runtime and count per level def schedule_sample(self, sample_id, level_sim): - self._n_running += 1 + """ + Execute a single sample synchronously (in the current process). + + Parameters + ---------- + sample_id : int + Identifier of the sample. + level_sim : LevelSimulation + Simulation instance containing configuration for the sample. + """ + self._n_running += 1 # Increment running sample counter + # Set output directory if required by simulation if self._output_dir is None and level_sim.need_sample_workspace: self._output_dir = os.getcwd() - sample_id, result, err_msg, running_time = SamplingPool.calculate_sample(sample_id, level_sim, - work_dir=self._output_dir) + # Run the sample and collect result, error message, and runtime + sample_id, result, err_msg, running_time = SamplingPool.calculate_sample( + sample_id, level_sim, work_dir=self._output_dir + ) + # Process result (successful or failed) self._process_result(sample_id, result, err_msg, running_time, level_sim) def _process_result(self, sample_id, result, err_msg, running_time, level_sim): """ - Save sample result - :param sample_id: sample identifier from calculate_sample() - :param result: sample result from calculate_sample() - :param err_msg: sample error message from calculate_sample() - :param running_time: running time for sample from calculate_sample() - :param level_sim: level_simulation instance - :return: None - """ - # Save running time for n_ops + Process result from a sample execution and store it in the appropriate queue. + + Parameters + ---------- + sample_id : int + Identifier of the executed sample. 
+ result : tuple + Pair of fine and coarse results (numpy arrays). + err_msg : str + Error message if the sample failed, empty string otherwise. + running_time : float + Runtime of the sample execution in seconds. + level_sim : LevelSimulation + Simulation instance used to produce the sample. + """ + # Record runtime for this level self._save_running_time(level_sim._level_id, running_time) + # If no error occurred, store successful result if not err_msg: - self._queues.setdefault(level_sim._level_id, queue.Queue()).put((sample_id, (result[0], result[1]))) + self._queues.setdefault(level_sim._level_id, queue.Queue()).put( + (sample_id, (result[0], result[1])) + ) + # Move successful sample to its permanent directory unless debugging if not self._debug: - SamplingPool.move_successful_rm(sample_id, level_sim, output_dir=self._output_dir, dest_dir=self._successful_dir) + SamplingPool.move_successful_rm( + sample_id, level_sim, output_dir=self._output_dir, dest_dir=self._successful_dir + ) else: + # If the simulation failed if not level_sim.need_sample_workspace: - print("Sample {} error: {}".format(sample_id, err_msg)) + print(f"Sample {sample_id} error: {err_msg}") else: - SamplingPool.move_failed_rm(sample_id, level_sim, output_dir=self._output_dir, dest_dir=SamplingPool.FAILED_DIR) + SamplingPool.move_failed_rm( + sample_id, level_sim, output_dir=self._output_dir, dest_dir=SamplingPool.FAILED_DIR + ) self._failed_queues.setdefault(level_sim._level_id, queue.Queue()).put((sample_id, err_msg)) def _save_running_time(self, level_id, running_time): """ - Save running time to dictionary, store total time and number of samples - :param level_id: int - :param running_time: float - :return: None + Save sample execution time in the tracking dictionary. + + Parameters + ---------- + level_id : int + Identifier of the simulation level. + running_time : float + Execution time of the sample. 
""" - # Save sample times [total time, number of samples] + # Initialize level entry if missing if level_id not in self.times: self.times[level_id] = [0, 0] - # Failed samples have running time equal 0 by default + # Only count successful samples with nonzero runtime if running_time != 0: - self.times[level_id][0] += running_time - self.times[level_id][1] += 1 + self.times[level_id][0] += running_time # Accumulate total runtime + self.times[level_id][1] += 1 # Increment sample count def have_permanent_samples(self, sample_ids): + """ + Return False, indicating that no samples are stored permanently. + + Parameters + ---------- + sample_ids : list + List of sample identifiers (ignored). + + Returns + ------- + bool + Always False. + """ return False def get_finished(self): """ - return results from queue - list of (sample_id, pair_of_result_vectors, error_message) + Retrieve all completed (successful and failed) samples. + + Returns + ------- + successful : dict + Dictionary of successful samples by level. + failed : dict + Dictionary of failed samples by level. + n_running : int + Number of currently running samples. + times : list + List of (level_id, [total_time, n_samples]) pairs. """ successful = self._queues_to_list(list(self._queues.items())) failed = self._queues_to_list(list(self._failed_queues.items())) - return successful, failed, self._n_running, list(self.times.items()) def _queues_to_list(self, queue_dict_list): + """ + Convert queues to lists and clear them safely. + + Parameters + ---------- + queue_dict_list : list + List of (level_id, queue.Queue) pairs. + + Returns + ------- + results : dict + Dictionary mapping level_id to list of queue entries. 
+ """ results = {} for level_id, q in queue_dict_list: queue_list = list(q.queue) if not queue_list: continue results[level_id] = queue_list - # Thread safe clear + + # Thread-safe queue clearing with q.mutex: q.queue.clear() + # Update running sample counter self._n_running -= len(results[level_id]) - return results +# ============================================================================== + class ProcessPool(OneProcessPool): """ - Suitable for local parallel sampling for simulations WITHOUT external program call + Sampling pool using multiprocessing for parallel sample execution. + Suitable for simulations without external program calls. """ def __init__(self, n_processes, work_dir=None, debug=False): - self._pool = ProcPool(n_processes) + """ + Initialize process-based parallel sampling pool. + + Parameters + ---------- + n_processes : int + Number of worker processes to use. + work_dir : str, optional + Working directory for samples. + debug : bool, default=False + If True, disables moving/removing sample outputs. + """ + self._pool = ProcPool(n_processes) # Multiprocessing pool super().__init__(work_dir=work_dir, debug=debug) def res_callback(self, result, level_sim): """ - Process simulation results - :param result: tuple - :param level_sim: LevelSimulation instance - :return: None + Callback for handling results from asynchronous execution. + + Parameters + ---------- + result : tuple + Returned result from SamplingPool.calculate_sample(). + level_sim : LevelSimulation + Simulation level instance. """ self._process_result(*result, level_sim) def schedule_sample(self, sample_id, level_sim): + """ + Schedule a sample for parallel execution in a separate process. + + Parameters + ---------- + sample_id : int + Sample identifier. + level_sim : LevelSimulation + Simulation configuration instance. 
+ """ self._n_running += 1 + # Set working directory for output files if self._output_dir is None and level_sim.need_sample_workspace: self._output_dir = os.getcwd() - self._pool.apply_async(SamplingPool.calculate_sample, args=(sample_id, level_sim, self._output_dir), - callback=lambda res: self.res_callback(res, level_sim), - error_callback=lambda res: self.res_callback(res, level_sim)) + # Submit task asynchronously to process pool + self._pool.apply_async( + SamplingPool.calculate_sample, + args=(sample_id, level_sim, self._output_dir), + callback=lambda res: self.res_callback(res, level_sim), + error_callback=lambda res: self.res_callback(res, level_sim) + ) + +# ============================================================================== class ThreadPool(ProcessPool): """ - Suitable local parallel sampling for simulations WITH external program call + Sampling pool using threading for local parallel sampling. + Suitable for simulations with external program calls (I/O-bound). """ def __init__(self, n_thread, work_dir=None, debug=False): + """ + Initialize thread-based parallel sampling pool. + + Parameters + ---------- + n_thread : int + Number of threads to use. + work_dir : str, optional + Working directory for samples. + debug : bool, default=False + If True, disables moving/removing sample outputs. 
+ """ super().__init__(n_thread, work_dir=work_dir, debug=debug) - self._pool = pool.ThreadPool(n_thread) + self._pool = pool.ThreadPool(n_thread) # Thread-based pool instead of process-based self._failed_queues = {} self._queues = {} self._n_running = 0 diff --git a/mlmc/sampling_pool_pbs.py b/mlmc/sampling_pool_pbs.py index 01c4128b..2142fdcd 100644 --- a/mlmc/sampling_pool_pbs.py +++ b/mlmc/sampling_pool_pbs.py @@ -5,6 +5,8 @@ import pickle import json import glob +import time +import numpy as np from mlmc.level_simulation import LevelSimulation from mlmc.sampling_pool import SamplingPool from mlmc.tool.pbs_job import PbsJob @@ -133,9 +135,17 @@ def pbs_common_setting(self, **kwargs): :return: None """ # Script header - select_flags_list = kwargs.get('select_flags', []) - if select_flags_list: - kwargs['select_flags'] = ":" + ":".join(select_flags_list) + select_flags_dict = kwargs.get('select_flags', {}) + + # Set scratch dir + if any(re.compile('scratch.*').match(flag) for flag in list(select_flags_dict.keys())): + if kwargs['scratch_dir'] is None: + kwargs['scratch_dir'] = "$SCRATCHDIR" + else: + kwargs['scratch_dir'] = '' + + if select_flags_dict: + kwargs['select_flags'] = ":" + ':'.join('{}={}'.format(*item) for item in select_flags_dict.items()) else: kwargs['select_flags'] = "" @@ -162,7 +172,7 @@ def pbs_common_setting(self, **kwargs): kwargs['optional_pbs_requests']) # e.g. 
['#PBS -m ae'] means mail is sent when the job aborts or terminates self._pbs_header_template.extend(('MLMC_WORKDIR=\"{}\"'.format(self._work_dir),)) self._pbs_header_template.extend(kwargs['env_setting']) - self._pbs_header_template.extend(('{python} -m mlmc.tool.pbs_job {output_dir} {job_name} >' + self._pbs_header_template.extend(('{python} -m mlmc.tool.pbs_job {output_dir} {job_name} {scratch_dir} >' '{pbs_output_dir}/{job_name}_STDOUT 2>&1',)) self._pbs_config = kwargs @@ -221,28 +231,31 @@ def execute(self): script_content = "\n".join(self.pbs_script) self.write_script(script_content, job_file) - process = subprocess.run(['qsub', job_file], stderr=subprocess.PIPE, stdout=subprocess.PIPE) - try: - if process.returncode != 0: - raise Exception(process.stderr.decode('ascii')) - # Find all finished jobs - self._qsub_failed_n = 0 - # Write current job count - self._job_count += 1 - - # Get pbs_id from qsub output - pbs_id = process.stdout.decode("ascii").split(".")[0] - # Store pbs id for future qstat calls - self._pbs_ids.append(pbs_id) - pbs_process.write_pbs_id(pbs_id) - - self._current_job_weight = 0 - self._n_samples_in_job = 0 - self._scheduled = [] - except: - self._qsub_failed_n += 1 - if self._qsub_failed_n > SamplingPoolPBS.QSUB_FAILED_MAX_N: - raise Exception(process.stderr.decode("ascii")) + while self._qsub_failed_n <= SamplingPoolPBS.QSUB_FAILED_MAX_N: + process = subprocess.run(['qsub', job_file], stderr=subprocess.PIPE, stdout=subprocess.PIPE) + try: + if process.returncode != 0: + raise Exception(process.stderr.decode('ascii')) + # Find all finished jobs + self._qsub_failed_n = 0 + # Write current job count + self._job_count += 1 + + # Get pbs_id from qsub output + pbs_id = process.stdout.decode("ascii").split(".")[0] + # Store pbs id for future qstat calls + self._pbs_ids.append(pbs_id) + pbs_process.write_pbs_id(pbs_id) + + self._current_job_weight = 0 + self._n_samples_in_job = 0 + self._scheduled = [] + break + except: + self._qsub_failed_n += 
1 + time.sleep(30) + if self._qsub_failed_n > SamplingPoolPBS.QSUB_FAILED_MAX_N: + raise Exception(process.stderr.decode("ascii")) def _create_script(self): """ @@ -277,6 +290,64 @@ def get_finished(self): finished_pbs_jobs, unfinished_pbs_jobs = self._qstat_pbs_job() return self._get_result_files(finished_pbs_jobs, unfinished_pbs_jobs) + def collect_data(self): + successful_results = {} + failed_results = {} + times = {} + sim_data_results = {} + # running_times = {} + # extract_mesh_times = {} + # make_field_times = {} + # generate_rnd_times = {} + # fine_flow_times = {} + # coarse_flow_times = {} + n_running = 0 + + os.chdir(self._jobs_dir) + for file in glob.glob("*_STDOUT"): + job_id = re.findall(r'(\d+)_STDOUT', file)[0] + + successful, failed, time = PbsJob.read_results(job_id, self._jobs_dir) + + # Split results to levels + for level_id, results in successful.items(): + successful_results.setdefault(level_id, []).extend(results) + for level_id, results in failed.items(): + failed_results.setdefault(level_id, []).extend(results) + for level_id, results in time.items(): + if level_id in times: + times[level_id][0] += results[-1][0] + times[level_id][1] += results[-1][1] + else: + times[level_id] = list(results[-1]) + + # # Optional simulation data + # for level_id, results in sim_data.items(): + # sim_data_results.setdefault(level_id, []).extend(results) + + # for level_id, results in running_time.items(): + # running_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in extract_mesh.items(): + # extract_mesh_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in make_field.items(): + # make_field_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in generate_rnd.items(): + # generate_rnd_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in fine_flow.items(): + # fine_flow_times[level_id] = 
[np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in coarse_flow.items(): + # coarse_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + + return successful_results, failed_results, n_running #, sim_data_results #list(times.items()), list(running_times.items()), \ + # list(extract_mesh_times.items()), list(make_field_times.items()), list(generate_rnd_times.items()), \ + # list(fine_flow_times.items()), \ + # list(coarse_flow_times.items()) + def _qstat_pbs_job(self): """ Parse qstat output and get all unfinished job ids @@ -286,23 +357,36 @@ def _qstat_pbs_job(self): if len(self._pbs_ids) > 0: # Get PBS id's status, # '-x' - displays status information for finished and moved jobs in addition to queued and running jobs. - qstat_call = ["qstat", "-x"] + qstat_call = ["qstat", "-xs"] qstat_call.extend(self._pbs_ids) - # qstat call - process = subprocess.run(qstat_call, stderr=subprocess.PIPE, stdout=subprocess.PIPE) - try: - if process.returncode != 0: - raise Exception(process.stderr.decode("ascii")) - output = process.stdout.decode("ascii") - # Find all finished jobs - finished_pbs_jobs = re.findall(r"(\d+)\..*\d+ F", output) - self._qstat_failed_n = 0 - except: - self._qstat_failed_n += 1 - if self._qstat_failed_n > SamplingPoolPBS.QSTAT_FAILED_MAX_N: - raise Exception(process.stderr.decode("ascii")) - finished_pbs_jobs = [] + while self._qstat_failed_n <= SamplingPoolPBS.QSTAT_FAILED_MAX_N: + # qstat call + unknown_job_ids = [] + process = subprocess.run(qstat_call, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + try: + if process.returncode != 0: + err_output = process.stderr.decode("ascii") + # Presumably, Job Ids are 'unknown' for PBS after some time of their inactivity + unknown_job_ids = re.findall(r"Unknown Job Id (\d+)\.", err_output) + + if len(unknown_job_ids) == 0: + raise Exception(process.stderr.decode("ascii")) + + output = process.stdout.decode("ascii") + # Find all finished jobs + finished_pbs_jobs = 
re.findall(r"(\d+)\..*\d+ F", output) + finished_moved_pbs_jobs = re.findall(r"(\d+)\..*\d+ M.*\n.*Job finished", output) + finished_pbs_jobs.extend(finished_moved_pbs_jobs) + finished_pbs_jobs.extend(unknown_job_ids) + self._qstat_failed_n = 0 + break + except: + self._qstat_failed_n += 1 + time.sleep(30) + if self._qstat_failed_n > SamplingPoolPBS.QSTAT_FAILED_MAX_N: + raise Exception(process.stderr.decode("ascii")) + finished_pbs_jobs = [] # Get unfinished as diff between planned and finished unfinished_pbs_jobs = [] @@ -327,7 +411,7 @@ def _get_result_files(self, finished_pbs_jobs, unfinished_pbs_jobs): :return: successful_results: Dict[level_id, List[Tuple[sample_id: str, Tuple[fine_result: np.ndarray, coarse_result: n.ndarray]]]] failed_results: Dict[level_id, List[Tuple[sample_id: str, err_msg: str]]] n_running: int, number of running samples - times: + times: """ os.chdir(self._jobs_dir) @@ -343,6 +427,14 @@ def _get_result_files(self, finished_pbs_jobs, unfinished_pbs_jobs): successful_results = {} failed_results = {} times = {} + #sim_data_results = {} + # running_times = {} + # extract_mesh_times = {} + # make_field_times = {} + # generate_rnd_times = {} + # fine_flow_times = {} + # coarse_flow_times = {} + for pbs_id in finished_pbs_jobs: reg = "*_{}".format(pbs_id) # JobID_PbsId file file = glob.glob(reg) @@ -359,6 +451,8 @@ def _get_result_files(self, finished_pbs_jobs, unfinished_pbs_jobs): successful_results.setdefault(level_id, []).extend(results) for level_id, results in failed.items(): failed_results.setdefault(level_id, []).extend(results) + # for level_id, results in sim_data.items(): + # sim_data_results.setdefault(level_id, []).extend(results) for level_id, results in time.items(): if level_id in times: times[level_id][0] += results[-1][0] @@ -366,14 +460,38 @@ def _get_result_files(self, finished_pbs_jobs, unfinished_pbs_jobs): else: times[level_id] = list(results[-1]) + # for level_id, results in running_time.items(): + # 
running_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in extract_mesh.items(): + # extract_mesh_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in make_field.items(): + # make_field_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in generate_rnd.items(): + # generate_rnd_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in fine_flow.items(): + # fine_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in coarse_flow.items(): + # coarse_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] # Delete pbsID file - it means job is finished SamplingPoolPBS.delete_pbs_id_file(file) if self._unfinished_sample_ids: successful_results, failed_results, times = self._collect_unfinished(successful_results, - failed_results, times) + failed_results, times, + ) + # running_times + # extract_mesh_times, + # make_field_times, + # generate_rnd_times, + # fine_flow_times, + # coarse_flow_times) - return successful_results, failed_results, n_running, list(times.items()) + return successful_results, failed_results, n_running, list(times.items())#, sim_data_results def _collect_unfinished(self, successful_results, failed_results, times): """ @@ -384,42 +502,64 @@ def _collect_unfinished(self, successful_results, failed_results, times): :return: all input dictionaries """ already_collected = set() + for sample_id in self._unfinished_sample_ids: if sample_id in already_collected: continue - job_id = PbsJob.job_id_from_sample_id(sample_id, self._jobs_dir) + try: + job_id = PbsJob.job_id_from_sample_id(sample_id, self._jobs_dir) + except (FileNotFoundError, KeyError) as e: + level_id = int(re.findall(r'L0?(\d*)', sample_id)[0]) + failed_results.setdefault(level_id, []).append((sample_id, "".format(e))) + continue + successful, failed, time = 
PbsJob.read_results(job_id, self._jobs_dir) # Split results to levels for level_id, results in successful.items(): - for res in results: - if res[0] in self._unfinished_sample_ids: - already_collected.add(res[0]) - successful_results.setdefault(level_id, []).append(res) - - for level_id, results in failed_results.items(): - for res in results: - if res[0] in self._unfinished_sample_ids: - already_collected.add(res[0]) - failed_results.setdefault(level_id, []).append(res) - - for level_id, results in times.items(): - for res in results: - if res[0] in self._unfinished_sample_ids: - times.setdefault(level_id, []).append(res) - times[level_id] = results + successful_results.setdefault(level_id, []).extend(results) + for level_id, results in failed.items(): + failed_results.setdefault(level_id, []).extend(results) + for level_id, results in time.items(): + times[level_id] = results[-1] + + # for level_id, results in sim_data.items(): + # sim_data_results.setdefault(level_id, []).extend(results) + + # for level_id, results in running_time.items(): + # running_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in extract_mesh.items(): + # extract_mesh_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in make_field.items(): + # make_field_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in generate_rnd.items(): + # generate_rnd_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in fine_flow.items(): + # fine_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + # + # for level_id, results in coarse_flow.items(): + # coarse_flow_times[level_id] = [np.sum(results, axis=0)[0], results[-1][1]] + + level_id_sample_id_seed = PbsJob.get_scheduled_sample_ids(job_id, self._jobs_dir) + + for level_id, sample_id, _ in level_id_sample_id_seed: + already_collected.add(sample_id) # Delete pbsID file - 
it means job is finished # SamplingPoolPBS.delete_pbs_id_file(file) self._unfinished_sample_ids = set() - return successful_results, failed_results, times + return successful_results, failed_results, times#, sim_data_results def have_permanent_samples(self, sample_ids): """ - List of unfinished sample ids, the corresponding samples are collecting in next get_finished() call . + List of unfinished sample ids, the corresponding samples are collecting in next get_finished() call """ self._unfinished_sample_ids = set(sample_ids) diff --git a/mlmc/sim/simulation.py b/mlmc/sim/simulation.py index bd9a04d0..e5d5dd33 100644 --- a/mlmc/sim/simulation.py +++ b/mlmc/sim/simulation.py @@ -5,29 +5,49 @@ class Simulation(ABC): + """ + Abstract base class for multi-level Monte Carlo (MLMC) simulations. + + Defines the interface that all concrete simulation classes must implement. + Provides methods for creating level simulations, specifying result formats, and running calculations. + """ @abstractmethod def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation: """ - Create LevelSimulation object which is farther used for calculation etc. - :param fine_level_params: - :param coarse_level_params: - :return: LevelSimulation + Create a LevelSimulation object for a given level. + + The LevelSimulation instance is used for sample generation and result extraction + at both the fine and coarse levels in MLMC. + + :param fine_level_params: List of floats defining parameters for the fine simulation level. + :param coarse_level_params: List of floats defining parameters for the coarse simulation level. + :return: LevelSimulation instance configured for the given level parameters. """ @abstractmethod def result_format(self) -> List[QuantitySpec]: """ - Define simulation result format - :return: List[QuantitySpec, ...] + Define the format of the simulation results. 
+ + This method should return a list of QuantitySpec objects, which describe the + type, shape, and units of each quantity produced by the simulation. + + :return: List of QuantitySpec objects defining the simulation output format. """ @staticmethod @abstractmethod - def calculate(config_dict, seed): + def calculate(config_dict, seed: int): """ - Method that actually run the calculation, calculate fine and coarse sample and also extract their results - :param config_dict: dictionary containing simulation configuration, LevelSimulation.config_dict (set in level_instance) - :param seed: random seed, int - :return: List[fine result, coarse result], both flatten arrays (see mlmc.sim.synth_simulation._calculate()) + Execute a single simulation calculation. + + This method runs the simulation for both fine and coarse levels, computes + the results, and returns them in a flattened form suitable for MLMC analysis. + + :param config_dict: Dictionary containing simulation configuration parameters + (usually LevelSimulation.config_dict from level_instance). + :param seed: Random seed (int) to ensure reproducibility of the stochastic simulation. + :return: List containing two elements: + [fine_result, coarse_result], both as flattened arrays. """ diff --git a/mlmc/sim/synth_simulation.py b/mlmc/sim/synth_simulation.py index 53417219..14a3ca61 100644 --- a/mlmc/sim/synth_simulation.py +++ b/mlmc/sim/synth_simulation.py @@ -1,5 +1,5 @@ import os -import ruamel.yaml as yaml +import ruamel.yaml as ruyaml import numpy as np from typing import List import scipy.stats as stats @@ -9,6 +9,14 @@ class SynthSimulation(Simulation): + """ + Artificial (synthetic) simulation used for testing and examples. + + The simulation generates random samples from a specified distribution and + optionally injects numerical error / NaN failures according to configuration. 
+ It implements the Simulation interface: provides `level_instance`, `calculate`, + and `result_format` methods and a simple cost estimator `n_ops_estimate`. + """ n_nans = 0 nan_fraction = 0 @@ -18,49 +26,59 @@ class SynthSimulation(Simulation): # Artificial simulation. Just random parameter + numerical error.""" def __init__(self, config=None): """ - :param config: Dict: - distr= particular distribution, - complexity=2, - nan_fraction=fraction of failed samples - sim_method=used method for calculating sample result + Initialize the synthetic simulation. + + :param config: Dict, optional configuration with keys: + - 'distr': a scipy.stats distribution object (default: stats.norm()) + - 'complexity': exponent used for cost estimate (default: 2) + - 'nan_fraction': fraction of samples that should be returned as NaN + If config is None, a default normal distribution is used. """ super().__init__() if config is None: config = dict(distr=stats.norm(), complexity=2) self.config = config + + # Static counters / settings used across instances SynthSimulation.n_nans = 0 SynthSimulation.nan_fraction = config.get('nan_fraction', 0.0) SynthSimulation.len_results = 0 - # This attribute is obligatory + + # Indicates whether this simulation needs a workspace directory for samples self.need_workspace: bool = False @staticmethod def sample_fn(x, h): """ - Calculates the simulation sample - :param x: Distribution sample - :param h: Simluation step - :return: sample + Compute a (noisy) synthetic sample value for given distribution samples. + + :param x: Distribution sample(s) (scalar or array-like). + :param h: Simulation step (resolution parameter). Typically small positive float. + :return: Computed sample(s). Introduces small h-dependent perturbation: + x + h * sqrt(1e-4 + |x|). This can produce outliers for certain x. 
""" - # This function can cause many outliers depending on chosen domain of moments function return x + h * np.sqrt(1e-4 + np.abs(x)) @staticmethod def sample_fn_no_error(x, h): """ - Calculates the simulation sample - :param x: Distribution sample - :param h: Simluation step - :return: sample + Compute a synthetic sample without introducing numerical error. + + :param x: Distribution sample(s) (scalar or array-like). + :param h: Simulation step (ignored for this function). + :return: The input sample(s) unchanged (identity mapping). """ return x def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation: """ + Create a LevelSimulation configured for a pair of fine/coarse level parameters. - :param fine_level_params: - :param coarse_level_params: - :return: + :param fine_level_params: List-like where the first element is the fine step size. + :param coarse_level_params: List-like where the first element is the coarse step size. + :return: LevelSimulation instance initialized with: + - config_dict containing 'fine.step', 'coarse.step', 'distr', and 'res_format' + - task_size estimated by n_ops_estimate(...) """ config = dict() config["fine"] = {} @@ -75,16 +93,19 @@ def level_instance(self, fine_level_params: List[float], coarse_level_params: Li @staticmethod def generate_random_samples(distr, seed, size): """ - Generate random samples from given distribution - :param distr: scipy distribution - :param seed: uint32 - :param size: size of result - :return: fine sample, coarse sample + Draw random samples from the provided scipy distribution reproducibly. + + :param distr: scipy.stats distribution object (must support .rvs()). + :param seed: Integer seed used to construct a RandomState for reproducibility. + :param size: Number of samples to draw. + :return: Tuple (fine_samples, coarse_samples). For this synthetic sim both are identical. + May return [np.nan] to simulate a failed sample according to nan_fraction. 
""" SynthSimulation.len_results += 1 distr.random_state = np.random.RandomState(seed) y = distr.rvs(size=size) + # Inject NaN failures up to configured fraction if SynthSimulation.n_nans / (1e-10 + SynthSimulation.len_results) < SynthSimulation.nan_fraction: SynthSimulation.n_nans += 1 y = [np.nan] @@ -94,35 +115,51 @@ def generate_random_samples(distr, seed, size): @staticmethod def calculate(config, seed): """ - Calculate fine and coarse sample and also extract their results - :param config: dictionary containing simulation configuration - :param seed: random number generator seed - :return: np.ndarray, np.ndarray + Calculate fine and coarse samples and convert them to the expected result format. + + :param config: Dictionary containing simulation configuration (must include 'res_format', + 'fine.step' and 'coarse.step' keys). + :param seed: Integer RNG seed for reproducibility. + :return: Tuple (fine_flat, coarse_flat) where both are 1D numpy arrays produced by + flattening the per-quantity/time/location arrays constructed below. + :raises: Exception if any resulting sample contains NaN. 
""" quantity_format = config["res_format"] - fine_random, coarse_random = SynthSimulation.generate_random_samples(config["distr"], seed, np.prod(quantity_format[0].shape)) + + # Generate base random values for fine and coarse (identical in this toy sim) + fine_random, coarse_random = SynthSimulation.generate_random_samples( + config["distr"], + seed, + np.prod(quantity_format[0].shape) + ) fine_step = config["fine"]["step"] coarse_step = config["coarse"]["step"] + # Compute sample values for fine and coarse levels fine_result = SynthSimulation.sample_fn(fine_random, fine_step) if coarse_step == 0: - coarse_result = np.zeros(len(fine_result)) + coarse_result = np.zeros(len(fine_result)) # coarse = zero baseline if step==0 else: coarse_result = SynthSimulation.sample_fn(coarse_random, coarse_step) + # Fail hard if NaNs are present if np.any(np.isnan(fine_result)) or np.any(np.isnan(coarse_result)): raise Exception("result is nan") + # Convert results into list-of-quantities × times × locations arrays and then flatten results = [] for result in [fine_result, coarse_result]: quantities = [] for quantity in quantity_format: if coarse_step == 0: + # replicate the same result for each location (coarse step 0 special case) locations = np.array([result for _ in range(len(quantity.locations))]) else: + # create simple distinct location-dependent arrays for demonstration locations = np.array([result + i for i in range(len(quantity.locations))]) + # repeat across times times = np.array([locations for _ in range(len(quantity.times))]) quantities.append(times) @@ -131,21 +168,42 @@ def calculate(config, seed): return results[0].flatten(), results[1].flatten() def n_ops_estimate(self, step): + """ + Estimate number of operations (cost) for a sample at given step size. + + :param step: Level step size (h). + :return: Estimated operation count (float). Uses configured complexity exponent. 
+ """ return (1 / step) ** self.config['complexity'] * np.log(max(1 / step, 2.0)) def result_format(self) -> List[QuantitySpec]: """ - Result format - :return: + Define the synthetic simulation's result format. + + :return: List[QuantitySpec] describing the shape, units, times and locations + for each reported quantity. This informs how `calculate` arranges + and flattens outputs. """ spec1 = QuantitySpec(name="length", unit="m", shape=(2, 1), times=[1, 2, 3], locations=['10', '20']) spec2 = QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=['30', '40']) - # spec1 = QuantitySpec(name="length", unit="m", shape=(2, 1), times=[1, 2, 3], locations=[(1, 2, 3), (4, 5, 6)]) - # spec2 = QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=[(7, 8, 9), (10, 11, 12)]) + # Alternative examples with numeric locations (commented out) + # spec1 = QuantitySpec(name="length", unit="m", shape=(2, 1), times=[1, 2, 3], + # locations=[(1, 2, 3), (4, 5, 6)]) + # spec2 = QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], + # locations=[(7, 8, 9), (10, 11, 12)]) return [spec1, spec2] + class SynthSimulationWorkspace(SynthSimulation): + """ + Synthetic simulation variant that requires a workspace (reads config from YAML). + + This subclass behaves like `SynthSimulation` but: + - Reads distribution and nan_fraction from a YAML configuration file. + - Declares `need_workspace = True` so sample files are written to/read from disk. + - Supplies `common_files` (the YAML) to LevelSimulation so workspaces get that file. + """ n_nans = 0 nan_fraction = 0 @@ -157,48 +215,55 @@ class SynthSimulationWorkspace(SynthSimulation): # Artificial simulation. 
Just random parameter + numerical error.""" def __init__(self, config): """ - :param config: Dict: - distr= particular distribution, - complexity=2, - nan_fraction=fraction of failed samples - sim_method=used method for calculating sample result + Initialize the workspace-capable synthetic simulation. + + :param config: Dict with at least: + - "config_yaml": path to YAML configuration file (relative or absolute) + Optionally may contain 'nan_fraction' as a fallback. """ self.config_yaml = config["config_yaml"] + # Reset static counters SynthSimulationWorkspace.n_nans = 0 SynthSimulationWorkspace.nan_fraction = config.get('nan_fraction', 0.0) SynthSimulationWorkspace.len_results = 0 - # This attribute is obligatory + # This simulation requires a workspace directory for sample execution self.need_workspace: bool = True @staticmethod def sample_fn(x, h): """ - Calculates the simulation sample - :param x: Distribution sample - :param h: Simluation step - :return: sample + Compute a (noisy) synthetic sample value for given distribution samples. + + :param x: Distribution sample(s) (scalar or array-like). + :param h: Simulation step (resolution parameter). + :return: Computed sample(s): x + h * sqrt(1e-4 + |x|). """ - # This function can cause many outliers depending on chosen domain of moments function return x + h * np.sqrt(1e-4 + np.abs(x)) @staticmethod def sample_fn_no_error(x, h): """ - Calculates the simulation sample - :param x: Distribution sample - :param h: Simluation step - :return: sample + Identity sampling function (no added numerical error). + + :param x: Distribution sample(s). + :param h: Simulation step (ignored). + :return: x (unchanged). """ return x def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation: """ - - :param fine_level_params: - :param coarse_level_params: - :return: + Produce a LevelSimulation configured to use the YAML config as a common file. 
+ + :param fine_level_params: list-like where first element is fine step size. + :param coarse_level_params: list-like where first element is coarse step size. + :return: LevelSimulation configured with: + - config_dict: containing 'fine.step', 'coarse.step', 'res_format' + - common_files: list containing the YAML path (so worker/workspace has it) + - task_size: small constant (1/job_weight) to simulate job weighting + - need_sample_workspace: True (this class requires workspace) """ config = dict() config["fine"] = {} @@ -208,21 +273,30 @@ def level_instance(self, fine_level_params: List[float], coarse_level_params: Li config["coarse"]["step"] = coarse_level_params[0] config["res_format"] = self.result_format() + # Use a fixed job weight to keep task_size small (simulating many small jobs) job_weight = 20000 - return LevelSimulation(config_dict=config, - common_files=[self.config_yaml], - task_size=1.0 / job_weight, - need_sample_workspace=self.need_workspace) + return LevelSimulation( + config_dict=config, + common_files=[self.config_yaml], + task_size=1.0 / job_weight, + need_sample_workspace=self.need_workspace + ) @staticmethod def generate_random_samples(distr, seed, size): """ - Generate random samples from given distribution - :param distr: scipy distribution - :param seed: uint32 - :param size: size of result - :return: fine sample, coarse sample + Draw random samples based on YAML-specified distribution names. + + This implementation currently supports only the string "norm" which + maps to scipy.stats.norm(loc=1, scale=2). A NotImplementedError is raised + for other distribution identifiers. + + :param distr: Either a string identifier (e.g. "norm") or a scipy distribution. + :param seed: Integer RNG seed used to create a RandomState for reproducibility. + :param size: Integer number of samples to draw. + :return: Tuple (fine_samples, coarse_samples) — identical arrays for this toy sim. 
+ May return [np.nan] to simulate a failed sample according to nan_fraction. """ SynthSimulationWorkspace.len_results += 1 @@ -234,6 +308,7 @@ def generate_random_samples(distr, seed, size): distr.random_state = np.random.RandomState(seed) y = distr.rvs(size=size) + # Inject NaN failure if configured fraction not yet reached if SynthSimulationWorkspace.n_nans / (1e-10 + SynthSimulationWorkspace.len_results) < SynthSimulationWorkspace.nan_fraction: SynthSimulationWorkspace.n_nans += 1 y = [np.nan] @@ -243,18 +318,28 @@ def generate_random_samples(distr, seed, size): @staticmethod def calculate(config, seed): """ - Calculate fine and coarse sample and also extract their results - :param config: dictionary containing simulation configuration - :param seed: random number generator seed - :return: np.ndarray, np.ndarray + Calculate fine and coarse samples (using values from the YAML config file). + + Workflow: + 1. Read YAML configuration (via _read_config) to get distribution and nan_fraction. + 2. Generate base random numbers (fine_random, coarse_random). + 3. Compute fine_result and coarse_result via sample functions. + 4. Assemble results into arrays shaped by res_format and flatten them. + + :param config: LevelSimulation.config_dict (must include 'res_format', 'fine.step', 'coarse.step'). + :param seed: Integer RNG seed. + :return: Tuple (fine_flat, coarse_flat) — 1D numpy arrays produced by flattening quantities × times × locations. + :raises: Exception if any computed result contains NaN. 
""" + # Load runtime YAML config (distribution name and nan_fraction) config_file = SynthSimulationWorkspace._read_config() SynthSimulationWorkspace.nan_fraction = config_file["nan_fraction"] quantity_format = config["res_format"] - fine_random, coarse_random = SynthSimulationWorkspace.generate_random_samples(config_file["distr"], seed, - np.prod(quantity_format[0].shape)) + fine_random, coarse_random = SynthSimulationWorkspace.generate_random_samples( + config_file["distr"], seed, np.prod(quantity_format[0].shape) + ) fine_step = config["fine"]["step"] coarse_step = config["coarse"]["step"] @@ -272,7 +357,6 @@ def calculate(config, seed): results = [] for result in [fine_result, coarse_result]: quantities = [] - for quantity in quantity_format: if coarse_step == 0: locations = np.array([result for _ in range(len(quantity.locations))]) @@ -285,12 +369,26 @@ def calculate(config, seed): return results[0].flatten(), results[1].flatten() def n_ops_estimate(self, step): - # @TODO: how to determine n ops + """ + Estimate a synthetic operation count for the workspace-enabled simulation. + :param step: Level step size. + :return: Estimated operation cost (float). Uses a fixed exponent of 2 here. + """ return (1 / step) ** 2 * np.log(max(1 / step, 2.0)) @staticmethod def _read_config(): + """ + Read the YAML configuration file (CONFIG_FILE) from the current working directory. + + The YAML is parsed using ruamel.yaml and should contain keys expected by this class + (e.g. "distr" and "nan_fraction"). + + :return: Parsed configuration dictionary. + :raises: IOError / FileNotFoundError if the YAML file is missing. 
+ """ with open(os.path.join(os.getcwd(), SynthSimulationWorkspace.CONFIG_FILE)) as file: + yaml = ruyaml.YAML(typ='rt') config = yaml.load(file) return config diff --git a/mlmc/tool/context_statprof.py b/mlmc/tool/context_statprof.py deleted file mode 100644 index faf3afc4..00000000 --- a/mlmc/tool/context_statprof.py +++ /dev/null @@ -1,13 +0,0 @@ -import statprof -from contextlib import contextmanager - - - - -@contextmanager -def stat_profiler(): - statprof.start() - yield statprof - statprof.stop() - statprof.display() - diff --git a/mlmc/tool/distribution.py b/mlmc/tool/distribution.py index e377607b..1427ef2b 100644 --- a/mlmc/tool/distribution.py +++ b/mlmc/tool/distribution.py @@ -48,39 +48,6 @@ def __init__(self, moments_obj, moment_data, domain=None, force_decay=(True, Tru # Flag for monitoring convergence on stdout. self.monitor = monitor - # def choose_parameters_from_samples(self, samples): - # """ - # Determine model hyperparameters, in particular domain of the density function, - # from given samples. - # :param samples: np array of samples from the distribution or its approximation. - # :return: None - # """ - # self.domain = (np.min(samples), np.max(samples)) - # - # @staticmethod - # def choose_parameters_from_moments(mean, variance, quantile=0.9999, log=False): - # """ - # Determine model hyperparameters, in particular domain of the density function, - # from given samples. - # :param samples: np array of samples from the distribution or its approximation. 
- # :return: None - # """ - # if log: - # # approximate by log normal - # # compute mu, sigma parameters from observed mean and variance - # sigma_sq = np.log(np.exp(np.log(variance) - 2.0 * np.log(mean)) + 1.0) - # mu = np.log(mean) - sigma_sq / 2.0 - # sigma = np.sqrt(sigma_sq) - # domain = tuple(sc.stats.lognorm.ppf([1.0 - quantile, quantile], s=sigma, scale=np.exp(mu))) - # assert np.isclose(mean, sc.stats.lognorm.mean(s=sigma, scale=np.exp(mu))) - # assert np.isclose(variance, sc.stats.lognorm.var(s=sigma, scale=np.exp(mu))) - # else: - # domain = tuple(sc.stats.norm.ppf([1.0 - quantile, quantile], loc=mean, scale=np.sqrt(variance))) - # return domain - # - # def choose_parameters_from_approximation(self): - # pass - def estimate_density_minimize(self, tol=1e-5, reg_param =0.01): """ @@ -411,15 +378,9 @@ def _calculate_jacobian_matrix(self, multipliers): jacobian_matrix[np.diag_indices_from(jacobian_matrix)] += self._stab_penalty - - #e_vals = np.linalg.eigvalsh(jacobian_matrix) - - #print(multipliers) - #print("jac spectra: ", e_vals[0], e_vals[-1], e_vals[-1]/e_vals[0]) return jacobian_matrix - def compute_exact_moments(moments_fn, density, tol=1e-4): """ Compute approximation of moments using exact density. diff --git a/mlmc/tool/flow_mc.py b/mlmc/tool/flow_mc.py index 09fc12da..cdddb9c1 100644 --- a/mlmc/tool/flow_mc.py +++ b/mlmc/tool/flow_mc.py @@ -15,14 +15,21 @@ def create_corr_field(model='gauss', corr_length=0.125, dim=2, log=True, sigma=1, mode_no=1000): """ - Create random fields - :return: + Create correlated random-field provider (cf.Fields) according to selected backend. + + :param model: One of 'fourier', 'svd', 'exp', 'TPLgauss', 'TPLexp', 'TPLStable', or others (defaults to 'gauss'). + :param corr_length: Correlation length (used by GSTools or SVD implementations). + :param dim: Spatial dimension of the field (1, 2 or 3). + :param log: If True, generate log-normal field (exponentiate underlying Gaussian field). 
+ :param sigma: Standard deviation for the generated field. + :param mode_no: Number of Fourier modes + :return: cf.Fields instance that can generate random field samples. """ if model == 'fourier': return cf.Fields([ cf.Field('conductivity', cf.FourierSpatialCorrelatedField('gauss', dim=dim, - corr_length=corr_length, - log=log, sigma=sigma)), + corr_length=corr_length, + log=log, sigma=sigma)), ]) elif model == 'svd': @@ -52,13 +59,14 @@ def create_corr_field(model='gauss', corr_length=0.125, dim=2, log=True, sigma=1 ]) - def substitute_placeholders(file_in, file_out, params): """ - Substitute for placeholders of format '' from the dict 'params'. - :param file_in: Template file. - :param file_out: Values substituted. - :param params: { 'name': value, ...} + Replace placeholders of form '' in a template file with corresponding values. + + :param file_in: Path to the template file containing placeholders. + :param file_out: Path where the substituted output will be written. + :param params: Dictionary mapping placeholder names to replacement values, e.g. {'mesh_file': 'mesh.msh'}. + :return: List of parameter names that were actually used (replaced) in the template. """ used_params = [] with open(file_in, 'r') as src: @@ -76,10 +84,10 @@ def substitute_placeholders(file_in, file_out, params): def force_mkdir(path, force=False): """ - Make directory 'path' with all parents, - remove the leaf dir recursively if it already exists. - :param path: path to directory - :param force: if dir already exists then remove it and create new one + Create directory tree; optionally remove existing leaf directory first. + + :param path: Directory path to create (parents created as needed). + :param force: If True and the directory already exists, remove it (recursively) before creating. 
:return: None """ if force: @@ -92,55 +100,36 @@ class FlowSim(Simulation): # placeholders in YAML total_sim_id = 0 MESH_FILE_VAR = 'mesh_file' - # Timestep placeholder given as O(h), h = mesh step - TIMESTEP_H1_VAR = 'timestep_h1' - # Timestep placeholder given as O(h^2), h = mesh step - TIMESTEP_H2_VAR = 'timestep_h2' + TIMESTEP_H1_VAR = 'timestep_h1' # O(h) + TIMESTEP_H2_VAR = 'timestep_h2' # O(h^2) - # files + # filenames used in workspace and job directories GEO_FILE = 'mesh.geo' MESH_FILE = 'mesh.msh' YAML_TEMPLATE = 'flow_input.yaml.tmpl' YAML_FILE = 'flow_input.yaml' FIELDS_FILE = 'fields_sample.msh' - """ - Gather data for single flow call (coarse/fine) - - Usage: - mlmc.sampler.Sampler uses instance of FlowSim, it calls once level_instance() for each level step (The level_instance() method - is called as many times as the number of levels), it takes place in main process - - mlmc.tool.pbs_job.PbsJob uses static methods in FlowSim, it calls calculate(). That's where the calculation actually runs, - it takes place in PBS process - It also extracts results and passes them back to PbsJob, which handles the rest - - """ - def __init__(self, config=None, clean=None): """ - Simple simulation using flow123d - :param config: configuration of the simulation, processed keys: - env - Environment object. - fields - FieldSet object - yaml_file: Template for main input file. Placeholders: - - replaced by generated mesh - - for FIELD be name of any of `fields`, replaced by the FieldElementwise field with generated - field input file and the field name for the component. - geo_file: Path to the geometry file. - :param clean: bool, if True remove existing simulation files - mesh files, ... + Initialize FlowSim instance that runs flow123d simulations using generated random fields. + + :param config: Dict with keys: + - env: dict of environment executables (flow123d, gmsh, gmsh_version, etc.) 
+ - fields_params: parameters forwarded to create_corr_field + - yaml_file: base YAML template path + - geo_file: geometry (.geo) file path + - work_dir: base working directory for generated level common files + - field_template: optional template string for field definition in YAML + - time_factor: optional multiplier for timestep selection (default 1.0) + :param clean: If True, regenerate common files (mesh, yaml) for the given level. """ - self.need_workspace = True - # This simulation requires workspace + self.need_workspace = True # this simulation needs per-sample work directories self.env = config['env'] - # Environment variables, flow123d, gmsh, ... self._fields_params = config['fields_params'] self._fields = create_corr_field(**config['fields_params']) self._fields_used_params = None - # Random fields instance self.time_factor = config.get('time_factor', 1.0) - # It is used for minimal element from mesh determination (see level_instance method) - self.base_yaml_file = config['yaml_file'] self.base_geo_file = config['geo_file'] self.field_template = config.get('field_template', @@ -148,54 +137,55 @@ def __init__(self, config=None, clean=None): self.work_dir = config['work_dir'] self.clean = clean - super(Simulation, self).__init__() + super(Simulation, self).__init__() # keep compatibility with parent initialization + def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation: """ - Called from mlmc.Sampler, it creates single instance of LevelSimulation (mlmc.) - :param fine_level_params: in this version, it is just fine simulation step - :param coarse_level_params: in this version, it is just coarse simulation step - :return: mlmc.LevelSimulation object, this object is serialized in SamplingPoolPbs and deserialized in PbsJob, - so it allows pass simulation data from main process to PBS process + Create a LevelSimulation object for given fine/coarse steps. 
+ + This method is called in the main process (Sampler) and must prepare + common files (mesh, YAML) for that level. The returned LevelSimulation + is serialized and sent to PBS jobs (PbsJob) for actual execution. + + :param fine_level_params: list with single element [fine_step] (mesh step) + :param coarse_level_params: list with single element [coarse_step] (mesh step) or [0] for one-level MC + :return: LevelSimulation configured with task size and calculate method """ fine_step = fine_level_params[0] coarse_step = coarse_level_params[0] - # TODO: determine minimal element from mesh + # Set time steps used in YAML substitution (O(h) and O(h^2) placeholders) self.time_step_h1 = self.time_factor * fine_step self.time_step_h2 = self.time_factor * fine_step * fine_step - # Set fine simulation common files directory - # Files in the directory are used by each simulation at that level + # Directory to store files common to all samples at this fine level common_files_dir = os.path.join(self.work_dir, "l_step_{}_common_files".format(fine_step)) force_mkdir(common_files_dir, force=self.clean) self.mesh_file = os.path.join(common_files_dir, self.MESH_FILE) if self.clean: - # Prepare mesh + # Create computational mesh from geometry template geo_file = os.path.join(common_files_dir, self.GEO_FILE) shutil.copyfile(self.base_geo_file, geo_file) - self._make_mesh(geo_file, self.mesh_file, fine_step) # Common computational mesh for all samples. 
+ self._make_mesh(geo_file, self.mesh_file, fine_step) - # Prepare main input YAML + # Prepare main YAML by substituting placeholders yaml_template = os.path.join(common_files_dir, self.YAML_TEMPLATE) shutil.copyfile(self.base_yaml_file, yaml_template) yaml_file = os.path.join(common_files_dir, self.YAML_FILE) self._substitute_yaml(yaml_template, yaml_file) - # Mesh is extracted because we need number of mesh points to determine task_size parameter (see return value) + # Extract mesh metadata to determine task_size (number of points affects job weight) fine_mesh_data = self.extract_mesh(self.mesh_file) - # Set coarse simulation common files directory - # Files in the directory are used by each simulation at that level + # Set coarse sim common files dir if coarse level exists coarse_sim_common_files_dir = None if coarse_step != 0: coarse_sim_common_files_dir = os.path.join(self.work_dir, "l_step_{}_common_files".format(coarse_step)) - # Simulation config - # Configuration is used in mlmc.tool.pbs_job.PbsJob instance which is run from PBS process - # It is part of LevelSimulation which is serialized and then deserialized in mlmc.tool.pbs_job.PbsJob + # Prepare configuration dict that will be serialized in LevelSimulation config = dict() config["fine"] = {} config["coarse"] = {} @@ -204,71 +194,66 @@ def level_instance(self, fine_level_params: List[float], coarse_level_params: Li config["fine"]["common_files_dir"] = common_files_dir config["coarse"]["common_files_dir"] = coarse_sim_common_files_dir - config[ - "fields_used_params"] = self._fields_used_params # Params for Fields instance, which is createed in PbsJob + config["fields_used_params"] = self._fields_used_params config["gmsh"] = self.env['gmsh'] config["flow123d"] = self.env['flow123d'] config['fields_params'] = self._fields_params - # Auxiliary parameter which I use to determine task_size (should be from 0 to 1, if task_size is above 1 then pbs job is scheduled) - job_weight = 17000000 # 4000000 - 20 min, 
2000000 - cca 10 min + # job_weight is used to convert mesh size into a normalized task_size + job_weight = 17000000 return LevelSimulation(config_dict=config, task_size=len(fine_mesh_data['points']) / job_weight, calculate=FlowSim.calculate, - # method which carries out the calculation, will be called from PBS processs - need_sample_workspace=True # If True, a sample directory is created + need_sample_workspace=True ) @staticmethod def calculate(config, seed): """ - Method that actually run the calculation, it's called from mlmc.tool.pbs_job.PbsJob.calculate_samples() - Calculate fine and coarse sample and also extract their results - :param config: dictionary containing simulation configuration, LevelSimulation.config_dict (set in level_instance) - :param seed: random seed, int - :return: List[fine result, coarse result], both flatten arrays (see mlmc.sim.synth_simulation.calculate()) + Execute one MLMC sample calculation (fine and optional coarse) inside PBS job. + + :param config: Configuration dict from LevelSimulation.config_dict (contains common_files dirs, steps, fields params) + :param seed: Random seed for the sample generation (derived from sample id) + :return: Tuple (fine_result_array, coarse_result_array), both numpy arrays (coarse may be zeros for one-level MC) """ - # Init correlation field objects - fields = create_corr_field(**config['fields_params']) # correlated_field.Fields instance + # Initialize fields object in the worker process + fields = create_corr_field(**config['fields_params']) fields.set_outer_fields(config["fields_used_params"]) - coarse_step = config["coarse"]["step"] # Coarse simulation step, zero if one level MC - flow123d = config["flow123d"] # Flow123d command + coarse_step = config["coarse"]["step"] + flow123d = config["flow123d"] - # Extract fine mesh - fine_common_files_dir = config["fine"]["common_files_dir"] # Directory with fine simulation common files + # Extract fine mesh structure and optionally coarse mesh structure + 
fine_common_files_dir = config["fine"]["common_files_dir"] fine_mesh_data = FlowSim.extract_mesh(os.path.join(fine_common_files_dir, FlowSim.MESH_FILE)) - # Extract coarse mesh coarse_mesh_data = None coarse_common_files_dir = None if coarse_step != 0: - coarse_common_files_dir = config["coarse"][ - "common_files_dir"] # Directory with coarse simulation common files + coarse_common_files_dir = config["coarse"]["common_files_dir"] coarse_mesh_data = FlowSim.extract_mesh(os.path.join(coarse_common_files_dir, FlowSim.MESH_FILE)) - # Create fields both fine and coarse + # Prepare combined fields object that has points for both fine and coarse meshes fields = FlowSim.make_fields(fields, fine_mesh_data, coarse_mesh_data) - # Set random seed, seed is calculated from sample id, so it is not user defined + # Sample random field realizations reproducibly np.random.seed(seed) - # Generate random samples - fine_input_sample, coarse_input_sample = FlowSim.generate_random_sample(fields, coarse_step=coarse_step, - n_fine_elements=len( - fine_mesh_data['points'])) + fine_input_sample, coarse_input_sample = FlowSim.generate_random_sample( + fields, coarse_step=coarse_step, n_fine_elements=len(fine_mesh_data['points']) + ) - # Run fine sample + # Run fine-level simulation fields_file = os.path.join(os.getcwd(), FlowSim.FIELDS_FILE) fine_res = FlowSim._run_sample(fields_file, fine_mesh_data['ele_ids'], fine_input_sample, flow123d, fine_common_files_dir) - # Rename fields_sample.msh to fine_fields_sample.msh, we might remove it + # Move generated files to have 'fine_' prefix so they don't collide for filename in os.listdir(os.getcwd()): if not filename.startswith("fine"): shutil.move(os.path.join(os.getcwd(), filename), os.path.join(os.getcwd(), "fine_" + filename)) - # Run coarse sample + # Run coarse-level simulation if coarse sample exists coarse_res = np.zeros(len(fine_res)) if coarse_input_sample: coarse_res = FlowSim._run_sample(fields_file, coarse_mesh_data['ele_ids'], 
coarse_input_sample, flow123d, @@ -279,17 +264,19 @@ def calculate(config, seed): @staticmethod def make_fields(fields, fine_mesh_data, coarse_mesh_data): """ - Create random fields that are used by both coarse and fine simulation - :param fields: correlated_field.Fields instance - :param fine_mesh_data: Dict contains data extracted from fine mesh file (points, point_region_ids, region_map) - :param coarse_mesh_data: Dict contains data extracted from coarse mesh file (points, point_region_ids, region_map) - :return: correlated_field.Fields + Assign evaluation points to fields and return the Fields object prepared for sampling. + + :param fields: correlated_field.Fields instance (with local field definitions) + :param fine_mesh_data: Dict returned by extract_mesh() for the fine mesh + :param coarse_mesh_data: Dict returned by extract_mesh() for the coarse mesh (or None for one-level) + :return: the same cf.Fields object with points set for sampling """ - # One level MC has no coarse_mesh_data + # If no coarse mesh, just register fine mesh points if coarse_mesh_data is None: fields.set_points(fine_mesh_data['points'], fine_mesh_data['point_region_ids'], fine_mesh_data['region_map']) else: + # Concatenate fine and coarse points to compute joint fields (ensures consistent sampling) coarse_centers = coarse_mesh_data['points'] both_centers = np.concatenate((fine_mesh_data['points'], coarse_centers), axis=0) both_regions_ids = np.concatenate( @@ -302,13 +289,14 @@ def make_fields(fields, fine_mesh_data, coarse_mesh_data): @staticmethod def _run_sample(fields_file, ele_ids, fine_input_sample, flow123d, common_files_dir): """ - Create random fields file, call Flow123d and extract results - :param fields_file: Path to file with random fields - :param ele_ids: Element IDs in computational mesh - :param fine_input_sample: fields: {'field_name' : values_array, ..} - :param flow123d: Flow123d command - :param common_files_dir: Directory with simulations common files 
(flow_input.yaml, ) - :return: simulation result, ndarray + Write random fields to Gmsh file, call flow123d, and extract sample results. + + :param fields_file: Path where fields will be written (in current working directory) + :param ele_ids: Array of element ids for which field values are provided + :param fine_input_sample: Dict mapping field names to arrays of shape (n_elements, 1) + :param flow123d: Path/command to flow123d executable + :param common_files_dir: Directory containing common YAML and other input files for the level + :return: numpy.ndarray with extracted simulation result (e.g., water balance) """ gmsh_io.GmshIO().write_fields(fields_file, ele_ids, fine_input_sample) @@ -321,11 +309,16 @@ def _run_sample(fields_file, ele_ids, fine_input_sample, flow123d, common_files_ @staticmethod def generate_random_sample(fields, coarse_step, n_fine_elements): """ - Generate random field, both fine and coarse part. - Store them separeted. - :return: Dict, Dict + Generate random field samples for the fine and (optionally) coarse meshes. + + :param fields: cf.Fields object (already configured with points) + :param coarse_step: coarse-level step (0 for no coarse sample) + :param n_fine_elements: Number of elements that belong to fine mesh (used to split combined sample) + :return: Tuple (fine_input_sample: dict, coarse_input_sample: dict) + Each dict maps field name -> array shaped (n_elements, 1). """ fields_sample = fields.sample() + # Fine inputs are first n_fine_elements rows; coarse are the remainder (if any) fine_input_sample = {name: values[:n_fine_elements, None] for name, values in fields_sample.items()} coarse_input_sample = {} if coarse_step != 0: @@ -336,10 +329,12 @@ def generate_random_sample(fields, coarse_step, n_fine_elements): def _make_mesh(self, geo_file, mesh_file, fine_step): """ - Make the mesh, mesh_file: _step.msh. - Make substituted yaml: _step.yaml, - using common fields_step.msh file for generated fields. 
- :return: + Invoke Gmsh to produce a mesh with the requested geometric scale (clscale). + + :param geo_file: Path to the .geo file used to generate the mesh + :param mesh_file: Path where the .msh output will be written + :param fine_step: Mesh step (controls element size via -clscale) + :return: None """ if self.env['gmsh_version'] == 2: subprocess.call( @@ -350,9 +345,14 @@ def _make_mesh(self, geo_file, mesh_file, fine_step): @staticmethod def extract_mesh(mesh_file): """ - Extract mesh from file - :param mesh_file: Mesh file path - :return: Dict + Parse a Gmsh mesh file and extract points (element centers), element ids and region mapping. + + :param mesh_file: Path to .msh file to parse (Gmsh 2/4 depending on GmshIO implementation) + :return: Dict with keys: + - 'points': np.ndarray of shape (n_elements, dim) with element center coordinates + - 'point_region_ids': np.ndarray of region id per element + - 'ele_ids': np.ndarray of original element ids + - 'region_map': dict mapping region name -> region id """ mesh = gmsh_io.GmshIO(mesh_file) is_bc_region = {} @@ -386,7 +386,7 @@ def extract_mesh(mesh_file): diff = max_pt - min_pt min_axis = np.argmin(diff) non_zero_axes = [0, 1, 2] - # TODO: be able to use this mesh_dimension in fields + # If mesh is effectively 2D (one axis collapsed), remove that axis from point coordinates if diff[min_axis] < 1e-10: non_zero_axes.pop(min_axis) points = centers[:, non_zero_axes] @@ -395,8 +395,11 @@ def extract_mesh(mesh_file): def _substitute_yaml(self, yaml_tmpl, yaml_out): """ - Create substituted YAML file from the tamplate. - :return: + Build YAML input file for flow123d by substituting placeholders for mesh and fields. + + :param yaml_tmpl: Path to YAML template with placeholders like '' and ''. + :param yaml_out: Path to output YAML file that will be used by flow123d. 
+ :return: None (also populates self._fields_used_params with names of substituted fields) """ param_dict = {} field_tmpl = self.field_template @@ -412,11 +415,12 @@ def _substitute_yaml(self, yaml_tmpl, yaml_out): @staticmethod def _extract_result(sample_dir): """ - Extract the observed value from the Flow123d output. - :param sample_dir: str, path to sample directory - :return: None, inf or water balance result (float) and overall sample time + Extract the observed quantity (e.g., water balance flux) from a flow123d run directory. + + :param sample_dir: Directory where flow123d output (water_balance.yaml) is located. + :return: numpy.ndarray with a single value [-total_flux] representing outflow (negative sign). + Raises Exception if expected data is not found or inflow at outlet is positive. """ - # extract the flux balance_file = os.path.join(sample_dir, "water_balance.yaml") with open(balance_file, "r") as f: @@ -434,44 +438,19 @@ def _extract_result(sample_dir): flux_in = float(flux_item['data'][1]) if flux_in > 1e-10: raise Exception("Possitive inflow at outlet region.") - total_flux += flux # flux field + total_flux += flux found = True - # Get flow123d computing time - # run_time = FlowSim.get_run_time(sample_dir) - if not found: - raise Exception + raise Exception("No outlet flux found in water_balance.yaml") return np.array([-total_flux]) @staticmethod def result_format() -> List[QuantitySpec]: """ - Define simulation result format - :return: List[QuantitySpec, ...] + Describe the simulation output format as a list of QuantitySpec objects. 
+ + :return: List[QuantitySpec] describing each output quantity (name, unit, shape, times, locations) """ spec1 = QuantitySpec(name="conductivity", unit="m", shape=(1, 1), times=[1], locations=['0']) - # spec2 = QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=['30', '40']) return [spec1] - - # @staticmethod - # def get_run_time(sample_dir): - # """ - # Get flow123d sample running time from profiler - # :param sample_dir: Sample directory - # :return: float - # """ - # profiler_file = os.path.join(sample_dir, "profiler_info_*.json") - # profiler = glob.glob(profiler_file)[0] - # - # try: - # with open(profiler, "r") as f: - # prof_content = json.load(f) - # - # run_time = float(prof_content['children'][0]['cumul-time-sum']) - # except: - # print("Extract run time failed") - # - # return run_time - - diff --git a/mlmc/tool/gmsh_io.py b/mlmc/tool/gmsh_io.py index c5a3ad36..0d059918 100644 --- a/mlmc/tool/gmsh_io.py +++ b/mlmc/tool/gmsh_io.py @@ -3,21 +3,8 @@ import struct import numpy as np -import enum -# class ElementType(enum.IntEnum): -# simplex_1d = 1 -# simplex_2d = 2 -# simplex_3d = 4 -# -# element_sizes = { -# 1: 1, -# 2: 2, -# 4: 3 -# } -# - class GmshIO: """This is a class for storing nodes and elements. Based on Gmsh.py @@ -28,25 +15,60 @@ class GmshIO: Methods: read([file]) -- Parse a Gmsh version 1.0 or 2.0 mesh file - write([file]) -- Output a Gmsh version 2.0 mesh file + write_ascii([file]) -- Output a Gmsh version 2.0 mesh file (ASCII) + write_binary([file]) -- Output a Gmsh version 2.0 mesh file (binary) + write_element_data(f, ele_ids, name, values) -- write $ElementData block + write_fields(msh_file, ele_ids, fields) -- convenience to write several ElementData blocks """ def __init__(self, filename=None): - """Initialise Gmsh data structure""" + """ + Initialise Gmsh data structure. + + :param filename: Optional path to a .msh file. If provided, the file is read on construction. 
+ :return: None + """ self.reset() self.filename = filename if self.filename: self.read() def reset(self): - """Reinitialise Gmsh data structure""" + """ + Reinitialise internal storage. + + Clears nodes, elements, physical names and element_data dictionaries. + + :return: None + """ self.nodes = {} self.elements = {} self.physical = {} self.element_data = {} def read_element_data_head(self, mshfile): - + """ + Read header of a $ElementData block from an open mshfile. + + The method expects the lines after '$ElementData' to match the conventional + Gmsh textual ElementData header layout: + + "" + +