diff --git a/docs/source/conf.py b/docs/source/conf.py deleted file mode 100644 index 55a3cec7..00000000 --- a/docs/source/conf.py +++ /dev/null @@ -1,97 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -import sys -import datetime -sys.path.insert(0, os.path.abspath("../../")) - -# -- Project information ----------------------------------------------------- -# General information about the project. -curr_year = datetime.datetime.now().year -project = "mlmc" -copyright = "{}, Jan Březina, Martin Špetlík".format(curr_year) -author = "Jan Březina, Martin Špetlík" - -# The full version, including alpha/beta/rc tags -release = '1.0.1' - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode', - 'sphinx.ext.doctest', - 'sphinx.ext.autosectionlabel' - ] - -# autosummaries from source-files -autosummary_generate = True -# dont show __init__ docstring -autoclass_content = 'class' -# sort class members -autodoc_member_order = "groupwise" -# autodoc_member_order = 'bysource' - -# Add any paths that contain templates here, relative to this directory. 
-templates_path = ['_templates'] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "sphinx_rtd_theme" #'alabaster' - -html_theme_options = { - # 'canonical_url': '', - # 'analytics_id': '', - "logo_only": False, - "display_version": True, - "prev_next_buttons_location": "top", - # 'style_external_links': False, - # 'vcs_pageview_mode': '', - # Toc options - "collapse_navigation": False, - "sticky_navigation": True, - "navigation_depth": 4, - "includehidden": True, - "titles_only": False, -} - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -#html_static_path = ['_static'] - -# autodoc_default_options = { -# 'members': True, -# # The ones below should be optional but work nicely together with -# # example_package/autodoctest/doc/source/_templates/autosummary/class.rst -# # and other defaults in sphinx-autodoc. -# 'show-inheritance': True, -# 'inherited-members': True, -# 'no-special-members': True, -# } -master_doc = "contents" diff --git a/docs/source/contents.rst b/docs/source/contents.rst deleted file mode 100644 index 96976686..00000000 --- a/docs/source/contents.rst +++ /dev/null @@ -1,11 +0,0 @@ -======== -Contents -======== - -.. 
toctree:: - :includehidden: - :maxdepth: 2 - - index - examples - mlmc diff --git a/docs/source/examples.rst b/docs/source/examples.rst deleted file mode 100644 index 23b4acd6..00000000 --- a/docs/source/examples.rst +++ /dev/null @@ -1,19 +0,0 @@ -========== -Tutorials -========== - -.. automodule:: examples - -The following tutorials illustrates how to use mlmc package. - -.. toctree:: - :includehidden: - :maxdepth: 1 - - examples_sampler_creation - examples_samples_scheduling - examples_quantity - examples_postprocessing - - -You can find more complex examples in :any:`examples.shooting` diff --git a/docs/source/examples.shooting.rst b/docs/source/examples.shooting.rst deleted file mode 100644 index 5b040675..00000000 --- a/docs/source/examples.shooting.rst +++ /dev/null @@ -1,53 +0,0 @@ -examples.shooting package -========================= - -Submodules ----------- - -examples.shooting.Shooting1DPBS module --------------------------------------- - -.. automodule:: examples.shooting.Shooting1DPBS - :members: - :undoc-members: - :show-inheritance: - -examples.shooting.shooting\_1D module -------------------------------------- - -.. automodule:: examples.shooting.shooting_1D - :members: - :undoc-members: - :show-inheritance: - -examples.shooting.shooting\_2D module -------------------------------------- - -.. automodule:: examples.shooting.shooting_2D - :members: - :undoc-members: - :show-inheritance: - -examples.shooting.simulation\_shooting\_1D module -------------------------------------------------- - -.. automodule:: examples.shooting.simulation_shooting_1D - :members: - :undoc-members: - :show-inheritance: - -examples.shooting.simulation\_shooting\_2D module -------------------------------------------------- - -.. automodule:: examples.shooting.simulation_shooting_2D - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. 
automodule:: examples.shooting - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/examples_postprocessing.rst b/docs/source/examples_postprocessing.rst deleted file mode 100644 index 56479b6f..00000000 --- a/docs/source/examples_postprocessing.rst +++ /dev/null @@ -1,82 +0,0 @@ -Results postprocessing -====================== - -If you already know how to create a sampler, schedule samples and handle quantities, -postprocessing will be easy for you. Otherwise, see the previous tutorials before. - - -First, schedule samples and estimate moments for a particular quantity - -.. testcode:: - - import mlmc - n_levels = 3 # number of MLMC levels - step_range = [0.5, 0.005] # simulation steps at the coarsest and finest levels - target_var = 1e-4 - n_moments = 10 - level_parameters = mlmc.estimator.determine_level_parameters(n_levels, step_range) - # level_parameters determine each level simulation steps - # level_parameters can be manually prescribed as a list of lists - - simulation_factory = mlmc.SynthSimulation() - sampling_pool = mlmc.OneProcessPool() - # Memory() storage keeps samples in the computer main memory - sample_storage = mlmc.Memory() - - sampler = mlmc.Sampler(sample_storage=sample_storage, - sampling_pool=sampling_pool, - sim_factory=simulation_factory, - level_parameters=level_parameters) - - sampler.set_initial_n_samples() - sampler.schedule_samples() - running = 1 - while running > 0: - running = 0 - running += sampler.ask_sampling_pool_for_samples() - - # Get particular quantity - root_quantity = mlmc.make_root_quantity(sampler.sample_storage, simulation_factory.result_format()) - length = root_quantity['length'] - time = length[1] - location = time['10'] - q_value = location[0] - - - true_domain = mlmc.Estimate.estimate_domain(q_value, sample_storage) - moments_fn = mlmc.Legendre(n_moments, true_domain) - estimate_obj = mlmc.Estimate(q_value, sample_storage=sampler.sample_storage, - moments_fn=moments_fn) - - variances, n_ops 
= estimate_obj.estimate_diff_vars_regression(sampler.n_finished_samples) - - from mlmc.estimator import estimate_n_samples_for_target_variance - n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, - n_levels=sampler.n_levels) - - while not sampler.process_adding_samples(n_estimated): - # New estimation according to already finished samples - variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler._n_scheduled_samples) - n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, - n_levels=sampler.n_levels) - - running = 1 - while running > 0: - running = 0 - running += sampler.ask_sampling_pool_for_samples() - - -Probability density function approximation ---------------------- - -.. testcode:: - - from mlmc.plot.plots import Distribution - distr_obj, result, _, _ = estimate_obj.construct_density() - distr_plot = Distribution(title="distributions", error_plot=None) - distr_plot.add_distribution(distr_obj) - - if n_levels == 1: - samples = estimate_obj.get_level_samples(level_id=0)[..., 0] - distr_plot.add_raw_samples(np.squeeze(samples)) # add histogram - distr_plot.show() diff --git a/docs/source/examples_quantity.rst b/docs/source/examples_quantity.rst deleted file mode 100644 index 9ab4fa58..00000000 --- a/docs/source/examples_quantity.rst +++ /dev/null @@ -1,249 +0,0 @@ -.. _examples quantity: - -Quantity tutorial -================= - -An overview of basic :any:`mlmc.quantity.quantity.Quantity` operations. -Quantity related classes and functions allow estimate mean and variance of MLMC samples results, -derive other quantities from original ones and much more. - -.. 
testcode:: - :hide: - - import mlmc - n_levels = 3 # number of MLMC levels - step_range = [0.5, 0.005] # simulation steps at the coarsest and finest levels - level_parameters = mlmc.estimator.determine_level_parameters(n_levels, step_range) - # level_parameters determine each level simulation steps - # level_parameters can be manually prescribed as a list of lists - - simulation_factory = mlmc.SynthSimulation() - sampling_pool = mlmc.OneProcessPool() - # Memory() storage keeps samples in the computer main memory - sample_storage = mlmc.Memory() - - sampler = mlmc.Sampler(sample_storage=sample_storage, - sampling_pool=sampling_pool, - sim_factory=simulation_factory, - level_parameters=level_parameters) - - n_samples = [100, 75, 50] - sampler.set_initial_n_samples(n_samples) - - running = 1 - while running > 0: - running = 0 - running += sampler.ask_sampling_pool_for_samples() - - - -.. testcode:: - - import numpy as np - import mlmc.quantity.quantity_estimate - from examples.synthetic_quantity import create_sampler - - -First, the synthetic Quantity with the following :code:`result_format` is created - -.. testcode:: - - # result_format = [ - # mlmc.QuantitySpec(name="length", unit="m", shape=(2, 1), times=[1, 2, 3], locations=['10', '20']), - # mlmc.QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=['30', '40']), - # ] - # Meaning: sample results contain data on two quantities in three time steps [1, 2, 3] and in two locations, - # each quantity can have different shape - - sampler, simulation_factory, moments_fn = create_sampler() - root_quantity = mlmc.make_root_quantity(sampler.sample_storage, simulation_factory.result_format()) - -:code:`root_quantity` is :py:class:`mlmc.quantity.quantity.Quantity` instance and represents the whole result data. -According to :code:`result_format` it contains two sub-quantities named "length" and "width". - - -Mean estimates ---------------- -To get estimated mean of a quantity: - -.. 
testcode:: - - root_quantity_mean = mlmc.quantity.quantity_estimate.estimate_mean(root_quantity) - -:code:`root_quantity_mean` is an instance of :py:class:`mlmc.quantity.quantity.QuantityMean` - -To get the total mean value: - -.. testcode:: - - root_quantity_mean.mean - -To get the total variance value: - -.. testcode:: - - root_quantity_mean.var - -To get means at each level: - -.. testcode:: - - root_quantity_mean.l_means - -To get variances at each level: - -.. testcode:: - - root_quantity_mean.l_vars - - -Estimate moments and covariance matrix --------------------------------------- - -Create a quantity representing moments and get their estimates - -.. testcode:: - - moments_quantity = mlmc.quantity.quantity_estimate.moments(root_quantity, moments_fn=moments_fn) - moments_mean = mlmc.quantity.quantity_estimate.estimate_mean(moments_quantity) - -To obtain central moments, use: - -.. testcode:: - - central_root_quantity = root_quantity - root_quantity_mean.mean - central_moments_quantity = mlmc.quantity.quantity_estimate.moments(central_root_quantity, - moments_fn=moments_fn) - central_moments_mean = mlmc.quantity.quantity_estimate.estimate_mean(central_moments_quantity) - - -Create a quantity representing a covariance matrix - -.. testcode:: - - covariance_quantity = mlmc.quantity.quantity_estimate.covariance(root_quantity, moments_fn=moments_fn) - cov_mean = mlmc.quantity.quantity_estimate.estimate_mean(covariance_quantity) - - - -Quantity selection ------------------- - -According to the result_format, it is possible to select items from a quantity - -.. testcode:: - - length = root_quantity["length"] # Get quantity with name="length" - width = root_quantity["width"] # Get quantity with name="width" - -:code:`length` and :code:`width` are still :py:class:`mlmc.quantity.quantity.Quantity` instances - -To get a quantity at particular time: - -.. 
testcode:: - - length_locations = length.time_interpolation(2.5) - -:code:`length_locations` represents results for all locations of quantity named "length" at the time 2.5 - -To get quantity at particular location: - -.. testcode:: - - length_result = length_locations['10'] - -:code:`length_result` represents results shape=(2, 1) of quantity named "length" at the time 2,5 and location '10' - -Now it is possible to slice Quantity :code:`length_result` the same way as :code:`np.ndarray`. For example: - -.. testcode:: - - length_result[1, 0] - length_result[:, 0] - length_result[:, :] - length_result[:1, :1] - length_result[:2, ...] - -Keep in mind: - - all derived quantities such as :code:`length_locations` and :code:`length_result`, ... are still :py:class:`mlmc.quantity.quantity.Quantity` instances - - selecting location before time is not supported! - - -Binary operations ------------------ -Following operations are supported - - - Addition, subtraction, ... of compatible quantities - - .. testcode:: - - quantity = root_quantity + root_quantity - quantity = root_quantity + root_quantity + root_quantity - - - Operations with Quantity and a constant - - .. testcode:: - - const = 5 - quantity_const_add = root_quantity + const - quantity_const_sub = root_quantity - const - quantity_const_mult = root_quantity * const - quantity_const_div = root_quantity / const - quantity_const_mod = root_quantity % const - quantity_add_mult = root_quantity + root_quantity * const - - -NumPy universal functions --------------------------- - -Examples of tested NumPy universal functions: - -.. 
testcode:: - - quantity_np_add = np.add(root_quantity, root_quantity) - quantity_np_max = np.max(root_quantity, axis=0, keepdims=True) - quantity_np_sin = np.sin(root_quantity) - quantity_np_sum = np.sum(root_quantity, axis=0, keepdims=True) - quantity_np_maximum = np.maximum(root_quantity, root_quantity) - - x = np.ones(24) - quantity_np_divide_const = np.divide(x, root_quantity) - quantity_np_add_const = np.add(x, root_quantity) - quantity_np_arctan2_cosnt = np.arctan2(x, root_quantity) - - -Quantity selection by conditions ---------------------------------- - -Method :code:`select` returns :py:class:`mlmc.quantity.quantity.Quantity` instance - -.. testcode:: - - selected_quantity = root_quantity.select(0 < root_quantity) - -.. testcode:: - - quantity_add = root_quantity + root_quantity - quantity_add_select = quantity_add.select(root_quantity < quantity_add) - root_quantity_selected = root_quantity.select(-1 != root_quantity) - -Logical operation among more provided conditions is AND - -.. testcode:: - - quantity_add.select(root_quantity < quantity_add, root_quantity < 10) - -User can use one of the logical NumPy universal functions - -.. testcode:: - - selected_quantity_or = root_quantity.select(np.logical_or(0 < root_quantity, root_quantity < 10)) - -It is possible to explicitly define the selection condition of one quantity by another quantity - -.. testcode:: - - mask = np.logical_and(0 < root_quantity, root_quantity < 10) # mask is Quantity instance - q_bounded = root_quantity.select(mask) - - diff --git a/docs/source/examples_sampler_creation.rst b/docs/source/examples_sampler_creation.rst deleted file mode 100644 index b363d774..00000000 --- a/docs/source/examples_sampler_creation.rst +++ /dev/null @@ -1,56 +0,0 @@ -Sampler creation -================= -Sampler controls the execution of MLMC samples. - - -First, import mlmc package and define basic MLMC parameters. - -.. 
testcode:: - - import mlmc - n_levels = 3 # number of MLMC levels - step_range = [0.5, 0.005] # simulation steps at the coarsest and finest levels - level_parameters = mlmc.estimator.determine_level_parameters(n_levels, step_range) - # level_parameters determine each level simulation steps - # level_parameters can be manually prescribed as a list of lists - - -Prepare a simulation, it must be instance of class that inherits from :any:`mlmc.sim.simulation.Simulation`. - -.. testcode:: - - simulation_factory = mlmc.SynthSimulation() - -Create a sampling pool. - -.. testcode:: - - sampling_pool = mlmc.OneProcessPool() - - -You can also use :any:`mlmc.sampling_pool.ProcessPool` which supports parallel execution of MLMC samples. -In order to use PBS (portable batch system), employ :any:`mlmc.sampling_pool_pbs.SamplingPoolPBS`. - - -Create a sample storage. It contains sample's related data e.g. simulation result. - -.. testcode:: - - # Memory() storage keeps samples in the computer main memory - sample_storage = mlmc.Memory() - -We support also HDF5 file storage :any:`mlmc.sample_storage_hdf.SampleStorageHDF`. - - -Finally, create a sampler that manages scheduling MLMC samples and also saves the results. - -.. testcode:: - - sampler = mlmc.Sampler(sample_storage=sample_storage, - sampling_pool=sampling_pool, - sim_factory=simulation_factory, - level_parameters=level_parameters) - - - -:ref:`examples samples scheduling` \ No newline at end of file diff --git a/docs/source/examples_samples_scheduling.rst b/docs/source/examples_samples_scheduling.rst deleted file mode 100644 index 135a421c..00000000 --- a/docs/source/examples_samples_scheduling.rst +++ /dev/null @@ -1,136 +0,0 @@ -.. _examples samples scheduling: - -Samples scheduling -================== - -Once you create a sampler you can schedule samples. - - -1. Prescribe the exact number of samples ----------------------------------------------------------------- - -.. 
testcode:: - :hide: - - import mlmc - n_levels = 3 # number of MLMC levels - step_range = [0.5, 0.005] # simulation steps at the coarsest and finest levels - level_parameters = mlmc.estimator.determine_level_parameters(n_levels, step_range) - # level_parameters determine each level simulation steps - # level_parameters can be manually prescribed as a list of lists - - simulation_factory = mlmc.SynthSimulation() - sampling_pool = mlmc.OneProcessPool() - # Memory() storage keeps samples in the computer main memory - sample_storage = mlmc.Memory() - - sampler = mlmc.Sampler(sample_storage=sample_storage, - sampling_pool=sampling_pool, - sim_factory=simulation_factory, - level_parameters=level_parameters) - - -.. testcode:: - - n_samples = [100, 75, 50] - sampler.set_initial_n_samples(n_samples) - -Schedule set samples. - -.. testcode:: - - sampler.schedule_samples() - -You can wait until all samples are finished. - -.. testcode:: - - running = 1 - while running > 0: - running = 0 - running += sampler.ask_sampling_pool_for_samples() - - -2. Prescribe a target variance -------------------------------------------------------------- - -Set target variance and number of random variable moments that must meet this variance. - -.. testcode:: - - target_var = 1e-4 - n_moments = 10 - -The first phase is the same as the first approach, but the initial samples are automatically determined -as a sequence from 100 samples at the coarsest level to 10 samples at the finest level. - -.. testcode:: - - sampler.set_initial_n_samples() - sampler.schedule_samples() - running = 1 - while running > 0: - running = 0 - running += sampler.ask_sampling_pool_for_samples() - - -The :py:class:`mlmc.quantity.quantity.Quantity` instance is created, for details see :ref:`examples quantity` - -.. 
testcode:: - - root_quantity = mlmc.make_root_quantity(storage=sampler.sample_storage, - q_specs=sampler.sample_storage.load_result_format()) - -:code:`root_quantity` contains the structure of sample results and also allows access to their values. - -In order to estimate moment values including variance, moment functions class (in this case Legendre polynomials) instance -and :py:class:`mlmc.estimator.Estimate` instance are created. - -.. testcode:: - - true_domain = mlmc.Estimate.estimate_domain(root_quantity, sample_storage) - moments_fn = mlmc.Legendre(n_moments, true_domain) - - estimate_obj = mlmc.Estimate(root_quantity, sample_storage=sampler.sample_storage, - moments_fn=moments_fn) - - -At first, the variance of moments and average execution time per sample at each level are estimated from already finished samples. - -.. testcode:: - - variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler.n_finished_samples) - -Then, an initial estimate of the number of MLMC samples that should meet prescribed target variance is conducted. - -.. testcode:: - - from mlmc.estimator import estimate_n_samples_for_target_variance - n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, - n_levels=sampler.n_levels) - - -Now it is time for our sampling algorithm that gradually schedules samples and refines the total number of samples -until the number of estimated samples is greater than the number of scheduled samples. - -.. testcode:: - - while not sampler.process_adding_samples(n_estimated): - # New estimation according to already finished samples - variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler._n_scheduled_samples) - n_estimated = estimate_n_samples_for_target_variance(target_var, variances, n_ops, - n_levels=sampler.n_levels) - - -Finally, wait until all samples are finished. - -.. 
testcode:: - - running = 1 - while running > 0: - running = 0 - running += sampler.ask_sampling_pool_for_samples() - -Since our sampling algorithm determines the number of samples according to moment variances, -the type of moment functions (Legendre by default) might affect total number of MLMC samples. - diff --git a/docs/source/generated/mlmc.estimator.Estimate.rst b/docs/source/generated/mlmc.estimator.Estimate.rst deleted file mode 100644 index 7935c38c..00000000 --- a/docs/source/generated/mlmc.estimator.Estimate.rst +++ /dev/null @@ -1,41 +0,0 @@ -mlmc.estimator.Estimate -======================= - -.. currentmodule:: mlmc.estimator - -.. autoclass:: Estimate - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~Estimate.__init__ - ~Estimate.bs_target_var_n_estimated - ~Estimate.construct_density - ~Estimate.est_bootstrap - ~Estimate.estimate_covariance - ~Estimate.estimate_diff_vars - ~Estimate.estimate_diff_vars_regression - ~Estimate.estimate_domain - ~Estimate.estimate_moments - ~Estimate.fine_coarse_violinplot - ~Estimate.get_level_samples - ~Estimate.plot_bs_var_log - ~Estimate.plot_variances - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~Estimate.n_moments - ~Estimate.quantity - - \ No newline at end of file diff --git a/docs/source/generated/mlmc.level_simulation.LevelSimulation.rst b/docs/source/generated/mlmc.level_simulation.LevelSimulation.rst deleted file mode 100644 index 80287f63..00000000 --- a/docs/source/generated/mlmc.level_simulation.LevelSimulation.rst +++ /dev/null @@ -1,22 +0,0 @@ -mlmc.level\_simulation.LevelSimulation -====================================== - -.. currentmodule:: mlmc.level_simulation - -.. autoclass:: LevelSimulation - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~LevelSimulation.__init__ - - - - - - \ No newline at end of file diff --git a/docs/source/generated/mlmc.moments.Fourier.rst b/docs/source/generated/mlmc.moments.Fourier.rst deleted file mode 100644 index 0e49c712..00000000 --- a/docs/source/generated/mlmc.moments.Fourier.rst +++ /dev/null @@ -1,32 +0,0 @@ -mlmc.moments.Fourier -==================== - -.. currentmodule:: mlmc.moments - -.. autoclass:: Fourier - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~Fourier.__init__ - ~Fourier.change_size - ~Fourier.clip - ~Fourier.eval - ~Fourier.eval_all - ~Fourier.eval_all_der - ~Fourier.eval_diff - ~Fourier.eval_diff2 - ~Fourier.eval_single_moment - ~Fourier.inv_linear - ~Fourier.linear - - - - - - \ No newline at end of file diff --git a/docs/source/generated/mlmc.moments.Legendre.rst b/docs/source/generated/mlmc.moments.Legendre.rst deleted file mode 100644 index 7fa21e31..00000000 --- a/docs/source/generated/mlmc.moments.Legendre.rst +++ /dev/null @@ -1,32 +0,0 @@ -mlmc.moments.Legendre -===================== - -.. currentmodule:: mlmc.moments - -.. autoclass:: Legendre - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~Legendre.__init__ - ~Legendre.change_size - ~Legendre.clip - ~Legendre.eval - ~Legendre.eval_all - ~Legendre.eval_all_der - ~Legendre.eval_diff - ~Legendre.eval_diff2 - ~Legendre.eval_single_moment - ~Legendre.inv_linear - ~Legendre.linear - - - - - - \ No newline at end of file diff --git a/docs/source/generated/mlmc.moments.Moments.rst b/docs/source/generated/mlmc.moments.Moments.rst deleted file mode 100644 index 34c8e55b..00000000 --- a/docs/source/generated/mlmc.moments.Moments.rst +++ /dev/null @@ -1,32 +0,0 @@ -mlmc.moments.Moments -==================== - -.. currentmodule:: mlmc.moments - -.. autoclass:: Moments - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~Moments.__init__ - ~Moments.change_size - ~Moments.clip - ~Moments.eval - ~Moments.eval_all - ~Moments.eval_all_der - ~Moments.eval_diff - ~Moments.eval_diff2 - ~Moments.eval_single_moment - ~Moments.inv_linear - ~Moments.linear - - - - - - \ No newline at end of file diff --git a/docs/source/generated/mlmc.moments.Monomial.rst b/docs/source/generated/mlmc.moments.Monomial.rst deleted file mode 100644 index 690f60e5..00000000 --- a/docs/source/generated/mlmc.moments.Monomial.rst +++ /dev/null @@ -1,32 +0,0 @@ -mlmc.moments.Monomial -===================== - -.. currentmodule:: mlmc.moments - -.. autoclass:: Monomial - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~Monomial.__init__ - ~Monomial.change_size - ~Monomial.clip - ~Monomial.eval - ~Monomial.eval_all - ~Monomial.eval_all_der - ~Monomial.eval_diff - ~Monomial.eval_diff2 - ~Monomial.eval_single_moment - ~Monomial.inv_linear - ~Monomial.linear - - - - - - \ No newline at end of file diff --git a/docs/source/generated/mlmc.plot.rst b/docs/source/generated/mlmc.plot.rst deleted file mode 100644 index 5b86fc22..00000000 --- a/docs/source/generated/mlmc.plot.rst +++ /dev/null @@ -1,23 +0,0 @@ -mlmc.plot -========= - -.. automodule:: mlmc.plot - - - - - - - - - - - - - - - - - - - diff --git a/docs/source/generated/mlmc.quantity.rst b/docs/source/generated/mlmc.quantity.rst deleted file mode 100644 index 223b8cc0..00000000 --- a/docs/source/generated/mlmc.quantity.rst +++ /dev/null @@ -1,23 +0,0 @@ -mlmc.quantity -============= - -.. automodule:: mlmc.quantity - - - - - - - - - - - - - - - - - - - diff --git a/docs/source/generated/mlmc.random.rst b/docs/source/generated/mlmc.random.rst deleted file mode 100644 index 9e28b061..00000000 --- a/docs/source/generated/mlmc.random.rst +++ /dev/null @@ -1,23 +0,0 @@ -mlmc.random -=========== - -.. 
automodule:: mlmc.random - - - - - - - - - - - - - - - - - - - diff --git a/docs/source/generated/mlmc.sample_storage.Memory.rst b/docs/source/generated/mlmc.sample_storage.Memory.rst deleted file mode 100644 index d97e76ad..00000000 --- a/docs/source/generated/mlmc.sample_storage.Memory.rst +++ /dev/null @@ -1,39 +0,0 @@ -mlmc.sample\_storage.Memory -=========================== - -.. currentmodule:: mlmc.sample_storage - -.. autoclass:: Memory - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~Memory.__init__ - ~Memory.chunks - ~Memory.get_level_ids - ~Memory.get_level_parameters - ~Memory.get_n_collected - ~Memory.get_n_levels - ~Memory.get_n_ops - ~Memory.load_result_format - ~Memory.load_scheduled_samples - ~Memory.n_finished - ~Memory.sample_pairs - ~Memory.sample_pairs_level - ~Memory.save_global_data - ~Memory.save_n_ops - ~Memory.save_result_format - ~Memory.save_samples - ~Memory.save_scheduled_samples - ~Memory.unfinished_ids - - - - - - \ No newline at end of file diff --git a/docs/source/generated/mlmc.sample_storage.SampleStorage.rst b/docs/source/generated/mlmc.sample_storage.SampleStorage.rst deleted file mode 100644 index b42b7d8f..00000000 --- a/docs/source/generated/mlmc.sample_storage.SampleStorage.rst +++ /dev/null @@ -1,38 +0,0 @@ -mlmc.sample\_storage.SampleStorage -================================== - -.. currentmodule:: mlmc.sample_storage - -.. autoclass:: SampleStorage - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~SampleStorage.__init__ - ~SampleStorage.chunks - ~SampleStorage.get_level_ids - ~SampleStorage.get_level_parameters - ~SampleStorage.get_n_collected - ~SampleStorage.get_n_levels - ~SampleStorage.get_n_ops - ~SampleStorage.load_result_format - ~SampleStorage.load_scheduled_samples - ~SampleStorage.n_finished - ~SampleStorage.sample_pairs - ~SampleStorage.save_global_data - ~SampleStorage.save_n_ops - ~SampleStorage.save_result_format - ~SampleStorage.save_samples - ~SampleStorage.save_scheduled_samples - ~SampleStorage.unfinished_ids - - - - - - \ No newline at end of file diff --git a/docs/source/generated/mlmc.sample_storage_hdf.SampleStorageHDF.rst b/docs/source/generated/mlmc.sample_storage_hdf.SampleStorageHDF.rst deleted file mode 100644 index 493a8e34..00000000 --- a/docs/source/generated/mlmc.sample_storage_hdf.SampleStorageHDF.rst +++ /dev/null @@ -1,41 +0,0 @@ -mlmc.sample\_storage\_hdf.SampleStorageHDF -========================================== - -.. currentmodule:: mlmc.sample_storage_hdf - -.. autoclass:: SampleStorageHDF - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~SampleStorageHDF.__init__ - ~SampleStorageHDF.chunks - ~SampleStorageHDF.clear_failed - ~SampleStorageHDF.failed_samples - ~SampleStorageHDF.get_level_ids - ~SampleStorageHDF.get_level_parameters - ~SampleStorageHDF.get_n_collected - ~SampleStorageHDF.get_n_levels - ~SampleStorageHDF.get_n_ops - ~SampleStorageHDF.load_result_format - ~SampleStorageHDF.load_scheduled_samples - ~SampleStorageHDF.n_finished - ~SampleStorageHDF.sample_pairs - ~SampleStorageHDF.sample_pairs_level - ~SampleStorageHDF.save_global_data - ~SampleStorageHDF.save_n_ops - ~SampleStorageHDF.save_result_format - ~SampleStorageHDF.save_samples - ~SampleStorageHDF.save_scheduled_samples - ~SampleStorageHDF.unfinished_ids - - - - - - \ No newline at end of file diff --git a/docs/source/generated/mlmc.sampler.Sampler.rst b/docs/source/generated/mlmc.sampler.Sampler.rst deleted file mode 100644 index 974e0d55..00000000 --- a/docs/source/generated/mlmc.sampler.Sampler.rst +++ /dev/null @@ -1,39 +0,0 @@ -mlmc.sampler.Sampler -==================== - -.. currentmodule:: mlmc.sampler - -.. autoclass:: Sampler - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~Sampler.__init__ - ~Sampler.ask_sampling_pool_for_samples - ~Sampler.l_scheduled_samples - ~Sampler.process_adding_samples - ~Sampler.renew_failed_samples - ~Sampler.sample_range - ~Sampler.schedule_samples - ~Sampler.set_initial_n_samples - ~Sampler.set_level_target_n_samples - ~Sampler.set_scheduled_and_wait - - - - - - .. rubric:: Attributes - - .. 
autosummary:: - - ~Sampler.ADDING_SAMPLES_TIMEOUT - ~Sampler.n_finished_samples - ~Sampler.n_levels - - \ No newline at end of file diff --git a/docs/source/generated/mlmc.sampling_pool.OneProcessPool.rst b/docs/source/generated/mlmc.sampling_pool.OneProcessPool.rst deleted file mode 100644 index e2b83e30..00000000 --- a/docs/source/generated/mlmc.sampling_pool.OneProcessPool.rst +++ /dev/null @@ -1,42 +0,0 @@ -mlmc.sampling\_pool.OneProcessPool -================================== - -.. currentmodule:: mlmc.sampling_pool - -.. autoclass:: OneProcessPool - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~OneProcessPool.__init__ - ~OneProcessPool.calculate_sample - ~OneProcessPool.change_to_sample_directory - ~OneProcessPool.compute_seed - ~OneProcessPool.copy_sim_files - ~OneProcessPool.get_finished - ~OneProcessPool.handle_sim_files - ~OneProcessPool.have_permanent_samples - ~OneProcessPool.move_dir - ~OneProcessPool.move_failed_rm - ~OneProcessPool.move_successful_rm - ~OneProcessPool.remove_sample_dir - ~OneProcessPool.schedule_sample - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~OneProcessPool.FAILED_DIR - ~OneProcessPool.N_SUCCESSFUL - ~OneProcessPool.SEVERAL_SUCCESSFUL_DIR - - \ No newline at end of file diff --git a/docs/source/generated/mlmc.sampling_pool.ProcessPool.rst b/docs/source/generated/mlmc.sampling_pool.ProcessPool.rst deleted file mode 100644 index 2aea7ee4..00000000 --- a/docs/source/generated/mlmc.sampling_pool.ProcessPool.rst +++ /dev/null @@ -1,43 +0,0 @@ -mlmc.sampling\_pool.ProcessPool -=============================== - -.. currentmodule:: mlmc.sampling_pool - -.. autoclass:: ProcessPool - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~ProcessPool.__init__ - ~ProcessPool.calculate_sample - ~ProcessPool.change_to_sample_directory - ~ProcessPool.compute_seed - ~ProcessPool.copy_sim_files - ~ProcessPool.get_finished - ~ProcessPool.handle_sim_files - ~ProcessPool.have_permanent_samples - ~ProcessPool.move_dir - ~ProcessPool.move_failed_rm - ~ProcessPool.move_successful_rm - ~ProcessPool.remove_sample_dir - ~ProcessPool.res_callback - ~ProcessPool.schedule_sample - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~ProcessPool.FAILED_DIR - ~ProcessPool.N_SUCCESSFUL - ~ProcessPool.SEVERAL_SUCCESSFUL_DIR - - \ No newline at end of file diff --git a/docs/source/generated/mlmc.sampling_pool.SamplingPool.rst b/docs/source/generated/mlmc.sampling_pool.SamplingPool.rst deleted file mode 100644 index 47340579..00000000 --- a/docs/source/generated/mlmc.sampling_pool.SamplingPool.rst +++ /dev/null @@ -1,42 +0,0 @@ -mlmc.sampling\_pool.SamplingPool -================================ - -.. currentmodule:: mlmc.sampling_pool - -.. autoclass:: SamplingPool - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~SamplingPool.__init__ - ~SamplingPool.calculate_sample - ~SamplingPool.change_to_sample_directory - ~SamplingPool.compute_seed - ~SamplingPool.copy_sim_files - ~SamplingPool.get_finished - ~SamplingPool.handle_sim_files - ~SamplingPool.have_permanent_samples - ~SamplingPool.move_dir - ~SamplingPool.move_failed_rm - ~SamplingPool.move_successful_rm - ~SamplingPool.remove_sample_dir - ~SamplingPool.schedule_sample - - - - - - .. rubric:: Attributes - - .. 
autosummary:: - - ~SamplingPool.FAILED_DIR - ~SamplingPool.N_SUCCESSFUL - ~SamplingPool.SEVERAL_SUCCESSFUL_DIR - - \ No newline at end of file diff --git a/docs/source/generated/mlmc.sampling_pool_pbs.SamplingPoolPBS.rst b/docs/source/generated/mlmc.sampling_pool_pbs.SamplingPoolPBS.rst deleted file mode 100644 index 7d6c9cab..00000000 --- a/docs/source/generated/mlmc.sampling_pool_pbs.SamplingPoolPBS.rst +++ /dev/null @@ -1,53 +0,0 @@ -mlmc.sampling\_pool\_pbs.SamplingPoolPBS -======================================== - -.. currentmodule:: mlmc.sampling_pool_pbs - -.. autoclass:: SamplingPoolPBS - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~SamplingPoolPBS.__init__ - ~SamplingPoolPBS.calculate_sample - ~SamplingPoolPBS.change_to_sample_directory - ~SamplingPoolPBS.compute_seed - ~SamplingPoolPBS.copy_sim_files - ~SamplingPoolPBS.delete_pbs_id_file - ~SamplingPoolPBS.execute - ~SamplingPoolPBS.get_finished - ~SamplingPoolPBS.handle_sim_files - ~SamplingPoolPBS.have_permanent_samples - ~SamplingPoolPBS.move_dir - ~SamplingPoolPBS.move_failed_rm - ~SamplingPoolPBS.move_successful_rm - ~SamplingPoolPBS.pbs_common_setting - ~SamplingPoolPBS.remove_sample_dir - ~SamplingPoolPBS.schedule_sample - ~SamplingPoolPBS.serialize_level_sim - ~SamplingPoolPBS.write_script - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~SamplingPoolPBS.FAILED_DIR - ~SamplingPoolPBS.JOB - ~SamplingPoolPBS.JOBS_DIR - ~SamplingPoolPBS.LEVEL_SIM_CONFIG - ~SamplingPoolPBS.N_SUCCESSFUL - ~SamplingPoolPBS.OUTPUT_DIR - ~SamplingPoolPBS.QSTAT_FAILED_MAX_N - ~SamplingPoolPBS.QSUB_FAILED_MAX_N - ~SamplingPoolPBS.SEVERAL_SUCCESSFUL_DIR - - \ No newline at end of file diff --git a/docs/source/generated/mlmc.sim.rst b/docs/source/generated/mlmc.sim.rst deleted file mode 100644 index 4fc27922..00000000 --- a/docs/source/generated/mlmc.sim.rst +++ /dev/null @@ -1,23 +0,0 @@ -mlmc.sim -======== - -.. 
automodule:: mlmc.sim - - - - - - - - - - - - - - - - - - - diff --git a/docs/source/generated/mlmc.tool.rst b/docs/source/generated/mlmc.tool.rst deleted file mode 100644 index b2fa9396..00000000 --- a/docs/source/generated/mlmc.tool.rst +++ /dev/null @@ -1,23 +0,0 @@ -mlmc.tool -========= - -.. automodule:: mlmc.tool - - - - - - - - - - - - - - - - - - - diff --git a/docs/source/index.rst b/docs/source/index.rst deleted file mode 100644 index 27a83141..00000000 --- a/docs/source/index.rst +++ /dev/null @@ -1,29 +0,0 @@ -===== -MLMC -===== - -.. image:: https://github.com/GeoMop/MLMC/workflows/package/badge.svg - :target: https://github.com/GeoMop/MLMC/actions -.. image:: https://img.shields.io/pypi/v/mlmc.svg - :target: https://pypi.org/project/mlmc/ -.. image:: https://img.shields.io/pypi/pyversions/mlmc.svg - :target: https://pypi.org/project/mlmc/ - -MLMC provides tools for the multilevel Monte Carlo method, which is theoretically described by `M. Giles `_. - -mlmc package includes: - -- samples scheduling -- estimation of generalized moment functions -- probability density function approximation -- advanced post-processing with our Quantity structure - - -Installation -============ -mlmc can be installed via `pip `_ - -.. code-block:: none - - pip install mlmc - diff --git a/docs/source/mlmc.plot.rst b/docs/source/mlmc.plot.rst deleted file mode 100644 index 47f0bd09..00000000 --- a/docs/source/mlmc.plot.rst +++ /dev/null @@ -1,34 +0,0 @@ -mlmc.plot -================= - -.. automodule:: mlmc.plot - :members: - :undoc-members: - :show-inheritance: - -Submodules ----------- - -mlmc.plot.plots module ----------------------- - -.. automodule:: mlmc.plot.plots - :members: - :undoc-members: - :show-inheritance: - -mlmc.plot.violinplot module ---------------------------- - -.. automodule:: mlmc.plot.violinplot - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. 
automodule:: mlmc.plot - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/mlmc.quantity.rst b/docs/source/mlmc.quantity.rst deleted file mode 100644 index 68c48998..00000000 --- a/docs/source/mlmc.quantity.rst +++ /dev/null @@ -1,48 +0,0 @@ -mlmc.quantity -===================== - -.. automodule:: mlmc.quantity - - -Submodules ----------- - -mlmc.quantity.quantity module ------------------------------ - -.. automodule:: mlmc.quantity.quantity - :members: - :undoc-members: - :show-inheritance: - -mlmc.quantity.quantity\_estimate module ---------------------------------------- - -.. automodule:: mlmc.quantity.quantity_estimate - :members: - :undoc-members: - :show-inheritance: - -mlmc.quantity.quantity\_spec module ------------------------------------ - -.. automodule:: mlmc.quantity.quantity_spec - :members: - :undoc-members: - :show-inheritance: - -mlmc.quantity.quantity\_types module ------------------------------------- - -.. automodule:: mlmc.quantity.quantity_types - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: mlmc.quantity - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/mlmc.random.rst b/docs/source/mlmc.random.rst deleted file mode 100644 index 37dd33ec..00000000 --- a/docs/source/mlmc.random.rst +++ /dev/null @@ -1,39 +0,0 @@ -mlmc.random -=================== - -.. automodule:: mlmc.random - -Submodules ----------- - -mlmc.random.correlated\_field module ------------------------------------- - -.. automodule:: mlmc.random.correlated_field - :members: - :undoc-members: - :show-inheritance: - -mlmc.random.frac\_geom module ------------------------------ - -.. automodule:: mlmc.random.frac_geom - :members: - :undoc-members: - :show-inheritance: - -mlmc.random.gstools\_wrapper module ------------------------------------ - -.. 
automodule:: mlmc.random.gstools_wrapper - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: mlmc.random - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/mlmc.rst b/docs/source/mlmc.rst deleted file mode 100644 index 113fc65e..00000000 --- a/docs/source/mlmc.rst +++ /dev/null @@ -1,15 +0,0 @@ -============= -MLMC package -============= - -.. automodule:: mlmc - -.. toctree:: - :includehidden: - :maxdepth: 1 - - mlmc.plot.rst - mlmc.quantity.rst - mlmc.random.rst - mlmc.sim.rst - mlmc.tool.rst \ No newline at end of file diff --git a/docs/source/mlmc.sim.rst b/docs/source/mlmc.sim.rst deleted file mode 100644 index 932f4b9c..00000000 --- a/docs/source/mlmc.sim.rst +++ /dev/null @@ -1,31 +0,0 @@ -mlmc.sim -================ - -.. automodule:: mlmc.sim - -Submodules ----------- - -mlmc.sim.simulation module --------------------------- - -.. automodule:: mlmc.sim.simulation - :members: - :undoc-members: - :show-inheritance: - -mlmc.sim.synth\_simulation module ---------------------------------- - -.. automodule:: mlmc.sim.synth_simulation - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: mlmc.sim - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/mlmc.tool.rst b/docs/source/mlmc.tool.rst deleted file mode 100644 index 6e0bdc51..00000000 --- a/docs/source/mlmc.tool.rst +++ /dev/null @@ -1,87 +0,0 @@ -mlmc.tool -================= - -.. automodule:: mlmc.tool - -Submodules ----------- - -mlmc.tool.context\_statprof module ----------------------------------- - -.. automodule:: mlmc.tool.context_statprof - :members: - :undoc-members: - :show-inheritance: - -mlmc.tool.distribution module ------------------------------ - -.. automodule:: mlmc.tool.distribution - :members: - :undoc-members: - :show-inheritance: - -mlmc.tool.flow\_mc module -------------------------- - -.. 
automodule:: mlmc.tool.flow_mc - :members: - :undoc-members: - :show-inheritance: - -mlmc.tool.gmsh\_io module -------------------------- - -.. automodule:: mlmc.tool.gmsh_io - :members: - :undoc-members: - :show-inheritance: - -mlmc.tool.hdf5 module ---------------------- - -.. automodule:: mlmc.tool.hdf5 - :members: - :undoc-members: - :show-inheritance: - -mlmc.tool.pbs\_job module -------------------------- - -.. automodule:: mlmc.tool.pbs_job - :members: - :undoc-members: - :show-inheritance: - -mlmc.tool.process\_base module ------------------------------- - -.. automodule:: mlmc.tool.process_base - :members: - :undoc-members: - :show-inheritance: - -mlmc.tool.simple\_distribution module -------------------------------------- - -.. automodule:: mlmc.tool.simple_distribution - :members: - :undoc-members: - :show-inheritance: - -mlmc.tool.stats\_tests module ------------------------------ - -.. automodule:: mlmc.tool.stats_tests - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. 
automodule:: mlmc.tool - :members: - :undoc-members: - :show-inheritance: diff --git a/mlmc/estimator.py b/mlmc/estimator.py index af4f6e03..9f615602 100644 --- a/mlmc/estimator.py +++ b/mlmc/estimator.py @@ -3,6 +3,7 @@ import scipy.integrate as integrate import mlmc.quantity.quantity_estimate as qe import mlmc.tool.simple_distribution +from mlmc.quantity.quantity_estimate import mask_nan_samples from mlmc.quantity.quantity_types import ScalarType from mlmc.plot import plots from mlmc.quantity.quantity_spec import ChunkSpec @@ -16,6 +17,7 @@ def __init__(self, quantity, sample_storage, moments_fn=None): self._quantity = quantity self._sample_storage = sample_storage self._moments_fn = moments_fn + self._moments_mean = None @property def quantity(self): @@ -29,6 +31,16 @@ def quantity(self, quantity): def n_moments(self): return self._moments_fn.size + @property + def moments_mean_obj(self): + return self._moments_mean + + @moments_mean_obj.setter + def moments_mean_obj(self, moments_mean): + if not isinstance(moments_mean, mlmc.quantity.quantity.QuantityMean): + raise TypeError + self._moments_mean = moments_mean + def estimate_moments(self, moments_fn=None): """ Use collected samples to estimate moments and variance of this estimate. @@ -39,6 +51,7 @@ def estimate_moments(self, moments_fn=None): moments_fn = self._moments_fn moments_mean = qe.estimate_mean(qe.moments(self._quantity, moments_fn)) + self.moments_mean_obj = moments_mean return moments_mean.mean, moments_mean.var def estimate_covariance(self, moments_fn=None): @@ -81,6 +94,8 @@ def estimate_diff_vars(self, moments_fn=None): diff_variance - shape LxR, variances of diffs of moments_fn n_samples - shape L, num samples for individual levels. 
""" + if moments_fn is None: + moments_fn = self._moments_fn moments_mean = qe.estimate_mean(qe.moments(self._quantity, moments_fn)) return moments_mean.l_vars, moments_mean.n_samples @@ -148,7 +163,6 @@ def _variance_of_variance(self, n_samples=None): ns, var_var = self._saved_var_var if np.sum(np.abs(np.array(ns) - np.array(n_samples))) == 0: return var_var - vars = [] for ns in n_samples: df = ns - 1 @@ -328,7 +342,7 @@ def construct_density(self, tol=1e-8, reg_param=0.0, orth_moments_tol=1e-4, exac domain=moments_obj.domain) result = distr_obj.estimate_density_minimize(tol, reg_param) # 0.95 two side quantile - return distr_obj, info, result, moments_obj + return distr_obj, info, result, moments_obj, moments_mean def get_level_samples(self, level_id, n_samples=None): """ @@ -340,6 +354,51 @@ def get_level_samples(self, level_id, n_samples=None): chunk_spec = next(self._sample_storage.chunks(level_id=level_id, n_samples=n_samples)) return self._quantity.samples(chunk_spec=chunk_spec) + def kurtosis_check(self, quantity=None): + if quantity is None: + quantity = self._quantity + moments_mean_quantity = qe.estimate_mean(quantity) + kurtosis = qe.level_kurtosis(quantity, moments_mean_quantity) + return kurtosis + + +def consistency_check(quantity, sample_storage=None): + + fine_samples = {} + coarse_samples = {} + for chunk_spec in quantity.get_quantity_storage().chunks(): + samples = quantity.samples(chunk_spec) + chunk, n_mask_samples = mask_nan_samples(samples) + + # No samples in chunk + if chunk.shape[1] == 0: + continue + + fine_samples.setdefault(chunk_spec.level_id, []).extend(chunk[:, :, 0]) + if chunk_spec.level_id > 0: + coarse_samples.setdefault(chunk_spec.level_id, []).extend(chunk[:, :, 1]) + + cons_check_val = {} + for level_id in range(sample_storage.get_n_levels()): + if level_id > 0: + fine_mean = np.mean(fine_samples[level_id]) + coarse_mean = np.mean(coarse_samples[level_id]) + diff_mean = np.mean(np.array(fine_samples[level_id]) - 
np.array(coarse_samples[level_id])) + + fine_var = np.var(fine_samples[level_id]) + coarse_var = np.var(fine_samples[level_id]) + diff_var = np.var(np.array(fine_samples[level_id]) - np.array(coarse_samples[level_id])) + + val = np.abs(coarse_mean - fine_mean + diff_mean) / ( + 3 * (np.sqrt(coarse_var) + np.sqrt(fine_var) + np.sqrt(diff_var))) + + assert np.isclose(coarse_mean - fine_mean + diff_mean, 0) + assert val < 0.9 + + cons_check_val[level_id] = val + + return cons_check_val + def estimate_domain(quantity, sample_storage, quantile=None): """ @@ -363,7 +422,23 @@ def estimate_domain(quantity, sample_storage, quantile=None): return np.min(ranges[:, 0]), np.max(ranges[:, 1]) -def estimate_n_samples_for_target_variance(target_variance, prescribe_vars, n_ops, n_levels): +def coping_with_high_kurtosis(vars, costs, kurtosis, kurtosis_threshold=100): + """ + Coping with high kurtosis is recommended by prof. M. Giles in http://people.maths.ox.ac.uk/~gilesm/talks/MCQMC_22_b.pdf + :param vars: vars[L, M] for all levels L and moments_fn M safe the (zeroth) constant moment with zero variance. + :param costs: cost of level's sample + :param kurtosis: each level's sample kurtosis + :param kurtosis_threshold: Kurtosis is considered to be too high if it is above this threshold. + Original variances are underestimated and therefore modified in this metod + :return: vars + """ + for l_id in range(2, vars.shape[0]): + if kurtosis[l_id] > kurtosis_threshold: + vars[l_id] = np.maximum(vars[l_id], 0.5 * vars[l_id - 1] * costs[l_id - 1] / costs[l_id]) + return vars + + +def estimate_n_samples_for_target_variance(target_variance, prescribe_vars, n_ops, n_levels, theta=0, kurtosis=None): """ Estimate optimal number of samples for individual levels that should provide a target variance of resulting moment estimate. 
def create_corr_fields(model='gauss', corr_length=0.125, dim=2, log=True, factor_sigma=1, sigma=1, mode_no=1000,
                       por_top_mean=-1.0, por_bot_mean=-1.0, por_top_sigma=1, por_bot_sigma=1,
                       por_top_len_scale=0.2, por_bot_len_scale=0.2, factor_top_mean=1e-8, factor_bot_mean=1e-8):
    """
    Assemble the correlated random fields for the two-layer ground + repository model.

    Layer porosities ('ground_0', 'ground_1') are GSTools Gaussian fields mapped into
    physical ranges; conductivities follow from porosity through the Kozeny-Carman
    relation. The repository region ('repo') gets constant porosity and conductivity.

    NOTE(review): `model`, `corr_length` and `sigma` are accepted but currently unused,
    and the porosity covariance models hard-code dim=2 while the factor models use the
    `dim` argument — confirm whether this asymmetry is intended.

    :return: cf.Fields instance with all field definitions
    """
    # Porosity generators for top/bottom ground layers.
    por_top = cf.GSToolsSpatialCorrelatedField(gstools.Gaussian(dim=2, len_scale=por_top_len_scale),
                                               log=log, mean=por_top_mean, sigma=por_top_sigma, mode_no=mode_no)
    por_bot = cf.GSToolsSpatialCorrelatedField(gstools.Gaussian(dim=2, len_scale=por_bot_len_scale),
                                               log=log, mean=por_bot_mean, sigma=por_bot_sigma, mode_no=mode_no)

    water_viscosity = 8.90e-4  # dynamic viscosity of water

    # Kozeny-Carman factor fields, one per layer, unit length scale.
    factor_top = cf.GSToolsSpatialCorrelatedField(gstools.Gaussian(dim=dim, len_scale=1),
                                                  log=log, mean=factor_top_mean, sigma=factor_sigma, mode_no=mode_no)
    factor_bot = cf.GSToolsSpatialCorrelatedField(gstools.Gaussian(dim=dim, len_scale=1),
                                                  log=log, mean=factor_bot_mean, sigma=factor_sigma, mode_no=mode_no)

    return cf.Fields([
        cf.Field('por_top', por_top, regions='ground_0'),
        cf.Field('porosity_top', cf.positive_to_range, ['por_top', 0.02, 0.1], regions='ground_0'),
        cf.Field('por_bot', por_bot, regions='ground_1'),
        cf.Field('porosity_bot', cf.positive_to_range, ['por_bot', 0.01, 0.05], regions='ground_1'),
        cf.Field('porosity_repo', 0.5, regions='repo'),
        cf.Field('factor_top', factor_top, regions='ground_0'),
        cf.Field('factor_bot', factor_bot, regions='ground_1'),
        cf.Field('conductivity_top', cf.kozeny_carman, ['porosity_top', 1, 'factor_top', water_viscosity],
                 regions='ground_0'),
        cf.Field('conductivity_bot', cf.kozeny_carman, ['porosity_bot', 1, 'factor_bot', water_viscosity],
                 regions='ground_1'),
        cf.Field('conductivity_repo', 0.001, regions='repo'),
    ])
def conc_corr_field(mesh_file, corr_field_config):
    """
    Generate 50 realizations of the concentration-model random fields on the given mesh,
    write each realization to 'fields_sample.msh', collect node features for the three
    conductivity fields and visualize their rescaled statistics via plot_rescale().

    :param mesh_file: path to a GMSH mesh file
    :param corr_field_config: dict; only the 'log' key is used here
    """
    mesh_data = FlowSim.extract_mesh(mesh_file)
    feature_names = [['conductivity_top', 'conductivity_bot', 'conductivity_repo']]

    # Fixed hyper-parameters of this experiment.
    por_top_mean = -1.0
    por_bot_mean = -1.0
    por_top_sigma = 1
    por_bot_sigma = 1
    factor_sigma = 1
    por_top_len_scale = 1
    por_bot_len_scale = 1
    factor_top_mean = 1e-8
    factor_bot_mean = 1e-8

    all_features = []
    n_samples = 50
    for _ in range(n_samples):
        fields = create_corr_fields(dim=2, log=corr_field_config["log"],
                                    por_top_mean=por_top_mean,
                                    por_bot_mean=por_bot_mean,
                                    por_top_sigma=por_top_sigma,
                                    por_bot_sigma=por_bot_sigma,
                                    factor_sigma=factor_sigma,
                                    mode_no=1000,
                                    por_top_len_scale=por_top_len_scale,
                                    por_bot_len_scale=por_bot_len_scale,
                                    factor_top_mean=factor_top_mean,
                                    factor_bot_mean=factor_bot_mean)

        fields.set_points(mesh_data['points'], mesh_data['point_region_ids'],
                          mesh_data['region_map'])
        fine_input_sample, coarse_input_sample = FlowSimProcConc.generate_random_sample(
            fields, coarse_step=0, n_fine_elements=len(mesh_data['points']))

        fields_file = 'fields_sample.msh'
        gmsh_io.GmshIO().write_fields(fields_file, mesh_data['ele_ids'], fine_input_sample)
        all_features.append(get_node_features(fields_file, feature_names))

    plot_rescale(all_features, mesh_file)


def cond_corr_field(mesh_file=None, corr_length_config=None):
    """
    Sample the conductivity random field 500 times on the given mesh, visualize the
    rescaled feature statistics and report the average process time per sample.

    NOTE(review): the timer also covers feature extraction and plotting, so the reported
    per-sample time overestimates pure field generation — confirm whether intended.

    :param mesh_file: path to a GMSH mesh file; a hard-coded default is used when None
    :param corr_length_config: optional dict with 'sigma', 'corr_length' and 'log' keys
    :return: process time / n_samples
    """
    dim = 2
    log = True
    cl = 0.1
    s = 1
    feature_names = [['conductivity']]

    if mesh_file is None:
        # L3 mesh, ~12 s per run (earlier alternatives removed as dead stores).
        mesh_file = "/home/martin/Documents/metamodels/data/cl_0_1_s_1/l_step_0.07416198487095663_common_files/mesh.msh"

    start_time = time.process_time()
    mesh_data = FlowSim.extract_mesh(mesh_file)

    all_features = []
    n_samples = 500
    for _ in range(n_samples):
        if corr_length_config is not None:
            fields = create_corr_field(model="exp", dim=dim,
                                       sigma=corr_length_config['sigma'],
                                       corr_length=corr_length_config['corr_length'],
                                       log=corr_length_config['log'])
        else:
            fields = create_corr_field(model="exp", dim=dim, sigma=s, corr_length=cl, log=log)

        # Create both fine and coarse fields on the mesh.
        fields = FlowSim.make_fields(fields, mesh_data, None)
        fields.set_points(mesh_data['points'], mesh_data['point_region_ids'],
                          mesh_data['region_map'])

        fine_input_sample, coarse_input_sample = FlowSim.generate_random_sample(
            fields, coarse_step=0, n_fine_elements=len(mesh_data['points']))

        fields_file = 'fields_sample.msh'
        gmsh_io.GmshIO().write_fields(fields_file, mesh_data['ele_ids'], fine_input_sample)
        all_features.append(get_node_features(fields_file, feature_names))

    plot_rescale(all_features, mesh_file)

    rnd_time = time.process_time() - start_time
    print("rnd_time / n_samples ", rnd_time / n_samples)
    return rnd_time / n_samples


def plot_rescale(all_features, mesh_file):
    """
    Print per-node feature statistics over all samples and, for the first three samples,
    plot the raw field and its min-max rescaled version as tricontour plots on mesh nodes.
    """
    print('mean features ', np.mean(all_features, axis=0))
    print("variance features ", np.var(all_features, axis=0))

    min_features = np.min(all_features, axis=0)
    max_features = np.max(all_features, axis=0)
    print("min features ", min_features)
    print("max features ", max_features)

    for features in all_features[:3]:
        print("features[:10] ", features[:10])

        mesh_data = extract_mesh_gmsh_io(mesh_file, get_points=True)
        points = mesh_data['points']
        xs, ys = points[:, 0], points[:, 1]

        # Raw field.
        fig, ax = plt.subplots(1, 1, figsize=(15, 10))
        print("features shape ", features.shape)
        fig.colorbar(ax.tricontourf(xs, ys, features.ravel(), levels=32))
        plt.title("input")
        plt.show()

        # Min-max rescaling; constant features produce NaN which is mapped to zero.
        features = (features - min_features) / (max_features - min_features)
        features = np.nan_to_num(features)
        print("final features ", features)

        fig, ax = plt.subplots(1, 1, figsize=(15, 10))
        fig.colorbar(ax.tricontourf(xs, ys, features.ravel(), levels=32))
        plt.title("input")
        plt.show()


if __name__ == "__main__":
    # 01_cond_field experiment; for the 02_conc variant call conc_corr_field with
    # {"02_conc": True, 'log': True} and a repo.msh mesh instead.
    corr_file_config = {"02_conc": False, 'sigma': 1, 'corr_length': 0.1, 'log': True}
    mesh_file = "/home/martin/Documents/metamodels/data/mesh_size/l_step_0.07416198487095663_common_files/mesh.msh"
    cond_corr_field(mesh_file, corr_file_config)
+""" + +import numpy as np +import tensorflow as tf +from tensorflow.keras.layers import Dense, Input +from tensorflow.keras.losses import MeanSquaredError +from tensorflow.keras.models import Model +from tensorflow.keras.optimizers import Adam + +from spektral.data import DisjointLoader +from spektral.datasets import QM9 +from spektral.layers import ECCConv, GlobalSumPool + +################################################################################ +# PARAMETERS +################################################################################ +learning_rate = 1e-3 # Learning rate +epochs = 10 # Number of training epochs +batch_size = 32 # Batch size + +################################################################################ +# LOAD DATA +################################################################################ +dataset = QM9(amount=1000) # Set amount=None to train on whole dataset + +# Parameters +F = dataset.n_node_features # Dimension of node features +S = dataset.n_edge_features # Dimension of edge features +n_out = dataset.n_labels # Dimension of the target + +# Train/test split +idxs = np.random.permutation(len(dataset)) +split = int(0.9 * len(dataset)) +idx_tr, idx_te = np.split(idxs, [split]) +dataset_tr, dataset_te = dataset[idx_tr], dataset[idx_te] + +loader_tr = DisjointLoader(dataset_tr, batch_size=batch_size, epochs=epochs) +loader_te = DisjointLoader(dataset_te, batch_size=batch_size, epochs=1) + +################################################################################ +# BUILD MODEL +################################################################################ +X_in = Input(shape=(F,), name="X_in") +A_in = Input(shape=(None,), sparse=True, name="A_in") +E_in = Input(shape=(S,), name="E_in") +I_in = Input(shape=(), name="segment_ids_in", dtype=tf.int32) + +X_1 = ECCConv(32, activation="relu")([X_in, A_in, E_in]) +X_2 = ECCConv(32, activation="relu")([X_1, A_in, E_in]) +X_3 = GlobalSumPool()([X_2, I_in]) +output = 
Dense(n_out)(X_3) + +# Build model +model = Model(inputs=[X_in, A_in, E_in, I_in], outputs=output) +opt = Adam(lr=learning_rate) +loss_fn = MeanSquaredError() + + +################################################################################ +# FIT MODEL +################################################################################ +@tf.function(input_signature=loader_tr.tf_signature(), experimental_relax_shapes=True) +def train_step(inputs, target): + with tf.GradientTape() as tape: + predictions = model(inputs, training=True) + loss = loss_fn(target, predictions) + loss += sum(model.losses) + gradients = tape.gradient(loss, model.trainable_variables) + opt.apply_gradients(zip(gradients, model.trainable_variables)) + return loss + + +print("Fitting model") +current_batch = 0 +model_loss = 0 +for batch in loader_tr: + outs = train_step(*batch) + + model_loss += outs + current_batch += 1 + if current_batch == loader_tr.steps_per_epoch: + print("Loss: {}".format(model_loss / loader_tr.steps_per_epoch)) + model_loss = 0 + current_batch = 0 + +################################################################################ +# EVALUATE MODEL +################################################################################ +print("Testing model") +model_loss = 0 +for batch in loader_te: + inputs, target = batch + predictions = model(inputs, training=False) + + print("target ", target) + print("prediction ", predictions) + model_loss += loss_fn(target, predictions) +model_loss /= loader_te.steps_per_epoch +print("Done. 
Test loss: {}".format(model_loss)) diff --git a/mlmc/metamodel/__init__.py b/mlmc/metamodel/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mlmc/metamodel/analyze_nn.py b/mlmc/metamodel/analyze_nn.py new file mode 100644 index 00000000..53866788 --- /dev/null +++ b/mlmc/metamodel/analyze_nn.py @@ -0,0 +1,2036 @@ +import warnings +import os +import logging +logging.getLogger('tensorflow').disabled = True +logging.getLogger('absl').disabled = True + +import numpy as np +import time +import glob +import copy +import pickle +#os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Run on CPU only +from mlmc.metamodel.flow_dataset import FlowDataset +from mlmc.metamodel.create_graph import graph_creator +from mlmc.moments import Legendre_tf, Monomial +from mlmc.metamodel.random_field_time import corr_field_sample_time +from mlmc.plot import plots +import matplotlib.pyplot as plt +# Make numpy printouts easier to read. +from scipy import stats +# np.set_printoptions(precision=9, suppress=True) +import tensorflow as tf + +from tensorflow import keras +from scipy.stats import ks_2samp +import sklearn.model_selection +from mlmc.metamodel.custom_methods import abs_activation, MSE_moments +from mlmc.metamodel.postprocessing import analyze_results, plot_loss, estimate_density, process_mlmc, plot_progress, plot_learning_rate +from mlmc.metamodel.flow_task_NN import DNN +from mlmc.metamodel.flow_task_CNN import CNN +import keras.backend as K +from mlmc.metamodel.flow_task_GNN_2 import GNN +from tensorflow.keras.losses import MeanSquaredError +from spektral.data import MixedLoader +from spektral.utils.sparse import sp_matrix_to_sp_tensor +from sklearn.metrics import r2_score + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU'))) +epochs = 100 + + +def prepare_data(data): + data = np.squeeze(np.stack(data.to_numpy(), axis=0)) + return np.asarray(data).astype('float64') 
+ + +def split_dataset(dataset): + # Load data + dataset = dataset.dropna() + train_x, test_x, train_y, test_y = sklearn.model_selection.train_test_split(dataset.x, dataset.y, + test_size=0.2, random_state=123) + + train_x = prepare_data(train_x) + train_y = prepare_data(train_y) + + test_x = prepare_data(test_x) + test_y = prepare_data(test_y) + + return train_x, train_y, test_x, test_y + + +def run(): + # Parameters + loss = "mean_squared_error" + optimizer = tf.optimizers.Adam(learning_rate=0.001) + + data = FlowDataset() + # dataset = data.dataset[:10000] + # test_dataset = data.dataset[10000:50000] + + dataset = data.dataset[:50000] + test_dataset = data.dataset[50000:] + + train_input = prepare_data(dataset.x) + train_output = prepare_data(dataset.y) + + # train_input, train_output, test__input, test_output = split_dataset(dataset) + # print("len test(output) ", len(test_output)) + + dnn = DNN(loss=loss, optimizer=optimizer, output_activation=abs_activation, hidden_activation='relu', epochs=150) + dnn.fit(train_input, train_output) + + test_input = prepare_data(test_dataset.x) + test_output = prepare_data(test_dataset.y) + + predictions = dnn.predict(test_input) + predictions = np.squeeze(predictions) + + print("len(predictions) ", len(predictions)) + + plot_loss(dnn.history.history['loss'], dnn.history.history['val_loss']) + analyze_results(test_output, predictions) + + estimate_density(test_output) + estimate_density(predictions) + + +def run_CNN(output_dir, hdf_path, l_0_output_dir, l_0_hdf_path, save_path, mesh, level, log): + # Parameters + loss = "mean_squared_error" + optimizer = tf.optimizers.Adam(learning_rate=0.01) + + data = FlowDataset(output_dir=output_dir, level=level, log=log) + dataset = data.dataset[:] + + train_input, train_output, test_input, test_output = split_dataset(dataset) + + train_input = train_input[:2000] + train_output = train_output[:2000] + + print("len test(output) ", len(test_output)) + + train_input = 
np.expand_dims(train_input, axis=-1) + test_input = np.expand_dims(test_input, axis=-1) + print("train input shape ", train_input.shape) + + dnn = CNN(loss=loss, optimizer=optimizer, output_activation=abs_activation, hidden_activation='relu') + + dnn.fit(train_input, train_output) + + test_dataset = data.dataset[2000:] + test_input = prepare_data(test_dataset.x) + test_input = np.expand_dims(test_input, axis=-1) + print("test input shape ", test_input.shape) + test_output = prepare_data(test_dataset.y) + + predictions = dnn.predict(test_input) + predictions = np.squeeze(predictions) + + plot_loss(dnn.history.history['loss'], dnn.history.history['val_loss']) + + analyze_results(test_output, predictions) + + # estimate_density(test_output) + # estimate_density(predictions) + + +def bootstrap(): + loss = "mean_absolute_error" + optimizer = tf.optimizers.Adam(learning_rate=0.001) + n_subsamples = 10 + size = 10000 + + train_losses = [] + val_losses = [] + all_test_outputs = [] + all_predictions = [] + ks_statistics = [] + ks_p_values = [] + + data = FlowDataset() + dataset = data.dataset.dropna() + + for i in range(n_subsamples): + dset = dataset.sample(size, replace=True) + train_input, train_output, test_input, test_output = split_dataset(dset) + + print("Size TRAIN in: {}, out: {}, TEST in: {}, out: {}".format(len(train_input), len(train_output), + len(test_input), len(test_output))) + + dnn = DNN(loss=loss, optimizer=optimizer, output_activation=abs_activation, hidden_activation='relu') + dnn.fit(train_input, train_output) + + predictions = dnn.predict(test_input) + predictions = np.squeeze(predictions) + + train_losses.append(dnn.history.history['loss']) + val_losses.append(dnn.history.history['val_loss']) + + all_test_outputs.append(test_output) + all_predictions.append(predictions) + + statistics, pvalue = ks_2samp(test_output, predictions) + ks_statistics.append(statistics) + ks_p_values.append(pvalue) + + analyze_results(np.mean(all_test_outputs, axis=0), 
np.mean(all_predictions, axis=0)) + analyze_results(np.var(all_test_outputs, axis=0), np.var(all_predictions, axis=0)) + # + # estimate_density(np.mean(all_test_outputs, axis=0), title="Test outputs") + # estimate_density(np.mean(all_predictions, axis=0), title="Predictions") + + +def run_SVR(config, stats=True, train=True, log=False, seed=1234): + from sklearn.svm import SVR + print("seed ", seed) + + batch_size = 200 + epochs = 1000 + hidden_regularization = None # l2(2e-10) + graph_creation_time = config['graph_creation_time'] + if graph_creation_time == 0: + graph_creator_preproces_time = time.process_time() + graph_creator(config['output_dir'], config['hdf_path'], config['mesh'], level=config['level']) + graph_creation_time = time.process_time() - graph_creator_preproces_time + print("graph creation time ", graph_creation_time) + exit() + + preprocess_start_time = time.process_time() + # Load data + data = FlowDataset(output_dir=config['output_dir'], level=config['level'], log=log) + data.shuffle(seed=seed) + + dataset = data.dataset + dataset = dataset.sample(frac=1) + + train = dataset[:config['n_train_samples']] + test = dataset[config['n_train_samples']:] + + train_input, train_output = train.x, train.y + test_input, test_output = test.x, test.y + + train_input = prepare_data(train_input) + train_output = prepare_data(train_output) + + test_input = prepare_data(test_input) + test_output = prepare_data(test_output) + + # sc_X = StandardScaler() + # sc_y = StandardScaler() + # train_input = sc_X.fit_transform(train_input) + # train_output = sc_y.fit_transform(train_output.reshape(-1,1)) + # test_input = sc_X.fit_transform(test_input) + # test_output = sc_y.fit_transform(test_output.reshape(-1,1)) + #train_input, train_output, test_input, test_output = split_dataset(dataset) + + preprocess_time = time.process_time() - preprocess_start_time + preprocess_time = preprocess_time + graph_creation_time + learning_time_start = time.process_time() + + print("train 
input ", train_input.shape) + print("train output ", train_output.shape) + + svr_rbf = SVR(kernel='rbf', verbose=True) # 'linear' kernel fitting is never-ending and 'poly' kernel gives very bad score (e.g. -2450), sigmoid gives also bad score (e.g. -125) + svr_rbf.fit(train_input, train_output) + train_error = svr_rbf.score(train_input, train_output) + + #print("svr_rbf.get_params() ", svr_rbf.get_params()) + total_steps = 0 + + #test_input = sc_X.fit_transform(test_input) + test_error = svr_rbf.score(test_input, test_output) + + targets = test_output + train_targets = train_output + + # test_y = sc_y.fit_transform(test.y.to_numpy().reshape(-1,1)) + + predictions = svr_rbf.predict(test_input) + + print("train error ", train_error) + print("test error ", test_error) + + train_predictions = svr_rbf.predict(train_input) + #train_predictions = np.squeeze(train_predictions) + + learning_time = time.process_time() - learning_time_start + print("learning time ", learning_time) + + val_targets = [] + + orig_targets = targets + orig_predictions = predictions + print("MSE ", np.mean((predictions - targets) ** 2)) + + if log: + targets = np.exp(targets) + predictions = np.exp(predictions) + + print("np.var(target-predictions) ", np.var(targets - predictions)) + + if not stats: + #plot_loss(gnn._train_loss, gnn._val_loss) + analyze_results(targets, predictions) + + import matplotlib.pyplot as plt + + # plt.hist(train_output, bins=50, alpha=0.5, label='train target', density=True) + # plt.hist(train_predictions, bins=50, alpha=0.5, label='train predictions', density=True) + # + # # plt.hist(targets - predictions, bins=50, alpha=0.5, label='predictions', density=True) + # plt.legend(loc='upper right') + # # plt.xlim(-0.5, 1000) + # plt.yscale('log') + # plt.show() + + plt.hist(targets, bins=50, alpha=0.5, label='target', density=True) + plt.hist(predictions, bins=50, alpha=0.5, label='predictions', density=True) + + # plt.hist(targets - predictions, bins=50, alpha=0.5, 
label='predictions', density=True) + plt.legend(loc='upper right') + # plt.xlim(-0.5, 1000) + plt.yscale('log') + plt.show() + + #predict_l_0_start_time = time.process_time() + l_0_targets, l_0_predictions, predict_l_0_time = predict_level_zero_SVR(svr_rbf, config['l_0_output_dir'], + config['l_0_hdf_path'], + config['mesh'], batch_size, log, + stats=stats, + corr_field_config=config['corr_field_config'], + seed=seed) + + val_predictions = [] + + if stats: + l1_sample_time = preprocess_time / len(data) + learning_time / len(data) + l0_sample_time = predict_l_0_time / len(l_0_targets) + + # print("targets ", targets) + # print("predictions ", predictions) + + # orig_max_vars, predict_max_vars = process_mlmc(hdf_path, sampling_info_path, ref_mlmc_file, targets, predictions, train_targets, + # train_predictions, + # val_targets, l_0_targets, + # l_0_predictions, l1_sample_time, l0_sample_time, nn_level=level, replace_level=replace_level, + # stats=stats) + + return svr_rbf, targets, predictions, learning_time, train_targets, train_predictions, \ + val_targets, val_predictions, l_0_targets, l_0_predictions, l1_sample_time, l0_sample_time, total_steps, None, None, None + + save_times(config['save_path'], False, (preprocess_time, len(data)), learning_time, (predict_l_0_time, len(l_0_targets))) + save_load_data(config['save_path'], False, targets, predictions, train_targets, train_predictions, val_targets, l_0_targets, + l_0_predictions) + + +def predict_level_zero_SVR(nn, output_dir, hdf_path, mesh, batch_size=1000, log=False, stats=False, corr_field_config=None, seed=1234): + #graph_creator(output_dir, hdf_path, mesh, level=0) + sample_time = 0 + if corr_field_config: + sample_time = corr_field_sample_time(mesh, corr_field_config) + + # Load data + data = FlowDataset(output_dir=output_dir, log=log) + data.shuffle(seed=seed) + dataset = data.dataset[:] + + predict_time_start = time.process_time() + test_input = prepare_data(dataset.x) + targets = prepare_data(dataset.y) 
+ #print("data prepared") + + predictions = [] + for i in range(0, len(test_input), batch_size): + predictions.extend(nn.predict(test_input[i:i + batch_size])) + predictions = np.array(predictions) + #print("predictison shape ", predictions.shape) + predictions = np.squeeze(predictions) + + if log: + targets = np.exp(targets) + predictions = np.exp(predictions) + # analyze_results(targets, predictions) + predict_time = time.process_time() - predict_time_start + return targets, predictions, predict_time + sample_time * len(data) + + +def statistics(config): + n_subsamples = 25 + + model_title, mch_l_model, log = config['machine_learning_model'] + model_data = {} + model_data["log"] = log + + + # seeds = [] + # for i in range(n_subsamples): + # seeds.append(i * 125) + + if not os.path.isdir(config['save_path']): + os.makedirs(config['save_path']) + + if os.path.exists(os.path.join(config['save_path'], "dataset_config.pkl")): + os.remove(os.path.join(config['save_path'], "dataset_config.pkl")) + + # create a binary pickle file + with open(os.path.join(config['save_path'], "dataset_config.pkl"), "wb") as writer: + pickle.dump(config["dataset_config"], writer) + + else: + print("dir exists {}".format(config['save_path'])) + exit() + + for i in range(n_subsamples): + iter_dir = os.path.join(config['save_path'], "{}".format(i)) + if not os.path.isdir(iter_dir): + os.makedirs(iter_dir) + + config['iter_dir'] = iter_dir + + gnn, targets, predictions, learning_time, train_targets, train_predictions, \ + val_targets, val_predictions, l_0_targets, l_0_predictions, l1_sample_time, l0_sample_time,\ + total_steps, targets_to_est, predictions_to_est = \ + mch_l_model(config, stats=True, train=config.get('train_model', True), log=log, index=i) + + if config['save_model']: + model_data["model"] = gnn._model + model_data["train_loss"] = gnn._train_loss + model_data["train_acc"] = gnn._train_acc + model_data["val_loss"] = gnn._val_loss + model_data["test_loss"] = gnn._test_loss + 
model_data["learning_rates"] = gnn._learning_rates + model_data["test_targets"] = targets + model_data["test_predictions"] = predictions + model_data["train_targets"] = train_targets + model_data["train_predictions"] = train_predictions + model_data["val_targets"] = val_targets + model_data["val_predictions"] = val_predictions + model_data["l_0_targets"] = l_0_targets + model_data["l_0_predictions"] = l_0_predictions + model_data["l1_sample_time"] = l1_sample_time + model_data["l0_sample_time"] = l0_sample_time + model_data["total_steps"] = total_steps + model_data["learning_times"] = learning_time + model_data["targets_to_est"] = targets_to_est + model_data["predictions_to_est"] = predictions_to_est + + save_statistics(iter_dir, model_data) + + # save_times(save_path, False, (preprocess_time, len(data)), learning_time, (predict_l_0_time, len(l_0_targets))) + # save_load_data(save_path, False, targets, predictions, train_targets, val_targets, l_0_targets, l_0_predictions) + + # for i in range(len(train_losses)): + # print("train loss ", train_losses[i]) + # print("test loss ", test_losses[i]) + # analyze_results(all_targets[i], all_predictions[i]) + # print("learning time ", learning_times[i]) + # print("##################################################") + + return analyze_statistics(config) + + # plot_loss(train_losses, val_losses) + # analyze_results(np.mean(all_test_outputs, axis=0), np.mean(all_predictions, axis=0)) + # analyze_results(np.var(all_test_outputs, axis=0), np.var(all_predictions, axis=0)) + # + # estimate_density(np.mean(all_test_outputs, axis=0), title="Test outputs") + # estimate_density(np.mean(all_predictions, axis=0), title="Predictions") + + +def save_statistics(save_dir_path, model_data): + for file_name, data in model_data.items(): + if file_name == "model" and data is not None: + data.save(os.path.join(save_dir_path, file_name)) + else: + np.save(os.path.join(save_dir_path, file_name), data) + + +def load_statistics(dir_path): + 
models_data = {} + models_data["model"] = [] + models_data["train_loss"] = [] + models_data["train_acc"] = [] + models_data["val_loss"] = [] + models_data["test_loss"] = [] + models_data["learning_rates"] = [] + models_data["test_targets"] = [] + models_data["test_predictions"] = [] + models_data["train_targets"] = [] + models_data["train_predictions"] = [] + models_data["val_targets"] = [] + models_data["val_predictions"] = [] + models_data["l_0_targets"] = [] + models_data["l_0_predictions"] = [] + models_data["l1_sample_time"] = [] + models_data["l0_sample_time"] = [] + models_data["total_steps"] = [] + models_data["learning_times"] = [] + models_data["log"] = [] + models_data["dataset_config"] = [] + models_data["targets_to_est"] = [] + models_data["predictions_to_est"] = [] + + #dirs = (os.path.split(dir_path)[-1]).split("_") + n_iters = 25 + for i in range(n_iters): + data_dir_path = os.path.join(dir_path, str(i)) + if not os.path.isdir(data_dir_path): + print("data dir not exists {}".format(data_dir_path)) + break + if os.path.exists(os.path.join(data_dir_path, 'model')): + models_data['model'].append(keras.models.load_model(os.path.join(data_dir_path, 'model'))) + for file in glob.glob(os.path.join(data_dir_path, "*.npy")): + file_name = os.path.split(file)[-1] + file_name = file_name.split(".")[0] + # if file_name not in models_data: + # print("file name ", file_name) + # models_data[file_name] = [] + # print("np.load(file, allow_pickle=True) ", np.load(file, allow_pickle=True)) + # exit() + models_data[file_name].append(np.load(file, allow_pickle=True)) + + if os.path.exists(os.path.join(data_dir_path, "dataset_config.pkl")): + # Save config to Pickle + import pickle + # create a binary pickle file + with open(os.path.join(data_dir_path, "dataset_config.pkl"), "rb") as reader: + dataset_config = pickle.load(reader) + models_data["dataset_config"].append(dataset_config) + + return models_data + + +def plot_sse(data_nn, data_mlmc, x_label="ith moment", 
y_label="MSE", title=""): + import matplotlib + #matplotlib.rcParams.update({'font.size': 38}) + matplotlib.rcParams.update({'lines.markersize': 14}) + fig, axes = plt.subplots(1, 1, figsize=(22, 10)) + data = np.array(data_nn) + x = range(data.shape[1]) + axes.set_title(title) + axes.set_xlabel(x_label) + axes.set_ylabel(y_label) + axes.errorbar(x, np.mean(data_nn, axis=0), yerr=np.sqrt(np.var(data_nn, axis=0)), fmt='o', label="NN MLMC", color="red") + axes.errorbar(x, np.mean(data_mlmc, axis=0), yerr=np.sqrt(np.var(data_mlmc, axis=0)), fmt='o', label="MLMC", color="blue") + fig.legend() + fig.savefig("{}.pdf".format(title)) + fig.show() + + +def compare_models(model_1, model_2, config): + check_loss(config, model_1, dataset_config=config["dataset_config"]) + check_loss(config, model_2, dataset_config=config["dataset_config"]) + + exit() + + +def check_loss(config, model, log=True, dataset_config={}): + if model is None: + return + batch_size = config['batch_size'] + + config['dataset_config'] = dataset_config + + print("config ", config) + print("dataset config ", config["dataset_config"]) + + data = FlowDataset(output_dir=config['output_dir'], level=config['level'], log=log, config=config, index=None) + data = data # [:10000] + + data.a = config['conv_layer'].preprocess(data.a) + data.a = sp_matrix_to_sp_tensor(data.a) + + train_data_len = config["n_train_samples"] + + idx = 0 + data_tr = data[idx * train_data_len: idx * train_data_len + train_data_len] + data_te = data.get_test_data(idx, train_data_len) + + print("len(datate) ", len(data_te)) + print("batch size ", batch_size) + + loader_tr = MixedLoader(data_tr, batch_size=batch_size) + loader_te = MixedLoader(data_te, batch_size=batch_size) + + train_targets, train_predictions = model_predict(model, loader_tr) + train_predictions = np.squeeze(train_predictions) + + test_targets, test_predictions = model_predict(model, loader_te) + test_predictions = np.squeeze(test_predictions) + + #print("(train_predictions 
- train_targets) ", (train_predictions - train_targets)) + + train_MSE = np.mean((train_predictions - train_targets) ** 2) + train_bias = np.mean((train_targets - np.mean(train_predictions))**2) + train_variance = np.mean((train_predictions - np.mean(train_predictions))**2) + train_variance_2 = np.var(train_predictions) + + test_MSE = np.mean((test_predictions - test_targets) ** 2) + test_bias = np.mean((test_targets - np.mean(test_predictions)) ** 2) + test_variance = np.mean((test_predictions - np.mean(test_predictions)) ** 2) + test_variance_2 = np.var(test_predictions) + + # print("test targets ", np.sort(test_targets)[:10]) + # print("test predictions ", test_predictions) + + print("train MSE: {}, test MSE: {}".format(train_MSE, test_MSE)) + + print("train MSE: {}, bias: {}, variance: {}, var2: {}".format(train_MSE, train_bias, train_variance, train_variance_2)) + print("test MSE: {}, bias: {}, variance: {}, var2: {}".format(test_MSE, test_bias, test_variance, test_variance_2)) + + exit() + + conv_layers = {} + dense_layers = {} + flatten_input = [] + flatten_output = [] + + +def model_predict(model, loader): + targets = [] + predictions = [] + step = 0 + for batch in loader: + step += 1 + inputs, target = batch + targets.extend(target) + predictions.extend(model(inputs, training=False)) + + if step == loader.steps_per_epoch: + return targets, predictions + + return targets, predictions + + +def predict_data(config, model, mesh_file, log=True): + if model is None: + return + batch_size = config['batch_size'] + data = FlowDataset(output_dir=config['output_dir'], level=config['level'], log=log, config=config, index=0, + predict=True) + data = data # [:10000] + + data.a = config['conv_layer'].preprocess(data.a) + data.a = sp_matrix_to_sp_tensor(data.a) + + # idx = 0 + # data_te = data.get_test_data(idx, train_data_len) + data_te = data[-1:] + + print("len(datate) ", len(data_te)) + print("batch size ", batch_size) + + # We use a MixedLoader since the dataset is 
in mixed mode + loader = MixedLoader(data_te, batch_size=batch_size) + + conv_layers = {} + dense_layers = {} + flatten_input = [] + flatten_output = [] + + step = 0 + for batch in loader: + if step == loader.steps_per_epoch: + break + inputs, target = batch + x, a = inputs + + print("x ", x) + + for conv_index, conv_layer in enumerate(model._conv_layers): + if conv_index not in conv_layers: + conv_layers[conv_index] = [[], [], []] + conv_layers[conv_index][0].extend(x) # inputs + print("conv_layer.kernel.numpy().shape", conv_layer.kernel.numpy().shape) + conv_layers[conv_index][1].extend(conv_layer.kernel.numpy()) # weights (kernel) + conv_out = conv_layer([x, a]) + + print("conv out ", conv_out) + conv_layers[conv_index][2].extend(conv_out) # outputs + + flatten_input = conv_layers[conv_index][2][-1] + # flatten_output = model.flatten(conv_out) + # + # print("flatten output ", flatten_output) + + prev_layer_input = conv_out + prev_layer = model.flatten + + print("flatten output ", flatten_output) + print("model._dense_layers ", model._dense_layers) + + for index, dense_layer in enumerate(model._dense_layers): + # if index == 1: + # break + + if index not in dense_layers: + dense_layers[index] = [[], [], []] + + if prev_layer is None: + prev_layer = model._dense_layers[index - 1] + + print("dense layer ", dense_layer) + print("dense layer ", dense_layer.weights) + print("prev layer ", prev_layer) + print("prev layer ", prev_layer.weights) + + # + # print("prev layer ", prev_layer.weights) + + #print("dense layer kernel", dense_layer.kernel) + #print("model.flatten(conv_out) ", model.flatten(conv_out)) + + print("prev layer input ", prev_layer_input) + + dense_layers[index][0].extend(prev_layer(prev_layer_input)) # inputs + dense_layers[index][1].extend(dense_layer.weights) # weights (kernel) + dense_layers[index][2].extend(dense_layer(prev_layer(prev_layer_input))) # outputs + + prev_layer_input = prev_layer(prev_layer_input) + + prev_layer = None + + step += 1 + 
+ plot_progress(conv_layers, dense_layers, flatten_output, mesh_file=mesh_file) + + +def remove_empty(data_1, data_2): + new_data_1 = [] + new_data_2 = [] + + # print("data 1 ", data_1) + # print("data 2 ", data_2) + + for d1, d2 in zip(data_1, data_2): + if len(d1) > 0 and len(d2) > 0: + new_data_1.append(d1) + new_data_2.append(d2) + + # print("new data ", new_data_1) + # print("new data ", new_data_2) + return np.array(new_data_1), np.array(new_data_2) + + +def remove_outliers(data, limit): + new_data = [] + for d in data: + if d < limit: + new_data.append(d) + return new_data + + +def process_data(data_dict): + new_dict = data_dict + for tag, d in data_dict.items(): + print("tag ", tag) + print("d ", d) + + if tag in ["train_predictions", "train_targets", "test_predictions", "test_targets"]: + dt = [] + + min_length = 10**9 + for item in data_dict[tag]: + if len(item) < min_length and len(item) > 0: + min_length = len(item) + + for item in data_dict[tag]: + dt.append(item[:min_length]) + # print("dt " ,dt) + # print("array dt shape ", np.array(dt).shape) + # print("tag ", tag) + + new_dict[tag] = np.array(dt) + + return new_dict + + +def analyze_statistics(config, get_model=True): + if not os.path.isdir(config['save_path']): + print("dir not exists") + exit() + + data_dict = load_statistics(config['save_path']) + + data_dict = process_data(data_dict) + + rescale_data = True + + # print("train predictions type ", type(data_dict["train_predictions"])) + # print("train predictions type ", type(data_dict["train_predictions"][0])) + # print("train predictions shape ", np.array(data_dict["train_predictions"]).shape) + # print("train predictions ", data_dict["train_predictions"]) + # print("train predictions as matrix shape", np.asmatrix(np.array(data_dict["train_predictions"])).shape) + + # print("data dict ", data_dict) + + # for key, data_dict in models_data.items(): + # print("model: {}".format(key)) + + mlmc_n_collected_all = [] + nn_n_collected_all = [] + 
n_ops_all = [] + n_ops_predict_all = [] + + all_mlmc_moments_mean = [] + all_nn_moments_mean = [] + + all_mlmc_moments_var = [] + all_nn_moments_var = [] + + mlmc_times = [] + nn_times = [] + mlmc_times_levels = [] + nn_times_levels = [] + + mlmc_l_vars = [] + mlmc_vars = [] + nn_vars = [] + nn_l_vars = [] + mlmc_l_means = [] + nn_l_means = [] + mlmc_vars_mse = [] + nn_vars_mse = [] + mlmc_means_mse = [] + nn_means_mse = [] + mlmc_means_diff = [] + nn_means_diff = [] + + mlmc_vars_mse_2 = [] + nn_vars_mse_2 = [] + mlmc_nn_vars_mse_2 = [] + mlmc_means_mse_2 = [] + nn_means_mse_2 = [] + mlmc_nn_means_mse_2 = [] + mlmc_means_diff_2 = [] + nn_means_diff_2 = [] + mlmc_nn_means_diff_2 = [] + + mlmc_moments_mean = [] + mlmc_nn_moments_mean = [] + mlmc_moments_var = [] + mlmc_nn_moments_var = [] + + kl_mlmc_all = [] + kl_nn_all = [] + + orth_mlmc_means_mse = [] + orth_nn_means_mse = [] + + train_MSE_list = [] + train_bias = [] + train_variance = [] + test_MSE_list = [] + test_bias = [] + test_variance = [] + train_RSE_list = [] + train_RMSE_list = [] + train_MAE_list = [] + train_relRMSE_list = [] + test_relRMSE_list = [] + + + all_train_samples = [] + all_test_samples = [] + + test_RSE_list = [] + test_RMSE_list = [] + test_MAE_list = [] + + + limit = 5 # 0.008#0.01#0.0009 + #limit = 0.37 + + for i in range(len(data_dict["test_targets"])): + # print("index i ", i) + # if i == 4: + # continue + + # if i == 1: + # continue + + #print("index ", i) + + # if i not in [0]: + # continue + + # if i in [2, 11, 12]: + # continue + + # if i in [7, 13, 14]: + # continue + + # if i not in [13]: + # continue + + predictions = data_dict["test_predictions"][i] + targets = data_dict["test_targets"][i] + train_predictions = data_dict["train_predictions"][i] + train_targets = data_dict["train_targets"][i] + val_predictions = data_dict["val_predictions"][i] + val_targets = data_dict["val_targets"][i] + l_0_predictions = data_dict["l_0_predictions"][i] + l_0_targets = 
data_dict["l_0_targets"][i] + l1_sample_time = data_dict["l1_sample_time"][i] + l0_sample_time = data_dict["l0_sample_time"][i] + total_steps = data_dict["total_steps"][i] + learning_time = data_dict["learning_times"][i] + print("learning time ", learning_time) + + try: + model = data_dict["model"][i] + model_train_loss = data_dict["train_loss"][i] + if "train_acc" in data_dict and len(data_dict["train_acc"]) > 0: + model_train_acc = data_dict["train_acc"][i] + else: + model_train_acc = None + model_val_loss = data_dict["val_loss"][i] + model_test_loss = data_dict["test_loss"][i] + model_learning_rates = data_dict["learning_rates"][i] + except: + model = None + + if model is not None: + plot_loss(model_train_loss, model_val_loss, model_train_acc) + #plot_learning_rate(model_learning_rates) + #print("model learning rates ", model_learning_rates) + + print("model ", model) + print("dir(model.optimizer) ", dir(model.optimizer)) + #print("model weights ", model.weights) + print("model.optimizer", model.optimizer) + # print("model.optimizer", K.eval(model.optimizer.lr)) + # exit() + + if rescale_data: + if "dataset_config" in data_dict: + dataset_config = data_dict["dataset_config"][i] + + if dataset_config.get('output_normalization', False): + min_out = dataset_config.get('min_output') + max_out = dataset_config.get('max_output') + + targets = targets * (max_out - min_out) + min_out + predictions = predictions * (max_out - min_out) + min_out + train_targets = train_targets * (max_out - min_out) + min_out + train_predictions = train_predictions * (max_out - min_out) + min_out + + if dataset_config.get('output_scale', False): + # mean_targets = np.mean(targets) + # var_targets = np.var(targets) + + mean_targets = dataset_config.get('mean_output', False) + var_targets = dataset_config.get('var_output', False) + + targets = var_targets * targets + mean_targets + predictions = var_targets * predictions + mean_targets + + # mean_l_0_targets = mean_targets + # var_l_0_targets 
= var_targets + + train_targets = var_targets * train_targets + mean_targets + train_predictions = var_targets * train_predictions + mean_targets + + if dataset_config.get('output_log', False): + targets = np.exp(targets) + predictions = np.exp(predictions) + train_predictions = np.exp(train_predictions) + train_targets = np.exp(train_targets) + + if dataset_config.get('first_log_output', False): + targets = np.exp(targets) + predictions = np.exp(predictions) + train_predictions = np.exp(train_predictions) + train_targets = np.exp(train_targets) + + iter_test_MSE = np.mean((predictions - targets) ** 2) + + iter_test_bias = np.sqrt(np.mean((targets - np.mean(predictions)) ** 2)) + iter_test_variance = np.mean((predictions - np.mean(predictions)) ** 2) + + iter_train_MSE = np.mean((train_predictions - train_targets) ** 2) + + iter_train_bias = np.sqrt(np.mean((train_targets - np.mean(train_predictions)) ** 2)) + iter_train_variance = np.mean((train_predictions - np.mean(train_predictions)) ** 2) + + all_test_samples.append(targets) + all_train_samples.append(train_targets) + + mean_t = np.mean(targets) + iter_test_RSE = np.sum((predictions - targets) ** 2) / np.sum((targets - mean_t) ** 2) + + mean_tr = np.mean(train_targets) + iter_train_RSE = np.sum((train_predictions - train_targets) ** 2) / np.sum((train_targets - mean_tr) ** 2) + + iter_test_MAE = np.abs((predictions - targets)) + iter_train_MAE = np.abs((train_predictions - train_targets)) + + train_MSE_list.append(iter_train_MSE) + train_RSE_list.append(iter_train_RSE) + train_RMSE_list.append(np.sqrt(iter_train_MSE)) + train_relRMSE_list.append(np.sqrt(iter_train_MSE)/np.mean(train_targets)) + train_MAE_list.append(iter_train_MAE) + + train_bias.append(iter_train_bias) + train_variance.append(iter_train_variance) + + test_MSE_list.append(iter_test_MSE) + test_RSE_list.append(iter_test_RSE) + test_RMSE_list.append(np.sqrt(iter_test_MSE)) + test_relRMSE_list.append(np.sqrt(iter_train_MSE) / np.mean(targets)) + 
test_MAE_list.append(iter_test_MAE) + + test_bias.append(iter_test_bias) + test_variance.append(iter_test_variance) + + if iter_test_MSE > limit: + continue + + print("iter test MSE: {}, bias: {}, variance:{} ".format(iter_test_MSE, iter_test_bias, iter_test_variance)) + print("iter train MSE: {}, bias: {}, variance:{} ".format(iter_train_MSE, iter_train_bias, iter_train_variance)) + + + # if "current_patience" in data_dict: + # current_patience = data_dict["current_patience"][i] + # print("current patience ", current_patience) + + dataset_config = {} + if 'dataset_config' in data_dict and len(data_dict.get("dataset_config")) > 0: + dataset_config = data_dict.get("dataset_config")[i] + else: + if os.path.exists(os.path.join(config['save_path'], "dataset_config.pkl")): + # Save config to Pickle + import pickle + # create a binary pickle file + with open(os.path.join(config['save_path'], "dataset_config.pkl"), "rb") as reader: + dataset_config = pickle.load(reader) + + config['dataset_config'] = dataset_config + + #check_loss(config, model, dataset_config=config["dataset_config"]) + + #predict_data(config, model, mesh_file=config["mesh"]) + #exit() + + print("total steps ", total_steps) + #try: + mlmc_n_collected, nn_mlmc_n_collected, n_ops, n_ops_predict, orig_moments_mean, predict_moments_mean, \ + ref_moments_mean, orig_level_params, nn_level_params, kl_mlmc, kl_nn, target_variance, \ + orig_orth_moments, predict_orth_moments, ref_orth_moments,\ + ref_orig_moments, ref_predict_moments, mlmc_predict_moments = process_mlmc(config['hdf_path'], + config['sampling_info_path'], + config['ref_mlmc_file'], + data_dict["test_targets"][i], + data_dict["test_predictions"][i], + data_dict["train_targets"][i], + data_dict["train_predictions"][i], + data_dict["val_targets"][i], + data_dict["l_0_targets"][i], + data_dict["l_0_predictions"][i], + l1_sample_time, + l0_sample_time, + nn_level=config['level'], + replace_level=config['replace_level'], + 
mlmc_hdf_file=config['mlmc_hdf_path'], + stats=True, + learning_time=learning_time, + dataset_config=dataset_config, + targets_to_est=data_dict["targets_to_est"][i], + predictions_to_est=data_dict["predictions_to_est"][i]) + # except: + # continue + + mlmc_n_collected_all.append(mlmc_n_collected) + nn_n_collected_all.append(nn_mlmc_n_collected) + n_ops_all.append(n_ops) + n_ops_predict_all.append(n_ops_predict) + mlmc_times_levels.append(np.array(mlmc_n_collected) * np.array(n_ops)) + mlmc_times.append(np.sum(np.array(mlmc_n_collected) * np.array(n_ops))) + nn_times.append(np.sum(np.array(nn_mlmc_n_collected) * np.array(n_ops_predict))) + nn_times_levels.append(np.array(nn_mlmc_n_collected) * np.array(n_ops_predict)) + + mlmc_l_vars.append(orig_moments_mean.l_vars) + nn_l_vars.append(predict_moments_mean.l_vars) + + mlmc_vars.append(orig_moments_mean.var) + nn_vars.append(predict_moments_mean.var) + + mlmc_l_means.append(orig_moments_mean.l_means) + nn_l_means.append(predict_moments_mean.l_means) + + mlmc_vars_mse.append((ref_moments_mean.var - orig_moments_mean.var) ** 2) + nn_vars_mse.append((ref_moments_mean.var - predict_moments_mean.var) ** 2) + + mlmc_means_mse.append((ref_moments_mean.mean - orig_moments_mean.mean) ** 2) + nn_means_mse.append((ref_moments_mean.mean - predict_moments_mean.mean) ** 2) + + mlmc_means_diff.append(np.abs(ref_moments_mean.mean - orig_moments_mean.mean)) + nn_means_diff.append(np.abs(ref_moments_mean.mean - predict_moments_mean.mean)) + + + ####################################### + # Moments same domain for comparison # + ####################################### + + mlmc_vars_mse_2.append((ref_orig_moments[0].var - ref_orig_moments[1].var) ** 2) + nn_vars_mse_2.append((ref_predict_moments[0].var - ref_predict_moments[1].var) ** 2) + + mlmc_means_mse_2.append((ref_orig_moments[0].mean - ref_orig_moments[1].mean) ** 2) + nn_means_mse_2.append((ref_predict_moments[0].mean - ref_predict_moments[1].mean) ** 2) + + 
mlmc_means_diff_2.append(np.abs(ref_orig_moments[0].mean - ref_orig_moments[1].mean)) + nn_means_diff_2.append(np.abs(ref_predict_moments[0].mean - ref_predict_moments[1].mean)) + + mlmc_nn_vars_mse_2.append((mlmc_predict_moments[0].var - mlmc_predict_moments[1].var) ** 2) + mlmc_nn_means_mse_2.append((mlmc_predict_moments[0].mean - mlmc_predict_moments[1].mean) ** 2) + mlmc_nn_means_diff_2.append(np.abs(mlmc_predict_moments[0].mean - mlmc_predict_moments[1].mean)) + + + mlmc_moments_mean.append(mlmc_predict_moments[0].mean) + mlmc_moments_var.append(mlmc_predict_moments[0].var) + mlmc_nn_moments_mean.append(mlmc_predict_moments[1].mean) + mlmc_nn_moments_var.append(mlmc_predict_moments[1].var) + + # print("np.min(len(ref_orth_moments.mean), len(orig_orth_moments.mean)) ", np.min(len(ref_orth_moments.mean), len(orig_orth_moments.mean))) + # print("ref_orth_moments.mean[:np.min(len(ref_orth_moments.mean), len(orig_orth_moments.mean))] ", ref_orth_moments.mean[:np.min(len(ref_orth_moments.mean), len(orig_orth_moments.mean))]) + if ref_orth_moments is not None: + orth_mlmc_means_mse.append( + (ref_orth_moments.mean[:np.min((len(ref_orth_moments.mean), len(orig_orth_moments.mean)))] - + orig_orth_moments.mean[:np.min((len(ref_orth_moments.mean), len(orig_orth_moments.mean)))]) ** 2) + orth_nn_means_mse.append( + (ref_orth_moments.mean[:np.min((len(ref_orth_moments.mean), len(predict_orth_moments.mean)))] + - predict_orth_moments.mean[ + :np.min((len(ref_orth_moments.mean), len(predict_orth_moments.mean)))]) ** 2) + + print("orig moments mean ", orig_moments_mean.mean) + all_mlmc_moments_mean.append(orig_moments_mean.mean) + all_nn_moments_mean.append(predict_moments_mean.mean) + + all_mlmc_moments_var.append(orig_moments_mean.var) + all_nn_moments_var.append(predict_moments_mean.var) + + kl_mlmc_all.append(kl_mlmc) + kl_nn_all.append(kl_nn) + + moments_plot = plots.MomentsPlots(log_var_y=True) + + print("all mlmc moments mean ", all_mlmc_moments_mean) + 
print("np.mean(all_mlmc_moments_var, axis=0) ", np.mean(all_mlmc_moments_var, axis=0)) + moments_plot.add_moments((np.mean(all_mlmc_moments_mean, axis=0), + np.mean(all_mlmc_moments_var, axis=0)), label="mlmc moments") + moments_plot.add_moments((np.mean(all_nn_moments_mean, axis=0), + np.mean(all_nn_moments_var, axis=0)), label="nn moments") + moments_plot.add_moments((orig_moments_mean.mean, + orig_moments_mean.var), label="orig moments") + moments_plot.show(None) + + moments_plot_2 = plots.MomentsPlots(log_var_y=True, title="Moments MLMC domain") + moments_plot_2.add_moments((np.mean(mlmc_moments_mean, axis=0), + np.mean(mlmc_moments_var, axis=0)), label="mlmc moments") + moments_plot_2.add_moments((np.mean(mlmc_nn_moments_mean, axis=0), + np.mean(mlmc_nn_moments_var, axis=0)), label="mlmc nn moments") + + moments_plot_2.show(None) + + display_vars(mlmc_vars, nn_vars, target_variance=target_variance) + + + print("mlmc l vars list ", mlmc_l_vars) + + print("mlmc l vars ", np.mean(mlmc_l_vars, axis=0)) + print("nn l vars ", np.mean(nn_l_vars, axis=0)) + + print("var mlmc l vars ", np.var(mlmc_l_vars, axis=0)) + print("var nn l vars ", np.var(nn_l_vars, axis=0)) + + print("MAX mlmc l vars ", np.max(np.mean(mlmc_l_vars, axis=0), axis=1)) + print("MAX nn l vars ", np.max(np.mean(nn_l_vars, axis=0), axis=1)) + # + print("mlmc means MSE ", np.mean(mlmc_means_mse, axis=0)) + print("nn means MSE ", np.mean(nn_means_mse, axis=0)) + + print("mlmc means diff ", np.mean(mlmc_means_diff, axis=0)) + print("nn means diff ", np.mean(nn_means_diff, axis=0)) + + print("mlmc times ", mlmc_times) + print("nn times ", nn_times) + + print("mlmc times levels ", mlmc_times_levels) + print("nn times levels ", nn_times_levels) + + print("n ops all ", n_ops_all) + print("n ops predict all ", n_ops_predict_all) + print("len(nn times) ", len(nn_times)) + + mlmc_total_time = np.mean(mlmc_times) + nn_total_time = np.mean(nn_times) + + print("#############################") + print("mlmc total 
time ", mlmc_total_time) + print("nn total time ", nn_total_time) + print("#############################") + print("KL mlmc ", np.mean(kl_mlmc_all)) + print("KL nn ", np.mean(kl_nn_all)) + + print("MC: to 10: {}, above: {}".format(np.sum(np.mean(mlmc_means_mse, axis=0)[:10]), + np.sum(np.mean(mlmc_means_mse, axis=0)[10:]))) + print("NN: to 10: {}, above: {}".format(np.sum(np.mean(nn_means_mse, axis=0)[:10]), + np.sum(np.mean(nn_means_mse, axis=0)[10:]))) + + n_ops_mlmc_mean = np.mean(n_ops_all, axis=0) + n_ops_nn_mean = np.mean(n_ops_predict_all, axis=0) + + # print("n ops all ", n_ops_all) + # print("n ops predict all ", n_ops_predict_all) + # + # print("n ops mlmc mean ", n_ops_mlmc_mean) + # print("n ops nn mean ", n_ops_nn_mean) + + mlmc_n_collected = np.mean(mlmc_n_collected_all, axis=0) + nn_n_collected = np.mean(nn_n_collected_all, axis=0) + + print("mlmc n collected ", mlmc_n_collected_all) + print("nn n collected all ", nn_n_collected_all) + print("mlmc n collected ", mlmc_n_collected) + print("nn n collected ", nn_n_collected) + + plt_var = plots.VarianceNN() + plt_var.set_n_ops(np.mean(n_ops_predict_all, axis=0)) + l_vars = np.mean(mlmc_l_vars, axis=0) + # print("np.squeeze(orig_level_params) ", orig_level_params) + # print("l vars ", l_vars) + # print("np.squeeze(orig_level_params) shape", orig_level_params.shape) + # print("l vars shape", l_vars.shape) + print("orig level params ", orig_level_params) + # plt_var.add_level_variances(np.squeeze(orig_level_params), l_vars) + plt_var.add_level_variances(orig_level_params, l_vars) + + # plt_var.show(None) + # plt_var.show("mlmc_vars") + # + # plt_var = plot.Variance() + l_vars = np.mean(nn_l_vars, axis=0) + # print("nn l vars ", l_vars) + # print("nn level parsm ", nn_level_params) + if len(nn_level_params) > 1: + level_params = np.squeeze(nn_level_params) + else: + level_params = nn_level_params[0] + + print("level params ", level_params) + + + level_params[0] *= 2 + 
plt_var.add_level_variances_nn(level_params, l_vars) + plt_var.show("nn_vars") + plt_var.show(None) + + plot_sse(nn_vars_mse, mlmc_vars_mse, title="moments_var") + plot_sse(nn_means_mse, mlmc_means_mse, title="moments_mean") + plot_sse(mlmc_means_mse, mlmc_means_mse, title="mlmc moments_mean") + + plot_sse(nn_vars_mse_2, mlmc_vars_mse_2, title="moments_var same domain") + plot_sse(nn_means_mse_2, mlmc_means_mse_2, title="moments_mean same domain") + plot_sse(mlmc_means_mse_2, mlmc_means_mse_2, title="mlmc moments_mean same domain") + + + # if ref_orth_moments is not None: + # print("orth nn means mse ", orth_nn_means_mse) + # print("orth mlmc means mse ", orth_mlmc_means_mse) + # plot_sse(orth_nn_means_mse, orth_mlmc_means_mse, title="orthogonal moments_mean") + + data_dict["test_targets"] = np.array(data_dict["test_targets"]) + data_dict["test_predictions"] = np.array(data_dict["test_predictions"]) + data_dict["train_targets"] = np.array(data_dict["train_targets"]) + data_dict["train_predictions"] = np.array(data_dict["train_predictions"]) + data_dict["val_targets"] = np.array(data_dict["val_targets"]) + data_dict["val_predictions"] = np.array(data_dict["val_predictions"]) + + print("data dict train predictions ", data_dict["train_predictions"]) + + # if data_dict["log"][0]: + # # print("np.exp(10)", np.exp(10)) + # print("test targets ", data_dict["test_targets"]) + # print("type test targets ", type(data_dict["test_targets"])) + # + # data_dict["test_targets"], data_dict["test_predictions"] = exp_values(data_dict["test_targets"], data_dict["test_predictions"]) + # data_dict["train_targets"], data_dict["train_predictions"] = exp_values(data_dict["train_targets"], data_dict["train_predictions"]) + # + # # print("test targets ", data_dict["test_targets"]) + # print("test predictions ", data_dict["test_predictions"]) + # + # print("orig max vars ", data_dict["orig_max_vars"]) + # print("predict max vars ", data_dict["predict_max_vars"]) + + # mean_orig_vars = 
np.mean(data_dict["orig_max_vars"], axis=0) + # mean_predict_vars = np.mean(data_dict["predict_max_vars"], axis=0) + total_steps = np.mean(data_dict["total_steps"]) + + # print("mean orig vars ", mean_orig_vars) + # print("mean predict vars ", mean_predict_vars) + print("total steps ", total_steps) + + # print("test targets ", data_dict["test_targets"]) + # print("test predictions ", data_dict["test_predictions"]) + # print("test diff ", data_dict["test_predictions"] - data_dict["test_targets"]) + # print("test diff squared ", (data_dict["test_predictions"] - data_dict["test_targets"])**2) + + # print("(test_predictions - test_targets)**2 ", (data_dict["test_predictions"] - data_dict["test_targets"])**2) + + # print("test targets shape ", data_dict["test_targets"].shape) + + test_MSE = np.mean((data_dict["test_predictions"] - data_dict["test_targets"]) ** 2, axis=1) + print("val predictions ", data_dict["val_predictions"]) + print("val targets ", data_dict["val_targets"]) + #val_MSE = np.mean((data_dict["val_predictions"] - data_dict["val_targets"]) ** 2, axis=1) + test_RMSE = np.sqrt(test_MSE) + + test_MAE = np.mean(np.abs(data_dict["test_predictions"] - data_dict["test_targets"]), axis=1) + + #print("val MSE ", val_MSE) + print("test MSE ", test_MSE) + + all_test_RSE = [] + for index, t_targets in enumerate(data_dict["test_targets"]): + if test_MSE[index] > limit: + continue + mean_t = np.mean(t_targets) + RSE = np.sum((data_dict["test_predictions"][index] - t_targets) ** 2) / np.sum((t_targets - mean_t) ** 2) + all_test_RSE.append(RSE) + + all_train_RSE = [] + try: + for index, t_targets in enumerate(data_dict["train_targets"]): + if test_MSE[index] > limit: + continue + mean_t = np.mean(t_targets) + # print("train predictions index ", data_dict["train_predictions"][index]) + RSE = np.sum((data_dict["train_predictions"][index] - t_targets) ** 2) / np.sum((t_targets - mean_t) ** 2) + all_train_RSE.append(RSE) + except: + pass + + # print("all test RSE ", 
np.mean(all_test_RSE)) + + # Relative squared error + test_RSE = np.sum((data_dict["test_predictions"] - data_dict["test_targets"]) ** 2) / \ + np.sum((data_dict["test_targets"] - np.mean(data_dict["test_targets"])) ** 2) + + print("test RSE ", test_RSE) + + test_RAE = np.sqrt(np.sum((data_dict["test_predictions"] - data_dict["test_targets"]) ** 2)) / \ + np.sqrt(np.sum((data_dict["test_targets"]) ** 2)) + + print("test MSE / mean targets", np.mean(test_MSE) / np.mean(data_dict["test_targets"])) + + print("test RSE ", test_RSE) + print("test RAE ", test_RAE) + print("test_MSE ", test_MSE) + + + for pred, target in zip(data_dict["test_predictions"], data_dict["test_targets"]): + R_squared = r2_score(target, pred) + print("R squared ", R_squared) + + t_mse_sum = [] + for t_mse in test_MSE: + # Note: je mozne odstranit vetsi hodnoty MSE pro L4, protoze by slo dosahnout mensich hodnot pokud by se navysil pocet iteraci nebo by se vysledek pro nejlepsi train + val MSE a ne posledni vysledek + if t_mse > limit: # 0.009: + continue + t_mse_sum.append(t_mse) + + print("t mse ", t_mse_sum) + print("LEN t mse ", len(t_mse_sum)) + print("T MSE sum ", np.mean(t_mse_sum)) + + print("train_predictions ", np.array(data_dict["train_predictions"]).shape) + print("train_targets ", data_dict["train_targets"]) + + data_dict["train_predictions"], data_dict["train_targets"] = remove_empty(data_dict["train_predictions"], + data_dict["train_targets"]) + + print("remove empty train targets ", data_dict["train_targets"]) + + # data_dict["train_predictions"] = np.squeeze(data_dict["train_predictions"]) + + print("train_predictions - train_targets ", data_dict["train_predictions"] - data_dict["train_targets"]) + + train_MSE = np.mean((data_dict["train_predictions"] - data_dict["train_targets"]) ** 2, axis=1) + train_RMSE = np.sqrt(train_MSE) + train_MAE = np.mean(np.abs(data_dict["train_predictions"] - data_dict["train_targets"]), axis=1) + learning_times = data_dict["learning_times"] + + # 
Relative squared error + train_RSE = np.sum((data_dict["train_predictions"] - data_dict["train_targets"]) ** 2) / \ + np.sum((data_dict["train_targets"] - np.mean(data_dict["train_targets"])) ** 2) + + # Relative absolute error + train_RAE = np.sqrt(np.sum((data_dict["train_predictions"] - data_dict["train_targets"]) ** 2)) / \ + np.sqrt(np.sum((data_dict["train_targets"]) ** 2)) + + print("train RSE ", train_RSE) + print("train REA ", train_RAE) + + # plot_data(test_MSE, label="test MSE") + # plot_data(test_MAE, label="test MAE") + + print("NN moments MSE sum ", np.sum(np.mean(nn_means_mse, axis=0))) + + print("mean test MSE ", np.mean(test_MSE)) + print("mean test RSE ", np.mean(test_RSE)) + print("mean test RMSE ", np.mean(test_RMSE)) + print("mean test MAE ", np.mean(test_MAE)) + print("max test MSE ", np.max(test_MSE)) + # print("max test RMSE ", np.max(test_RMSE)) + # print("max test MAE ", np.max(test_MAE)) + + print("train_MSE ", train_MSE) + + print("mean train MSE ", np.mean(train_MSE)) + + print("test RSE ", np.mean(all_test_RSE)) + print("test RSE ", np.mean(all_train_RSE)) + print("mean train RSE ", np.mean(train_RSE)) + print("mean train RMSE ", np.mean(train_RMSE)) + print("mean train MAE ", np.mean(train_MAE)) + print("max train MSE ", np.max(train_MSE)) + # print("max train RMSE ", np.max(train_RMSE)) + # print("max train MAE ", np.max(train_MAE)) + + print("learning time ", learning_times) + print("mean learning time ", np.mean(learning_times)) + print("max learning time ", np.max(learning_times)) + + test_MSE = remove_outliers(test_MSE, limit) + train_MSE = remove_outliers(train_MSE, limit) + print("############# OUTPUT ################") + print("len(train MSE) ", len(train_MSE)) + print("train MSE ", np.mean(train_MSE)) + # print("train MSE sqrt var", np.sqrt(np.var(train_MSE))) + # print("train MSE std", np.std(train_MSE)) + + # output_mult_factor = 1437603411 + # print("orig train MSE ", train_MSE) + # train_MSE = np.array(train_MSE) * 
output_mult_factor + # print("train MSE ", train_MSE) + # test_MSE = np.array(test_MSE) * output_mult_factor + + print("train MSE ", train_MSE) + print("stats.sem(train_MSE) ", stats.sem(train_MSE)) + print("test MSE ", np.mean(test_MSE)) + print("test MSE ", test_MSE) + print("stats.sem(test_MSE) ", stats.sem(test_MSE)) + print("train MSE: {}, bias: {}, variance: {}".format(np.mean(train_MSE_list), np.mean(train_bias), np.mean(train_variance))) + print("test MSE: {}, bias: {}, variance: {}".format(np.mean(test_MSE_list), np.mean(test_bias), + np.mean(test_variance))) + print("SEM train MSE: {}".format(stats.sem(train_MSE_list))) + print("SEM test MSE: {}".format(stats.sem(test_MSE_list))) + # print("test MSE std", np.sqrt(np.var(test_MSE))) + print("mean train sample: {}".format(np.mean(all_train_samples))) + print("mean test sample: {}".format(np.mean(all_test_samples))) + #print("mean sample : {}".format(np.mean(np.array(all_train_samples).flatten() + np.array(all_test_samples).flatten()))) + print("RMSE train: {}, test: {}".format(np.mean(train_RMSE_list), np.mean(test_RMSE_list))) + print("relative RMSE train: {}, test: {}".format(np.mean(train_RMSE_list)/np.mean(all_train_samples), np.mean(test_RMSE_list)/np.mean(all_test_samples))) + print("iter relative RMSE train: {}, test: {}".format(np.mean(train_relRMSE_list), + np.mean(test_relRMSE_list))) + print("RSE train: {}, test: {}".format(np.mean(train_RSE_list), np.mean(test_RSE_list))) + print("MAE train: {}, test: {}".format(np.mean(train_MAE_list), np.mean(test_MAE_list))) + + # print("train RSE ", np.mean(train_RSE)) + # print("test RSE ", np.mean(test_RMSE)) + # + # print("train RMSE ", np.mean(train_RMSE)) + # print("test RMSE ", np.mean(test_RMSE)) + + print("MC mom mean MSE: total: {:0.5g}, to 10: {:0.5g}, above: {:0.5g}".format(np.sum(np.mean(mlmc_means_mse, axis=0)), np.sum(np.mean(mlmc_means_mse, axis=0)[:10]), + np.sum(np.mean(mlmc_means_mse, axis=0)[10:]))) + print("NN mom mean MSE: total: 
def plot_sse(data_nn, data_mlmc, x_label="ith moment", y_label="MSE", title=""):
    """Plot per-moment MSE of plain MC vs. MLMC with a NN meta-model.

    Error bars show the spread (standard deviation) across runs.

    :param data_nn: 2D array-like, per-run per-moment MSE for the meta-model MLMC
    :param data_mlmc: 2D array-like, per-run per-moment MSE for plain MC
    :param x_label: x axis label
    :param y_label: y axis label
    :param title: plot title; also used as the output PDF file name
    """
    import matplotlib
    matplotlib.rcParams.update({'font.size': 16})
    matplotlib.rcParams.update({'lines.markersize': 8})
    #fig, axes = plt.subplots(1, 1, figsize=(22, 10))
    fig, axes = plt.subplots(1, 1, figsize=(8, 5))
    data = np.array(data_nn)
    # One tick per moment index.
    x = range(data.shape[1])
    axes.set_title(title)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    # np.std(...) == np.sqrt(np.var(...)) (population std, ddof=0).
    axes.errorbar(x, np.mean(data_mlmc, axis=0), yerr=np.std(data_mlmc, axis=0), fmt='o', label="MC",
                  color="blue")
    axes.errorbar(x, np.mean(data_nn, axis=0), yerr=np.std(data_nn, axis=0), fmt='o',
                  label="MLMC with meta-model", color="red")
    fig.legend()
    fig.savefig("{}.pdf".format(title))
    fig.show()


def plot_sse_scatter(data_nn, data_mlmc, x_label="ith moment", y_label="MSE", title=""):
    """Scatter plot of mean per-moment MSE of NN MLMC vs. plain MLMC.

    :param data_nn: 2D array-like, per-run per-moment MSE for the meta-model MLMC
    :param data_mlmc: 2D array-like, per-run per-moment MSE for plain MLMC
    :param x_label: x axis label
    :param y_label: y axis label
    :param title: plot title; also used as the output PDF file name
    """
    import matplotlib
    #matplotlib.rcParams.update({'font.size': 38})
    matplotlib.rcParams.update({'lines.markersize': 14})
    fig, axes = plt.subplots(1, 1, figsize=(22, 10))
    data = np.array(data_nn)

    print("data.shape ", data.shape)
    x = range(len(data[0]))
    axes.set_title(title)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.scatter(x, np.mean(data_nn, axis=0), label="NN MLMC", color="red")
    axes.scatter(x, np.mean(data_mlmc, axis=0), label="MLMC", color="blue")
    fig.legend()
    fig.savefig("{}.pdf".format(title))
    fig.show()


def display_vars(mlmc_vars, nn_vars, target_variance, title=""):
    """Plot moment variances of MLMC vs. NN-based MLMC against the target variance.

    Error bars show the spread (standard deviation) across runs; the y axis is
    logarithmic and a horizontal line marks ``target_variance``.

    :param mlmc_vars: 2D array-like, per-run per-moment variances for plain MLMC
    :param nn_vars: 2D array-like, per-run per-moment variances for NN MLMC
    :param target_variance: scalar target variance to display as a reference line
    :param title: output PDF file name (empty by default)
    """
    import matplotlib
    # matplotlib.rcParams.update({'font.size': 38})
    matplotlib.rcParams.update({'lines.markersize': 14})
    fig, axes = plt.subplots(1, 1, figsize=(22, 10))

    x = range(0, len(mlmc_vars[0]))
    axes.set_xlabel("moments")
    axes.set_ylabel("var")
    axes.set_yscale("log")

    axes.axhline(y=target_variance, label="target var: {}".format(target_variance))

    # np.std(...) == np.sqrt(np.var(...)) (population std, ddof=0).
    print("mlmc error bar ", np.std(mlmc_vars, axis=0))
    axes.errorbar(x, np.mean(mlmc_vars, axis=0), yerr=np.std(mlmc_vars, axis=0), fmt='o', label="MLMC vars",
                  color="blue")

    print("mlmc vars ", mlmc_vars)
    print("nn vars ", nn_vars)
    print("nn error bar ", np.std(nn_vars, axis=0))
    axes.errorbar(x, np.mean(nn_vars, axis=0), yerr=np.std(nn_vars, axis=0), fmt='o', label="NN vars",
                  color="red")

    fig.legend()
    fig.savefig("{}.pdf".format(title))
    fig.show()
def set_model_weights(new_model, old_model):
    """Copy trainable parameters from ``old_model`` into ``new_model``.

    Conv layers share the ``kernel``/``bias`` attributes directly (no copy);
    dense layers are copied via ``get_weights()``/``set_weights()``. Both
    models are assumed to have matching ``_conv_layers`` and ``_dense_layers``
    sequences — any extra layers on either side are silently ignored by
    ``zip``.

    :param new_model: model receiving the parameters
    :param old_model: trained model providing the parameters
    """
    for new_conv_layer, old_conv_layer in zip(new_model._conv_layers, old_model._conv_layers):
        # Rebind the variables directly instead of copying numpy values,
        # so the new conv layers reference the trained tensors.
        new_conv_layer.kernel = old_conv_layer.kernel
        new_conv_layer.bias = old_conv_layer.bias

    for new_dense_layer, old_dense_layer in zip(new_model._dense_layers, old_model._dense_layers):
        new_dense_layer.set_weights(old_dense_layer.get_weights())


def set_model_layers(new_model, old_model):
    """Copy layer parameters from ``old_model`` into ``new_model``.

    Kept for backward compatibility: its observable behavior is identical to
    ``set_model_weights``, so it simply delegates to it.

    :param new_model: model receiving the parameters
    :param old_model: trained model providing the parameters
    """
    set_model_weights(new_model, old_model)
graph_creator_preproces_time = time.process_time() + + graph_creator(config['output_dir'], config['hdf_path'], config['mesh'], level=config['level'], + feature_names=config.get('feature_names', [['conductivity']])) + graph_creation_time = time.process_time() - graph_creator_preproces_time + print("graph creation time ", graph_creation_time) + exit() + + preprocess_start_time = time.process_time() + + independent_samples = config.get("independent_samples", False) + + if independent_samples and train: + data = FlowDataset(output_dir=config['output_dir'], level=config['level'], log=log, config=config, + index=index, n_test_samples=100000) + len_all_samples = len(data) + + last_train_sample = index * config['n_train_samples'] + config['n_train_samples'] + last_test_sample = len_all_samples - (index * config['n_train_samples'] + config['n_train_samples']) + + print("last train sample ", last_train_sample) + print("last test sample ", last_test_sample) + + if last_train_sample > last_test_sample: + return + + data_tr = FlowDataset(output_dir=config['output_dir'], level=config['level'], log=log, config=config, + index=index, train_samples=True, independent_sample=True) + + print("len data tr ", len(data_tr)) + + data_te = FlowDataset(output_dir=config['output_dir'], level=config['level'], log=log, config=config, + index=index, predict=True, test_samples=True, independent_samples=True) + + print("len data te ", len(data_te)) + + + else: + if train: + data_tr = FlowDataset(output_dir=config['output_dir'], level=config['level'], log=log, config=config, + index=index, train_samples=True) + data_te = FlowDataset(output_dir=config['output_dir'], level=config['level'], log=log, config=config, + index=index, predict=True, test_samples=True) + else: + data = FlowDataset(output_dir=config['output_dir'], level=config['level'], log=log, config=config, + index=index) + data_tr = data + data_te = data + + + # Dataset preprocess config + config['dataset_config'] = 
data_tr._dataset_config + + preprocess_time = time.process_time() - preprocess_start_time + preprocess_time = preprocess_time + graph_creation_time + print("preprocess time ", preprocess_time) + + if config["predict_dir"] is not None: + # data_te_predict = FlowDataset(output_dir=config['predict_dir'], level=config['level'], log=log, config=config, + # index=index, n_test_samples=50000) + + data_te_predict = FlowDataset(output_dir=config['predict_dir'], config=config, predict=True) + + data_te_predict.a = config['conv_layer'].preprocess(data_te_predict.a) + data_te_predict.a = sp_matrix_to_sp_tensor(data_te_predict.a) + + + + learning_time_start = time.process_time() + data_tr.a = sp_matrix_to_sp_tensor(config['conv_layer'].preprocess(data_tr.a)) + data_te.a = data_tr.a #sp_matrix_to_sp_tensor(config['conv_layer'].preprocess(data_te.a)) + + val_data_len = int(len(data_tr) * config['val_samples_ratio']) + # print("val data len ", val_data_len) + # data_tr, data_va = data_tr.split_val_train(val_data_len) + data_tr, data_va = data_tr[:-val_data_len], data_tr[-val_data_len:] + + print("len data tr ", len(data_tr)) + print("len data va ", len(data_va)) + print("len data te ", len(data_te)) + + # ############################# + # #### OLD version + # ############################# + # # Load data + # data = FlowDataset(output_dir=config['output_dir'], level=config['level'], log=log, config=config, index=seed) + # data = data#[:10000] + # + # # Dataset preprocess config + # config['dataset_config'] = data._dataset_config + # + # #print("len data ", len(data)) + # #data.shuffle(seed=seed) + # preprocess_time = time.process_time() - preprocess_start_time + # #print("preproces time ", preprocess_time) + # preprocess_time = preprocess_time + graph_creation_time + # #print("total preprocess time ", preprocess_time) + # + # learning_time_start = time.process_time() + # data.a = config['conv_layer'].preprocess(data.a) + # data.a = sp_matrix_to_sp_tensor(data.a) + # #train_data_len 
= int(len(data) * 0.8) + # train_data_len = config['n_train_samples'] + # # Train/valid/test split + # #print("train data len ", train_data_len) + # + # if not train: + # data_tr = data + # data_te = data + # else: + # #data_tr = data[seed*train_data_len: seed*train_data_len + train_data_len] + # data_tr = data.get_train_data(seed, train_data_len) + # #print("data tr ", data_tr) + # data_te = data.get_test_data(seed, train_data_len) + # #data_tr, data_te = data[:train_data_len], data[train_data_len:] + # + # #np.random.shuffle(data_tr) + # val_data_len = int(len(data_tr) * config['val_samples_ratio']) + # #print("val data len ", val_data_len) + # #data_tr, data_va = data_tr.split_val_train(val_data_len) + # data_tr, data_va = data_tr[:-val_data_len], data_tr[-val_data_len:] + + + ########################################### + ########################################### + ########################################### + ########################################### + + # print("data tr ", data_tr) + # print("data va ", data_va) + + # print("data_tr len ", len(data_tr)) + # print("data_va len ", len(data_va)) + # print("data_te len ", len(data_te)) + + gnn = config['gnn'](**config['model_config']) + + # We use a MixedLoader since the dataset is in mixed mode + loader_tr = MixedLoader(data_tr, batch_size=batch_size, epochs=epochs) + loader_va = MixedLoader(data_va, batch_size=batch_size) + loader_te = MixedLoader(data_te, batch_size=batch_size) + + loader_te_predict = MixedLoader(data_te_predict, batch_size=batch_size) + + if not train: + gnn.fit(MixedLoader(data_tr[:10], batch_size=batch_size, epochs=epochs), + MixedLoader(data_tr[10:20], batch_size=batch_size), MixedLoader(data_tr[20:30], batch_size=batch_size)) + set_model_weights(gnn._model, config["set_model"]) + + #set_model_layers(gnn._model, config["set_model"]) + + #gnn._model = config["set_model"] + #compare_models(gnn._model, config["set_model"], config) + + # + if gnn is None: + gnn = GNN(loss=loss, 
optimizer=optimizer, conv_layer=config['conv_layer'], output_activation=abs_activation, + hidden_activation='relu', patience=150, hidden_reqularizer=hidden_regularization, + model=config['model'], accuracy_func=accuracy_func) # tanh takes to much time + # ideally patience = 150 + # batch_size 500, ideally 500 epochs, patience 35 + + if train: + print("gnn ", gnn) + # gnn.run_eagerly = True + train_targets = gnn.fit(loader_tr, loader_va, loader_te) + + learning_time = time.process_time() - learning_time_start + + print("learning time ", learning_time) + + states = gnn._states + # print("states ", states) + # for state in states.values(): + # print("state._model", state._model) + + if len(states) > 0: + min_key = np.min(list(states.keys())) + gnn = states[min_key] + + # print("gnn._model.layers[min].get_weights() ", states[np.min(list(states.keys()))]._model.layers[0].get_weights()) + # print("gnn._model.layers[max].get_weights() ", states[np.max(list(states.keys()))]._model.layers[0].get_weights()) + + train_targets, train_predictions = gnn.predict(MixedLoader(data_tr, batch_size=batch_size, epochs=1)) + train_predictions = np.squeeze(train_predictions) + + val_targets, val_predictions = gnn.predict(loader_va) + val_predictions = np.squeeze(val_predictions) + + #val_targets = gnn.val_targets + total_steps = gnn._total_n_steps + + targets, predictions = gnn.predict(loader_te) + predictions = np.squeeze(predictions) + + targets_to_est, predictions_to_est = gnn.predict(loader_te_predict) + predictions_to_est = np.squeeze(predictions_to_est) + targets_to_est = np.array(targets_to_est) + predictions_to_est = np.array(predictions_to_est) + + #print("learning time ", learning_time) + + targets = np.array(targets) + predictions = np.array(predictions) + + #print("MSE ", np.mean((predictions-targets)**2)) + + if log: + targets = np.exp(targets) + predictions = np.exp(predictions) + target_to_est = np.exp(targets_to_est) + predictions_to_est = np.exp(predictions_to_est) + + 
if not stats: + analyze_results(targets, predictions) + plot_loss(gnn._train_loss, gnn._val_loss) + analyze_results(targets, predictions) + + import matplotlib.pyplot as plt + + plt.hist(targets, bins=50, alpha=0.5, label='target', density=True) + plt.hist(predictions, bins=50, alpha=0.5, label='predictions', density=True) + + # plt.hist(targets - predictions, bins=50, alpha=0.5, label='predictions', density=True) + plt.legend(loc='upper right') + # plt.xlim(-0.5, 1000) + plt.yscale('log') + plt.show() + + #predict_l_0_start_time = time.process_time() + l_0_targets, l_0_predictions, predict_l_0_time = predict_level_zero(gnn, config['l_0_output_dir'], + config['l_0_hdf_path'], config['mesh'], + config['conv_layer'], batch_size, log, + stats=stats, + corr_field_config=config['corr_field_config'], + seed=index, + feature_names=config.get('feature_names', [['conductivity']]), + config=config + ) + #predict_l_0_time = time.process_time() - predict_l_0_start_time + + if stats: + l1_sample_time = preprocess_time / (len(data_tr) + len(data_te)) + learning_time / (len(data_tr) + len(data_te)) + l0_sample_time = predict_l_0_time / len(l_0_targets) + + # print("targets ", targets) + # print("predictions ", predictions) + + # orig_max_vars, predict_max_vars = process_mlmc(hdf_path, sampling_info_path, ref_mlmc_file, targets, predictions, train_targets, + # train_predictions, + # val_targets, l_0_targets, + # l_0_predictions, l1_sample_time, l0_sample_time, nn_level=level, replace_level=replace_level, + # stats=stats) + + return gnn, targets, predictions, learning_time, train_targets, train_predictions,\ + val_targets, val_predictions, l_0_targets, l_0_predictions, l1_sample_time, l0_sample_time, total_steps, targets_to_est, predictions_to_est + + save_times(config['save_path'], False, (preprocess_time, (len(data_tr) + len(data_te))), learning_time, (predict_l_0_time, len(l_0_targets))) + save_load_data(config['save_path'], False, targets, predictions, train_targets, 
train_predictions, val_targets, l_0_targets, + l_0_predictions) + + +def predict_level_zero(nn, output_dir, hdf_path, mesh, conv_layer, batch_size=1000, log=False, stats=False, + corr_field_config=None, seed=1234, feature_names=[], config=None): + #graph_creator(output_dir, hdf_path, mesh, level=0, feature_names=feature_names) + # Load data + sample_time = 0 + if corr_field_config: + sample_time = corr_field_sample_time(mesh, corr_field_config) + else: + raise Exception("No corr field config passed") + + data = FlowDataset(output_dir=output_dir, log=log, config=config, predict=True)#, mesh=mesh, corr_field_config=corr_field_config) + #data = data # [:10000] + data.shuffle(seed=seed) + + # print("output_dir ", output_dir) + # print("len(data) ", len(data)) + # print("data[0] ", data[0]) + + predict_time_start = time.process_time() + data.a = conv_layer.preprocess(data.a) + data.a = sp_matrix_to_sp_tensor(data.a) + + loader_te = MixedLoader(data, batch_size=batch_size) + + targets, predictions = nn.predict(loader_te) + predictions = np.squeeze(predictions) + + if not stats: + analyze_results(targets, predictions) + + if log: + targets = np.exp(targets) + predictions = np.exp(predictions) + if not stats: + analyze_results(targets, predictions) + + predict_time = time.process_time() - predict_time_start + + return targets, predictions, predict_time + sample_time * len(data) + + +def save_times(path, load=False, preprocess=None, learning_time=None, predict_l_0=None): + if load: + preprocess_time = None + preprocess_n = None + predict_time = None + predict_n = None + if os.path.exists(os.path.join(path, "preprocess_time.npy")): + preprocess_time = np.load(os.path.join(path, "preprocess_time.npy")) + if os.path.exists(os.path.join(path, "preprocess_n.npy")): + preprocess_n = np.load(os.path.join(path, "preprocess_n.npy")) + if os.path.exists(os.path.join(path, "learning_time.npy")): + learning_time = np.load(os.path.join(path, "learning_time.npy")) + if 
os.path.exists(os.path.join(path, "predict_l_0_time.npy")): + predict_time = np.load(os.path.join(path, "predict_l_0_time.npy")) + if os.path.exists(os.path.join(path, "predict_l_0_n.npy")): + predict_n = np.load(os.path.join(path, "predict_l_0_n.npy")) + return preprocess_time, preprocess_n, learning_time, predict_time, predict_n + else: + if preprocess is not None: + np.save(os.path.join(path, "preprocess_time"), preprocess[0]) + np.save(os.path.join(path, "preprocess_n"), preprocess[1]) + if learning_time is not None: + np.save(os.path.join(path, "learning_time"), learning_time) + if preprocess is not None: + np.save(os.path.join(path, "predict_l_0_time"), predict_l_0[0]) + np.save(os.path.join(path, "predict_l_0_n"), predict_l_0[1]) + + +def save_load_data(path, load=False, targets=None, predictions=None, train_targets=None, train_predictions=None, + val_targets=None, l_0_targets=None, + l_0_predictions=None): + if load: + if os.path.exists(os.path.join(path, "targets.npy")): + targets = np.load(os.path.join(path, "targets.npy")) + if os.path.exists(os.path.join(path, "predictions.npy")): + predictions = np.load(os.path.join(path, "predictions.npy")) + if os.path.exists(os.path.join(path, "train_targets.npy")): + train_targets = np.load(os.path.join(path, "train_targets.npy")) + if os.path.exists(os.path.join(path, "train_predictions.npy")): + train_predictions = np.load(os.path.join(path, "train_predictions.npy")) + if os.path.exists(os.path.join(path, "val_targets.npy")): + val_targets = np.load(os.path.join(path, "val_targets.npy")) + if os.path.exists(os.path.join(path, "l_0_targets.npy")): + l_0_targets = np.load(os.path.join(path, "l_0_targets.npy")) + if os.path.exists(os.path.join(path, "l_0_predictions.npy")): + l_0_predictions = np.load(os.path.join(path, "l_0_predictions.npy")) + return targets, predictions, train_targets, train_predictions, val_targets, l_0_targets, l_0_predictions + else: + if targets is not None: + np.save(os.path.join(path, 
"targets"), targets) + if predictions is not None: + np.save(os.path.join(path, "predictions"), predictions) + if train_targets is not None: + np.save(os.path.join(path, "train_targets"), train_targets) + if train_predictions is not None: + np.save(os.path.join(path, "train_predictions"), train_predictions) + if val_targets is not None: + np.save(os.path.join(path, "val_targets"), val_targets) + if l_0_targets is not None: + np.save(os.path.join(path, "l_0_targets"), l_0_targets) + if l_0_predictions is not None: + np.save(os.path.join(path, "l_0_predictions"), l_0_predictions) + + +def process_results(hdf_path, sampling_info_path, ref_mlmc_file, save_path, nn_level, replace_level): + targets, predictions, train_targets, train_predictions, val_targets, l_0_targets, l_0_predictions = save_load_data( + save_path, load=True) + preprocess_time, preprocess_n, learning_time, predict_l_0_time, predict_l_0_n = save_times(save_path, load=True) + + l1_sample_time = preprocess_time / preprocess_n + learning_time / preprocess_n + l0_sample_time = predict_l_0_time / predict_l_0_n + + print("preprocess_time ", preprocess_time) + print("preprocess_n ", preprocess_n) + print("learning_time ", learning_time) + print("predict_l_0_time ", predict_l_0_time) + print("predict_l_0_n ", predict_l_0_n) + + print("l1 sample time ", l1_sample_time) + print("l0 sample time ", l0_sample_time) + + print("len targets ", len(targets)) + print("len predictions ", len(predictions)) + + print("len train targets ", len(train_targets)) + print("len val targets ", len(val_targets)) + + process_mlmc(hdf_path, sampling_info_path, ref_mlmc_file, targets, predictions, train_targets, train_predictions, + val_targets, l_0_targets, + l_0_predictions, l1_sample_time, l0_sample_time, nn_level=nn_level, replace_level=replace_level) diff --git a/mlmc/metamodel/create_graph.py b/mlmc/metamodel/create_graph.py new file mode 100644 index 00000000..a6df5c91 --- /dev/null +++ b/mlmc/metamodel/create_graph.py @@ -0,0 
+1,234 @@ +import os +import os.path +import numpy as np +import networkx as nx +from mlmc.tool import gmsh_io +from mlmc.tool.hdf5 import HDF5 +from mlmc.sample_storage_hdf import SampleStorageHDF +from mlmc.quantity.quantity import make_root_quantity +from spektral.data import Graph +from mlmc.metamodel.flow_dataset import FlowDataset + + +MESH = "/home/martin/Documents/metamodels/data/L1/test/01_cond_field/l_step_0.055_common_files/mesh.msh" +#FIELDS_SAMPLE_MESH = "/home/martin/Documents/metamodels/data/L1/test/01_cond_field/output/L00_S0000000/fine_fields_sample.msh" +FIELDS_SAMPLE = "fine_fields_sample.msh" +# OUTPUT_DIR = "/home/martin/Documents/metamodels/data/1000_ele/test/01_cond_field/output/" +# HDF_PATH = "/home/martin/Documents/metamodels/data/1000_ele/test/01_cond_field/mlmc_1.hdf5" + +# OUTPUT_DIR = "/home/martin/Documents/metamodels/data/cl_0_3_s_4/L5/test/01_cond_field/output/" +# HDF_PATH = "/home/martin/Documents/metamodels/data/cl_0_3_s_4/L5/mlmc_5.hdf5" + +# OUTPUT_DIR = "/home/martin/Documents/metamodels/data/cl_0_1_s_1/L5/test/01_cond_field/output/" +# HDF_PATH = "/home/martin/Documents/metamodels/data/cl_0_1_s_1/L5/mlmc_5.hdf5" + +# OUTPUT_DIR = "/home/martin/Documents/metamodels/data/1000_ele/cl_0_1_s_1/L5/test/01_cond_field/output/" +# HDF_PATH = "/home/martin/Documents/metamodels/data/1000_ele/cl_0_1_s_1/L5/mlmc_5.hdf5" + + +def extract_mesh_gmsh_io(mesh_file, get_points=False): + """ + Extract mesh from file + :param mesh_file: Mesh file path + :return: Dict + """ + mesh = gmsh_io.GmshIO(mesh_file) + is_bc_region = {} + region_map = {} + for name, (id, _) in mesh.physical.items(): + unquoted_name = name.strip("\"'") + is_bc_region[id] = (unquoted_name[0] == '.') + region_map[unquoted_name] = id + + bulk_elements = [] + + for id, el in mesh.elements.items(): + _, tags, i_nodes = el + region_id = tags[0] + if not is_bc_region[region_id]: + bulk_elements.append(id) + + n_bulk = len(bulk_elements) + centers = np.empty((n_bulk, 3)) + ele_ids 
= np.zeros(n_bulk, dtype=int) + ele_nodes = {} + point_region_ids = np.zeros(n_bulk, dtype=int) + + for i, id_bulk in enumerate(bulk_elements): + _, tags, i_nodes = mesh.elements[id_bulk] + region_id = tags[0] + centers[i] = np.average(np.array([mesh.nodes[i_node] for i_node in i_nodes]), axis=0) + point_region_ids[i] = region_id + ele_ids[i] = id_bulk + ele_nodes[id_bulk] = i_nodes + + if get_points: + + min_pt = np.min(centers, axis=0) + max_pt = np.max(centers, axis=0) + diff = max_pt - min_pt + min_axis = np.argmin(diff) + non_zero_axes = [0, 1, 2] + # TODO: be able to use this mesh_dimension in fields + if diff[min_axis] < 1e-10: + non_zero_axes.pop(min_axis) + points = centers[:, non_zero_axes] + + return {'points': points, 'point_region_ids': point_region_ids, 'ele_ids': ele_ids, 'region_map': region_map} + + return ele_nodes + + +def get_node_features(fields_mesh, feature_names): + """ + Extract mesh from file + :param fields_mesh: Mesh file + :param feature_names: [[], []] - fields in each sublist are joint to one feature, each sublist corresponds to one vertex feature + :return: list + """ + mesh = gmsh_io.GmshIO(fields_mesh) + + features = [] + for f_names in feature_names: + joint_features = join_fields(mesh._fields, f_names) + + features.append(list(joint_features.values())) + + return np.array(features).T + + +def join_fields(fields, f_names): + if len(f_names) > 0: + x_name = len(set([*fields[f_names[0]]])) + assert all(x_name == len(set([*fields[f_n]])) for f_n in f_names) + + # # Using defaultdict + # c = [collections.Counter(fields[f_n]) for f_n in f_names] + # Cdict = collections.defaultdict(int) + + joint_dict = {} + for f_n in f_names: + for key, item in fields[f_n].items(): + #print("key: {}, item: {}".format(key, np.squeeze(item))) + joint_dict.setdefault(key, 0) + + if joint_dict[key] != 0 and np.squeeze(item) != 0: + raise ValueError("Just one field value should be non zero for each element") + joint_dict[key] += np.squeeze(item) + + return 
joint_dict + + +def create_adjacency_matrix(ele_nodes): + + adjacency_matrix = np.zeros((len(ele_nodes), len(ele_nodes))) + #adjacency_matrix = sparse.csr_matrix((len(ele_nodes), len(ele_nodes))) # + + nodes = list(ele_nodes.values()) + for i in range(adjacency_matrix.shape[0]): + ele_nodes = nodes[i] + + for j in range(i+1, len(nodes)): + if i == j: + continue + ele_n = nodes[j] + + if len(list(set(ele_nodes).intersection(ele_n))) == 2: + adjacency_matrix[j][i] = adjacency_matrix[i][j] = 1 + + #print(np.count_nonzero(adjacency_matrix)) + assert np.allclose(adjacency_matrix, adjacency_matrix.T) # symmetry + return adjacency_matrix + + +def plot_graph(adjacency_matrix): + import matplotlib.pyplot as plt + #G = nx.from_scipy_sparse_matrix(adjacency_matrix) + G = nx.from_numpy_matrix(adjacency_matrix) + nx.draw_kamada_kawai(G, with_labels=True, node_size=1, font_size=6) + plt.axis('equal') + plt.show() + + +def reject_outliers(data, m=2): + #print("abs(data - np.mean(data)) < m * np.std(data) ", abs(data - np.mean(data)) < m * np.std(data)) + #return data[abs(data - np.mean(data)) < m * np.std(data)] + return abs(data - np.mean(data)) < m * np.std(data) + + +def graph_creator(output_dir, hdf_path, mesh, level=0, feature_names=[['conductivity']], quantity_name="conductivity"): + adjacency_matrix = create_adjacency_matrix(extract_mesh_gmsh_io(mesh)) + np.save(os.path.join(output_dir, "adjacency_matrix"), adjacency_matrix, allow_pickle=True) + loaded_adjacency_matrix = np.load(os.path.join(output_dir, "adjacency_matrix.npy"), allow_pickle=True) + + #plot_graph(loaded_adjacency_matrix) + + sample_storage = SampleStorageHDF(file_path=hdf_path) + sample_storage.chunk_size = 1e8 + result_format = sample_storage.load_result_format() + root_quantity = make_root_quantity(sample_storage, result_format) + + #@TODO: + conductivity = root_quantity[quantity_name] + time = conductivity[1] # times: [1] + location = time['0'] # locations: ['0'] + q_value = location[0, 0] + + hdf = 
HDF5(file_path=hdf_path, load_from_file=True) + level_group = hdf.add_level_group(level_id=str(level)) + + chunk_spec = next(sample_storage.chunks(level_id=level, n_samples=sample_storage.get_n_collected()[int(level)])) + collected_values = q_value.samples(chunk_spec=chunk_spec)[0] + + collected_ids = sample_storage.collected_ids(level_id=level) + + indices = np.ones(len(collected_values)) + collected = zip(collected_ids, collected_values) + + graphs = [] + data = [] + i = 0 + for keep, (sample_id, col_values) in zip(indices, collected): + if not keep: + continue + + output_value = col_values[0] + + sample_dir = os.path.join(output_dir, sample_id) + field_mesh = os.path.join(sample_dir, FIELDS_SAMPLE) + if os.path.exists(field_mesh): + # i += 1 + # if i > 150: + # break + + features = get_node_features(field_mesh, feature_names) + np.save(os.path.join(sample_dir, "nodes_features"), features) + np.save(os.path.join(sample_dir, "output"), output_value) + + #graphs.append(Graph(x=features, y=output_value)) # , a=self.adjacency_matrix)) + # Save data for pandas dataframe creation, not used with Graph neural network + #data.append({'x': features, 'y': output_value}) + + #loaded_features = np.load(os.path.join(sample_dir, "nodes_features.npy")) + #print("loaded features ", loaded_features) + + #FlowDataset.pickle_data(graphs, FlowDataset.GRAPHS_FILE) + #FlowDataset.pickle_data(data, FlowDataset.DATA_FILE) + + +if __name__ == "__main__": + + # mesh = "/home/martin/Documents/metamodels/data/5_ele/cl_0_1_s_1/L5/l_step_0.020196309484414757_common_files/mesh.msh" + # output_dir = "/home/martin/Documents/metamodels/data/5_ele/cl_0_3_s_4/L1_3/test/01_cond_field/output/" + # hdf_path = "/home/martin/Documents/metamodels/data/5_ele/cl_0_1_s_1/L1_3/mlmc_1.hdf5" + + import cProfile + import pstats + pr = cProfile.Profile() + pr.enable() + + my_result = graph_creator(output_dir, hdf_path, mesh) + + pr.disable() + ps = pstats.Stats(pr).sort_stats('cumtime') + ps.print_stats() diff 
--git a/mlmc/metamodel/custom_methods.py b/mlmc/metamodel/custom_methods.py new file mode 100644 index 00000000..3a66ce0c --- /dev/null +++ b/mlmc/metamodel/custom_methods.py @@ -0,0 +1,42 @@ +import numpy as np +import tensorflow as tf +from tensorflow.keras import backend as K +# import tensorflow.experimental.numpy as tnp +# tnp.experimental_enable_numpy_behavior() +from mlmc.moments import Monomial, Legendre + + +def abs_activation(x): + return K.abs(x) + + +def var_loss_function(y_true, y_predict): + if tf.is_tensor(y_true): + y_true = float(y_true) + + # else: + # print("diff shape ", (y_true - K.squeeze(y_predict, axis=1)).shape) + + return K.var(y_true - K.squeeze(y_predict, axis=1)) + #return K.sum(K.abs(y_true - K.squeeze(y_predict, axis=1))) + + +def total_loss_function(y_true, y_predict): + if tf.is_tensor(y_true): + y_true = float(y_true) + + # else: + # print("diff shape ", (y_true - K.squeeze(y_predict, axis=1)).shape) + + #return K.var(K.abs(y_true - K.squeeze(y_predict, axis=1))) + + return K.mean((y_true - K.squeeze(y_predict, axis=1))**2) + K.var(K.abs(y_true - K.squeeze(y_predict, axis=1))) + + +def MSE_moments(moments_fn=None): + if moments_fn is None: + raise ValueError + + def calc_err(y_true, y_predict): + return K.mean(K.sum((moments_fn.eval_all(y_true) - moments_fn.eval_all(y_predict))**2, axis=1)) + return calc_err \ No newline at end of file diff --git a/mlmc/metamodel/flow_dataset.py b/mlmc/metamodel/flow_dataset.py new file mode 100644 index 00000000..6616ded9 --- /dev/null +++ b/mlmc/metamodel/flow_dataset.py @@ -0,0 +1,493 @@ +import os +import re +import copy +import random +import numpy as np +import pandas as pd +from mlmc.tool import gmsh_io +from mlmc.tool.flow_mc import FlowSim, create_corr_field +from spektral.data import Dataset, Graph +import pickle + +#MESH = "/home/martin/Documents/metamodels/data/L1/test/01_cond_field/l_step_0.055_common_files/mesh.msh" +FIELDS_SAMPLE = "fine_fields_sample.msh" +#OUTPUT_DIR = 
"/home/martin/Documents/metamodels/data/1000_ele/test/01_cond_field/output/" +#OUTPUT_DIR = "/home/martin/Documents/metamodels/data/cl_0_3_s_4/L5/test/01_cond_field/output/" +#OUTPUT_DIR = "/home/martin/Documents/metamodels/data/cl_0_1_s_1/L5/test/01_cond_field/output/" +#OUTPUT_DIR = "/home/martin/Documents/metamodels/data/1000_ele/cl_0_1_s_1/L5/test/01_cond_field/output/" + + +class FlowDataset(Dataset): + GRAPHS_FILE = "graphs" + DATA_FILE = "data" + + def __init__(self, output_dir=None, level=0, log=False, mesh=None, corr_field_config=None, config={}, index=None, adj_matrix=None, dataset=None, graphs=None, + predict=False, train_samples=False, test_samples=False, n_test_samples=50000, independent_samples=False, **kwargs): + self._output_dir = output_dir + # if self._output_dir is None: + # self._output_dir = OUTPUT_DIR + self._log = log + self.level = level + self._mesh = mesh + self._corr_field_config = corr_field_config + self.adjacency_matrix = adj_matrix + if self.adjacency_matrix is None and self._output_dir is not None and os.path.exists(os.path.join(self._output_dir, "adjacency_matrix.npy")): + self.adjacency_matrix = np.load(os.path.join(self._output_dir, "adjacency_matrix.npy"), allow_pickle=True) # adjacency matrix + self.data = [] + self._aug_data = [] + self._config = config + self._index = index + self._predict = predict + self._train_samples = train_samples + self._test_samples = test_samples + self._independent_samples = independent_samples + self._n_test_samples = n_test_samples + self._dataset_config = config.get('dataset_config', {}) + self._augment_data = config.get('augment_data', False) + + if predict: + self._min_features = self._dataset_config.get('min_features', None) + self._max_features = self._dataset_config.get('max_features', None) + self._mean_features = self._dataset_config.get('mean_features', None) + self._var_features = self._dataset_config.get('var_features', None) + self._min_output = self._dataset_config.get('min_output', 
None) + self._max_output = self._dataset_config.get('max_output', None) + self._mean_output = self._dataset_config.get('mean_output', None) + self._var_output = self._dataset_config.get('var_output', None) + self._output_mult_factor = self._dataset_config.get('output_mult_factor', 1) + else: + self._min_features = None + self._max_features = None + self._mean_features = None + self._var_features = None + self._min_output = None + self._max_output = None + self._mean_output = None + self._var_output = None + self._output_mult_factor = 1 + + self._columns = None + + self.dataset = dataset + self.graphs = graphs + + if self.dataset is None or self.graphs is None: + super().__init__(**kwargs) + self.dataset = pd.DataFrame(self.data) + self._df_for_augmentation = pd.DataFrame(self._aug_data, columns=self._columns) + else: + self.a = self.adjacency_matrix + + #self._data_augmentation() + + def get_train_data(self, index, length): + new_dataset = self.dataset[index * length: index * length + length] + new_graphs = self.graphs[index * length: index * length + length] # self.graphs is read() method output + + if self._augment_data: + new_dataset, new_graphs = self._data_augmentation(self._df_for_augmentation[index * length: index * length + length], new_graphs) + + new_obj = FlowDataset(output_dir=copy.deepcopy(self._output_dir), level=copy.deepcopy(self.level), + log=copy.deepcopy(self._log), mesh=copy.deepcopy(self._mesh), + corr_field_config=copy.deepcopy(self._corr_field_config), + config=copy.deepcopy(self._config), index=copy.deepcopy(self._index), + adj_matrix=copy.deepcopy(self.adjacency_matrix), dataset=new_dataset, graphs=new_graphs) + + # self_dict = self.__dict__ + # self_dict["dataset"] = new_dataset + # self_dict["graphs"] = new_graphs + # + #new_obj.__dict__.update(self_dict) + + #new_obj = copy.deepcopy(self) + #new_obj.dataset = new_dataset + #new_obj.graphs = new_graphs + + return new_obj + + def split_val_train(self, len_val_data): + tr_dataset = 
self.dataset[:-len_val_data] + va_dataset = self.dataset[-len_val_data:] + + tr_graphs = self.graphs[:-len_val_data] + va_graphs = self.graphs[-len_val_data:] + #new_graphs = self.graphs[index * length: index * length + length] # self.graphs is read() method output + + tr_obj = FlowDataset(output_dir=copy.deepcopy(self._output_dir), level=copy.deepcopy(self.level), + log=copy.deepcopy(self._log), mesh=copy.deepcopy(self._mesh), + corr_field_config=copy.deepcopy(self._corr_field_config), + config=copy.deepcopy(self._config), index=copy.deepcopy(self._index), + adj_matrix=copy.deepcopy(self.adjacency_matrix), dataset=tr_dataset, graphs=tr_graphs) + + va_obj = FlowDataset(output_dir=copy.deepcopy(self._output_dir), level=copy.deepcopy(self.level), + log=copy.deepcopy(self._log), mesh=copy.deepcopy(self._mesh), + corr_field_config=copy.deepcopy(self._corr_field_config), + config=copy.deepcopy(self._config), index=copy.deepcopy(self._index), + adj_matrix=copy.deepcopy(self.adjacency_matrix), dataset=va_dataset, graphs=va_graphs) + + # tr_obj = copy.deepcopy(self) + # va_obj = copy.deepcopy(self) + # + # tr_obj.dataset = tr_dataset + # va_obj.dataset = va_dataset + # + # tr_obj.graphs = tr_graphs + # va_obj.graphs = va_graphs + + return tr_obj, va_obj + + def get_test_data(self, index, length): + if self._independent_samples: + if index > 0: + new_dataset =self.dataset[-index * length - length:-index * length] + new_graphs = self.graphs[-index * length - length:-index * length] + else: + new_dataset = self.dataset[-index * length - length:] + new_graphs = self.graphs[-index * length - length:] + + else: + new_dataset = self.dataset[0:index * length] + self.dataset[index * length + length:] + new_graphs = self.graphs[0:index * length] + self.graphs[index * length + length:] + + new_obj = FlowDataset(output_dir=copy.deepcopy(self._output_dir), level=copy.deepcopy(self.level), log=copy.deepcopy(self._log), mesh=copy.deepcopy(self._mesh), + 
corr_field_config=copy.deepcopy(self._corr_field_config), config=copy.deepcopy(self._config), index=copy.deepcopy(self._index), + adj_matrix=copy.deepcopy(self.adjacency_matrix), dataset=new_dataset, graphs=new_graphs) + + return new_obj + + def shuffle(self, seed=None): + if seed is not None: + random.seed(seed) + + random.shuffle(self.data) + self.dataset = pd.DataFrame(self.data) + + def _data_augmentation(self, df_slice, new_graphs): + import smogn + import matplotlib.pyplot as plt + #import seaborn + + # df_slice = self._df_for_augmentation[self._index * self._config['n_train_samples']: + # self._index * self._config['n_train_samples'] + self._config['n_train_samples']] + + df_slice = df_slice.reset_index(drop=True) + if 'augmentation_config' in self._config: + dataset_modified = smogn.smoter(data=df_slice, y="y", **self._config["augmentation_config"]) + else: + dataset_modified = smogn.smoter(data=df_slice, y="y", k=9, samp_method="extreme") + + # print("dataset modified shape", dataset_modified.shape) + # print("dataset modified ", dataset_modified) + # + # print("df stats ", smogn.box_plot_stats(df_slice['y'])['stats']) + # print("modified stats ", smogn.box_plot_stats(dataset_modified['y'])['stats']) + # + # fig, ax = plt.subplots(1, 1, figsize=(15, 10)) + # ax.hist(df_slice['y'], bins=50, alpha=0.5, label='target', density=True) + # plt.title("original") + # plt.show() + # + # fig, ax = plt.subplots(1, 1, figsize=(15, 10)) + # ax.hist(dataset_modified['y'], bins=50, alpha=0.5, label='target', density=True) + # plt.title("modified") + # plt.show() + + appended_dataset = df_slice.append(dataset_modified) + + numpy_frame = dataset_modified.to_numpy() + + for i in range(numpy_frame.shape[0]): + features = numpy_frame[i][:-1] + features = features.reshape((len(features), 1)) + new_graphs.append(Graph(x=features, y=numpy_frame[i][-1])) + + return appended_dataset, copy.deepcopy(new_graphs) + + # seaborn.kdeplot(df_slice['y'], label="Original") + # 
seaborn.kdeplot(dataset_modified['y'], label="Modified") + + + # def generate_data(self): + # n_samples = 10**5 + # graphs = [] + # mesh_data = FlowSim.extract_mesh(self._mesh) + # fields = create_corr_field(model="exp", dim=2, + # sigma=self._corr_field_config['sigma'], + # corr_length=self._corr_field_config['corr_length'], + # log=self._corr_field_config['log']) + # + # # # Create fields both fine and coarse + # fields = FlowSim.make_fields(fields, mesh_data, None) + # + # for i in range(n_samples): + # fine_input_sample, coarse_input_sample = FlowSim.generate_random_sample(fields, coarse_step=0, + # n_fine_elements=len( + # mesh_data['points'])) + # # print("len fine input sample ", len(fine_input_sample["conductivity"])) + # # print("fine input sample ", fine_input_sample["conductivity"]) + # + # features = fine_input_sample["conductivity"] + # output = 1 + # + # # gmsh_io.GmshIO().write_fields('fields_sample.msh', mesh_data['ele_ids'], fine_input_sample) + # # + # # mesh = gmsh_io.GmshIO('fields_sample.msh') + # # element_data = mesh.current_elem_data + # # features = list(element_data.values()) + # + # if self._log: + # features = np.log(features) + # output = np.log(output) + # # features = (features - minimum) / (maximum - minimum) + # graphs.append(Graph(x=features, y=output)) # , a=self.adjacency_matrix)) + # # Save data for pandas dataframe creation, not used with Graph neural network + # self.data.append({'x': features, 'y': output}) + # + # self.a = self.adjacency_matrix + # return graphs + + def read(self): + all_outputs = [] + all_features = [] + + for idx, s_dir in enumerate(os.listdir(self._output_dir)): + try: + l = re.findall(r'L(\d+)_S', s_dir)[0] + if int(l) != self.level: + continue + except IndexError: + continue + if os.path.isdir(os.path.join(self._output_dir, s_dir)): + sample_dir = os.path.join(self._output_dir, s_dir) + if os.path.exists(os.path.join(sample_dir, "nodes_features.npy")): + features = np.load(os.path.join(sample_dir, 
"nodes_features.npy")) + # print("type features ", type(features)) + # print("features shape ", features.shape) + # features = np.expand_dims(features, axis=0) + # if all_features is None: + # all_features = features + # else: + # all_features = np.vstack((all_features, features)) + all_features.append(features) + + output = np.load(os.path.join(sample_dir, "output.npy")) + all_outputs.append(output) + + if self._dataset_config.get("first_log_features", False): + all_features = np.log(all_features) + + if self._dataset_config.get("first_log_output", False): + all_outputs = np.log(all_outputs) + + #print("all outputs ", np.array(all_outputs).shape) + # min_output = np.min(all_outputs) + # max_output = np.max(all_outputs) + + if not self._predict: + if self._index is not None: + train_outputs = all_outputs[self._index * self._config['n_train_samples']: + self._index * self._config['n_train_samples'] + self._config['n_train_samples']] + train_features = all_features[self._index * self._config['n_train_samples']: + self._index * self._config['n_train_samples'] + self._config['n_train_samples']] + else: + train_outputs = all_outputs + train_features = all_features + + if self._dataset_config.get("calc_output_mult_factor", False) is True: + self._output_mult_factor = 1/np.mean(train_outputs) + self._dataset_config["output_mult_factor"] = self._output_mult_factor + self._save_data_config() + + if self._dataset_config.get("features_normalization", False): + self._min_features = np.min(train_features, axis=0) + self._max_features = np.max(train_features, axis=0) + self._dataset_config["min_features"] = self._min_features + self._dataset_config["max_features"] = self._max_features + + if self._dataset_config.get("features_scale", False): + self._mean_features = np.mean(train_features, axis=0) + self._var_features = np.var(train_features, axis=0) + self._dataset_config["mean_features"] = self._mean_features + self._dataset_config["var_features"] = self._var_features + + if 
self._dataset_config.get("output_normalization", False): + self._min_output = np.min(train_outputs, axis=0) + self._max_output = np.max(train_outputs, axis=0) + self._dataset_config["min_output"] = self._min_output + self._dataset_config["max_output"] = self._max_output + + if self._dataset_config.get("output_scale", False): + self._mean_output = np.mean(train_outputs, axis=0) + self._var_output = np.var(train_outputs, axis=0) + self._dataset_config["mean_output"] = self._mean_output + self._dataset_config["var_output"] = self._var_output + + self._save_data_config() + + if self._train_samples: + all_features = train_features + all_outputs = train_outputs + + elif self._test_samples: + if self._independent_samples: + if self._index > 0: + all_outputs = all_outputs[-self._index * self._config['n_train_samples']- self._config['n_train_samples']:-self._index * self._config['n_train_samples']] + all_features = all_features[-self._index * self._config['n_train_samples']- self._config['n_train_samples']:-self._index *self._config['n_train_samples']] + else: + all_outputs = all_outputs[-self._index * self._config['n_train_samples'] - self._config[ + 'n_train_samples']:] + all_features = all_features[-self._index * self._config['n_train_samples'] - self._config[ + 'n_train_samples']:] + + else: + + if isinstance(all_outputs, list): + all_outputs = all_outputs[0:self._index * self._config['n_train_samples']] + all_outputs[self._index * self._config['n_train_samples'] + + self._config['n_train_samples']:] + else: + all_outputs = np.concatenate([all_outputs[0:self._index * self._config['n_train_samples']], all_outputs[self._index * self._config['n_train_samples'] + self._config['n_train_samples']:]]) + + if isinstance(all_features, list): + all_features = all_features[0:self._index * self._config['n_train_samples']] + all_features[ + self._index * self._config['n_train_samples'] + self._config[ + 'n_train_samples']:] + else: + all_features = 
np.concatenate([all_features[0:self._index * self._config['n_train_samples']], + all_features[ + self._index * self._config['n_train_samples'] + self._config[ + 'n_train_samples']:]]) + + all_outputs = all_outputs[:self._n_test_samples] + all_features = all_features[:self._n_test_samples] + + graphs = [] + for features, output in zip(all_features, all_outputs): + if self._dataset_config.get("features_normalization", False): + features = (features - self._min_features) / (self._max_features - self._min_features) + features = np.nan_to_num(features) + + if self._dataset_config.get("output_normalization", False): + output = (output - self._min_output) / (self._max_output - self._min_output) + output = np.nan_to_num(output) + + if self._dataset_config.get("features_scale", False): + features -= self._mean_features + features /= self._var_features + + if self._dataset_config.get("output_scale", False): + output -= self._mean_output + output /= self._var_output + + # output = (output - min_output) / (max_output - min_output) + # print("max ", maximum) + # print("max ", minimum) + # + # print("new featuers max ", np.max(new_features)) + # print("new featuers min ", np.min(new_features)) + # exit() + + output_mult_factor = self._output_mult_factor + features_mult_factor = self._dataset_config.get("features_mult_factor", 1) + + features *= features_mult_factor + output *= output_mult_factor + + if self._log and self._dataset_config.get("features_log", False) is False and self._dataset_config.get("output_log", False) is False: + features = np.log(features) + output = np.log(output) + + if self._dataset_config.get("last_log_features", False): + features = np.log(features) + + if self._dataset_config.get("last_log_output", False): + output = np.log(output) + + graphs.append(Graph(x=features, y=output))#, a=self.adjacency_matrix)) + + # Save data for pandas dataframe creation, not used with Graph neural network + self.data.append({'x': features, 'y': output}) + + if 
self._augment_data: + if self._columns is None: + d = [pd.DataFrame(features.reshape((features.shape[1], features.shape[0])).tolist()).add_prefix("x_")] + new_df = pd.concat(d, axis=1) + #new_df.insert(loc=0, column="y", value=output) + self._columns = [] + for col in new_df.columns: + self._columns.append(col) + self._columns.append("y") + + squeezed_features = list(np.squeeze(features)) + squeezed_features.append(output) + self._aug_data.append(squeezed_features) + + + #new_df["y"] = output + # print("new df y ", new_df["y"]) + # print("new df .shape ", new_df.shape) + #print("new df ", new_df) + + # if self._df_for_augmentation is not None: + # self._df_for_augmentation = self._df_for_augmentation.append(new_df) + # else: + # self._df_for_augmentation = new_df + + self.a = self.adjacency_matrix + return graphs + + def _save_data_config(self): + # Save config to Pickle + import pickle + import shutil + + if "iter_dir" in self._config: + if os.path.exists(os.path.join(self._config['iter_dir'], "dataset_config.pkl")): + os.remove(os.path.join(self._config['iter_dir'], "dataset_config.pkl")) + + # create a binary pickle file + with open(os.path.join(self._config['iter_dir'], "dataset_config.pkl"), "wb") as writer: + pickle.dump(self._dataset_config, writer) + + @staticmethod + def pickle_data(data, output_dir, file_path): + with open(os.path.join(output_dir, file_path), 'wb') as writer: + pickle.dump(data, writer) + + +def extract_mesh_gmsh_io(mesh_file): + """ + Extract mesh from file + :param mesh_file: Mesh file path + :return: Dict + """ + mesh = gmsh_io.GmshIO(mesh_file) + is_bc_region = {} + region_map = {} + for name, (id, _) in mesh.physical.items(): + unquoted_name = name.strip("\"'") + is_bc_region[id] = (unquoted_name[0] == '.') + region_map[unquoted_name] = id + + bulk_elements = [] + + for id, el in mesh.elements.items(): + _, tags, i_nodes = el + region_id = tags[0] + if not is_bc_region[region_id]: + bulk_elements.append(id) + + n_bulk = 
len(bulk_elements) + centers = np.empty((n_bulk, 3)) + ele_ids = np.zeros(n_bulk, dtype=int) + ele_nodes = {} + point_region_ids = np.zeros(n_bulk, dtype=int) + + for i, id_bulk in enumerate(bulk_elements): + _, tags, i_nodes = mesh.elements[id_bulk] + region_id = tags[0] + centers[i] = np.average(np.array([mesh.nodes[i_node] for i_node in i_nodes]), axis=0) + point_region_ids[i] = region_id + ele_ids[i] = id_bulk + ele_nodes[id_bulk] = i_nodes + + return ele_nodes diff --git a/mlmc/metamodel/flow_task_CNN.py b/mlmc/metamodel/flow_task_CNN.py new file mode 100644 index 00000000..3d9cf41f --- /dev/null +++ b/mlmc/metamodel/flow_task_CNN.py @@ -0,0 +1,65 @@ +import tensorflow as tf +from mlmc.metamodel.graph_models import cnn_model +from tensorflow.keras.layers.experimental import preprocessing + +# Following 3 lines prevent "Failed to get convolution algorithm. This is probably because cuDNN failed to initialize" +config = tf.compat.v1.ConfigProto() +config.gpu_options.allow_growth = True +session = tf.compat.v1.InteractiveSession(config=config) + +import os +import numpy as np +#os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Run on CPU only +import tensorflow as tf +from tensorflow.keras.losses import MeanSquaredError, SparseCategoricalCrossentropy, KLDivergence +from tensorflow.keras.metrics import mean_squared_error, kl_divergence +from tensorflow.keras.callbacks import History +from tensorflow.keras.optimizers import Adam +from tensorflow.keras.regularizers import l2 +from mlmc.metamodel.postprocessing import analyze_results, plot_loss +from spektral.data import MixedLoader +from mlmc.metamodel.flow_dataset import FlowDataset +from spektral.layers import GCNConv, GlobalSumPool, ChebConv, GraphSageConv, ARMAConv, GATConv, APPNPConv, GINConv +from spektral.utils.sparse import sp_matrix_to_sp_tensor +from tensorflow.keras.layers.experimental import preprocessing +from mlmc.metamodel.custom_methods import abs_activation + +from mlmc.metamodel.graph_models import Net1 + 
+ + +################################## +# Convolutional neural network # +################################## + +class CNN: + def __init__(self, **kwargs): + self._epochs = kwargs.get('epochs', 100) + self._val_split = kwargs.get('var_split', 0.2) + self._verbose = kwargs.get('verbose', False) + + self._loss = kwargs.get('loss', 'mean_squared_error') + self._optimizer = kwargs.get('optimizer', tf.optimizers.Adam(learning_rate=0.001)) + self._normalizer = kwargs.get('normalizer', preprocessing.Normalization()) + + self.history = None # Set in fit method + self._model = cnn_model() + self._model.compile(loss=self._loss, optimizer=self._optimizer) + + def fit(self, train_input, train_output): + self.history = self._model.fit(train_input, train_output, validation_split=self._val_split, + verbose=self._verbose, epochs=self._epochs) + + def predict(self, test_input): + return self._model.predict(test_input) + + def summary(self): + """ + Should be called after fit method + """ + return self._model.summary() + + + + + diff --git a/mlmc/metamodel/flow_task_GNN.py b/mlmc/metamodel/flow_task_GNN.py new file mode 100644 index 00000000..30181119 --- /dev/null +++ b/mlmc/metamodel/flow_task_GNN.py @@ -0,0 +1,176 @@ +import os +import numpy as np +import matplotlib.pyplot as plt +#os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Run on CPU only +import tensorflow as tf +from tensorflow.keras.regularizers import l2 + +from tensorflow.keras.losses import MeanSquaredError, SparseCategoricalCrossentropy, KLDivergence +from tensorflow.keras.metrics import mean_squared_error, kl_divergence +from tensorflow.keras.optimizers import Adam + +from mlmc.metamodel.postprocessing import analyze_results, plot_loss +from mlmc.metamodel.custom_methods import abs_activation +from spektral.data import MixedLoader +from mlmc.metamodel.flow_dataset import FlowDataset +from spektral.layers import GCNConv, GlobalSumPool, ChebConv, GraphSageConv, ARMAConv, GATConv, APPNPConv, GINConv +from 
spektral.utils.sparse import sp_matrix_to_sp_tensor + + + + +#conv_layer = GCNConv +conv_layer = ChebConv # Seems better than GCNConv, good distribution of predictions +#conv_layer = GraphSageConv # Seems better than ChebConv, good loss but very narrow distribution of predictions +#conv_layer = ARMAConv # Seems worse than GraphSageConv +#conv_layer = GATConv # Slow and not better than GraphSageConv +# conv_layer = APPNPConv # Not bad but worse than GraphSageConv +# conv_layer = GINConv # it is comparable to APPNPConv +act_func = "relu"#"tanh"#"elu" # ReLU keep predictions above zero + +optimizer = Adam() +loss_fn = MeanSquaredError() +#loss_fn = KLDivergence() + +acc_fn = mean_squared_error +#acc_fn = kl_divergence + +print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU'))) + +# Parameters +batch_size = 10000 # Batch size +epochs = 1000 # Number of training epochs +patience = 10 # Patience for early stopping +l2_reg = 0#5e-4 # Regularization rate for l2 + + +kernel_regularization = l2(l2_reg) + +# Create model +model = Net1(conv_layer=conv_layer, hidden_activation='relu', output_activation=abs_activation, + kernel_regularization=kernel_regularization) +#model = GeneralGNN(output=1, activation=abs_activation) + + +# Load data +data = FlowDataset() + +#print("data.a ", data.a) + +data.a = conv_layer.preprocess(data.a) +data.a = sp_matrix_to_sp_tensor(data.a) + +# Train/valid/test split +data_tr, data_te = data[:10000], data[10000:], +np.random.shuffle(data_tr) +data_tr, data_va = data_tr[:8000], data_tr[8000:] + +# We use a MixedLoader since the dataset is in mixed mode +loader_tr = MixedLoader(data_tr, batch_size=batch_size, epochs=epochs) +loader_va = MixedLoader(data_va, batch_size=batch_size) +loader_te = MixedLoader(data_te, batch_size=batch_size) + + + + + + + +# Training function +@tf.function +def train_on_batch(inputs, target): + with tf.GradientTape() as tape: + print("inputs data shape ", inputs[0].shape) # (number of train 
samples, number of vertices, number of properties for each vertex) + predictions = model(inputs, training=True) + #@TODO: zkusit pridat k loss function KLDivergence + # print(KLDivergence(target, predictions)) + # exit() + loss = loss_fn(target, predictions) + sum(model.losses)# + KLDivergence(target, predictions)#+ sum(model.losses) + acc = tf.reduce_mean(acc_fn(target, predictions)) + + gradients = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients(zip(gradients, model.trainable_variables)) + return loss, acc + + +# Evaluation function +def evaluate(loader): + step = 0 + results = [] + for batch in loader: + step += 1 + inputs, target = batch + print("loader ", loader) + print("inputs data shape ", inputs[0].shape) # (number of validation or test samples, number of vertices, number of properties for each vertex) + predictions = model(inputs, training=False) + + loss = loss_fn(target, predictions) + acc = tf.reduce_mean(acc_fn(target, predictions)) + results.append((loss, acc, len(target))) # Keep track of batch size + if step == loader.steps_per_epoch: + results = np.array(results) + print("np.average(results[:, :-1], axis=0, weights=results[:, -1]) ", + np.average(results[:, :-1], axis=0, weights=results[:, -1])) + exit() + return np.average(results[:, :-1], axis=0, weights=results[:, -1]), target, predictions + + +# Setup training +best_val_loss = np.inf +current_patience = patience +step = 0 + +# Training loop +results_tr = [] +for batch in loader_tr: + step += 1 + + # Training step + inputs, target = batch + loss, acc = train_on_batch(inputs, target) + results_tr.append((loss, acc, len(target))) + + all_targets = [] + all_predictions = [] + + if step == loader_tr.steps_per_epoch: + results_va, target, predictions = evaluate(loader_va) + + print("results_va[0] ", results_va[0]) + exit() + if results_va[0] < best_val_loss: + best_val_loss = results_va[0] + current_patience = patience + results_te, target, predictions = 
evaluate(loader_te) + + print("target ", target) + print("predictions ", np.squeeze(predictions.numpy())) + print("len(target) ", len(target)) + print("len(predictions) ", len(np.squeeze(predictions.numpy()))) + + analyze_results(target, np.squeeze(predictions.numpy())) + + else: + current_patience -= 1 + if current_patience == 0: + print("Early stopping") + break + + # Print results + results_tr = np.array(results_tr) + results_tr = np.average(results_tr[:, :-1], 0, weights=results_tr[:, -1]) + print( + "Train loss: {:.4f}, acc: {:.4f} | " + "Valid loss: {:.4f}, acc: {:.4f} | " + "Test loss: {:.4f}, acc: {:.4f}".format( + *results_tr, *results_va, *results_te + ) + ) + + # Reset epoch + results_tr = [] + step = 0 + + + + diff --git a/mlmc/metamodel/flow_task_GNN_2.py b/mlmc/metamodel/flow_task_GNN_2.py new file mode 100644 index 00000000..d4762083 --- /dev/null +++ b/mlmc/metamodel/flow_task_GNN_2.py @@ -0,0 +1,272 @@ +import os +import warnings +import logging +logging.getLogger('tensorflow').disabled = True +logging.getLogger('absl').disabled = True +warnings.simplefilter("ignore") +import numpy as np +#os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Run on CPU only +import tensorflow as tf +from tensorflow.keras.losses import MeanSquaredError, SparseCategoricalCrossentropy, KLDivergence +from tensorflow.keras.metrics import mean_squared_error, kl_divergence +from tensorflow.keras.callbacks import History +from tensorflow.keras.optimizers import Adam +from tensorflow.keras.regularizers import l2 +from mlmc.metamodel.postprocessing import analyze_results, plot_loss, estimate_density +from spektral.data import MixedLoader +from mlmc.metamodel.flow_dataset import FlowDataset +from spektral.layers import GCNConv, GlobalSumPool, ChebConv, GraphSageConv, ARMAConv, GATConv, APPNPConv, GINConv +from spektral.utils.sparse import sp_matrix_to_sp_tensor +from tensorflow.keras.layers.experimental import preprocessing +import copy +from mlmc.metamodel.custom_methods import 
abs_activation, var_loss_function +import keras.backend as K + +from mlmc.metamodel.graph_models import Net1 +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +class GNN: + def __init__(self, **kwargs): + + self._epochs = kwargs.get('epochs', 100) + #self._val_split = kwargs.get('var_split', 0.2) + #self._verbose = kwargs.get('verbose', False) + + self._hidden_activation = kwargs.get('hidden_activation', 'relu') + self._hidden_regularizer = kwargs.get('hidden_reqularizer', None) + self._output_activation = kwargs.get('output_activation', 'linear') + self._conv_layer = kwargs.get('conv_layer', None) + #self._n_hidden_layers = kwargs.get('n_hidden_layers', 1) + #self._n_hidden_neurons = kwargs.get('n_hidden_neurons', [64]) # Number of hidden neurons for each hidden layer + + self._loss = kwargs.get('loss', MeanSquaredError) + self._accuracy_func = kwargs.get('accuracy_func', mean_squared_error) + self._optimizer = kwargs.get('optimizer', tf.optimizers.Adam(learning_rate=0.001)) + self._normalizer = kwargs.get('normalizer', preprocessing.Normalization()) + self._patience = kwargs.get('patience', 20) + self._verbose = kwargs.get('verbose', True) + + self._train_loss = [] + self._val_loss = [] + self._test_loss = [] + self._train_acc = [] + self._learning_rates = [] + + self.val_targets = [] + self._states = {} + self._total_n_steps = 0 + + if 'model_class' in kwargs: + model_class = kwargs.get('model_class') + net_model_config = kwargs.get('net_model_config') + model = model_class(**net_model_config) + else: + model = kwargs.get('model') + + if model is None: + self._model = Net1(conv_layer=self._conv_layer, hidden_activation=self._hidden_activation, + output_activation=self._output_activation, + kernel_regularization=self._hidden_regularizer, + normalizer=self._normalizer) + else: + self._model = model + # self._model = model(conv_layer=self._conv_layer, hidden_activation=self._hidden_activation, + # output_activation=self._output_activation, + # 
kernel_regularization=self._hidden_regularizer, + # normalizer=self._normalizer) + #self._model = model(n_labels=1, output_activation="relu") + + self._model.optimizer = self._optimizer + + def fit(self, loader_tr, loader_va, loader_te): + """ + Training procedure + """ + # Setup training + best_val_loss = np.inf + current_patience = self._patience + step = 0 + self._total_n_steps = 0 + + train_targets = True + train_targets_list = [] + + # Training loop + results_tr = [] + for batch in loader_tr: + step += 1 + self._total_n_steps += 1 + + # Training step + inputs, target = batch + + if train_targets: + train_targets_list.extend(target) + + loss, acc = self.train_on_batch(inputs, target) + self._train_loss.append(loss) + self._train_acc.append(acc) + results_tr.append((loss, acc, len(target))) + + results_va = self.evaluate(loader_va) + self._val_loss.append(results_va[0]) + + if step == loader_tr.steps_per_epoch: # step_per_epoch = int(np.ceil(len(self.dataset) / self.batch_size)) + train_targets = False + # results_va = self.evaluate(loader_va) + # self._val_loss.append(results_va[0]) + #print("results_va[0] ", results_va[0]) + + if results_va[0] < best_val_loss: + best_val_loss = results_va[0] + current_patience = self._patience + self._states = {} + self._states[results_va[0]] = copy.deepcopy(self) + results_te = self.evaluate(loader_te) + self._test_loss.append(results_te[0]) + else: + current_patience -= 1 + #results_tr_0 = np.array(results_tr) + if current_patience == 0: + print("Early stopping") + break + + lr = K.eval(self._optimizer._decayed_lr(tf.float32)) + self._learning_rates.append(lr) + + # Print results + results_tr = np.array(results_tr) + results_tr = np.average(results_tr[:, :-1], 0, weights=results_tr[:, -1]) + if self._verbose: + print( + "Train loss: {:.12f}, acc: {:.12f} | " + "Valid loss: {:.12f}, acc: {:.12f} | " + "Test loss: {:.12f}, acc: {:.12f} | LR: {:.12f}".format( + *results_tr, *results_va, *results_te, lr + ) + ) + + # Reset epoch 
+ results_tr = [] + step = 0 + + return train_targets_list + + # Training function + @tf.function + def train_on_batch(self, inputs, target): + with tf.GradientTape() as tape: + predictions = self._model(inputs, training=True) + loss = self._loss(target, predictions) + sum(self._model.losses) #+ 5 * var_loss_function(target, predictions) + acc = tf.reduce_mean(self._accuracy_func(target, predictions)) + + gradients = tape.gradient(loss, self._model.trainable_variables) + self._optimizer.apply_gradients(zip(gradients, self._model.trainable_variables)) + + return loss, acc + + def evaluate(self, loader): + step = 0 + results = [] + + if len(self.val_targets) > 0: + val_targets = False + else: + val_targets = True + + for batch in loader: + step += 1 + inputs, target = batch + + if val_targets: + self.val_targets.extend(target) + + loss, acc = self.evaluate_batch(inputs, target) + + #predictions = self._model(inputs, training=False) + #loss = self._loss(target, predictions) + #acc = tf.reduce_mean(self._accuracy_func(target, predictions)) + results.append((loss, acc, len(target))) # Keep track of batch size + if step == loader.steps_per_epoch: + results = np.array(results) + return np.average(results[:, :-1], axis=0, weights=results[:, -1]) + + @tf.function + def evaluate_batch(self, inputs, target): + predictions = self._model(inputs, training=False) + + loss = self._loss(target, predictions) + acc = tf.reduce_mean(self._accuracy_func(target, predictions)) + + return loss, acc + + def predict(self, loader): + targets = [] + predictions = [] + step = 0 + for batch in loader: + + step += 1 + inputs, target = batch + targets.extend(target) + predictions.extend(self._model(inputs, training=False)) + + if step == loader.steps_per_epoch: + return targets, predictions + + return targets, predictions + + +# if __name__ == "__main__": +# # Parameters +# #conv_layer = GCNConv +# conv_layer = ChebConv # Seems better than GCNConv, good distribution of predictions +# # # 
conv_layer = GraphSageConv # Seems better than ChebConv, good loss but very narrow distribution of predictions +# # # conv_layer = ARMAConv # Seems worse than GraphSageConv +# # # conv_layer = GATConv # Slow and not better than GraphSageConv +# # # conv_layer = APPNPConv # Not bad but worse than GraphSageConv +# # # conv_layer = GINConv # it is comparable to APPNPConv +# # act_func = "relu" # "tanh"#"elu" # ReLU keep predictions above zero +# loss = MeanSquaredError() +# optimizer = tf.optimizers.Adam(learning_rate=0.001) +# batch_size = 500 +# epochs = 100 +# +# # Load data +# data = FlowDataset() +# data = data#[:10000] +# #data.a = conv_layer.preprocess(data.a) +# data.a = sp_matrix_to_sp_tensor(data.a) +# +# train_data_len = int(len(data) * 0.8) +# train_data_len = 10000 +# +# # Train/valid/test split +# data_tr, data_te = data[:train_data_len], data[train_data_len:], +# np.random.shuffle(data_tr) +# +# val_data_len = int(len(data_tr) * 0.2) +# data_tr, data_va = data_tr[:-val_data_len], data_tr[-val_data_len:] +# +# print("data_tr len ", len(data_tr)) +# print("data_va len ", len(data_va)) +# print("data_te len ", len(data_te)) +# +# # We use a MixedLoader since the dataset is in mixed mode +# loader_tr = MixedLoader(data_tr, batch_size=batch_size, epochs=epochs) +# loader_va = MixedLoader(data_va, batch_size=batch_size) +# loader_te = MixedLoader(data_te, batch_size=batch_size) +# +# gnn = GNN(loss=loss, optimizer=optimizer, conv_layer=conv_layer, output_activation=abs_activation, +# hidden_activation='relu', patience=20) +# gnn.fit(loader_tr, loader_va, loader_te) +# +# targets, predictions = gnn.predict(loader_te) +# predictions = np.squeeze(predictions) +# +# plot_loss(gnn._train_loss, gnn._val_loss) +# analyze_results(targets, predictions) +# +# estimate_density(targets) +# estimate_density(predictions) diff --git a/mlmc/metamodel/flow_task_NN.py b/mlmc/metamodel/flow_task_NN.py new file mode 100644 index 00000000..98c80686 --- /dev/null +++ 
b/mlmc/metamodel/flow_task_NN.py @@ -0,0 +1,403 @@ +import tensorflow as tf +import numpy as np +from tensorflow.keras.metrics import mean_squared_error, kl_divergence +from tensorflow.keras.layers.experimental import preprocessing +from mlmc.metamodel.graph_models import dnn_model +from tensorflow import keras +from tensorflow.keras import layers +from tensorflow.keras.layers.experimental import preprocessing + + +# epochs = 200 +# +# +# # Parameters +# loss_fn = "mean_squared_error" +# optimizer_nn = tf.optimizers.Adam(learning_rate=0.1) +# optimizer_dnn = tf.optimizers.Adam(learning_rate=0.001) +# +# +# # Load data +# data = FlowDataset() +# +# dataset = data.dataset[:5000] +# print("len(dataset) ", len(dataset)) +# dataset = dataset.dropna() +# +# train_dataset = dataset.sample(frac=0.8, random_state=0) +# test_dataset = dataset.drop(train_dataset.index) +# +# train_x = np.squeeze(np.stack(train_dataset.x.to_numpy(), axis=0)) +# train_x = np.asarray(train_x).astype('float64') +# train_y = train_dataset.y.to_numpy() +# train_y = np.asarray(train_y).astype('float64') +# +# test_x = np.squeeze(np.stack(test_dataset.x.to_numpy(), axis=0)) +# test_x = np.asarray(test_x).astype('float32') +# test_y = test_dataset.y.to_numpy() +# test_y = np.asarray(test_y).astype('float32') + + +# #################### +# ## Neural network - very bad for our purposes ## +# #################### +# +# normalizer = preprocessing.Normalization() +# linear_model = tf.keras.Sequential([ +# normalizer, +# layers.Dense(units=1) +# ]) +# +# linear_model.compile( +# optimizer=optimizer_nn, +# loss=loss_fn) +# # +# # history = linear_model.fit( +# # train_x, train_y, +# # epochs=100, +# # # suppress logging +# # verbose=0, +# # # Calculate validation results on 20% of the training data +# # validation_split=0.2) +# # +# # linear_model.summary() +# # +# # +# # plot_loss(history) +# # predictions = np.squeeze(linear_model.predict(test_x)) +# # analyze_results(target=test_y, 
predictions=predictions) + + + +######################### +# Deep neural network # +######################### + + +class DNN: + def __init__(self, **kwargs): + self._epochs = kwargs.get('epochs', 100) + self._val_split = kwargs.get('var_split', 0.2) + self._verbose = kwargs.get('verbose', False) + + self._hidden_activation = kwargs.get('hidden_activation', 'relu') + self._hidden_regularizer = kwargs.get('hidden_reqularizer', None) + self._output_activation = kwargs.get('output_activation', 'linear') + self._n_hidden_layers = kwargs.get('n_hidden_layers', 1) + self._n_hidden_neurons = kwargs.get('n_hidden_neurons', [64]) # Number of hidden neurons for each hidden layer + + self._loss = kwargs.get('loss', 'mean_squared_error') + self._optimizer = kwargs.get('optimizer', tf.optimizers.Adam(learning_rate=0.001)) + self._normalizer = kwargs.get('normalizer', preprocessing.Normalization()) + + self.history = None # Set in fit method + self._create_model() + + def _create_model(self): + hidden_layers = [] + for i in range(self._n_hidden_layers): + if self._hidden_regularizer is not None: + hidden_layers.append( + layers.Dense(self._n_hidden_neurons[i], + kernel_regularizer=self._hidden_regularizer, + activation=self._hidden_activation)) + else: + hidden_layers.append( + layers.Dense(self._n_hidden_neurons[i],activation=self._hidden_activation)) + + self._model = keras.Sequential([ + self._normalizer, + *hidden_layers, + layers.Dense(1, activation=self._output_activation) + ]) + + self._model.compile(loss=self._loss, optimizer=self._optimizer) + + def fit(self, train_input, train_output): + self.history = self._model.fit(train_input, train_output, validation_split=self._val_split, + verbose=self._verbose, epochs=self._epochs) + + def predict(self, test_input): + return self._model.predict(test_input) + + def summary(self): + """ + Should be called after fit method + """ + return self._model.summary() + + +class DNN_2: + def __init__(self, **kwargs): + print("######## 
Create GNN #########") + + self._epochs = kwargs.get('epochs', 100) + #self._val_split = kwargs.get('var_split', 0.2) + #self._verbose = kwargs.get('verbose', False) + + self._hidden_activation = kwargs.get('hidden_activation', 'relu') + self._hidden_regularizer = kwargs.get('hidden_reqularizer', None) + self._output_activation = kwargs.get('output_activation', 'linear') + self._conv_layer = kwargs.get('conv_layer', None) + #self._n_hidden_layers = kwargs.get('n_hidden_layers', 1) + #self._n_hidden_neurons = kwargs.get('n_hidden_neurons', [64]) # Number of hidden neurons for each hidden layer + + self._loss = kwargs.get('loss', mean_squared_error) + self._final_loss = kwargs.get('final_loss', mean_squared_error) + self._accuracy_func = kwargs.get('accuracy_func', mean_squared_error) + self._optimizer = kwargs.get('optimizer', tf.optimizers.Adam(learning_rate=0.001)) + self._normalizer = kwargs.get('normalizer', preprocessing.Normalization()) + self._patience = kwargs.get('patience', 20) + self._verbose = kwargs.get('verbose', True) + #self._loss_changed = False + + self._train_loss = [] + self._val_loss = [] + self._test_loss = [] + + self._loss_params = {} + self._n_moments = 3 + + self.val_targets = [] + self._states = {} + self._total_n_steps = 0 + + if 'model_class' in kwargs: + model_class = kwargs.get('model_class') + net_model_config = kwargs.get('net_model_config') + model = model_class(**net_model_config) + print("model class model ", model) + else: + model = kwargs.get('model') + + if model is None: + self._model = DNNNet(conv_layer=self._conv_layer, hidden_activation=self._hidden_activation, + output_activation=self._output_activation, + kernel_regularization=self._hidden_regularizer, + normalizer=self._normalizer) + else: + self._model = model + # self._model = model(conv_layer=self._conv_layer, hidden_activation=self._hidden_activation, + # output_activation=self._output_activation, + # kernel_regularization=self._hidden_regularizer, + # 
normalizer=self._normalizer) + #self._model = model(n_labels=1, output_activation="relu") + + def fit(self, train, validation, test): + """ + Training procedure + """ + print("fit init loss ", self._loss) + # Setup training + self._best_val_loss = np.inf + self._current_patience = self._patience + step = 0 + self._total_n_steps = 0 + + train_targets = True + train_targets_list = [] + + # Training loop + results_tr = [] + + step += 1 + self._total_n_steps += 1 + + # Training step + inputs, target = train.x, train.y + + if train_targets: + train_targets_list.extend(target) + + loss, acc = self.train_on_batch(inputs, target) + results_tr.append((loss, acc, len(target))) + + results_va = self.evaluate(validation) + self._val_loss.append(results_va[0]) + + + train_targets = False + # results_va = self.evaluate(loader_va) + # self._val_loss.append(results_va[0]) + #print("results_va[0] ", results_va[0]) + + #print("self best val loss ", self._best_val_loss) + + if (results_va[0] + results_tr[-1][0]) < self._best_val_loss:# or (self._val_loss[-1] < self._val_loss[-2] and self._val_loss[-2] < self._val_loss[-3]): # Continue to learn if validation loss is decreasing + self._best_val_loss = (results_va[0] + results_tr[-1][0])#results_va[0] + self._current_patience = self._patience + self._states = {} + results_te = self.evaluate(test) + self._test_loss.append(results_te[0]) + else: + self._current_patience -= 1 + #results_tr_0 = np.array(results_tr) + loss_tr = results_va[0] + self._states[loss_tr] = self + + if self._current_patience == 0: + #if self._update_loss(patience=True): + print("Early stopping") + return + + # Print results + results_tr = np.array(results_tr) + results_tr = np.average(results_tr[:, :-1], 0, weights=results_tr[:, -1]) + + self._train_loss.append(results_tr[0]) + if self._verbose: + print( + "Train loss: {:.4f}, acc: {:.4f} | " + "Valid loss: {:.4f}, acc: {:.4f} | " + "Test loss: {:.4f}, acc: {:.4f}".format( + *results_tr, *results_va, *results_te + 
) + ) + #self._update_loss() + # Reset epoch + results_tr = [] + step = 0 + + return train_targets_list + + def _update_loss(self, patience=False): + condition_max_loss = self._loss_params["loss_max"] #/ self._n_moments + #condition_max_loss = self._loss_params["loss_max"] + # print("self.train_loss ", self._train_loss) + m_increment = 1 + + if patience and self._n_moments <= self._loss_params["max_moments"]: + self._n_moments += m_increment + moments_fn = self._loss_params['moments_class'](self._n_moments, self._loss_params["domain"]) + self._loss = self._final_loss(moments_fn=moments_fn) + # self._loss = MSE_moments_2(moments_fn=moments_fn) + + self._best_val_loss = np.inf + print("self._loss ", self._loss) + elif patience: + return True + + # if self._train_loss[-1] > 1e10: + # moments_fn = self._loss_params['moments_class'](self._n_moments, self._loss_params["domain"]) + # #self._loss = self._final_loss(moments_fn=moments_fn) + # self._loss = MSE_moments_2(moments_fn=moments_fn) + # else: + # moments_fn = self._loss_params['moments_class'](self._n_moments, self._loss_params["domain"]) + # self._loss = self._final_loss(moments_fn=moments_fn) + + if self._n_moments <= self._loss_params["max_moments"] and len(self._train_loss) > 0\ + and self._train_loss[-1] < condition_max_loss and self._val_loss[-1] < condition_max_loss: + # print("self._train_loss ", self._train_loss) + # print("change loss, n_moments {}, last train loss: {}".format(self._n_moments, self._train_loss[-1])) + #self._n_moments = self._loss_params["max_moments"] + + print("self._n_moments ", self._n_moments) + self._n_moments += m_increment + moments_fn = self._loss_params['moments_class'](self._n_moments, self._loss_params["domain"]) + self._loss = self._final_loss(moments_fn=moments_fn) + #self._loss = MSE_moments_2(moments_fn=moments_fn) + + self._best_val_loss = np.inf + print("self._loss ", self._loss) + + # Training function + #@tf.function + def train_on_batch(self, inputs, target): + with 
tf.GradientTape() as tape: + predictions = self._model(inputs, training=True) + loss = self._loss(target, predictions) + sum(self._model.losses) #+ 5 * var_loss_function(target, predictions) + #loss = 100 * var_loss_function(target, predictions) + acc = tf.reduce_mean(self._accuracy_func(target, predictions)) + + gradients = tape.gradient(loss, self._model.trainable_variables) + self._optimizer.apply_gradients(zip(gradients, self._model.trainable_variables)) + + return loss, acc + + def evaluate(self, data): + step = 0 + results = [] + + if len(self.val_targets) > 0: + val_targets = False + else: + val_targets = True + + step += 1 + inputs, target = data.x, data.y + + if val_targets: + self.val_targets.extend(target) + + predictions = self._model(inputs, training=False) + + #print("evaluate loss function ", self._loss) + + loss = self._loss(target, predictions) + #print("target ", target) + #print("loss ", np.mean((target - predictions)**2)) + acc = tf.reduce_mean(self._accuracy_func(target, predictions)) + results.append((loss, acc, len(target))) # Keep track of batch size + results = np.array(results) + return np.average(results[:, :-1], axis=0, weights=results[:, -1]) + + def predict(self, data): + targets = [] + predictions = [] + + inputs, target = data.x, data.y + targets.extend(target) + predictions.extend(self._model(inputs, training=False)) + + return targets, predictions + + +# def build_and_compile_model(normalizer): +# model = keras.Sequential([ +# normalizer, +# layers.Dense(450, activation='relu'), #64 +# #layers.Dense(64, activation='relu'), +# layers.Dense(1, activation=abs_activation) +# ]) +# +# model.compile(loss=loss_fn, +# optimizer=optimizer_dnn) +# return model +# +# +# normalizer = preprocessing.Normalization() +# dnn_model = build_and_compile_model(normalizer) +# dnn_history = dnn_model.fit( +# train_x, train_y, +# validation_split=0.2, +# verbose=0, epochs=epochs) +# dnn_model.summary() +# +# plot_loss(dnn_history) +# +# predictions = 
np.squeeze(dnn_model.predict(test_x)) +# +# print("target ", test_y) +# print("predictions ", predictions) +# +# +# for index, (t, p) in enumerate(zip(test_y, predictions)): +# if index > 100: +# break +# print("t: {}, p: {}".format(t, p)) +# +# print("target mean ", np.mean(test_y)) +# print("predictions mean ", np.mean(predictions)) +# +# print("target var ", np.var(test_y)) +# print("predictions var ", np.var(predictions)) +# +# analyze_results(target=test_y, predictions=predictions) + + + + + + + + diff --git a/mlmc/metamodel/graph_models.py b/mlmc/metamodel/graph_models.py new file mode 100644 index 00000000..beb7aea7 --- /dev/null +++ b/mlmc/metamodel/graph_models.py @@ -0,0 +1,128 @@ +from tensorflow.keras import Model +from tensorflow.keras.layers import Dense +from spektral.layers import GlobalSumPool, GlobalMaxPool, GlobalAvgPool +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras import layers +from mlmc.metamodel.custom_methods import abs_activation +from tensorflow.keras.layers.experimental import preprocessing + + +# Build model +class Net1(Model): + def __init__(self, conv_layer, hidden_activation, output_activation, kernel_regularization, normalizer, **kwargs): + super().__init__(**kwargs) + #self.normalizer = normalizer + #self.norm_layer = tf.keras.layers.LayerNormalization(axis=1) + self.conv1 = conv_layer(256, activation=hidden_activation, kernel_regularizer=kernel_regularization) + # self.conv2 = conv_layer(128, K=2, activation=hidden_activation, kernel_regularizer=kernel_regularization) + # self.conv3 = conv_layer(16, K=2, activation=hidden_activation, kernel_regularizer=kernel_regularization) + # self.conv4 = conv_layer(8, K=2,activation=hidden_activation, kernel_regularizer=kernel_regularization) + # self.conv2 = conv_layer(32, K=2, activation=hidden_activation, kernel_regularizer=kernel_regularization) + # self.conv3 = conv_layer(16, K=2, activation=hidden_activation, kernel_regularizer=kernel_regularization) + # 
self.conv3 = conv_layer(8, activation=hidden_activation, kernel_regularizer=kernel_regularization) + # self.conv4 = conv_layer(4, activation=hidden_activation, kernel_regularizer=kernel_regularization) + #self.conv3 = conv_layer(64, activation=hidden_activation, kernel_regularizer=kernel_regularization) + self.flatten = GlobalSumPool() + #self.fc1 = Dense(32, activation=hidden_activation) + self.fc2 = Dense(1)#, activation=output_activation) # linear activation for output neuron + + def call(self, inputs): + x, a = inputs + #print("x ", x) + #x = self.normalizer(x) + #x = self.norm_layer(x) + #print("normalized x ", x) + + #print("x[0,0,:] ", x[0, 0, :]) + x = self.conv1([x, a]) + + #print("x[0,0,:] ", x[0,0,:]) + # print("x[0, 0, :] ", tf.make_ndarray(x[0,0,:].op.get_attr('net1/strided_slice_1:0'))) + #print("x.shape ", x.shape) + # x = self.conv2([x, a]) + # # print("conv2 x shape", x.shape) + # x = self.conv3([x, a]) + # x = self.conv4([x, a]) + output1 = self.flatten(x) + #output2 = self.fc1(output1) + output = self.fc2(output1) + + # print("x1 " ,x1) + # print("output1 ", output1) + # print("output2 ", output2) + # print("output ", output) + #print("output ", output.shape) + + return output + + +# Build model +class NetGCN(Model): + # Setup from https://arxiv.org/pdf/1901.06181.pdf + def __init__(self, conv_layer, hidden_activation, output_activation, kernel_regularization, normalizer, **kwargs): + super().__init__(**kwargs) + + #self.normalizer = normalizer + #self.norm_layer = tf.keras.layers.LayerNormalization(axis=1) + self.conv1 = conv_layer(256, activation=hidden_activation, kernel_regularizer=kernel_regularization) + #.conv2 = conv_layer(32, activation=hidden_activation, kernel_regularizer=kernel_regularization) + # self.conv3 = conv_layer(16, activation=hidden_activation, kernel_regularizer=kernel_regularization) + # self.conv4 = conv_layer(16, activation=hidden_activation, kernel_regularizer=kernel_regularization) + # self.conv5 = conv_layer(32, 
activation=hidden_activation, kernel_regularizer=kernel_regularization) + self.flatten = GlobalSumPool() + #self.fc1 = Dense(16, activation=hidden_activation) + self.fc2 = Dense(1)#, activation=output_activation) # linear activation for output neuron + + def call(self, inputs): + x, a = inputs + #print("x ", x) + #x = self.normalizer(x) + #x = self.norm_layer(x) + #print("normalized x ", x) + + #print("x[0,0,:] ", x[0, 0, :]) + x = self.conv1([x, a]) + #x = self.conv2([x, a]) + # x = self.conv3([x, a]) + # x = self.conv4([x, a]) + # x = self.conv5([x, a]) + # + # #print("x[0,0,:] ", x[0,0,:]) + # print("x[0, 0, :] ", tf.make_ndarray(x[0,0,:].op.get_attr('net1/strided_slice_1:0'))) + # print("x.shape ", x.shape) + # x = self.conv2([x, a]) + # x = self.conv3([x, a]) + output = self.flatten(x) + #output = self.fc1(output) + output = self.fc2(output) + + #print("output ", output.shape) + + return output + + +def cnn_model(): + return keras.Sequential([ + #@TODO: Try normalization + #self._normalizer, # Seems worse results with normalization + layers.Conv1D(filters=256, kernel_size=3, activation='relu', input_shape=(958, 1)),#input_shape=(958, 1)), + #layers.BatchNormalization(), + layers.AveragePooling1D(pool_size=2), + layers.Conv1D(filters=128, kernel_size=3, activation='relu'), + #layers.BatchNormalization(), + layers.AveragePooling1D(pool_size=2), + + layers.Flatten(), + layers.Dense(64, activation='relu'), + layers.Dense(1, activation=abs_activation) + ]) + + +def dnn_model(): + return keras.Sequential([ + preprocessing.Normalization(), + layers.Dense(32, activation="relu"), + #layers.Dense(32, activation="relu"), + layers.Dense(1, activation=abs_activation) + ]) \ No newline at end of file diff --git a/mlmc/metamodel/main.py b/mlmc/metamodel/main.py new file mode 100644 index 00000000..464fd9ae --- /dev/null +++ b/mlmc/metamodel/main.py @@ -0,0 +1,149 @@ +import os +import numpy as np +import pandas as pd +from mlmc.tool.hdf5 import HDF5 +import matplotlib.pyplot 
as plt + +from sklearn.model_selection import train_test_split + +DATA_PATH = "/home/martin/Documents/metamodels/data" + +def get_inputs(dir): + fields_file = os.path.join(dir, "fine_fields_sample.msh") + input = [] + with open(fields_file, "r") as r: + fields = r.readlines() + for f in fields[12:]: + line = f.split(" ") + if len(line) > 1: + input.append(float(line[1])) + else: + break + return input + + +def preprocess_data(): + dir_path = "/home/martin/Documents/metamodels/data/L1/test/01_cond_field/output" + hdf = HDF5(file_path="/home/martin/Documents/metamodels/data/L1/test/01_cond_field/mlmc_1.hdf5", + load_from_file=True) + level_group = hdf.add_level_group(level_id=str(0)) + collected = zip(level_group.get_collected_ids(), level_group.collected()) + + df_values = [] + for sample_id, col_values in collected: + output_value = col_values[0, 0] + sample_dir = os.path.join(dir_path, sample_id) + if os.path.isdir(sample_dir): + input = get_inputs(sample_dir) + d = {'x': np.array(input), 'y': output_value} + df_values.append(d) + + df = pd.DataFrame(df_values) + df.to_pickle(os.path.join(DATA_PATH, "data.pkl")) + + +def load_data(): + df = pd.read_pickle(os.path.join(DATA_PATH, "data.pkl")) + return df + + +def data_analysis(df): + print(df.info()) + print(df.y.describe()) + + # df.y.plot.hist(bins=50, logx=True) + # plt.show() + df.y.plot.kde(bw_method=0.3) + plt.xlim([-5, df.y.max()]) + plt.show() + + +def support_vector_regression(df): + from sklearn.svm import SVR + from sklearn.preprocessing import StandardScaler + print("df. 
info ", df.info) + train, test = train_test_split(df[:8000], test_size=0.2) + print("train describe", train.describe()) + print("test describe ", test.describe()) + + x = np.stack(train.x.to_numpy(), axis=0) + y = train.y.to_numpy() + + sc_X = StandardScaler() + sc_y = StandardScaler() + x = sc_X.fit_transform(x) + # print("y.shape ", y.shape) + # print("y.reshape(-1, 1) ", y.reshape(-1, 1).shape) + y = sc_y.fit_transform(y.reshape(-1, 1)) + + test_x = sc_X.fit_transform(np.stack(test.x.to_numpy(), axis=0)) + test_y = test.y.to_numpy().reshape(-1, 1) + test_y = sc_y.fit_transform(test.y.to_numpy().reshape(-1, 1)) + + # plt.hist(y, bins=50, alpha=0.5, label='train', density=True) + # plt.hist(test_y, bins=50, alpha=0.5, label='test', density=True) + # plt.legend(loc='upper right') + # # plt.xlim(-0.5, 1000) + # plt.yscale('log') + # plt.show() + # exit() + + # print("x.shape ", x.shape) + # print("y.shape ", y.shape) + # exit() + + #svr_rbf = SVR(kernel='rbf', verbose=True) # 'linear' kernel fitting is never-ending and 'poly' kernel gives very bad score (e.g. -2450), sigmoid gives also bad score (e.g. 
-125) + svr_rbf = SVR(kernel='poly', degree=50, verbose=True) + svr_rbf.fit(x, y) + + train_error = svr_rbf.score(x, y) + + test_error = svr_rbf.score(test_x, test_y) + + predictions = svr_rbf.predict(test_x) + + plt.hist(test_y, bins=50, alpha=0.5, label='target', density=True) + plt.hist(predictions, bins=50, alpha=0.5, label='predictions', density=True) + + # plt.hist(targets - predictions, bins=50, alpha=0.5, label='predictions', density=True) + plt.legend(loc='upper right') + # plt.xlim(-0.5, 1000) + plt.yscale('log') + plt.show() + + plt.hist(test_y, bins=50, alpha=0.5, label='target', density=True) + plt.hist(predictions, bins=50, alpha=0.5, label='predictions', density=True) + + # plt.hist(targets - predictions, bins=50, alpha=0.5, label='predictions', density=True) + plt.legend(loc='upper right') + # plt.xlim(-0.5, 1000) + #plt.yscale('log') + plt.show() + + exit() + + print("train error ", train_error) + print("test error ", test_error) + + +def svr_run(): + # preprocess_data() + df = load_data() + # data_analysis(df) + support_vector_regression(df) + + +if __name__ == "__main__": + # import cProfile + # import pstats + # pr = cProfile.Profile() + # pr.enable() + svr_run() + # my_result = svr_run() + # + # pr.disable() + # ps = pstats.Stats(pr).sort_stats('cumtime') + # ps.print_stats() + + + diff --git a/mlmc/metamodel/mnist.py b/mlmc/metamodel/mnist.py new file mode 100644 index 00000000..59a2158c --- /dev/null +++ b/mlmc/metamodel/mnist.py @@ -0,0 +1,167 @@ +import numpy as np +import tensorflow as tf +from tensorflow.keras import Model +from tensorflow.keras.layers import Dense +from tensorflow.keras.losses import SparseCategoricalCrossentropy +from tensorflow.keras.metrics import sparse_categorical_accuracy +from tensorflow.keras.optimizers import Adam +from tensorflow.keras.regularizers import l2 + +from spektral.data import MixedLoader +from spektral.datasets.mnist import MNIST +from spektral.layers import GCNConv, GlobalSumPool +from 
spektral.layers.ops import sp_matrix_to_sp_tensor + + + +print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU'))) + + +# Parameters +batch_size = 32 # Batch size +epochs = 1000 # Number of training epochs +patience = 10 # Patience for early stopping +l2_reg = 5e-4 # Regularization rate for l2 + +# Load data +data = MNIST() + +print("data.a ", data.a) + + +# The adjacency matrix is stored as an attribute of the dataset. +# Create filter for GCN and convert to sparse tensor. +data.a = GCNConv.preprocess(data.a) +data.a = sp_matrix_to_sp_tensor(data.a) + + +# Train/valid/test split +data_tr, data_te = data[:-10000], data[-10000:] +np.random.shuffle(data_tr) +data_tr, data_va = data_tr[:-10000], data_tr[-10000:] + + + +# for tr in data_tr[:10]: +# #print(tr.x) +# print(tr.y) +# exit() + +print("data_tr[0] ", data_tr[0].n_node_features) + + +# We use a MixedLoader since the dataset is in mixed mode +loader_tr = MixedLoader(data_tr, batch_size=batch_size, epochs=epochs) +loader_va = MixedLoader(data_va, batch_size=batch_size) +loader_te = MixedLoader(data_te, batch_size=batch_size) + + +# Build model +class Net(Model): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.conv1 = GCNConv(32, activation="elu", kernel_regularizer=l2(l2_reg)) + self.conv2 = GCNConv(32, activation="elu", kernel_regularizer=l2(l2_reg)) + self.flatten = GlobalSumPool() + self.fc1 = Dense(512, activation="relu") + self.fc2 = Dense(10, activation="softmax") # MNIST has 10 classes + + def call(self, inputs): + x, a = inputs + x = self.conv1([x, a]) + x = self.conv2([x, a]) + output = self.flatten(x) + output = self.fc1(output) + output = self.fc2(output) + + return output + + +# Create model +model = Net() +optimizer = Adam() +loss_fn = SparseCategoricalCrossentropy() + + +# Training function +@tf.function +def train_on_batch(inputs, target): + with tf.GradientTape() as tape: + predictions = model(inputs, training=True) + + print("predictions ", 
predictions) + print("target" , target) + loss = loss_fn(target, predictions) + sum(model.losses) + acc = tf.reduce_mean(sparse_categorical_accuracy(target, predictions)) + + gradients = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients(zip(gradients, model.trainable_variables)) + return loss, acc + + +# Evaluation function +def evaluate(loader): + step = 0 + results = [] + for batch in loader: + step += 1 + inputs, target = batch + predictions = model(inputs, training=False) + loss = loss_fn(target, predictions) + acc = tf.reduce_mean(sparse_categorical_accuracy(target, predictions)) + results.append((loss, acc, len(target))) # Keep track of batch size + if step == loader.steps_per_epoch: + results = np.array(results) + return np.average(results[:, :-1], 0, weights=results[:, -1]) + + +# Setup training +best_val_loss = 99999 +current_patience = patience +step = 0 + +# Training loop +results_tr = [] +for batch in loader_tr: + step += 1 + + # Training step + inputs, target = batch + + # print("inputs ", inputs) + # print("target ", target) + # + # print("len(inputs) ", len(inputs)) + # print("len(target) ", len(target)) + # print("inputs shape ", np.array(inputs).shape) + # exit() + + loss, acc = train_on_batch(inputs, target) + results_tr.append((loss, acc, len(target))) + + if step == loader_tr.steps_per_epoch: + results_va = evaluate(loader_va) + if results_va[0] < best_val_loss: + best_val_loss = results_va[0] + current_patience = patience + results_te = evaluate(loader_te) + else: + current_patience -= 1 + if current_patience == 0: + print("Early stopping") + break + + # Print results + results_tr = np.array(results_tr) + results_tr = np.average(results_tr[:, :-1], 0, weights=results_tr[:, -1]) + print( + "Train loss: {:.4f}, acc: {:.4f} | " + "Valid loss: {:.4f}, acc: {:.4f} | " + "Test loss: {:.4f}, acc: {:.4f}".format( + *results_tr, *results_va, *results_te + ) + ) + + # Reset epoch + results_tr = [] + step = 0 diff --git 
a/mlmc/metamodel/own_cheb_conv.py b/mlmc/metamodel/own_cheb_conv.py new file mode 100644 index 00000000..de54ded7 --- /dev/null +++ b/mlmc/metamodel/own_cheb_conv.py @@ -0,0 +1,378 @@ +import copy +import warnings + +import numpy as np +from scipy import linalg +from scipy import sparse as sp +from scipy.sparse.linalg import ArpackNoConvergence + +import tensorflow as tf +from tensorflow.keras import backend as K + +from spektral.layers import ops +from spektral.layers.convolutional.conv import Conv +import matplotlib.pyplot as plt + + +class OwnChebConv(Conv): + r""" + A Chebyshev convolutional layer from the paper + + > [Convolutional Neural Networks on Graphs with Fast Localized Spectral + Filtering](https://arxiv.org/abs/1606.09375)
+ > Michaël Defferrard et al. + + **Mode**: single, disjoint, mixed, batch. + + This layer computes: + $$ + \X' = \sum \limits_{k=0}^{K - 1} \T^{(k)} \W^{(k)} + \b^{(k)}, + $$ + where \( \T^{(0)}, ..., \T^{(K - 1)} \) are Chebyshev polynomials of \(\tilde \L\) + defined as + $$ + \T^{(0)} = \X \\ + \T^{(1)} = \tilde \L \X \\ + \T^{(k \ge 2)} = 2 \cdot \tilde \L \T^{(k - 1)} - \T^{(k - 2)}, + $$ + where + $$ + \tilde \L = \frac{2}{\lambda_{max}} \cdot (\I - \D^{-1/2} \A \D^{-1/2}) - \I. + $$ + + **Input** + + - Node features of shape `([batch], n_nodes, n_node_features)`; + - A list of K Chebyshev polynomials of shape + `[([batch], n_nodes, n_nodes), ..., ([batch], n_nodes, n_nodes)]`; can be computed with + `spektral.utils.convolution.chebyshev_filter`. + + **Output** + + - Node features with the same shape of the input, but with the last + dimension changed to `channels`. + + **Arguments** + + - `channels`: number of output channels; + - `K`: order of the Chebyshev polynomials; + - `activation`: activation function; + - `use_bias`: bool, add a bias vector to the output; + - `kernel_initializer`: initializer for the weights; + - `bias_initializer`: initializer for the bias vector; + - `kernel_regularizer`: regularization applied to the weights; + - `bias_regularizer`: regularization applied to the bias vector; + - `activity_regularizer`: regularization applied to the output; + - `kernel_constraint`: constraint applied to the weights; + - `bias_constraint`: constraint applied to the bias vector. 
+ + """ + + def __init__( + self, + channels, + K=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs + ): + super().__init__( + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs + ) + self.channels = channels + self.K = K + + # self.use_bias = use_bias + + def build(self, input_shape): + assert len(input_shape) >= 2 + input_dim = input_shape[0][-1] + + self.kernel = self.add_weight( + shape=(self.K, input_dim, self.channels), + initializer=self.kernel_initializer, + name="kernel", + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + ) + if self.use_bias: + self.bias = self.add_weight( + shape=(self.channels,), + initializer=self.bias_initializer, + name="bias", + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + ) + else: + self.bias = None + self.built = True + + def call(self, inputs): + x, a = inputs + + T_0 = x + output = K.dot(T_0, self.kernel[0]) + + if self.K > 1: + T_1 = ops.modal_dot(a, x) + output += K.dot(T_1, self.kernel[1]) + + # print("T_1 ", T_1) + # print("self.kernel[1] ", self.kernel[1]) + + for k in range(2, self.K): + T_2 = 2 * ops.modal_dot(a, T_1) - T_0 + output += K.dot(T_2, self.kernel[k]) + T_0, T_1 = T_1, T_2 + + #print("self use bias ", self.use_bias) + if self.use_bias: + #print("use bias") + output = K.bias_add(output, self.bias) + output = self.activation(output) + + return output + + @property + def config(self): + return {"channels": self.channels, "K": self.K} + + @staticmethod + def preprocess(a): + a = 
normalized_laplacian(a) + a = rescale_laplacian(a) + return a + + +def degree_matrix(A): + """ + Computes the degree matrix of the given adjacency matrix. + :param A: rank 2 array or sparse matrix. + :return: if A is a dense array, a dense array; if A is sparse, a sparse + matrix in DIA format. + """ + degrees = np.array(A.sum(1)).flatten() + if sp.issparse(A): + D = sp.diags(degrees) + else: + D = np.diag(degrees) + return D + + +def degree_power(A, k): + r""" + Computes \(\D^{k}\) from the given adjacency matrix. Useful for computing + normalised Laplacian. + :param A: rank 2 array or sparse matrix. + :param k: exponent to which elevate the degree matrix. + :return: if A is a dense array, a dense array; if A is sparse, a sparse + matrix in DIA format. + """ + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + degrees = np.power(np.array(A.sum(1)), k).ravel() + degrees[np.isinf(degrees)] = 0.0 + if sp.issparse(A): + D = sp.diags(degrees) + else: + D = np.diag(degrees) + return D + + +def normalized_adjacency(A, symmetric=True): + r""" + Normalizes the given adjacency matrix using the degree matrix as either + \(\D^{-1}\A\) or \(\D^{-1/2}\A\D^{-1/2}\) (symmetric normalization). + :param A: rank 2 array or sparse matrix; + :param symmetric: boolean, compute symmetric normalization; + :return: the normalized adjacency matrix. + """ + if symmetric: + #print("symmetric") + normalized_D = degree_power(A, -0.5) + #print("normalized D") + return normalized_D.dot(A).dot(normalized_D) + else: + normalized_D = degree_power(A, -1.0) + return normalized_D.dot(A) + + +def laplacian(A): + r""" + Computes the Laplacian of the given adjacency matrix as \(\D - \A\). + :param A: rank 2 array or sparse matrix; + :return: the Laplacian. 
+ """ + return degree_matrix(A) - A + + +def normalized_laplacian(A, symmetric=True): + r""" + Computes a normalized Laplacian of the given adjacency matrix as + \(\I - \D^{-1}\A\) or \(\I - \D^{-1/2}\A\D^{-1/2}\) (symmetric normalization). + :param A: rank 2 array or sparse matrix; + :param symmetric: boolean, compute symmetric normalization; + :return: the normalized Laplacian. + """ + if sp.issparse(A): + I = sp.eye(A.shape[-1], dtype=A.dtype) + else: + I = np.eye(A.shape[-1], dtype=A.dtype) + normalized_adj = normalized_adjacency(A, symmetric=symmetric) + return I - normalized_adj + + +def rescale_laplacian(L, lmax=None): + """ + Rescales the Laplacian eigenvalues in [-1,1], using lmax as largest eigenvalue. + :param L: rank 2 array or sparse matrix; + :param lmax: if None, compute largest eigenvalue with scipy.linalg.eisgh. + If the eigendecomposition fails, lmax is set to 2 automatically. + If scalar, use this value as largest eigenvalue when rescaling. + :return: + """ + if lmax is None: + try: + if sp.issparse(L): + lmax = sp.linalg.eigsh(L, 1, which="LM", return_eigenvectors=False)[0] + else: + n = L.shape[-1] + lmax = linalg.eigh(L, eigvals_only=True, eigvals=[n - 2, n - 1])[-1] + except ArpackNoConvergence: + lmax = 2 + if sp.issparse(L): + I = sp.eye(L.shape[-1], dtype=L.dtype) + else: + I = np.eye(L.shape[-1], dtype=L.dtype) + L_scaled = (2.0 / lmax) * L - I + return L_scaled + + +def gcn_filter(A, symmetric=True): + r""" + Computes the graph filter described in + [Kipf & Welling (2017)](https://arxiv.org/abs/1609.02907). 
+ :param A: array or sparse matrix with rank 2 or 3; + :param symmetric: boolean, whether to normalize the matrix as + \(\D^{-\frac{1}{2}}\A\D^{-\frac{1}{2}}\) or as \(\D^{-1}\A\); + :return: array or sparse matrix with rank 2 or 3, same as A; + """ + out = copy.deepcopy(A) + if isinstance(A, list) or (isinstance(A, np.ndarray) and A.ndim == 3): + for i in range(len(A)): + out[i] = A[i] + out[i][np.diag_indices_from(out[i])] += 1 + out[i] = normalized_adjacency(out[i], symmetric=symmetric) + else: + if hasattr(out, "tocsr"): + out = out.tocsr() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + out[np.diag_indices_from(out)] += 1 + out = normalized_adjacency(out, symmetric=symmetric) + + if sp.issparse(out): + out.sort_indices() + return out + + +def chebyshev_polynomial(X, k): + """ + Calculates Chebyshev polynomials of X, up to order k. + :param X: rank 2 array or sparse matrix; + :param k: the order up to which compute the polynomials, + :return: a list of k + 1 arrays or sparse matrices with one element for each + degree of the polynomial. + """ + T_k = list() + if sp.issparse(X): + T_k.append(sp.eye(X.shape[0], dtype=X.dtype).tocsr()) + else: + T_k.append(np.eye(X.shape[0], dtype=X.dtype)) + T_k.append(X) + + def chebyshev_recurrence(T_k_minus_one, T_k_minus_two, X): + if sp.issparse(X): + X_ = sp.csr_matrix(X, copy=True) + else: + X_ = np.copy(X) + return 2 * X_.dot(T_k_minus_one) - T_k_minus_two + + for _ in range(2, k + 1): + T_k.append(chebyshev_recurrence(T_k[-1], T_k[-2], X)) + + return T_k + + +def chebyshev_filter(A, k, symmetric=True): + r""" + Computes the Chebyshev filter from the given adjacency matrix, as described + in [Defferrard et at. (2016)](https://arxiv.org/abs/1606.09375). 
+ :param A: rank 2 array or sparse matrix; + :param k: integer, the order of the Chebyshev polynomial; + :param symmetric: boolean, whether to normalize the matrix as + \(\D^{-\frac{1}{2}}\A\D^{-\frac{1}{2}}\) or as \(\D^{-1}\A\); + :return: a list of k + 1 arrays or sparse matrices with one element for each + degree of the polynomial. + """ + normalized_adj = normalized_adjacency(A, symmetric) + if sp.issparse(A): + I = sp.eye(A.shape[0], dtype=A.dtype) + else: + I = np.eye(A.shape[0], dtype=A.dtype) + L = I - normalized_adj # Compute Laplacian + + # Rescale Laplacian + L_scaled = rescale_laplacian(L) + + # Compute Chebyshev polynomial approximation + T_k = chebyshev_polynomial(L_scaled, k) + + # Sort indices + if sp.issparse(T_k[0]): + for i in range(len(T_k)): + T_k[i].sort_indices() + + return T_k + + +def add_self_loops(a, value=1): + """ + Sets the inner diagonals of `a` to `value`. + :param a: a np.array or scipy.sparse matrix, the innermost two dimensions + must be equal. + :param value: value to set the diagonals to. + :return: a np.array or scipy.sparse matrix with the same shape as `a`. + """ + a = a.copy() + if len(a.shape) < 2: + raise ValueError("a must have at least rank 2") + n = a.shape[-1] + if n != a.shape[-2]: + raise ValueError( + "Innermost two dimensions must be equal. 
Got {}".format(a.shape) + ) + if sp.issparse(a): + a = a.tolil() + a.setdiag(value) + return a.tocsr() + else: + idx = np.arange(n) + a[..., idx, idx] = value + return a diff --git a/mlmc/metamodel/own_diffpool.py b/mlmc/metamodel/own_diffpool.py new file mode 100644 index 00000000..e69de29b diff --git a/mlmc/metamodel/own_src.py b/mlmc/metamodel/own_src.py new file mode 100644 index 00000000..e69de29b diff --git a/mlmc/metamodel/postprocessing.py b/mlmc/metamodel/postprocessing.py new file mode 100644 index 00000000..262656ff --- /dev/null +++ b/mlmc/metamodel/postprocessing.py @@ -0,0 +1,2134 @@ +import os +import random +import copy +import matplotlib.pyplot as plt +import mlmc.estimator +from mlmc.tool import gmsh_io +import mlmc.quantity.quantity_estimate as qe +from mlmc.sample_storage import Memory +from mlmc.quantity.quantity_spec import QuantitySpec, ChunkSpec +import numpy as np +from mlmc.sample_storage_hdf import SampleStorageHDF +from mlmc.moments import Legendre, Monomial +from mlmc.quantity.quantity import make_root_quantity +from mlmc.metamodel.create_graph import extract_mesh_gmsh_io +from mlmc.plot import plots + + +QUANTILE = 1e-6 +# QUANTILE = 0.001 +N_MOMENTS = 20 +TARGET_VAR = 1e-5 + + +def use_levels_from_mlmc(sample_storage, use_levels): + print("mlmc sample storage get N collected ", sample_storage.get_n_collected()) + n_levels = len(sample_storage.get_level_ids()) + original_moments, estimator, original_true_domain, _ = estimate_moments(sample_storage) + data_mlmc = [] + new_coarse_data = [] + level_parameters = sample_storage.get_level_parameters() + new_level_parameters = [] + + mlmc_n_collected = estimator._sample_storage.get_n_collected() + for l_id, l_n_collected in zip(range(n_levels), mlmc_n_collected): + print("l id ", l_id) + if l_id == np.max(use_levels): + new_coarse_data = estimator.get_level_samples(level_id=1, n_samples=mlmc_n_collected[1])[:, :, 1] + + if l_id in use_levels: + level_samples = 
estimator.get_level_samples(level_id=l_id, n_samples=l_n_collected) + #fine_samples = level_samples[:, :, 0] + #coarse_samples = level_samples[:, :, 1] + print("level samples shape ", level_samples.shape) + else: + continue + + print("level samples shape ", level_samples.shape) + + + # if len(data_mlmc) == 0: + # level_samples[:, :, 1] = 0 + if len(new_coarse_data) > 0: + print("new coarse data len ", len(new_coarse_data[0])) + print("new_coarse_data[:, :level_samples.shape[1]] ", new_coarse_data[:, :level_samples.shape[1]]) + level_samples[:, :, 1] = new_coarse_data[:, :level_samples.shape[1]] + + print("level samples ", level_samples) + new_level_parameters.append(level_parameters[l_id]) + data_mlmc.append(level_samples) + + exit() + + final_sample_storage = create_quantity_mlmc(data_mlmc, level_parameters=np.array(new_level_parameters)) + + return final_sample_storage + + +def remove_level(mlmc_hdf_file, rm_level_id=0, use_levels=[]): + sample_storage = SampleStorageHDF(file_path=mlmc_hdf_file) + + print("mlmc sample storage get N collected ", sample_storage.get_n_collected()) + n_levels = len(sample_storage.get_level_ids()) + original_moments, estimator, original_true_domain, _ = estimate_moments(sample_storage) + + n_ops, field_times, coarse_flow, fine_flow = get_sample_times_mlmc(mlmc_hdf_file) + n_ops_est = n_ops + + if len(use_levels) > 0: + final_sample_storage = use_levels_from_mlmc(sample_storage, use_levels) + else: + data_mlmc = [] + mlmc_n_collected = estimator._sample_storage.get_n_collected() + for l_id, l_n_collected in zip(range(n_levels), mlmc_n_collected): + if l_id <= rm_level_id: + continue + print("l id ", l_id) + if rm_level_id + 1 == l_id: + level_samples = estimator.get_level_samples(level_id=l_id, n_samples=l_n_collected) + level_samples[:, :, 1] = 0 + print("level sampels shape ", level_samples.shape) + + else: + level_samples = estimator.get_level_samples(level_id=l_id, n_samples=l_n_collected) + print("level samples ", level_samples) + 
data_mlmc.append(level_samples) + + final_sample_storage = create_quantity_mlmc(data_mlmc, level_parameters=sample_storage.get_level_parameters()[rm_level_id+1:]) + + return final_sample_storage + + +def cut_original_test(mlmc_hdf_file, n_levels=None): + print("mlmc hdf file ", mlmc_hdf_file) + sample_storage = SampleStorageHDF(file_path=mlmc_hdf_file) + + print("mlmc sample storage get N collected ", sample_storage.get_n_collected()) + + n_levels = len(sample_storage.get_level_ids()) + original_moments, estimator, original_true_domain, _ = estimate_moments(sample_storage) + + n_ops, field_times, coarse_flow, fine_flow = get_sample_times_mlmc(mlmc_hdf_file) + n_ops_est = n_ops + + data_mlmc = [] + mlmc_n_collected = estimator._sample_storage.get_n_collected() + for l_id, l_n_collected in zip(range(n_levels), mlmc_n_collected): + level_samples = estimator.get_level_samples(level_id=l_id, n_samples=l_n_collected) + data_mlmc.append(level_samples) + + print("original level params", sample_storage.get_level_parameters()) + sample_storage = create_quantity_mlmc(data_mlmc, level_parameters=sample_storage.get_level_parameters()) + + n0 = 2000 + nL = 100 + n_levels = sample_storage.get_n_levels() + n_samples = np.round(np.exp2(np.linspace(np.log2(n0), np.log2(nL), n_levels))).astype(int) + print('n samples ', n_samples) + + sample_storage_for_estimated = cut_samples(data_mlmc, sample_storage, n_samples) # [2300]) + + original_q_estimator_est = get_quantity_estimator(sample_storage_for_estimated) + + n_estimated_orig, l_vars_orig, n_samples_orig = get_n_estimated(sample_storage_for_estimated, + original_q_estimator_est, n_ops=n_ops_est) + print("n estimated orig ", n_estimated_orig) + + sample_storage_for_estimated = cut_samples(data_mlmc, sample_storage_for_estimated, n_estimated_orig, + bootstrap=True) + +def analyze_output(targets, mult_coef=1,dataset_config=None): + + if dataset_config.get('output_scale', False): + print("targets[:10] ", targets[:10]) + fig, ax = 
plt.subplots(1, 1, figsize=(15, 10)) + ax.hist(targets, bins=50, alpha=0.5, label='target', density=True) + # fig.colorbar(cont) + plt.title("training targets") + plt.show() + + mean_targets = dataset_config.get('mean_output', False) + var_targets = dataset_config.get('var_output', False) + + # mean_targets = np.mean(targets) + print("mean targets ", mean_targets) + # print("mean targets axis=0 " , np.mean(targets, axis=0)) + # var_targets = np.var(targets) + + targets = var_targets * targets + mean_targets + + targets_orig = np.exp(targets) + fig, ax = plt.subplots(1, 1, figsize=(15, 10)) + + ax.hist(targets_orig, bins=50, alpha=0.5, label='targets orig', density=True) + # fig.colorbar(cont) + plt.title("targets orig") + plt.show() + + new_mult_coef = 1 / np.mean(targets_orig) + print("np.log(targets_orig * new_mult_coef)[:10] ", np.log(targets_orig * new_mult_coef)[:10]) + fig, ax = plt.subplots(1, 1, figsize=(15, 10)) + ax.hist(np.log(targets_orig * new_mult_coef), bins=50, alpha=0.5, label='targets orig', density=True) + # fig.colorbar(cont) + plt.title("log(targets orig * mult coef)") + plt.show() + + new_mult_coef = 1 / np.mean(np.log(targets_orig)) + fig, ax = plt.subplots(1, 1, figsize=(15, 10)) + ax.hist(np.log(targets_orig) * new_mult_coef, bins=50, alpha=0.5, label='targets orig', density=True) + # fig.colorbar(cont) + plt.title("log(targets orig) * mult coef") + plt.show() + + print("mean log targets orig ", np.mean(np.log(targets_orig))) + + log_scaled_targets_orig = (np.log(targets_orig) - np.mean(np.log(targets_orig))) / np.var(np.log(targets_orig)) + print("log scaled targets orig[:10] ", log_scaled_targets_orig[:10]) + fig, ax = plt.subplots(1, 1, figsize=(15, 10)) + ax.hist(log_scaled_targets_orig, bins=50, alpha=0.5, label='targets orig', density=True) + # fig.colorbar(cont) + plt.title("log scaled target orig") + plt.show() + + fig, ax = plt.subplots(1, 1, figsize=(15, 10)) + ax.hist(np.log(targets_orig), bins=50, alpha=0.5, label='target orig 
log', density=True) + # fig.colorbar(cont) + plt.title("log targets orig") + plt.show() + + + else: + fig, ax = plt.subplots(1, 1, figsize=(15, 10)) + ax.hist(targets, bins=50, alpha=0.5, label='target', density=True) + #fig.colorbar(cont) + plt.title("training targets") + plt.show() + + targets_orig = np.exp(targets) / mult_coef + fig, ax = plt.subplots(1, 1, figsize=(15, 10)) + + ax.hist(targets_orig, bins=50, alpha=0.5, label='targets orig', density=True) + # fig.colorbar(cont) + plt.title("targets orig") + plt.show() + + new_mult_coef = 1/np.mean(targets_orig) + fig, ax = plt.subplots(1, 1, figsize=(15, 10)) + ax.hist(np.log(targets_orig*new_mult_coef), bins=50, alpha=0.5, label='targets orig', density=True) + # fig.colorbar(cont) + plt.title("log(targets orig * mult coef)") + plt.show() + + new_mult_coef = 1 / np.mean(np.log(targets_orig)) + fig, ax = plt.subplots(1, 1, figsize=(15, 10)) + ax.hist(np.log(targets_orig) * new_mult_coef, bins=50, alpha=0.5, label='targets orig', density=True) + # fig.colorbar(cont) + plt.title("log(targets orig) * mult coef") + plt.show() + + log_scaled_targets_orig = (np.log(targets_orig) - np.mean(np.log(targets_orig))) / np.var(np.log(targets_orig)) + fig, ax = plt.subplots(1, 1, figsize=(15, 10)) + ax.hist(log_scaled_targets_orig, bins=50, alpha=0.5, label='targets orig', density=True) + # fig.colorbar(cont) + plt.title("log scaled target orig") + plt.show() + + + + fig, ax = plt.subplots(1, 1, figsize=(15, 10)) + ax.hist(np.log(targets_orig), bins=50, alpha=0.5, label='target orig log', density=True) + # fig.colorbar(cont) + plt.title("log targets orig") + plt.show() + + exit() + + +def _variance_cost_analysis(l_var_nn, n_ops_predict): + + print("np.max(l_var_nn, axis=0) ", np.max(l_var_nn, axis=1)) + vl_cl = np.max(l_var_nn, axis=1) * n_ops_predict + + print("VlCl ", vl_cl) + + beta = {} + gamma = {} + beta_1 = {} + gamma_1 = {} + moments_m = [1, 5, 10, 15, 20] + #n_collected = estimator._sample_storage.get_n_collected() + 
# cost_levels = n_collected * np.array(new_n_ops) + cost_levels = np.array(n_ops_predict) + + max_level_var = True + beta_levels = [] + gamma_levels = [] + beta_levels_2 = [] + gamma_levels_2 = [] + + level_vars = np.max(l_var_nn, axis=1) + + level_costs = n_ops_predict + + print("cost levels ", cost_levels) + for l_id, l_vars in enumerate(l_var_nn, start=1): + # print("l_vars ", l_vars) + # print("l id ", l_id) + if max_level_var is True: + if l_id < len(level_vars): + beta_levels.append(-1 * np.log(level_vars[l_id]/level_vars[l_id-1])) + gamma_levels.append(np.log2(level_costs[l_id] / level_costs[l_id - 1])) + + if l_id == 1: + beta_levels_2.append(-1 * (np.log(level_vars[l_id - 1]))) + gamma_levels_2.append(np.log2(level_costs[l_id-1])) + else: + beta_levels_2.append(-1 * (np.log(level_vars[l_id-1])/(l_id-1))) + gamma_levels_2.append(np.log2(level_costs[l_id - 1])/(l_id-1)) + else: + for moment in moments_m: + if moment not in beta: + beta[moment] = {} + beta_1[moment] = {} + + if l_id not in beta[moment]: + beta[moment][l_id] = [] + beta_1[moment][l_id] = [] + + if l_id not in gamma: + gamma[l_id] = [] + gamma_1[l_id] = [] + + # print("l_id ", l_id) + # print("len l vars ", len(l_vars)) + # print("moment ", moment) + # print("beta ", beta) + beta[moment][l_id].append(-1 * np.log2(l_vars[moment]) / l_id) + + # print("level l vars ", l_vars) + # print("moments_mean.l_vars[l_id-2] ", moments_mean.l_vars[l_id-1]) + if l_id < len(cost_levels): + beta_1[moment][l_id].append(-1 * np.log2(l_var_nn[l_id][moment] / l_vars[moment])) + + gamma[l_id].append(np.log2(cost_levels[l_id - 1]) / l_id) + + if l_id < len(cost_levels): + # print("cost levles ", cost_levels) + # print("l id ", l_id) + gamma_1[l_id].append(np.log2(cost_levels[l_id] / cost_levels[l_id - 1])) + + print("beta levels ", beta_levels) + print("gamma levels ", gamma_levels) + + print("beta levels 2", beta_levels_2) + print("gamma levels 2", gamma_levels_2) + + beta_curve = [] + gamma_curve = [] + + fig, ax = 
plt.subplots(1, 1, figsize=(15, 10)) + ax.plot(level_vars, label="vars") + ax.plot(level_costs, label="costs") + + for b, g in zip(beta_levels, gamma_levels): + beta_curve = [] + gamma_curve = [] + for l in range(len(level_vars)): + beta_curve.append(2**(-1*b*l)) + gamma_curve.append(2 ** (g * l)) + + ax.plot(beta_curve, label="beta: {}".format(b)) + ax.plot(gamma_curve, label="gamma: {}".format(g)) + + # fig.colorbar(cont) + plt.title("levels - var, cost") + plt.yscale("log") + plt.legend() + plt.show() + + + print("beta: {}, beta_1:{}".format(beta, beta_1)) + print("gamma: {}, gamma_1:{}".format(gamma, gamma_1)) + + exit() + + +def process_mlmc(nn_hdf_file, sampling_info_path, ref_mlmc_file, targets, predictions, train_targets, train_predictions, + val_targets, l_0_targets=None, l_0_predictions=None, + l1_sample_time=None, l0_sample_time=None, nn_level=0, replace_level=False, stats=False, mlmc_hdf_file=None, + learning_time=0, dataset_config={}, targets_to_est=None, predictions_to_est=None): + """ + :param l1_sample_time: preprocess_time / len(data) + learning_time / len(data), + preprocess_time includes graph creation time and FlowDataset creation time + """ + # level_zero = False + cut_est = True + all_samples = False + n0, nL = 2000, 100 + domain_largest = True # If False than domain=None - domain is determined given the simulation samples + distr_domain_largest = False#False + replace_level = False#False + + rm_level = None#0 + rm_level_nn = None#None + use_levels = []#[0, 2] + + + #cut_original_test(nn_hdf_file) + # exit() + + # output_mult_factor = dataset_config.get('output_mult_factor', 1) + # #print("output mult factor ", output_mult_factor) + # #analyze_output(targets, output_mult_factor, dataset_config=dataset_config) + # plt.hist(targets, bins=50, alpha=0.5, label='target', density=True) + # plt.hist(predictions, bins=50, alpha=0.5, label='predictions', density=True) + # + # # plt.hist(targets - predictions, bins=50, alpha=0.5, label='predictions', 
density=True) + # plt.legend(loc='upper right') + # # plt.xlim(-0.5, 1000) + # plt.yscale('log') + # plt.show() + # + # plt.hist(l_0_targets, bins=50, alpha=0.5, label='l_0_target', density=True) + # plt.hist(l_0_predictions, bins=50, alpha=0.5, label='l_0_predictions', density=True) + # + # # print("lo targets ", l_0_targets) + # # print("l0 predictions ", l_0_predictions) + # # exit() + # + # # # plt.hist(targets - predictions, bins=50, alpha=0.5, label='predictions', density=True) + # plt.legend(loc='upper right') + # # plt.xlim(-0.5, 1000) + # plt.yscale('log') + # plt.show() + # + # + # plt.hist(targets, bins=50, alpha=0.5, label='target', density=True) + # plt.hist(predictions, bins=50, alpha=0.5, label='predictions', density=True) + # + # plt.legend(loc='upper right') + # # plt.xlim(-0.5, 1000) + # plt.yscale('log') + # + # plt.show() + # # + # plt.hist(targets_to_est, bins=50, alpha=0.5, label='target to est', density=True) + # plt.hist(predictions_to_est, bins=50, alpha=0.5, label='predictions to est', density=True) + # + # plt.legend(loc='upper right') + # # plt.xlim(-0.5, 1000) + # plt.yscale('log') + # plt.show() + + if targets_to_est is not None and predictions_to_est is not None: + print("len targets_to_est ", len(targets_to_est)) + targets = np.concatenate((targets, targets_to_est), axis=0) + predictions = np.concatenate((predictions, predictions_to_est), axis=0) + # targets = np.concatenate((targets, targets_to_est), axis=0) + # predictions = np.concatenate((predictions, predictions_to_est), axis=0) + + # l_0_targets = np.concatenate((l_0_targets, targets_to_est), axis=0) + # l_0_predictions = np.concatenate((l_0_predictions, predictions_to_est), axis=0) + + # targets = np.concatenate((targets, l_0_targets), axis=0) + # predictions = np.concatenate((predictions, l_0_predictions), axis=0) + + # targets = np.concatenate((targets, targets_to_est[:5000]), axis=0) + # predictions = np.concatenate((predictions, predictions_to_est[:5000]), axis=0) + + # 
l_0_targets = np.concatenate((l_0_targets, targets_to_est), axis=0) + # l_0_predictions = np.concatenate((l_0_predictions, predictions_to_est), axis=0) + + # targets = np.concatenate((targets, l_0_targets[-1000:]), axis=0) + # predictions = np.concatenate((predictions, l_0_predictions[-1000:]), axis=0) + + # print("len targets to est", len(targets_to_est)) + # print("len predictions to est ", len(predictions_to_est)) + # exit() + + + print("len(targets ", len(targets)) + print("len l0 targets ", len(l_0_targets)) + + + if dataset_config.get('output_normalization', False): + min_out = dataset_config.get('min_output') + max_out = dataset_config.get('max_output') + + targets = targets * (max_out - min_out) + min_out + predictions = predictions * (max_out - min_out) + min_out + l_0_targets = l_0_targets * (max_out - min_out) + min_out + l_0_predictions = l_0_predictions * (max_out - min_out) + min_out + + if dataset_config.get('output_scale', False): + # mean_targets = np.mean(targets) + # var_targets = np.var(targets) + + mean_targets = dataset_config.get('mean_output', False) + var_targets = dataset_config.get('var_output', False) + + targets = var_targets * targets + mean_targets + predictions = var_targets * predictions + mean_targets + + # mean_l_0_targets = mean_targets + # var_l_0_targets = var_targets + + l_0_targets = var_targets * l_0_targets + mean_targets + l_0_predictions = var_targets * l_0_predictions + mean_targets + + if dataset_config.get('output_log', False): + targets = np.exp(targets) + predictions = np.exp(predictions) + l_0_predictions = np.exp(l_0_predictions) + l_0_targets = np.exp(l_0_targets) + + if dataset_config.get('first_log_output', False): + targets = np.exp(targets) + predictions = np.exp(predictions) + l_0_predictions = np.exp(l_0_predictions) + l_0_targets = np.exp(l_0_targets) + + print("targets ", targets) + print("len targets ", len(targets)) + + # if not stats: + # print("nn_level ", nn_level) + # print("replace level ", 
replace_level) + + # targets = np.exp(targets) + # predictions = np.exp(predictions) + # l_0_predictions = np.exp(l_0_predictions) + # l_0_targets = np.exp(l_0_targets) + + # print("targets ", targets) + # print("predictions ", predictions) + # print("targets ", targets) + # print("predictions ", predictions) + # plt.hist(targets, bins=50, alpha=0.5, label='target', density=True) + # plt.hist(predictions, bins=50, alpha=0.5, label='predictions', density=True) + # + # # plt.hist(targets - predictions, bins=50, alpha=0.5, label='predictions', density=True) + # plt.legend(loc='upper right') + # # plt.xlim(-0.5, 1000) + # plt.yscale('log') + # plt.show() + # + # plt.hist(l_0_targets, bins=50, alpha=0.5, label='l_0_target', density=True) + # plt.hist(l_0_predictions, bins=50, alpha=0.5, label='l_0_predictions', density=True) + # + # # print("lo targets ", l_0_targets) + # # print("l0 predictions ", l_0_predictions) + # # exit() + # + # # plt.hist(targets - predictions, bins=50, alpha=0.5, label='predictions', density=True) + # plt.legend(loc='upper right') + # # plt.xlim(-0.5, 1000) + # plt.yscale('log') + # plt.show() + # + + # targets = np.exp(targets) + # predictions = np.exp(predictions) + + # # Creating plot + # plt.boxplot(targets) + # plt.boxplot(predictions) + # # show plot + # plt.show() + #exit() + + ####### + ### Create storage fromm original MLMC data + ###### + if mlmc_hdf_file is None: + mlmc_hdf_file = nn_hdf_file + + if rm_level is not None: + sample_storage = remove_level(mlmc_hdf_file, rm_level_id=rm_level, use_levels=use_levels) + else: + sample_storage = SampleStorageHDF(file_path=mlmc_hdf_file) + + print("mlmc sample storage get N collected ", sample_storage.get_n_collected()) + + n_levels = len(sample_storage.get_level_ids()) + original_moments, estimator, original_true_domain, _ = estimate_moments(sample_storage) + + print("moments.mean ", original_moments.mean) + print("moments.var ", original_moments.var) + + if rm_level_nn is not None: + 
sample_storage_nn = remove_level(nn_hdf_file, rm_level_id=rm_level_nn, use_levels=use_levels) + else: + sample_storage_nn = SampleStorageHDF(file_path=nn_hdf_file) + + + original_moments_nn, estimator_nn, original_true_domain_nn, _ = estimate_moments(sample_storage_nn) + print("nn moments.mean ", original_moments_nn.mean) + print("nn moments.var ", original_moments_nn.var) + + orig_max_vars = np.max(original_moments.l_vars, axis=1) + # print("orig max vars ", orig_max_vars) + + ###### + ### Get n ops + ###### + n_ops, field_times, coarse_flow, fine_flow = get_sample_times_mlmc(mlmc_hdf_file) + #n_ops = [20.279451930908973, 87.91808330548963, 216.172210888505, 892.2780022583306, 2646.912581985272] + # n_ops, _, _ = get_sample_times(sampling_info_path) + print("n ops ", n_ops) + + n_ops = n_ops[-sample_storage.get_n_levels():] + # field_times = field_times[-sample_storage.get_n_levels():] + # coarse_flow = coarse_flow[-sample_storage.get_n_levels():] + # fine_flow = fine_flow[-sample_storage.get_n_levels():] + + if n_ops is None: + n_ops = sample_storage.get_n_ops() + field_times = np.zeros(len(n_ops)) + flow_times = np.zeros(len(n_ops)) + # Test storage creation + data_mlmc = [] + mlmc_n_collected = estimator._sample_storage.get_n_collected() + for l_id, l_n_collected in zip(range(n_levels), mlmc_n_collected): + level_samples = estimator.get_level_samples(level_id=l_id, n_samples=l_n_collected) + data_mlmc.append(level_samples) + + print("original level params", sample_storage.get_level_parameters()) + sample_storage = create_quantity_mlmc(data_mlmc, level_parameters=sample_storage.get_level_parameters()) + if cut_est: + # n0 = 2000 + # nL = 100 + n_levels = sample_storage.get_n_levels() + + if all_samples: + n_samples = sample_storage.get_n_collected() + else: + n_samples = np.round(np.exp2(np.linspace(np.log2(n0), np.log2(nL), n_levels))).astype(int) + + print('cut n samples for sample storgae estimate', n_samples) + sample_storage_for_estimated = 
cut_samples(data_mlmc, sample_storage, n_samples) # [2300]) + + print("Original storage") + orig_storage_n_collected, orig_storage_max_vars = get_storage_info(sample_storage) + print("orig storage max vars ", orig_storage_max_vars) + + mlmc_nn_diff_level = 1 + if replace_level: + n_lev = n_levels + level_params = [*sample_storage.get_level_parameters()] + else: + n_lev = n_levels - nn_level + 1 + level_params = [sample_storage.get_level_parameters()[nn_level], + *sample_storage.get_level_parameters()[mlmc_nn_diff_level - 1:]] + + # Use predicted data as zero level results and level one coarse results + data_nn = [] + n_ops_predict = [] + + print("l0_sample_time ", l0_sample_time) + print("l1_sample_time ", l1_sample_time) + + ############################ + ### Calculate First level (follows meta-level) n_ops + ############################ + print("learning time ", learning_time) + print("l1_sample_time ", l1_sample_time) + + + #learning_time = learning_time / 4 + C_1_cost = 0 + C_2_cost = 0 + + #len_data = 50000 # 50000#80000 + len_data = 4000 # case with independent samples + len_train_data = 2000 + len_data_preprocess = len_data + 50000 + l0_predict_time = 1e-3 + preprocess_time = l1_sample_time * len_data - learning_time + preprocess_time_per_sample = preprocess_time / len_data_preprocess + + learning_time = 310 # cl_0_1_s_1: L1 - 180, L2 - 220, L3 - 310, L4 - 820, L5 - 3320.261721275 + learning_time = 2840 # 02 conc cond, L1 - 400, L2 - 400 , L3 - 450, L4 - 550 , L5 - 2840 + + #n_ops_train = preprocess_time_per_sample + (learning_time / len_train_data) + l0_predict_time + + C_1_cost = l0_sample_time # time for generating random field + C_2_cost = preprocess_time_per_sample + l0_predict_time + n_ops[0] + + + print("C1 cost: {}, C2 cost: {}".format(C_1_cost, C_2_cost)) + + + #### + # Notes: + # l1_sample_time = preprocess_time / len(data) + learning_time / len(data) + # + # n_ops_train includes: + # preprocess time per sample + # learning time per training sample 
+ # l0_prediction_time - the time necessary to predict neural network outcome for given already preprocessed data + #l1_sample_time = n_ops_train + # New l1_sample_time corresponds to the time necessary for Level 1 samples (fine + coarse) + # + ################################# + + #print("L1 sample time ", l1_sample_time) + + print("learning time ", learning_time) + print("preprocess time ", preprocess_time) + print("preprocess time per sample ", preprocess_time_per_sample) + + #print("new l1 sample time ", l1_sample_time) + + + # print("n ops train ", n_ops_train) + # print("n ops test ", n_ops_test) + + # n_ops = n_ops_0 + (n_ops_train * (len_train_data / nn_estimated) + n_ops_test * (len_test_data / nn_estimated)) / ( + # nn_estimated) + # n_ops += predict_l0_time + + # l0_sample_time, l1_sample_time = l1_sample_time, l0_sample_time + nn_n_collected = estimator_nn._sample_storage.get_n_collected() + print("nn n collected ", nn_n_collected) + + nn_lev_sim_time = 0 + for l_id in range(nn_level): + nn_lev_sim_time = n_ops[l_id] - nn_lev_sim_time + + for l_id in range(n_lev): + if l_id == 0: + level_samples = l_0_predictions.reshape(1, len(l_0_predictions), 1) + # level_samples = np.ones((1, len(l_0_predictions), 1)) + ft_index = nn_level + + # if output_mult_factor != 1: + # level_samples /= output_mult_factor + + if nn_level > 0: + ft_index = nn_level - 1 + n_ops_predict.append(l0_sample_time) # + field_times[ft_index] / 2) + + print("L0 n ops predict ", n_ops_predict) + + # print("l0_sample_time ", l0_sample_time) + # print("len l0 predictions ", len(l_0_predictions)) + # exit() + else: + if replace_level: + level_id = l_id + level_samples = estimator_nn.get_level_samples(level_id=level_id, n_samples=nn_n_collected[level_id]) + + n_ops_predict.append(n_ops[level_id]) + print("replace level n ops ", n_ops_predict) + else: + if l_id < mlmc_nn_diff_level: + continue + if l_id == mlmc_nn_diff_level: + coarse_level_samples = predictions.reshape(1, len(predictions), 
1) + fine_level_samples = targets.reshape(1, len(targets), 1) + + # print("coarse level samples ", coarse_level_samples) + # print("fine level sampels ", fine_level_samples) + # exit() + + # coarse_level_samples = np.concatenate((coarse_level_samples, + # train_predictions.reshape(1, len(train_predictions), 1)), axis=1) + # + # fine_level_samples = np.concatenate((fine_level_samples, + # train_targets.reshape(1, len(train_targets), 1)), axis=1) + + # coarse_level_samples = coarse_level_samples[:, :len(predictions)//2, :] + # fine_level_samples = fine_level_samples[:, :len(predictions)//2, :] + level_samples = np.concatenate((fine_level_samples, coarse_level_samples), axis=2) + print("nn level ", nn_level) + + # if output_mult_factor != 1: + # level_samples /= output_mult_factor + print("level sampels corase and fine shape", fine_level_samples.shape) + + print("level sampels ", level_samples) + print("fine - coarse ", fine_level_samples - coarse_level_samples) + print("fine - coarse ", np.var(fine_level_samples - coarse_level_samples)) + + # if output_mult_factor != 1: + # level_samples /= output_mult_factor + + n_ops_predict.append(n_ops[mlmc_nn_diff_level - 1] - nn_lev_sim_time + l1_sample_time) + else: + if replace_level: + level_id = l_id # - 1 + else: + level_id = l_id + nn_level - 1 + if level_id >= len(nn_n_collected): + level_samples = [] + n_ops_predict[-1] = n_ops[level_id] + continue + else: + level_samples = estimator_nn.get_level_samples(level_id=level_id, + n_samples=nn_n_collected[level_id]) + print('n ops ', n_ops) + print("level id ", level_id) + + n_ops_predict.append(n_ops[level_id]) + # print("n ops predict append", n_ops_predict) + + # if output_mult_factor != 1: + # level_samples /= output_mult_factor + + print("level samples rescaled ", level_samples) + + #level_samples = np.log(level_samples) + #level_samples /= output_mult_factor + + print("leel samples exp ", level_samples) + + #print("level samples exp ", level_samples) + + 
data_nn.append(level_samples) + + print("n ops predict ", n_ops_predict) + + # n_ops_predict[1] += n_ops_predict[1]*0.2 + + print("level params ", level_params) + sample_storage_predict = create_quantity_mlmc(data_nn, level_parameters=level_params) + if cut_est: + # n0 = 2000 + # nL = 100 + n_levels = sample_storage_predict.get_n_levels() + + #n_samples = np.round(np.exp2(np.linspace(np.log2(n0), np.log2(nL), n_levels))).astype(int) + + if all_samples: + n_samples = sample_storage.get_n_collected() + else: + n_samples = np.round(np.exp2(np.linspace(np.log2(n0), np.log2(nL), n_levels))).astype(int) + + print('cut n samples for MC+NN estimate', n_samples) + sample_storage_predict_for_estimate = cut_samples(data_nn, sample_storage_predict, n_samples) + + # print("n ops predict ", n_ops_predict) + print("Storage predict info") + predict_storage_n_collected, predict_storage_max_vars = get_storage_info(sample_storage_predict_for_estimate) + print("predict storage n collected ", predict_storage_n_collected) + print("predict storage max vars ", predict_storage_max_vars) + + # if stats: + # return orig_storage_max_vars, predict_storage_max_vars + + ###### + ### Create estimators + ###### + ref_sample_storage = ref_storage(ref_mlmc_file) + if domain_largest: + domain = get_largest_domain([sample_storage, sample_storage_predict, ref_sample_storage]) + else: + domain = None + original_q_estimator = get_quantity_estimator(sample_storage, true_domain=domain) + predict_q_estimator = get_quantity_estimator(sample_storage_predict, true_domain=domain) + + if cut_est: + if domain_largest: + domain = get_largest_domain([sample_storage_for_estimated, sample_storage_predict_for_estimate, ref_sample_storage]) + else: + domain = None + original_q_estimator_est = get_quantity_estimator(sample_storage_for_estimated, true_domain=domain) + predict_q_estimator_est = get_quantity_estimator(sample_storage_predict_for_estimate, true_domain=domain) + # ref_estimator = 
get_quantity_estimator(ref_sample_storage, true_domain=domain) + + ####### + ### Calculate N estimated samples + ####### + print("n ops ", n_ops) + print("n ops predict ", n_ops_predict) + + # # # remove levels + # # # #@TODO: remove asap + # new_n_samples = [0, 0, sample_storage.get_n_collected()[-3], sample_storage.get_n_collected()[-2], sample_storage.get_n_collected()[-1]] + # sample_storage = cut_samples(data_mlmc, sample_storage, new_n_samples, new_l_0=2) + # print("Cut storage info") + # get_storage_info(sample_storage) + # original_q_estimator = get_quantity_estimator(sample_storage, true_domain=domain) + # n_ops = n_ops[2:] + # # # ##### + + # print("n ops ", n_ops) + # print("n ops predict ", n_ops_predict) + # exit() + # n_ops = [30.1978988484516, 2321.096786450282] + # n_ops_predict = [0.33790084, 30.91978988484516, 2321.096786450282] + + #### Original data + n_ops_est = copy.deepcopy(n_ops) + if not cut_est: + # n_ops_est[0] = n_ops_est[0] / 1000 + print("sample_storage.get_n_collected() ", sample_storage.get_n_collected()) + n_estimated_orig, l_vars_orig, n_samples_orig = get_n_estimated(sample_storage, original_q_estimator, + n_ops=n_ops_est) + + print("n estimated orig ", n_estimated_orig) + print("n ops est ", n_ops_est) + #print("sample storage for estimated n collected ", sample_storage_for_estimated.get_n_collected()) + + ###### + ## initial N guess + if cut_est: + print("n ops est ", n_ops_est) + n_estimated_orig, l_vars_orig, n_samples_orig = get_n_estimated(sample_storage_for_estimated, + original_q_estimator_est, n_ops=n_ops_est) + print("n estimated orig ", n_estimated_orig) + + sample_storage_for_estimated = cut_samples(data_mlmc, sample_storage, n_estimated_orig, + bootstrap=False) + + # Another estimate simulate adding samples algo + original_q_estimator_est_2 = get_quantity_estimator(sample_storage_for_estimated, true_domain=domain) + n_estimated_orig, l_vars_orig, n_samples_orig = get_n_estimated(sample_storage_for_estimated, + 
original_q_estimator_est_2, n_ops=n_ops_est) + + print("new n estimated orig ", n_estimated_orig) + + sample_storage = cut_samples(data_mlmc, sample_storage, n_estimated_orig, bootstrap=False) + + + # exit() + + n_estimated_nn, l_var_nn, n_samples_nn = get_n_estimated(sample_storage_predict_for_estimate, predict_q_estimator_est, + n_ops=n_ops_predict) + + # print("l var nn ", l_var_nn) + # print("n ops predict ", n_ops_predict) + # print("n estimated nn ", n_estimated_nn) + # + # print("n estimated orig ", n_estimated_orig) + + # new_n_estimated_nn = [] + # for n_est in n_estimated_nn: + # new_n_estimated_nn.append(int(n_est + n_est * 0.2)) + # n_estimated_nn = new_n_estimated_nn + + # ("new n estimated nn ", n_estimated_nn) + # exit() + # n_estimated_nn = [50000, 10000, 850] + # sample_storage_predict_2 = cut_samples(data_nn, sample_storage_predict, n_estimated_nn) + # predict_q_estimator_2 = get_quantity_estimator(sample_storage_predict_2, true_domain=domain) + # + # + # new_n_estimated_nn, new_l_var_nn, new_n_samples_nn = get_n_estimated(sample_storage_predict_2, predict_q_estimator_2, + # n_ops=n_ops_predict) + + # print("new n estimated nn", new_n_estimated_nn) + + # n_estimated_nn = new_n_estimated_nn + sample_storage_predict_0 = copy.deepcopy(sample_storage_predict) + + # predict_q_estimator.quantity = predict_q_estimator.quantity.subsample(sample_vec=n_estimated_nn) + + print("n estiamted nn ", n_estimated_nn) + print("n ops predict ", n_ops_predict) + + ############### + ## Recalculate first level (level of metamodel and simulation difference) n ops + ############### + # Use NN as new MC + if len(n_estimated_nn) == 1: + cost_tr = C_2_cost * len_train_data + if n_estimated_nn[0] > len_train_data: + cost_te = C_1_cost * (n_estimated_nn[0] - len_train_data) + else: + cost_te = 0 + + # n_ops_predict_C_2 = [C_2_cost] + # print("l1 sample time ", l1_sample_time) + # #orig_n_ops = n_ops_predict[0] - l1_sample_time + # cost_tr = (l1_sample_time + n_ops[0]) * 
(len_train_data) + # n_ops_test = (preprocess_time_per_sample + l0_predict_time) + # cost_te = n_ops_test * (n_estimated_nn[0] - len_train_data) + # + # print("cost tr ", cost_tr) + # print("cost te ", cost_te) + + #n_ops_predict[1] = orig_n_ops + ((cost_tr + cost_te) / n_estimated_nn[1]) + + n_ops_predict[0] = ((cost_tr + cost_te) / n_estimated_nn[0]) + + print("n ops predict final", n_ops_predict) + print("n ops ", n_ops) + else: + # n_ops_test = (preprocess_time_per_sample + l0_predict_time) # * (n_estimated_nn[1] - len_train_data) + # if n_estimated_nn[1] > len_train_data: + # orig_n_ops = n_ops_predict[1] - l1_sample_time + # cost_tr = l1_sample_time * (len_train_data) # / n_estimated_nn[1]) + # cost_te = n_ops_test * (n_estimated_nn[1] - len_train_data) # / n_estimated_nn[1]) + # + # # note L1 sample time is n_ops_train + # + # n_ops_predict[1] = orig_n_ops + ((cost_tr + cost_te) / n_estimated_nn[1]) + # + # print("preprocess time per sample", preprocess_time_per_sample) + # print("orig n ops ", orig_n_ops) + # print("cost_tr ", cost_tr) + # print("cost te ", cost_te) + + n_ops_predict[0] = C_1_cost + n_ops_predict[1] = C_2_cost + + #n_ops_predict = [0.33790084, 30.91978988484516, 2321.096786450282] + n_ops_predict_orig = n_ops_predict + + ###### + ## initial N guess + if cut_est: + n_estimated_nn, l_var_nn, n_samples_nn = get_n_estimated(sample_storage_predict_for_estimate, + predict_q_estimator_est, n_ops=n_ops_predict) + + print("NN FIRST n estimated nn ", n_estimated_nn) + all_n_estimated_nn = [] + for i in range(10): + + sample_storage_predict_for_est_2 = cut_samples(data_nn, sample_storage_predict, n_estimated_nn, bootstrap=True) + + predict_q_estimator_est_2 = get_quantity_estimator(sample_storage_predict_for_est_2, true_domain=domain) + + n_estimated_nn, l_var_nn, n_samples_nn = get_n_estimated(sample_storage_predict_for_est_2, + predict_q_estimator_est_2, + n_ops=n_ops_predict) + + all_n_estimated_nn.append(n_estimated_nn) + + print("all n estimated 
nn ", all_n_estimated_nn) + n_estimated_nn = np.mean(all_n_estimated_nn, axis=0, dtype=np.int32) + print("mean n estimated nn ", n_estimated_nn) + + + # n_estimated_nn, l_var_nn, n_samples_nn = get_n_estimated(sample_storage_predict_for_est_2, predict_q_estimator_est_2, + # n_ops=n_ops_predict) + + print("NN SECOND n collected before estimate ", sample_storage_predict_for_est_2.get_n_collected()) + print("NN SECOND n estimated nn ", n_estimated_nn) + + # n_estimated_nn, l_var_nn, n_samples_nn = get_n_estimated(sample_storage_predict, + # predict_q_estimator, + # n_ops=n_ops_predict) + + # n_estimated_nn_C1_C2, _, _ = get_n_estimated(sample_storage_predict, predict_q_estimator, + # n_ops=n_ops_predict) + + print("n ops ", n_ops) + print("new n ops predict ", n_ops_predict) + print("new n estimated nn ", n_estimated_nn) + #print("n ops predict C1 C2", n_ops_predict_C1_C2) + #print("new n estimated nn C1 C2 ", n_estimated_nn_C1_C2) + # exit() + + sample_storage_predict = cut_samples(data_nn, sample_storage_predict, n_estimated_nn) + + moms, _, _, _ = estimate_moments(sample_storage_predict) + print("moms.vars ", moms.var) + # exit() + + #sample_storage_predict = sample_storage_predict_for_estimate + + # predict_q_estimator = get_quantity_estimator(sample_storage_predict_0, true_domain=domain) + # predict_q_estimator.quantity = predict_q_estimator.quantity.subsample(sample_vec=n_estimated_nn) + + print("new n estimated nn ", n_estimated_nn) + print("new l var nn ", l_var_nn) + + ############################### + ### Variance cost relation ### + ############################### + #_variance_cost_analysis(l_var_nn, n_ops_predict) + + + # predict_moments = compute_moments(sample_storage_predict) + # print("predict moments var ", predict_moments.var) + + ####### + ## Estimate total time + ####### + print("NN estimated ", n_estimated_nn) + print("MLMC estimated ", n_estimated_orig) + print("n ops predict_orig ", n_ops_predict_orig) + print("n ops ", n_ops) + #print("n 
estimated nn[1] ", n_estimated_nn[1]) + # n_ops_predict_orig = n_ops_predict + # n_ops_test = preprocess_time / (n_estimated_nn[1] - len_train_data) + # if n_estimated_nn[1] > len_train_data: + # n_ops_predict_orig[1] = n_ops_predict_orig[1] - l1_sample_time + ((l1_sample_time * (len_train_data/n_estimated_nn[1]) + \ + # n_ops_test * ((n_estimated_nn[1] - len_train_data)/n_estimated_nn[1])) / n_estimated_nn[1]) + # n_ops_predict = n_ops_predict_orig + # # print("n ops predict ", n_ops_predict) + + NN_time_levels = n_ops_predict_orig * np.array(n_estimated_nn) + n_collected_times = n_ops * np.array(n_estimated_orig) + + print("NN time levels ", NN_time_levels) + print("MLMC time levels", n_collected_times) + nn_total_time = np.sum(NN_time_levels) + learning_time + print("NN total time ", nn_total_time) + mlmc_total_time = np.sum(n_collected_times) + print("MLMC total time ", mlmc_total_time) + # nn_total_time = np.sum(n_ops_predict_C1_C2 * np.array(n_estimated_nn_C1_C2)) + learning_time + # print("NN total time + learning time ", np.sum(n_ops_predict_C1_C2 * np.array(n_estimated_nn_C1_C2)) + learning_time) + + # original_moments, estimator, original_true_domain, _ = estimate_moments(sample_storage) + + # # Use train and test data without validation data + # data = [] + # for l_id in range(n_levels): + # level_samples = estimator.get_level_samples(level_id=l_id) + # if l_id == 0: + # level_samples = np.concatenate((train_targets.reshape(1, len(train_targets), 1), + # targets.reshape(1, len(targets), 1)), axis=1) + # data.append(level_samples) + # sample_storage_nn = create_quantity_mlmc(data) + # moments_nn, estimator_nn, _, _ = estimate_moments(sample_storage_nn, true_domain=original_true_domain) + + # n0 = 100 + # nL = 10 + # num_levels = n_levels + 1 + # initial_n_samples = np.round(np.exp2(np.linspace(np.log2(n0), np.log2(nL), num_levels))).astype(int) + # if len(initial_n_samples) == len(data): + # for i in range(len(data)): + # print(data[i].shape) + # data[i] 
= data[i][:, :initial_n_samples[i], :] + # print("data[i].shape ", data[i].shape) + + # level_params = [sample_storage.get_level_parameters()[0], *sample_storage.get_level_parameters()] + + # print("means nn ", moments_nn.mean) + # print("means_predict ", moments_predict.mean) + # + # print("means nn - means predict ", moments_nn.mean - moments_predict.mean) + # print("abs means nn - means predict ", np.abs(moments_nn.mean - moments_predict.mean)) + # + # print("vars nn ", moments_nn.var) + # print("vars predict ", moments_predict.var) + # + # print("moments_nn.l_means ", moments_nn.l_means[0]) + # print("moments_predict.l_means ", moments_predict.l_means[0]) + # + # print("moments nn n samples ", moments_nn.n_samples) + # print("moments nn n removed samples ", moments_predict.n_rm_samples) + # print("moments predict n samples ", moments_predict.n_samples) + # print("moments predict n removed samples ", moments_predict.n_rm_samples) + # + # for l_id, (l_mom, l_mom_pred) in enumerate(zip(moments_nn.l_means, moments_predict.l_means)): + # print("L id: {}, mom diff: {}".format(l_id, l_mom - l_mom_pred)) + + # if domain_largest: + # domain = get_largest_domain([sample_storage, sample_storage_predict, ref_sample_storage]) + # common_domain = get_largest_domain([sample_storage, sample_storage_predict, ref_sample_storage]) + # else: + # domain = None + + #domain = get_largest_domain([sample_storage, sample_storage_predict, ref_sample_storage]) + original_q_estimator = get_quantity_estimator(sample_storage, true_domain=domain) + predict_q_estimator = get_quantity_estimator(sample_storage_predict, true_domain=domain) + + final_sample_storage_moments, _, _, _ = estimate_moments(sample_storage) + print("final_sample_storage_moments.mean ", final_sample_storage_moments.mean) + print("final_sample_storage_moments.var ", final_sample_storage_moments.var) + + final_sample_storage_predict_moments, _, _, _ = estimate_moments(sample_storage_predict) + 
print("final_sample_storage_predict_moments.mean ", final_sample_storage_predict_moments.mean) + print("final_sample_storage_predict_moments.var ", final_sample_storage_predict_moments.var) + + # if cut_est: + # original_q_estimator = get_quantity_estimator(sample_storage_for_estimated, true_domain=domain) + # predict_q_estimator = get_quantity_estimator(sample_storage_predict_for_estimate, true_domain=domain) + + print("ref samples ", ref_sample_storage) + print("domain ", domain) + + if distr_domain_largest: + domain = get_largest_domain([sample_storage, sample_storage_predict, ref_sample_storage]) + common_domain = get_largest_domain([sample_storage, sample_storage_predict, ref_sample_storage]) + original_q_estimator = get_quantity_estimator(sample_storage, true_domain=domain) + predict_q_estimator = get_quantity_estimator(sample_storage_predict, true_domain=domain) + else: + domain = None + + ref_estimator = get_quantity_estimator(ref_sample_storage, true_domain=domain) + #ref_estimator = None + orig_moments_mean, predict_moments_mean, ref_moments_mean = compare_moments(original_q_estimator, + predict_q_estimator, ref_estimator) + + ref_orig_moments, ref_predict_moments, mlmc_predict_moments = compare_moments_2(sample_storage, sample_storage_predict, ref_sample_storage) + + level_kurtosis(original_q_estimator, predict_q_estimator) + + kl_mlmc, kl_nn = -1, -1 + orig_orth_moments, predict_orth_moments, ref_orth_moments = None, None, None + kl_mlmc, kl_nn, orig_orth_moments, predict_orth_moments, ref_orth_moments = compare_densities(original_q_estimator, predict_q_estimator, ref_estimator, + label_1="orig N: {}".format(n_estimated_orig), + label_2="gnn N: {}".format(n_estimated_nn)) + + if stats: + return n_estimated_orig, n_estimated_nn, n_ops, n_ops_predict, orig_moments_mean, \ + predict_moments_mean, ref_moments_mean, sample_storage.get_level_parameters(), \ + sample_storage_predict.get_level_parameters(), kl_mlmc, kl_nn, TARGET_VAR, \ + orig_orth_moments, 
predict_orth_moments, ref_orth_moments,\ + ref_orig_moments, ref_predict_moments, mlmc_predict_moments, learning_time + + plot_moments({"ref": ref_estimator, "orig": original_q_estimator, "nn": predict_q_estimator}) + + +def plot_loss(train_loss, val_loss, train_acc=None): + plt.plot(train_loss, label='loss') + if train_acc is not None: + plt.plot(train_acc, label='train acc') + plt.plot(val_loss, label='val_loss') + + print("len train loss ", len(train_loss)) + print("len val loss ", len(val_loss)) + print("final train loss ", train_loss[-1]) + print("final val loss ", val_loss[-1]) + + + print("val loss min: {}, corresponding train loss: {}".format(np.min(val_loss), train_loss[np.argmin(val_loss)])) + print("min val loss position: {}".format(np.argmin(val_loss))) + print("train loss min: {}, corresponding val loss: {}".format(np.min(train_loss), val_loss[np.argmin(train_loss)])) + print("min train loss position: {}".format(np.argmin(train_loss))) + + print("min train after min val: {}".format(np.min(train_loss[np.argmin(val_loss):]))) + print("min train after min val position: {}".format(np.argmin(val_loss) + np.argmin(train_loss[np.argmin(val_loss):]))) + + #plt.ylim([1, 3]) + plt.yscale("log") + #plt.axhline(np.min(train_loss)) + #plt.axhline(np.min(val_loss)) + plt.axvline(x=np.argmin(train_loss), color="blue") + plt.axvline(x=np.argmin(val_loss), color="green") + plt.xlabel('Epoch') + plt.ylabel('Error') + plt.legend() + plt.grid(True) + plt.show() + + +def plot_learning_rate(learning_rates): + plt.plot(learning_rates, label='loss') + #plt.ylim([0, 8]) + #plt.yscale("log") + plt.xlabel('Epoch') + plt.ylabel('Learning rate') + plt.legend() + plt.grid(True) + plt.show() + +def analyze_results(target, predictions): + #statistics, pvalue = ks_2samp(target, predictions) + + print("Target mean: {}, var: {}, Q25: {}, Q50: {}, Q75: {}".format(np.mean(target), + np.var(target), + np.quantile(target, 0.25), + np.quantile(target, 0.5), + np.quantile(target, 0.75))) + 
print("Predic mean: {}, var: {}, Q25: {}, Q50: {}, Q75: {}".format(np.mean(predictions), + np.var(predictions), + np.quantile(predictions, 0.25), + np.quantile(predictions, 0.5), + np.quantile(predictions, 0.75))) + + #print("KS statistics: {}, pvalue: {}".format(statistics, pvalue)) + # The closer KS statistic is to 0 the more likely it is that the two samples were drawn from the same distribution + + plt.hist(target, alpha=0.5, label='target', density=True) + plt.hist(predictions, alpha=0.5, label='predictions', density=True) + plt.legend(loc='upper right') + plt.show() + + +def estimate_density(values, title="Density"): + sample_storage = Memory() + n_levels = 1 + n_moments = N_MOMENTS + distr_accuracy = 1e-7 + + distr_plot = plots.Distribution(title=title, + log_density=True) + + result_format = [QuantitySpec(name="flow", unit="m", shape=(1,), times=[0], locations=['0'])] + + sample_storage.save_global_data(result_format=result_format, level_parameters=np.ones(n_levels)) + + successful_samples = {} + failed_samples = {} + n_ops = {} + n_successful = len(values) + for l_id in range(n_levels): + sizes = [] + for quantity_spec in result_format: + sizes.append(np.prod(quantity_spec.shape) * len(quantity_spec.times) * len(quantity_spec.locations)) + + # Dict[level_id, List[Tuple[sample_id:str, Tuple[fine_result: ndarray, coarse_result: ndarray]]]] + successful_samples[l_id] = [] + for sample_id in range(len(values)): + successful_samples[l_id].append((str(sample_id), (values[sample_id], 0))) + + n_ops[l_id] = [random.random(), n_successful] + + sample_storage.save_scheduled_samples(l_id, samples=["S{:07d}".format(i) for i in range(n_successful)]) + + sample_storage.save_samples(successful_samples, failed_samples) + sample_storage.save_n_ops(list(n_ops.items())) + + quantity = make_root_quantity(storage=sample_storage, q_specs=result_format) + length = quantity['flow'] + time = length[0] + location = time['0'] + value_quantity = location[0] + + quantile = QUANTILE + 
true_domain = mlmc.estimator.Estimate.estimate_domain(value_quantity, sample_storage, quantile=quantile) + moments_fn = Legendre(n_moments, true_domain) + + estimator = mlmc.estimator.Estimate(quantity=value_quantity, sample_storage=sample_storage, moments_fn=moments_fn) + + reg_param = 0 + target_var = TARGET_VAR + distr_obj, info, result, moments_fn = estimator.construct_density( + tol=distr_accuracy, + reg_param=reg_param, + orth_moments_tol=target_var) + + samples = value_quantity.samples(ChunkSpec(level_id=0, n_samples=sample_storage.get_n_collected()[0]))[..., 0] + + distr_plot.add_raw_samples(np.squeeze(samples)) + + distr_plot.add_distribution(distr_obj, label="") + + # kl = mlmc.tool.simple_distribution.KL_divergence(self.cut_distr.pdf, distr_obj.density, + # self.cut_distr.domain[0], self.cut_distr.domain[1]) + #kl_divergences.append(kl) + + distr_plot.show(file=None) + + + return estimator.estimate_moments() + + +def create_quantity(target, predictions): + sample_storage = Memory() + n_levels = 2 + + result_format = [QuantitySpec(name="conductivity", unit="m", shape=(1, 1), times=[1], locations=['0'])] + + sample_storage.save_global_data(result_format=result_format, level_parameters=np.ones(n_levels)) + + successful_samples = {} + failed_samples = {} + n_ops = {} + n_successful = len(target) + for l_id in range(n_levels): + sizes = [] + for quantity_spec in result_format: + sizes.append(np.prod(quantity_spec.shape) * len(quantity_spec.times) * len(quantity_spec.locations)) + + successful_samples[l_id] = [] + for sample_id in range(n_successful): + if l_id == 0: + fine_result = predictions[sample_id] + coarse_result = (np.zeros((np.sum(sizes),))) + else: + fine_result = target[sample_id] + coarse_result = predictions[sample_id] + + successful_samples[l_id].append((str(sample_id), (fine_result, coarse_result))) + + n_ops[l_id] = [random.random(), n_successful] + sample_storage.save_scheduled_samples(l_id, samples=["S{:07d}".format(i) for i in 
range(n_successful)]) + + sample_storage.save_samples(successful_samples, failed_samples) + sample_storage.save_n_ops(list(n_ops.items())) + + quantity = make_root_quantity(storage=sample_storage, q_specs=result_format) + length = quantity['flow'] + time = length[0] + location = time['0'] + value_quantity = location[0] + + return value_quantity, sample_storage + + +def diff_moments(target, predictions): + n_moments = 25 + quantity, target_sample_storage = create_quantity(target, predictions) + + quantile = QUANTILE + true_domain = mlmc.estimator.Estimate.estimate_domain(quantity, target_sample_storage, quantile=quantile) + + moments_fn = Legendre(n_moments, true_domain) + + quantity_moments = qe.moments(quantity, moments_fn) + + + moments_mean = qe.estimate_mean(quantity_moments) + + print("moments l means ", moments_mean.l_means) + print("moments l vars ", moments_mean.l_vars) + + print("np.max values mean l vars ", np.max(moments_mean.l_vars, axis=1)) + + print("moments mean ", moments_mean.mean) + print("moments var ", moments_mean.var) + + +def create_quantity_mlmc(data, level_parameters, num_ops=None): + sample_storage = Memory() + n_levels = len(data) + + result_format = [QuantitySpec(name="conductivity", unit="m", shape=(1, 1), times=[1], locations=['0'])] + sample_storage.save_global_data(result_format=result_format, level_parameters=level_parameters) + + successful_samples = {} + failed_samples = {} + n_ops = {} + for l_id in range(n_levels): + n_successful = data[l_id].shape[1] + sizes = [] + for quantity_spec in result_format: + sizes.append(np.prod(quantity_spec.shape) * len(quantity_spec.times) * len(quantity_spec.locations)) + + # Dict[level_id, List[Tuple[sample_id:str, Tuple[fine_result: ndarray, coarse_result: ndarray]]]] + successful_samples[l_id] = [] + for sample_id in range(n_successful): + + fine_result = data[l_id][:, sample_id, 0] + if l_id == 0: + coarse_result = (np.zeros((np.sum(sizes),))) + else: + coarse_result = data[l_id][:, 
sample_id, 1] + + successful_samples[l_id].append((str(sample_id), (fine_result, coarse_result))) + + # if num_ops is not None: + # n_ops[l_id] = [num_ops[l_id], n_successful] + # else: + n_ops[l_id] = [random.random(), n_successful] + + sample_storage.save_scheduled_samples(l_id, samples=["S{:07d}".format(i) for i in range(n_successful)]) + + # + # print("successful samples ") + # print("l 0", successful_samples[0][:10]) + # print("l 1", successful_samples[1][:10]) + # print("l 2", successful_samples[2][:10]) + # print("l 3", successful_samples[3][:10]) + + sample_storage.save_samples(successful_samples, failed_samples) + sample_storage.save_n_ops(list(n_ops.items())) + + return sample_storage + + +def estimate_moments(sample_storage, true_domain=None): + n_moments = N_MOMENTS + result_format = sample_storage.load_result_format() + root_quantity = make_root_quantity(sample_storage, result_format) + + conductivity = root_quantity['conductivity'] + time = conductivity[1] # times: [1] + location = time['0'] # locations: ['0'] + q_value = location[0, 0] + + if true_domain is None: + quantile = QUANTILE + true_domain = mlmc.estimator.Estimate.estimate_domain(q_value, sample_storage, quantile=quantile) + moments_fn = Legendre(n_moments, true_domain) + print("true domain ", true_domain) + + estimator = mlmc.estimator.Estimate(quantity=q_value, sample_storage=sample_storage, moments_fn=moments_fn) + #means, vars = estimator.estimate_moments(moments_fn) + + moments_mean = qe.estimate_mean(qe.moments(q_value, moments_fn)) + return moments_mean, estimator, true_domain, q_value + + +def ref_storage(mlmc_file): + sample_storage = SampleStorageHDF(file_path=mlmc_file) + return sample_storage + + +def get_largest_domain(storages): + true_domains = [] + for storage in storages: + result_format = storage.load_result_format() + root_quantity = make_root_quantity(storage, result_format) + + conductivity = root_quantity['conductivity'] + time = conductivity[1] # times: [1] + location 
= time['0'] # locations: ['0'] + q_value = location[0, 0] + + # @TODO: How to estimate true_domain? + quantile = QUANTILE + domain = mlmc.estimator.Estimate.estimate_domain(q_value, storage, quantile=quantile) + + true_domains.append([domain[0], domain[1]]) + + true_domains = np.array(true_domains) + + print("true domains ", true_domains) + + # print("true domains ", true_domains[0]) + #true_domain = true_domains[-1] # ref + + + #true_domain = [np.min(true_domains[:, 0]), np.max(true_domains[:, 1])] + #true_domain = [np.max(true_domains[:, 0]), np.min(true_domains[:, 1])] + #true_domain = [np.mean(true_domains[:, 0]), np.mean(true_domains[:, 1])] + + #true_domain = true_domains[-1] # ref domain + true_domain = true_domains[0] # MC domain + return true_domain + + +def compare_moments_2(sample_storage, sample_storage_predict, ref_sample_storage): + true_domains = [] + for storage in [sample_storage, sample_storage_predict, ref_sample_storage]: + result_format = storage.load_result_format() + root_quantity = make_root_quantity(storage, result_format) + + conductivity = root_quantity['conductivity'] + time = conductivity[1] # times: [1] + location = time['0'] # locations: ['0'] + q_value = location[0, 0] + + # @TODO: How to estimate true_domain? 
+ quantile = QUANTILE + domain = mlmc.estimator.Estimate.estimate_domain(q_value, storage, quantile=quantile) + true_domains.append([domain[0], domain[1]]) + + mlmc_ref_domain = [np.max([true_domains[0][0], true_domains[-1][0]]), + np.min([true_domains[0][1], true_domains[-1][1]])] + + print("true domain ", true_domains) + + # just mlmc domain + mlmc_ref_domain = true_domains[-1] # ref domain + mlmc_ref_domain = true_domains[0] # mlmc domain + + + nn_ref_domain = [np.max([true_domains[1][0], true_domains[-1][0]]), + np.min([true_domains[1][1], true_domains[-1][1]])] + + # just nn domain + nn_ref_domain = true_domains[-1] # ref domain + nn_ref_domain = true_domains[1] # mlmc domain + + + print("mlmc ref domain ", mlmc_ref_domain) + print("nn ref domain ", nn_ref_domain) + + print("############################ COMPARE MOMENTS 2 ####################################") + #### + ## MLMC vs REF + ### + ref_estimator = get_quantity_estimator(ref_sample_storage, true_domain=mlmc_ref_domain) + mlmc_estimator = get_quantity_estimator(sample_storage, true_domain=mlmc_ref_domain) + + ref_estimator.estimate_moments() + ref_moments_mean = ref_estimator.moments_mean + + mlmc_estimator.estimate_moments() + orig_moments_mean = mlmc_estimator.moments_mean + + # print("ref moments mean ", ref_moments_mean.mean) + # print("orig moments mean ", orig_moments_mean.mean) + + ref_orig_moments = [ref_moments_mean, orig_moments_mean] + + print("ref orig mean SSE ", np.sum((ref_moments_mean.mean - orig_moments_mean.mean) ** 2)) + print("ref orig mean SE ", np.sum(np.abs((ref_moments_mean.mean - orig_moments_mean.mean)))) + + print("ref orig var SSE ", np.sum((ref_moments_mean.var - orig_moments_mean.var) ** 2)) + print("ref orig var SE ", np.sum(np.abs((ref_moments_mean.var - orig_moments_mean.var)))) + + ############################## + ############################## + ### NN PREDICT vs REF + ### + ref_estimator = get_quantity_estimator(ref_sample_storage, true_domain=nn_ref_domain) + 
predict_estimator = get_quantity_estimator(sample_storage_predict, true_domain=nn_ref_domain) + + ref_estimator.estimate_moments() + ref_moments_mean = ref_estimator.moments_mean + + predict_estimator.estimate_moments() + predict_moments_mean = predict_estimator.moments_mean + + ref_predict_moments = [ref_moments_mean, predict_moments_mean] + + print("ref predict mean SSE ", np.sum((ref_moments_mean.mean - predict_moments_mean.mean) ** 2)) + # print("predict moments mean ", predict_moments_mean.mean) + print("ref predict mean SE ", np.sum(np.abs((ref_moments_mean.mean - predict_moments_mean.mean)))) + + print("ref predict var SSE ", np.sum((ref_moments_mean.var - predict_moments_mean.var) ** 2)) + print("ref predict var SE ", np.sum(np.abs((ref_moments_mean.var - predict_moments_mean.var)))) + + + ############################## + ###############################place + ### MLMC vs MC +NN + mlmc_estimator = get_quantity_estimator(sample_storage, true_domain=true_domains[0]) + predict_estimator = get_quantity_estimator(sample_storage_predict, true_domain=true_domains[0]) + + mlmc_estimator.estimate_moments() + mlmc_moments_mean = mlmc_estimator.moments_mean + + predict_estimator.estimate_moments() + predict_moments_mean = predict_estimator.moments_mean + + mlmc_predict_moments = [mlmc_moments_mean, predict_moments_mean] + + + # print("ref moments var ", ref_moments_mean.var) + #print("orig moments var ", orig_moments_mean.var) + #print("predict moments var ", predict_moments_mean.var) + + # print("MAX orig moments var ", np.max(orig_moments_mean.l_vars, axis=1)) + # print("MAX predict moments var ", np.max(predict_moments_mean.l_vars, axis=1)) + + print("MC moments ", mlmc_moments_mean.mean) + print("MC + NN moments ", predict_moments_mean.mean) + + print("mlmc predict mean SSE ", np.sum((mlmc_moments_mean.mean - predict_moments_mean.mean) ** 2)) + # print("predict moments mean ", predict_moments_mean.mean) + print("mlmc predict mean SE ", 
np.sum(np.abs((mlmc_moments_mean.mean - predict_moments_mean.mean)))) + + print("mlmc predict var SSE ", np.sum((mlmc_moments_mean.var - predict_moments_mean.var) ** 2)) + print("mlmc predict var SE ", np.sum(np.abs((mlmc_moments_mean.var - predict_moments_mean.var)))) + + + + print("##############################################################") + + return ref_orig_moments, ref_predict_moments, mlmc_predict_moments + + +def compare_moments(original_q_estimator, predict_q_estimator, ref_estimator=None): + print("############################ COMPARE MOMENTS ####################################") + original_q_estimator.estimate_moments() + orig_moments_mean = original_q_estimator.moments_mean + + predict_q_estimator.estimate_moments() + predict_moments_mean = predict_q_estimator.moments_mean + + ref_moments_mean = None + if ref_estimator is not None: + ref_estimator.estimate_moments() + ref_moments_mean = ref_estimator.moments_mean + + print("ref moments mean ", ref_moments_mean.mean) + print("orig moments mean ", orig_moments_mean.mean) + print("predict moments mean ", predict_moments_mean.mean) + + print("ref orig mean SSE ", np.sum((ref_moments_mean.mean - orig_moments_mean.mean)**2)) + print("ref predict mean SSE ", np.sum((ref_moments_mean.mean - predict_moments_mean.mean) ** 2)) + # + print("ref orig mean SE ", np.sum(np.abs((ref_moments_mean.mean - orig_moments_mean.mean)))) + print("ref predict mean SE ", np.sum(np.abs((ref_moments_mean.mean - predict_moments_mean.mean)))) + + orig_diff = ref_moments_mean.mean - orig_moments_mean.mean + predict_diff = ref_moments_mean.mean - predict_moments_mean.mean + orig_diff[0] = 1 + predict_diff[0] = 1 + + # print("np.abs((ref_moments_mean.mean - orig_moments_mean.mean))/orig_diff ", np.abs((ref_moments_mean.mean - orig_moments_mean.mean))/orig_diff) + # print("np.sum(np.abs((ref_moments_mean.mean - predict_moments_mean.mean))/predict_diff ", np.abs((ref_moments_mean.mean - predict_moments_mean.mean))/predict_diff) + # 
+ # print("ref orig mean SE relative", np.sum(np.abs((ref_moments_mean.mean - orig_moments_mean.mean))/orig_diff)) + # print("ref predict mean SE relative", np.sum(np.abs((ref_moments_mean.mean - predict_moments_mean.mean))/predict_diff)) + + #print("ref moments var ", ref_moments_mean.var) + print("orig moments var ", orig_moments_mean.var) + print("predict moments var ", predict_moments_mean.var) + + # print("MAX orig moments var ", np.max(orig_moments_mean.l_vars, axis=1)) + # print("MAX predict moments var ", np.max(predict_moments_mean.l_vars, axis=1)) + + print("ref orig var SSE ", np.sum((ref_moments_mean.var - orig_moments_mean.var) ** 2)) + print("ref predict var SSE ", np.sum((ref_moments_mean.var - predict_moments_mean.var) ** 2)) + # + print("ref orig var SE ", np.sum(np.abs((ref_moments_mean.var - orig_moments_mean.var)))) + print("ref predict var SE ", np.sum(np.abs((ref_moments_mean.var - predict_moments_mean.var)))) + + print("##############################################################") + + return orig_moments_mean, predict_moments_mean, ref_moments_mean + + # l_0_samples = predict_q_estimator.get_level_samples(level_id=0) + # l_1_samples = predict_q_estimator.get_level_samples(level_id=1) + # l_2_samples = predict_q_estimator.get_level_samples(level_id=2) + # + # print("l 0 samples shape ", np.squeeze(l_0_samples).shape) + # print("l 1 samples shape ", np.squeeze(l_1_samples[..., 0]).shape) + # + # print("l_0_samples.var ", np.var(np.squeeze(l_0_samples))) + # print("l_1_samples ", l_1_samples) + # + # diff = l_1_samples[..., 0] - l_1_samples[..., 1] + # + # print("l1 diff ", diff) + # print("var l1 diff ", np.var(diff)) + # print("fine l_1_samples.var ", np.var(np.squeeze(l_1_samples[..., 0]))) + # print("fine l_2_samples.var ", np.var(np.squeeze(l_2_samples[..., 0]))) + +def level_kurtosis(original_q_estimator, predict_q_estimator): + original_q_estimator.kurtosis_check() + + +def compare_densities(estimator_1, estimator_2, ref_estimator, 
label_1="", label_2=""): + distr_plot = plots.ArticleDistributionPDF(title="densities", log_density=True, set_x_lim=False, quantity_name="$c [kgm^{-3}]$") + tol = 1e-7 + reg_param = 0 + + print("orig estimator") + distr_obj_1, _, result, _, orig_orth_moments = estimator_1.construct_density(tol=tol, reg_param=reg_param, + orth_moments_tol=TARGET_VAR) + #distr_plot.add_distribution(distr_obj_1, label=label_1, color="blue") + + print("predict estimator") + distr_obj_2, _, result, _, predict_orth_moments = estimator_2.construct_density(tol=tol, reg_param=reg_param, orth_moments_tol=TARGET_VAR) + #distr_plot.add_distribution(distr_obj_2, label=label_2, color="red", line_style="--") + + print("Ref estimator") + ref_distr_obj, _, result, _, ref_orth_moments = ref_estimator.construct_density(tol=tol, reg_param=reg_param, orth_moments_tol=TARGET_VAR) + #distr_plot.add_distribution(ref_distr_obj, label="MC reference", color="black", line_style=":") + + ref_estimator_pdf = get_quantity_estimator(ref_estimator._sample_storage, true_domain=None, n_moments=N_MOMENTS) + ref_distr_obj, _, result, _, ref_orth_moments_pdf = ref_estimator_pdf.construct_density(tol=tol, + reg_param=reg_param, + orth_moments_tol=TARGET_VAR) + + # domain = [np.max([ref_distr_obj.domain[0], distr_obj_1.domain[0], distr_obj_2.domain[0]]), + # np.min([ref_distr_obj.domain[1], distr_obj_1.domain[1], distr_obj_2.domain[1]])] + domain = [np.max([ref_distr_obj.domain[0], distr_obj_1.domain[0]]), + np.min([ref_distr_obj.domain[1], distr_obj_1.domain[1]])] + kl_div_ref_mlmc = mlmc.tool.simple_distribution.KL_divergence(ref_distr_obj.density, distr_obj_1.density, domain[0], domain[1]) + + print("KL div ref|mlmc: {}".format(kl_div_ref_mlmc)) + + domain = [np.max([ref_distr_obj.domain[0], distr_obj_2.domain[0]]), + np.min([ref_distr_obj.domain[1], distr_obj_2.domain[1]])] + kl_div_ref_gnn = mlmc.tool.simple_distribution.KL_divergence(ref_distr_obj.density, distr_obj_2.density, domain[0], + domain[1]) + + print("KL 
div ref|mlmc prediction: {}".format(kl_div_ref_gnn)) + + #distr_plot.add_distribution(distr_obj_1, label=label_1 + ", KL(ref|orig):{:0.4g}".format(kl_div_ref_mlmc), color="blue") + #distr_plot.add_distribution(distr_obj_2, label=label_2 + ", KL(ref|gnn):{:0.4g}".format(kl_div_ref_gnn), color="red", line_style="--") + distr_plot.add_distribution(distr_obj_1, label=r"$D_{3LMC}:$" + "{:0.4g}".format(kl_div_ref_mlmc), color="blue") + distr_plot.add_distribution(distr_obj_2, label=r"$D_{3LMC-M}:$" + "{:0.4g}".format(kl_div_ref_gnn), color="red", line_style="--") + distr_plot.add_distribution(ref_distr_obj, label="MC ref", color="black", line_style=":") + + distr_plot.show(file=None) + distr_plot.show(file="densities.pdf") + + + return kl_div_ref_mlmc, kl_div_ref_gnn, orig_orth_moments, predict_orth_moments, ref_orth_moments + + +def get_quantity_estimator(sample_storage, true_domain=None, quantity=None, n_moments=None): + if n_moments is None: + n_moments = N_MOMENTS + result_format = sample_storage.load_result_format() + if quantity is None: + root_quantity = make_root_quantity(sample_storage, result_format) + conductivity = root_quantity['conductivity'] + time = conductivity[1] # times: [1] + location = time['0'] # locations: ['0'] + quantity = location[0, 0] + + if true_domain is None: + quantile = QUANTILE + true_domain = mlmc.estimator.Estimate.estimate_domain(quantity, sample_storage, quantile=quantile) + + print("true domain") + moments_fn = Legendre(n_moments, true_domain) + #moments_fn = Monomial(n_moments, true_domain) + + return mlmc.estimator.Estimate(quantity=quantity, sample_storage=sample_storage, moments_fn=moments_fn) + + +def get_n_estimated(sample_storage, estimator, n_ops=None): + target_var = TARGET_VAR + #moments, estimator, _, quantity = estimate_moments(sample_storage, true_domain=true_domain) + + n_level_samples = sample_storage.get_n_collected() + # New estimation according to already finished samples + + #print("n level samples ", 
n_level_samples) + variances, n_samples = estimator.estimate_diff_vars() + #print("n samples ", n_samples) + #variances, est_n_ops = estimator.estimate_diff_vars_regression(n_level_samples) + + if n_ops is None: + n_ops = n_samples + # print("get n estimated n ops ", n_ops) + # print("variances ", variances) + n_estimated = mlmc.estimator.estimate_n_samples_for_target_variance(target_var, variances, n_ops, + n_levels=len(n_level_samples)) + return n_estimated, variances, n_samples + + +def get_storage_info(sample_storage): + moments, estimator, _, _ = estimate_moments(sample_storage) + n_collected = sample_storage.get_n_collected() + max_vars = np.max(np.array(moments.l_vars) / np.array(sample_storage.get_n_collected())[:, np.newaxis], axis=1) + print("n collected ", n_collected) + print("moments.l_vars max ", max_vars) + + + print('moments l vars ', moments.l_vars) + return n_collected, max_vars + + +def cut_samples(data, sample_storage, new_n_collected, new_l_0=0, bootstrap=False): + new_data = [] + for l_id, (d, n_est) in enumerate(zip(data, new_n_collected)): + # print("len d :", d.shape[1]) + #print("cut samples n est", n_est) + if n_est > 0: + if l_id == new_l_0: + if bootstrap: + sample_idx = np.random.choice(list(range(0, d.shape[1]-1)), size=n_est, replace=True) + if len(sample_idx) > d.shape[1]: + fine_samples = d[:, :np.min([d.shape[1], n_est]), 0].reshape(1, np.min([d.shape[1], n_est]), 1) + else: + fine_samples = d[:, sample_idx, 0].reshape(1, np.min([d.shape[1], len(sample_idx)]), 1) + else: + fine_samples = d[:, :np.min([d.shape[1], n_est]), 0].reshape(1, np.min([d.shape[1], n_est]), 1) + + coarse_samples = np.zeros(fine_samples.shape) + new_data.append(np.concatenate((fine_samples, coarse_samples), axis=2)) + else: + if bootstrap: + sample_idx = np.random.choice(list(range(0, d.shape[1] - 1)), size=n_est, replace=True) + if len(sample_idx) > d.shape[1]: + new_data.append(d[:, sample_idx, :]) + else: + new_data.append(d[:, :np.min([d.shape[1], 
n_est]), :]) + else: + new_data.append(d[:, :np.min([d.shape[1], n_est]), :]) + + # print("new data ", new_data) + # print("new data shape ", np.array(new_data).shape) + # + # print("var new data ", np.var(new_data, axis=-2)) + + sample_storage = create_quantity_mlmc(new_data, level_parameters=sample_storage.get_level_parameters()) + + return sample_storage + + +def plot_progress(conv_layers, dense_layers, output_flatten, mesh_file=None, n_samples=5): + + if mesh_file is not None: + #mesh = gmsh_io.GmshIO(fields_mesh) + mesh_data = extract_mesh_gmsh_io(mesh_file, get_points=True) + points = mesh_data['points'] + X = points[:, 0] + Y = points[:, 1] + + for idx, conv_layer in conv_layers.items(): + inputs, weights, outputs = conv_layer[0], conv_layer[1], conv_layer[2] + plt.matshow(weights[-1]) + plt.show() + # Note: weights have different shape than the mesh + + print("inputs ", inputs) + print("weights ", weights) + print("outputs ", outputs) + + for index, input in enumerate(inputs[:n_samples]): + if mesh_file: + for i in range(inputs[index].shape[1]): + input_feature = inputs[index][:, i] + fig, ax = plt.subplots(1, 1, figsize=(15, 10)) + print("inputs.shape ", input_feature.shape) + print("X.shape ", X.shape) + print("Y.shape ", Y.shape) + print("input.ravel().shape ", input_feature.ravel().shape) + cont = ax.tricontourf(X, Y, input_feature.ravel(), levels=16) + fig.colorbar(cont) + plt.title("input feature {}".format(i)) + plt.show() + + for i in range(outputs[index].shape[1]): + channel_output = outputs[index][:, i] + print("channel output shape ", channel_output.shape) + fig, ax = plt.subplots(1, 1, figsize=(15, 10)) + cont = ax.tricontourf(X, Y, channel_output, levels=16) + fig.colorbar(cont) + plt.title("output channel {}".format(i)) + + plt.show() + + else: + plt.matshow(input[0]) + plt.show() + plt.matshow(outputs[index][0]) + plt.show() + + # print("shape ", c_layer._outputs[index][0].shape) + plt.matshow(np.sum(outputs[index], axis=0, keepdims=True)) + 
plt.title("flatten") + plt.show() + + fig, ax = plt.subplots(1, 1, figsize=(15, 10)) + cont = ax.tricontourf(X, Y, np.sum(outputs[index], axis=1), levels=16) + fig.colorbar(cont) + plt.title("sum channels") + plt.show() + + # plt.matshow(self._inputs[-1][0]) + # plt.show() + # plt.matshow(self._outputs[-1][0]) + # plt.show() + + # print("output flatten ", self._output_flatten) + # print("final output ", final_output) + + if len(output_flatten) > 0: + plt.matshow([output_flatten[-1]]) + plt.title("flatten") + plt.show() + + for idx, dense_layer in dense_layers.items(): + inputs, weights, outputs = dense_layer[0], dense_layer[1], dense_layer[2] + + plt.matshow([inputs[-1]]) + plt.show() + plt.matshow([outputs[-1]]) + plt.show() + + + exit() + + +def plot_moments(mlmc_estimators): + n_moments = N_MOMENTS + moments_plot = mlmc.tool.plot.MomentsPlots( + title="Legendre {} moments".format(n_moments)) + + # moments_plot = mlmc.tool.plot.PlotMoments( + # title="Monomial {} moments".format(self.n_moments), log_mean_y=False) + + for nl, estimator in mlmc_estimators.items(): + + moments_mean = qe.estimate_mean(qe.moments(estimator._quantity, estimator._moments_fn)) + est_moments = moments_mean.mean + est_vars = moments_mean.var + + n_collected = [str(n_c) for n_c in estimator._sample_storage.get_n_collected()] + moments_plot.add_moments((moments_mean.mean, moments_mean.var), label="#{} N:".format(nl) + ", ".join(n_collected)) + + # print("moments level means ", moments_mean.l_means) + # print("moments level vars ", moments_mean.l_vars) + # print("moments level max vars ", np.max(moments_mean.l_vars, axis=1)) + # print("est moments ", est_moments) + # print("est_vars ", est_vars) + # print("np.max(est_vars) ", np.max(est_vars)) + + moments_plot.show(None) + #moments_plot.show(file=os.path.join(self.work_dir, "{}_moments".format(self.n_moments))) + moments_plot.reset() + + +def analyze_mlmc_data(): + n_levels = 5 + # mlmc_file = 
"/home/martin/Documents/metamodels/data/cl_0_3_s_4/L5/mlmc_5.hdf5" + mlmc_file = "/home/martin/Documents/metamodels/data/cl_0_1_s_1/L5/mlmc_5.hdf5" + + sample_storage = SampleStorageHDF(file_path=mlmc_file) + original_moments, estimator, original_true_domain = estimate_moments(sample_storage) + + # Test storage creation + data = [] + for l_id in range(n_levels): + level_samples = estimator.get_level_samples(level_id=l_id) + l_fine = np.squeeze(level_samples[..., 0]) + + print("mean l_fine ", np.mean(l_fine)) + plt.hist(l_fine, alpha=0.5, label='{}'.format(l_id), density=True) + data.append(level_samples) + + plt.legend(loc='upper right') + plt.show() + sample_storage_2 = create_quantity_mlmc(data) + moments_2, estimator_2, _ = estimate_moments(sample_storage_2) + assert np.allclose(original_moments.mean, moments_2.mean) + assert np.allclose(original_moments.var, moments_2.var) + + +def load_sim_data(sim_data_file): + """ + Load saved simulation data + :return: dict + """ + import json + + with open(sim_data_file, 'r') as file: + # First we load existing data into a dict. 
+ data = json.load(file) + + generate_rnd = [] + extract_mesh = [] + make_fields = [] + coarse_flow = [] + fine_flow = [] + for level_data in data.values(): + generate_rnd_level = np.zeros(2) + extract_mesh_level = np.zeros(2) + make_fields_level = np.zeros(2) + coarse_flow_level = np.zeros(2) + fine_flow_level = np.zeros(2) + for d in level_data: + generate_rnd_level[0] += d["generate_rnd"] + generate_rnd_level[1] += 1 + + extract_mesh_level[0] += d["extract_mesh"] + extract_mesh_level[1] += 1 + + make_fields_level[0] += d["make_field"] + make_fields_level[1] += 1 + + coarse_flow_level[0] += d["coarse_flow"] + coarse_flow_level[1] += 1 + + fine_flow_level[0] += d["fine_flow"] + fine_flow_level[1] += 1 + + generate_rnd.append(generate_rnd_level) + extract_mesh.append(extract_mesh_level) + make_fields.append(make_fields_level) + coarse_flow.append(coarse_flow_level) + fine_flow.append(fine_flow_level) + + return generate_rnd, extract_mesh, make_fields, coarse_flow, fine_flow + + + +def get_sample_times_mlmc(mlmc_file, sample_storage=None): + if sample_storage is None: + sample_storage = SampleStorageHDF(file_path=mlmc_file) + + sim_data_file = os.path.join(os.path.dirname(mlmc_file), 'sim_data.json') + + n_ops = sample_storage.get_n_ops() + if os.path.exists(sim_data_file): + generate_rnd, extract_mesh, make_fields, coarse_flow, fine_flow = load_sim_data(sim_data_file) + else: + generate_rnd = sample_storage.get_generate_rnd_times() + extract_mesh = sample_storage.get_extract_mesh_times() + make_fields = sample_storage.get_make_field_times() + coarse_flow = sample_storage.get_coarse_flow_times() + fine_flow = sample_storage.get_fine_flow_times() + + def time_for_sample_func(data): + new_n_ops = [] + for nop in data: + nop = np.squeeze(nop) + if len(nop) > 0: + new_n_ops.append(nop[0] / nop[1]) + return new_n_ops + + print("generated rnd ", generate_rnd) + + generate_rnd = time_for_sample_func(generate_rnd) + extract_mesh = time_for_sample_func(extract_mesh) + 
make_fields = time_for_sample_func(make_fields) + coarse_flow = time_for_sample_func(coarse_flow) + fine_flow = time_for_sample_func(fine_flow) + + field_times = generate_rnd + extract_mesh + make_fields + + print("n ops ", n_ops) + print("field times ", field_times) + print("coarse flow ", coarse_flow) + print("fine flow ", fine_flow) + + return n_ops, field_times, coarse_flow, fine_flow + + +def get_sample_times(sampling_info_path): + n_levels = [5] + for nl in n_levels: + variances = [] + n_ops = [] + times = [] + + times_scheduled_samples = [] + running_times = [] + flow_running_times = [] + + for i in range(0, 100): + sampling_info_path_iter = os.path.join(sampling_info_path, str(i)) + if os.path.isdir(sampling_info_path_iter): + variances.append(np.load(os.path.join(sampling_info_path_iter, "variances.npy"))) + n_ops.append(np.load(os.path.join(sampling_info_path_iter, "n_ops.npy"))) + times.append(np.load(os.path.join(sampling_info_path_iter, "time.npy"))) + + running_times.append(np.load(os.path.join(sampling_info_path_iter, "running_times.npy"))) + flow_running_times.append(np.load(os.path.join(sampling_info_path_iter, "flow_running_times.npy"))) + if os.path.exists(os.path.join(sampling_info_path_iter, "scheduled_samples_time.npy")): + times_scheduled_samples.append( + np.load(os.path.join(sampling_info_path_iter, "scheduled_samples_time.npy"))) + else: + break + + def time_for_sample_func(data): + new_n_ops = [] + for nop in data: + nop = np.squeeze(nop) + if len(nop) > 0: + new_n_ops.append(nop[:, 0]/nop[:, 1]) + return new_n_ops + + n_ops = time_for_sample_func(n_ops) + running_times = time_for_sample_func(running_times) + flow_running_times = time_for_sample_func(flow_running_times) + + + field_times = np.mean(np.array(running_times) - np.array(flow_running_times) - np.array(flow_running_times), + axis=0) + + flow_times = np.mean(np.array(flow_running_times), axis=0) + n_ops = np.mean(n_ops, axis=0) + + # print("n ops ", n_ops) + # print("running 
times ", np.mean(running_times, axis=0)) + # print("flow running times ", flow_times) + # exit() + + #n_ops = np.mean(running_times, axis=0) # CPU time of simulation (fields + flow for both coarse and fine sample) + + print("field times ", field_times) + + print("n ops ", n_ops) + print("type n ops ", type(n_ops)) + + if np.isnan(np.all(n_ops)): + n_ops = None + + return n_ops, field_times, flow_times + + +def plot_data(data, label): + plt.hist(data, alpha=0.5, label=label, density=True) + + #plt.hist(predictions, bins=50, alpha=0.5, label='predictions', density=True) + # plt.hist(targets - predictions, bins=50, alpha=0.5, label='predictions', density=True) + plt.legend(loc='upper right') + # plt.xlim(-0.5, 1000) + #plt.yscale('log') + plt.show() + + +if __name__ == "__main__": + analyze_mlmc_data() diff --git a/mlmc/metamodel/random_field_time.py b/mlmc/metamodel/random_field_time.py new file mode 100644 index 00000000..a353dadc --- /dev/null +++ b/mlmc/metamodel/random_field_time.py @@ -0,0 +1,184 @@ +import time +import numpy as np +from mlmc.tool import gmsh_io +from mlmc.tool.flow_mc import FlowSim, create_corr_field +from mlmc.tool.flow_mc_2 import FlowSimProcConc +from mlmc.tool.flow_mc_2 import create_corr_field as conc_create_corr_fields +import gstools +from mlmc.random import correlated_field as cf + + +def conc_rnd_sample_time(mesh_file, corr_field_config): + + start_time = time.process_time() + mesh_data = FlowSim.extract_mesh(mesh_file) + + n_samples = 200 + for i in range(n_samples): + fields = conc_create_corr_fields(dim=2, log=corr_field_config["log"], mode_no=10000) + fields.set_points(mesh_data['points'], mesh_data['point_region_ids'], + mesh_data['region_map']) + + fine_input_sample, coarse_input_sample = FlowSimProcConc.generate_random_sample(fields, coarse_step=0, + n_fine_elements=len( + mesh_data['points'])) + #print("fine input sample ", fine_input_sample) + + + rnd_time = time.process_time() - start_time + print("rnd_time / n_samples ", 
rnd_time / n_samples) + + return rnd_time / n_samples + + +def corr_field_sample_time(mesh_file=None, corr_length_config=None): + # import matplotlib + # from matplotlib import ticker, cm + #matplotlib.rcParams.update({'font.size': 22}) + + if corr_length_config.get('02_conc', False): + return conc_rnd_sample_time(mesh_file, corr_length_config) + + dim = 2 + log = True + cl = 0.1 + s = 1 + + if mesh_file is None: + #mesh_file = "/home/martin/Sync/Documents/flow123d_results/flow_experiments/Exponential/corr_length_0_01/l_step_0.0055_common_files/mesh.msh" + #mesh_file = "/home/martin/Documents/metamodels/data/5_ele/cl_0_1_s_1/L5/l_step_0.020196309484414757_common_files/mesh.msh" + mesh_file = "/home/martin/Documents/metamodels/data/1000_ele/l_step_0.055_common_files/mesh.msh" + + + start_time = time.process_time() + mesh_data = FlowSim.extract_mesh(mesh_file) + if corr_length_config is not None: + fields = create_corr_field(model="exp", dim=dim, + sigma=corr_length_config['sigma'], + corr_length=corr_length_config['corr_length'], + log=corr_length_config['log']) + else: + fields = create_corr_field(model="exp", dim=dim, + sigma=s, + corr_length=cl, + log=log) + # # Create fields both fine and coarse + fields = FlowSim.make_fields(fields, mesh_data, None) + + n_samples = 200 + for i in range(n_samples): + + fine_input_sample, coarse_input_sample = FlowSim.generate_random_sample(fields, coarse_step=0, + n_fine_elements=len( + mesh_data['points'])) + + len(fine_input_sample["conductivity"]) + features_log = np.log(fine_input_sample["conductivity"]) + + # print("conductivity mean ", np.mean(fine_input_sample["conductivity"])) + # print("conductivity var ", np.var(fine_input_sample["conductivity"])) + output = 1 + # + # print("fine input sample ", fine_input_sample["conductivity"].shape) + # + # gmsh_io.GmshIO().write_fields('fields_sample.msh', mesh_data['ele_ids'], fine_input_sample) + # + # mesh = gmsh_io.GmshIO('fields_sample.msh') + # element_data = 
mesh.current_elem_data + # features = list(element_data.values()) + # print("features ", np.array(features).shape) + + rnd_time = time.process_time() - start_time + print("rnd_time / n_samples ", rnd_time / n_samples) + return rnd_time / n_samples + + #Xfinal, Yfinal = fields.fields[0].correlated_field.points[:, 0], fields.fields[0].correlated_field.points[:, 1] + + # cont = ax.tricontourf(Xfinal, + # Yfinal, + # fine_input_sample['conductivity'].ravel())#, locator=ticker.LogLocator()) + + # fig.colorbar(cont) + # fig.savefig("cl_{}_var_{}.pdf".format(cl, s ** 2)) + # plt.show() + + # print("fields ", fields) + # model = gs.Exponential(dim=2, len_scale=cl) + # srf = gs.SRF(model, mesh_type="unstructed", seed=20170519, mode_no=1000, generator='RandMeth') + # print("model.var ", model.var) + # field = srf( + # (fields.fields[0].correlated_field.points[:, 0], fields.fields[0].correlated_field.points[:, 1])) + # srf.vtk_export("field") + # ax = srf.plot() + # ax.set_aspect("equal") + + + +def conc_corr_field(mesh_file, corr_field_config): + start_time = time.process_time() + mesh_data = FlowSim.extract_mesh(mesh_file) + + fields = conc_create_corr_fields(dim=2, log=corr_field_config["log"], mode_no=10000) + + n_samples = 200 + for i in range(n_samples): + fields.set_points(mesh_data['points'], mesh_data['point_region_ids'], + mesh_data['region_map']) + + fine_input_sample, coarse_input_sample = FlowSimProcConc.generate_random_sample(fields, coarse_step=0, + n_fine_elements=len( + mesh_data['points'])) + # print("fine input sample ", fine_input_sample) + + + # Xfinal, Yfinal = fields.fields[0].correlated_field.points[:, 0], fields.fields[0].correlated_field.points[:, 1] + + # cont = ax.tricontourf(Xfinal, + # Yfinal, + # fine_input_sample['conductivity'].ravel())#, locator=ticker.LogLocator()) + + # fig.colorbar(cont) + # fig.savefig("cl_{}_var_{}.pdf".format(cl, s ** 2)) + # plt.show() + + # print("fields ", fields) + # model = gs.Exponential(dim=2, len_scale=cl) + # 
srf = gs.SRF(model, mesh_type="unstructed", seed=20170519, mode_no=1000, generator='RandMeth') + # print("model.var ", model.var) + # field = srf( + # (fields.fields[0].correlated_field.points[:, 0], fields.fields[0].correlated_field.points[:, 1])) + # srf.vtk_export("field") + # ax = srf.plot() + # ax.set_aspect("equal") + + rnd_time = time.process_time() - start_time + print("rnd_time / n_samples ", rnd_time / n_samples) + + +if __name__ == "__main__": + import cProfile + import pstats + pr = cProfile.Profile() + pr.enable() + + corr_file_config = {"02_conc": True, 'log': True} + #mesh_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_conr/l_step_1.0_common_files/repo.msh" + mesh_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/l_step_0.1414213562373095_common_files/repo.msh" + + # corr_file_config = {"02_conc": False, 'log': True, 'corr_length':0.1, 'sigma':1} + # mesh_file = "/home/martin/Documents/metamodels/data/1000_ele/l_step_0.055_common_files/mesh.msh" + + # my_result = corr_field_sample_time(mesh_file, corr_file_config) + # + # pr.disable() + # ps = pstats.Stats(pr).sort_stats('cumtime') + # ps.print_stats() + # + + + + ############################### + ### generate conc random sample + + conc_corr_field(mesh_file, corr_file_config) + diff --git a/mlmc/moments.py b/mlmc/moments.py index 2329566e..fb42a29a 100644 --- a/mlmc/moments.py +++ b/mlmc/moments.py @@ -1,5 +1,6 @@ import numpy as np import numpy.ma as ma +import tensorflow as tf from scipy.interpolate import BSpline @@ -108,6 +109,23 @@ def eval_diff2(self, value, size=None): return self._eval_diff2(value, size) +class Moments_tf(Moments): + + def clip(self, value): + """ + Remove outliers and replace them with NaN + :param value: array of numbers + :return: masked_array, out + """ + #print("ref domain ", self.ref_domain) + #print("value ", value) + # Masked array + return value + # out = tnp.ma.masked_outside(value, self.ref_domain[0], self.ref_domain[1]) + # # 
Replace outliers with NaN + # return tnp.ma.filled(out, np.nan) + + class Monomial(Moments): """ Monomials generalized moments @@ -229,6 +247,95 @@ def _eval_diff2(self, value, size): return P_n @ self.diff2_mat +class Legendre_tf(Moments_tf): + + def __init__(self, size, domain, ref_domain=None, log=False, safe_eval=True): + if ref_domain is not None: + self.ref_domain = ref_domain + else: + self.ref_domain = (-1, 1) + + self.diff_mat = np.zeros((size, size)) + for n in range(size - 1): + self.diff_mat[n, n + 1::2] = 2 * n + 1 + self.diff2_mat = self.diff_mat @ self.diff_mat + + super().__init__(size, domain, log, safe_eval) + + # def _eval_value(self, x, size): + # return tnp.polynomial.legendre.legvander(x, deg=size-1) + + def _eval_all(self, value, size): + value = self.transform(value) + #print("value ", value.shape) + + + # out = tfgms.evaluate_legendre_polynomial(degree_l=size-1, order_m=1, x=1) + # + # + + # return out + leg_poly = [] + for i in range(1, size): + out = Legendre_tf.P(i, value) + + # print("type out ", type(out)) + # print("out shape ", out.shape) + out = tf.convert_to_tensor(tf.squeeze(out)) + out = tf.dtypes.cast(out, tf.float64) + + #print("out shape ", out.shape) + + leg_poly.append(out) + + leg_poly = tf.stack(leg_poly, 0) + # print("leg poly shape ", leg_poly.shape) + # print("leg poly T shape ", tf.transpose(leg_poly).shape) + + return tf.transpose(leg_poly) + + @staticmethod + def P(n, x): + if (n == 0): + return np.ones(x.shape) # P0 = 1 + elif (n == 1): + return x # P1 = x + else: + return (((2 * n) - 1) * x * Legendre_tf.P(n - 1, x) - (n - 1) * Legendre_tf.P(n - 2, x)) / float(n) + + def _eval_all_der(self, value, size, degree=1): + """ + Derivative of Legendre polynomials + :param value: values to evaluate + :param size: number of moments + :param degree: degree of derivative + :return: + """ + value = self.transform(np.atleast_1d(value)) + eval_values = np.empty((value.shape + (size,))) + + for s in range(size): + if s == 0: + 
coef = [1] + else: + coef = np.zeros(s+1) + coef[-1] = 1 + + coef = np.polynomial.legendre.legder(coef, degree) + eval_values[:, s] = np.polynomial.legendre.legval(value, coef) + return eval_values + + def _eval_diff(self, value, size): + t = self.transform(np.atleast_1d(value)) + P_n = np.polynomial.legendre.legvander(t, deg=size - 1) + return P_n @ self.diff_mat + + def _eval_diff2(self, value, size): + t = self.transform(np.atleast_1d(value)) + P_n = np.polynomial.legendre.legvander(t, deg=size - 1) + return P_n @ self.diff2_mat + + class TransformedMoments(Moments): def __init__(self, other_moments, matrix): """ diff --git a/mlmc/plot/diagnostic_plots.py b/mlmc/plot/diagnostic_plots.py new file mode 100644 index 00000000..e69de29b diff --git a/mlmc/plot/plots.py b/mlmc/plot/plots.py index 9a6053d5..f97cadaf 100644 --- a/mlmc/plot/plots.py +++ b/mlmc/plot/plots.py @@ -8,7 +8,7 @@ import matplotlib.pyplot as plt -def create_color_bar(range, label, ax = None): +def create_color_bar(range, label, ax=None, colormap=None): """ Create colorbar for a variable with given range and add it to given axes. :param range: single value as high bound or tuple (low bound, high bound) @@ -16,13 +16,16 @@ def create_color_bar(range, label, ax = None): :param ax: :return: Function to map values to colors. 
(normalize + cmap) """ + # Create colorbar - colormap = plt.cm.gist_ncar + if colormap is None: + colormap = plt.cm.gist_ncar try: min_r, max_r = range except TypeError: min_r, max_r = 0, range normalize = plt.Normalize(vmin=min_r, vmax=max_r) + #colormap = (matplotlib.colors.ListedColormap(['red', 'green', 'blue', 'orange'])) scalar_mappable = plt.cm.ScalarMappable(norm=normalize, cmap=colormap) if type(max_r) is int: cb_values = np.arange(min_r, max_r) @@ -31,6 +34,11 @@ def create_color_bar(range, label, ax = None): cb_values = np.linspace(min_r, max_r, 100) #ticks = np.linspace(min_r, int(size / 10) * 10, 9) ticks = None + ticks = [5,10,15,20] + ticks = [2, 4, 6, 8, 10] + ticks = [2, 3, 4, 5] + + scalar_mappable.set_array(cb_values) clb = plt.colorbar(scalar_mappable, ticks=ticks, aspect=50, pad=0.01, ax=ax) clb.set_label(label) @@ -484,6 +492,165 @@ def show(self, file=""): _show_and_save(self.fig, file, self.title) +class CorrLength: + """ + Plot level variances, i.e. Var X^l as a function of the mesh step. + Selected moments are plotted. + """ + def __init__(self, moments=None): + """ + :param moments: Size or type of moments subset, see moments_subset function. + """ + matplotlib.rcParams.update({'font.size': 26}) + matplotlib.rcParams.update({'lines.markersize': 8}) + self.fig = plt.figure(figsize=(15, 8)) + self.title = "" + self.fig.suptitle(self.title) + + self.ax = self.fig.add_subplot(1, 1, 1) + self.ax.set_xlabel("$\lambda$ - correlation length") + self.ax.set_ylabel("$MSE$") + self.ax.set_xscale('log') + self.ax.set_yscale('log') + + self.n_moments = None + self.subset_type = moments + self.min_step = 1e300 + self.max_step = 0 + self.data = {} + + self.nn_min_step = 1e300 + self.nn_max_step = 0 + self.nn_data = {} + + self._mse_train = {} + self._mse_test = {} + + #self._colormap = plt.cm.tab20 + + def add_mse_train(self, mse): + """ + Add variances for single MLMC instance. 
+ :param steps, variances : as returned by Estimate.estimate_level_vars + :param n_levels: + """ + self._mse_train = mse + + def add_mse_test(self, mse): + """ + Add variances for single MLMC instance. + :param steps, variances : as returned by Estimate.estimate_level_vars + :param n_levels: + """ + self._mse_test = mse + + def show(self, file=""): + #self._colormap = create_color_bar(range=[1, self.n_moments], label=r'$M_i$', ax=self.ax, colormap=plt.cm.tab20) + res = 5 + step_range = self.max_step / self.min_step + log_scale = step_range ** 0.001 - 1 + #rv = st.lognorm(scale=1, s=log_scale) + + # if m == 5: + # break + col = "blue" + label = "MSE train" + + print("mse train ", self._mse_train) + print("mse test ", self._mse_test) + + print("list(self._mse_train.keys()) ", list(self._mse_train.keys())) + print("self._mse_train.items() ", self._mse_train.items()) + + self.ax.scatter(list(self._mse_train.keys()), list(self._mse_train.values()), color=col, label=label) + + col = "red" + label = "MSE test" + self.ax.scatter(list(self._mse_test.keys()), list(self._mse_test.values()), color=col, label=label) + + self.fig.legend() + _show_and_save(self.fig, file, self.title) + + +class Variance: + """ + Plot level variances, i.e. Var X^l as a function of the mesh step. + Selected moments are plotted. + """ + def __init__(self, moments=None): + """ + :param moments: Size or type of moments subset, see moments_subset function. 
+ """ + matplotlib.rcParams.update({'font.size': 26}) + matplotlib.rcParams.update({'lines.markersize': 8}) + self.fig = plt.figure(figsize=(15, 8)) + self.title = "" + self.fig.suptitle(self.title) + + self.ax = self.fig.add_subplot(1, 1, 1) + self.ax.set_xlabel("$\lambda$ - correlation length") + self.ax.set_ylabel("$MSE$") + self.ax.set_xscale('log') + self.ax.set_yscale('log') + + self.n_moments = None + self.subset_type = moments + self.min_step = 1e300 + self.max_step = 0 + self.data = {} + + self.nn_min_step = 1e300 + self.nn_max_step = 0 + self.nn_data = {} + + self._mse_train = {} + self._mse_test = {} + + #self._colormap = plt.cm.tab20 + + def add_mse_train(self, mse): + """ + Add variances for single MLMC instance. + :param steps, variances : as returned by Estimate.estimate_level_vars + :param n_levels: + """ + self._mse_train = mse + + def add_mse_test(self, mse): + """ + Add variances for single MLMC instance. + :param steps, variances : as returned by Estimate.estimate_level_vars + :param n_levels: + """ + self._mse_test = mse + + def show(self, file=""): + #self._colormap = create_color_bar(range=[1, self.n_moments], label=r'$M_i$', ax=self.ax, colormap=plt.cm.tab20) + res = 5 + step_range = self.max_step / self.min_step + log_scale = step_range ** 0.001 - 1 + #rv = st.lognorm(scale=1, s=log_scale) + + # if m == 5: + # break + col = "blue" + label = "MSE train" + + print("mse train ", self._mse_train) + print("mse test ", self._mse_test) + + print("list(self._mse_train.keys()) ", list(self._mse_train.keys())) + print("self._mse_train.items() ", self._mse_train.items()) + + self.ax.scatter(list(self._mse_train.keys()), list(self._mse_train.values()), color=col, label=label) + + col = "red" + label = "MSE test" + self.ax.scatter(list(self._mse_test.keys()), list(self._mse_test.values()), color=col, label=label) + + self.fig.legend() + _show_and_save(self.fig, file, self.title) + class Variance: """ Plot level variances, i.e. 
Var X^l as a function of the mesh step. @@ -554,6 +721,508 @@ def show(self, file=""): _show_and_save(self.fig, file, self.title) +class TrainTestMSE: + """ + Plot meta-model MSE + """ + def __init__(self, moments=None): + """ + :param moments: Size or type of moments subset, see moments_subset function. + """ + matplotlib.rcParams.update({'font.size': 26}) + matplotlib.rcParams.update({'lines.markersize': 8}) + self.fig = plt.figure(figsize=(15, 8)) + self.title = "" + self.fig.suptitle(self.title) + + self.ax = self.fig.add_subplot(1, 1, 1) + self.ax.set_xlabel("$\lambda$ - correlation length") + self.ax.set_ylabel("$MSE$") + self.ax.set_xscale('log') + self.ax.set_yscale('log') + + self.n_moments = None + self.subset_type = moments + self.min_step = 1e300 + self.max_step = 0 + self.data = {} + + self.nn_min_step = 1e300 + self.nn_max_step = 0 + self.nn_data = {} + + self._mse_train = {} + self._mse_test = {} + + #self._colormap = plt.cm.tab20 + + def add_mse_train(self, mse): + """ + Add variances for single MLMC instance. + :param steps, variances : as returned by Estimate.estimate_level_vars + :param n_levels: + """ + self._mse_train = mse + + def add_mse_test(self, mse): + """ + Add variances for single MLMC instance. 
+ :param steps, variances : as returned by Estimate.estimate_level_vars + :param n_levels: + """ + self._mse_test = mse + + def show(self, file=""): + #self._colormap = create_color_bar(range=[1, self.n_moments], label=r'$M_i$', ax=self.ax, colormap=plt.cm.tab20) + res = 5 + step_range = self.max_step / self.min_step + log_scale = step_range ** 0.001 - 1 + #rv = st.lognorm(scale=1, s=log_scale) + + # if m == 5: + # break + col = "blue" + label = "MSE train" + + print("mse train ", self._mse_train) + print("mse test ", self._mse_test) + + print("list(self._mse_train.keys()) ", list(self._mse_train.keys())) + print("self._mse_train.items() ", self._mse_train.items()) + + self.ax.scatter(list(self._mse_train.keys()), list(self._mse_train.values()), color=col, label=label) + + col = "red" + label = "MSE test" + self.ax.scatter(list(self._mse_test.keys()), list(self._mse_test.values()), color=col, label=label) + + self.fig.legend() + _show_and_save(self.fig, file, self.title) + + +class VarianceNN: + """ + Plot level variances, i.e. Var X^l as a function of the mesh step. + Selected moments are plotted. + """ + def __init__(self, moments=None): + """ + :param moments: Size or type of moments subset, see moments_subset function. 
+ """ + matplotlib.rcParams.update({'font.size': 26}) + matplotlib.rcParams.update({'lines.markersize': 7}) + self.fig = plt.figure(figsize=(15, 8)) + + matplotlib.rcParams.update({'font.size': 16}) + matplotlib.rcParams.update({'lines.markersize': 8}) + # fig, axes = plt.subplots(1, 1, figsize=(22, 10)) + self.fig = plt.figure(figsize=(8, 5)) + + self.title = ""#"Level variances" + self.fig.suptitle(self.title) + + self.ax = self.fig.add_subplot(1, 1, 1) + self.ax.set_xlabel("$h$ - mesh step") + #self.ax.set_ylabel("Var $X^h$") + self.ax.set_ylabel("$\hat{V}^r_l$") + self.ax.set_xscale('log') + self.ax.set_yscale('log') + + #self.ax.set_xlim([1e-3, 1e0]) + + self.n_moments = None + self.subset_type = moments + self.min_step = 1e300 + self.max_step = 0 + self.data = {} + + self.nn_min_step = 1e300 + self.nn_max_step = 0 + self.nn_data = {} + + self._colormap = plt.cm.tab20 + self._n_ops = None + + def set_n_ops(self, n_ops): + print("n ops ", n_ops) + self._n_ops = n_ops + + def add_level_variances(self, steps, variances): + """ + Add variances for single MLMC instance. + :param steps, variances : as returned by Estimate.estimate_level_vars + :param n_levels: + """ + n_levels, n_moments = variances.shape + if self.n_moments is None: + self.n_moments = n_moments + self.moments_subset = moments_subset(n_moments, self.subset_type) + else: + assert self.n_moments == n_moments + + variances = variances[:, self.moments_subset] + self.min_step = min(self.min_step, steps[-1]) + self.max_step = max(self.max_step, steps[0]) + for m, vars in enumerate(variances.T): + X, Y = self.data.get(m, ([], [])) + X.extend(steps.tolist()) + Y.extend(vars.tolist()) + self.data[m] = (X, Y) + + def add_level_variances_nn(self, steps, variances): + """ + Add variances for single MLMC instance. 
+ :param steps, variances : as returned by Estimate.estimate_level_vars + :param n_levels: + """ + n_levels, n_moments = variances.shape + if self.n_moments is None: + self.n_moments = n_moments + self.moments_subset = moments_subset(n_moments, self.subset_type) + else: + assert self.n_moments == n_moments + + variances = variances[:, self.moments_subset] + self.nn_min_step = min(self.min_step, steps[-1]) + self.nn_max_step = max(self.max_step, steps[0]) + for m, vars in enumerate(variances.T): + X, Y = self.nn_data.get(m, ([], [])) + X.extend(steps.tolist()) + Y.extend(vars.tolist()) + self.nn_data[m] = (X, Y) + + def show(self, file=""): + self._colormap = create_color_bar(range=[1, self.n_moments], label=r'$r$', ax=self.ax, colormap=plt.cm.tab20) + res = 5 + step_range = self.max_step / self.min_step + log_scale = step_range ** 0.001 - 1 + #rv = st.lognorm(scale=1, s=log_scale) + for m, (X, Y) in self.data.items(): + # if m == 5: + # break + col = self._colormap(m) + label = "M{}".format(self.moments_subset[m]) + label = "MLMC" + # print("X ", X) + # print("len(X) ", len(X)) + # #print("rv.rvs(size=len(X)) ", rv.rvs(size=len(X))) + # print("Y ", Y) + XX = np.array(X) #* rv.rvs(size=len(X)) + # print("XX ", X) + self.ax.scatter(XX, Y, color=col) + XX, YY = make_monotone(X, Y) + + # step_range = self.nn_max_step / self.nn_min_step + # log_scale = step_range ** 0.01 - 1 + # rv = st.lognorm(scale=1, s=log_scale) + levels = {} + + print("nn data ", self.nn_data) + for m, (X, Y) in self.nn_data.items(): + # if m == 5: + # break + col = plt.cm.tab20(m) + label = "M{}".format(self.moments_subset[m]) + label = "MLMC meta" + XX = np.array(X) * 0.84 # rv.rvs(size=len(X)) + XY = np.array(X) * 1.16 + + self.ax.scatter(XX, Y, color=col, marker='v') + # XX, YY = make_monotone(X, Y) + + for x, y in zip(X, Y): + if x not in levels: + levels[x] = [] + levels[x].append(y) + + print("XX ", XX) + print("np.max(Y) + np.max(Y)*0.3) ", np.max(Y) + np.max(Y) * 0.3) + if self._n_ops is 
not None: + for index, n_ops in enumerate(self._n_ops): + self.ax.annotate("{:0.3g}".format(n_ops), (XY[index], np.max(levels[X[index]]))) + + #self.fig.legend() + + legend = self.ax.legend() + ax = legend.axes + + from matplotlib.lines import Line2D + from matplotlib.patches import Rectangle, RegularPolygon, FancyBboxPatch + + handles, labels = ax.get_legend_handles_labels() + + mlmc_marker = Line2D([], [], color='black', marker='o', linestyle='None', + markersize=8, markeredgewidth=1.7, + label='MLMC') # Line2D([], [], color='black', marker='|') + + handles.append(mlmc_marker) + labels.append("MLMC") + + if self.nn_data: + mlmc_marker_meta = Line2D([], [], color='black', marker='v', linestyle='None', + markersize=8, markeredgewidth=1.7, + label='r"$MLMC_{meta}:$"') # Line2D([], [], color='black', marker='|') + + handles.append(mlmc_marker_meta) + labels.append("MLMC-M") + + legend._legend_box = None + legend._init_legend_box(handles, labels) + legend._set_loc(legend._loc) + legend.set_title(legend.get_title().get_text()) + + _show_and_save(self.fig, file, self.title) + + +class VarianceNN2: + """ + Plot level variances, i.e. Var X^l as a function of the mesh step. + Selected moments are plotted. + """ + def __init__(self, moments=None): + """ + :param moments: Size or type of moments subset, see moments_subset function. 
+ """ + matplotlib.rcParams.update({'font.size': 26}) + matplotlib.rcParams.update({'lines.markersize': 7}) + self.fig = plt.figure(figsize=(15, 8)) + + matplotlib.rcParams.update({'font.size': 16}) + matplotlib.rcParams.update({'lines.markersize': 8}) + # fig, axes = plt.subplots(1, 1, figsize=(22, 10)) + self.fig = plt.figure(figsize=(8, 5)) + + self.title = ""#"Level variances" + self.fig.suptitle(self.title) + + self.ax = self.fig.add_subplot(1, 1, 1) + self.ax.set_xlabel("mesh elements") + #self.ax.set_ylabel("Var $X^h$") + self.ax.set_ylabel("$\hat{V}^r_l$") + self.ax.set_xscale('log') + self.ax.set_yscale('log') + + #self.ax.set_xlim([1e-3, 1e0]) + self.ax.set_xlim([5e1, 1e5]) + + self.n_moments = None + self.subset_type = moments + self.min_step = 1e300 + self.max_step = 0 + self.data = {} + + self.nn_min_step = 1e300 + self.nn_max_step = 0 + self.nn_data = {} + + self._colormap = plt.cm.tab20 + self._n_ops = None + + def set_n_ops(self, n_ops): + print("n ops ", n_ops) + self._n_ops = n_ops + + def add_level_variances(self, steps, variances): + """ + Add variances for single MLMC instance. + :param steps, variances : as returned by Estimate.estimate_level_vars + :param n_levels: + """ + n_levels, n_moments = variances.shape + if self.n_moments is None: + self.n_moments = n_moments + self.moments_subset = moments_subset(n_moments, self.subset_type) + else: + assert self.n_moments == n_moments + + variances = variances[:, self.moments_subset] + self.min_step = min(self.min_step, steps[-1]) + self.max_step = max(self.max_step, steps[0]) + for m, vars in enumerate(variances.T): + X, Y = self.data.get(m, ([], [])) + X.extend(steps.tolist()) + Y.extend(vars.tolist()) + self.data[m] = (X, Y) + + def add_level_variances_nn(self, steps, variances): + """ + Add variances for single MLMC instance. 
+ :param steps, variances : as returned by Estimate.estimate_level_vars + :param n_levels: + """ + n_levels, n_moments = variances.shape + if self.n_moments is None: + self.n_moments = n_moments + self.moments_subset = moments_subset(n_moments, self.subset_type) + else: + assert self.n_moments == n_moments + + variances = variances[:, self.moments_subset] + self.nn_min_step = min(self.min_step, steps[-1]) + self.nn_max_step = max(self.max_step, steps[0]) + for m, vars in enumerate(variances.T): + X, Y = self.nn_data.get(m, ([], [])) + X.extend(steps.tolist()) + Y.extend(vars.tolist()) + self.nn_data[m] = (X, Y) + + def show(self, file=""): + self._colormap = create_color_bar(range=[1, self.n_moments], label=r'$r$', ax=self.ax, colormap=plt.cm.tab20) + res = 5 + step_range = self.max_step / self.min_step + log_scale = step_range ** 0.001 - 1 + #rv = st.lognorm(scale=1, s=log_scale) + for m, (X, Y) in self.data.items(): + + # if m == 5: + # break + print("m+1+m ", m+1+m) + col = self._colormap(m+1) + label = "M{}".format(self.moments_subset[m]) + label = "MLMC" + print("data m: {}, col: {}".format(m, col)) + # print("X ", X) + # print("len(X) ", len(X)) + # #print("rv.rvs(size=len(X)) ", rv.rvs(size=len(X))) + # print("Y ", Y) + XX = np.array(X) #* rv.rvs(size=len(X)) + # print("XX ", X) + self.ax.scatter(XX, Y, color=col) + XX, YY = make_monotone(X, Y) + + # step_range = self.nn_max_step / self.nn_min_step + # log_scale = step_range ** 0.01 - 1 + # rv = st.lognorm(scale=1, s=log_scale) + levels = {} + + print("nn data ", self.nn_data) + for m, (X, Y) in self.nn_data.items(): + #m += 1 + # if m == 5: + # break + #col = plt.cm.tab20(m) + col = self._colormap(m+1) + print("nn_data m: {}, col: {}".format(m, col)) + XX = np.array(X) * 0.84 # rv.rvs(size=len(X)) + print("X ", X) + X = np.array(X) + #XY = np.concatenate(([X[0] * 0.35], X[1:] * 1.16), axis=0) + + XY = np.concatenate(([X[0] * 0.6], [X[1] * 1.1], X[2:] * 0.95), axis=0) + + self.ax.scatter(XX, Y, color=col, 
marker='v') + # XX, YY = make_monotone(X, Y) + + for x, y in zip(X, Y): + if x not in levels: + levels[x] = [] + levels[x].append(y) + + print("XX ", XX) + print("np.max(Y) + np.max(Y)*0.3) ", np.max(Y) + np.max(Y) * 0.3) + print("self._n_ops ", self._n_ops) + self._n_ops = [0.338, 29.7, 223, 2320] + if self._n_ops is not None: + for index, n_ops in enumerate(self._n_ops): + if index == 0: + self.ax.annotate("{}".format(n_ops), (XY[index], np.max(levels[X[index]])*1.7)) + elif index == 1: + self.ax.annotate("{}".format(n_ops), (XY[index], np.max(levels[X[index]])*1.9)) + else: + #self.ax.annotate("{:0.3g}".format(n_ops), (XY[index], np.max(levels[X[index]]))) + self.ax.annotate("{}".format(n_ops), (XY[index], np.max(levels[X[index]])*1.7)) + + #self.fig.legend() + + legend = self.ax.legend() + ax = legend.axes + + from matplotlib.lines import Line2D + from matplotlib.patches import Rectangle, RegularPolygon, FancyBboxPatch + + handles, labels = ax.get_legend_handles_labels() + + mlmc_marker = Line2D([], [], color='black', marker='o', linestyle='None', + markersize=8, markeredgewidth=1.7, + label='MLMC') # Line2D([], [], color='black', marker='|') + + handles.append(mlmc_marker) + labels.append("3LMC") + + if self.nn_data: + mlmc_marker_meta = Line2D([], [], color='black', marker='v', linestyle='None', + markersize=8, markeredgewidth=1.7, + label='r"$MLMC_{meta}:$"') # Line2D([], [], color='black', marker='|') + + handles.append(mlmc_marker_meta) + labels.append("3LMC-M") + + legend._legend_box = None + legend._init_legend_box(handles, labels) + legend._set_loc(legend._loc) + legend.set_title(legend.get_title().get_text()) + + _show_and_save(self.fig, file, self.title) + + +class MomentsPlots(Distribution): + def __init__(self, title="", quantity_name="i-th moment", legend_title="", log_mean_y=False, log_var_y=False): + """ + """ + self._domain = None + self._title = title + self._legend_title = legend_title + self.plot_matrix = [] + self.i_plot = 0 + + self.cmap = 
plt.cm.tab20 + self.ax_var = None + self.ax_log_density = None + self.x_lim = None + + # mean_colors = ["brown", "salmon", "orange", "goldenrod", "red"] + # var_colors = ["blue", "slateblue", "indigo", "darkseagreen", "green"] + # + # self.mean_color = iter(mean_colors) + # self.cdf_color = iter(var_colors) + + self.fig, axes = plt.subplots(1, 2, figsize=(22, 10)) + self.ax_mean = axes[0] + self.ax_var = axes[1] + + #self.fig.suptitle(title, y=0.99) + x_axis_label = quantity_name + + self.ax_mean.set_ylabel("Mean") + self.ax_mean.set_xlabel(x_axis_label) + self.ax_mean.tick_params(axis='y') + + self.ax_var.set_ylabel("Var") + #self.ax_var.tick_params(axis='y') + self.ax_var.set_xlabel(x_axis_label) + + if log_mean_y: + self.ax_mean.set_yscale('log') + + if log_var_y: + self.ax_var.set_yscale('log') + + def add_moments(self, moments, label=None): + means, vars = moments + X = range(0, len(means)) + print("vars ", vars) + self.ax_var.scatter(X, vars, color=self.cmap(self.i_plot), label=label) + self.ax_mean.scatter(X, means, color=self.cmap(self.i_plot), label=label) + #self._plot_borders(self.ax_cdf, self.cdf_color, domain) + self.i_plot += 1 + + def show(self, file=""): + self.ax_mean.legend() + self.ax_var.legend() + + _show_and_save(self.fig, file, self._title) + + class BSplots: def __init__(self, n_samples, bs_n_samples, n_moments, ref_level_var): self._bs_n_samples = bs_n_samples @@ -986,7 +1655,6 @@ def plot_means_and_vars(self, moments_mean, moments_var, n_levels, exact_moments plt.legend() plt.show() - def plot_var_regression(self, i_moments = None): """ Plot total and level variances and their regression and errors of regression. 
@@ -1311,3 +1979,169 @@ def plot_pbs_flow_job_time(): #ax.set_yscale('log') ax.plot(1/(level_params**2), n_ops) _show_and_save(fig, "flow_time", "flow_time") + + +class ArticleDistributionPDF(Distribution): + """ + mlmc.plot.Distribution + + Class for plotting distribution approximation: PDF and CDF (optional) + Provides methods to: add more plots, add exact PDF, add ECDF/histogram from single level MC + """ + def __init__(self, exact_distr=None, title="", quantity_name="Y", legend_title="", + log_density=False, cdf_plot=False, log_x=False, error_plot='l2', reg_plot=False, multipliers_plot=True, + set_x_lim=True): + """ + Plot configuration + :param exact_distr: Optional exact domain (for adding to plot and computing error) + :param title: Figure title. + :param quantity_name: Quantity for X axis label. + :param log_density: Plot logarithm of density value. + :param cdf_plot: Plot CDF as well (default) + :param log_x: Use logarithmic scale for X axis. + :param error_plot: None, 'diff', 'kl. Plot error of pdf using either difference or + integrand of KL divergence: exact_pdf * log(exact_pdf / approx_pdf). + Simple difference is used for CDF for both options. 
+ """ + plt.ticklabel_format(style='sci') + matplotlib.rcParams.update({'font.size': 16}) + from matplotlib import ticker + formatter = ticker.ScalarFormatter(useMathText=True) + formatter.set_scientific(True) + #matplotlib.rcParams.update({'lines.markersize': 8}) + self._exact_distr = exact_distr + self._log_density = log_density + self._log_x = log_x + self._error_plot = error_plot + self._domain = None + self._title = title + self._legend_title = legend_title + self.plot_matrix = [] + self.i_plot = 0 + + self.ax_cdf = None + self.ax_log_density = None + self.x_lim = None + + self.pdf_color = "brown" + self.cdf_color = "blue" + + self.reg_plot = reg_plot + + #self.fig, self.ax_cdf = plt.subplots(1, 1, figsize=(22, 10)) + self.fig, self.ax_pdf = plt.subplots(1, 1, figsize=(8, 5)) + self.fig_cdf = None + #self.ax_pdf = self.ax_cdf.twinx() + + #self.fig.suptitle(title, y=0.99) + x_axis_label = quantity_name + + # PDF axes + self.ax_pdf.set_ylabel("PDF")#, color=self.pdf_color) + #self.ax_pdf.set_ylabel("probability density") + self.ax_pdf.set_xlabel(x_axis_label) + #self.ax_pdf.ticklabel_format(style='sci') + #self.ax_pdf.tick_params(axis='y', labelcolor=self.pdf_color) + if self._log_x: + self.ax_pdf.set_xscale('log') + x_axis_label = "log " + x_axis_label + # if self._log_density: + # self.ax_pdf.set_yscale('log') + + # if cdf_plot: + # # CDF axes + # #self.ax_cdf.set_title("CDF approximations") + # self.ax_cdf.set_ylabel("CDF", color=self.cdf_color) + # self.ax_cdf.tick_params(axis='y', labelcolor=self.cdf_color) + # self.ax_cdf.set_xlabel(x_axis_label) + # if self._log_x: + # self.ax_cdf.set_xscale('log') + + if set_x_lim: + self.x_lim = [0, 2.6] + #self.x_lim = [0, 5] + self.x_lim = [0, 2.5] + + self.ax_pdf.set_xlim(*self.x_lim) + #self.ax_cdf.set_xlim(*self.x_lim) + + # """adjust ax2 ylimit so that v2 in ax2 is aligned to v1 in ax1""" + # _, y1 = self.ax_pdf.transData.transform((0, 0)) + # _, y2 = self.ax_cdf.transData.transform((0, 0)) + # inv = 
self.ax_cdf.transData.inverted() + # _, dy = inv.transform((0, 0)) - inv.transform((0, y1 - y2)) + # miny, maxy = self.ax_cdf.get_ylim() + # self.ax_cdf.set_ylim(miny + dy, maxy + dy) + + def add_raw_samples(self, samples): + """ + Add histogram and ecdf for raw samples. + :param samples: + """ + # Histogram + domain = (np.min(samples), np.max(samples)) + self.adjust_domain(domain) + if self.x_lim is not None: + self._domain = self.x_lim + N = len(samples) + print("N samples ", N) + bins = self._grid(int(0.5 * np.sqrt(N))) + self.ax_pdf.hist(samples, density=True, color='red', bins=bins, alpha=0.3) + + # Ecdf + # X = np.sort(samples) + # Y = (np.arange(len(X)) + 0.5) / float(len(X)) + # X, Y = make_monotone(X, Y) + # if self.ax_cdf is not None: + # self.ax_cdf.plot(X, Y, ':', color='midnightblue', label="ecdf") + + # PDF approx as derivative of Bspline CDF approx + # size_8 = int(N / 8) + # w = np.ones_like(X) + # w[:size_8] = 1 / (Y[:size_8]) + # w[N - size_8:] = 1 / (1 - Y[N - size_8:]) + # spl = interpolate.UnivariateSpline(X, Y, w, k=3, s=1) + # sX = np.linspace(domain[0], domain[1], 1000) + # if self._reg_param == 0: + # self.ax_pdf.plot(sX, spl.derivative()(sX), color='red', alpha=0.4, label="derivative of Bspline CDF") + + def add_distribution(self, distr_object, label=None, size=0, mom_indices=None, reg_param=0, color=None, line_style=None): + """ + Add plot for distribution 'distr_object' with given label. 
+ :param distr_object: Instance of Distribution, we use methods: density, cdf and attribute domain + :param label: string label for legend + :return: + """ + self._reg_param = reg_param + + # if label is None: + # label = "size {}".format(distr_object.moments_fn.size) + domain = distr_object.domain + self.adjust_domain(domain) + d_size = domain[1] - domain[0] + slack = 0 # 0.05 + extended_domain = (domain[0] - slack * d_size, domain[1] + slack * d_size) + X = self._grid(10000, domain=domain) + + line_styles = ['-', ':', '-.', '--'] + plots = [] + + Y_pdf = distr_object.density(X) + + if line_style is None: + line_style = "-" + + if color is None: + color = self.pdf_color + + self.ax_pdf.plot(X, Y_pdf, color=color, label=label, linestyle=line_style) + Y_cdf = distr_object.cdf(X) + + # if self.ax_cdf is not None: + # if line_style is None: + # line_style = "-" + # if color is None: + # color = self.cdf_color + # self.ax_cdf.plot(X, Y_cdf, color=color, linestyle=line_style) + # #self._plot_borders(self.ax_cdf, self.cdf_color, domain) + self.i_plot += 1 diff --git a/mlmc/quantity/quantity_estimate.py b/mlmc/quantity/quantity_estimate.py index 2436d7b9..a1f63ddd 100644 --- a/mlmc/quantity/quantity_estimate.py +++ b/mlmc/quantity/quantity_estimate.py @@ -19,13 +19,17 @@ def cache_clear(): mlmc.quantity.quantity.QuantityConst.samples.cache_clear() -def estimate_mean(quantity): +def estimate_mean(quantity, form="diff", operation_func=None, **kwargs): """ MLMC mean estimator. The MLMC method is used to compute the mean estimate to the Quantity dependent on the collected samples. The squared error of the estimate (the estimator variance) is estimated using the central limit theorem. 
Data is processed by chunks, so that it also supports big data processing :param quantity: Quantity + :param form: if "diff" estimates based on difference between fine and coarse data = MLMC approach + "fine" estimates based on level's fine data + "coarse" estimates based on level's coarse data + :param operation_func: function to process level data, e.g. kurtosis estimation :return: QuantityMean which holds both mean and variance """ cache_clear() @@ -56,10 +60,26 @@ def estimate_mean(quantity): sums = [np.zeros(chunk.shape[0]) for _ in range(n_levels)] sums_of_squares = [np.zeros(chunk.shape[0]) for _ in range(n_levels)] - if chunk_spec.level_id == 0: - chunk_diff = chunk[:, :, 0] + # Estimates of level's fine data + if form == "fine": + if chunk_spec.level_id == 0: + chunk_diff = chunk[:, :, 0] + else: + chunk_diff = chunk[:, :, 0] + # Estimate of level's coarse data + elif form == "coarse": + if chunk_spec.level_id == 0: + chunk_diff = np.zeros(chunk[:, :, 0].shape) + else: + chunk_diff = chunk[:, :, 1] else: - chunk_diff = chunk[:, :, 0] - chunk[:, :, 1] + if chunk_spec.level_id == 0: + chunk_diff = chunk[:, :, 0] + else: + chunk_diff = chunk[:, :, 0] - chunk[:, :, 1] + + if operation_func is not None: + chunk_diff = operation_func(chunk_diff, chunk_spec, **kwargs) sums[chunk_spec.level_id] += np.sum(chunk_diff, axis=1) sums_of_squares[chunk_spec.level_id] += np.sum(chunk_diff**2, axis=1) @@ -154,3 +174,28 @@ def eval_cov(x): else: moments_qtype = qt.ArrayType(shape=(moments_fn.size, moments_fn.size, ), qtype=quantity.qtype) return mlmc.quantity.quantity.Quantity(quantity_type=moments_qtype, input_quantities=[quantity], operation=eval_cov) + + +def kurtosis_numerator(chunk_diff, chunk_spec, l_means): + """ + Estimate sample kurtosis nominator: + E[(Y_l - E[Y_l])^4] + :param chunk_diff: np.ndarray, [quantity shape, number of samples] + :param chunk_spec: quantity_spec.ChunkSpec + :return: np.ndarray, unchanged shape + """ + chunk_diff = (chunk_diff - 
l_means[chunk_spec.level_id]) ** 4 + return chunk_diff + + +def level_kurtosis(quantity, means_obj): + """ + Estimate sample kurtosis at each level as: + E[(Y_l - E[Y_l])^4] / (Var[Y_l])^2, where Y_l = fine_l - coarse_l + :param quantity: Quantity + :param means_obj: Quantity.QuantityMean + :return: np.ndarray, kurtosis per level + """ + numerator_means_obj = estimate_mean(quantity, operation_func=kurtosis_numerator, l_means=means_obj.l_means) + kurtosis = numerator_means_obj.l_means / (means_obj.l_vars)**2 + return kurtosis diff --git a/mlmc/random/correlated_field.py b/mlmc/random/correlated_field.py index ba83e15b..5309f84d 100644 --- a/mlmc/random/correlated_field.py +++ b/mlmc/random/correlated_field.py @@ -62,7 +62,7 @@ def __init__(self, name, field=None, param_fields=[], regions=[]): if type(field) in [float, int]: self.const = field assert len(param_fields) == 0 - elif isinstance(field, RandomFieldBase): + elif isinstance(field, RandomFieldBase) or isinstance(field, gstools.covmodel.models.CovModel): self.correlated_field = field assert len(param_fields) == 0 else: @@ -372,7 +372,6 @@ def _initialize(self, **kwargs): """ Called after initialization in common constructor. """ - ### Attributes computed in precalculation. self.cov_mat = None # Covariance matrix (dense). 
@@ -500,14 +499,14 @@ def _sample(self): class GSToolsSpatialCorrelatedField(RandomFieldBase): - def __init__(self, model, mode_no=1000, log=False, sigma=1): + def __init__(self, model, mode_no=1000, log=False, sigma=1, mean=0): """ :param model: instance of covariance model class, which parent is gstools.covmodel.CovModel :param mode_no: number of Fourier modes, default: 1000 as in gstools package """ self.model = model self.mode_no = mode_no - self.srf = gstools.SRF(model, mode_no=mode_no) + self.srf = gstools.SRF(model, mean=mean, mode_no=mode_no) self.mu = self.srf.mean self.sigma = sigma self.dim = model.dim diff --git a/mlmc/sample_storage.py b/mlmc/sample_storage.py index 01be9082..885c0122 100644 --- a/mlmc/sample_storage.py +++ b/mlmc/sample_storage.py @@ -279,6 +279,13 @@ def sample_pairs_level(self, chunk_spec): 1 if np.prod(chunk.shape) == chunk.shape[0] * chunk.shape[1] else int(np.prod(chunk.shape) / chunk.shape[0] * chunk.shape[1])) + # Handle scalar simulation result + # @TODO: think it over again + if len(results.shape) != 3: + results = results.reshape(results.shape[0], results.shape[1], + 1 if np.prod(results.shape) == results.shape[0] * results.shape[1] else + int(np.prod(results.shape) / results.shape[0] * results.shape[1])) + # Remove auxiliary zeros from level zero sample pairs if chunk_spec.level_id == 0: chunk = chunk[:, :1, :] diff --git a/mlmc/sample_storage_hdf.py b/mlmc/sample_storage_hdf.py index 7e5fbef5..c137cb09 100644 --- a/mlmc/sample_storage_hdf.py +++ b/mlmc/sample_storage_hdf.py @@ -206,6 +206,21 @@ def unfinished_ids(self): return unfinished + def collected_ids(self, level_id=None): + """ + List of colected ids + :param level_id: int + :return: list + """ + if level_id is not None: + return self._level_groups[level_id].get_collected_ids() + + unfinished = [] + for level in self._level_groups: + unfinished.extend(level.get_collected_ids()) + + return unfinished + def failed_samples(self): """ Dictionary of failed samples @@ 
-241,6 +256,14 @@ def get_n_ops(self): Get number of estimated operations on each level :return: List """ + # n_ops = list(np.zeros(len(self._level_groups))) + # for level in self._level_groups: + # if level.running_times[1] > 0: + # n_ops[int(level.level_id)] = level.running_times[0] / level.running_times[1] + # else: + # n_ops[int(level.level_id)] = 0 + # return n_ops + n_ops = list(np.zeros(len(self._level_groups))) for level in self._level_groups: if level.n_ops_estimate[1] > 0: @@ -249,6 +272,51 @@ def get_n_ops(self): n_ops[int(level.level_id)] = 0 return n_ops + def get_original_n_ops(self): + n_ops = list(np.zeros(len(self._level_groups))) + for level in self._level_groups: + if level.n_ops_estimate[1] > 0: + n_ops[int(level.level_id)] = level.n_ops_estimate[0] / level.n_ops_estimate[1] + else: + n_ops[int(level.level_id)] = 0 + return n_ops + + def get_running_times(self): + n_ops = list(np.zeros(len(self._level_groups))) + for level in self._level_groups: + n_ops[int(level.level_id)] = level.running_times + return n_ops + + def get_extract_mesh_times(self): + n_ops = list(np.zeros(len(self._level_groups))) + for level in self._level_groups: + n_ops[int(level.level_id)] = level.extract_mesh_times + return n_ops + + def get_make_field_times(self): + n_ops = list(np.zeros(len(self._level_groups))) + for level in self._level_groups: + n_ops[int(level.level_id)] = level.make_field_times + return n_ops + + def get_generate_rnd_times(self): + n_ops = list(np.zeros(len(self._level_groups))) + for level in self._level_groups: + n_ops[int(level.level_id)] = level.generate_rnd_times + return n_ops + + def get_fine_flow_times(self): + n_ops = list(np.zeros(len(self._level_groups))) + for level in self._level_groups: + n_ops[int(level.level_id)] = level.fine_flow_times + return n_ops + + def get_coarse_flow_times(self): + n_ops = list(np.zeros(len(self._level_groups))) + for level in self._level_groups: + n_ops[int(level.level_id)] = level.coarse_flow_times + return 
n_ops + def get_level_ids(self): return [int(level.level_id) for level in self._level_groups] @@ -271,3 +339,4 @@ def get_n_levels(self): :return: int """ return len(self._level_groups) + diff --git a/mlmc/tool/flow_mc_2.py b/mlmc/tool/flow_mc_2.py new file mode 100644 index 00000000..ea9e27c1 --- /dev/null +++ b/mlmc/tool/flow_mc_2.py @@ -0,0 +1,519 @@ +import os +import os.path +import subprocess +import numpy as np +import shutil +import ruamel.yaml as yaml +from typing import List +import gstools +from mlmc.level_simulation import LevelSimulation +from mlmc.tool import gmsh_io +from mlmc.sim.simulation import Simulation +from mlmc.quantity.quantity_spec import QuantitySpec +from mlmc.random import correlated_field as cf + + +def create_corr_field(model='gauss', corr_length=0.125, dim=2, log=True, por_sigma=1, mode_no=1000): + """ + Create random fields + :return: + # """ + # por_top = cf.SpatialCorrelatedField( + # corr_exp='gauss', + # dim=2, + # corr_length=0.2, + # mu=-1.0, + # sigma=1.0, + # log=True + # ) + # + # print("por top ", por_top) + + por_top = cf.GSToolsSpatialCorrelatedField(gstools.Gaussian(dim=2, len_scale=0.2), + log=log, mean=-1.0, sigma=por_sigma, mode_no=mode_no) + + #print("por top gstools ", por_top_gstools) + + # por_bot = cf.SpatialCorrelatedField( + # corr_exp='gauss', + # dim=2, + # corr_length=0.2, + # mu=-1.0, + # sigma=1.0, + # log=True + # ) + + por_bot = cf.GSToolsSpatialCorrelatedField(gstools.Gaussian(dim=2, len_scale=0.2), + log=log, mean=-1.0, sigma=por_sigma, mode_no=mode_no) + + + #por_bot = gstools.Gaussian(dim=dim, len_scale=0.2, mu=-1.0, sigma=1.0, log=True) + + water_viscosity = 8.90e-4 + + factor_top_model = gstools.Gaussian(dim=dim) + factor_bot_model = gstools.Gaussian(dim=dim) + + fields = cf.Fields([ + cf.Field('por_top', por_top, regions='ground_0'), + cf.Field('porosity_top', cf.positive_to_range, ['por_top', 0.02, 0.1], regions='ground_0'), + cf.Field('por_bot', por_bot, regions='ground_1'), + 
cf.Field('porosity_bot', cf.positive_to_range, ['por_bot', 0.01, 0.05], regions='ground_1'), + cf.Field('porosity_repo', 0.5, regions='repo'), + #cf.Field('factor_top', cf.SpatialCorrelatedField('gauss', mu=1e-8, sigma=1, log=True), regions='ground_0'), + + cf.Field('factor_top', cf.GSToolsSpatialCorrelatedField(factor_top_model, log=log, mean=1e-8, sigma=1.0, mode_no=mode_no), + regions='ground_0'), + + #cf.Field('factor_top', gstools.Gaussian(len_scale=1, mu=1e-8, sigma=1.0, log=True), regions='ground_0'), + # conductivity about + #cf.Field('factor_bot', cf.SpatialCorrelatedField('gauss', mu=1e-8, sigma=1, log=True), regions='ground_1'), + #cf.Field('factor_bot', gstools.Gaussian(len_scale=1, mu=1e-8, sigma=1, log=True), regions='ground_1'), + cf.Field('factor_bot', + cf.GSToolsSpatialCorrelatedField(factor_bot_model, log=log, mean=1e-8, sigma=1.0, mode_no=mode_no), + regions='ground_1'), + + # cf.Field('factor_repo', cf.SpatialCorrelatedField('gauss', mu=1e-10, sigma=1, log=True), regions='repo'), + cf.Field('conductivity_top', cf.kozeny_carman, ['porosity_top', 1, 'factor_top', water_viscosity], + regions='ground_0'), + cf.Field('conductivity_bot', cf.kozeny_carman, ['porosity_bot', 1, 'factor_bot', water_viscosity], + regions='ground_1'), + # cf.Field('conductivity_repo', cf.kozeny_carman, ['porosity_repo', 1, 'factor_repo', water_viscosity], regions='repo') + cf.Field('conductivity_repo', 0.001, regions='repo') + ]) + + return fields + + +def substitute_placeholders(file_in, file_out, params): + """ + Substitute for placeholders of format '' from the dict 'params'. + :param file_in: Template file. + :param file_out: Values substituted. 
+ :param params: { 'name': value, ...} + """ + used_params = [] + with open(file_in, 'r') as src: + text = src.read() + for name, value in params.items(): + placeholder = '<%s>' % name + n_repl = text.count(placeholder) + if n_repl > 0: + used_params.append(name) + text = text.replace(placeholder, str(value)) + with open(file_out, 'w') as dst: + dst.write(text) + return used_params + + +def force_mkdir(path, force=False): + """ + Make directory 'path' with all parents, + remove the leaf dir recursively if it already exists. + :param path: path to directory + :param force: if dir already exists then remove it and create new one + :return: None + """ + if force: + if os.path.isdir(path): + shutil.rmtree(path) + os.makedirs(path, mode=0o775, exist_ok=True) + + +class FlowSimProcConc(Simulation): + # placeholders in YAML + total_sim_id = 0 + MESH_FILE_VAR = 'mesh_file' + # Timestep placeholder given as O(h), h = mesh step + TIMESTEP_H1_VAR = 'timestep_h1' + # Timestep placeholder given as O(h^2), h = mesh step + TIMESTEP_H2_VAR = 'timestep_h2' + + # files + GEO_FILE = 'repo.geo' + MESH_FILE = 'repo.msh' + YAML_TEMPLATE = '02_conc_tmpl.yaml' + YAML_FILE = '02_conc.yaml' + FIELDS_FILE = 'fields_sample.msh' + + """ + Gather data for single flow call (coarse/fine) + + Usage: + mlmc.sampler.Sampler uses instance of FlowSimProcConc, it calls once level_instance() for each level step (The level_instance() method + is called as many times as the number of levels), it takes place in main process + + mlmc.tool.pbs_job.PbsJob uses static methods in FlowSimProcConc, it calls calculate(). That's where the calculation actually runs, + it takes place in PBS process + It also extracts results and passes them back to PbsJob, which handles the rest + + """ + + def __init__(self, config=None, clean=None): + """ + Simple simulation using flow123d + :param config: configuration of the simulation, processed keys: + env - Environment object. 
+ fields - FieldSet object + yaml_file: Template for main input file. Placeholders: + - replaced by generated mesh + - for FIELD be name of any of `fields`, replaced by the FieldElementwise field with generated + field input file and the field name for the component. + geo_file: Path to the geometry file. + :param clean: bool, if True remove existing simulation files - mesh files, ... + """ + self.need_workspace = True + # This simulation requires workspace + self.env = config['env'] + # Environment variables, flow123d, gmsh, ... + self._fields_params = config['fields_params'] + self._fields = create_corr_field(**config['fields_params']) + self._fields_used_params = None + # Random fields instance + self.time_factor = config.get('time_factor', 1.0) + # It is used for minimal element from mesh determination (see level_instance method) + + self.base_yaml_file = config['yaml_file'] + self.base_geo_file = config['geo_file'] + self.field_template = config.get('field_template', + "!FieldElementwise {mesh_data_file: $INPUT_DIR$/%s, field_name: %s}") + self.work_dir = config['work_dir'] + self.clean = clean + + super(Simulation, self).__init__() + + def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation: + """ + Called from mlmc.Sampler, it creates single instance of LevelSimulation (mlmc.) 
+ :param fine_level_params: in this version, it is just fine simulation step + :param coarse_level_params: in this version, it is just coarse simulation step + :return: mlmc.LevelSimulation object, this object is serialized in SamplingPoolPbs and deserialized in PbsJob, + so it allows pass simulation data from main process to PBS process + """ + fine_step = fine_level_params[0] + coarse_step = coarse_level_params[0] + + # TODO: determine minimal element from mesh + self.time_step_h1 = self.time_factor * fine_step + self.time_step_h2 = self.time_factor * fine_step * fine_step + + # Set fine simulation common files directory + # Files in the directory are used by each simulation at that level + common_files_dir = os.path.join(self.work_dir, "l_step_{}_common_files".format(fine_step)) + force_mkdir(common_files_dir, force=self.clean) + + self.mesh_file = os.path.join(common_files_dir, self.MESH_FILE) + + if self.clean: + # Prepare mesh + geo_file = os.path.join(common_files_dir, self.GEO_FILE) + shutil.copyfile(self.base_geo_file, geo_file) + self._make_mesh(geo_file, self.mesh_file, fine_step) # Common computational mesh for all samples. 
+ + # Prepare main input YAML + yaml_template = os.path.join(common_files_dir, self.YAML_TEMPLATE) + shutil.copyfile(self.base_yaml_file, yaml_template) + yaml_file = os.path.join(common_files_dir, self.YAML_FILE) + self._substitute_yaml(yaml_template, yaml_file) + + # Mesh is extracted because we need number of mesh points to determine task_size parameter (see return value) + fine_mesh_data = self.extract_mesh(self.mesh_file) + + # Set coarse simulation common files directory + # Files in the directory are used by each simulation at that level + coarse_sim_common_files_dir = None + if coarse_step != 0: + coarse_sim_common_files_dir = os.path.join(self.work_dir, "l_step_{}_common_files".format(coarse_step)) + + # Simulation config + # Configuration is used in mlmc.tool.pbs_job.PbsJob instance which is run from PBS process + # It is part of LevelSimulation which is serialized and then deserialized in mlmc.tool.pbs_job.PbsJob + config = dict() + config["fine"] = {} + config["coarse"] = {} + config["fine"]["step"] = fine_step + config["coarse"]["step"] = coarse_step + config["fine"]["common_files_dir"] = common_files_dir + config["coarse"]["common_files_dir"] = coarse_sim_common_files_dir + + config[ + "fields_used_params"] = self._fields_used_params # Params for Fields instance, which is createed in PbsJob + config["gmsh"] = self.env['gmsh'] + config["flow123d"] = self.env['flow123d'] + config['fields_params'] = self._fields_params + + # Auxiliary parameter which I use to determine task_size (should be from 0 to 1, if task_size is above 1 then pbs job is scheduled) + job_weight = 17000000 # 4000000 - 20 min, 2000000 - cca 10 min + + return LevelSimulation(config_dict=config, + task_size=len(fine_mesh_data['points']) / job_weight, + calculate=FlowSimProcConc.calculate, + # method which carries out the calculation, will be called from PBS processs + need_sample_workspace=True # If True, a sample directory is created + ) + + @staticmethod + def calculate(config, seed): 
+ """ + Method that actually run the calculation, it's called from mlmc.tool.pbs_job.PbsJob.calculate_samples() + Calculate fine and coarse sample and also extract their results + :param config: dictionary containing simulation configuration, LevelSimulation.config_dict (set in level_instance) + :param seed: random seed, int + :return: List[fine result, coarse result], both flatten arrays (see mlmc.sim.synth_simulation.calculate()) + """ + # Init correlation field objects + fields = create_corr_field(**config['fields_params']) # correlated_field.Fields instance + fields.set_outer_fields(config["fields_used_params"]) + + coarse_step = config["coarse"]["step"] # Coarse simulation step, zero if one level MC + flow123d = config["flow123d"] # Flow123d command + + # Extract fine mesh + fine_common_files_dir = config["fine"]["common_files_dir"] # Directory with fine simulation common files + fine_mesh_data = FlowSimProcConc.extract_mesh(os.path.join(fine_common_files_dir, FlowSimProcConc.MESH_FILE)) + + # Extract coarse mesh + coarse_mesh_data = None + coarse_common_files_dir = None + if coarse_step != 0: + coarse_common_files_dir = config["coarse"][ + "common_files_dir"] # Directory with coarse simulation common files + coarse_mesh_data = FlowSimProcConc.extract_mesh(os.path.join(coarse_common_files_dir, FlowSimProcConc.MESH_FILE)) + + # Create fields both fine and coarse + fields = FlowSimProcConc.make_fields(fields, fine_mesh_data, coarse_mesh_data) + + # Set random seed, seed is calculated from sample id, so it is not user defined + np.random.seed(seed) + # Generate random samples + fine_input_sample, coarse_input_sample = FlowSimProcConc.generate_random_sample(fields, coarse_step=coarse_step, + n_fine_elements=len( + fine_mesh_data['points'])) + + # Run fine sample + fields_file = os.path.join(os.getcwd(), FlowSimProcConc.FIELDS_FILE) + fine_res = FlowSimProcConc._run_sample(fields_file, fine_mesh_data['ele_ids'], fine_input_sample, flow123d, + fine_common_files_dir) 
+ + # Rename fields_sample.msh to fine_fields_sample.msh, we might remove it + for filename in os.listdir(os.getcwd()): + if not filename.startswith("fine"): + shutil.move(os.path.join(os.getcwd(), filename), os.path.join(os.getcwd(), "fine_" + filename)) + + # Run coarse sample + coarse_res = np.zeros(len(fine_res)) + if coarse_input_sample: + coarse_res = FlowSimProcConc._run_sample(fields_file, coarse_mesh_data['ele_ids'], coarse_input_sample, flow123d, + coarse_common_files_dir) + + return fine_res, coarse_res + + @staticmethod + def make_fields(fields, fine_mesh_data, coarse_mesh_data): + """ + Create random fields that are used by both coarse and fine simulation + :param fields: correlated_field.Fields instance + :param fine_mesh_data: Dict contains data extracted from fine mesh file (points, point_region_ids, region_map) + :param coarse_mesh_data: Dict contains data extracted from coarse mesh file (points, point_region_ids, region_map) + :return: correlated_field.Fields + """ + # One level MC has no coarse_mesh_data + if coarse_mesh_data is None: + fields.set_points(fine_mesh_data['points'], fine_mesh_data['point_region_ids'], + fine_mesh_data['region_map']) + else: + coarse_centers = coarse_mesh_data['points'] + both_centers = np.concatenate((fine_mesh_data['points'], coarse_centers), axis=0) + both_regions_ids = np.concatenate( + (fine_mesh_data['point_region_ids'], coarse_mesh_data['point_region_ids'])) + assert fine_mesh_data['region_map'] == coarse_mesh_data['region_map'] + fields.set_points(both_centers, both_regions_ids, fine_mesh_data['region_map']) + + return fields + + @staticmethod + def _run_sample(fields_file, ele_ids, fine_input_sample, flow123d, common_files_dir): + """ + Create random fields file, call Flow123d and extract results + :param fields_file: Path to file with random fields + :param ele_ids: Element IDs in computational mesh + :param fine_input_sample: fields: {'field_name' : values_array, ..} + :param flow123d: Flow123d command + 
:param common_files_dir: Directory with simulations common files (flow_input.yaml, ) + :return: simulation result, ndarray + """ + gmsh_io.GmshIO().write_fields(fields_file, ele_ids, fine_input_sample) + + # x = [*flow123d, "--yaml_balance", '-i', os.getcwd(), '-s', "{}/flow_input.yaml".format(common_files_dir), + # "-o", os.getcwd(), ">{}/flow.out".format(os.getcwd())] + + #try: + subprocess.call( + [flow123d, "--yaml_balance", '-i', os.getcwd(), '-s', "{}/02_conc.yaml".format(common_files_dir), + "-o", os.getcwd(), ">{}/flow.out".format(os.getcwd())]) + # except: + # import sys + # print(sys.exc_info()) + + return FlowSimProcConc._extract_result(os.getcwd()) + + @staticmethod + def generate_random_sample(fields, coarse_step, n_fine_elements): + """ + Generate random field, both fine and coarse part. + Store them separeted. + :return: Dict, Dict + """ + fields_sample = fields.sample() + fine_input_sample = {name: values[:n_fine_elements, None] for name, values in fields_sample.items()} + coarse_input_sample = {} + if coarse_step != 0: + coarse_input_sample = {name: values[n_fine_elements:, None] for name, values in + fields_sample.items()} + + return fine_input_sample, coarse_input_sample + + def _make_mesh(self, geo_file, mesh_file, fine_step): + """ + Make the mesh, mesh_file: _step.msh. + Make substituted yaml: _step.yaml, + using common fields_step.msh file for generated fields. 
+ :return: + """ + if self.env['gmsh_version'] == 2: + subprocess.call( + [self.env['gmsh'], "-2", '-format', 'msh2', '-clscale', str(fine_step), '-o', mesh_file, geo_file]) + else: + subprocess.call([self.env['gmsh'], "-2", '-clscale', str(fine_step), '-o', mesh_file, geo_file]) + + @staticmethod + def extract_mesh(mesh_file): + """ + Extract mesh from file + :param mesh_file: Mesh file path + :return: Dict + """ + mesh = gmsh_io.GmshIO(mesh_file) + is_bc_region = {} + region_map = {} + for name, (id, _) in mesh.physical.items(): + unquoted_name = name.strip("\"'") + is_bc_region[id] = (unquoted_name[0] == '.') + region_map[unquoted_name] = id + + bulk_elements = [] + for id, el in mesh.elements.items(): + _, tags, i_nodes = el + region_id = tags[0] + if not is_bc_region[region_id]: + bulk_elements.append(id) + + n_bulk = len(bulk_elements) + centers = np.empty((n_bulk, 3)) + ele_ids = np.zeros(n_bulk, dtype=int) + point_region_ids = np.zeros(n_bulk, dtype=int) + + for i, id_bulk in enumerate(bulk_elements): + _, tags, i_nodes = mesh.elements[id_bulk] + region_id = tags[0] + centers[i] = np.average(np.array([mesh.nodes[i_node] for i_node in i_nodes]), axis=0) + point_region_ids[i] = region_id + ele_ids[i] = id_bulk + + min_pt = np.min(centers, axis=0) + max_pt = np.max(centers, axis=0) + diff = max_pt - min_pt + min_axis = np.argmin(diff) + non_zero_axes = [0, 1, 2] + # TODO: be able to use this mesh_dimension in fields + if diff[min_axis] < 1e-10: + non_zero_axes.pop(min_axis) + points = centers[:, non_zero_axes] + + return {'points': points, 'point_region_ids': point_region_ids, 'ele_ids': ele_ids, 'region_map': region_map} + + def _substitute_yaml(self, yaml_tmpl, yaml_out): + """ + Create substituted YAML file from the tamplate. 
+ :return: + """ + param_dict = {} + field_tmpl = self.field_template + for field_name in self._fields.names: + param_dict[field_name] = field_tmpl % (self.FIELDS_FILE, field_name) + param_dict[self.MESH_FILE_VAR] = self.mesh_file + param_dict[self.TIMESTEP_H1_VAR] = self.time_step_h1 + param_dict[self.TIMESTEP_H2_VAR] = self.time_step_h2 + used_params = substitute_placeholders(yaml_tmpl, yaml_out, param_dict) + + self._fields_used_params = used_params + + @staticmethod + def _extract_result(sample_dir): + """ + Extract the observed value from the Flow123d output. + :param sample_dir: str, path to sample directory + :return: None, inf or water balance result (float) and overall sample time + """ + # extract the flux + balance_file = os.path.join(sample_dir, "mass_balance.yaml") + + with open(balance_file, "r") as f: + balance = yaml.load(f) + + flux_regions = ['.surface'] + max_flux = 0.0 + found = False + for flux_item in balance['data']: + if 'region' not in flux_item: + os.remove(os.path.join(sample_dir, "mass_balance.yaml")) + break + + if flux_item['region'] in flux_regions: + out_flux = -float(flux_item['data'][0]) + if not np.isfinite(out_flux): + return np.inf + # flux_in = float(flux_item['data'][1]) + # if flux_in > 1e-10: + # raise Exception("Possitive inflow at outlet region.") + max_flux = max(max_flux, out_flux) # flux field + found = True + + # Get flow123d computing time + # run_time = FlowSimProcConc.get_run_time(sample_dir) + + if not found: + raise Exception + return np.array([max_flux]) + + @staticmethod + def result_format() -> List[QuantitySpec]: + """ + Define simulation result format + :return: List[QuantitySpec, ...] 
+ """ + spec1 = QuantitySpec(name="conductivity", unit="m", shape=(1, 1), times=[1], locations=['0']) + # spec2 = QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=['30', '40']) + return [spec1] + + # @staticmethod + # def get_run_time(sample_dir): + # """ + # Get flow123d sample running time from profiler + # :param sample_dir: Sample directory + # :return: float + # """ + # profiler_file = os.path.join(sample_dir, "profiler_info_*.json") + # profiler = glob.glob(profiler_file)[0] + # + # try: + # with open(profiler, "r") as f: + # prof_content = json.load(f) + # + # run_time = float(prof_content['children'][0]['cumul-time-sum']) + # except: + # print("Extract run time failed") + # + # return run_time + + diff --git a/mlmc/tool/gmsh_io.py b/mlmc/tool/gmsh_io.py index c5a3ad36..9f12d93a 100644 --- a/mlmc/tool/gmsh_io.py +++ b/mlmc/tool/gmsh_io.py @@ -44,6 +44,8 @@ def reset(self): self.elements = {} self.physical = {} self.element_data = {} + self._fields = {} + self._field = None def read_element_data_head(self, mshfile): @@ -69,7 +71,6 @@ def read_element_data_head(self, mshfile): n_elem = float(columns[0]) return field, time, t_idx, n_comp, n_elem - def read(self, mshfile=None): """Read a Gmsh .msh file. 
@@ -100,6 +101,10 @@ def read(self, mshfile=None): readmode = 5 elif line == '$ElementData': field, time, t_idx, n_comp, n_ele = self.read_element_data_head(mshfile) + + self._fields.setdefault(field, {}) + self._field = field + field_times = self.element_data.setdefault(field, {}) assert t_idx not in field_times self.current_elem_data = {} @@ -115,6 +120,7 @@ def read(self, mshfile=None): comp_values = [float(col) for col in columns[1:]] assert len(comp_values) == self.current_n_components self.current_elem_data[ele_idx] = comp_values + self._fields[self._field] = self.current_elem_data if readmode == 5: if len(columns) == 3: diff --git a/mlmc/tool/hdf5.py b/mlmc/tool/hdf5.py index f6f3219c..4b2e1d89 100644 --- a/mlmc/tool/hdf5.py +++ b/mlmc/tool/hdf5.py @@ -357,7 +357,7 @@ def chunks(self, n_samples=None): dataset = hdf_file["/".join([self.level_group_path, "collected_values"])] if n_samples is not None: - yield ChunkSpec(chunk_id=0, chunk_slice=slice(0, n_samples, 1), level_id=int(self.level_id)) + yield ChunkSpec(chunk_id=0, chunk_slice=slice(0, n_samples, 1), level_id=int(self.level_id)) else: for chunk_id, chunk in enumerate(dataset.iter_chunks()): yield ChunkSpec(chunk_id=chunk_id, chunk_slice=chunk[0], level_id=int(self.level_id)) # slice, level_id @@ -387,6 +387,12 @@ def collected_n_items(self): collected_n_items = len(dataset[()]) return collected_n_items + def get_collected_ids(self): + collected_ids = [] + with h5py.File(self.file_name, 'r') as hdf_file: + collected_ids = [sample[0].decode() for sample in hdf_file[self.level_group_path][self.collected_ids_dset][()]] + return collected_ids + def get_finished_ids(self): """ Get collected and failed samples ids @@ -394,8 +400,8 @@ def get_finished_ids(self): """ with h5py.File(self.file_name, 'r') as hdf_file: failed_ids = [sample[0].decode() for sample in hdf_file[self.level_group_path][self.failed_dset][()]] - successful_ids = [sample[0].decode() for sample in 
hdf_file[self.level_group_path][self.collected_ids_dset][()]] - return np.concatenate((np.array(successful_ids), np.array(failed_ids)), axis=0) + collected_ids = [sample[0].decode() for sample in hdf_file[self.level_group_path][self.collected_ids_dset][()]] + return np.concatenate((np.array(collected_ids), np.array(failed_ids)), axis=0) def get_unfinished_ids(self): """ @@ -437,6 +443,138 @@ def n_ops_estimate(self): if 'n_ops_estimate' in hdf_file[self.level_group_path].attrs: return hdf_file[self.level_group_path].attrs['n_ops_estimate'] + @property + def running_times(self): + """ + Get number of operations estimate + :return: float + """ + with h5py.File(self.file_name, 'r') as hdf_file: + if 'running_times' in hdf_file[self.level_group_path].attrs: + return hdf_file[self.level_group_path].attrs['running_times'] + + @running_times.setter + def running_times(self, n_ops_estimate): + """ + Set property n_ops_estimate + :param n_ops_estimate: number of operations (time) per samples + :return: None + """ + with h5py.File(self.file_name, 'a') as hdf_file: + if 'running_times' not in hdf_file[self.level_group_path].attrs: + hdf_file[self.level_group_path].attrs['running_times'] = [0., 0.] + hdf_file[self.level_group_path].attrs['running_times'] = n_ops_estimate + + @property + def extract_mesh_times(self): + """ + Get number of operations estimate + :return: float + """ + with h5py.File(self.file_name, 'r') as hdf_file: + if 'extract_mesh_times' in hdf_file[self.level_group_path].attrs: + return hdf_file[self.level_group_path].attrs['extract_mesh_times'] + + @extract_mesh_times.setter + def extract_mesh_times(self, n_ops_estimate): + """ + Set property n_ops_estimate + :param n_ops_estimate: number of operations (time) per samples + :return: None + """ + with h5py.File(self.file_name, 'a') as hdf_file: + if 'extract_mesh_times' not in hdf_file[self.level_group_path].attrs: + hdf_file[self.level_group_path].attrs['extract_mesh_times'] = [0., 0.] 
+ hdf_file[self.level_group_path].attrs['extract_mesh_times'] = n_ops_estimate + + @property + def make_field_times(self): + """ + Get make field times + :return: float + """ + with h5py.File(self.file_name, 'r') as hdf_file: + if 'make_field_times' in hdf_file[self.level_group_path].attrs: + return hdf_file[self.level_group_path].attrs['make_field_times'] + + @make_field_times.setter + def make_field_times(self, n_ops_estimate): + """ + Set property make_field_times + :param n_ops_estimate: value stored in the 'make_field_times' attribute + :return: None + """ + with h5py.File(self.file_name, 'a') as hdf_file: + if 'make_field_times' not in hdf_file[self.level_group_path].attrs: + hdf_file[self.level_group_path].attrs['make_field_times'] = [0., 0.] + hdf_file[self.level_group_path].attrs['make_field_times'] = n_ops_estimate + + @property + def generate_rnd_times(self): + """ + Get generate random times + :return: float + """ + with h5py.File(self.file_name, 'r') as hdf_file: + if 'generate_rnd_times' in hdf_file[self.level_group_path].attrs: + return hdf_file[self.level_group_path].attrs['generate_rnd_times'] + + @generate_rnd_times.setter + def generate_rnd_times(self, n_ops_estimate): + """ + Set property generate_rnd_times + :param n_ops_estimate: value stored in the 'generate_rnd_times' attribute + :return: None + """ + with h5py.File(self.file_name, 'a') as hdf_file: + if 'generate_rnd_times' not in hdf_file[self.level_group_path].attrs: + hdf_file[self.level_group_path].attrs['generate_rnd_times'] = [0., 0.] 
+ hdf_file[self.level_group_path].attrs['generate_rnd_times'] = n_ops_estimate + + @property + def fine_flow_times(self): + """ + Get number of operations estimate + :return: float + """ + with h5py.File(self.file_name, 'r') as hdf_file: + if 'fine_flow_times' in hdf_file[self.level_group_path].attrs: + return hdf_file[self.level_group_path].attrs['fine_flow_times'] + + @fine_flow_times.setter + def fine_flow_times(self, n_ops_estimate): + """ + Set property n_ops_estimate + :param n_ops_estimate: number of operations (time) per samples + :return: None + """ + with h5py.File(self.file_name, 'a') as hdf_file: + if 'fine_flow_times' not in hdf_file[self.level_group_path].attrs: + hdf_file[self.level_group_path].attrs['fine_flow_times'] = [0., 0.] + hdf_file[self.level_group_path].attrs['fine_flow_times'] = n_ops_estimate + + @property + def coarse_flow_times(self): + """ + Get number of operations estimate + :return: float + """ + with h5py.File(self.file_name, 'r') as hdf_file: + if 'coarse_flow_times' in hdf_file[self.level_group_path].attrs: + return hdf_file[self.level_group_path].attrs['coarse_flow_times'] + + @coarse_flow_times.setter + def coarse_flow_times(self, n_ops_estimate): + """ + Set property n_ops_estimate + :param n_ops_estimate: number of operations (time) per samples + :return: None + """ + with h5py.File(self.file_name, 'a') as hdf_file: + if 'coarse_flow_times' not in hdf_file[self.level_group_path].attrs: + hdf_file[self.level_group_path].attrs['coarse_flow_times'] = [0., 0.] + hdf_file[self.level_group_path].attrs['coarse_flow_times'] = n_ops_estimate + @n_ops_estimate.setter def n_ops_estimate(self, n_ops_estimate): """ @@ -448,4 +586,3 @@ def n_ops_estimate(self, n_ops_estimate): if 'n_ops_estimate' not in hdf_file[self.level_group_path].attrs: hdf_file[self.level_group_path].attrs['n_ops_estimate'] = [0., 0.] 
hdf_file[self.level_group_path].attrs['n_ops_estimate'] = n_ops_estimate - diff --git a/mlmc/tool/simple_distribution.py b/mlmc/tool/simple_distribution.py index 77513f6a..df3f55cb 100644 --- a/mlmc/tool/simple_distribution.py +++ b/mlmc/tool/simple_distribution.py @@ -104,7 +104,6 @@ def density(self, value): power = np.minimum(np.maximum(power, -200), 200) return np.exp(power) - def cdf(self, values): values = np.atleast_1d(values) np.sort(values) @@ -267,10 +266,11 @@ def _calculate_functional(self, multipliers): integral = np.dot(q_density, self._quad_weights) sum = np.sum(self.moment_means * multipliers / self._moment_errs) - end_diff = np.dot(self._end_point_diff, multipliers) - penalty = np.sum(np.maximum(end_diff, 0)**2) fun = sum + integral - fun = fun + np.abs(fun) * self._penalty_coef * penalty + if self._penalty_coef != 0: + end_diff = np.dot(self._end_point_diff, multipliers) + penalty = np.sum(np.maximum(end_diff, 0) ** 2) + fun = fun + np.abs(fun) * self._penalty_coef * penalty return fun @@ -284,46 +284,41 @@ def _calculate_gradient(self, multipliers): q_gradient = self._quad_moments.T * q_density integral = np.dot(q_gradient, self._quad_weights) / self._moment_errs - end_diff = np.dot(self._end_point_diff, multipliers) - penalty = 2 * np.dot( np.maximum(end_diff, 0), self._end_point_diff) - fun = np.sum(self.moment_means * multipliers / self._moment_errs) + integral[0] * self._moment_errs[0] - gradient = self.moment_means / self._moment_errs - integral + np.abs(fun) * self._penalty_coef * penalty + if self._penalty_coef != 0: + end_diff = np.dot(self._end_point_diff, multipliers) + penalty = 2 * np.dot(np.maximum(end_diff, 0), self._end_point_diff) + fun = np.sum(self.moment_means * multipliers / self._moment_errs) + integral[0] * self._moment_errs[0] + + gradient = self.moment_means / self._moment_errs - integral + np.abs(fun) * self._penalty_coef * penalty + else: + gradient = self.moment_means / self._moment_errs - integral # + np.abs(fun) * 
self._penalty_coef * penalty + return gradient + def _calc_jac(self): + q_density = self.density(self._quad_points) + q_density_w = q_density * self._quad_weights + + jacobian_matrix = (self._quad_moments.T * q_density_w) @ self._quad_moments + return jacobian_matrix + def _calculate_jacobian_matrix(self, multipliers): """ :return: jacobian matrix, symmetric, (n_moments, n_moments) """ - self._update_quadrature(multipliers) - q_density = self._density_in_quads(multipliers) - q_density_w = q_density * self._quad_weights - q_mom = self._quad_moments / self._moment_errs - - jacobian_matrix = (q_mom.T * q_density_w) @ q_mom - - # Compute just triangle use lot of memory (possibly faster) - # moment_outer = np.einsum('ki,kj->ijk', q_mom, q_mom) - # triu_idx = np.triu_indices(self.approx_size) - # triu_outer = moment_outer[triu_idx[0], triu_idx[1], :] - # integral = np.dot(triu_outer, q_density_w) - # jacobian_matrix = np.empty(shape=(self.approx_size, self.approx_size)) - # jacobian_matrix[triu_idx[0], triu_idx[1]] = integral - # jacobian_matrix[triu_idx[1], triu_idx[0]] = integral - - end_diff = np.dot(self._end_point_diff, multipliers) - fun = np.sum(self.moment_means * multipliers / self._moment_errs) + jacobian_matrix[0,0] * self._moment_errs[0]**2 - for side in [0, 1]: - if end_diff[side] > 0: - penalty = 2 * np.outer(self._end_point_diff[side], self._end_point_diff[side]) - jacobian_matrix += np.abs(fun) * self._penalty_coef * penalty - - - #e_vals = np.linalg.eigvalsh(jacobian_matrix) + # jacobian_matrix_hess = hessian(self._calculate_functional)(multipliers) + # print(pd.DataFrame(jacobian_matrix_hess)) + jacobian_matrix = self._calc_jac() + + if self._penalty_coef != 0: + end_diff = np.dot(self._end_point_diff, multipliers) + fun = np.sum(self.moment_means * multipliers / self._moment_errs) + jacobian_matrix[0, 0] * \ + self._moment_errs[0] ** 2 + for side in [0, 1]: + if end_diff[side] > 0: + penalty = 2 * np.outer(self._end_point_diff[side], 
self._end_point_diff[side]) + jacobian_matrix += np.abs(fun) * self._penalty_coef * penalty - #print(multipliers) - #print("jac spectra: ", e_vals) - #print("means:", self.moment_means) - #print("\n jac:", np.diag(jacobian_matrix)) return jacobian_matrix diff --git a/requirements.txt b/requirements.txt index ad19c689..c123f5c8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,6 @@ ruamel.yaml attrs gstools memoization +tensorflow +pandas +spektral diff --git a/test/01_cond_field/process_simple.py b/test/01_cond_field/process_simple.py index 70046a4b..76176702 100644 --- a/test/01_cond_field/process_simple.py +++ b/test/01_cond_field/process_simple.py @@ -26,9 +26,9 @@ def __init__(self): # Remove HDF5 file, start from scratch self.debug = args.debug # 'Debug' mode is on - keep sample directories - self.use_pbs = True + self.use_pbs = False # Use PBS sampling pool - self.n_levels = 1 + self.n_levels = 5 self.n_moments = 25 # Number of MLMC levels @@ -76,11 +76,34 @@ def process(self): # @TODO: How to estimate true_domain? 
quantile = 0.001 true_domain = mlmc.estimator.Estimate.estimate_domain(q_value, sample_storage, quantile=quantile) + print("true domain ", true_domain) moments_fn = Legendre(self.n_moments, true_domain) + n_ops = np.array(sample_storage.get_n_ops()) + print("n ops ", n_ops[:, 0] / n_ops[:, 1]) + + print("sample storage n collected ", sample_storage.get_n_collected()) + estimator = mlmc.estimator.Estimate(quantity=q_value, sample_storage=sample_storage, moments_fn=moments_fn) means, vars = estimator.estimate_moments(moments_fn) + l_0_samples = estimator.get_level_samples(level_id=0) + l_1_samples = estimator.get_level_samples(level_id=1) + l_2_samples = estimator.get_level_samples(level_id=2) + l_3_samples = estimator.get_level_samples(level_id=3) + l_4_samples = estimator.get_level_samples(level_id=4) + + print("l 0 samples shape ", np.squeeze(l_0_samples).shape) + print("l 1 samples shape ", np.squeeze(l_1_samples[..., 0]).shape) + + print("l_0_samples.var ", np.var(np.squeeze(l_0_samples)[:10000])) + print("fine l_1_samples.var ", np.var(np.squeeze(l_1_samples[..., 0]))) + print("fine l_2_samples.var ", np.var(np.squeeze(l_2_samples[..., 0]))) + print("fine l_3_samples.var ", np.var(np.squeeze(l_3_samples[..., 0]))) + print("fine l_4_samples.var ", np.var(np.squeeze(l_4_samples[..., 0]))) + + exit() + moments_quantity = moments(root_quantity, moments_fn=moments_fn, mom_at_bottom=True) moments_mean = estimate_mean(moments_quantity) conductivity_mean = moments_mean['conductivity'] @@ -95,9 +118,9 @@ def process(self): # central_moments_quantity = moments(root_quantity, moments_fn=central_moments, mom_at_bottom=True) # central_moments_mean = estimate_mean(central_moments_quantity) - #estimator.sub_subselect(sample_vector=[10000]) + # estimator.sub_subselect(sample_vector=[10000]) - #self.process_target_var(estimator) + # self.process_target_var(estimator) self.construct_density(estimator, tol=1e-8) #self.data_plots(estimator) @@ -108,7 +131,8 @@ def 
process_target_var(self, estimator): n0, nL = 100, 3 n_samples = np.round(np.exp2(np.linspace(np.log2(n0), np.log2(nL), self.n_levels))).astype(int) - n_estimated = estimator.bs_target_var_n_estimated(target_var=1e-5, sample_vec=n_samples) # number of estimated sampels for given target variance + n_estimated = estimator.bs_target_var_n_estimated(target_var=1e-5, + sample_vec=n_samples) # number of estimated sampels for given target variance estimator.plot_variances(sample_vec=n_estimated) estimator.plot_bs_var_log(sample_vec=n_estimated) @@ -179,8 +203,8 @@ def setup_config(self, clean): simulation_factory = FlowSim(config=simulation_config, clean=clean) # Create HDF sample storage - sample_storage = SampleStorageHDF( - file_path=os.path.join(self.work_dir, "mlmc_{}.hdf5".format(self.n_levels))) + sample_storage = SampleStorageHDF(file_path=os.path.join(self.work_dir, "mlmc_{}.hdf5".format(self.n_levels))) + # Create sampler, it manages sample scheduling and so on sampler = Sampler(sample_storage=sample_storage, sampling_pool=sampling_pool, sim_factory=simulation_factory, @@ -280,15 +304,17 @@ def generate_jobs(self, sampler, n_samples=None, renew=False, target_var=None): # New estimation according to already finished samples variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler._n_scheduled_samples) n_estimated = estimator.estimate_n_samples_for_target_variance(target_var, variances, n_ops, - n_levels=sampler.n_levels) + n_levels=sampler.n_levels) # Loop until number of estimated samples is greater than the number of scheduled samples + while not sampler.process_adding_samples(n_estimated, self.sample_sleep, self.adding_samples_coef, timeout=self.sample_timeout): + # New estimation according to already finished samples variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler._n_scheduled_samples) n_estimated = estimator.estimate_n_samples_for_target_variance(target_var, variances, n_ops, - n_levels=sampler.n_levels) + 
n_levels=sampler.n_levels) def set_moments(self, quantity, sample_storage, n_moments=5): true_domain = estimator.Estimate.estimate_domain(quantity, sample_storage, quantile=0.01) diff --git a/test/metamodels/__init__.py b/test/metamodels/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/metamodels/compare_models.py b/test/metamodels/compare_models.py new file mode 100644 index 00000000..c48a98b3 --- /dev/null +++ b/test/metamodels/compare_models.py @@ -0,0 +1,33 @@ +import os + +#os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Run on CPU only +import sys +import subprocess +from mlmc.metamodel.analyze_nn import run_GNN, run_SVR, statistics, analyze_statistics, process_results +import tensorflow as tf + +from mlmc.metamodel.flow_task_GNN_2 import GNN +from spektral.layers import GCNConv, GlobalSumPool, ChebConv, GraphSageConv, ARMAConv, GATConv, APPNPConv, GINConv, GeneralConv +from tensorflow.keras.losses import MeanSquaredError, KLDivergence, MeanAbsoluteError +from tensorflow.keras import Model +from tensorflow.keras.layers import Dense +from spektral.layers import GlobalSumPool, GlobalMaxPool, GlobalAvgPool +import tensorflow as tf +from tensorflow.keras.layers.experimental import preprocessing + + +def compare_channels(): + data_path = "/home/martin/Documents/metamodels/data/comparison/ChebConv_channels" + channels = [8, 16, 32, 64, 128, 256] + channels = [2, 4, 8, 16, 32, 128] + + for channel in channels: + channel_path = os.path.join(data_path, "{0}/ChebConv{0}".format(channel)) + print("channel path ", channel_path) + analyze_statistics(channel_path) + + + + +if __name__ == "__main__": + compare_channels() diff --git a/test/metamodels/metamodel_test.py b/test/metamodels/metamodel_test.py new file mode 100644 index 00000000..5864edfb --- /dev/null +++ b/test/metamodels/metamodel_test.py @@ -0,0 +1,771 @@ +import os +import numpy as np +import warnings +os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Run on CPU only +import sys +import shutil 
+import subprocess +from mlmc.metamodel.analyze_nn import run_GNN, run_SVR, statistics, analyze_statistics, process_results +from mlmc.moments import Legendre_tf, Monomial + +from mlmc.metamodel.flow_task_GNN_2 import GNN +from spektral.layers import GCNConv, GlobalSumPool, ChebConv, GraphSageConv, ARMAConv, GATConv, APPNPConv, GINConv, GeneralConv +from mlmc.metamodel.own_cheb_conv import OwnChebConv +from tensorflow.keras.losses import MeanSquaredError, KLDivergence, MeanAbsoluteError +from mlmc.metamodel.custom_methods import abs_activation, MSE_moments +from tensorflow.keras import Model +from tensorflow.keras.models import Sequential +from tensorflow.keras.layers import Dense +from tensorflow.keras import regularizers +from spektral.layers import GlobalSumPool, GlobalMaxPool, GlobalAvgPool +import tensorflow as tf +from mlmc.plot import plots +from tensorflow.keras.layers.experimental import preprocessing +warnings.filterwarnings("ignore", category=DeprecationWarning) + + +def get_gnn(): + # Parameters + # conv_layer = GCNConv + conv_layer = ChebConv # Seems better than GCNConv, good distribution of predictions + conv_layer = OwnChebConv + # conv_layer = GraphSageConv # Seems better than ChebConv, good loss but very narrow distribution of predictions + # # conv_layer = ARMAConv # Seems worse than GraphSageConv + # conv_layer = GATConv # Slow and not better than GraphSageConv + # # conv_layer = APPNPConv # Not bad but worse than GraphSageConv + # # conv_layer = GINConv # it is comparable to APPNPConv + # act_func = "relu" # "tanh"#"elu" + + loss = MeanSquaredError() # var_loss_function# + #loss = MSE_moments + # loss = MeanAbsoluteError() + # loss = MeanSquaredLogarithmicError() + # loss = KLDivergence() + # loss = total_loss_function + optimizer = tf.optimizers.Adam(learning_rate=0.001) + patience = 150 + hidden_regularization = None # l2(2e-10) + + net_model_config = { + "conv_layer": conv_layer, + "hidden_activation": 'relu', + "output_activation": 
abs_activation, + #"output_activation": 'linear', + "kernel_regularization": hidden_regularization, + "normalizer": preprocessing.Normalization() + } + + #model = Net(**net_model_config) + + model_config = {"loss": loss, + "optimizer": optimizer, + "patience": patience, + "model_class": Net, + "net_model_config": net_model_config, + "verbose": True} + + corr_field_config = {'02_conc': True, 'corr_length': 0.1, 'sigma': 1, 'log': True} + + return GNN, conv_layer, corr_field_config, model_config + + +class Net(Model): + def __init__(self, conv_layer, hidden_activation, output_activation, kernel_regularization, normalizer, + **kwargs): + super().__init__(**kwargs) + + # self.normalizer = normalizer + # self.norm_layer = tf.keras.layers.LayerNormalization(axis=1) + self._conv_layers = [conv_layer(8, K=4, activation=hidden_activation, kernel_regularizer=kernel_regularization)]#, + #conv_layer(64, K=1, activation=hidden_activation, kernel_regularizer=kernel_regularization)] + # self.conv3 = conv_layer(32, K=1, activation=hidden_activation, kernel_regularizer=kernel_regularization) + # self.conv4 = conv_layer(32, K=1, activation=hidden_activation, kernel_regularizer=kernel_regularization) + #self.conv2 = conv_layer(32, K=2, activation=hidden_activation, kernel_regularizer=kernel_regularization) + # self.conv3 = conv_layer(16, K=2, activation=hidden_activation, kernel_regularizer=kernel_regularization) + # self.conv3 = conv_layer(8, activation=hidden_activation, kernel_regularizer=kernel_regularization) + # self.conv4 = conv_layer(4, activation=hidden_activation, kernel_regularizer=kernel_regularization) + # self.conv3 = conv_layer(64, activation=hidden_activation, kernel_regularizer=kernel_regularization) + #self.flatten = GlobalSumPool() + + self.flatten = GlobalAvgPool() + + #self._submodel = Sequential() + + self._dense_layers = [Dense(32, activation=hidden_activation), Dense(1)] + + # for d_layer in self._dense_layers: + # self._submodel.add(d_layer) + # self.fc1 = 
Dense(32, activation=hidden_activation) + #, activation=output_activation) # linear activation for output neuron + + def call(self, inputs): + x, a = inputs + + for c_layer in self._conv_layers: + x = c_layer([x, a]) + + output = self.flatten(x) + + for d_layer in self._dense_layers: + output = d_layer(output) + + return output + + +def get_config(data_dir, case=0): + feature_names = [['conductivity']] + + if case == 0: + cl = "cl_0_3_s_4" + nn_level = 0 + replace_level = False + mesh = os.path.join(data_dir, "l_step_0.055_common_files/mesh.msh") + output_dir = os.path.join(data_dir, "{}/L5/test/01_cond_field/output/".format(cl)) + hdf_path = os.path.join(data_dir, "{}/L5/mlmc_5.hdf5".format(cl)) + save_path = os.path.join(data_dir, "{}".format(cl)) + l_0_output_dir = os.path.join(data_dir, "{}/L{}/test/01_cond_field/output/".format(cl, nn_level + 1)) + l_0_hdf_path = os.path.join(data_dir, "{}/L{}/mlmc_{}.hdf5".format(cl, nn_level + 1, nn_level + 1)) + sampling_info_path = os.path.join(data_dir, "{}/sampling_info".format(cl)) + ref_mlmc_file = os.path.join(data_dir, "{}/L1_benchmark/mlmc_1.hdf5".format(cl)) + + elif case == 1: + cl = "cl_0_1_s_1" + nn_level = 1 + replace_level = False + mesh = os.path.join(data_dir, "l_step_0.027624156655057155_common_files/mesh.msh") + output_dir = os.path.join(data_dir, "{}/L5/test/01_cond_field/output/".format(cl)) + hdf_path = os.path.join(data_dir, "{}/L5/mlmc_5.hdf5".format(cl)) + save_path = os.path.join(data_dir, "{}".format(cl)) + l_0_output_dir = os.path.join(data_dir, "{}/L{}/test/01_cond_field/output/".format(cl, nn_level + 1)) + l_0_hdf_path = os.path.join(data_dir, "{}/L{}/mlmc_{}.hdf5".format(cl, nn_level + 1, nn_level)) + sampling_info_path = os.path.join(data_dir, "{}/sampling_info".format(cl)) + ref_mlmc_file = os.path.join(data_dir, "{}/L1_benchmark/mlmc_1.hdf5".format(cl)) + + elif case == 2: + data_dir = "/home/martin/Documents/metamodels/data/5_ele/" + cl = "cl_0_1_s_1" + nn_level = 3 + replace_level = False 
+ mesh = os.path.join(data_dir, "cl_0_1_s_1/L5/l_step_0.020196309484414757_common_files/mesh.msh") + output_dir = os.path.join(data_dir, "{}/L5/test/01_cond_field/output/".format(cl)) + hdf_path = os.path.join(data_dir, "{}/L5/mlmc_5.hdf5".format(cl)) + save_path = os.path.join(data_dir, "{}".format(cl)) + l_0_output_dir = os.path.join(data_dir, "{}/L1_{}/test/01_cond_field/output/".format(cl, nn_level)) + l_0_hdf_path = os.path.join(data_dir, "{}/L1_{}/mlmc_1.hdf5".format(cl, nn_level)) + sampling_info_path = os.path.join(data_dir, "{}/sampling_info".format(cl)) + ref_mlmc_file = os.path.join(data_dir, "{}/L1_3/mlmc_1.hdf5".format(cl)) + + elif case == 3 or case == 4: + #data_dir = "/home/martin/Documents/metamodels/data/5_ele/" + cl = "cl_0_1_s_1" + if case == 4: + cl = "cl_0_3_s_4" + nn_level = 0 + replace_level = False + # mesh = os.path.join(data_dir, "{}/L5/l_step_0.020196309484414757_common_files/mesh.msh".format(cl)) + mesh = os.path.join(data_dir, "l_step_0.07416198487095663_common_files/mesh.msh".format(cl)) + output_dir = os.path.join(data_dir, "{}/L5/test/01_cond_field/output/".format(cl)) + hdf_path = os.path.join(data_dir, "{}/L5/mlmc_5.hdf5".format(cl)) + save_path = os.path.join(data_dir, "{}".format(cl)) + l_0_output_dir = os.path.join(data_dir, "{}/L1_{}/test/01_cond_field/output/".format(cl,nn_level)) + l_0_hdf_path = os.path.join(data_dir, "{}/L1_{}/mlmc_1.hdf5".format(cl, nn_level)) + sampling_info_path = os.path.join(data_dir, "{}/sampling_info".format(cl)) + ref_mlmc_file = os.path.join(data_dir, "{}/L1_benchmark/mlmc_1.hdf5".format(cl)) + + elif case == 5: + #data_dir = "/home/martin/Documents/metamodels/data/5_ele/" + cl = "cl_0_3_s_4" + nn_level = 0 + replace_level = False + mesh = os.path.join(data_dir, "cl_0_1_s_1/L5/l_step_0.020196309484414757_common_files/mesh.msh") + output_dir = os.path.join(data_dir,"{}/L1_3/test/01_cond_field/output/".format(cl)) + hdf_path = os.path.join(data_dir,"{}/L1_3/mlmc_1.hdf5".format(cl)) + save_path = 
os.path.join(data_dir,"{}".format(cl)) + l_0_output_dir = os.path.join(data_dir,"{}/L1_{}/test/01_cond_field/output/".format(cl,nn_level)) + l_0_hdf_path = os.path.join(data_dir,"{}/L1_{}/mlmc_1.hdf5".format(cl, nn_level)) + sampling_info_path = os.path.join(data_dir, "{}/sampling_info".format(cl)) + ref_mlmc_file = os.path.join(data_dir,"{}/L1_3/mlmc_1.hdf5".format(cl)) + + elif case == 6: # mesh size comparison + cl = "cl_0_1_s_1" + nn_level = 0 + replace_level = False + mesh = os.path.join(data_dir, "l_step_0.07416198487095663_common_files/mesh.msh".format(cl)) + output_dir = os.path.join(data_dir, "{}/L1/test/01_cond_field/output/".format(cl)) + hdf_path = os.path.join(data_dir, "{}/L1/mlmc_1.hdf5".format(cl)) + save_path = os.path.join(data_dir, "{}".format(cl)) + l_0_output_dir = os.path.join(data_dir, "{}/L1_{}/test/01_cond_field/output/".format(cl, nn_level)) + l_0_hdf_path = os.path.join(data_dir, "{}/L1_{}/mlmc_1.hdf5".format(cl, nn_level)) + sampling_info_path = os.path.join(data_dir, "{}/sampling_info".format(cl)) + ref_mlmc_file = os.path.join(data_dir, "{}/L1_benchmark/mlmc_1.hdf5".format(cl)) + + elif case == 7: # mesh size comparison + data_dir = "/home/martin/Documents/metamodels/data/mesh_size/" + cl = "cl_0_1_s_1" + level = 3 + nn_level = 0 + replace_level = False + #mesh = os.path.join(data_dir, "l_step_1.0_common_files/mesh.msh".format(cl)) #L1, 7s + mesh = os.path.join(data_dir, "l_step_0.27232698153315_common_files/mesh.msh".format(cl)) #L2 10.5 s + mesh = os.path.join(data_dir, "l_step_0.07416198487095663_common_files/mesh.msh".format(cl)) #L3 12s + #mesh = os.path.join(data_dir, "l_step_0.020196309484414757_common_files/mesh.msh".format(cl)) #L4 22s + #mesh = os.path.join(data_dir, "l_step_0.0055_common_files/mesh.msh".format(cl)) #L5 + output_dir = os.path.join(data_dir, "{}/L1_{}/test/01_cond_field/output/".format(cl, level)) + hdf_path = os.path.join(data_dir, "{}/L1_{}/mlmc_1.hdf5".format(cl, level)) + mlmc_hdf_path = 
os.path.join(data_dir, "{}/mlmc_hdf/L1_{}/mlmc_1.hdf5".format(cl, level)) + save_path = os.path.join(data_dir, "{}".format(cl)) + l_0_output_dir = os.path.join(data_dir, "{}/L0_MC/L1_{}/test/01_cond_field/output/".format(cl, level)) + l_0_hdf_path = os.path.join(data_dir, "{}/L0_MC/L1_{}/mlmc_1.hdf5".format(cl, level)) + sampling_info_path = os.path.join(data_dir, "{}/sampling_info".format(cl)) + ref_mlmc_file = os.path.join(data_dir, "{}/L1_benchmark/mlmc_1.hdf5".format(cl)) + + elif case == 8 or case == 9: + data_dir = "/home/martin/Documents/metamodels/data/5_ele/" + cl = "cl_0_1_s_1" + if case == 9: + cl = "cl_0_3_s_4" + nn_level = 0 + replace_level = False + # mesh = os.path.join(data_dir, "{}/L5/l_step_0.020196309484414757_common_files/mesh.msh".format(cl)) + mesh = os.path.join(data_dir, "l_step_0.07416198487095663_common_files/mesh.msh".format(cl)) + output_dir = os.path.join(data_dir, "{}/L3_1/test/01_cond_field/output/".format(cl)) + hdf_path = os.path.join(data_dir, "{}/L3_1/mlmc_3.hdf5".format(cl)) + mlmc_hdf_path = os.path.join(data_dir, "{}/L3_2/mlmc_3.hdf5".format(cl)) + save_path = os.path.join(data_dir, "{}".format(cl)) + l_0_output_dir = os.path.join(data_dir, "{}/L1_2/test/01_cond_field/output/".format(cl,nn_level)) + l_0_hdf_path = os.path.join(data_dir, "{}/L1_2/mlmc_1.hdf5".format(cl, nn_level)) + sampling_info_path = os.path.join(data_dir, "{}/sampling_info".format(cl)) + ref_mlmc_file = os.path.join(data_dir, "{}/L1_benchmark/mlmc_1.hdf5".format(cl)) + + elif case == 10 or case == 11: + data_dir = "/home/martin/Documents/metamodels/data/5_ele/" + cl = "cl_0_1_s_1" + if case == 11: + cl = "cl_0_3_s_4" + nn_level = 0 + replace_level = False + # mesh = os.path.join(data_dir, "{}/L5/l_step_0.020196309484414757_common_files/mesh.msh".format(cl)) + mesh = os.path.join(data_dir, "l_step_0.07416198487095663_common_files/mesh.msh".format(cl)) + output_dir = os.path.join(data_dir, "{}/L2_1/test/01_cond_field/output/".format(cl)) + hdf_path = 
os.path.join(data_dir, "{}/L2_1/mlmc_2.hdf5".format(cl)) + mlmc_hdf_path = os.path.join(data_dir, "{}/L2_2/mlmc_2.hdf5".format(cl)) + save_path = os.path.join(data_dir, "{}".format(cl)) + l_0_output_dir = os.path.join(data_dir, "{}/L1_2/test/01_cond_field/output/".format(cl,nn_level)) + l_0_hdf_path = os.path.join(data_dir, "{}/L1_2/mlmc_1.hdf5".format(cl, nn_level)) + sampling_info_path = os.path.join(data_dir, "{}/sampling_info".format(cl)) + ref_mlmc_file = os.path.join(data_dir, "{}/L1_benchmark/mlmc_1.hdf5".format(cl)) + + elif case == 12: # mesh size comparison + data_dir = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/" + # cl = "cl_0_1_s_1" + level = 3 + nn_level = 0 + replace_level = False + mesh = os.path.join(data_dir, "l_step_1.0_common_files/repo.msh") # L1, 7s + #mesh = os.path.join(data_dir, "l_step_0.3760603093086394_common_files/repo.msh") #L2 10.5 s + mesh = os.path.join(data_dir, "l_step_0.1414213562373095_common_files/repo.msh") #L3 12s + #mesh = os.path.join(data_dir, "l_step_0.053182958969449884_common_files/repo.msh") # L4 + output_dir = os.path.join(data_dir, "L1_{}/test/02_conc/output/".format(level)) + hdf_path = os.path.join(data_dir, "L1_{}/mlmc_1.hdf5".format(level)) + mlmc_hdf_path = os.path.join(data_dir, "mlmc_hdf/L1_{}/mlmc_1.hdf5".format(level)) + save_path = data_dir + l_0_output_dir = os.path.join(data_dir, "L0_MC/L1_{}/test/02_conc/output/".format(level)) + l_0_hdf_path = os.path.join(data_dir, "L0_MC/L1_{}/mlmc_1.hdf5".format(level)) + sampling_info_path = os.path.join(data_dir, "sampling_info") + + #ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_3/mlmc_1.hdf5" + #ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_1/mlmc_1.hdf5" + ref_mlmc_file = os.path.join(data_dir, "L1_benchmark/mlmc_1.hdf5") + + feature_names = [['conductivity_top', 'conductivity_bot', 'conductivity_repo']] + + elif case == 13: # mesh size comparison + data_dir = 
"/home/martin/Documents/metamodels/data/mesh_size/02_conc_por/" + # cl = "cl_0_1_s_1" + level = 3 + nn_level = 0 + replace_level = False + mesh = os.path.join(data_dir, "l_step_1.0_common_files/repo.msh") # L1, 7s + #mesh = os.path.join(data_dir, "l_step_0.3760603093086394_common_files/repo.msh") # L2 10.5 s + mesh = os.path.join(data_dir, "l_step_0.1414213562373095_common_files/repo.msh") #L3 12s + output_dir = os.path.join(data_dir, "L1_{}/test/02_conc/output/".format(level)) + hdf_path = os.path.join(data_dir, "L1_{}/mlmc_1.hdf5".format(level)) + mlmc_hdf_path = os.path.join(data_dir, "mlmc_hdf/L1_{}/mlmc_1.hdf5".format(level)) + save_path = data_dir + l_0_output_dir = os.path.join(data_dir, "L0_MC/L1_{}/test/02_conc/output/".format(level)) + l_0_hdf_path = os.path.join(data_dir, "L0_MC/L1_{}/mlmc_1.hdf5".format(level)) + sampling_info_path = os.path.join(data_dir, "sampling_info") + ref_mlmc_file = os.path.join(data_dir, "L1_benchmark/mlmc_1.hdf5") + feature_names = [['porosity_top', 'porosity_bot', 'porosity_repo']] + + elif case == 14: # mesh size comparison + data_dir = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_2_features/" + # cl = "cl_0_1_s_1" + level = 3 + nn_level = 0 + replace_level = False + mesh = os.path.join(data_dir, "l_step_1.0_common_files/repo.msh") # L1, 7s + #mesh = os.path.join(data_dir, "l_step_0.3760603093086394_common_files/repo.msh") # L2 10.5 s + #mesh = os.path.join(data_dir, "l_step_0.1414213562373095_common_files/repo.msh") #L3 12s + mesh = os.path.join(data_dir, "l_step_0.053182958969449884_common_files/repo.msh") # L3 12s + output_dir = os.path.join(data_dir, "L1_{}/test/02_conc/output/".format(level)) + hdf_path = os.path.join(data_dir, "L1_{}/mlmc_1.hdf5".format(level)) + mlmc_hdf_path = os.path.join(data_dir, "mlmc_hdf/L1_{}/mlmc_1.hdf5".format(level)) + save_path = data_dir + l_0_output_dir = os.path.join(data_dir, "L0_MC/L1_{}/test/02_conc/output/".format(level)) + l_0_hdf_path = os.path.join(data_dir, 
"L0_MC/L1_{}/mlmc_1.hdf5".format(level)) + sampling_info_path = os.path.join(data_dir, "sampling_info") + ref_mlmc_file = os.path.join(data_dir, "L1_benchmark/mlmc_1.hdf5") + + feature_names = [['conductivity_top', 'conductivity_bot', 'conductivity_repo'], + ['porosity_top', 'porosity_bot', 'porosity_repo']] + + return output_dir, hdf_path, l_0_output_dir, l_0_hdf_path, save_path, mesh, sampling_info_path, ref_mlmc_file,\ + replace_level, nn_level, mlmc_hdf_path, feature_names + + +# def plot_results_corr_length(): +# cl_all = {"cl_0_001_s_1": 0.001, "cl_0_01_s_1": 0.01, "cl_0_1_s_1": 0.1, "cl_1_s_1": 1, "cl_10_s_1": 10} +# +# # cl_all = {"cl_0_001_s_1": 0.001, "cl_10_s_1": 1} +# # +# cl_all = {"cl_0_001_s_1": 0.001} +# +# tr_MSE = {} +# te_MSE = {} +# tr_RSE = {} +# te_RSE = {} +# +# for cl_dir, cl in cl_all.items(): +# data_dir = "/home/martin/Documents/metamodels/data/mesh_size/" +# level = 3 +# nn_level = 0 +# replace_level = False +# # mesh = os.path.join(data_dir, "l_step_1.0_common_files/mesh.msh".format(cl)) #L1, 7s +# # mesh = os.path.join(data_dir, "l_step_0.27232698153315_common_files/mesh.msh".format(cl)) #L2 10.5 s +# mesh = os.path.join(data_dir, "l_step_0.07416198487095663_common_files/mesh.msh".format(cl_dir)) # L3 12s +# # mesh = os.path.join(data_dir, "l_step_0.020196309484414757_common_files/mesh.msh".format(cl)) #L4 22s +# # mesh = os.path.join(data_dir, "l_step_0.0055_common_files/mesh.msh".format(cl)) #L5 +# output_dir = os.path.join(data_dir, "{}/L1_{}/test/01_cond_field/output/".format(cl_dir, level)) +# hdf_path = os.path.join(data_dir, "{}/L1_{}/mlmc_1.hdf5".format(cl_dir, level)) +# mlmc_hdf_path = os.path.join(data_dir, "{}/mlmc_hdf/L1_{}/mlmc_1.hdf5".format(cl_dir, level)) +# save_path = os.path.join(data_dir, "{}".format(cl_dir)) +# l_0_output_dir = os.path.join(data_dir, "{}/L0_MC/L1_{}/test/01_cond_field/output/".format(cl_dir, level)) +# l_0_hdf_path = os.path.join(data_dir, "{}/L0_MC/L1_{}/mlmc_1.hdf5".format(cl_dir, level)) +# 
sampling_info_path = os.path.join(data_dir, "{}/sampling_info".format(cl_dir)) +# ref_mlmc_file = os.path.join(data_dir, "{}/L1_benchmark/mlmc_1.hdf5".format(cl_dir)) +# +# machine_learning_model = ("mesh_L3_log_15k", run_GNN, False) +# +# gnn, conv_layer, corr_field_config, model_config = get_gnn() +# +# save_path = os.path.join(save_path, machine_learning_model[0]) +# +# print("save path ", save_path) +# graph_creation_time = 28 # 22#159#0#159#66 +# +# config = {'machine_learning_model': machine_learning_model, +# 'save_path': save_path, +# 'sampling_info_path': sampling_info_path, +# 'output_dir': output_dir, +# 'nn_hdf_path': hdf_path, +# 'mlmc_hdf_path': mlmc_hdf_path, +# 'mesh': mesh, +# 'l_0_output_dir': l_0_output_dir, +# 'l_0_hdf_path': l_0_hdf_path, +# 'ref_mlmc_file': ref_mlmc_file, +# 'level': nn_level, +# 'conv_layer': conv_layer, +# 'gnn': gnn, +# 'model_config': model_config, +# 'replace_level': replace_level, +# 'corr_field_config': corr_field_config, +# 'n_train_samples': 2000, +# 'val_samples_ratio': 0.3, +# 'batch_size': 200, +# 'epochs': 2000, +# 'learning_rate': 0.01, +# 'graph_creation_time': graph_creation_time, +# 'save_model': False, +# 'loss_params': {'moments_class': Legendre_tf, "max_moments": 20, 'loss_max': 0.5, 'quantile': 1e-3} +# } +# +# train_MSE, test_MSE, train_RSE, test_RSE = analyze_statistics(config) +# +# tr_MSE[cl] = np.mean(train_MSE) +# te_MSE[cl] = np.mean(test_MSE) +# tr_RSE[cl] = np.mean(train_RSE) +# te_RSE[cl] = np.mean(test_RSE) +# +# +# plt_cl = plots.CorrLength() +# plt_cl.add_mse_test(te_MSE) +# plt_cl.add_mse_train(tr_MSE) +# +# plt_cl.show(None) +# plt_cl.show("corr_length_mse") +# +# plt_cl = plots.CorrLength() +# plt_cl.add_mse_test(te_RSE) +# plt_cl.add_mse_train(tr_RSE) +# +# plt_cl.show(None) +# plt_cl.show("corr_length_mse") + + +# def plot_results_corr_length(): +# cl_all = {"cl_0_001_s_1": 0.001, "cl_0_01_s_1": 0.01, "cl_0_1_s_1": 0.1, "cl_1_s_1": 1, "cl_10_s_1": 10} +# +# # cl_all = {"cl_0_001_s_1": 
0.001, "cl_10_s_1": 1} +# # +# cl_all = {"cl_0_001_s_1": 0.001} +# +# tr_MSE = {} +# te_MSE = {} +# tr_RSE = {} +# te_RSE = {} +# +# for cl_dir, cl in cl_all.items(): +# data_dir = "/home/martin/Documents/metamodels/data/mesh_size/" +# level = 3 +# nn_level = 0 +# replace_level = False +# # mesh = os.path.join(data_dir, "l_step_1.0_common_files/mesh.msh".format(cl)) #L1, 7s +# # mesh = os.path.join(data_dir, "l_step_0.27232698153315_common_files/mesh.msh".format(cl)) #L2 10.5 s +# mesh = os.path.join(data_dir, "l_step_0.07416198487095663_common_files/mesh.msh".format(cl_dir)) # L3 12s +# # mesh = os.path.join(data_dir, "l_step_0.020196309484414757_common_files/mesh.msh".format(cl)) #L4 22s +# # mesh = os.path.join(data_dir, "l_step_0.0055_common_files/mesh.msh".format(cl)) #L5 +# output_dir = os.path.join(data_dir, "{}/L1_{}/test/01_cond_field/output/".format(cl_dir, level)) +# hdf_path = os.path.join(data_dir, "{}/L1_{}/mlmc_1.hdf5".format(cl_dir, level)) +# mlmc_hdf_path = os.path.join(data_dir, "{}/mlmc_hdf/L1_{}/mlmc_1.hdf5".format(cl_dir, level)) +# save_path = os.path.join(data_dir, "{}".format(cl_dir)) +# l_0_output_dir = os.path.join(data_dir, "{}/L0_MC/L1_{}/test/01_cond_field/output/".format(cl_dir, level)) +# l_0_hdf_path = os.path.join(data_dir, "{}/L0_MC/L1_{}/mlmc_1.hdf5".format(cl_dir, level)) +# sampling_info_path = os.path.join(data_dir, "{}/sampling_info".format(cl_dir)) +# ref_mlmc_file = os.path.join(data_dir, "{}/L1_benchmark/mlmc_1.hdf5".format(cl_dir)) +# +# machine_learning_model = ("mesh_L3_log_15k", run_GNN, False) +# +# gnn, conv_layer, corr_field_config, model_config = get_gnn() +# +# save_path = os.path.join(save_path, machine_learning_model[0]) +# +# print("save path ", save_path) +# graph_creation_time = 25 # 22#159#0#159#66 +# +# config = {'machine_learning_model': machine_learning_model, +# 'save_path': save_path, +# 'sampling_info_path': sampling_info_path, +# 'output_dir': output_dir, +# 'nn_hdf_path': hdf_path, +# 
'mlmc_hdf_path': mlmc_hdf_path, +# 'mesh': mesh, +# 'l_0_output_dir': l_0_output_dir, +# 'l_0_hdf_path': l_0_hdf_path, +# 'ref_mlmc_file': ref_mlmc_file, +# 'level': nn_level, +# 'conv_layer': conv_layer, +# 'gnn': gnn, +# 'model_config': model_config, +# 'replace_level': replace_level, +# 'corr_field_config': corr_field_config, +# 'n_train_samples': 2000, +# 'val_samples_ratio': 0.3, +# 'batch_size': 200, +# 'epochs': 2000, +# 'learning_rate': 0.01, +# 'graph_creation_time': graph_creation_time, +# 'save_model': False, +# 'loss_params': {'moments_class': Legendre_tf, "max_moments": 20, 'loss_max': 0.5, 'quantile': 1e-3} +# } +# +# train_MSE, test_MSE, train_RSE, test_RSE = analyze_statistics(config) +# +# tr_MSE[cl] = np.mean(train_MSE) +# te_MSE[cl] = np.mean(test_MSE) +# tr_RSE[cl] = np.mean(train_RSE) +# te_RSE[cl] = np.mean(test_RSE) +# +# +# plt_cl = plots.CorrLength() +# plt_cl.add_mse_test(te_MSE) +# plt_cl.add_mse_train(tr_MSE) +# +# plt_cl.show(None) +# plt_cl.show("corr_length_mse") +# +# plt_cl = plots.CorrLength() +# plt_cl.add_mse_test(te_RSE) +# plt_cl.add_mse_train(tr_RSE) +# +# plt_cl.show(None) +# plt_cl.show("corr_length_mse") + + +def get_arguments(arguments): + """ + Getting arguments from console + :param arguments: list of arguments + :return: namespace + """ + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('work_dir', help='work directory') + parser.add_argument('data_dir', help='data directory') + args = parser.parse_args(arguments) + return args + + +if __name__ == "__main__": + args = get_arguments(sys.argv[1:]) + data_dir = args.data_dir + work_dir = args.work_dir + case = 12 + #data_dir = "/home/martin/Documents/metamodels/data/1000_ele/" + output_dir, hdf_path, l_0_output_dir, l_0_hdf_path, save_path, mesh, sampling_info_path, ref_mlmc_file,\ + replace_level, nn_level, mlmc_hdf_path, feature_names = get_config(data_dir, case) + + # plot_results_corr_length() + # exit() + + # if 
os.path.exists(os.path.join(work_dir, "mlmc_{}.hdf5".format(nn_level + 1))): + # l_0_hdf_path = os.path.join(work_dir, "mlmc_{}.hdf5".format(nn_level + 1)) + # hdf_path = os.path.join(work_dir, "mlmc_5.hdf5") + # ref_mlmc_file = os.path.join(work_dir, "benchmark_mlmc_1.hdf5") + + + # import cProfile + # import pstats + # pr = cProfile.Profile() + # pr.enable() + #gnn, conv_layer, corr_field_config = get_gnn() + # + # my_result = run_GNN(output_dir, hdf_path, l_0_output_dir, l_0_hdf_path, save_path, mesh, level=nn_level, log=True, conv_layer=conv_layer) + # + # pr.disable() + # ps = pstats.Stats(pr).sort_stats('cumtime') + # ps.print_stats() + + #run_SVR(output_dir, hdf_path, l_0_output_dir, l_0_hdf_path, save_path, mesh, sampling_info_path, ref_mlmc_file, level=nn_level, log=True, conv_layer=conv_layer) # , gnn=gnn) + + #run_GNN(output_dir, hdf_path, l_0_output_dir, l_0_hdf_path, save_path, mesh, level=nn_level, log=True) # , gnn=gnn) + #run_CNN(output_dir, hdf_path, l_0_output_dir, l_0_hdf_path, save_path, mesh, level=nn_level, log=True) # , gnn=gnn) + #process_results(hdf_path, sampling_info_path, ref_mlmc_file, save_path, nn_level, replace_level) + + # Graph creation time for cl_0_1_s_1 case 1 = 100 s + + # gnn, conv_layer, corr_field_config = get_gnn() + # # # # #run_GNN(output_dir, hdf_path, l_0_output_dir, l_0_hdf_path, save_path, mesh, model=GCN, level=nn_level, log=True) # CGN model leads to constant value + # # # # #run_GNN(output_dir, hdf_path, l_0_output_dir, l_0_hdf_path, save_path, mesh, level=nn_level, log=True, gnn=gnn, conv_layer=conv_layer) + # run_GNN(output_dir, hdf_path, l_0_output_dir, l_0_hdf_path, save_path, mesh, sampling_info_path, ref_mlmc_file, + # level=nn_level, log=True, conv_layer=conv_layer, gnn=gnn, corr_field_config=corr_field_config, graph_creation_time=100) + # process_results(hdf_path, sampling_info_path, ref_mlmc_file, save_path, nn_level, replace_level) + # + + gnn, conv_layer, corr_field_config, model_config = get_gnn() + + # 
print("gnn ", gnn) + #print("conv layer ", conv_layer) + + #machine_learning_model = ("L2_test", run_GNN, False) + + machine_learning_model = ("ChC8L3_log", run_GNN, False) + #machine_learning_model = ("ChC8L2_log", run_GNN, False) + #machine_learning_model = ("SVR_L3_log", run_GNN, False) + # machine_learning_model = ("ChC32L3T25000", run_GNN, False) + # + # machine_learning_model = ("ChC32Loss2_adding_moments_2", run_GNN, False) + #machine_learning_model = ("ChC32Loss2_add_mom", run_GNN, True) + # + #machine_learning_model = ("ChC32L3M10_test", run_GNN, False) + + #machine_learning_model = ("mesh_L3", run_GNN, False) + + #machine_learning_model = ("SVR_mesh_L3_log", run_GNN, False) + + + machine_learning_model = ("GCN_mesh_L3_log", run_GNN, False) + # + #machine_learning_model = ("mesh_moments_test_2", run_GNN, True) + #machine_learning_model = ("mesh_L3_test_m", run_GNN, False) + + # #models = {"ChebConv": (run_GNN, False), "SVR": (run_SVR, False)} + machine_learning_model = ("5eleChebConvL3_2", run_GNN, False) + #machine_learning_model = ("5eleChebConvK2", run_GNN, False) + # # machine_learning_model = ("5eleChebConvK3", run_GNN, False) + #machine_learning_model = ("5eleChebConv32abs", run_GNN, False) + #machine_learning_model = ("5eleChebConv32msemom", run_GNN, False) + + #################### + ### Compare number of training samples ### + # machine_learning_model = ("mesh_L3", run_GNN, False) + # machine_learning_model = ("mesh_L3_t_5k", run_GNN, False) + # machine_learning_model = ("mesh_L3_t_10k", run_GNN, False) + # #machine_learning_model = ("mesh_L3_t_15k", run_GNN, False) + # ################ + # ################ + # + # #################### + # ### Compare number of training samples LOG ### + # machine_learning_model = ("mesh_L3_t_100_log", run_GNN, False) + # machine_learning_model = ("mesh_L3_t_500_log", run_GNN, False) + # machine_learning_model = ("mesh_L3_t_1000_log", run_GNN, False) + # machine_learning_model = ("mesh_L3_t_1500_log", run_GNN, 
False) + # machine_learning_model = ("mesh_L3_t_2000_log", run_GNN, False) + # machine_learning_model = ("mesh_L3_t_4000_log", run_GNN, False) + # machine_learning_model = ("mesh_L3_t_8000_log", run_GNN, False) + # # # machine_learning_model = ("mesh_L3_t_15k", run_GNN, False) + # ################ + # ################ + # + # ######## + # # Test different correlation lengths + # machine_learning_model = ("mesh_L3_log", run_GNN, False) + # #machine_learning_model = ("SVR_mesh_L3_log", run_GNN, False) + # ####### + # + #machine_learning_model = ("DNN_mesh_L3_log_deep", run_DNN, True) + # + #machine_learning_model = ("DNN_mesh_L3_6", run_DNN, True) + machine_learning_model = ("GCN_mesh_L3_log_16", run_GNN, True) + machine_learning_model = ("mesh_L3_log_test_saved_model", run_GNN, True) + if case == 7: + #machine_learning_model = ("mesh_L3_log_50k_weights", run_GNN, True) + machine_learning_model = ("mesh_L3_log_50k", run_GNN, True) + machine_learning_model = ("L1_3_cl_0_1_s_1_all_log_output_mult", run_GNN, False) + #machine_learning_model = ("L1_3_cl_0_1_s_1_all_log_output_mult_case_1", run_GNN, False) + + # machine_learning_model = ("test_02_conc", run_GNN, False) + # + # machine_learning_model = ("L1_1_02_conc_cond_5", run_GNN, False) + # # # + # machine_learning_model = ("L1_1_02_conc_cond", run_GNN, False) + # machine_learning_model = ("L1_1_02_conc_cond_norm", run_GNN, False) + # machine_learning_model = ("L1_1_02_conc_cond_norm_output_mult", run_GNN, False) + if case == 12: + machine_learning_model = ("L1_1_02_conc_cond_norm_output_mult", run_GNN, False) + machine_learning_model = ("L1_1_02_conc_cond_output_mult", run_GNN, False) + machine_learning_model = ("L1_1_02_conc_cond_norm", run_GNN, False) + + + #machine_learning_model = ("L1_1_02_conc_cond", run_GNN, False) + # + # #machine_learning_model = ("L1_1_02_conc_cond_all_log_output_mult", run_GNN, False) + # #machine_learning_model = ("L1_1_02_conc_cond_features_log_output_mult", run_GNN, False) + # 
machine_learning_model = ("L1_1_02_conc_cond_output_log_output_mult", run_GNN, False) + # + # #machine_learning_model = ("L1_3_02_conc_cond_test", run_GNN, False) + # machine_learning_model = ("L1_3_02_conc_cond_all_log_output_mult", run_GNN, False) + machine_learning_model = ("L1_3_02_conc_cond_all_log_output_mult_case_1", run_GNN, False) + + + #### CASE 1 #### + machine_learning_model = ("L1_3_02_conc_cond_features_norm_case_1", run_GNN, False) + machine_learning_model = ("L1_3_02_conc_cond_features_norm_mult_output_case_1", run_GNN, False) + + machine_learning_model = ("L1_3_02_conc_cond_output_mult_case_1", run_GNN, False) + ### log + machine_learning_model = ("L1_3_02_conc_cond_log_features_norm_case_1", run_GNN, False) + #machine_learning_model = ("L1_3_02_conc_cond_true_features_norm_mult_output_case_1", run_GNN, False) + #machine_learning_model = ("L1_3_02_conc_cond_log_output_mult_case_1", run_GNN, False) + + #machine_learning_model = ("L1_3_02_conc_cond_all_log_output_mult_T1_case_1", run_GNN, False) + + machine_learning_model = ("L1_3_02_conc_cond_log_output_mult_T28_case_1", run_GNN, False) + + #machine_learning_model = ("L1_3_02_conc_cond_log_output_mult_T34_AOF6_case_1", run_GNN, False) + + #machine_learning_model = ("L1_3_test", run_GNN, False) + + if case == 13: + machine_learning_model = ("L1_1_02_conc_por_all_log_output_mult", run_GNN, False) + machine_learning_model = ("L1_1_02_conc_por_features_log_output_mult", run_GNN, False) + machine_learning_model = ("L1_1_02_conc_por_output_log_output_mult", run_GNN, False) + + machine_learning_model = ("L1_1_02_conc_por_test", run_GNN, False) + + machine_learning_model = ("L1_3_02_conc_por_all_log_output_mult", run_GNN, False) + + if case == 14: + machine_learning_model = ("L1_1_02_conc_2_features", run_GNN, False) + #machine_learning_model = ("L1_1_02_conc_2_features_log", run_GNN, True) + + #machine_learning_model = ("L1_1_02_conc_2_features_log_output_mult", run_GNN, False) + machine_learning_model = 
("L1_1_02_conc_2_features_all_log_mult_output", run_GNN, False) + #machine_learning_model = ("L1_1_02_conc_2_features_output_log_mult_output", run_GNN, False) + #machine_learning_model = ("L1_1_02_conc_2_features_features_log_mult_output", run_GNN, False) + + machine_learning_model = ("L1_1_02_conc_2_features_test", run_GNN, False) + + machine_learning_model = ("L1_3_02_conc_2_features_all_log_output_mult", run_GNN, False) + + machine_learning_model = ("L1_3_02_conc_2_features_log_output_mult_T19_case_1", run_GNN, False) + + + #machine_learning_model = ("L1_1_02_conc_2_features_test", run_GNN, False) + + #machine_learning_model = ("mesh_L3_log_50k_weights_5", run_GNN, False) + + #machine_learning_model = ("mesh_L3_log_sigmoid", run_GNN, False) # ReLU is much better + + save_path = os.path.join(save_path, machine_learning_model[0]) + + # if os.path.exists(save_path): + # shutil.rmtree(save_path) + + print("save path ", save_path) + + # 02 proc times + # graph creation time: 2 features: 53 sec + # conductivity: 35 sec + # porosity: 35 sec + + # L2 + # graph creation time: 2 features: 104 sec + # conductivity: 68 sec + # porosity: 66 sec + + # L3 + # graph creation time: 2 features: 396 sec + # conductivity: 251 sec + # porosity: 250 sec + + # L4 + # graph creation time: 2 features: + # conductivity: 1670 + # porosity: + graph_creation_time = 250#25#11#22#159#0#159#66 + + config = {'machine_learning_model': machine_learning_model, + 'save_path': save_path, + 'output_dir': output_dir, + 'hdf_path': hdf_path, + 'mlmc_hdf_path': mlmc_hdf_path, + 'mesh': mesh, + 'l_0_output_dir': l_0_output_dir, + 'l_0_hdf_path': l_0_hdf_path, + 'sampling_info_path': sampling_info_path, + 'ref_mlmc_file': ref_mlmc_file, + 'level': nn_level, + 'conv_layer': conv_layer, + 'gnn': gnn, + 'model_config': model_config, + 'replace_level': replace_level, + 'corr_field_config': corr_field_config, + 'n_train_samples': 2000, + 'val_samples_ratio': 0.2, + 'batch_size': 20, + 'epochs': 2, + 
'learning_rate': 0.001, + 'graph_creation_time': graph_creation_time, + 'save_model': True, + 'feature_names': feature_names + } + + #statistics(config) + + analyze_statistics(config) + + # save_path = os.path.join(save_path, "SVR") + # statistics(run_SVR, output_dir, hdf_path, l_0_output_dir, l_0_hdf_path, save_path, mesh, level=nn_level, log=True) + diff --git a/test/metamodels/nn_config.py b/test/metamodels/nn_config.py new file mode 100644 index 00000000..272dfc23 --- /dev/null +++ b/test/metamodels/nn_config.py @@ -0,0 +1,9 @@ +from mlmc.metamodel.flow_task_GNN_2 import GNN +from spektral.layers import GCNConv, GlobalSumPool, ChebConv, GraphSageConv, ARMAConv, GATConv, APPNPConv, GINConv, GeneralConv +from tensorflow.keras.losses import MeanSquaredError, KLDivergence, MeanAbsoluteError +from tensorflow.keras import Model +from tensorflow.keras.layers import Dense +from spektral.layers import GlobalSumPool, GlobalMaxPool, GlobalAvgPool +import tensorflow as tf +from tensorflow.keras.layers.experimental import preprocessing + diff --git a/test/metamodels/predict_on_different_mesh.py b/test/metamodels/predict_on_different_mesh.py new file mode 100644 index 00000000..7192c443 --- /dev/null +++ b/test/metamodels/predict_on_different_mesh.py @@ -0,0 +1,595 @@ +import os +import numpy as np +import warnings +#os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Run on CPU only +import sys +import shutil +import subprocess +from mlmc.metamodel.analyze_nn import run_GNN, run_SVR, statistics, analyze_statistics, process_results +from mlmc.moments import Legendre_tf, Monomial +from keras.layers import Input +from mlmc.metamodel.flow_task_GNN_2 import GNN +from spektral.layers import GCNConv, GlobalSumPool, ChebConv, GraphSageConv, ARMAConv, GATConv, APPNPConv, GINConv, GeneralConv +from mlmc.metamodel.own_cheb_conv import OwnChebConv +from tensorflow.keras.losses import MeanSquaredError, KLDivergence, MeanAbsoluteError +from mlmc.metamodel.custom_methods import abs_activation, 
MSE_moments +from tensorflow.keras import Model +from tensorflow.keras.models import Sequential +from tensorflow.keras.layers import Dense +from tensorflow.keras import regularizers +from spektral.layers import GlobalSumPool, GlobalMaxPool, GlobalAvgPool +import tensorflow as tf +from mlmc.plot import plots +from tensorflow.keras.layers.experimental import preprocessing +from mlmc.metamodel.analyze_nn import load_statistics, process_data +from scipy import stats +warnings.filterwarnings("ignore", category=DeprecationWarning) + + +def get_gnn(): + # Parameters + # conv_layer = GCNConv + conv_layer = ChebConv # Seems better than GCNConv, good distribution of predictions + conv_layer = OwnChebConv + # conv_layer = GraphSageConv # Seems better than ChebConv, good loss but very narrow distribution of predictions + # # conv_layer = ARMAConv # Seems worse than GraphSageConv + # conv_layer = GATConv # Slow and not better than GraphSageConv + # # conv_layer = APPNPConv # Not bad but worse than GraphSageConv + # # conv_layer = GINConv # it is comparable to APPNPConv + # act_func = "relu" # "tanh"#"elu" + + loss = MeanSquaredError() # var_loss_function# + #loss = MSE_moments + # loss = MeanAbsoluteError() + # loss = MeanSquaredLogarithmicError() + # loss = KLDivergence() + # loss = total_loss_function + optimizer = tf.optimizers.Adam(learning_rate=0.001) + patience = 1000 + hidden_regularization = None # l2(2e-10) + + net_model_config = { + "conv_layer": conv_layer, + "hidden_activation": 'relu', + "output_activation": abs_activation, + #"output_activation": 'linear', + "kernel_regularization": hidden_regularization, + "normalizer": preprocessing.Normalization() + } + + #model = Net(**net_model_config) + + model_config = {"loss": loss, + "optimizer": optimizer, + "patience": patience, + "model_class": Net, + "net_model_config": net_model_config, + "verbose": True} + + corr_field_config = {'02_conc': True, 'corr_length': 0.1, 'sigma': 1, 'log': True} + + return GNN, 
conv_layer, corr_field_config, model_config + + +class Net(Model): + def __init__(self, conv_layer, hidden_activation, output_activation, kernel_regularization, normalizer, + **kwargs): + super().__init__(**kwargs) + + # T19 + self._conv_layers = [ + conv_layer(8, K=4, activation=hidden_activation, kernel_regularizer=kernel_regularization)] + self.flatten = GlobalSumPool() + + self._dense_layers = [Dense(64, activation=hidden_activation), Dense(32, activation=hidden_activation), + Dense(1)] + + # T34 + # self._conv_layers = [ + # conv_layer(8, K=4, activation=hidden_activation, kernel_regularizer=kernel_regularization)] # ,n) + # self.flatten = GlobalSumPool() + # + # self._dense_layers = [Dense(256, activation=hidden_activation), Dense(128, activation=hidden_activation), + # Dense(1)] + + + def call(self, inputs): + x, a = inputs + + for c_layer in self._conv_layers: + x = c_layer([x, a]) + + output = self.flatten(x) + + # print("output shape ", output.shape) + # exit()1 + + for d_layer in self._dense_layers: + output = d_layer(output) + + return output + + +def get_config(data_dir, case=0): + if case == 12: # mesh size comparison + data_dir = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/" + # cl = "cl_0_1_s_1" + level = "4" + nn_level = 0 + replace_level = False + mesh = os.path.join(data_dir, "l_step_1.0_common_files/repo.msh") # L1, 7s + # mesh = os.path.join(data_dir, "l_step_0.3760603093086394_common_files/repo.msh") #L2 10.5 s + # mesh = os.path.join(data_dir, "l_step_0.1414213562373095_common_files/repo.msh") #L3 12s + mesh = os.path.join(data_dir, "l_step_0.053182958969449884_common_files/repo.msh") # L4 # 4388 - for 50k + # mesh = os.path.join(data_dir, "l_step_0.027_common_files/repo.msh") # L5 - graph creation time: 2564.6843196170003 + output_dir = os.path.join(data_dir, "L1_{}_50k/test/02_conc/output/".format(level)) + predict_dir = os.path.join(data_dir, "L1_{}/test/02_conc/output/".format(level)) + predict_hdf = 
os.path.join(data_dir, "L1_{}/mlmc_1.hdf5".format(level)) + hdf_path = os.path.join(data_dir, "L1_{}_50k/mlmc_1.hdf5".format(level)) + mlmc_hdf_path = os.path.join(data_dir, "mlmc_hdf/L1_{}/mlmc_1.hdf5".format(level)) + save_path = data_dir + l_0_output_dir = os.path.join(data_dir, "L0_MC/L1_{}_50k/test/02_conc/output/".format(level)) + l_0_hdf_path = os.path.join(data_dir, "L0_MC/L1_{}_50k/mlmc_1.hdf5".format(level)) + sampling_info_path = os.path.join(data_dir, "sampling_info") + + # ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_3/mlmc_1.hdf5" + # ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_1/mlmc_1.hdf5" + ref_mlmc_file = os.path.join(data_dir, "L1_benchmark/mlmc_1.hdf5") + + feature_names = [['conductivity_top', 'conductivity_bot', 'conductivity_repo']] + + graph_creation_time = 4400 + + if case == "L2": # mesh size comparison + data_dir = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/" + level = 2 + nn_level = 0 + replace_level = False + mesh = os.path.join(data_dir, "l_step_0.3760603093086394_common_files/repo.msh") + graph_creation_time = 35 + + output_dir = os.path.join(data_dir, "L1_{}_50k/test/02_conc/output/".format(level)) + hdf_path = os.path.join(data_dir, "L1_{}_50k/mlmc_1.hdf5".format(level)) + mlmc_hdf_path = os.path.join(data_dir, "mlmc_hdf/L1_{}/mlmc_1.hdf5".format(level)) + save_path = data_dir + l_0_output_dir = os.path.join(data_dir, "L0_MC/L1_{}_50k/test/02_conc/output/".format(level)) + l_0_hdf_path = os.path.join(data_dir, "L0_MC/L1_{}_50k/mlmc_1.hdf5".format(level)) + sampling_info_path = os.path.join(data_dir, "sampling_info") + + #ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_3/mlmc_1.hdf5" + #ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_1/mlmc_1.hdf5" + ref_mlmc_file = os.path.join(data_dir, "L1_benchmark/mlmc_1.hdf5") + + feature_names = [['conductivity_top', 
'conductivity_bot', 'conductivity_repo']] + + if case == 'L1': # mesh size comparison + data_dir = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/" + level = 1 + nn_level = 0 + replace_level = False + mesh = os.path.join(data_dir, "l_step_1.0_common_files/repo.msh") + graph_creation_time = 68 + output_dir = os.path.join(data_dir, "L1_{}_50k/test/02_conc/output/".format(level)) + hdf_path = os.path.join(data_dir, "L1_{}_50k/mlmc_1.hdf5".format(level)) + mlmc_hdf_path = os.path.join(data_dir, "mlmc_hdf/L1_{}/mlmc_1.hdf5".format(level)) + save_path = data_dir + l_0_output_dir = os.path.join(data_dir, "L0_MC/L1_{}_50k/test/02_conc/output/".format(level)) + l_0_hdf_path = os.path.join(data_dir, "L0_MC/L1_{}_50k/mlmc_1.hdf5".format(level)) + sampling_info_path = os.path.join(data_dir, "sampling_info") + + #ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_3/mlmc_1.hdf5" + #ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_1/mlmc_1.hdf5" + ref_mlmc_file = os.path.join(data_dir, "L1_benchmark/mlmc_1.hdf5") + + feature_names = [['conductivity_top', 'conductivity_bot', 'conductivity_repo']] + + if case == 'L3': # mesh size comparison + data_dir = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/" + level = 3 + nn_level = 0 + replace_level = False + mesh = os.path.join(data_dir, "l_step_0.1414213562373095_common_files/repo.msh") # L3 12s + graph_creation_time = 500 + output_dir = os.path.join(data_dir, "L1_{}_50k/test/02_conc/output/".format(level)) + hdf_path = os.path.join(data_dir, "L1_{}_50k/mlmc_1.hdf5".format(level)) + mlmc_hdf_path = os.path.join(data_dir, "mlmc_hdf/L1_{}/mlmc_1.hdf5".format(level)) + save_path = data_dir + l_0_output_dir = os.path.join(data_dir, "L0_MC/L1_{}_50k/test/02_conc/output/".format(level)) + l_0_hdf_path = os.path.join(data_dir, "L0_MC/L1_{}_50k/mlmc_1.hdf5".format(level)) + sampling_info_path = os.path.join(data_dir, "sampling_info") + + # 
output_dir = l_0_output_dir + # hdf_path = l_0_hdf_path + + #ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_3/mlmc_1.hdf5" + #ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_1/mlmc_1.hdf5" + ref_mlmc_file = os.path.join(data_dir, "L1_benchmark/mlmc_1.hdf5") + + feature_names = [['conductivity_top', 'conductivity_bot', 'conductivity_repo']] + + if case == 'L5': # mesh size comparison + data_dir = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/" + level = 5 + nn_level = 0 + replace_level = False + mesh = os.path.join(data_dir, "l_step_0.027_common_files/repo.msh") # L5 - graph creation time: 2564.6843196170003 + graph_creation_time = 5000 + output_dir = os.path.join(data_dir, "L1_{}_50k/test/02_conc/output/".format(level)) + hdf_path = os.path.join(data_dir, "L1_{}_50k/mlmc_1.hdf5".format(level)) + mlmc_hdf_path = os.path.join(data_dir, "mlmc_hdf/L1_{}/mlmc_1.hdf5".format(level)) + save_path = data_dir + l_0_output_dir = os.path.join(data_dir, "L0_MC/L1_{}_50k/test/02_conc/output/".format(level)) + l_0_hdf_path = os.path.join(data_dir, "L0_MC/L1_{}_50k/mlmc_1.hdf5".format(level)) + sampling_info_path = os.path.join(data_dir, "sampling_info") + + # output_dir = l_0_output_dir + # hdf_path = l_0_hdf_path + + #ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_3/mlmc_1.hdf5" + #ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_1/mlmc_1.hdf5" + ref_mlmc_file = os.path.join(data_dir, "L1_benchmark/mlmc_1.hdf5") + + feature_names = [['conductivity_top', 'conductivity_bot', 'conductivity_repo']] + + elif case == 429: # mesh size comparison + data_dir = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/" + # cl = "cl_0_1_s_1" + level = "3_429" + nn_level = 0 + replace_level = False + #mesh = os.path.join(data_dir, "l_step_1.0_common_files/repo.msh") # L1, 7s + #mesh = os.path.join(data_dir, 
"l_step_0.3760603093086394_common_files/repo.msh") #L2 10.5 s + mesh = os.path.join(data_dir, "l_step_0.15_common_files/repo.msh") #L3 12s + #mesh = os.path.join(data_dir, "l_step_0.053182958969449884_common_files/repo.msh") # L4 + output_dir = os.path.join(data_dir, "L1_{}/test/02_conc/output/".format(level)) + hdf_path = os.path.join(data_dir, "L1_{}/mlmc_1.hdf5".format(level)) + mlmc_hdf_path = None#os.path.join(data_dir, "mlmc_hdf/L1_{}/mlmc_1.hdf5".format(level)) + save_path = data_dir + l_0_output_dir = os.path.join(data_dir, "L0_MC/L1_{}/test/02_conc/output/".format(level)) + l_0_hdf_path = os.path.join(data_dir, "L0_MC/L1_{}/mlmc_1.hdf5".format(level)) + sampling_info_path = os.path.join(data_dir, "sampling_info") + + graph_creation_time = 241 + + #ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_3/mlmc_1.hdf5" + #ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_1/mlmc_1.hdf5" + ref_mlmc_file = os.path.join(data_dir, "L1_benchmark/mlmc_1.hdf5") + + feature_names = [['conductivity_top', 'conductivity_bot', 'conductivity_repo']] + + elif case == 521: # mesh size comparison + data_dir = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/" + # cl = "cl_0_1_s_1" + level = "3_521" + nn_level = 0 + replace_level = False + #mesh = os.path.join(data_dir, "l_step_1.0_common_files/repo.msh") # L1, 7s + #mesh = os.path.join(data_dir, "l_step_0.3760603093086394_common_files/repo.msh") #L2 10.5 s + mesh = os.path.join(data_dir, "l_step_0.13_common_files/repo.msh") #L3 12s + #mesh = os.path.join(data_dir, "l_step_0.053182958969449884_common_files/repo.msh") # L4 + output_dir = os.path.join(data_dir, "L1_{}/test/02_conc/output/".format(level)) + hdf_path = os.path.join(data_dir, "L1_{}/mlmc_1.hdf5".format(level)) + mlmc_hdf_path = None#os.path.join(data_dir, "mlmc_hdf/L1_{}/mlmc_1.hdf5".format(level)) + save_path = data_dir + l_0_output_dir = os.path.join(data_dir, 
"L0_MC/L1_{}/test/02_conc/output/".format(level)) + l_0_hdf_path = os.path.join(data_dir, "L0_MC/L1_{}/mlmc_1.hdf5".format(level)) + sampling_info_path = os.path.join(data_dir, "sampling_info") + + #ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_3/mlmc_1.hdf5" + #ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_1/mlmc_1.hdf5" + ref_mlmc_file = os.path.join(data_dir, "L1_benchmark/mlmc_1.hdf5") + + feature_names = [['conductivity_top', 'conductivity_bot', 'conductivity_repo']] + + graph_creation_time = 285 + + elif case == "case_2": # mesh size comparison + data_dir = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/case_2" + # cl = "cl_0_1_s_1" + level = "3" + nn_level = 0 + replace_level = False + # mesh = os.path.join(data_dir, "l_step_1.0_common_files/repo.msh") # L1, 7s + # mesh = os.path.join(data_dir, "l_step_0.3760603093086394_common_files/repo.msh") #L2 10.5 s + mesh = os.path.join(data_dir, "l_step_0.1414213562373095_common_files/repo.msh") #L3 12s + # mesh = os.path.join(data_dir, "l_step_0.053182958969449884_common_files/repo.msh") # L4 + output_dir = os.path.join(data_dir, "L1_{}/test/02_conc/output/".format(level)) + hdf_path = os.path.join(data_dir, "L1_{}/mlmc_1.hdf5".format(level)) + mlmc_hdf_path = None # os.path.join(data_dir, "mlmc_hdf/L1_{}/mlmc_1.hdf5".format(level)) + save_path = data_dir + l_0_output_dir = os.path.join(data_dir, "L0_MC/L1_{}/test/02_conc/output/".format(level)) + l_0_hdf_path = os.path.join(data_dir, "L0_MC/L1_{}/mlmc_1.hdf5".format(level)) + sampling_info_path = os.path.join(data_dir, "sampling_info") + + # ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_3/mlmc_1.hdf5" + # ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_1/mlmc_1.hdf5" + ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_benchmark/mlmc_1.hdf5" + #ref_mlmc_file = 
os.path.join(data_dir, "L1_benchmark/mlmc_1.hdf5") + + feature_names = [['conductivity_top', 'conductivity_bot', 'conductivity_repo']] + + graph_creation_time = 670 # case_2 + + elif case == "case_3": # mesh size comparison + data_dir = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/case_3" + # cl = "cl_0_1_s_1" + level = "3" + nn_level = 0 + replace_level = False + # mesh = os.path.join(data_dir, "l_step_1.0_common_files/repo.msh") # L1, 7s + # mesh = os.path.join(data_dir, "l_step_0.3760603093086394_common_files/repo.msh") #L2 10.5 s + mesh = os.path.join(data_dir, "l_step_0.1414213562373095_common_files/repo.msh") #L3 12s + # mesh = os.path.join(data_dir, "l_step_0.053182958969449884_common_files/repo.msh") # L4 + output_dir = os.path.join(data_dir, "L1_{}/test/02_conc/output/".format(level)) + hdf_path = os.path.join(data_dir, "L1_{}/mlmc_1.hdf5".format(level)) + mlmc_hdf_path = None # os.path.join(data_dir, "mlmc_hdf/L1_{}/mlmc_1.hdf5".format(level)) + save_path = data_dir + l_0_output_dir = os.path.join(data_dir, "L0_MC/L1_{}/test/02_conc/output/".format(level)) + l_0_hdf_path = os.path.join(data_dir, "L0_MC/L1_{}/mlmc_1.hdf5".format(level)) + sampling_info_path = os.path.join(data_dir, "sampling_info") + + # ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_3/mlmc_1.hdf5" + # ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_1/mlmc_1.hdf5" + #ref_mlmc_file = os.path.join(data_dir, "L1_benchmark/mlmc_1.hdf5") + ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_benchmark/mlmc_1.hdf5" + + feature_names = [['conductivity_top', 'conductivity_bot', 'conductivity_repo']] + + graph_creation_time = 601 # case_3 + + elif case == "case_4": # mesh size comparison + data_dir = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/case_4" + # cl = "cl_0_1_s_1" + level = "3" + nn_level = 0 + replace_level = False + # mesh = os.path.join(data_dir, 
"l_step_1.0_common_files/repo.msh") # L1, 7s + # mesh = os.path.join(data_dir, "l_step_0.3760603093086394_common_files/repo.msh") #L2 10.5 s + mesh = os.path.join(data_dir, "l_step_0.1414213562373095_common_files/repo.msh") #L3 12s + # mesh = os.path.join(data_dir, "l_step_0.053182958969449884_common_files/repo.msh") # L4 + output_dir = os.path.join(data_dir, "L1_{}/test/02_conc/output/".format(level)) + hdf_path = os.path.join(data_dir, "L1_{}/mlmc_1.hdf5".format(level)) + mlmc_hdf_path = os.path.join(data_dir, "mlmc_hdf/L1_{}/mlmc_1.hdf5".format(level)) + save_path = data_dir + l_0_output_dir = os.path.join(data_dir, "L0_MC/L1_{}/test/02_conc/output/".format(level)) + l_0_hdf_path = os.path.join(data_dir, "L0_MC/L1_{}/mlmc_1.hdf5".format(level)) + sampling_info_path = os.path.join(data_dir, "sampling_info") + + # ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_3/mlmc_1.hdf5" + # ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_1/mlmc_1.hdf5" + #ref_mlmc_file = os.path.join(data_dir, "L1_benchmark/mlmc_1.hdf5") + ref_mlmc_file = mlmc_hdf_path #"/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_benchmark/mlmc_1.hdf5" + + feature_names = [['conductivity_top', 'conductivity_bot', 'conductivity_repo']] + + graph_creation_time = 608 # case_4 + + elif case == "case_5": # mesh size comparison + data_dir = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/case_5" + # cl = "cl_0_1_s_1" + level = "3" + nn_level = 0 + replace_level = False + # mesh = os.path.join(data_dir, "l_step_1.0_common_files/repo.msh") # L1, 7s + # mesh = os.path.join(data_dir, "l_step_0.3760603093086394_common_files/repo.msh") #L2 10.5 s + mesh = os.path.join(data_dir, "l_step_0.1414213562373095_common_files/repo.msh") #L3 12s + # mesh = os.path.join(data_dir, "l_step_0.053182958969449884_common_files/repo.msh") # L4 + output_dir = os.path.join(data_dir, "L1_{}/test/02_conc/output/".format(level)) + hdf_path 
= os.path.join(data_dir, "L1_{}/mlmc_1.hdf5".format(level)) + mlmc_hdf_path = None # os.path.join(data_dir, "mlmc_hdf/L1_{}/mlmc_1.hdf5".format(level)) + save_path = data_dir + l_0_output_dir = os.path.join(data_dir, "L0_MC/L1_{}/test/02_conc/output/".format(level)) + l_0_hdf_path = os.path.join(data_dir, "L0_MC/L1_{}/mlmc_1.hdf5".format(level)) + sampling_info_path = os.path.join(data_dir, "sampling_info") + + # ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_3/mlmc_1.hdf5" + # ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_1/mlmc_1.hdf5" + #ref_mlmc_file = os.path.join(data_dir, "L1_benchmark/mlmc_1.hdf5") + ref_mlmc_file = "/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/L1_benchmark/mlmc_1.hdf5" + + feature_names = [['conductivity_top', 'conductivity_bot', 'conductivity_repo']] + graph_creation_time = 587 # case_5 + + return output_dir, hdf_path, l_0_output_dir, l_0_hdf_path, save_path, mesh, sampling_info_path, ref_mlmc_file,\ + replace_level, nn_level, mlmc_hdf_path, feature_names, graph_creation_time + + +def get_arguments(arguments): + """ + Getting arguments from console + :param arguments: list of arguments + :return: namespace + """ + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('work_dir', help='work directory') + parser.add_argument('data_dir', help='data directory') + args = parser.parse_args(arguments) + return args + + +# def set_weights(new_model, old_model): +# for new_conv_layer, old_conv_layer in zip(new_model._conv_layers, old_model._conv_layers): +# new_conv_layer.kernel = old_conv_layer.kernel +# +# print(old_conv_layer.kernel.numpy().shape) +# input_imgs = Input(shape=(None, 108, 1)) +# print("old_model.flatten.weights", old_model.flatten().weights) +# +# for new_dense_layer, old_dense_layer in zip(new_model._dense_layers, old_model._dense_layers): +# +# print("old_dense_layer.get_weights() shape ", 
old_dense_layer.get_weights()[0].shape) +# print("old_dense_layer.get_weights() shape ", old_dense_layer.get_weights()[1].shape) +# input_imgs = Input(shape=(None, 108, 1)) +# new_dense_layer(input_imgs) +# # model = Model(inputs=input_imgs, outputs=encoded) +# # dense_layer.set_weights(weights) +# +# print("new dense layer weights ", new_dense_layer.weights) +# new_dense_layer.set_weights(old_dense_layer.get_weights()) + + +if __name__ == "__main__": + ####################### + # Load trained model # + ####################### + #machine_learning_model = ("L1_3_02_conc_cond_log_output_mult_T19_case_1", run_GNN, False) + machine_learning_model = ("L1_4_T27_case_1_out_log_scale_is", run_GNN, False) + save_path = os.path.join("/home/martin/Documents/metamodels/data/mesh_size/02_conc_cond/", machine_learning_model[0]) + data_dict = load_statistics(save_path) + #data_dict = process_data(data_dict) + #for i in range(len(data_dict["test_targets"])): + + list_train_MSE, list_test_MSE, list_all_train_RSE, list_all_test_RSE, list_nn_total_time, list_mlmc_total_time,\ + list_kl_mlmc_all, list_kl_nn_all, list_learning_times = [], [], [], [], [], [], [], [], [] + + for index, model in enumerate(data_dict["model"][:12]): + + # newInput = Input(batch_shape=(None, 108, 1)) + # newOutputs = model(newInput) + # newModel = Model(newInput, newOutputs) + # + # if type(model._conv_layers[0]).__name__ == 'OwnChebConv': + # conv_layer = OwnChebConv + # + # print("conv layer ", conv_layer) + + ##################### + ## New case config ## + ##################### + args = get_arguments(sys.argv[1:]) + data_dir = args.data_dir + work_dir = args.work_dir + # case = "L2" + # case = 521 + case = 12#"case_3" + case = "L5" + case="L1" + #data_dir = "/home/martin/Documents/metamodels/data/1000_ele/" + output_dir, hdf_path, l_0_output_dir, l_0_hdf_path, save_path, mesh, sampling_info_path, ref_mlmc_file,\ + replace_level, nn_level, mlmc_hdf_path, feature_names, graph_creation_time = 
get_config(data_dir, case) + + machine_learning_model = ("L1_5_02_conc_cond_log_output_mult_T27_case_1_trained_{}_{}".format(case, index), run_GNN, False) + save_path = os.path.join(save_path, machine_learning_model[0]) + + if os.path.exists(save_path): + shutil.rmtree(save_path) + + #corr_field_config = {'02_conc': True, 'corr_length': 0.1, 'sigma': 1, 'log': True} + + gnn, conv_layer, corr_field_config, model_config = get_gnn() + + #gnn_au = gnn(**model_config) + # + # print("gnn au model ", gnn_au._model) + # print("model ", model) + + #set_weights(gnn_au._model, model) + + # dataset_config = {"features_normalization": False, + # "calc_output_mult_factor": True, + # "output_mult_factor": 1, + # "features_mult_factor": 1, + # "features_log": False, + # "output_log": True + # } + + # # 02_conc config + dataset_config = {"features_normalization": False, + "calc_output_mult_factor": False, + "output_mult_factor": 1, + "features_mult_factor": 1, + "first_log_features": False, + "first_log_output": True, + "output_scale": True, + } + + config = {'machine_learning_model': machine_learning_model, + 'save_path': save_path, + 'output_dir': output_dir, + 'hdf_path': hdf_path, + 'mlmc_hdf_path': mlmc_hdf_path, + 'mesh': mesh, + 'l_0_output_dir': l_0_output_dir, + 'l_0_hdf_path': l_0_hdf_path, + 'sampling_info_path': sampling_info_path, + 'ref_mlmc_file': ref_mlmc_file, + 'level': nn_level, + 'conv_layer': conv_layer, + 'gnn': gnn, + 'model_config': model_config, + 'replace_level': replace_level, + 'corr_field_config': corr_field_config, + 'n_train_samples': 2000, + 'val_samples_ratio': 0.2, + 'batch_size': 200, + 'epochs': 2, + 'learning_rate': 0.001, + 'graph_creation_time': graph_creation_time, + 'save_model': True, + "train_model": False, + 'feature_names': feature_names, + "set_model": model, + 'dataset_config': dataset_config, + 'independent_samples': False, + # 'predict_dir': predict_dir, + # 'predict_hdf ': predict_hdf + } + + model_title, mch_l_model, log = 
config['machine_learning_model'] + + train_MSE, test_MSE, all_train_RSE, all_test_RSE, nn_total_time, mlmc_total_time, kl_mlmc_all, kl_nn_all, \ + learning_times = statistics(config) + + list_train_MSE.append(train_MSE) + list_test_MSE.append(test_MSE) + list_all_train_RSE.append(all_train_RSE) + list_all_test_RSE.append(all_test_RSE) + list_nn_total_time.append(nn_total_time) + list_mlmc_total_time.append(mlmc_total_time) + list_kl_mlmc_all.append(kl_mlmc_all) + list_kl_nn_all.append(kl_nn_all) + list_learning_times.append(learning_times) + + print("learning time ", list_learning_times) + print("mean learning time ", np.mean(list_learning_times)) + print("max learning time ", np.max(list_learning_times)) + + + print("############# OUTPUT ################") + print("len(train MSE) ", len(list_train_MSE)) + print("train MSE ", np.mean(list_all_train_RSE)) + # print("train MSE sqrt var", np.sqrt(np.var(train_MSE))) + # print("train MSE std", np.std(train_MSE)) + + # output_mult_factor = 1437603411 + # print("orig train MSE ", train_MSE) + # train_MSE = np.array(train_MSE) * output_mult_factor + # print("train MSE ", train_MSE) + # test_MSE = np.array(test_MSE) * output_mult_factor + + print("train MSE ", list_train_MSE) + print("stats.sem(train_MSE) ", stats.sem(list_train_MSE)) + print("test MSE ", np.mean(list_test_MSE)) + print("test MSE ", list_test_MSE) + print("stats.sem(test_MSE) ", stats.sem(list_test_MSE)) + # print("test MSE std", np.sqrt(np.var(test_MSE))) + print("train RSE ", np.mean(list_all_train_RSE)) + print("test RSE ", np.mean(list_all_test_RSE)) + + print("nn total time ", list_nn_total_time) + print("mlmc total time ", list_mlmc_total_time) + + print("KL mlmc ", np.mean(list_kl_mlmc_all)) + print("KL nn ", np.mean(list_kl_nn_all)) + + print("mean learning time ", np.mean(list_learning_times)) + print("max learning time ", np.max(list_learning_times)) + + print("######################################") + + #analyze_statistics(config) + + # 
save_path = os.path.join(save_path, "SVR") + # statistics(run_SVR, output_dir, hdf_path, l_0_output_dir, l_0_hdf_path, save_path, mesh, level=nn_level, log=True) +