From c3285f0c201b5c72efe570b956c2ce2af2540bf8 Mon Sep 17 00:00:00 2001
From: martinspetlik <martin.spetlik>
Date: Tue, 12 Oct 2021 12:22:13 +0200
Subject: [PATCH 1/6] moments vars L2 norm

---
 mlmc/estimator.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/mlmc/estimator.py b/mlmc/estimator.py
index af4f6e0..4983c6e 100644
--- a/mlmc/estimator.py
+++ b/mlmc/estimator.py
@@ -363,7 +363,7 @@ def estimate_domain(quantity, sample_storage, quantile=None):
     return np.min(ranges[:, 0]), np.max(ranges[:, 1])
 
 
-def estimate_n_samples_for_target_variance(target_variance, prescribe_vars, n_ops, n_levels):
+def estimate_n_samples_for_target_variance(target_variance, prescribe_vars, n_ops, n_levels, vars_l2_norm=False):
     """
     Estimate optimal number of samples for individual levels that should provide a target variance of
     resulting moment estimate.
@@ -372,19 +372,26 @@ def estimate_n_samples_for_target_variance(target_variance, prescribe_vars, n_op
     :param prescribe_vars: vars[ L, M] for all levels L and moments_fn M safe the (zeroth) constant moment with zero variance.
     :param n_ops: number of operations at each level
     :param n_levels: number of levels
+    :param vars_l2_norm: if True, then use L2 norm of moments variances for each level variance
     :return: np.array with number of optimal samples for individual levels and moments_fn, array (LxR)
     """
     vars = prescribe_vars
+    if vars_l2_norm:
+        vars = np.linalg.norm(vars, axis=1)
+
     sqrt_var_n = np.sqrt(vars.T * n_ops)  # moments_fn in rows, levels in cols
-    total = np.sum(sqrt_var_n, axis=1)  # sum over levels
+    total = np.sum(sqrt_var_n, axis=-1)  # sum over levels
+
     n_samples_estimate = np.round((sqrt_var_n / n_ops).T * total / target_variance).astype(int)  # moments_fn in cols
+
     # Limit maximal number of samples per level
     n_samples_estimate_safe = np.maximum(
         np.minimum(n_samples_estimate, vars * n_levels / target_variance), 2)
 
+    if vars_l2_norm:
+        return n_samples_estimate_safe.astype(int)
     return np.max(n_samples_estimate_safe, axis=1).astype(int)
 
-
 def calc_level_params(step_range, n_levels):
     assert step_range[0] > step_range[1]
     level_parameters = []

From e1ede945e673bc8e9a8eff113da62aba8e487f79 Mon Sep 17 00:00:00 2001
From: martinspetlik <martin.spetlik>
Date: Wed, 13 Oct 2021 16:26:56 +0200
Subject: [PATCH 2/6] task 02

---
 mlmc/tool/flow_mc_02.py       | 501 ++++++++++++++++++++++++++++++++++
 test/02_conc/new_proc_conc.py |   0
 test/02_conc/proc_conc.py     | 425 ++++++++++++++++++----------
 3 files changed, 787 insertions(+), 139 deletions(-)
 create mode 100644 mlmc/tool/flow_mc_02.py
 create mode 100644 test/02_conc/new_proc_conc.py

diff --git a/mlmc/tool/flow_mc_02.py b/mlmc/tool/flow_mc_02.py
new file mode 100644
index 0000000..682952e
--- /dev/null
+++ b/mlmc/tool/flow_mc_02.py
@@ -0,0 +1,501 @@
+import os
+import os.path
+import subprocess
+import numpy as np
+import shutil
+import ruamel.yaml as yaml
+from typing import List
+import gstools
+from mlmc.level_simulation import LevelSimulation
+from mlmc.tool import gmsh_io
+from mlmc.sim.simulation import Simulation
+from mlmc.quantity.quantity_spec import QuantitySpec
+from mlmc.random import correlated_field as cf
+
+
+def create_corr_field(model='gauss', corr_length=0.125, dim=2, log=True, sigma=1, mode_no=1000):
+    """
+    Create random fields
+    :return:
+    """
+    # por_top = cf.SpatialCorrelatedField(
+    #     corr_exp='gauss',
+    #     dim=2,
+    #     corr_length=0.2,
+    #     mu=-1.0,
+    #     sigma=1.0,
+    #     log=True
+    # )
+
+    por_top = gstools.Gaussian(dim=dim,  len_scale=0.2, mu=-1.0, sigma=1.0, log=True)
+
+    # por_bot = cf.SpatialCorrelatedField(
+    #     corr_exp='gauss',
+    #     dim=2,
+    #     corr_length=0.2,
+    #     mu=-1.0,
+    #     sigma=1.0,
+    #     log=True
+    # )
+
+    por_bot = gstools.Gaussian(dim=dim,  len_scale=0.2, mu=-1.0, sigma=1.0, log=True)
+
+    water_viscosity = 8.90e-4
+
+    fields = cf.Fields([
+        cf.Field('por_top', por_top, regions='ground_0'),
+        cf.Field('porosity_top', cf.positive_to_range, ['por_top', 0.02, 0.1], regions='ground_0'),
+        cf.Field('por_bot', por_bot, regions='ground_1'),
+        cf.Field('porosity_bot', cf.positive_to_range, ['por_bot', 0.01, 0.05], regions='ground_1'),
+        cf.Field('porosity_repo', 0.5, regions='repo'),
+        #cf.Field('factor_top', cf.SpatialCorrelatedField('gauss', mu=1e-8, sigma=1, log=True), regions='ground_0'),
+        cf.Field('factor_top', gstools.Gaussian(len_scale=1, mu=1e-8, sigma=1.0, log=True), regions='ground_0'),
+        # conductivity about
+        #cf.Field('factor_bot', cf.SpatialCorrelatedField('gauss', mu=1e-8, sigma=1, log=True), regions='ground_1'),
+        cf.Field('factor_bot', gstools.Gaussian(len_scale=1, mu=1e-8, sigma=1, log=True), regions='ground_1'),
+        # cf.Field('factor_repo', cf.SpatialCorrelatedField('gauss', mu=1e-10, sigma=1, log=True), regions='repo'),
+        cf.Field('conductivity_top', cf.kozeny_carman, ['porosity_top', 1, 'factor_top', water_viscosity],
+                 regions='ground_0'),
+        cf.Field('conductivity_bot', cf.kozeny_carman, ['porosity_bot', 1, 'factor_bot', water_viscosity],
+                 regions='ground_1'),
+        # cf.Field('conductivity_repo', cf.kozeny_carman, ['porosity_repo', 1, 'factor_repo', water_viscosity], regions='repo')
+        cf.Field('conductivity_repo', 0.001, regions='repo')
+    ])
+
+    return fields
+
+
+def substitute_placeholders(file_in, file_out, params):
+    """
+    Substitute for placeholders of format '<name>' from the dict 'params'.
+    :param file_in: Template file.
+    :param file_out: Values substituted.
+    :param params: { 'name': value, ...}
+    """
+    used_params = []
+    with open(file_in, 'r') as src:
+        text = src.read()
+    for name, value in params.items():
+        placeholder = '<%s>' % name
+        n_repl = text.count(placeholder)
+        if n_repl > 0:
+            used_params.append(name)
+            text = text.replace(placeholder, str(value))
+    with open(file_out, 'w') as dst:
+        dst.write(text)
+    return used_params
+
+
+def force_mkdir(path, force=False):
+    """
+    Make directory 'path' with all parents,
+    remove the leaf dir recursively if it already exists.
+    :param path: path to directory
+    :param force: if dir already exists then remove it and create new one
+    :return: None
+    """
+    if force:
+        if os.path.isdir(path):
+            shutil.rmtree(path)
+    os.makedirs(path, mode=0o775, exist_ok=True)
+
+
+class FlowSimProcConc(Simulation):
+    # placeholders in YAML
+    total_sim_id = 0
+    MESH_FILE_VAR = 'mesh_file'
+    # Timestep placeholder given as O(h), h = mesh step
+    TIMESTEP_H1_VAR = 'timestep_h1'
+    # Timestep placeholder given as O(h^2), h = mesh step
+    TIMESTEP_H2_VAR = 'timestep_h2'
+
+    # files
+    GEO_FILE = 'mesh.geo'
+    MESH_FILE = 'mesh.msh'
+    YAML_TEMPLATE = 'flow_input.yaml.tmpl'
+    YAML_FILE = 'flow_input.yaml'
+    FIELDS_FILE = 'fields_sample.msh'
+
+    """
+    Gather data for single flow call (coarse/fine)
+
+    Usage:
+    mlmc.sampler.Sampler uses instance of FlowSimProcConc, it calls once level_instance() for each level step (The level_instance() method
+     is called as many times as the number of levels), it takes place in main process
+
+    mlmc.tool.pbs_job.PbsJob uses static methods in FlowSimProcConc, it calls calculate(). That's where the calculation actually runs,
+    it takes place in PBS process
+       It also extracts results and passes them back to PbsJob, which handles the rest 
+
+    """
+
+    def __init__(self, config=None, clean=None):
+        """
+        Simple simulation using flow123d
+        :param config: configuration of the simulation, processed keys:
+            env - Environment object.
+            fields - FieldSet object
+            yaml_file: Template for main input file. Placeholders:
+                <mesh_file> - replaced by generated mesh
+                <FIELD> - for FIELD be name of any of `fields`, replaced by the FieldElementwise field with generated
+                 field input file and the field name for the component.
+            geo_file: Path to the geometry file.
+        :param clean: bool, if True remove existing simulation files - mesh files, ...
+        """
+        self.need_workspace = True
+        # This simulation requires workspace
+        self.env = config['env']
+        # Environment variables, flow123d, gmsh, ...
+        self._fields_params = config['fields_params']
+        self._fields = create_corr_field(**config['fields_params'])
+        self._fields_used_params = None
+        # Random fields instance
+        self.time_factor = config.get('time_factor', 1.0)
+        # It is used for minimal element from mesh determination (see level_instance method)
+
+        self.base_yaml_file = config['yaml_file']
+        self.base_geo_file = config['geo_file']
+        self.field_template = config.get('field_template',
+                                         "!FieldElementwise {mesh_data_file: $INPUT_DIR$/%s, field_name: %s}")
+        self.work_dir = config['work_dir']
+        self.clean = clean
+
+        super(Simulation, self).__init__()
+
+    def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation:
+        """
+        Called from mlmc.Sampler, it creates single instance of LevelSimulation (mlmc.)
+        :param fine_level_params: in this version, it is just fine simulation step
+        :param coarse_level_params: in this version, it is just coarse simulation step
+        :return: mlmc.LevelSimulation object, this object is serialized in SamplingPoolPbs and deserialized in PbsJob,
+         so it allows pass simulation data from main process to PBS process
+        """
+        fine_step = fine_level_params[0]
+        coarse_step = coarse_level_params[0]
+
+        # TODO: determine minimal element from mesh
+        self.time_step_h1 = self.time_factor * fine_step
+        self.time_step_h2 = self.time_factor * fine_step * fine_step
+
+        # Set fine simulation common files directory
+        # Files in the directory are used by each simulation at that level
+        common_files_dir = os.path.join(self.work_dir, "l_step_{}_common_files".format(fine_step))
+        force_mkdir(common_files_dir, force=self.clean)
+
+        self.mesh_file = os.path.join(common_files_dir, self.MESH_FILE)
+
+        if self.clean:
+            # Prepare mesh
+            geo_file = os.path.join(common_files_dir, self.GEO_FILE)
+            shutil.copyfile(self.base_geo_file, geo_file)
+            self._make_mesh(geo_file, self.mesh_file, fine_step)  # Common computational mesh for all samples.
+
+            # Prepare main input YAML
+            yaml_template = os.path.join(common_files_dir, self.YAML_TEMPLATE)
+            shutil.copyfile(self.base_yaml_file, yaml_template)
+            yaml_file = os.path.join(common_files_dir, self.YAML_FILE)
+            print("yaml file ", yaml_file)
+
+            self._substitute_yaml(yaml_template, yaml_file)
+
+        # Mesh is extracted because we need number of mesh points to determine task_size parameter (see return value)
+        fine_mesh_data = self.extract_mesh(self.mesh_file)
+
+        # Set coarse simulation common files directory
+        # Files in the directory are used by each simulation at that level
+        coarse_sim_common_files_dir = None
+        if coarse_step != 0:
+            coarse_sim_common_files_dir = os.path.join(self.work_dir, "l_step_{}_common_files".format(coarse_step))
+
+        # Simulation config
+        # Configuration is used in mlmc.tool.pbs_job.PbsJob instance which is run from PBS process
+        # It is part of LevelSimulation which is serialized and then deserialized in mlmc.tool.pbs_job.PbsJob
+        config = dict()
+        config["fine"] = {}
+        config["coarse"] = {}
+        config["fine"]["step"] = fine_step
+        config["coarse"]["step"] = coarse_step
+        config["fine"]["common_files_dir"] = common_files_dir
+        config["coarse"]["common_files_dir"] = coarse_sim_common_files_dir
+
+        config[
+            "fields_used_params"] = self._fields_used_params  # Params for Fields instance, which is createed in PbsJob
+        config["gmsh"] = self.env['gmsh']
+        config["flow123d"] = self.env['flow123d']
+        config['fields_params'] = self._fields_params
+
+        # Auxiliary parameter which I use to determine task_size (should be from 0 to 1, if task_size is above 1 then pbs job is scheduled)
+        job_weight = 17000000  # 4000000 - 20 min, 2000000 - cca 10 min
+
+        return LevelSimulation(config_dict=config,
+                               task_size=len(fine_mesh_data['points']) / job_weight,
+                               calculate=FlowSimProcConc.calculate,
+                               # method which carries out the calculation, will be called from PBS processs
+                               need_sample_workspace=True  # If True, a sample directory is created
+                               )
+
+    @staticmethod
+    def calculate(config, seed):
+        """
+        Method that actually run the calculation, it's called from mlmc.tool.pbs_job.PbsJob.calculate_samples()
+        Calculate fine and coarse sample and also extract their results
+        :param config: dictionary containing simulation configuration, LevelSimulation.config_dict (set in level_instance)
+        :param seed: random seed, int
+        :return: List[fine result, coarse result], both flatten arrays (see mlmc.sim.synth_simulation.calculate())
+        """
+        # Init correlation field objects
+        fields = create_corr_field(**config['fields_params'])  # correlated_field.Fields instance
+        fields.set_outer_fields(config["fields_used_params"])
+
+        coarse_step = config["coarse"]["step"]  # Coarse simulation step, zero if one level MC
+        flow123d = config["flow123d"]  # Flow123d command
+
+        # Extract fine mesh
+        fine_common_files_dir = config["fine"]["common_files_dir"]  # Directory with fine simulation common files
+        fine_mesh_data = FlowSimProcConc.extract_mesh(os.path.join(fine_common_files_dir, FlowSimProcConc.MESH_FILE))
+
+        # Extract coarse mesh
+        coarse_mesh_data = None
+        coarse_common_files_dir = None
+        if coarse_step != 0:
+            coarse_common_files_dir = config["coarse"][
+                "common_files_dir"]  # Directory with coarse simulation common files
+            coarse_mesh_data = FlowSimProcConc.extract_mesh(os.path.join(coarse_common_files_dir, FlowSimProcConc.MESH_FILE))
+
+        # Create fields both fine and coarse
+        fields = FlowSimProcConc.make_fields(fields, fine_mesh_data, coarse_mesh_data)
+
+        # Set random seed, seed is calculated from sample id, so it is not user defined
+        np.random.seed(seed)
+        # Generate random samples
+        fine_input_sample, coarse_input_sample = FlowSimProcConc.generate_random_sample(fields, coarse_step=coarse_step,
+                                                                                n_fine_elements=len(
+                                                                                    fine_mesh_data['points']))
+
+        # Run fine sample
+        fields_file = os.path.join(os.getcwd(), FlowSimProcConc.FIELDS_FILE)
+        fine_res = FlowSimProcConc._run_sample(fields_file, fine_mesh_data['ele_ids'], fine_input_sample, flow123d,
+                                       fine_common_files_dir)
+
+        # Rename fields_sample.msh to fine_fields_sample.msh, we might remove it
+        for filename in os.listdir(os.getcwd()):
+            if not filename.startswith("fine"):
+                shutil.move(os.path.join(os.getcwd(), filename), os.path.join(os.getcwd(), "fine_" + filename))
+
+        # Run coarse sample
+        coarse_res = np.zeros(len(fine_res))
+        if coarse_input_sample:
+            coarse_res = FlowSimProcConc._run_sample(fields_file, coarse_mesh_data['ele_ids'], coarse_input_sample, flow123d,
+                                             coarse_common_files_dir)
+
+        return fine_res, coarse_res
+
+    @staticmethod
+    def make_fields(fields, fine_mesh_data, coarse_mesh_data):
+        """
+        Create random fields that are used by both coarse and fine simulation
+        :param fields: correlated_field.Fields instance
+        :param fine_mesh_data: Dict contains data extracted from fine mesh file (points, point_region_ids, region_map)
+        :param coarse_mesh_data: Dict contains data extracted from coarse mesh file (points, point_region_ids, region_map)
+        :return: correlated_field.Fields
+        """
+        # One level MC has no coarse_mesh_data
+        if coarse_mesh_data is None:
+            fields.set_points(fine_mesh_data['points'], fine_mesh_data['point_region_ids'],
+                              fine_mesh_data['region_map'])
+        else:
+            coarse_centers = coarse_mesh_data['points']
+            both_centers = np.concatenate((fine_mesh_data['points'], coarse_centers), axis=0)
+            both_regions_ids = np.concatenate(
+                (fine_mesh_data['point_region_ids'], coarse_mesh_data['point_region_ids']))
+            assert fine_mesh_data['region_map'] == coarse_mesh_data['region_map']
+            fields.set_points(both_centers, both_regions_ids, fine_mesh_data['region_map'])
+
+        return fields
+
+    @staticmethod
+    def _run_sample(fields_file, ele_ids, fine_input_sample, flow123d, common_files_dir):
+        """
+        Create random fields file, call Flow123d and extract results
+        :param fields_file: Path to file with random fields
+        :param ele_ids: Element IDs in computational mesh
+        :param fine_input_sample: fields: {'field_name' : values_array, ..}
+        :param flow123d: Flow123d command
+        :param common_files_dir: Directory with simulations common files (flow_input.yaml, )
+        :return: simulation result, ndarray
+        """
+        gmsh_io.GmshIO().write_fields(fields_file, ele_ids, fine_input_sample)
+
+        # x = [*flow123d, "--yaml_balance", '-i', os.getcwd(), '-s', "{}/flow_input.yaml".format(common_files_dir),
+        #          "-o", os.getcwd(), ">{}/flow.out".format(os.getcwd())]
+
+        try:
+            subprocess.call(
+                [*flow123d, "--yaml_balance", '-i', os.getcwd(), '-s', "{}/flow_input.yaml".format(common_files_dir),
+                 "-o", os.getcwd(), ">{}/flow.out".format(os.getcwd())])
+        except:
+            import sys
+            print(sys.exc_info())
+
+        return FlowSimProcConc._extract_result(os.getcwd())
+
+    @staticmethod
+    def generate_random_sample(fields, coarse_step, n_fine_elements):
+        """
+        Generate random field, both fine and coarse part.
+        Store them separeted.
+        :return: Dict, Dict
+        """
+        fields_sample = fields.sample()
+        fine_input_sample = {name: values[:n_fine_elements, None] for name, values in fields_sample.items()}
+        coarse_input_sample = {}
+        if coarse_step != 0:
+            coarse_input_sample = {name: values[n_fine_elements:, None] for name, values in
+                                   fields_sample.items()}
+
+        return fine_input_sample, coarse_input_sample
+
+    def _make_mesh(self, geo_file, mesh_file, fine_step):
+        """
+        Make the mesh, mesh_file: <geo_base>_step.msh.
+        Make substituted yaml: <yaml_base>_step.yaml,
+        using common fields_step.msh file for generated fields.
+        :return:
+        """
+        if self.env['gmsh_version'] == 2:
+            subprocess.call(
+                [self.env['gmsh'], "-2", '-format', 'msh2', '-clscale', str(fine_step), '-o', mesh_file, geo_file])
+        else:
+            subprocess.call([self.env['gmsh'], "-2", '-clscale', str(fine_step), '-o', mesh_file, geo_file])
+
+    @staticmethod
+    def extract_mesh(mesh_file):
+        """
+        Extract mesh from file
+        :param mesh_file: Mesh file path
+        :return: Dict
+        """
+        mesh = gmsh_io.GmshIO(mesh_file)
+        is_bc_region = {}
+        region_map = {}
+        for name, (id, _) in mesh.physical.items():
+            unquoted_name = name.strip("\"'")
+            is_bc_region[id] = (unquoted_name[0] == '.')
+            region_map[unquoted_name] = id
+
+        bulk_elements = []
+        for id, el in mesh.elements.items():
+            _, tags, i_nodes = el
+            region_id = tags[0]
+            if not is_bc_region[region_id]:
+                bulk_elements.append(id)
+
+        n_bulk = len(bulk_elements)
+        centers = np.empty((n_bulk, 3))
+        ele_ids = np.zeros(n_bulk, dtype=int)
+        point_region_ids = np.zeros(n_bulk, dtype=int)
+
+        for i, id_bulk in enumerate(bulk_elements):
+            _, tags, i_nodes = mesh.elements[id_bulk]
+            region_id = tags[0]
+            centers[i] = np.average(np.array([mesh.nodes[i_node] for i_node in i_nodes]), axis=0)
+            point_region_ids[i] = region_id
+            ele_ids[i] = id_bulk
+
+        min_pt = np.min(centers, axis=0)
+        max_pt = np.max(centers, axis=0)
+        diff = max_pt - min_pt
+        min_axis = np.argmin(diff)
+        non_zero_axes = [0, 1, 2]
+        # TODO: be able to use this mesh_dimension in fields
+        if diff[min_axis] < 1e-10:
+            non_zero_axes.pop(min_axis)
+        points = centers[:, non_zero_axes]
+
+        return {'points': points, 'point_region_ids': point_region_ids, 'ele_ids': ele_ids, 'region_map': region_map}
+
+    def _substitute_yaml(self, yaml_tmpl, yaml_out):
+        """
+        Create substituted YAML file from the tamplate.
+        :return:
+        """
+        param_dict = {}
+        field_tmpl = self.field_template
+        for field_name in self._fields.names:
+            param_dict[field_name] = field_tmpl % (self.FIELDS_FILE, field_name)
+        param_dict[self.MESH_FILE_VAR] = self.mesh_file
+        param_dict[self.TIMESTEP_H1_VAR] = self.time_step_h1
+        param_dict[self.TIMESTEP_H2_VAR] = self.time_step_h2
+        used_params = substitute_placeholders(yaml_tmpl, yaml_out, param_dict)
+
+        self._fields_used_params = used_params
+
+    @staticmethod
+    def _extract_result(sample_dir):
+        """
+        Extract the observed value from the Flow123d output.
+        :param sample_dir: str, path to sample directory
+        :return: None, inf or water balance result (float) and overall sample time
+        """
+        # extract the flux
+        balance_file = os.path.join(sample_dir, "mass_balance.yaml")
+
+        with open(balance_file, "r") as f:
+            balance = yaml.load(f)
+
+        flux_regions = ['.surface']
+        max_flux = 0.0
+        found = False
+        for flux_item in balance['data']:
+            if 'region' not in flux_item:
+                os.remove(os.path.join(sample_dir, "mass_balance.yaml"))
+                break
+
+            if flux_item['region'] in flux_regions:
+                out_flux = -float(flux_item['data'][0])
+                if not np.isfinite(out_flux):
+                    return np.inf
+                # flux_in = float(flux_item['data'][1])
+                # if flux_in > 1e-10:
+                #    raise Exception("Possitive inflow at outlet region.")
+                max_flux = max(max_flux, out_flux)  # flux field
+                found = True
+
+        # Get flow123d computing time
+        # run_time = FlowSimProcConc.get_run_time(sample_dir)
+
+        if not found:
+            raise Exception
+        return np.array([max_flux])
+
+    @staticmethod
+    def result_format() -> List[QuantitySpec]:
+        """
+        Define simulation result format
+        :return: List[QuantitySpec, ...]
+        """
+        spec1 = QuantitySpec(name="conductivity", unit="m", shape=(1, 1), times=[1], locations=['0'])
+        # spec2 = QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=['30', '40'])
+        return [spec1]
+
+    # @staticmethod
+    # def get_run_time(sample_dir):
+    #     """
+    #     Get flow123d sample running time from profiler
+    #     :param sample_dir: Sample directory
+    #     :return: float
+    #     """
+    #     profiler_file = os.path.join(sample_dir, "profiler_info_*.json")
+    #     profiler = glob.glob(profiler_file)[0]
+    #
+    #     try:
+    #         with open(profiler, "r") as f:
+    #             prof_content = json.load(f)
+    #
+    #         run_time = float(prof_content['children'][0]['cumul-time-sum'])
+    #     except:
+    #         print("Extract run time failed")
+    #
+    #     return run_time
+
+
diff --git a/test/02_conc/new_proc_conc.py b/test/02_conc/new_proc_conc.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/02_conc/proc_conc.py b/test/02_conc/proc_conc.py
index 9cf8e4b..4859c40 100644
--- a/test/02_conc/proc_conc.py
+++ b/test/02_conc/proc_conc.py
@@ -1,176 +1,323 @@
 import os
 import sys
-import ruamel.yaml as yaml
 import numpy as np
+import mlmc.tool.simple_distribution
+from mlmc.sampler import Sampler
+from mlmc.sample_storage_hdf import SampleStorageHDF
+from mlmc.sampling_pool import OneProcessPool
+from mlmc.sampling_pool_pbs import SamplingPoolPBS
+from mlmc.tool.flow_mc import FlowSim
+from mlmc.moments import Legendre
+from mlmc.tool.process_base import ProcessBase
+from mlmc.quantity.quantity import make_root_quantity
+from mlmc.quantity.quantity_estimate import estimate_mean, moments
+from mlmc import estimator
 
-src_path = os.path.dirname(os.path.abspath(__file__))
-sys.path.append(os.path.join(src_path, '..', '..', 'src'))
 
-import mlmc.mlmc
-import mlmc.simulation
-import mlmc.moments
-import mlmc.distribution
-import flow_mc as flow_mc
-import mlmc.correlated_field as cf
-import mlmc.estimate
+class ProcConc:
 
-sys.path.append(os.path.join(src_path, '..'))
-import base_process
+    def __init__(self):
+        args = ProcessBase.get_arguments(sys.argv[1:])
 
+        self.work_dir = os.path.abspath(args.work_dir)
+        # Add samples to existing ones
+        self.clean = args.clean
+        # Remove HDF5 file, start from scratch
+        self.debug = args.debug
+        # 'Debug' mode is on - keep sample directories
+        self.use_pbs = True
+        # Use PBS sampling pool
+        self.n_levels = 1
+        self.n_moments = 25
+        # Number of MLMC levels
 
-class FlowConcSim(flow_mc.FlowSim):
-    """
-    Child from FlowSimulation that defines extract method
-    """
+        # step_range = [0.055, 0.0035]
+        step_range = [1, 0.02]
+        # step_range = [0.1, 0.055]
+        # step   - elements
+        # 0.1    - 262
+        # 0.08   - 478
+        # 0.06   - 816
+        # 0.055  - 996
+        # 0.006 -  74188
+        # 0.0055 - 87794
+        # 0.005  - 106056
+        # 0.004  - 165404
+        # 0.0035 - 217208
 
-    def _extract_result(self, sample):
-        """
-        Extract the observed value from the Flow123d output.
-        Get sample from the field restriction, write to the GMSH file, call flow.
-        :param sample: mlmc.sample Sample
-        :return: None, total flux (float) and overall sample time
-        """
-        sample_dir = sample.directory
-        if os.path.exists(os.path.join(sample_dir, "FINISHED")):
-            # extract the flux
-            balance_file = os.path.join(sample_dir, "mass_balance.yaml")
-
-            with open(balance_file, "r") as f:
-                balance = yaml.load(f)
-
-            # it has to be changed for every new input file or different observation.
-            # However in Analysis it is already done in general way.
-            flux_regions = ['.surface']
-            max_flux = 0.0
-            found = False
-
-            for flux_item in balance['data']:
-                if 'region' not in flux_item:
-                    os.remove(os.path.join(sample_dir, "mass_balance.yaml"))
-                    return None
-
-                if flux_item['region'] in flux_regions:
-                    out_flux = -float(flux_item['data'][0])
-                    if not np.isfinite(out_flux):
-                        return np.inf
-                    # flux_in = float(flux_item['data'][1])
-                    # if flux_in > 1e-10:
-                    #    raise Exception("Possitive inflow at outlet region.")
-                    max_flux = max(max_flux, out_flux)  # flux field
-                    found = True
-
-            if not found:
-                raise Exception
-
-            # Get flow123d computing time
-            run_time = self.get_run_time(sample_dir)
-
-            return max_flux, run_time
+        # step_range [simulation step at the coarsest level, simulation step at the finest level]
+
+        # Determine level parameters at each level (In this case, simulation step at each level) are set automatically
+        self.level_parameters = estimator.determine_level_parameters(self.n_levels, step_range)
+
+        # Determine number of samples at each level
+        self.n_samples = estimator.determine_n_samples(self.n_levels)
+
+        if args.command == 'run':
+            self.run()
+        elif args.command == "process":
+            self.process()
         else:
-            return None, 0
+            self.clean = False
+            self.run(renew=True) if args.command == 'renew' else self.run()
+
+    def process(self):
+        sample_storage = SampleStorageHDF(file_path=os.path.join(self.work_dir, "mlmc_{}.hdf5".format(self.n_levels)))
+        sample_storage.chunk_size = 1e8
+        result_format = sample_storage.load_result_format()
+        root_quantity = make_root_quantity(sample_storage, result_format)
+
+        conductivity = root_quantity['conductivity']
+        time = conductivity[1]  # times: [1]
+        location = time['0']  # locations: ['0']
+        q_value = location[0, 0]
+
+        # @TODO: How to estimate true_domain?
+        quantile = 0.001
+        true_domain = mlmc.estimator.Estimate.estimate_domain(q_value, sample_storage, quantile=quantile)
+        moments_fn = Legendre(self.n_moments, true_domain)
+
+        estimator = mlmc.estimator.Estimate(quantity=q_value, sample_storage=sample_storage, moments_fn=moments_fn)
+        means, vars = estimator.estimate_moments(moments_fn)
+
+        moments_quantity = moments(root_quantity, moments_fn=moments_fn, mom_at_bottom=True)
+        moments_mean = estimate_mean(moments_quantity)
+        conductivity_mean = moments_mean['conductivity']
+        time_mean = conductivity_mean[1]  # times: [1]
+        location_mean = time_mean['0']  # locations: ['0']
+        values_mean = location_mean[0]  # result shape: (1,)
+        value_mean = values_mean[0]
+        assert value_mean.mean == 1
+
+        # true_domain = [-10, 10]  # keep all values on the original domain
+        # central_moments = Monomial(self.n_moments, true_domain, ref_domain=true_domain, mean=means())
+        # central_moments_quantity = moments(root_quantity, moments_fn=central_moments, mom_at_bottom=True)
+        # central_moments_mean = estimate_mean(central_moments_quantity)
+
+        #estimator.sub_subselect(sample_vector=[10000])
+
+        #self.process_target_var(estimator)
+        self.construct_density(estimator, tol=1e-8)
+        #self.data_plots(estimator)
 
+    def data_plots(self, estimator):
+        estimator.fine_coarse_violinplot()
 
-class ProcConc(base_process.Process):
+    def process_target_var(self, estimator):
+        n0, nL = 100, 3
+        n_samples = np.round(np.exp2(np.linspace(np.log2(n0), np.log2(nL), self.n_levels))).astype(int)
 
-    def run(self):
+        n_estimated = estimator.bs_target_var_n_estimated(target_var=1e-5, sample_vec=n_samples)  # number of estimated sampels for given target variance
+        estimator.plot_variances(sample_vec=n_estimated)
+        estimator.plot_bs_var_log(sample_vec=n_estimated)
+
+    def construct_density(self, estimator, tol=1.95, reg_param=0.0):
+        """
+        Construct approximation of the density using given moment functions.
+        :param estimator: mlmc.estimator.Estimate instance, it contains quantity for which the density is reconstructed
+        :param tol: Tolerance of the fitting problem, with account for variances in moments.
+                    Default value 1.95 corresponds to the two tail confidence 0.95.
+        :param reg_param: regularization parameter
+        :return: None
+        """
+        distr_obj, result, _, _ = estimator.construct_density(tol=tol, reg_param=reg_param)
+        distr_plot = mlmc.plot.plots.Distribution(title="{} levels, {} moments".format(self.n_levels, self.n_moments))
+        distr_plot.add_distribution(distr_obj, label="#{}".format(self.n_moments))
+
+        if self.n_levels == 1:
+            samples = estimator.get_level_samples(level_id=0)[..., 0]
+            distr_plot.add_raw_samples(np.squeeze(samples))
+        distr_plot.show(None)
+        distr_plot.show(file=os.path.join(self.work_dir, "pdf_cdf_{}_moments_1".format(self.n_moments)))
+        distr_plot.reset()
+
+    def run(self, renew=False):
         """
-        Run mlmc
+        Run MLMC
+        :param renew: If True then rerun failed samples with same sample id
         :return: None
         """
+        # Create working directory if necessary
         os.makedirs(self.work_dir, mode=0o775, exist_ok=True)
 
-        mlmc_list = []
-        for nl in [5]:  # , 2, 3, 4,5, 7, 9]:
-            mlmc = self.setup_config(nl, clean=True)
-            # self.n_sample_estimate(mlmc)
-            self.generate_jobs(mlmc, n_samples=[5, 5, 5, 5, 5])
-            mlmc_list.append(mlmc)
+        if self.clean:
+            # Remove HFD5 file
+            if os.path.exists(os.path.join(self.work_dir, "mlmc_{}.hdf5".format(self.n_levels))):
+                os.remove(os.path.join(self.work_dir, "mlmc_{}.hdf5".format(self.n_levels)))
 
-        self.all_collect(mlmc_list)
+        # Create sampler (mlmc.Sampler instance) - crucial class which actually schedule samples
+        sampler = self.setup_config(clean=self.clean)
+        # Schedule samples
+        self.generate_jobs(sampler, n_samples=[5], renew=renew)
+        #self.generate_jobs(sampler, n_samples=None, renew=renew, target_var=1e-3)
+        #self.generate_jobs(sampler, n_samples=[500, 500], renew=renew, target_var=1e-5)
+        self.all_collect(sampler)  # Check if all samples are finished
 
-    def setup_config(self, n_levels, clean):
+    def setup_config(self, clean):
         """
-        Set simulation configuration, object for generating correlated fields ...
-        :param n_levels: Number of levels
-        :param clean: bool
-        :return: None
+        Simulation dependent configuration
+        :param clean: bool, If True remove existing files
+        :return: mlmc.sampler instance
         """
-        # Set pbs config, flow123d, gmsh, ...
+        # Set pbs config, flow123d, gmsh, ..., random fields are set in simulation class
         self.set_environment_variables()
-        output_dir = os.path.join(self.work_dir, "output_{}".format(n_levels))
-        # remove existing files
-        if clean:
-            self.rm_files(output_dir)
-
-        # Init pbs object
-        self.create_pbs_object(output_dir, clean)
-
-        por_top = cf.SpatialCorrelatedField(
-            corr_exp='gauss',
-            dim=2,
-            corr_length=0.2,
-            mu=-1.0,
-            sigma=1.0,
-            log=True
-        )
-        por_bot = cf.SpatialCorrelatedField(
-            corr_exp='gauss',
-            dim=2,
-            corr_length=0.2,
-            mu=-1.0,
-            sigma=1.0,
-            log=True
-        )
-        water_viscosity = 8.90e-4
-        fields = cf.Fields([
-            cf.Field('por_top', por_top, regions='ground_0'),
-            cf.Field('porosity_top', cf.positive_to_range, ['por_top', 0.02, 0.1], regions='ground_0'),
-            cf.Field('por_bot', por_bot, regions='ground_1'),
-            cf.Field('porosity_bot', cf.positive_to_range, ['por_bot', 0.01, 0.05], regions='ground_1'),
-            cf.Field('porosity_repo', 0.5, regions='repo'),
-            cf.Field('factor_top', cf.SpatialCorrelatedField('gauss', mu=1e-8, sigma=1, log=True), regions='ground_0'),
-            # conductivity about
-            cf.Field('factor_bot', cf.SpatialCorrelatedField('gauss', mu=1e-8, sigma=1, log=True), regions='ground_1'),
-            # cf.Field('factor_repo', cf.SpatialCorrelatedField('gauss', mu=1e-10, sigma=1, log=True), regions='repo'),
-            cf.Field('conductivity_top', cf.kozeny_carman, ['porosity_top', 1, 'factor_top', water_viscosity],
-                     regions='ground_0'),
-            cf.Field('conductivity_bot', cf.kozeny_carman, ['porosity_bot', 1, 'factor_bot', water_viscosity],
-                     regions='ground_1'),
-            # cf.Field('conductivity_repo', cf.kozeny_carman, ['porosity_repo', 1, 'factor_repo', water_viscosity], regions='repo')
-            cf.Field('conductivity_repo', 0.001, regions='repo')
-        ])
-
-        self.step_range = (1, 0.02)  # finest mesh about 18k elements
+
+        # Create Pbs sampling pool
+        sampling_pool = self.create_sampling_pool()
+
+        # simulation_config = {
+        #     'work_dir': self.work_dir,
+        #     'env': dict(flow123d=self.flow123d, gmsh=self.gmsh, gmsh_version=1),  # The Environment.
+        #     'yaml_file': os.path.join(self.work_dir, '01_conductivity.yaml'),
+        #     'geo_file': os.path.join(self.work_dir, 'square_1x1.geo'),
+        #     'fields_params': dict(model='exp', sigma=4, corr_length=0.1),
+        #     'field_template': "!FieldElementwise {mesh_data_file: \"$INPUT_DIR$/%s\", field_name: %s}"
+        # }
 
         simulation_config = {
-            'env': dict(flow123d=self.flow123d, gmsh=self.gmsh, pbs=self.pbs_obj),  # The Environment.
-            'output_dir': output_dir,
-            'fields': fields,
+            'work_dir': self.work_dir,
+            'env': dict(flow123d=self.flow123d, gmsh=self.gmsh, gmsh_version=1),  # The Environment.
             'time_factor': 1e7,  # max velocity about 1e-8
             'yaml_file': os.path.join(self.work_dir, '02_conc_tmpl.yaml'),
-        # The template with a mesh and field placeholders
-            'sim_param_range': self.step_range,  # Range of MLMC simulation parametr. Here the mesh step.
             'geo_file': os.path.join(self.work_dir, 'repo.geo'),
-        # The file with simulation geometry (independent of the step)
-            # 'field_template': "!FieldElementwise {gmsh_file: \"${INPUT}/%s\", field_name: %s}"
             'field_template': "!FieldElementwise {mesh_data_file: \"$INPUT_DIR$/%s\", field_name: %s}"
-
         }
 
-        FlowConcSim.total_sim_id = 0
+        # Create simulation factory
+        simulation_factory = FlowSim(config=simulation_config, clean=clean)
+
+        # Create HDF sample storage
+        sample_storage = SampleStorageHDF(
+            file_path=os.path.join(self.work_dir, "mlmc_{}.hdf5".format(self.n_levels)))
+
+        # Create sampler, it manages sample scheduling and so on
+        sampler = Sampler(sample_storage=sample_storage, sampling_pool=sampling_pool, sim_factory=simulation_factory,
+                          level_parameters=self.level_parameters)
 
-        self.options['output_dir'] = output_dir
-        mlmc_obj = mlmc.mlmc.MLMC(n_levels, FlowConcSim.factory(self.step_range, config=simulation_config, clean=clean),
-                                  self.step_range, self.options)
+        return sampler
 
-        if clean:
-            # Create new execution of mlmc
-            mlmc_obj.create_new_execution()
+    def set_environment_variables(self):
+        """
+        Set pbs config, flow123d, gmsh
+        :return: None
+        """
+        root_dir = os.path.abspath(self.work_dir)
+        while root_dir != '/':
+            root_dir, tail = os.path.split(root_dir)
+
+        if tail == 'storage' or tail == 'auto':
+            # Metacentrum
+            self.sample_sleep = 30
+            self.init_sample_timeout = 600
+            self.sample_timeout = 60
+            self.adding_samples_coef = 0.1
+            self.flow123d = 'flow123d'  # "/storage/praha1/home/jan_brezina/local/flow123d_2.2.0/flow123d"
+            self.gmsh = "/storage/liberec3-tul/home/martin_spetlik/astra/gmsh/bin/gmsh"
         else:
-            # Use existing mlmc HDF file
-            mlmc_obj.load_from_file()
-        return mlmc_obj
+            # Local
+            self.sample_sleep = 1
+            self.init_sample_timeout = 60
+            self.sample_timeout = 60
+            self.adding_samples_coef = 0.1
+            self.flow123d = ["/home/martin/Documents/flow_package_test/publish/flow123d_3.1.0_linux_install/flow123d_3.1.0/bin/fterm.sh", "flow123d"]
+            # self.flow123d = [
+            #     "/home/martin/Documents/flow123d/bin/fterm", "flow123d"]
+            #self.flow123d = ["/home/martin/Documents/flow123d_3.0.1/bin/fterm.sh", "flow123d"]
+            self.gmsh = "/home/martin/gmsh/bin/gmsh"
+
+    def create_sampling_pool(self):
+        """
+        Initialize sampling pool, object which
+        :return: None
+        """
+        if not self.use_pbs:
+            return OneProcessPool(work_dir=self.work_dir, debug=self.debug)  # Everything runs in one process
+
+        # Create PBS sampling pool
+        #sampling_pool = SamplingPoolPBS(work_dir=self.work_dir, debug=self.debug)
+        sampling_pool = OneProcessPool(work_dir=self.work_dir, debug=self.debug)
+
+        # pbs_config = dict(
+        #     n_cores=1,
+        #     n_nodes=1,
+        #     select_flags=['cgroups=cpuacct'],
+        #     mem='1Gb',
+        #     queue='charon',
+        #     pbs_name='flow123d',
+        #     walltime='72:00:00',
+        #     optional_pbs_requests=[],  # e.g. ['#PBS -m ae', ...]
+        #     home_dir='/storage/liberec3-tul/home/martin_spetlik/',
+        #     python='python3.8',
+        #     env_setting=['cd $MLMC_WORKDIR',
+        #                  'module load python/3.8.0-gcc',
+        #                  'source env/bin/activate',
+        #                  'module use /storage/praha1/home/jan-hybs/modules',
+        #                  'module load flow123d',
+        #                  'module unload python-3.6.2-gcc',
+        #                  'module unload python36-modules-gcc']
+        # )
+        #
+        # sampling_pool.pbs_common_setting(flow_3=True, **pbs_config)
+
+        return sampling_pool
+
+    def generate_jobs(self, sampler, n_samples=None, renew=False, target_var=None):
+        """
+        Generate level samples
+        :param n_samples: None or list, number of samples for each level
+        :param renew: rerun failed samples with same random seed (= same sample id)
+        :return: None
+        """
+        if renew:
+            sampler.ask_sampling_pool_for_samples()
+            sampler.renew_failed_samples()
+            sampler.ask_sampling_pool_for_samples(sleep=self.sample_sleep, timeout=self.sample_timeout)
+        else:
+            if n_samples is not None:
+                sampler.set_initial_n_samples(n_samples)
+            else:
+                sampler.set_initial_n_samples()
+            sampler.schedule_samples()
+            sampler.ask_sampling_pool_for_samples(sleep=self.sample_sleep, timeout=self.sample_timeout)
+            self.all_collect(sampler)
+
+            if target_var is not None:
+                root_quantity = make_root_quantity(storage=sampler.sample_storage,
+                                                   q_specs=sampler.sample_storage.load_result_format())
+
+                moments_fn = self.set_moments(root_quantity, sampler.sample_storage, n_moments=self.n_moments)
+                estimate_obj = estimator.Estimate(root_quantity, sample_storage=sampler.sample_storage,
+                                                   moments_fn=moments_fn)
+
+                # New estimation according to already finished samples
+                variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler._n_scheduled_samples)
+                n_estimated = estimator.estimate_n_samples_for_target_variance(target_var, variances, n_ops,
+                                                                                   n_levels=sampler.n_levels)
+
+                # Loop until number of estimated samples is greater than the number of scheduled samples
+                while not sampler.process_adding_samples(n_estimated, self.sample_sleep, self.adding_samples_coef,
+                                                         timeout=self.sample_timeout):
+                    # New estimation according to already finished samples
+                    variances, n_ops = estimate_obj.estimate_diff_vars_regression(sampler._n_scheduled_samples)
+                    n_estimated = estimator.estimate_n_samples_for_target_variance(target_var, variances, n_ops,
+                                                                                       n_levels=sampler.n_levels)
+
+    def set_moments(self, quantity, sample_storage, n_moments=5):
+        true_domain = estimator.Estimate.estimate_domain(quantity, sample_storage, quantile=0.01)
+        return Legendre(n_moments, true_domain)
+
+    def all_collect(self, sampler):
+        """
+        Collect samples
+        :param sampler: mlmc.Sampler object
+        :return: None
+        """
+        running = 1
+        while running > 0:
+            running = 0
+            running += sampler.ask_sampling_pool_for_samples(sleep=self.sample_sleep, timeout=1e-5)
+            print("N running: ", running)
 
 
 if __name__ == "__main__":
-    pr = ProcConc()
+    ProcConc()

From babbbc8ccac40f38d9c5bcaefc382524ce8f2736 Mon Sep 17 00:00:00 2001
From: martinspetlik <martin.spetlik>
Date: Thu, 14 Oct 2021 12:53:18 +0200
Subject: [PATCH 3/6] flow mc 02 gstools

---
 mlmc/random/correlated_field.py |  3 +--
 mlmc/tool/flow_mc_02.py         | 14 ++++++++------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/mlmc/random/correlated_field.py b/mlmc/random/correlated_field.py
index ba83e15..201f28c 100644
--- a/mlmc/random/correlated_field.py
+++ b/mlmc/random/correlated_field.py
@@ -62,7 +62,7 @@ def __init__(self, name, field=None, param_fields=[], regions=[]):
         if type(field) in [float, int]:
             self.const = field
             assert len(param_fields) == 0
-        elif isinstance(field, RandomFieldBase):
+        elif isinstance(field, RandomFieldBase) or isinstance(field, gstools.covmodel.models.CovModel):
             self.correlated_field = field
             assert len(param_fields) == 0
         else:
@@ -372,7 +372,6 @@ def _initialize(self, **kwargs):
         """
         Called after initialization in common constructor.
         """
-
         ### Attributes computed in precalculation.
         self.cov_mat = None
         # Covariance matrix (dense).
diff --git a/mlmc/tool/flow_mc_02.py b/mlmc/tool/flow_mc_02.py
index 682952e..0d38dbf 100644
--- a/mlmc/tool/flow_mc_02.py
+++ b/mlmc/tool/flow_mc_02.py
@@ -26,9 +26,13 @@ def create_corr_field(model='gauss', corr_length=0.125, dim=2, log=True, sigma=1
     #     sigma=1.0,
     #     log=True
     # )
+    #
+    # print("por top ", por_top)
 
     por_top = gstools.Gaussian(dim=dim,  len_scale=0.2, mu=-1.0, sigma=1.0, log=True)
 
+    #print("por top gstools ", por_top_gstools)
+
     # por_bot = cf.SpatialCorrelatedField(
     #     corr_exp='gauss',
     #     dim=2,
@@ -110,10 +114,10 @@ class FlowSimProcConc(Simulation):
     TIMESTEP_H2_VAR = 'timestep_h2'
 
     # files
-    GEO_FILE = 'mesh.geo'
-    MESH_FILE = 'mesh.msh'
-    YAML_TEMPLATE = 'flow_input.yaml.tmpl'
-    YAML_FILE = 'flow_input.yaml'
+    GEO_FILE = 'repo.geo'
+    MESH_FILE = 'repo.msh'
+    YAML_TEMPLATE = '02_conc_tmpl.yaml'
+    YAML_FILE = '02_conc.yaml'
     FIELDS_FILE = 'fields_sample.msh'
 
     """
@@ -194,8 +198,6 @@ def level_instance(self, fine_level_params: List[float], coarse_level_params: Li
             yaml_template = os.path.join(common_files_dir, self.YAML_TEMPLATE)
             shutil.copyfile(self.base_yaml_file, yaml_template)
             yaml_file = os.path.join(common_files_dir, self.YAML_FILE)
-            print("yaml file ", yaml_file)
-
             self._substitute_yaml(yaml_template, yaml_file)
 
         # Mesh is extracted because we need number of mesh points to determine task_size parameter (see return value)

From 94e4d65d16e292d4c834faa4e8f7f62fe5297871 Mon Sep 17 00:00:00 2001
From: martinspetlik <martin.spetlik>
Date: Fri, 22 Oct 2021 12:20:31 +0200
Subject: [PATCH 4/6] proc con distr

---
 mlmc/tool/simple_distribution.py | 19 ++++++++++---------
 test/02_conc/new_proc_conc.py    |  0
 test/02_conc/proc_conc.py        | 23 ++++++++++++++++++-----
 3 files changed, 28 insertions(+), 14 deletions(-)
 delete mode 100644 test/02_conc/new_proc_conc.py

diff --git a/mlmc/tool/simple_distribution.py b/mlmc/tool/simple_distribution.py
index 77513f6..a2d3c2c 100644
--- a/mlmc/tool/simple_distribution.py
+++ b/mlmc/tool/simple_distribution.py
@@ -68,6 +68,8 @@ def estimate_density_minimize(self, tol=1e-5, reg_param =0.01):
                                       options={'tol': tol, 'xtol': tol,
                                                'gtol': tol, 'disp': False,  'maxiter': max_it})
         self.multipliers = result.x
+
+        print("self multipliers ", self.multipliers)
         jac_norm = np.linalg.norm(result.jac)
         if self._verbose:
             print("size: {} nits: {} tol: {:5.3g} res: {:5.3g} msg: {}".format(
@@ -270,7 +272,9 @@ def _calculate_functional(self, multipliers):
         end_diff = np.dot(self._end_point_diff, multipliers)
         penalty = np.sum(np.maximum(end_diff, 0)**2)
         fun = sum + integral
-        fun = fun + np.abs(fun) * self._penalty_coef * penalty
+
+        if self._penalty_coef > 0:
+            fun = fun + (np.abs(fun) * self._penalty_coef * penalty)
 
         return fun
 
@@ -287,7 +291,11 @@ def _calculate_gradient(self, multipliers):
         end_diff = np.dot(self._end_point_diff, multipliers)
         penalty = 2 * np.dot( np.maximum(end_diff, 0), self._end_point_diff)
         fun = np.sum(self.moment_means * multipliers / self._moment_errs) + integral[0] * self._moment_errs[0]
-        gradient = self.moment_means / self._moment_errs - integral + np.abs(fun) * self._penalty_coef * penalty
+
+        gradient = self.moment_means / self._moment_errs - integral
+        if self._penalty_coef > 0:
+            gradient = gradient + np.abs(fun) * self._penalty_coef * penalty
+
         return gradient
 
     def _calculate_jacobian_matrix(self, multipliers):
@@ -317,13 +325,6 @@ def _calculate_jacobian_matrix(self, multipliers):
                 penalty = 2 * np.outer(self._end_point_diff[side], self._end_point_diff[side])
                 jacobian_matrix += np.abs(fun) * self._penalty_coef * penalty
 
-
-        #e_vals = np.linalg.eigvalsh(jacobian_matrix)
-
-        #print(multipliers)
-        #print("jac spectra: ", e_vals)
-        #print("means:", self.moment_means)
-        #print("\n jac:", np.diag(jacobian_matrix))
         return jacobian_matrix
 
 
diff --git a/test/02_conc/new_proc_conc.py b/test/02_conc/new_proc_conc.py
deleted file mode 100644
index e69de29..0000000
diff --git a/test/02_conc/proc_conc.py b/test/02_conc/proc_conc.py
index 4859c40..c06330f 100644
--- a/test/02_conc/proc_conc.py
+++ b/test/02_conc/proc_conc.py
@@ -6,7 +6,7 @@
 from mlmc.sample_storage_hdf import SampleStorageHDF
 from mlmc.sampling_pool import OneProcessPool
 from mlmc.sampling_pool_pbs import SamplingPoolPBS
-from mlmc.tool.flow_mc import FlowSim
+from mlmc.tool.flow_mc_02 import FlowSimProcConc
 from mlmc.moments import Legendre
 from mlmc.tool.process_base import ProcessBase
 from mlmc.quantity.quantity import make_root_quantity
@@ -28,12 +28,12 @@ def __init__(self):
         self.use_pbs = True
         # Use PBS sampling pool
         self.n_levels = 1
-        self.n_moments = 25
+        self.n_moments = 5
         # Number of MLMC levels
 
         # step_range = [0.055, 0.0035]
         step_range = [1, 0.02]
-        # step_range = [0.1, 0.055]
+        #step_range = [0.1, 0.055] # 5LMC - 53, 115, 474, 2714, 18397
         # step   - elements
         # 0.1    - 262
         # 0.08   - 478
@@ -89,6 +89,18 @@ def process(self):
         value_mean = values_mean[0]
         assert value_mean.mean == 1
 
+        # target_var = 1e-5
+        # estimate_obj = mlmc.estimator.Estimate(root_quantity, sample_storage=sample_storage, moments_fn=moments_fn)
+        # variances, n_ops = estimate_obj.estimate_diff_vars_regression(sample_storage.get_n_collected())
+        # print("variances ", variances)
+        # n_estimated = mlmc.estimator.estimate_n_samples_for_target_variance(target_var, variances, n_ops,
+        #                                                                n_levels=self.n_levels)
+        # print("n estimated ", n_estimated)
+
+
+        # print("means ", means)
+        # print("vars ", vars)
+
         # true_domain = [-10, 10]  # keep all values on the original domain
         # central_moments = Monomial(self.n_moments, true_domain, ref_domain=true_domain, mean=means())
         # central_moments_quantity = moments(root_quantity, moments_fn=central_moments, mom_at_bottom=True)
@@ -97,7 +109,7 @@ def process(self):
         #estimator.sub_subselect(sample_vector=[10000])
 
         #self.process_target_var(estimator)
-        self.construct_density(estimator, tol=1e-8)
+        self.construct_density(estimator, tol=1e-7)
         #self.data_plots(estimator)
 
     def data_plots(self, estimator):
@@ -180,11 +192,12 @@ def setup_config(self, clean):
             'time_factor': 1e7,  # max velocity about 1e-8
             'yaml_file': os.path.join(self.work_dir, '02_conc_tmpl.yaml'),
             'geo_file': os.path.join(self.work_dir, 'repo.geo'),
+            'fields_params': dict(model='exp', sigma=1, corr_length=0.1),
             'field_template': "!FieldElementwise {mesh_data_file: \"$INPUT_DIR$/%s\", field_name: %s}"
         }
 
         # Create simulation factory
-        simulation_factory = FlowSim(config=simulation_config, clean=clean)
+        simulation_factory = FlowSimProcConc(config=simulation_config, clean=clean)
 
         # Create HDF sample storage
         sample_storage = SampleStorageHDF(

From 141505c9da1252406fe24621ea3f30565af0bf8b Mon Sep 17 00:00:00 2001
From: Martin Spetlik <martin_spetlik@charon.nti.tul.cz>
Date: Fri, 22 Oct 2021 12:24:01 +0200
Subject: [PATCH 5/6] rnd field

---
 mlmc/random/correlated_field.py |  4 ++--
 mlmc/tool/flow_mc_02.py         | 37 +++++++++++++++++++++++----------
 2 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/mlmc/random/correlated_field.py b/mlmc/random/correlated_field.py
index 201f28c..5309f84 100644
--- a/mlmc/random/correlated_field.py
+++ b/mlmc/random/correlated_field.py
@@ -499,14 +499,14 @@ def _sample(self):
 
 class GSToolsSpatialCorrelatedField(RandomFieldBase):
 
-    def __init__(self, model, mode_no=1000, log=False, sigma=1):
+    def __init__(self, model, mode_no=1000, log=False, sigma=1, mean=0):
         """
         :param model: instance of covariance model class, which parent is gstools.covmodel.CovModel
         :param mode_no: number of Fourier modes, default: 1000 as in gstools package
         """
         self.model = model
         self.mode_no = mode_no
-        self.srf = gstools.SRF(model, mode_no=mode_no)
+        self.srf = gstools.SRF(model, mean=mean, mode_no=mode_no)
         self.mu = self.srf.mean
         self.sigma = sigma
         self.dim = model.dim
diff --git a/mlmc/tool/flow_mc_02.py b/mlmc/tool/flow_mc_02.py
index 0d38dbf..0219997 100644
--- a/mlmc/tool/flow_mc_02.py
+++ b/mlmc/tool/flow_mc_02.py
@@ -29,7 +29,8 @@ def create_corr_field(model='gauss', corr_length=0.125, dim=2, log=True, sigma=1
     #
     # print("por top ", por_top)
 
-    por_top = gstools.Gaussian(dim=dim,  len_scale=0.2, mu=-1.0, sigma=1.0, log=True)
+    por_top = cf.GSToolsSpatialCorrelatedField(gstools.Gaussian(dim=2,  len_scale=0.2),
+                                               log=log, mean=-1.0, sigma=1.0, mode_no=mode_no)
 
     #print("por top gstools ", por_top_gstools)
 
@@ -42,10 +43,16 @@ def create_corr_field(model='gauss', corr_length=0.125, dim=2, log=True, sigma=1
     #     log=True
     # )
 
-    por_bot = gstools.Gaussian(dim=dim,  len_scale=0.2, mu=-1.0, sigma=1.0, log=True)
+    por_bot = cf.GSToolsSpatialCorrelatedField(gstools.Gaussian(dim=2, len_scale=0.2),
+                                               log=log, mean=-1.0, sigma=1.0, mode_no=mode_no)
+
+    #por_bot = gstools.Gaussian(dim=dim,  len_scale=0.2, mu=-1.0, sigma=1.0, log=True)
 
     water_viscosity = 8.90e-4
 
+    factor_top_model = gstools.Gaussian(dim=dim,  len_scale=1)
+    factor_bot_model = gstools.Gaussian(dim=dim, len_scale=1)
+
     fields = cf.Fields([
         cf.Field('por_top', por_top, regions='ground_0'),
         cf.Field('porosity_top', cf.positive_to_range, ['por_top', 0.02, 0.1], regions='ground_0'),
@@ -53,10 +60,18 @@ def create_corr_field(model='gauss', corr_length=0.125, dim=2, log=True, sigma=1
         cf.Field('porosity_bot', cf.positive_to_range, ['por_bot', 0.01, 0.05], regions='ground_1'),
         cf.Field('porosity_repo', 0.5, regions='repo'),
         #cf.Field('factor_top', cf.SpatialCorrelatedField('gauss', mu=1e-8, sigma=1, log=True), regions='ground_0'),
-        cf.Field('factor_top', gstools.Gaussian(len_scale=1, mu=1e-8, sigma=1.0, log=True), regions='ground_0'),
+
+        cf.Field('factor_top', cf.GSToolsSpatialCorrelatedField(factor_top_model, log=log, mean=1e-8, sigma=1.0, mode_no=mode_no),
+                 regions='ground_0'),
+
+        #cf.Field('factor_top', gstools.Gaussian(len_scale=1, mu=1e-8, sigma=1.0, log=True), regions='ground_0'),
         # conductivity about
         #cf.Field('factor_bot', cf.SpatialCorrelatedField('gauss', mu=1e-8, sigma=1, log=True), regions='ground_1'),
-        cf.Field('factor_bot', gstools.Gaussian(len_scale=1, mu=1e-8, sigma=1, log=True), regions='ground_1'),
+        #cf.Field('factor_bot', gstools.Gaussian(len_scale=1, mu=1e-8, sigma=1, log=True), regions='ground_1'),
+        cf.Field('factor_bot',
+                 cf.GSToolsSpatialCorrelatedField(factor_bot_model, log=log, mean=1e-8, sigma=1.0, mode_no=mode_no),
+                 regions='ground_1'),
+
         # cf.Field('factor_repo', cf.SpatialCorrelatedField('gauss', mu=1e-10, sigma=1, log=True), regions='repo'),
         cf.Field('conductivity_top', cf.kozeny_carman, ['porosity_top', 1, 'factor_top', water_viscosity],
                  regions='ground_0'),
@@ -331,13 +346,13 @@ def _run_sample(fields_file, ele_ids, fine_input_sample, flow123d, common_files_
         # x = [*flow123d, "--yaml_balance", '-i', os.getcwd(), '-s', "{}/flow_input.yaml".format(common_files_dir),
         #          "-o", os.getcwd(), ">{}/flow.out".format(os.getcwd())]
 
-        try:
-            subprocess.call(
-                [*flow123d, "--yaml_balance", '-i', os.getcwd(), '-s', "{}/flow_input.yaml".format(common_files_dir),
-                 "-o", os.getcwd(), ">{}/flow.out".format(os.getcwd())])
-        except:
-            import sys
-            print(sys.exc_info())
+        #try:
+        subprocess.call(
+            [flow123d, "--yaml_balance", '-i', os.getcwd(), '-s', "{}/02_conc.yaml".format(common_files_dir),
+             "-o", os.getcwd(), ">{}/flow.out".format(os.getcwd())])
+        # except:
+        #     import sys
+        #     print(sys.exc_info())
 
         return FlowSimProcConc._extract_result(os.getcwd())
 

From ed7e80fb29c42a8428542bd94e18ffec9a3e5ba7 Mon Sep 17 00:00:00 2001
From: martinspetlik <martin.spetlik>
Date: Fri, 22 Oct 2021 12:26:53 +0200
Subject: [PATCH 6/6] rm plot improt

---
 mlmc/tool/simple_distribution.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mlmc/tool/simple_distribution.py b/mlmc/tool/simple_distribution.py
index a2d3c2c..5029086 100644
--- a/mlmc/tool/simple_distribution.py
+++ b/mlmc/tool/simple_distribution.py
@@ -2,7 +2,6 @@
 import scipy as sc
 import scipy.integrate as integrate
 import mlmc.moments
-import mlmc.plot.plots
 
 EXACT_QUAD_LIMIT = 1000