diff --git a/examples/scripts/battery_parameterisation/bayesian_feature_fitting.py b/examples/scripts/battery_parameterisation/bayesian_feature_fitting.py index 7089598e2..743183ba7 100644 --- a/examples/scripts/battery_parameterisation/bayesian_feature_fitting.py +++ b/examples/scripts/battery_parameterisation/bayesian_feature_fitting.py @@ -5,6 +5,16 @@ import pybop +""" +This example demonstrates how to use EP-BOLFI to parameterise a PyBaMM model using +a "feature"-based cost function. We use the term "feature" to describe a parameter +obtained from fitting either the data or a candidate solution to a simpler model. +Every evaluation of a feature-based cost function runs its own optimisation (based +on the simpler model) to identify the value of the feature. The aim is to minimise +the "feature distance" to identify the parameter values which produce a candidate +solution with a feature value as close as possible to that of the data. +""" + # Define model and parameter values model = pybamm.lithium_ion.SPMe() parameter_values = pybamm.ParameterValues("Chen2020") @@ -65,15 +75,15 @@ dataset = pybop.import_pybamm_solution(synthetic_data) ICI_cost = pybop.SquareRootFeatureDistance( - dataset["Time [s]"], - dataset["Voltage [V]"], + dataset=dataset, + target="Voltage [V]", feature="inverse_slope", time_start=0, time_end=90, ) GITT_cost = pybop.SquareRootFeatureDistance( - dataset["Time [s]"], - dataset["Voltage [V]"], + dataset=dataset, + target="Voltage [V]", feature="inverse_slope", time_start=901, time_end=991, @@ -108,7 +118,12 @@ optim = pybop.EP_BOLFI(problem, options=options) result = optim.run() - pybop.plot.convergence(result, yaxis={"type": "log"}) - pybop.plot.parameters(result, yaxis={"type": "log"}, yaxis2={"type": "log"}) + # Plot the optimisation result + result.plot_convergence(yaxis={"type": "log"}) + result.plot_parameters(yaxis={"type": "log"}, yaxis2={"type": "log"}) + + # Plot predictions for a set of inputs sampled from the posterior + fig = 
result.plot_predictive(show=False) + fig[0].show() print_citations() diff --git a/pybop/costs/error_measures.py b/pybop/costs/error_measures.py index 99de93c27..7cf479fd0 100644 --- a/pybop/costs/error_measures.py +++ b/pybop/costs/error_measures.py @@ -21,7 +21,7 @@ class ErrorMeasure(BaseCost): ---------- dataset : pybop.Dataset Dataset object containing the target data. - target : list[str] + target : str | list[str], optional The name(s) of the target variable(s). weighting : Union[str, np.ndarray], optional The type of weighting to use when taking the sum or mean of the error diff --git a/pybop/costs/feature_distances.py b/pybop/costs/feature_distances.py index c3f7e5ecf..918e6f486 100644 --- a/pybop/costs/feature_distances.py +++ b/pybop/costs/feature_distances.py @@ -5,6 +5,7 @@ from pybop.costs.base_cost import BaseCost from pybop.costs.evaluation import Evaluation +from pybop.processing.dataset import Dataset def indices_of(values, target): @@ -18,28 +19,44 @@ def indices_of(values, target): class FeatureDistance(BaseCost): - """Base for defining cost functions based on comparing fit functions.""" + """ + Base for defining cost functions based on comparing fit functions. + + Parameters + ---------- + dataset : pybop.Dataset + Dataset object containing the target data. + target : str, optional + The name of the target variable. + feature : str, optional + The name of the parameter to use for fitting, must be one of the supported features. + time_start : float, optional + Set the time (in seconds) from which onwards the data shall be + fitted, counted from the start of the data. Default is the start. + time_end : float, optional + Set the time (in seconds) until which the data shall be fitted, + counted from the start of the data. Default is the end. 
+ """ _supported_features = [] def __init__( self, - domain_data: np.ndarray, - target_data: np.ndarray, - feature: str, + dataset: Dataset, + target: str = None, + feature: str = None, time_start: float = None, time_end: float = None, ): super().__init__() if feature not in self._supported_features: raise ValueError( - "Feature '" - + feature - + "' not supported. Options: " + f"Feature '{feature}' not supported. Options: " + str(self._supported_features) ) - self._domain_data = domain_data - self._target_data = target_data + self.set_target(target, dataset) + if len(self._target) != 1: + raise ValueError("Feature distances require exactly one target variable.") self.feature = feature self.time_start = time_start self.time_end = time_end @@ -55,7 +72,7 @@ def __init__( warnings.simplefilter("ignore") self.data_fit = self._fit( self.domain_data[self.start_index : self.end_index], - self.target_data[self.start_index : self.end_index], + self.target_data[self._target[0]][self.start_index : self.end_index], ) def _inverse_fit_function(self, y, *args): @@ -113,37 +130,24 @@ class SquareRootFeatureDistance(FeatureDistance): Fits a square-root fit function and compares either its offset or its slope between model predictions and target data. + + Supported features: + - "offset": The value of the square-root fit at the start. + - "slope": The prefactor of the square-root over time. + - "inverse_slope": 1 over "slope"; may perform better. """ _supported_features = ["offset", "slope", "inverse_slope"] def __init__( self, - domain_data: np.ndarray, - target_data: np.ndarray, + dataset: Dataset, + target: str = None, feature: str = "inverse_slope", time_start: float = None, time_end: float = None, ): - """ - Parameters - ---------- - domain_data : np.ndarray - The content of the "Time [s]" entry in the used `DataSet`. - feature : str, optional - Set the fit parameter from the square-root fit to use for - fitting. 
Possible values: - - "offset": The value of the square-root fit at the start. - - "slope": The prefactor of the square-root over time. - - "inverse_slope": 1 over "slope"; may perform better. - time_start : float, optional - Set the time (in seconds) from which onwards the data shall be - fitted, counted from the start of the data. Default is the start. - time_end : float, optional - Set the time (in seconds) until which the data shall be fitted, - counted from the start of the data. Default is the end. - """ - super().__init__(domain_data, target_data, feature, time_start, time_end) + super().__init__(dataset, target, feature, time_start, time_end) def _inverse_fit_function(self, y, b, c): """Square function to transform data for a linear fit.""" @@ -167,38 +171,25 @@ class ExponentialFeatureDistance(FeatureDistance): Fits an exponential and compares either its asymptote, its magnitude, or its timescale between model predictions and target data. + + Supported features: + - "asymptote": The exponential fit value at infinite time. + - "magnitude": The prefactor of the exponential term. + - "timescale": The denominator in the exponential argument. + - "inverse_timescale": 1 over "timescale"; may perform better. """ _supported_features = ["asymptote", "magnitude", "timescale", "inverse_timescale"] def __init__( self, - domain_data: np.ndarray, - target_data: np.ndarray, + dataset: Dataset, + target: str = None, feature: str = "inverse_timescale", time_start: float = None, time_end: float = None, ): - """ - Parameters - ---------- - domain_data : np.ndarray - The content of the "Time [s]" entry in the used `DataSet`. - feature : str, optional - Set the fit parameter from the square-root fit to use for - fitting. Possible values: - - "asymptote": The exponential fit value at infinite time. - - "magnitude": The prefactor of the exponential term. - - "timescale": The denominator in the exponential argument. - - "inverse_timescale": 1 over "timescale"; may perform better. 
- time_start : float, optional - Set the time (in seconds) from which onwards the data shall be - fitted, counted from the start of the data. Default is the start. - time_end : float, optional - Set the time (in seconds) until which the data shall be fitted, - counted from the start of the data. Default is the end. - """ - super().__init__(domain_data, target_data, feature, time_start, time_end) + super().__init__(dataset, target, feature, time_start, time_end) def _inverse_fit_function(self, y, b, c, d): """Logarithm function to transform data for a linear fit.""" diff --git a/pybop/optimisers/ep_bolfi_optimiser.py b/pybop/optimisers/ep_bolfi_optimiser.py index 095d23066..42541d819 100644 --- a/pybop/optimisers/ep_bolfi_optimiser.py +++ b/pybop/optimisers/ep_bolfi_optimiser.py @@ -1,7 +1,7 @@ -import copy import json import time from contextlib import redirect_stderr, redirect_stdout +from copy import deepcopy from dataclasses import dataclass, field from sys import stderr, stdout @@ -9,6 +9,7 @@ from pybamm import citations import pybop +from pybop import plot from pybop._logging import Logger from pybop.optimisers.base_optimiser import BaseOptimiser, OptimisationResult from pybop.parameters.multivariate_distributions import MultivariateGaussian @@ -17,9 +18,9 @@ def ep_bolfi_problem_processing(y, problem): if isinstance(y, dict): - evaluation = problem._cost(y[problem._simulator.output_variables[0]]) # noqa: SLF001 + evaluation = problem.cost(y[problem.simulator.output_variables[0]]) else: - evaluation = problem._cost(y) # noqa: SLF001 + evaluation = problem.cost(y) if isinstance(evaluation, pybop.costs.evaluation.Evaluation): return [evaluation.values] else: @@ -204,7 +205,8 @@ def __init__( # author={Minka, T}, # journal={Proceedings of the Seventeenth Conference on Uncertainty in Artificial Intelligence (UAI2001)}, # pages={362-369}, - # year={2013} + # year={2013}, + # doi={10.48550/arXiv.1301.2294} # }""") citations.register("""@article{ Barthelme2014, @@ 
-213,7 +215,8 @@ def __init__( journal={Journal of the American Statistical Association}, volume={109}, pages={315-333}, - year={2014} + year={2014}, + doi={10.1080/01621459.2013.864178} }""") citations.register("""@article{ Gutmann2016, @@ -222,7 +225,8 @@ def __init__( journal={Journal of Machine Learning Research}, volume={17}, pages={1-47}, - year={2016} + year={2016}, + doi={10.48550/arXiv.1501.03291} }""") citations.register("""@article{ Kuhn2022, @@ -232,7 +236,8 @@ def __init__( volume={6}, pages={e202200374}, year={2023}, - publisher={Chemistry Europe} + publisher={Chemistry Europe}, + doi={10.1002/batt.202200374} }""") def _set_up_optimiser(self): @@ -241,7 +246,7 @@ # Use the first output variable to pass to EP-BOLFI; define separate simulators # for multiple output variables. simulators = [ - lambda inputs, sim=problem._simulator: ( # noqa: SLF001 + lambda inputs, sim=problem.simulator: ( sim.solve(inputs)[sim.output_variables[0]].data ) for problem in self.problem.problems @@ -330,13 +335,13 @@ def _run(self) -> "BayesianOptimisationResult": # Collect all features into one cost. Note: they are logarithms, # so this is a multiplicative combination. 
feature_costs = np.array(list(ep_bolfi_log["discrepancies"].values())) - cost_list = copy.deepcopy(feature_costs[0]) + cost_list = deepcopy(feature_costs[0]) for i in range(1, len(feature_costs)): for j in range(len(cost_list)): cost_list[j][0] += feature_costs[i][j][0] cost_list = np.array([np.exp(value[0]) for value in cost_list]) - x_best_over_time = copy.deepcopy(x_list) - cost_best = copy.deepcopy(cost_list) + x_best_over_time = deepcopy(x_list) + cost_best = deepcopy(cost_list) for i in range(1, len(cost_list)): if cost_list[i] < cost_best[i - 1]: x_best_over_time[i:, None] = x_list[i, None] @@ -365,7 +370,7 @@ def _run(self) -> "BayesianOptimisationResult": for entry in x_best_over_time ] self._logger.x_search_best = x_search_best_over_time[-1] - self._logger.cost_best = cost_best[0] + self._logger.cost_best = cost_best[-1] model_mean_dict = { key: value[0] for key, value in ep_bolfi_result["inferred parameters"].items() @@ -386,8 +391,8 @@ def _run(self) -> "BayesianOptimisationResult": [bounds[1][0] for bounds in ep_bolfi_result["error bounds"].values()] ) # The re-use of `parameters` makes transformations easily usable. - posterior = copy.deepcopy(self.problem.parameters) - posterior.prior = MultivariateGaussian( + posterior = deepcopy(self.problem.parameters) + posterior._distribution = MultivariateGaussian( # noqa: SLF001 search_mean_array, np.array(ep_bolfi_result["covariance"]) ) self._logger.iteration = { @@ -484,3 +489,9 @@ def __init__( self.maximum_a_posteriori = maximum_a_posteriori self.log_evidence_mean = log_evidence_mean self.log_evidence_variance = log_evidence_variance + + def plot_predictive(self, **kwargs): + """ + Plot the predictive posterior of a Bayesian parameterisation result. 
+ """ + return plot.predictive(result=self, **kwargs) diff --git a/pybop/plot/__init__.py b/pybop/plot/__init__.py index f58db2016..06e5a0245 100644 --- a/pybop/plot/__init__.py +++ b/pybop/plot/__init__.py @@ -11,3 +11,4 @@ from .nyquist import nyquist from .voronoi import surface from .samples import trace, chains, posterior, summary_table +from .predictive import predictive diff --git a/pybop/plot/predictive.py b/pybop/plot/predictive.py new file mode 100644 index 000000000..66163086a --- /dev/null +++ b/pybop/plot/predictive.py @@ -0,0 +1,100 @@ +from typing import TYPE_CHECKING + +import numpy as np +import plotly.express as px + +from pybop.plot.standard_plots import StandardPlot +from pybop.problems.meta_problem import MetaProblem + +if TYPE_CHECKING: + from pybop.optimisers.ep_bolfi_optimiser import BayesianOptimisationResult + from pybop.samplers.base_sampler import SamplingResult + + +def predictive( + result: "BayesianOptimisationResult | SamplingResult", + number_of_traces: int = 8, + data_legend_entry=None, + rvs_legend_entry=None, + pdf_plot=None, + pdf_label: str = "PDF", + colour_scale="viridis", + show: bool = True, + **layout_kwargs, +): + """ + Plot the predictive posterior of a Bayesian optimisation result. 
+ """ + + posterior_samples = result.posterior.sample_from_distribution( + n_samples=number_of_traces + ) + posterior_samples_pdf = np.asarray( + [result.posterior.distribution.pdf(s) for s in posterior_samples] + ) + pdf_range = np.asarray([posterior_samples_pdf.min(), posterior_samples_pdf.max()]) + + # Create a plot for each problem + problems = ( + result.problem.problems + if isinstance(result.problem, MetaProblem) + else [result.problem] + ) + figure_list = [] + + for problem in problems: + inputs = [problem.parameters.to_dict(s) for s in posterior_samples] + simulations = problem.simulate_batch(inputs=inputs) + + plot_dict = StandardPlot( + x=problem.domain_data, + y=problem.cost._dataset[problem.target[0]], # noqa: SLF001 + layout_options=dict( + xaxis_title=StandardPlot.remove_brackets(problem.domain), + yaxis_title=StandardPlot.remove_brackets(problem.target[0]), + ), + trace_names=data_legend_entry, + ) + + for pdf, pred in zip(posterior_samples_pdf, simulations, strict=False): + plot_dict.add_traces( + x=problem.domain_data, + y=pred[problem.target[0]].data, + line={ + "dash": "dot", + "color": px.colors.sample_colorscale( + colour_scale, + (pdf - pdf_range[0]) / (pdf_range[1] - pdf_range[0]), + )[0], + }, + ) + + # Add the colourbar. 
+ plot_dict.add_traces( + x=[None], + y=[None], + mode="markers", + marker={ + "size": 0, + "color": pdf_range, + "colorscale": colour_scale, + "showscale": True, + "colorbar": {"title": {"text": "Posterior PDF", "side": "right"}}, + }, + ) + + if pdf_plot is not None: + plot_dict.add_traces( + x=pdf_plot[0], + y=pdf_plot[1], + trace_names=pdf_label, + ) + + fig = plot_dict(show=False) + fig.update_layout(**layout_kwargs) + if show: + fig.show() + + figure_list.append(fig) + + return figure_list diff --git a/pybop/samplers/base_sampler.py b/pybop/samplers/base_sampler.py index 146ec03e4..ca1631dcb 100644 --- a/pybop/samplers/base_sampler.py +++ b/pybop/samplers/base_sampler.py @@ -1,3 +1,4 @@ +from copy import deepcopy from dataclasses import dataclass import numpy as np @@ -7,6 +8,7 @@ from pybop import plot from pybop._logging import Logger from pybop._result import Result +from pybop.parameters.multivariate_distributions import MultivariateGaussian from pybop.problems.log_pdf import LogPDF @@ -215,11 +217,20 @@ def get_summary_statistics(self, significant_digits: int = 4): "ci_upper": lambda x, axis: np.percentile(x, 97.5, axis=axis), } - return { + summary_statistics = { key: self._calculate_statistics(func, key, axis=0) for key, func in summary_funs.items() } + # Assume the posterior is Gaussian; diagonal covariance needs variances (std squared) + self.posterior = deepcopy(self.problem.parameters) + self.posterior._distribution = MultivariateGaussian( # noqa: SLF001 + mean=summary_statistics["mean"], + covariance=np.eye(self.n_parameters) * summary_statistics["std"] ** 2, + ) + + return summary_statistics + def plot_trace(self, **kwargs): """ Plot trace plots for the posterior samples. @@ -310,3 +321,9 @@ def rhat(self): stationary chains R-hat will be close to one, otherwise it is higher. """ return pints.rhat(self.chains) + + def plot_predictive(self, **kwargs): + """ + Plot the predictive posterior of a Bayesian parameterisation result. 
+ """ + return plot.predictive(result=self, **kwargs) diff --git a/tests/unit/test_cost.py b/tests/unit/test_cost.py index 2cf920d8e..cb3488057 100644 --- a/tests/unit/test_cost.py +++ b/tests/unit/test_cost.py @@ -58,8 +58,14 @@ def gitt_like_dataset(self): 0.2 + 0.4 * np.sqrt(domain_data[0:200]), 0.2 + 0.4 * 20**0.5 + 3 * (2 / 3 - np.exp(-0.02 * domain_data[200:])), ) + dataset = pybop.Dataset( + { + "Time [s]": domain_data, + "Voltage [V]": target_data, + } + ) switchover_point = 20.0 - return domain_data, target_data, switchover_point + return dataset, switchover_point def test_base(self, dataset): cost = pybop.ErrorMeasure(dataset) @@ -426,51 +432,37 @@ def test_mixed_problem_classes(self, dataset, design_simulator): pybop.WeightedCost(cost1, cost2) def test_square_root_feature_distance(self, gitt_like_dataset): - domain_data, target_data, switchover_point = gitt_like_dataset + dataset, switchover_point = gitt_like_dataset srfd = SquareRootFeatureDistance( - domain_data, target_data, feature="offset", time_end=switchover_point + dataset=dataset, feature="offset", time_end=switchover_point ) assert abs(srfd.data_fit - 0.2) < 1e-4 srfd = SquareRootFeatureDistance( - domain_data, target_data, feature="slope", time_end=switchover_point + dataset=dataset, feature="slope", time_end=switchover_point ) assert abs(srfd.data_fit - 0.4) < 1e-4 srfd = SquareRootFeatureDistance( - domain_data, target_data, feature="inverse_slope", time_end=switchover_point + dataset=dataset, feature="inverse_slope", time_end=switchover_point ) assert abs(srfd.data_fit - 1 / 0.4) < 1e-4 with pytest.raises(ValueError): - srfd = SquareRootFeatureDistance( - domain_data, target_data, feature="non_existent" - ) + srfd = SquareRootFeatureDistance(dataset=dataset, feature="non_existent") def test_exponential_feature_distance(self, gitt_like_dataset): - domain_data, target_data, switchover_point = gitt_like_dataset + dataset, switchover_point = gitt_like_dataset efd = ExponentialFeatureDistance( - 
domain_data, - target_data, - feature="asymptote", - time_start=switchover_point, + dataset=dataset, feature="asymptote", time_start=switchover_point ) assert abs(efd.data_fit - (2.2 + 0.4 * 20**0.5)) < 1e-4 efd = ExponentialFeatureDistance( - domain_data, - target_data, - feature="magnitude", - time_start=switchover_point, + dataset=dataset, feature="magnitude", time_start=switchover_point ) assert abs(efd.data_fit + 2.0) < 1e-1 efd = ExponentialFeatureDistance( - domain_data, - target_data, - feature="timescale", - time_start=switchover_point, + dataset=dataset, feature="timescale", time_start=switchover_point ) assert abs(efd.data_fit - 1 / 0.02) < 1e-2 efd = ExponentialFeatureDistance( - domain_data, - target_data, - feature="inverse_timescale", - time_start=switchover_point, + dataset=dataset, feature="inverse_timescale", time_start=switchover_point ) assert abs(efd.data_fit - 0.02) < 1e-4 diff --git a/tests/unit/test_optimisation.py b/tests/unit/test_optimisation.py index 71e9a7f39..7d21f414e 100644 --- a/tests/unit/test_optimisation.py +++ b/tests/unit/test_optimisation.py @@ -148,15 +148,15 @@ def multivariate_problem(self, multivariate_simulator, dataset): @pytest.fixture def gitt_like_problem(self, multivariate_simulator, dataset): sqrt_cost_1 = pybop.costs.feature_distances.SquareRootFeatureDistance( - dataset["Time [s]"], - dataset["Voltage [V]"], + dataset, + target="Voltage [V]", feature="offset", time_start=0, time_end=180, ) sqrt_cost_2 = pybop.costs.feature_distances.SquareRootFeatureDistance( - dataset["Time [s]"], - dataset["Voltage [V]"], + dataset, + target="Voltage [V]", feature="offset", time_start=180, time_end=360,