From 31a4d043ac03978f582eaa7f4add5388bfce46ac Mon Sep 17 00:00:00 2001
From: davidwalter2 <david.walter@cern.ch>
Date: Tue, 6 Jan 2026 15:16:20 -0500
Subject: [PATCH 1/9] Allow to use externalPostfit as starting point for fit;
 profile beta parameters after loading externalPostfit

---
 bin/rabbit_fit.py | 52 ++++++++++++++++++++++++-----------------------
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/bin/rabbit_fit.py b/bin/rabbit_fit.py
index 58d71702..968c3967 100755
--- a/bin/rabbit_fit.py
+++ b/bin/rabbit_fit.py
@@ -276,41 +276,43 @@ def fit(args, fitter, ws, dofit=True):
 
     if args.externalPostfit is not None:
         fitter.load_fitresult(args.externalPostfit, args.externalPostfitResult)
-    else:
-        if dofit:
-            cb = fitter.minimize()
 
-            if cb is not None:
-                ws.add_loss_time_hist(cb.loss_history, cb.time_history)
+        if fitter.binByBinStat:
+            fitter._profile_beta()
 
-        if not args.noHessian:
-            # compute the covariance matrix and estimated distance to minimum
+    if dofit:
+        cb = fitter.minimize()
 
-            val, grad, hess = fitter.loss_val_grad_hess()
-            edmval, cov = edmval_cov(grad, hess)
+        if cb is not None:
+            ws.add_loss_time_hist(cb.loss_history, cb.time_history)
 
-            logger.info(f"edmval: {edmval}")
+    if not args.noHessian:
+        # compute the covariance matrix and estimated distance to minimum
+
+        val, grad, hess = fitter.loss_val_grad_hess()
+        edmval, cov = edmval_cov(grad, hess)
+        logger.info(f"edmval: {edmval}")
 
-            fitter.cov.assign(cov)
+        fitter.cov.assign(cov)
 
-            del cov
+        del cov
 
-            if fitter.binByBinStat and fitter.diagnostics:
-                # This is the estimated distance to minimum with respect to variations of
-                # the implicit binByBinStat nuisances beta at fixed parameter values.
-                # It should be near-zero by construction as long as the analytic profiling is
-                # correct
-                _, gradbeta, hessbeta = fitter.loss_val_grad_hess_beta()
-                edmvalbeta, covbeta = edmval_cov(gradbeta, hessbeta)
-                logger.info(f"edmvalbeta: {edmvalbeta}")
+        if fitter.binByBinStat and fitter.diagnostics:
+            # This is the estimated distance to minimum with respect to variations of
+            # the implicit binByBinStat nuisances beta at fixed parameter values.
+            # It should be near-zero by construction as long as the analytic profiling is
+            # correct
+            _, gradbeta, hessbeta = fitter.loss_val_grad_hess_beta()
+            edmvalbeta, covbeta = edmval_cov(gradbeta, hessbeta)
+            logger.info(f"edmvalbeta: {edmvalbeta}")
 
-            if args.doImpacts:
-                ws.add_impacts_hists(*fitter.impacts_parms(hess))
+        if args.doImpacts:
+            ws.add_impacts_hists(*fitter.impacts_parms(hess))
 
-            del hess
+        del hess
 
-            if args.globalImpacts:
-                ws.add_global_impacts_hists(*fitter.global_impacts_parms())
+        if args.globalImpacts:
+            ws.add_global_impacts_hists(*fitter.global_impacts_parms())
 
     nllvalreduced = fitter.reduced_nll().numpy()
 

From 84bec047df46338c1b509aba85166461d38515ce Mon Sep 17 00:00:00 2001
From: davidwalter2 <david.walter@cern.ch>
Date: Tue, 6 Jan 2026 15:20:29 -0500
Subject: [PATCH 2/9] Fix feezing of constraint parameters; using freezing
 functionality in likelihood scans

---
 bin/rabbit_plot_likelihood_scan.py |  63 ++++----
 rabbit/fitter.py                   | 241 ++++++++++++-----------------
 2 files changed, 125 insertions(+), 179 deletions(-)

diff --git a/bin/rabbit_plot_likelihood_scan.py b/bin/rabbit_plot_likelihood_scan.py
index a6a47523..a052db16 100755
--- a/bin/rabbit_plot_likelihood_scan.py
+++ b/bin/rabbit_plot_likelihood_scan.py
@@ -1,9 +1,7 @@
 #!/usr/bin/env python3
 
 import argparse
-import os
 
-import matplotlib.pyplot as plt
 import mplhep as hep
 import numpy as np
 
@@ -16,32 +14,6 @@
 logger = None
 
 
-def writeOutput(fig, outfile, extensions=[], postfix=None, args=None, meta_info=None):
-    name, _ = os.path.splitext(outfile)
-
-    if postfix:
-        name += f"_{postfix}"
-
-    for ext in extensions:
-        if ext[0] != ".":
-            ext = "." + ext
-        output = name + ext
-        print(f"Write output file {output}")
-        plt.savefig(output)
-
-        output = name.rsplit("/", 1)
-        output[1] = os.path.splitext(output[1])[0]
-        if len(output) == 1:
-            output = (None, *output)
-    if args is None and meta_info is None:
-        return
-    output_tools.write_logfile(
-        *output,
-        args=args,
-        meta_info=meta_info,
-    )
-
-
 def parseArgs():
     parser = argparse.ArgumentParser()
     parser.add_argument(
@@ -73,6 +45,11 @@ def parseArgs():
         default="./test",
         help="Folder path for output",
     )
+    parser.add_argument(
+        "--eoscp",
+        action="store_true",
+        help="Override use of xrdcp and use the mount instead",
+    )
     parser.add_argument(
         "-p", "--postfix", type=str, help="Postfix for output file name"
     )
@@ -113,6 +90,13 @@ def parseArgs():
         default=None,
         help="x axis limits",
     )
+    parser.add_argument(
+        "--ylim",
+        type=float,
+        nargs=2,
+        default=None,
+        help="y axis limits",
+    )
     parser.add_argument("--titlePos", type=int, default=2, help="title position")
     parser.add_argument(
         "--config",
@@ -133,6 +117,7 @@ def plot_scan(
     subtitle=None,
     titlePos=0,
     xlim=None,
+    ylim=None,
     combine=None,
     ylabel=r"$-2\,\Delta \log L$",
     config={},
@@ -148,13 +133,15 @@ def plot_scan(
 
     if xlim is None:
         xlim = (min(x), max(x))
+    if ylim is None:
+        ylim = (min(y), max(y))
 
     fig, ax = plot_tools.figure(
         x,
         xlabel,
         ylabel,
         xlim=xlim,
-        ylim=(min(y), max(y)),  # logy=args.logy
+        ylim=ylim,  # logy=args.logy
     )
 
     ax.axhline(y=1, color="gray", linestyle="--", alpha=0.5)
@@ -227,6 +214,7 @@ def plot_scan(
 
 def main():
     args = parseArgs()
+    outdir = output_tools.make_plot_dir(args.outpath, eoscp=args.eoscp)
 
     global logger
     logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger)
@@ -280,19 +268,20 @@ def main():
             subtitle=args.subtitle,
             titlePos=args.titlePos,
             xlim=args.xlim,
+            ylim=args.ylim,
             config=config,
             combine=(vals, nlls) if args.combine is not None else None,
             no_hessian=args.noHessian,
         )
-        os.makedirs(args.outpath, exist_ok=True)
-        outfile = os.path.join(args.outpath, f"nll_scan_{param}")
-        writeOutput(
-            fig,
-            outfile=outfile,
-            extensions=["png", "pdf"],
-            meta_info=meta,
+
+        to_join = [f"nll_scan_{param}", args.postfix]
+        outfile = "_".join(filter(lambda x: x, to_join))
+        plot_tools.save_pdf_and_png(outdir, outfile)
+        output_tools.write_index_and_log(
+            outdir,
+            outfile,
+            analysis_meta_info=meta,
             args=args,
-            postfix=args.postfix,
         )
 
 
diff --git a/rabbit/fitter.py b/rabbit/fitter.py
index faba62a6..967f883d 100644
--- a/rabbit/fitter.py
+++ b/rabbit/fitter.py
@@ -45,9 +45,7 @@ def __init__(self, xv):
     def __call__(self, intermediate_result):
         loss = intermediate_result.fun
 
-        logger.debug(
-            f"Iteration {self.iiter}: loss value {loss}"
-        )  # ; status {intermediate_result.status}")
+        logger.debug(f"Iteration {self.iiter}: loss value {loss}")
         if np.isnan(loss):
             raise ValueError(f"Loss value is NaN at iteration {self.iiter}")
 
@@ -404,7 +402,14 @@ def set_blinding_offsets(self, blind=True):
             )
 
     def get_blinded_theta(self):
-        theta = self.x[self.poi_model.npoi :]
+        theta = self.x[self.poi_model.npoi : self.poi_model.npoi + self.indata.nsyst]
+        theta = tf.where(
+            self.frozen_params_mask[
+                self.poi_model.npoi : self.poi_model.npoi + self.indata.nsyst
+            ],
+            tf.stop_gradient(theta),
+            theta,
+        )
         if self.do_blinding:
             return theta + self._blinding_offsets_theta
         else:
@@ -416,6 +421,9 @@ def get_blinded_poi(self):
             poi = xpoi
         else:
             poi = tf.square(xpoi)
+        poi = tf.where(
+            self.frozen_params_mask[: self.poi_model.npoi], tf.stop_gradient(poi), poi
+        )
         if self.do_blinding:
             return poi * self._blinding_offsets_poi
         else:
@@ -1301,15 +1309,6 @@ def _compute_yields_noBBB(self, full=True):
         poi = self.get_blinded_poi()
         theta = self.get_blinded_theta()
 
-        poi = tf.where(
-            self.frozen_params_mask[: self.poi_model.npoi], tf.stop_gradient(poi), poi
-        )
-        theta = tf.where(
-            self.frozen_params_mask[self.poi_model.npoi :],
-            tf.stop_gradient(theta),
-            theta,
-        )
-
         rnorm = self.poi_model.compute(poi)
 
         normcentral = None
@@ -1941,14 +1940,12 @@ def _compute_lbeta(self, beta, full_nll=False):
         return None
 
     def _compute_nll_components(self, profile=True, full_nll=False):
-        nexpfullcentral, _, beta = self._compute_yields_with_beta(
+        nexp, _, beta = self._compute_yields_with_beta(
             profile=profile,
             compute_norm=False,
             full=False,
         )
 
-        nexp = nexpfullcentral
-
         if self.chisqFit:
             ln = 0.5 * tf.reduce_sum((nexp - self.nobs) ** 2 / self.varnobs, axis=-1)
         elif self.covarianceFit:
@@ -2065,6 +2062,68 @@ def loss_val_grad_hess_beta(self, profile=True):
 
         return val, grad, hess
 
+    def fit(self):
+        def scipy_loss(xval):
+            self.x.assign(xval)
+            val, grad = self.loss_val_grad()
+            return val.__array__(), grad.__array__()
+
+        def scipy_hessp(xval, pval):
+            self.x.assign(xval)
+            p = tf.convert_to_tensor(pval)
+            val, grad, hessp = self.loss_val_grad_hessp(p)
+            return hessp.__array__()
+
+        def scipy_hess(xval):
+            self.x.assign(xval)
+            val, grad, hess = self.loss_val_grad_hess()
+            if self.diagnostics:
+                cond_number = tfh.cond_number(hess)
+                logger.info(f"  - Condition number: {cond_number}")
+                edmval = tfh.edmval(grad, hess)
+                logger.info(f"  - edmval: {edmval}")
+            return hess.__array__()
+
+        xval = self.x.numpy()
+
+        callback = FitterCallback(self)
+
+        if self.minimizer_method in [
+            "trust-krylov",
+            "trust-ncg",
+        ]:
+            info_minimize = dict(hessp=scipy_hessp)
+        elif self.minimizer_method in [
+            "trust-exact",
+            "dogleg",
+        ]:
+            info_minimize = dict(hess=scipy_hess)
+        else:
+            info_minimize = dict()
+
+        try:
+            res = scipy.optimize.minimize(
+                scipy_loss,
+                xval,
+                method=self.minimizer_method,
+                jac=True,
+                tol=0.0,
+                callback=callback,
+                **info_minimize,
+            )
+        except Exception as ex:
+            # minimizer could have called the loss or hessp functions with "random" values, so restore the
+            # state from the end of the last iteration before the exception
+            xval = callback.xval
+            logger.debug(ex)
+        else:
+            xval = res["x"]
+            logger.debug(res)
+
+        self.x.assign(xval)
+
+        return callback
+
     def minimize(self):
         if self.is_linear:
             logger.info(
@@ -2092,65 +2151,7 @@ def minimize(self):
 
             callback = None
         else:
-
-            def scipy_loss(xval):
-                self.x.assign(xval)
-                val, grad = self.loss_val_grad()
-                return val.__array__(), grad.__array__()
-
-            def scipy_hessp(xval, pval):
-                self.x.assign(xval)
-                p = tf.convert_to_tensor(pval)
-                val, grad, hessp = self.loss_val_grad_hessp(p)
-                return hessp.__array__()
-
-            def scipy_hess(xval):
-                self.x.assign(xval)
-                val, grad, hess = self.loss_val_grad_hess()
-                if self.diagnostics:
-                    cond_number = tfh.cond_number(hess)
-                    logger.info(f"  - Condition number: {cond_number}")
-                    edmval = tfh.edmval(grad, hess)
-                    logger.info(f"  - edmval: {edmval}")
-                return hess.__array__()
-
-            xval = self.x.numpy()
-
-            callback = FitterCallback(xval)
-
-            if self.minimizer_method in [
-                "trust-krylov",
-                "trust-ncg",
-            ]:
-                info_minimize = dict(hessp=scipy_hessp)
-            elif self.minimizer_method in [
-                "trust-exact",
-                "dogleg",
-            ]:
-                info_minimize = dict(hess=scipy_hess)
-            else:
-                info_minimize = dict()
-
-            try:
-                res = scipy.optimize.minimize(
-                    scipy_loss,
-                    xval,
-                    method=self.minimizer_method,
-                    jac=True,
-                    tol=0.0,
-                    callback=callback,
-                    **info_minimize,
-                )
-            except Exception as ex:
-                # minimizer could have called the loss or hessp functions with "random" values, so restore the
-                # state from the end of the last iteration before the exception
-                xval = callback.xval
-                logger.debug(ex)
-            else:
-                xval = res["x"]
-                logger.debug(res)
-
-            self.x.assign(xval)
+            callback = self.fit()
 
         # force profiling of beta with final parameter values
         # TODO avoid the extra calculation and jitting if possible since the relevant calculation
@@ -2164,6 +2165,9 @@ def nll_scan(self, param, scan_range, scan_points, use_prefit=False):
         # make a likelihood scan for a single parameter
         # assuming the likelihood is minimized
 
+        # freeze minimize which mean to not update it in the fit
+        self.freeze_params(param)
+
         idx = np.where(self.parms.astype(str) == param)[0][0]
 
         # store current state of x temporarily
@@ -2189,51 +2193,28 @@ def nll_scan(self, param, scan_range, scan_points, use_prefit=False):
                 if i == 0:
                     continue
 
+                logger.debug(f"Now at i={i} x={ixval}")
                 self.x.assign(tf.tensor_scatter_nd_update(self.x, [[idx]], [ixval]))
 
-                def scipy_loss(xval):
-                    self.x.assign(xval)
-                    val, grad = self.loss_val_grad()
-                    grad = grad.numpy()
-                    grad[idx] = 0  # Zero out gradient for the frozen parameter
-                    return val.numpy(), grad
-
-                def scipy_hessp(xval, pval):
-                    self.x.assign(xval)
-                    pval[idx] = (
-                        0  # Ensure the perturbation does not affect frozen parameter
-                    )
-                    p = tf.convert_to_tensor(pval)
-                    val, grad, hessp = self.loss_val_grad_hessp(p)
-                    hessp = hessp.numpy()
-                    # TODO: worth testing modifying the loss/grad/hess functions to imply 1
-                    # for the corresponding hessian element instead of 0,
-                    # since this might allow the minimizer to converge more efficiently
-                    hessp[idx] = (
-                        0  # Zero out Hessian-vector product at the frozen index
-                    )
-                    return hessp
-
-                res = scipy.optimize.minimize(
-                    scipy_loss,
-                    self.x,
-                    method="trust-krylov",
-                    jac=True,
-                    hessp=scipy_hessp,
-                )
-                if res["success"]:
-                    dnlls[nscans // 2 + sign * i] = (
-                        self.reduced_nll().numpy() - nll_best
-                    )
-                    scan_vals[nscans // 2 + sign * i] = ixval
+                self.fit()
+
+                dnlls[nscans // 2 + sign * i] = self.reduced_nll().numpy() - nll_best
+
+                scan_vals[nscans // 2 + sign * i] = ixval
 
             # reset x to original state
             self.x.assign(xval)
 
+        # let the parameter be free again
+        self.defreeze_params(param)
+
         return scan_vals, dnlls
 
     def nll_scan2D(self, param_tuple, scan_range, scan_points, use_prefit=False):
 
+        # freeze minimize which mean to not update it in the fit
+        self.freeze_params(param_tuple)
+
         idx0 = np.where(self.parms.astype(str) == param_tuple[0])[0][0]
         idx1 = np.where(self.parms.astype(str) == param_tuple[1])[0][0]
 
@@ -2280,7 +2261,9 @@ def nll_scan2D(self, param_tuple, scan_range, scan_points, use_prefit=False):
             ix = best_fit - i
             iy = best_fit + j
 
-            # print(f"i={i}, j={j}, r={r} drow={drow}, dcol={dcol} | ix={ix}, iy={iy}")
+            logger.debug(
+                f"Now at (ix,iy) = ({ix},{iy}) (x,y)= ({x_scans[ix]},{y_scans[iy]})"
+            )
 
             self.x.assign(
                 tf.tensor_scatter_nd_update(
@@ -2288,41 +2271,15 @@ def nll_scan2D(self, param_tuple, scan_range, scan_points, use_prefit=False):
                 )
             )
 
-            def scipy_loss(xval):
-                self.x.assign(xval)
-                val, grad = self.loss_val_grad()
-                grad = grad.numpy()
-                grad[idx0] = 0
-                grad[idx1] = 0
-                return val.numpy(), grad
-
-            def scipy_hessp(xval, pval):
-                self.x.assign(xval)
-                pval[idx0] = 0
-                pval[idx1] = 0
-                p = tf.convert_to_tensor(pval)
-                val, grad, hessp = self.loss_val_grad_hessp(p)
-                hessp = hessp.numpy()
-                hessp[idx0] = 0
-                hessp[idx1] = 0
-
-                if np.allclose(hessp, 0, atol=1e-8):
-                    return np.zeros_like(hessp)
+            self.fit()
 
-                return hessp
+            dnlls[ix, iy] = self.reduced_nll().numpy() - nll_best
 
-            res = scipy.optimize.minimize(
-                scipy_loss,
-                self.x,
-                method="trust-krylov",
-                jac=True,
-                hessp=scipy_hessp,
-            )
+        self.x.assign(xval)
 
-            if res["success"]:
-                dnlls[ix, iy] = self.reduced_nll().numpy() - nll_best
+        # let the parameter be free again
+        self.defreeze_params(param_tuple)
 
-        self.x.assign(xval)
         return x_scans, y_scans, dnlls
 
     def contour_scan(self, param, nll_min, q=1, signs=[-1, 1], fun=None):

From e9cd3270ab85d938c394e47d84aa574d628e1ec8 Mon Sep 17 00:00:00 2001
From: davidwalter2 <david.walter@cern.ch>
Date: Sun, 25 Jan 2026 08:59:17 -0500
Subject: [PATCH 3/9] Add option to skip postfit profiling (e.g. only using
 externalPostfit); Add possibility to only load parameters, not covariance
 from external postfit; always draw zero line in hist plot

---
 bin/rabbit_fit.py        | 10 ++++++++--
 bin/rabbit_plot_hists.py | 11 ++++++++++-
 rabbit/fitter.py         | 15 ++++++++++-----
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/bin/rabbit_fit.py b/bin/rabbit_fit.py
index 968c3967..2bb289d7 100755
--- a/bin/rabbit_fit.py
+++ b/bin/rabbit_fit.py
@@ -149,6 +149,12 @@ def make_parser():
         type=str,
         help="Specify result from external postfit file",
     )
+    parser.add_argument(
+        "--noPostfitProfile",
+        default=False,
+        action="store_true",
+        help="Do not profile beta parameters in the postfit (e.g. when using --externalPostfit).",
+    )
     parser.add_argument(
         "--doImpacts",
         default=False,
@@ -333,7 +339,7 @@ def fit(args, fitter, ws, dofit=True):
             "nllvalreduced": nllvalreduced,
             "ndfsat": ndfsat,
             "edmval": edmval,
-            "postfit_profile": args.externalPostfit is None,
+            "postfit_profile": not args.noPostfitProfile,
         }
     )
 
@@ -561,7 +567,7 @@ def main():
                             ifitter,
                             ws,
                             prefit=False,
-                            profile=args.externalPostfit is None,
+                            profile=not args.noPostfitProfile,
                         )
                 else:
                     fit_time.append(time.time())
diff --git a/bin/rabbit_plot_hists.py b/bin/rabbit_plot_hists.py
index bb66e0a3..aebf7af1 100755
--- a/bin/rabbit_plot_hists.py
+++ b/bin/rabbit_plot_hists.py
@@ -817,6 +817,7 @@ def make_plot(
             h_den = h_inclusive
 
         if diff:
+            h0 = hh.addHists(h_num, h_num, scale2=-1)
             h1 = hh.addHists(h_inclusive, h_den, scale2=-1)
             h2 = hh.addHists(h_data, h_den, scale2=-1)
             if h_data_stat is not None:
@@ -824,6 +825,14 @@ def make_plot(
                     h_data_stat, h_den, cutoff=cutoff, rel_unc=True
                 )
         else:
+            h0 = hh.divideHists(
+                h_num,
+                h_num,
+                cutoff=1e-8,
+                rel_unc=True,
+                flow=False,
+                by_ax_name=False,
+            )
             h1 = hh.divideHists(
                 h_inclusive,
                 h_den,
@@ -839,7 +848,7 @@ def make_plot(
                 )
 
         hep.histplot(
-            h1,
+            h0,
             histtype="step",
             color="grey",
             alpha=0.5,
diff --git a/rabbit/fitter.py b/rabbit/fitter.py
index 967f883d..3fbc00e8 100644
--- a/rabbit/fitter.py
+++ b/rabbit/fitter.py
@@ -286,12 +286,14 @@ def __init__(
 
     def load_fitresult(self, fitresult_file, fitresult_key):
         # load results from external fit and set postfit value and covariance elements for common parameters
+        cov_ext = None
         with h5py.File(fitresult_file, "r") as fext:
             if "x" in fext.keys():
                 # fitresult from combinetf
                 x_ext = fext["x"][...]
                 parms_ext = fext["parms"][...].astype(str)
-                cov_ext = fext["cov"][...]
+                if "cov" in fext.keys():
+                    cov_ext = fext["cov"][...]
             else:
                 # fitresult from rabbit
                 h5results_ext = io_tools.get_fitresult(fext, fitresult_key)
@@ -299,10 +301,10 @@ def load_fitresult(self, fitresult_file, fitresult_key):
 
                 x_ext = h_parms_ext.values()
                 parms_ext = np.array(h_parms_ext.axes["parms"])
-                cov_ext = h5results_ext["cov"].get().values()
+                if "cov" in h5results_ext.keys():
+                    cov_ext = h5results_ext["cov"].get().values()
 
         xvals = self.x.numpy()
-        covval = self.cov.numpy()
         parms = self.parms.astype(str)
 
         # Find common elements with their matching indices
@@ -310,10 +312,13 @@ def load_fitresult(self, fitresult_file, fitresult_key):
             parms, parms_ext, assume_unique=True, return_indices=True
         )
         xvals[idxs] = x_ext[idxs_ext]
-        covval[np.ix_(idxs, idxs)] = cov_ext[np.ix_(idxs_ext, idxs_ext)]
 
         self.x.assign(xvals)
-        self.cov.assign(tf.constant(covval))
+
+        if cov_ext is not None:
+            covval = self.cov.numpy()
+            covval[np.ix_(idxs, idxs)] = cov_ext[np.ix_(idxs_ext, idxs_ext)]
+            self.cov.assign(tf.constant(covval))
 
     def update_frozen_params(self):
         new_mask_np = np.isin(self.parms, self.frozen_params)

From d2bb63e109e0e62dc9bf10442e5bb9904df5a118 Mon Sep 17 00:00:00 2001
From: davidwalter2 <david.walter@cern.ch>
Date: Sun, 25 Jan 2026 11:48:37 -0500
Subject: [PATCH 4/9] Minor fix on pulls labeling

---
 bin/rabbit_plot_pulls_and_impacts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/rabbit_plot_pulls_and_impacts.py b/bin/rabbit_plot_pulls_and_impacts.py
index b847c305..e13574d4 100755
--- a/bin/rabbit_plot_pulls_and_impacts.py
+++ b/bin/rabbit_plot_pulls_and_impacts.py
@@ -324,7 +324,7 @@ def make_bar(
                     size=8,
                 ),
                 error_x=error_x,
-                name="Pulls ± Constraints",
+                name=f"Pulls ± Constraints ({name})",
                 showlegend=include_ref,
             ),
             row=1,

From e2eea24df8ec231930d4c2ccc0f4c9114399e340 Mon Sep 17 00:00:00 2001
From: davidwalter2 <david.walter@cern.ch>
Date: Sun, 25 Jan 2026 12:53:20 -0500
Subject: [PATCH 5/9] Fix betavar in presence of multiple channels

---
 rabbit/tensorwriter.py | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/rabbit/tensorwriter.py b/rabbit/tensorwriter.py
index 8af81098..108e5f04 100644
--- a/rabbit/tensorwriter.py
+++ b/rabbit/tensorwriter.py
@@ -439,7 +439,10 @@ def add_beta_variations(
         variation = h.project(*dest_axes_names, *source_axes_names).values()
         variation = variation.reshape((*norm.shape, -1))
 
-        self.dict_beta_variations[dest_channel][process] = variation
+        if source_channel not in self.dict_beta_variations[dest_channel].keys():
+            self.dict_beta_variations[dest_channel][source_channel] = {}
+
+        self.dict_beta_variations[dest_channel][source_channel][process] = variation
 
     def get_logk(self, syst, norm, kfac=1.0, systematic_type=None):
         if not np.all(np.isfinite(syst)):
@@ -795,9 +798,31 @@ def write(self, outfolder="./", outfilename="rabbit_input.hdf5", args={}):
                                     dict_logkhalfdiff_proc[syst]
                                 )
 
-                    if proc in self.dict_beta_variations[chan]:
-                        beta_vars = self.dict_beta_variations[chan][proc]
-                        beta_variations[ibin : ibin + nbinschan, :, iproc] = beta_vars
+                    for (
+                        source_channel,
+                        source_channel_dict,
+                    ) in self.dict_beta_variations[chan].items():
+                        if proc in source_channel_dict:
+
+                            # find the bins of the source channel
+                            ibin_start = 0
+                            for c, nb in self.nbinschan.items():
+                                if self.channels[c]["masked"]:
+                                    continue  # masked channels can not be source channels
+                                if c == source_channel:
+                                    ibin_end = ibin_start + nb
+                                    break
+                                else:
+                                    ibin_start += nb
+                            else:
+                                raise RuntimeError(
+                                    f"Did not find source channel {source_channel} in list of channels {[k for k in self.nbinschan.keys()]}"
+                                )
+
+                            beta_vars = source_channel_dict[proc]
+                            beta_variations[
+                                ibin : ibin + nbinschan, ibin_start:ibin_end, iproc
+                            ] = beta_vars
 
                 ibin += nbinschan
 

From 0ced4fd6a2f9f0ec0d8b260949a4965c2a677443 Mon Sep 17 00:00:00 2001
From: davidwalter2 <david.walter@cern.ch>
Date: Sun, 25 Jan 2026 13:48:33 -0500
Subject: [PATCH 6/9] Add functionality to set parameter constraint minium

---
 bin/rabbit_fit.py |  7 +++++++
 rabbit/fitter.py  | 25 ++++++++++++++++++++-----
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/bin/rabbit_fit.py b/bin/rabbit_fit.py
index 2bb289d7..41abbe8e 100755
--- a/bin/rabbit_fit.py
+++ b/bin/rabbit_fit.py
@@ -182,6 +182,13 @@ def make_parser():
         action="store_true",
         help="compute impacts of frozen (non-profiled) systematics",
     )
+    parser.add_argument(
+        "--setConstraintMinimum",
+        default=[],
+        nargs=2,
+        action="append",
+        help="Set the constraint minima of specified parameter to specified value",
+    )
 
     return parser.parse_args()
 
diff --git a/rabbit/fitter.py b/rabbit/fitter.py
index 6c8652da..dc1db755 100644
--- a/rabbit/fitter.py
+++ b/rabbit/fitter.py
@@ -139,12 +139,27 @@ def __init__(
 
         self.parms = np.concatenate([self.poi_model.pois, self.indata.systs])
 
+        # tf tensor containing default constraint minima
+        theta0default = np.zeros(self.indata.nsyst)
+        for parm, val in options.setConstraintMinimum:
+            idx = np.where(self.indata.systs.astype(str) == parm)[0]
+            if len(idx) != 1:
+                raise RuntimeError(
+                    f"Expect to find exactly one match for {parm} to set constraint minimum, but found {len(len(idx))}"
+                )
+            theta0default[idx[0]] = val
+
+        self.theta0default = tf.convert_to_tensor(
+            theta0default, dtype=self.indata.dtype
+        )
+
         # tf variable containing all fit parameters
-        thetadefault = tf.zeros([self.indata.nsyst], dtype=self.indata.dtype)
         if self.poi_model.npoi > 0:
-            xdefault = tf.concat([self.poi_model.xpoidefault, thetadefault], axis=0)
+            xdefault = tf.concat(
+                [self.poi_model.xpoidefault, self.theta0default], axis=0
+            )
         else:
-            xdefault = thetadefault
+            xdefault = self.theta0default
 
         self.x = tf.Variable(xdefault, trainable=True, name="x")
 
@@ -184,7 +199,7 @@ def __init__(
 
         # constraint minima for nuisance parameters
         self.theta0 = tf.Variable(
-            tf.zeros([self.indata.nsyst], dtype=self.indata.dtype),
+            self.theta0default,
             trainable=False,
             name="theta0",
         )
@@ -488,7 +503,7 @@ def set_beta0(self, values):
         self.logbeta0.assign(tf.math.log(beta0safe))
 
     def theta0defaultassign(self):
-        self.theta0.assign(tf.zeros([self.indata.nsyst], dtype=self.theta0.dtype))
+        self.theta0.assign(self.theta0default)
 
     def xdefaultassign(self):
         if self.poi_model.npoi == 0:

From 6ccde5c63acc520ebba53b5913592884a7e44ba6 Mon Sep 17 00:00:00 2001
From: David Walter <david.walter@cern.ch>
Date: Sun, 1 Mar 2026 19:34:15 +0100
Subject: [PATCH 7/9] Fix remaining merge conflicts

---
 bin/rabbit_fit.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/bin/rabbit_fit.py b/bin/rabbit_fit.py
index 2db7ee64..44893f19 100755
--- a/bin/rabbit_fit.py
+++ b/bin/rabbit_fit.py
@@ -182,13 +182,6 @@ def make_parser():
         action="store_true",
         help="compute impacts of frozen (non-profiled) systematics",
     )
-    parser.add_argument(
-        "--setConstraintMinimum",
-        default=[],
-        nargs=2,
-        action="append",
-        help="Set the constraint minima of specified parameter to specified value",
-    )
 
     return parser.parse_args()
 

From 14f0c0eecde6b2ef22b4c4659fe6763c44d4cf15 Mon Sep 17 00:00:00 2001
From: David Walter <david.walter@cern.ch>
Date: Mon, 2 Mar 2026 19:06:33 +0100
Subject: [PATCH 8/9] Fix parameter freezing

---
 README.md                                  |   2 +-
 notebooks/tutorial_1_getting_started.ipynb | 168 ++++++++++--
 notebooks/tutorial_2_advanced.ipynb        | 294 ++++++++++++++++-----
 rabbit/fitter.py                           |  17 +-
 4 files changed, 384 insertions(+), 97 deletions(-)

diff --git a/README.md b/README.md
index e36795cc..2f008ba2 100644
--- a/README.md
+++ b/README.md
@@ -56,7 +56,7 @@ python -m venv env
 Then activate it and install the necessary packages
 ```bash
 source env/bin/activate
-pip install wums[pickling,plotting] tensorflow tensorflow-probability numpy h5py hist scipy matplotlib mplhep seaborn pandas plotly kaleido
+pip install wums[pickling,plotting] tensorflow tensorflow-probability tf_keras numpy h5py hist scipy matplotlib mplhep seaborn pandas plotly kaleido
 ```
 The packages `matplotlib`, `mplhep`, `seaborn`, `pandas`, `plotly`, and `kaleido` are only needed for the plotting scripts. 
 For the `text2hdf5.py` conversion also the `uproot` package is needed.
diff --git a/notebooks/tutorial_1_getting_started.ipynb b/notebooks/tutorial_1_getting_started.ipynb
index 38b9e5fb..891553d1 100644
--- a/notebooks/tutorial_1_getting_started.ipynb
+++ b/notebooks/tutorial_1_getting_started.ipynb
@@ -22,7 +22,36 @@
    "cell_type": "markdown",
    "id": "76691d0f",
    "metadata": {},
-   "source": "There are many ways to set a working environment. The following is a suggestion that works on my local laptop.\n\nIn your terminal, clone the rabbit repository:\n``` bash\ngit clone git@github.com:WMass/rabbit.git\ncd rabbit\n```\n\nCreate a virtual environment (requires python 3.12 or higher):\n``` bash\npython -m venv env\nsource env/bin/activate\npip install wums[pickling,plotting] tensorflow tensorflow-probability numpy h5py hist scipy matplotlib mplhep seaborn pandas plotly kaleido\n```\n\nInstall jupyter notebook specific packages and the kernel spec:\n``` bash\npip install ipykernel\npython -m ipykernel install --user --name=rabbit_env --display-name \"Python (Rabbit)\"\n```\n\nStart jupyter notebook:\n``` bash\njupyter notebook\n```\n\nAfter starting jupyter notebook, open the browser link, and navigate to this file you have to activate the kernel with the environment. Go to \"Kernel\" -> \"Change kernel\" -> \"Python (Rabbit)\".\nIf everything done right the following command should print the python path inside your python virtual environment:"
+   "source": [
+    "There are many ways to set a working environment. The following is a suggestion that works on my local laptop.\n",
+    "\n",
+    "In your terminal, clone the rabbit repository:\n",
+    "``` bash\n",
+    "git clone git@github.com:WMass/rabbit.git\n",
+    "cd rabbit\n",
+    "```\n",
+    "\n",
+    "Create a virtual environment (requires python 3.12 or higher):\n",
+    "``` bash\n",
+    "python -m venv env\n",
+    "source env/bin/activate\n",
+    "pip install wums[pickling,plotting] tensorflow tensorflow-probability numpy h5py hist scipy matplotlib mplhep seaborn pandas plotly kaleido\n",
+    "```\n",
+    "\n",
+    "Install jupyter notebook specific packages and the kernel spec:\n",
+    "``` bash\n",
+    "pip install ipykernel\n",
+    "python -m ipykernel install --user --name=rabbit_env --display-name \"Python (Rabbit)\"\n",
+    "```\n",
+    "\n",
+    "Start jupyter notebook:\n",
+    "``` bash\n",
+    "jupyter notebook\n",
+    "```\n",
+    "\n",
+    "After starting jupyter notebook, open the browser link, and navigate to this file you have to activate the kernel with the environment. Go to \"Kernel\" -> \"Change kernel\" -> \"Python (Rabbit)\".\n",
+    "If everything done right the following command should print the python path inside your python virtual environment:"
+   ]
   },
   {
    "cell_type": "code",
@@ -47,7 +76,9 @@
    "cell_type": "markdown",
    "id": "7b5178e4",
    "metadata": {},
-   "source": "Since we are using a lot of bash scripts we need to set the environment variables. Outside of a notebook this is done by running `source setup.sh` from the repository root. The cell below replicates the same effect inside the notebook:"
+   "source": [
+    "Since we are using a lot of bash scripts we need to set the environment variables. Outside of a notebook this is done by running `source setup.sh` from the repository root. The cell below replicates the same effect inside the notebook:"
+   ]
   },
   {
    "cell_type": "code",
@@ -95,7 +126,9 @@
    "cell_type": "markdown",
    "id": "817a8b07",
    "metadata": {},
-   "source": "The following is needed to visualize the plots within this session"
+   "source": [
+    "The following is needed to visualize the plots within this session"
+   ]
   },
   {
    "cell_type": "code",
@@ -129,7 +162,40 @@
    "id": "a52d336a",
    "metadata": {},
    "outputs": [],
-   "source": "import numpy as np\nimport hist\n\naxis = hist.axis.Regular(10,0,1, name=\"x\")\n\n# Create histograms for signal, 2 backgrounds, and data \nh_sig = hist.Hist(axis, storage=hist.storage.Weight())  \nh_flat = hist.Hist(axis, storage=hist.storage.Weight())  \nh_exp = hist.Hist(axis, storage=hist.storage.Weight())  \nh_data = hist.Hist(axis, storage=hist.storage.Double())  \n  \n# Generate and fill components with weights  \nnp.random.seed(42)  \n# Gaussian signal (mean=0.5, std=0.1) with weights  \nsig_samples = np.random.normal(0.5, 0.1, 5000)\nsig_weights = np.random.normal(1.0, 0.2, 5000)  # Mean weight=1, sigma=0.2 \nh_sig.fill(sig_samples, weight=sig_weights)  \n  \n# Flat background with weights  \nflat_samples = np.random.uniform(0, 1, 4000)  \nflat_weights = np.random.normal(0.5, 0.1, 4000)  # Mean weight=0.5, sigma=0.1\nh_flat.fill(flat_samples, weight=flat_weights)  \n  \n# Exponential background with weights  \nexp_samples = np.random.exponential(0.2, 2000)  \nexp_weights = np.random.normal(1.5, 0.3, 2000)  # Mean weight=1.5, sigma=0.3\nh_exp.fill(exp_samples, weight=exp_weights)  \n  \n# Sum components and add Poisson fluctuations\nh_data.values()[...] = np.random.poisson(  \n    h_sig.values() + h_flat.values() + h_exp.values()  \n)"
+   "source": [
+    "import numpy as np\n",
+    "import hist\n",
+    "\n",
+    "axis = hist.axis.Regular(10,0,1, name=\"x\")\n",
+    "\n",
+    "# Create histograms for signal, 2 backgrounds, and data \n",
+    "h_sig = hist.Hist(axis, storage=hist.storage.Weight())  \n",
+    "h_flat = hist.Hist(axis, storage=hist.storage.Weight())  \n",
+    "h_exp = hist.Hist(axis, storage=hist.storage.Weight())  \n",
+    "h_data = hist.Hist(axis, storage=hist.storage.Double())  \n",
+    "  \n",
+    "# Generate and fill components with weights  \n",
+    "np.random.seed(42)  \n",
+    "# Gaussian signal (mean=0.5, std=0.1) with weights  \n",
+    "sig_samples = np.random.normal(0.5, 0.1, 5000)\n",
+    "sig_weights = np.random.normal(1.0, 0.2, 5000)  # Mean weight=1, sigma=0.2 \n",
+    "h_sig.fill(sig_samples, weight=sig_weights)  \n",
+    "  \n",
+    "# Flat background with weights  \n",
+    "flat_samples = np.random.uniform(0, 1, 4000)  \n",
+    "flat_weights = np.random.normal(0.5, 0.1, 4000)  # Mean weight=0.5, sigma=0.1\n",
+    "h_flat.fill(flat_samples, weight=flat_weights)  \n",
+    "  \n",
+    "# Exponential background with weights  \n",
+    "exp_samples = np.random.exponential(0.2, 2000)  \n",
+    "exp_weights = np.random.normal(1.5, 0.3, 2000)  # Mean weight=1.5, sigma=0.3\n",
+    "h_exp.fill(exp_samples, weight=exp_weights)  \n",
+    "  \n",
+    "# Sum components and add Poisson fluctuations\n",
+    "h_data.values()[...] = np.random.poisson(  \n",
+    "    h_sig.values() + h_flat.values() + h_exp.values()  \n",
+    ")"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -174,7 +240,13 @@
    "cell_type": "markdown",
    "id": "ad673a47",
    "metadata": {},
-   "source": "## Constructing the input file rabbit expects\n\nTo perform a binned profile maximum likelihood fit of the provided templates to the data histogram we need to construct an input file in the required format. Rabbit offers a python interface to do that. Multiple channels can be provided where each channel has an arbitrary number of axes. Histograms for the data and predictions, called processes, need to be provided in the same axes. In the underlying data structure the multi-dimensional histogram for each channel get unrolled and that flat arrays get concatenated.\n\nIn our case we just have one channel and histograms with a single axis. "
+   "source": [
+    "## Constructing the input file rabbit expects\n",
+    "\n",
+    "To perform a binned profile maximum likelihood fit of the provided templates to the data histogram we need to construct an input file in the required format. Rabbit offers a python interface to do that. Multiple channels can be provided where each channel has an arbitrary number of axes. Histograms for the data and predictions, called processes, need to be provided in the same axes. In the underlying data structure the multi-dimensional histogram for each channel get unrolled and that flat arrays get concatenated.\n",
+    "\n",
+    "In our case we just have one channel and histograms with a single axis. "
+   ]
   },
   {
    "cell_type": "code",
@@ -208,7 +280,16 @@
    "cell_type": "markdown",
    "id": "14da4ee7",
    "metadata": {},
-   "source": "A real measurement has systematic uncertainties. Rabbit does not distinguish between normalization and shape uncertainties. Each uncertainty source is characterized by one up and one down variation across all bins, per process. Internally a 4D tensor is constructed with (processes, bins, systematics, up/down) variation. It is possible to symmetrize systematic variations to make the likelihood more quadratic and aid the minimization and establish validity of Gaussian approximations for example for the uncertainty estimation. \n* For systematic variations that are expected to by symmetric (but may have some statistical fluctuations which makes them asymmetric) the option \"average\" is recommended which is, as the name suggests, building the average of the up and down variation. \n* For ones that are known to be asymmetric, the \"linear\" or \"quadratic\" (more conservative) options are recommended where the asymmetric variation is split into two symmetric variations, one for the average and one for the difference.\n* If only one histogram is provided it will be assumed as symmetric and mirrored.\n\nIf systematic uncertainties are only provided for a subset of the channels or processes the variations get zero padded. \n\nBy default, all systematic uncertainties have a multiplicative effect on the predicted yield and are log-normal constraint. Alternatively additive normal constrained systematics can be used by setting systematic_type to \"normal\" in the TensorWriter construction."
+   "source": [
+    "A real measurement has systematic uncertainties. Rabbit does not distinguish between normalization and shape uncertainties. Each uncertainty source is characterized by one up and one down variation across all bins, per process. Internally a 4D tensor is constructed with (processes, bins, systematics, up/down) variation. It is possible to symmetrize systematic variations to make the likelihood more quadratic and aid the minimization and establish validity of Gaussian approximations for example for the uncertainty estimation. \n",
+    "* For systematic variations that are expected to by symmetric (but may have some statistical fluctuations which makes them asymmetric) the option \"average\" is recommended which is, as the name suggests, building the average of the up and down variation. \n",
+    "* For ones that are known to be asymmetric, the \"linear\" or \"quadratic\" (more conservative) options are recommended where the asymmetric variation is split into two symmetric variations, one for the average and one for the difference.\n",
+    "* If only one histogram is provided it will be assumed as symmetric and mirrored.\n",
+    "\n",
+    "If systematic uncertainties are only provided for a subset of the channels or processes the variations get zero padded. \n",
+    "\n",
+    "By default, all systematic uncertainties have a multiplicative effect on the predicted yield and are log-normal constraint. Alternatively additive normal constrained systematics can be used by setting systematic_type to \"normal\" in the TensorWriter construction."
+   ]
   },
   {
    "cell_type": "code",
@@ -268,7 +349,9 @@
    "cell_type": "markdown",
    "id": "91497f1e",
    "metadata": {},
-   "source": "Finally, we store the file on disk. Note that a `RuntimeWarning: invalid value encountered in divide` may appear when adding systematics — this is expected and harmless. It occurs when a process has zero nominal yield in some bins, causing `log(0)` during the internal log-normal encoding; those entries are clipped to a small numerical value automatically."
+   "source": [
+    "Finally, we store the file on disk. Note that a `RuntimeWarning: invalid value encountered in divide` may appear when adding systematics — this is expected and harmless. It occurs when a process has zero nominal yield in some bins, causing `log(0)` during the internal log-normal encoding; those entries are clipped to a small numerical value automatically."
+   ]
   },
   {
    "cell_type": "code",
@@ -293,7 +376,9 @@
    "cell_type": "markdown",
    "id": "b46ae232",
    "metadata": {},
-   "source": "The following steps are using bash commands to investigate"
+   "source": [
+    "The following steps are using bash commands to investigate"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -351,7 +436,10 @@
    "cell_type": "markdown",
    "id": "e7aebc1c",
    "metadata": {},
-   "source": "Looks good! However, the scripts does not know what you are trying to do and the checks have a limited coverage. It does not guarantee that things work. \nMore insight can be obtained from looking at the data, nominal predictions, and systematic variations, exactly as they go into the fit. Do the variations look sane to you?"
+   "source": [
+    "Looks good! However, the scripts does not know what you are trying to do and the checks have a limited coverage. It does not guarantee that things work. \n",
+    "More insight can be obtained from looking at the data, nominal predictions, and systematic variations, exactly as they go into the fit. Do the variations look sane to you?"
+   ]
   },
   {
    "cell_type": "code",
@@ -402,7 +490,31 @@
    "cell_type": "markdown",
    "id": "85f32bbf",
    "metadata": {},
-   "source": "## Fit\n\nFrom the provided input data a likelihood function is constructed that has to be maximized. As usual, the negative logarithm of the likelihood is implemented for minimization and constant terms are neglected:\n\n$-\\ln\\left( \\mathcal{L} \\right) = \\sum_{i=1}^\\mathrm{bins} \\left[ n^\\mathrm{exp}_i - n^\\mathrm{obs}_i \\ln(n^\\mathrm{exp}_i) \\right] + \\frac{1}{2} \\sum_{k=1}^\\mathrm{syst} \\left(\\theta_k - \\theta^0_k\\right)^2 + \\sum_{i=1}^\\mathrm{bins} \\left[ k^\\mathrm{stat}_i \\beta_i - k^\\mathrm{stat}_i \\beta_i^0 \\ln(\\beta_i) \\right]$\n\nwith $n_i = \\sum_j^\\mathrm{proc} f_{i,j}(\\vec{\\mu}) \\beta_i n_{i,j}^0 \\prod_k^\\mathrm{syst} \\kappa_{i,j,k}^{\\theta_k}$ and $\\kappa_{i, j, k} = \\frac{n^0_{i,j} + v_{i, j, k}}{n^0_{i,j}}$. \nWith the bin index $i$, process index $j$ and systematic index $k$. The parameters associated to each systematic variation is $\\theta$ while $\\theta^0$ is the constraint minimum that is by default set to 0 but can be varied for toy experiments. The $\\beta$ and $\\beta^0$ parameters account for the statistical uncertainty on the prediction and the Barlow--Beeston light approach is followed by default. The $k^\\mathrm{stat}$ numbers encodes the statistical uncertainty on the predictions. The $\\mu$ are parameters of interest (POI) and their effect on the yields is modelled via the so-called POI model $f$, which by default returns 1 for signal and 0 for background processes. \n\nThe `-t` option selects which dataset to fit:\n- `-t -1`: fit to the **Asimov dataset**, where observed data = sum of all prefit predictions. Use this to get expected sensitivities and to validate the fit setup before looking at real data.\n- `-t 0`: fit to the **observed data**.\n- Multiple values (e.g. `-t 0 -1`) run both fits and store the results under separate keys in the output file.\n\nBy default fits to data are **blinded**: a random offset is applied to the POIs so the true data result is not visible until you add `--unblind`.\n\nTo start with, we perform a fit to the Asimov dataset using `-t -1` and save:\n- the expected prefit and postfit distribution (--saveHists) for each process (--saveHistsPerProcess) and the total uncertainties (--computeHistErrors)\n- the effect of the systematic variations (--computeVariations)\n- the uncertainty breakdown on the signal strength modifier following the traditional (--doImpacts) and global impacts definition (--globalImpacts)\n\nWhen the fit terminates it prints out the estimated distance to the minimum (edmval). If the edmval is large (e.g. larger than 10^{-9}) it is a sign that the fit has not converged. \nIt also prints the p-value from the saturated likelihood test in the asymptotic limit. This tells you how well your model describes the data."
+   "source": [
+    "## Fit\n",
+    "\n",
+    "From the provided input data a likelihood function is constructed that has to be maximized. As usual, the negative logarithm of the likelihood is implemented for minimization and constant terms are neglected:\n",
+    "\n",
+    "$-\\ln\\left( \\mathcal{L} \\right) = \\sum_{i=1}^\\mathrm{bins} \\left[ n^\\mathrm{exp}_i - n^\\mathrm{obs}_i \\ln(n^\\mathrm{exp}_i) \\right] + \\frac{1}{2} \\sum_{k=1}^\\mathrm{syst} \\left(\\theta_k - \\theta^0_k\\right)^2 + \\sum_{i=1}^\\mathrm{bins} \\left[ k^\\mathrm{stat}_i \\beta_i - k^\\mathrm{stat}_i \\beta_i^0 \\ln(\\beta_i) \\right]$\n",
+    "\n",
+    "with $n_i = \\sum_j^\\mathrm{proc} f_{i,j}(\\vec{\\mu}) \\beta_i n_{i,j}^0 \\prod_k^\\mathrm{syst} \\kappa_{i,j,k}^{\\theta_k}$ and $\\kappa_{i, j, k} = \\frac{n^0_{i,j} + v_{i, j, k}}{n^0_{i,j}}$. \n",
+    "With the bin index $i$, process index $j$ and systematic index $k$. The parameters associated to each systematic variation is $\\theta$ while $\\theta^0$ is the constraint minimum that is by default set to 0 but can be varied for toy experiments. The $\\beta$ and $\\beta^0$ parameters account for the statistical uncertainty on the prediction and the Barlow--Beeston light approach is followed by default. The $k^\\mathrm{stat}$ numbers encodes the statistical uncertainty on the predictions. The $\\mu$ are parameters of interest (POI) and their effect on the yields is modelled via the so-called POI model $f$, which by default returns 1 for signal and 0 for background processes. \n",
+    "\n",
+    "The `-t` option selects which dataset to fit:\n",
+    "- `-t -1`: fit to the **Asimov dataset**, where observed data = sum of all prefit predictions. Use this to get expected sensitivities and to validate the fit setup before looking at real data.\n",
+    "- `-t 0`: fit to the **observed data**.\n",
+    "- Multiple values (e.g. `-t 0 -1`) run both fits and store the results under separate keys in the output file.\n",
+    "\n",
+    "By default fits to data are **blinded**: a random offset is applied to the POIs so the true data result is not visible until you add `--unblind`.\n",
+    "\n",
+    "To start with, we perform a fit to the Asimov dataset using `-t -1` and save:\n",
+    "- the expected prefit and postfit distribution (--saveHists) for each process (--saveHistsPerProcess) and the total uncertainties (--computeHistErrors)\n",
+    "- the effect of the systematic variations (--computeVariations)\n",
+    "- the uncertainty breakdown on the signal strength modifier following the traditional (--doImpacts) and global impacts definition (--globalImpacts)\n",
+    "\n",
+    "When the fit terminates it prints out the estimated distance to the minimum (edmval). If the edmval is large (e.g. larger than 10^{-9}) it is a sign that the fit has not converged. \n",
+    "It also prints the p-value from the saturated likelihood test in the asymptotic limit. This tells you how well your model describes the data."
+   ]
   },
   {
    "cell_type": "code",
@@ -528,7 +640,11 @@
    "cell_type": "markdown",
    "id": "702b81b5",
    "metadata": {},
-   "source": "### Pulls and impacts\n\nImpact parameter pulls and impacts are computed by default on all parameters of interest and in the Gaussian approximation. Here, the impact on the signal strength modifier is plotted."
+   "source": [
+    "### Pulls and impacts\n",
+    "\n",
+    "Impact parameter pulls and impacts are computed by default on all parameters of interest and in the Gaussian approximation. Here, the impact on the signal strength modifier is plotted."
+   ]
   },
   {
    "cell_type": "code",
@@ -2327,8 +2443,13 @@
   {
    "cell_type": "markdown",
    "id": "7s4h290l4h2",
-   "source": "How to read the pulls table:\n- **pull**: the postfit parameter value. For constrained nuisances this is in units of the prefit uncertainty (a pull of 0 means the nuisance is at its prefit central value, ±1 means it shifted by one standard deviation). For unconstrained parameters (POIs and free parameters like `slope`) it is the fitted value in its natural units.\n- **constraint**: the postfit uncertainty, again in units of the prefit uncertainty for constrained nuisances. A value significantly less than 1 indicates the data is constraining that nuisance beyond the prior.\n- The values in parentheses are the **prefit** equivalents: the prior central value and uncertainty before the fit.",
-   "metadata": {}
+   "metadata": {},
+   "source": [
+    "How to read the pulls table:\n",
+    "- **pull**: the postfit parameter value. For constrained nuisances this is in units of the prefit uncertainty (a pull of 0 means the nuisance is at its prefit central value, ±1 means it shifted by one standard deviation). For unconstrained parameters (POIs and free parameters like `slope`) it is the fitted value in its natural units.\n",
+    "- **constraint**: the postfit uncertainty, again in units of the prefit uncertainty for constrained nuisances. A value significantly less than 1 indicates the data is constraining that nuisance beyond the prior.\n",
+    "- The values in parentheses are the **prefit** equivalents: the prior central value and uncertainty before the fit."
+   ]
   },
   {
    "cell_type": "code",
@@ -2378,12 +2499,21 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "354c760d",
+   "cell_type": "markdown",
+   "id": "995df4af",
    "metadata": {},
-   "outputs": [],
-   "source": "## Summary\n\nIn this tutorial you have learned how to:\n- Build an input tensor with `TensorWriter`: channels, processes, normalization and shape systematics\n- Diagnose input data with `debug_inputdata.py` and `rabbit_plot_inputdata.py`\n- Run a profile likelihood fit with `rabbit_fit.py` on Asimov (`-t -1`) and observed data (`-t 0`)\n- Interpret fit results: prefit/postfit distributions, pulls and constraints, impacts, correlation matrix, and likelihood scans\n- Use blinding and unblinding to protect the data result during fit development\n\n**Next steps:** [Tutorial 2](tutorial_2_advanced.ipynb) covers multi-dimensional channels, masked channels for cross section measurements, custom mappings, and custom POI models."
+   "source": [
+    "## Summary\n",
+    "\n",
+    "In this tutorial you have learned how to:\n",
+    "- Build an input tensor with `TensorWriter`: channels, processes, normalization and shape systematics\n",
+    "- Diagnose input data with `debug_inputdata.py` and `rabbit_plot_inputdata.py`\n",
+    "- Run a profile likelihood fit with `rabbit_fit.py` on Asimov (`-t -1`) and observed data (`-t 0`)\n",
+    "- Interpret fit results: prefit/postfit distributions, pulls and constraints, impacts, correlation matrix, and likelihood scans\n",
+    "- Use blinding and unblinding to protect the data result during fit development\n",
+    "\n",
+    "**Next steps:** [Tutorial 2](tutorial_2_advanced.ipynb) covers multi-dimensional channels, masked channels for cross section measurements, custom mappings, and custom POI models."
+   ]
   }
  ],
  "metadata": {
@@ -2407,4 +2537,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
\ No newline at end of file
+}
diff --git a/notebooks/tutorial_2_advanced.ipynb b/notebooks/tutorial_2_advanced.ipynb
index a8995193..3513fb68 100644
--- a/notebooks/tutorial_2_advanced.ipynb
+++ b/notebooks/tutorial_2_advanced.ipynb
@@ -4,7 +4,15 @@
    "cell_type": "markdown",
    "id": "95d66795",
    "metadata": {},
-   "source": "# Tutorial 2: Mappings, masked channels, and POI models\n\nMasked channels are auxiliary channels that don't enter the fit in the default setup (although they might indirectly through more advanced use cases such as regularization). They contain processes with systematic variations as regular channels. This allows to propagate the fitresult into these distributions.\n\nA common use case is a cross section measurement. The cross section is not simply scaling with the signal strength modifier, i.e. $\\sigma_\\mathrm{measured} \\neq \\mu \\sigma$. Instead, it depends on all systematic uncertainties that change the cross section, usually the theory uncertainties. Masked channels allow to define histograms containing the full dependency of the cross section, i.e. $\\sigma_\\mathrm{measured} = \\sigma(\\mu, \\vec{\\theta})$ where $\\vec{\\theta}$ are all systematic uncertainties that the cross section depends on. In case a differential cross section measurement is performed, the masked channel contains the generator-level distribution with all its dependencies.\n\nMappings are transformations of parameters and observables (histograms). Baseline mappings are 'BaseMapping', 'Project', 'Ratio', or 'AngularCoefficients' but custom mappings can be defined. Any differential function can be implemented as a mapping. Mappings can be applied to regular or masked channels."
+   "source": [
+    "# Tutorial 2: Mappings, masked channels, and POI models\n",
+    "\n",
+    "Masked channels are auxiliary channels that don't enter the fit in the default setup (although they might indirectly through more advanced use cases such as regularization). They contain processes with systematic variations as regular channels. This allows to propagate the fitresult into these distributions.\n",
+    "\n",
+    "A common use case is a cross section measurement. The cross section is not simply scaling with the signal strength modifier, i.e. $\\sigma_\\mathrm{measured} \\neq \\mu \\sigma$. Instead, it depends on all systematic uncertainties that change the cross section, usually the theory uncertainties. Masked channels allow to define histograms containing the full dependency of the cross section, i.e. $\\sigma_\\mathrm{measured} = \\sigma(\\mu, \\vec{\\theta})$ where $\\vec{\\theta}$ are all systematic uncertainties that the cross section depends on. In case a differential cross section measurement is performed, the masked channel contains the generator-level distribution with all its dependencies.\n",
+    "\n",
+    "Mappings are transformations of parameters and observables (histograms). Baseline mappings are 'BaseMapping', 'Project', 'Ratio', or 'AngularCoefficients' but custom mappings can be defined. Any differential function can be implemented as a mapping. Mappings can be applied to regular or masked channels."
+   ]
   },
   {
    "cell_type": "markdown",
@@ -27,7 +35,7 @@
      "output_type": "stream",
      "text": [
       "RABBIT_BASE: /home/david/work/Repos/rabbit\n",
-      "PATH: /home/david/work/Repos/rabbit/bin:/home/david/work/Repos/rabbit/env/bin:/home/david/.local/bin:/home/david/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin\n"
+      "PATH: /home/david/work/Repos/rabbit/env/bin:/home/david/.local/bin:/home/david/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin:/home/david/work/Repos/rabbit/bin\n"
      ]
     }
    ],
@@ -72,21 +80,113 @@
    "cell_type": "markdown",
    "id": "a9bd52fc",
    "metadata": {},
-   "source": "## Generating a synthetic toy model\n\nWe generate the same model as in the tutorial 1, but now we add an additional category axis, this could for example reflect 2 charges, lepton flavors, or any other variable. We also include a masked channel for the cross sections of the signal in these two categories."
+   "source": [
+    "## Generating a synthetic toy model\n",
+    "\n",
+    "We generate the same model as in the tutorial 1, but now we add an additional category axis, this could for example reflect 2 charges, lepton flavors, or any other variable. We also include a masked channel for the cross sections of the signal in these two categories."
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "3e5e9eef",
    "metadata": {},
    "outputs": [],
-   "source": "import numpy as np\nimport hist\nfrom wums import boostHistHelpers as hh\n\naxis = hist.axis.Regular(10,0,1, name=\"x\")\naxis_cat = hist.axis.Regular(2,-2,2, name=\"cat\", flow=False)\n\n# Create histograms for signal, 2 backgrounds, and data \nh_sig = hist.Hist(axis, axis_cat, storage=hist.storage.Weight())  \nh_flat = hist.Hist(axis, axis_cat, storage=hist.storage.Weight())  \nh_exp = hist.Hist(axis, axis_cat, storage=hist.storage.Weight())  \nh_data = hist.Hist(axis, axis_cat, storage=hist.storage.Double())  \n  \n# Generate and fill components with weights  \nnp.random.seed(42)  \n# Gaussian signal (mean=0.5, std=0.1) with weights  \nsig_samples = np.random.normal(0.5, 0.1, 5000)\nsig_cats = np.random.choice([-1, 1], size=5000)\nsig_weights = np.random.normal(1.0, 0.2, 5000)  # Mean weight=1, sigma=0.2 \nh_sig.fill(sig_samples, sig_cats, weight=sig_weights)  \n  \n# Flat background with weights  \nflat_samples = np.random.uniform(0, 1, 4000)  \nflat_cats = np.random.choice([-1, 1], size=4000)\nflat_weights = np.random.normal(0.5, 0.1, 4000)  # Mean weight=0.5, sigma=0.1\nh_flat.fill(flat_samples, flat_cats, weight=flat_weights)  \n  \n# Exponential background with weights  \nexp_samples = np.random.exponential(0.2, 500)  \nexp_cats = np.random.choice([-1, 1], size=500)\nexp_weights = np.random.normal(10, 2, 500)  # Mean weight=10, sigma=2 \nh_exp.fill(exp_samples, exp_cats, weight=exp_weights)  \n  \n# Sum components and add Poisson fluctuations\nh_data.values()[...] = np.random.poisson(  \n    h_sig.values() + h_flat.values() + h_exp.values()  \n)\n\n# Generate and 2D Shape Systematics\ndef make_shape_var(h, factor_func):  \n    h_var = h.copy()  \n    \n    # 1. Calculate the x-dependent weights (1D array)\n    centers_x = h.axes[0].centers  \n    weights_x = factor_func(centers_x)  \n    \n    # 2. Create the sign flip for the categories\n    # Assuming bin 0 is 'negative' and bin 1 is 'positive'\n    # This creates an array: [-1, 1]\n    category_signs = np.array([-1, 1])\n    \n    # 3. Combine them using broadcasting\n    # (N, 1) * (1, 2) results in an (N, 2) matrix of weights\n    # The first column will be -weights_x, the second will be +weights_x\n    total_weights = weights_x[:, np.newaxis] * category_signs[np.newaxis, :]\n        \n    # 4. Apply to the histogram values\n    h_var.values()[...] = h.values() * (1 + total_weights)  \n    return h_var\n\n# Variation for flat background\nh_flat_up = make_shape_var(h_flat, lambda c: 0.1 * np.exp(2 * (c - 0.5)))  \nh_flat_dn = make_shape_var(h_flat, lambda c: -0.08 * np.exp(2 * (c - 0.5)))  \n\n# Variation for signal (slope)\nh_sig_up = make_shape_var(h_sig, lambda c: 0.5 * (c - 0.5))  \nh_sig_dn = make_shape_var(h_sig, lambda c: -0.5 * (c - 0.5))  \n\n# This represents the cross section histogram with 2 bins in the category axis\n#  This could for example be the information at generator level.\n#  We divide the number of expected events by a hypothetical luminosity and efficiency\nlumi = 1000\neff = np.array([0.91, 0.92])\nacc = np.array([0.52, 0.78])\nscale = 1 / (lumi * eff * acc)\n\nh_sig_xsec = hh.scaleHist(h_sig.project(\"cat\"), scale) \nh_sig_xsec.variances()[...] = 0 # let's asume we have infinite statistics at gen level\n\nh_sig_xsec_up = hh.scaleHist(h_sig_up.project(\"cat\"), scale)\nh_sig_xsec_dn = hh.scaleHist(h_sig_dn.project(\"cat\"), scale)"
+   "source": [
+    "import numpy as np\n",
+    "import hist\n",
+    "from wums import boostHistHelpers as hh\n",
+    "\n",
+    "axis = hist.axis.Regular(10,0,1, name=\"x\")\n",
+    "axis_cat = hist.axis.Regular(2,-2,2, name=\"cat\", flow=False)\n",
+    "\n",
+    "# Create histograms for signal, 2 backgrounds, and data \n",
+    "h_sig = hist.Hist(axis, axis_cat, storage=hist.storage.Weight())  \n",
+    "h_flat = hist.Hist(axis, axis_cat, storage=hist.storage.Weight())  \n",
+    "h_exp = hist.Hist(axis, axis_cat, storage=hist.storage.Weight())  \n",
+    "h_data = hist.Hist(axis, axis_cat, storage=hist.storage.Double())  \n",
+    "  \n",
+    "# Generate and fill components with weights  \n",
+    "np.random.seed(42)  \n",
+    "# Gaussian signal (mean=0.5, std=0.1) with weights  \n",
+    "sig_samples = np.random.normal(0.5, 0.1, 5000)\n",
+    "sig_cats = np.random.choice([-1, 1], size=5000)\n",
+    "sig_weights = np.random.normal(1.0, 0.2, 5000)  # Mean weight=1, sigma=0.2 \n",
+    "h_sig.fill(sig_samples, sig_cats, weight=sig_weights)  \n",
+    "  \n",
+    "# Flat background with weights  \n",
+    "flat_samples = np.random.uniform(0, 1, 4000)  \n",
+    "flat_cats = np.random.choice([-1, 1], size=4000)\n",
+    "flat_weights = np.random.normal(0.5, 0.1, 4000)  # Mean weight=0.5, sigma=0.1\n",
+    "h_flat.fill(flat_samples, flat_cats, weight=flat_weights)  \n",
+    "  \n",
+    "# Exponential background with weights  \n",
+    "exp_samples = np.random.exponential(0.2, 500)  \n",
+    "exp_cats = np.random.choice([-1, 1], size=500)\n",
+    "exp_weights = np.random.normal(10, 2, 500)  # Mean weight=10, sigma=2 \n",
+    "h_exp.fill(exp_samples, exp_cats, weight=exp_weights)  \n",
+    "  \n",
+    "# Sum components and add Poisson fluctuations\n",
+    "h_data.values()[...] = np.random.poisson(  \n",
+    "    h_sig.values() + h_flat.values() + h_exp.values()  \n",
+    ")\n",
+    "\n",
+    "# Generate and 2D Shape Systematics\n",
+    "def make_shape_var(h, factor_func):  \n",
+    "    h_var = h.copy()  \n",
+    "    \n",
+    "    # 1. Calculate the x-dependent weights (1D array)\n",
+    "    centers_x = h.axes[0].centers  \n",
+    "    weights_x = factor_func(centers_x)  \n",
+    "    \n",
+    "    # 2. Create the sign flip for the categories\n",
+    "    # Assuming bin 0 is 'negative' and bin 1 is 'positive'\n",
+    "    # This creates an array: [-1, 1]\n",
+    "    category_signs = np.array([-1, 1])\n",
+    "    \n",
+    "    # 3. Combine them using broadcasting\n",
+    "    # (N, 1) * (1, 2) results in an (N, 2) matrix of weights\n",
+    "    # The first column will be -weights_x, the second will be +weights_x\n",
+    "    total_weights = weights_x[:, np.newaxis] * category_signs[np.newaxis, :]\n",
+    "        \n",
+    "    # 4. Apply to the histogram values\n",
+    "    h_var.values()[...] = h.values() * (1 + total_weights)  \n",
+    "    return h_var\n",
+    "\n",
+    "# Variation for flat background\n",
+    "h_flat_up = make_shape_var(h_flat, lambda c: 0.1 * np.exp(2 * (c - 0.5)))  \n",
+    "h_flat_dn = make_shape_var(h_flat, lambda c: -0.08 * np.exp(2 * (c - 0.5)))  \n",
+    "\n",
+    "# Variation for signal (slope)\n",
+    "h_sig_up = make_shape_var(h_sig, lambda c: 0.5 * (c - 0.5))  \n",
+    "h_sig_dn = make_shape_var(h_sig, lambda c: -0.5 * (c - 0.5))  \n",
+    "\n",
+    "# This represents the cross section histogram with 2 bins in the category axis\n",
+    "#  This could for example be the information at generator level.\n",
+    "#  We divide the number of expected events by a hypothetical luminosity and efficiency\n",
+    "lumi = 1000\n",
+    "eff = np.array([0.91, 0.92])\n",
+    "acc = np.array([0.52, 0.78])\n",
+    "scale = 1 / (lumi * eff * acc)\n",
+    "\n",
+    "h_sig_xsec = hh.scaleHist(h_sig.project(\"cat\"), scale) \n",
+    "h_sig_xsec.variances()[...] = 0 # let's asume we have infinite statistics at gen level\n",
+    "\n",
+    "h_sig_xsec_up = hh.scaleHist(h_sig_up.project(\"cat\"), scale)\n",
+    "h_sig_xsec_dn = hh.scaleHist(h_sig_dn.project(\"cat\"), scale)"
+   ]
   },
   {
    "cell_type": "markdown",
    "id": "0d9a3a3a",
    "metadata": {},
-   "source": "## Input tensor construction\n\nThis time we put this in a function to re-use it later.\n\nWe are also adding a masked channel with 2 cross section bins that contain the category information."
+   "source": [
+    "## Input tensor construction\n",
+    "\n",
+    "This time we put this in a function to re-use it later.\n",
+    "\n",
+    "We are also adding a masked channel with 2 cross section bins that contain the category information."
+   ]
   },
   {
    "cell_type": "code",
@@ -194,13 +294,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2026-02-27 18:23:09.708000: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
-      "2026-02-27 18:23:09.708363: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
-      "2026-02-27 18:23:09.760857: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+      "2026-03-02 12:37:47.489183: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
+      "2026-03-02 12:37:47.544034: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
       "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-      "2026-02-27 18:23:11.084363: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
-      "2026-02-27 18:23:11.084831: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
-      "2026-02-27 18:23:11.644896: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n",
+      "2026-03-02 12:37:48.826788: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
+      "2026-03-02 12:37:49.399114: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n",
       "ch0 {'axes': [Regular(10, 0, 1, name='x'), Regular(2, -2, 2, underflow=False, overflow=False, name='cat')], 'masked': False, 'flow': False, 'start': 0, 'stop': 20}\n",
       "ch0_masked {'axes': [Regular(2, -2, 2, underflow=False, overflow=False, name='cat')], 'masked': True, 'flow': False, 'start': 20, 'stop': 22}\n",
       "\u001b[38;21mINFO:rabbit_plot_inputdata.py: Make plots for channel: ch0\u001b[0m\n",
@@ -255,11 +353,18 @@
    "cell_type": "markdown",
    "id": "ac645cb2",
    "metadata": {},
-   "source": "## Fit\n\nNow we compute the effect on the masked channel using the mapping functionality:\n- -m Select ch0_masked just selects the ch0_masked histogram, \n- -m Project ch0_masked projects the histogram from ch0_masked to a single bin,\n- -m Ratio ch0_masked cat:0 cat:1 computes the ratio between the two category bins."
+   "source": [
+    "## Fit\n",
+    "\n",
+    "Now we compute the effect on the masked channel using the mapping functionality:\n",
+    "- -m Select ch0_masked just selects the ch0_masked histogram, \n",
+    "- -m Project ch0_masked projects the histogram from ch0_masked to a single bin,\n",
+    "- -m Ratio ch0_masked cat:0 cat:1 computes the ratio between the two category bins."
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 7,
    "id": "5db94378",
    "metadata": {},
    "outputs": [
@@ -267,13 +372,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2026-02-27 18:30:21.292606: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
-      "2026-02-27 18:30:21.292944: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
-      "2026-02-27 18:30:21.346486: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+      "2026-03-02 12:37:52.930564: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
+      "2026-03-02 12:37:52.982631: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
       "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-      "2026-02-27 18:30:22.734758: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
-      "2026-02-27 18:30:22.735175: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
-      "2026-02-27 18:30:25.134111: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n",
+      "2026-03-02 12:37:54.384452: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
+      "2026-03-02 12:37:56.857147: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n",
       "ch0 {'axes': [Regular(10, 0, 1, name='x'), Regular(2, -2, 2, underflow=False, overflow=False, name='cat')], 'masked': False, 'flow': False, 'start': 0, 'stop': 20}\n",
       "ch0_masked {'axes': [Regular(2, -2, 2, underflow=False, overflow=False, name='cat')], 'masked': True, 'flow': False, 'start': 20, 'stop': 22}\n",
       "\u001b[38;21mINFO:rabbit_fit.py: Save inclusive histogram for Select\u001b[0m\n",
@@ -291,16 +394,16 @@
       "\u001b[38;21mINFO:rabbit_fit.py: Save inclusive histogram for Select\u001b[0m\n",
       "\u001b[38;21mINFO:rabbit_fit.py: Save processes histogram for Select\u001b[0m\n",
       "\u001b[38;21mINFO:rabbit_fit.py: Save inclusive histogram for Project ch0_masked\u001b[0m\n",
-      "WARNING:tensorflow:5 out of the last 5 calls to <function Fitter.expected_with_variance at 0x7b71e86afa60> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.\n",
+      "WARNING:tensorflow:5 out of the last 5 calls to <function Fitter.expected_with_variance at 0x72539050e0c0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.\n",
       "\u001b[38;21mINFO:rabbit_fit.py: Save processes histogram for Project ch0_masked\u001b[0m\n",
-      "WARNING:tensorflow:5 out of the last 5 calls to <function Fitter.expected_variations at 0x7b71dd5a31a0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.\n",
+      "WARNING:tensorflow:5 out of the last 5 calls to <function Fitter.expected_variations at 0x72536a4791c0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.\n",
       "\u001b[38;21mINFO:rabbit_fit.py: Save inclusive histogram for Ratio ch0_masked ch0_masked cat:0 cat:1\u001b[0m\n",
-      "WARNING:tensorflow:6 out of the last 6 calls to <function Fitter.expected_with_variance at 0x7b71e86afa60> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.\n",
-      "WARNING:tensorflow:6 out of the last 6 calls to <function Fitter.expected_variations at 0x7b71dd5a31a0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.\n",
+      "WARNING:tensorflow:6 out of the last 6 calls to <function Fitter.expected_with_variance at 0x72539050e0c0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.\n",
+      "WARNING:tensorflow:6 out of the last 6 calls to <function Fitter.expected_variations at 0x72536a4791c0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.\n",
       "/home/david/work/Repos/rabbit/env/lib/python3.12/site-packages/wums/ioutils.py:161: FutureWarning: .metadata was not set, returning None instead of Attribute error, boost-histogram 1.7+ will error.\n",
       "  (axes, obj.storage_type(), obj.metadata, obj.label, obj.name, h5buf),\n",
       "Results written in file ./results/advanced/fitresults.hdf5\n",
-      "\u001b[38;21mINFO:rabbit_fit.py: 20.17 seconds total time\u001b[0m\n"
+      "\u001b[38;21mINFO:rabbit_fit.py: 20.22 seconds total time\u001b[0m\n"
      ]
     }
    ],
@@ -428,11 +531,23 @@
    "cell_type": "markdown",
    "id": "4345cd28",
    "metadata": {},
-   "source": "### Bin by bin statistics\nThe uncertainty on the prediction per bin and process is treated following the Barlow--Beeston approach. By default, the 'lite' approximation is followed where one implicit parameter per bin is assigned. \nSince the weights of each process have different means the 'full' implementation might be better, it can be activated via --binByBinStatMode full. "
+   "source": [
+    "### Bin by bin statistics\n",
+    "The uncertainty on the prediction per bin and process is treated following the Barlow--Beeston approach. By default, the 'lite' approximation is followed where one implicit 'beta' parameter per bin is assigned. \n",
+    "Since the weights of each process have different means the 'full' implementation might be better, it can be activated via `--binByBinStatMode full` to introduce one beta parameter per process and bins and `--binByBinStatType gamma` to treat each beta parameter as gamma distributed. The likelihood looks as follows:\n",
+    "\n",
+    "$-\\ln\\left( \\mathcal{L} \\right) = \\sum_{i=1}^\\mathrm{bins} \\left[ n^\\mathrm{exp}_i - n^\\mathrm{obs}_i \\ln(n^\\mathrm{exp}_i) \\right] + \\frac{1}{2} \\sum_{k=1}^\\mathrm{syst} \\left(\\theta_k - \\theta^0_k\\right)^2 + \\sum_{i=1}^\\mathrm{bins} \\sum_{j=1}^\\mathrm{process} \\left[ k^\\mathrm{stat}_{i,j} \\beta_{i,j} - k^\\mathrm{stat}_{i,j} \\beta_{i,j}^0 \\ln(\\beta_{i,j}) \\right]$\n",
+    "\n",
+    "with $n_i = \\sum_j^\\mathrm{proc} f_{i,j}(\\vec{\\mu}) \\beta_{i,j} n_{i,j}^0 \\prod_k^\\mathrm{syst} \\kappa_{i,j,k}^{\\theta_k}$ and $\\kappa_{i, j, k} = \\frac{n^0_{i,j} + v_{i, j, k}}{n^0_{i,j}}$ and $\\kappa_{i, j, k} = \\frac{n^0_{i,j} + v_{i, j, k}}{n^0_{i,j}}$.\n",
+    "\n",
+    "In this configuration the beta parameters can not be solved fully analytically and in each iteration of the fit a numerical procedure is followed to solve them numerically, which leads to an increase of runtime. The current implementation does not support the computation of impacts via Jacobian vector products, which has to be disabled via `--globalImpactsDisableJVP`.\n",
+    "\n",
+    "In case each bin is well populated the Gaussian approximation can be used with `--binByBinStatType normal-multiplicative`, which is solved analytic again."
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 16,
    "id": "c9574185",
    "metadata": {},
    "outputs": [
@@ -440,53 +555,54 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2026-02-27 18:23:51.395127: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
-      "2026-02-27 18:23:51.395480: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
-      "2026-02-27 18:23:51.446633: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+      "2026-03-02 12:44:53.027227: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
+      "2026-03-02 12:44:53.079520: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
       "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-      "2026-02-27 18:23:52.796864: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
-      "2026-02-27 18:23:52.797235: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
-      "2026-02-27 18:23:55.164252: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n",
+      "2026-03-02 12:44:54.425711: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
+      "2026-03-02 12:44:56.764407: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n",
       "ch0 {'axes': [Regular(10, 0, 1, name='x'), Regular(2, -2, 2, underflow=False, overflow=False, name='cat')], 'masked': False, 'flow': False, 'start': 0, 'stop': 20}\n",
       "ch0_masked {'axes': [Regular(2, -2, 2, underflow=False, overflow=False, name='cat')], 'masked': True, 'flow': False, 'start': 20, 'stop': 22}\n",
-      "\u001b[38;21mINFO:rabbit_fit.py: Save inclusive histogram for Select\u001b[0m\n",
-      "\u001b[38;21mINFO:rabbit_fit.py: Save processes histogram for Select\u001b[0m\n",
-      "\u001b[38;21mINFO:rabbit_fit.py: Save inclusive histogram for Project ch0_masked\u001b[0m\n",
-      "\u001b[38;21mINFO:rabbit_fit.py: Save processes histogram for Project ch0_masked\u001b[0m\n",
-      "\u001b[38;21mINFO:rabbit_fit.py: Save inclusive histogram for Ratio ch0_masked ch0_masked cat:0 cat:1\u001b[0m\n",
-      "\u001b[38;21mINFO:rabbit_fit.py: edmval: 4.56379251689586e-21\u001b[0m\n",
+      "\u001b[1;32mDEBUG:fitter.py: Updated list of frozen params: []\u001b[0m\n",
+      "\u001b[38;21mINFO:rabbit_fit.py: edmval: 1.4428862892810091e-27\u001b[0m\n",
       "\u001b[38;21mINFO:rabbit_fit.py: Saturated chi2:\u001b[0m\n",
       "\u001b[38;21mINFO:rabbit_fit.py:     ndof: 18\u001b[0m\n",
-      "\u001b[38;21mINFO:rabbit_fit.py:     2*deltaNLL: 6.98\u001b[0m\n",
-      "\u001b[38;21mINFO:rabbit_fit.py:     p-value: 99.03%\u001b[0m\n",
-      "\u001b[38;21mINFO:rabbit_fit.py: Save inclusive histogram for Select\u001b[0m\n",
-      "\u001b[38;21mINFO:rabbit_fit.py: Save processes histogram for Select\u001b[0m\n",
-      "\u001b[38;21mINFO:rabbit_fit.py: Save inclusive histogram for Project ch0_masked\u001b[0m\n",
-      "WARNING:tensorflow:5 out of the last 5 calls to <function Fitter.expected_with_variance at 0x7fe38c1e4400> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.\n",
-      "\u001b[38;21mINFO:rabbit_fit.py: Save processes histogram for Project ch0_masked\u001b[0m\n",
-      "WARNING:tensorflow:5 out of the last 5 calls to <function Fitter.expected_variations at 0x7fe38c100180> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.\n",
-      "\u001b[38;21mINFO:rabbit_fit.py: Save inclusive histogram for Ratio ch0_masked ch0_masked cat:0 cat:1\u001b[0m\n",
-      "WARNING:tensorflow:6 out of the last 6 calls to <function Fitter.expected_with_variance at 0x7fe38c1e4400> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.\n",
-      "WARNING:tensorflow:6 out of the last 6 calls to <function Fitter.expected_variations at 0x7fe38c100180> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.\n",
+      "\u001b[38;21mINFO:rabbit_fit.py:     2*deltaNLL: 0.0\u001b[0m\n",
+      "\u001b[38;21mINFO:rabbit_fit.py:     p-value: 100.0%\u001b[0m\n",
       "/home/david/work/Repos/rabbit/env/lib/python3.12/site-packages/wums/ioutils.py:161: FutureWarning: .metadata was not set, returning None instead of Attribute error, boost-histogram 1.7+ will error.\n",
       "  (axes, obj.storage_type(), obj.metadata, obj.label, obj.name, h5buf),\n",
       "Results written in file ./results/advanced/fitresults_nonlinear.hdf5\n",
-      "\u001b[38;21mINFO:rabbit_fit.py: 13.95 seconds total time\u001b[0m\n"
+      "\u001b[38;21mINFO:rabbit_fit.py: 6.92 seconds total time\u001b[0m\n",
+      "\u001b[1;32mDEBUG:rabbit_fit.py: 3.00 seconds initialization time\u001b[0m\n",
+      "\u001b[1;32mDEBUG:rabbit_fit.py: For fit -1:\u001b[0m\n",
+      "\u001b[1;32mDEBUG:rabbit_fit.py: 0.05 seconds for prefit\u001b[0m\n",
+      "\u001b[1;32mDEBUG:rabbit_fit.py: 3.86 seconds for fit\u001b[0m\n",
+      "\u001b[1;32mDEBUG:rabbit_fit.py: 0.01 seconds for postfit\u001b[0m\n"
      ]
     }
    ],
    "source": [
     "!rabbit_fit.py ./results/advanced/input.hdf5 -t 0 --unblind -o ./results/advanced \\\n",
-    "-m Select ch0_masked -m Project ch0_masked -m Ratio ch0_masked ch0_masked cat:0 cat:1 \\\n",
-    "--binByBinStatMode full --postfix nonlinear\\\n",
-    "--saveHists --saveHistsPerProcess --computeHistErrors --computeHistCov --computeVariations --computeHistImpacts"
+    "--binByBinStatMode full --binByBinStatType gamma --postfix nonlinear -v 4 # \\\n",
+    "# -m Select ch0_masked -m Project ch0_masked -m Ratio ch0_masked ch0_masked cat:0 cat:1 \\\n",
+    "# --saveHists --saveHistsPerProcess --computeHistErrors --computeHistCov --computeVariations --computeHistImpacts \\\n",
+    "# --globalImpactsDisableJVP"
    ]
   },
   {
    "cell_type": "markdown",
    "id": "644eafd1",
    "metadata": {},
-   "source": "### Linearizing the likelihood\n\nInstead, we can also make the likelihood fully linear (i.e. quadratic in the NLL) by using a chi-squared fit and normal-additive Barlow-Beeston parameters. In addition, we want to use normal-additive systematic variations, for that we need to reproduce the input file with modified options. In this setup the likelihood is fully quadratic and can be minimized semi-analytically by once computing the Hessian and Jacobian — rabbit detects this automatically and uses a Cholesky decomposition instead of an iterative minimizer.\n\n**When to prefer the linearized approach:**\n- Large models (many bins, processes, or systematics) where iterative minimization is slow — the analytic solve is orders of magnitude faster.\n- When systematic effects are small relative to the statistical uncertainty, so the linear approximation is well justified.\n- As a cross-check of the full nonlinear result: if the two agree well the linear approximation is valid for your model.\n\nThe trade-off is that the linear approximation may be less accurate when systematic variations are large or highly asymmetric, in which case the full nonlinear fit is more reliable."
+   "source": [
+    "### Linearizing the likelihood\n",
+    "\n",
+    "Instead, we can also make the likelihood fully linear (i.e. quadratic in the NLL) by using a chi-squared fit and normal-additive Barlow-Beeston parameters. In addition, we want to use normal-additive systematic variations, for that we need to reproduce the input file with modified options. In this setup the likelihood is as follows\n",
+    "\n",
+    "$-\\ln\\left( \\mathcal{L} \\right) = \\frac{1}{2}\\sum_{i=1}^\\mathrm{bins} \\frac{\\left(n^\\mathrm{exp}_i - n^\\mathrm{obs}_i\\right)^2}{n^\\mathrm{obs}_i} + \\frac{1}{2} \\sum_{k=1}^\\mathrm{syst} \\left(\\theta_k - \\theta^0_k\\right)^2 + \\frac{1}{2} \\sum_{i=1}^\\mathrm{bins} \\sum_{j=1}^\\mathrm{process} k^\\mathrm{stat}_{i,j} \\left( \\beta_{i,j} - \\beta_{i,j}^0 \\right)^2$\n",
+    "\n",
+    "Which is fully quadratic and can be minimized semi-analytically by once computing the Hessian and Jacobian — rabbit detects this automatically and uses a Cholesky decomposition instead of an iterative minimizer.\n",
+    "\n",
+    "The trade-off is that the linear approximation may be less accurate in case of small sample size or when systematic variations are highly asymmetric, in which case the full nonlinear fit can be more reliable."
+   ]
   },
   {
    "cell_type": "code",
@@ -508,13 +624,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2026-02-27 18:24:12.013440: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
-      "2026-02-27 18:24:12.013828: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
-      "2026-02-27 18:24:12.067138: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+      "2026-03-02 12:41:25.238428: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
+      "2026-03-02 12:41:25.301392: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
       "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-      "2026-02-27 18:24:13.322666: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
-      "2026-02-27 18:24:13.323080: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
-      "2026-02-27 18:24:13.871447: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n",
+      "2026-03-02 12:41:26.646163: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
+      "2026-03-02 12:41:27.206536: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n",
       "ch0 {'axes': [Regular(10, 0, 1, name='x'), Regular(2, -2, 2, underflow=False, overflow=False, name='cat')], 'masked': False, 'flow': False, 'start': 0, 'stop': 20}\n",
       "ch0_masked {'axes': [Regular(2, -2, 2, underflow=False, overflow=False, name='cat')], 'masked': True, 'flow': False, 'start': 20, 'stop': 22}\n",
       "\u001b[38;21mINFO:rabbit_plot_inputdata.py: Make plots for channel: ch0\u001b[0m\n",
@@ -620,7 +734,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 13,
    "id": "b218b74d",
    "metadata": {
     "scrolled": false
@@ -681,7 +795,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 14,
    "id": "7282ea08",
    "metadata": {
     "scrolled": false
@@ -713,10 +827,10 @@
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x784b57aaf3b0>"
+       "<IPython.lib.display.IFrame at 0x7876663294c0>"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -734,7 +848,18 @@
    "cell_type": "markdown",
    "id": "14c7ae8c",
    "metadata": {},
-   "source": "## Custom mappings\n\nWe want to define our own custom mapping. Any differentiable function can be implemented. \n\nFor whatever reason we want to know what is $f(x,y) = x \\cdot \\sin^2(x/y)$ where $x$ is the bin in cat0 and $y$ is the bin in cat1. A custom mapping needs to implement:\n* the `__init__` function which is called once at the beginning,\n* a `parse_args` class method to define how the command line option is to be interpreted,\n* the `compute_flat` function which is evaluated to compute the prefit and postfit expectation and uncertainties. This is the key method to override in a custom mapping: it receives the flat concatenated bin vector and returns the transformed output. Uncertainty propagation through the transformation is handled automatically via `tf.GradientTape`.\n\nWe notice that the model is actually pretty similar to the ratio model, it has two terms and does some transformation on them. So we can use that one as a starting point and don't need to implement our own parsing function."
+   "source": [
+    "## Custom mappings\n",
+    "\n",
+    "We want to define our own custom mapping. Any differentiable function can be implemented. \n",
+    "\n",
+    "For whatever reason we want to know what is $f(x,y) = x \\cdot \\sin^2(x/y)$ where $x$ is the bin in cat0 and $y$ is the bin in cat1. A custom mapping needs to implement:\n",
+    "* the `__init__` function which is called once at the beginning,\n",
+    "* a `parse_args` class method to define how the command line option is to be interpreted,\n",
+    "* the `compute_flat` function which is evaluated to compute the prefit and postfit expectation and uncertainties. This is the key method to override in a custom mapping: it receives the flat concatenated bin vector and returns the transformed output. Uncertainty propagation through the transformation is handled automatically via `tf.GradientTape`.\n",
+    "\n",
+    "We notice that the model is actually pretty similar to the ratio model, it has two terms and does some transformation on them. So we can use that one as a starting point and don't need to implement our own parsing function."
+   ]
   },
   {
    "cell_type": "code",
@@ -781,7 +906,11 @@
    "cell_type": "markdown",
    "id": "c401df24",
    "metadata": {},
-   "source": "Now let's evaluate this new mapping. Subsequently we can treat it as any other channel, e.g. look at the uncertainty breakdown.\n\nSince the mapping doesn't change the fit (it only transforms the postfit observables), we can reuse the parameter values from the previous fit with `--externalPostfit`. This skips the minimization entirely and just evaluates the mapping at the already-found postfit parameter values — very useful when iterating on mappings without having to re-run the full fit each time."
+   "source": [
+    "Now let's evaluate this new mapping. Subsequently we can treat it as any other channel, e.g. look at the uncertainty breakdown.\n",
+    "\n",
+    "Since the mapping doesn't change the fit (it only transforms the postfit observables), we can reuse the parameter values from the previous fit with `--externalPostfit`. This skips the minimization entirely and just evaluates the mapping at the already-found postfit parameter values — very useful when iterating on mappings without having to re-run the full fit each time."
+   ]
   },
   {
    "cell_type": "code",
@@ -877,7 +1006,17 @@
    "cell_type": "markdown",
    "id": "358b2ca4",
    "metadata": {},
-   "source": "## POI Models\n\nA reparameterization of the likelihood can be achieved using POI models. A POI model is a function that takes the parameters of interest (POIs), $\\vec{\\mu}$, and computes a tensor that gets multiplied to the number of predicted events by bin and process.\n\nThe default POI model is the `Mu` model which just scales signal processes with $\\mu$ and multiplies 1 to background processes. The `Ones` model multiplies all processes with 1. The `Mixture` model multiplies one signal process with $\\mu$ and another one with $1-\\mu$.\n\nPOI models can also be used to generate shape variations in the bin dimension.\n\nLet's produce our own POI model, we want to multiply one bin in the category axis with $\\sin^2(\\mu)$ and the other one with $\\cos^2(\\mu)$. The way to implement it is very similar to the mappings, with one key difference: a POI model overrides `compute(poi)`, which receives the POI tensor and returns a `[nbins, nproc]` scale factor tensor. A mapping overrides `compute_flat(params, observables)`, which receives the full flat bin vector and returns the transformed observable. Both must be implemented using TensorFlow operations so that automatic differentiation works correctly."
+   "source": [
+    "## POI Models\n",
+    "\n",
+    "A reparameterization of the likelihood can be achieved using POI models. A POI model is a function that takes the parameters of interest (POIs), $\\vec{\\mu}$, and computes a tensor that gets multiplied to the number of predicted events by bin and process.\n",
+    "\n",
+    "The default POI model is the `Mu` model which just scales signal processes with $\\mu$ and multiplies 1 to background processes. The `Ones` model multiplies all processes with 1. The `Mixture` model multiplies one signal process with $\\mu$ and another one with $1-\\mu$.\n",
+    "\n",
+    "POI models can also be used to generate shape variations in the bin dimension.\n",
+    "\n",
+    "Let's produce our own POI model, we want to multiply one bin in the category axis with $\\sin^2(\\mu)$ and the other one with $\\cos^2(\\mu)$. The way to implement it is very similar to the mappings, with one key difference: a POI model overrides `compute(poi)`, which receives the POI tensor and returns a `[nbins, nproc]` scale factor tensor. A mapping overrides `compute_flat(params, observables)`, which receives the full flat bin vector and returns the transformed observable. Both must be implemented using TensorFlow operations so that automatic differentiation works correctly."
+   ]
   },
   {
    "cell_type": "code",
@@ -1073,8 +1212,21 @@
   {
    "cell_type": "markdown",
    "id": "m613z8ud27g",
-   "source": "## Summary\n\nIn this tutorial you have learned how to:\n- Build multi-dimensional input tensors and add masked channels for quantities not entering the likelihood (e.g. unfolded cross sections)\n- Use mappings (`Select`, `Project`, `Ratio`) to transform postfit observables and propagate uncertainties through them via automatic differentiation\n- Write a custom mapping by subclassing `Mapping` and overriding `compute_flat`\n- Use `--externalPostfit` to evaluate new mappings against an existing fit result without re-running the minimization\n- Use `--binByBinStatMode full` for models with per-process statistical uncertainties\n- Linearize the likelihood for faster computation when the linear approximation is valid\n- Write a custom POI model by subclassing `POIModel` and overriding `compute`\n\nFor further information see the [README](../README.md) and the [rabbit repository](https://github.com/WMass/rabbit).",
-   "metadata": {}
+   "metadata": {},
+   "source": [
+    "## Summary\n",
+    "\n",
+    "In this tutorial you have learned how to:\n",
+    "- Build multi-dimensional input tensors and add masked channels for quantities not entering the likelihood (e.g. unfolded cross sections)\n",
+    "- Use mappings (`Select`, `Project`, `Ratio`) to transform postfit observables and propagate uncertainties through them via automatic differentiation\n",
+    "- Write a custom mapping by subclassing `Mapping` and overriding `compute_flat`\n",
+    "- Use `--externalPostfit` to evaluate new mappings against an existing fit result without re-running the minimization\n",
+    "- Use `--binByBinStatMode full` for models with per-process statistical uncertainties\n",
+    "- Linearize the likelihood for faster computation when the linear approximation is valid\n",
+    "- Write a custom POI model by subclassing `POIModel` and overriding `compute`\n",
+    "\n",
+    "For further information see the [README](../README.md) and the [rabbit repository](https://github.com/WMass/rabbit)."
+   ]
   }
  ],
  "metadata": {
@@ -1098,4 +1250,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
\ No newline at end of file
+}
diff --git a/rabbit/fitter.py b/rabbit/fitter.py
index d51e2be5..1bd36fb7 100644
--- a/rabbit/fitter.py
+++ b/rabbit/fitter.py
@@ -22,14 +22,19 @@ def solve_quad_eq(a, b, c):
 def match_regexp_params(regular_expressions, parameter_names):
     if isinstance(regular_expressions, str):
         regular_expressions = [regular_expressions]
-    # Find parameters that match any regex
+    
+    # Check for exact matches first
+    exact_matches = [s for expr in regular_expressions 
+                     for s in parameter_names if s.decode() == expr]
+    if exact_matches:
+        return exact_matches
+    
+    # Fall back to regex matching
     compiled_expressions = [re.compile(expr) for expr in regular_expressions]
-    matched_parameters = [
-        s
-        for s in parameter_names
+    return [
+        s for s in parameter_names
         if any(regex.match(s.decode()) for regex in compiled_expressions)
     ]
-    return matched_parameters
 
 
 class FitterCallback:
@@ -344,6 +349,7 @@ def load_fitresult(self, fitresult_file, fitresult_key):
             self.cov.assign(tf.constant(covval))
 
     def update_frozen_params(self):
+        logger.debug(f"Updated list of frozen params: {self.frozen_params}")
         new_mask_np = np.isin(self.parms, self.frozen_params)
 
         self.frozen_params_mask.assign(new_mask_np)
@@ -438,7 +444,6 @@ def get_theta(self):
             tf.stop_gradient(theta),
             theta,
         )
-        theta = self.x[self.poi_model.npoi :]
         if self.do_blinding:
             return theta + self._blinding_offsets_theta
         else:

From 24441de4cc4226abb8f5b97f75c5b97338057021 Mon Sep 17 00:00:00 2001
From: David Walter <david.walter@cern.ch>
Date: Mon, 2 Mar 2026 19:09:39 +0100
Subject: [PATCH 9/9] Fix linting

---
 README.md        |  2 +-
 rabbit/fitter.py | 12 +++++++-----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 2f008ba2..be536640 100644
--- a/README.md
+++ b/README.md
@@ -197,7 +197,7 @@ rabbit_print_impacts results/fitresult.hdf5
 
 We use pre-commit hooks and linters in the CI. Activate git pre-commit hooks (only need to do this once when checking out)
 ```
-git config --local include.path ../.gitconfig
+git config --local include.path ../.gitconfig 
 ```
 In case rabbit is included as a submodule, use instead:
 ```
diff --git a/rabbit/fitter.py b/rabbit/fitter.py
index 1bd36fb7..2c31b33f 100644
--- a/rabbit/fitter.py
+++ b/rabbit/fitter.py
@@ -22,17 +22,19 @@ def solve_quad_eq(a, b, c):
 def match_regexp_params(regular_expressions, parameter_names):
     if isinstance(regular_expressions, str):
         regular_expressions = [regular_expressions]
-    
+
     # Check for exact matches first
-    exact_matches = [s for expr in regular_expressions 
-                     for s in parameter_names if s.decode() == expr]
+    exact_matches = [
+        s for expr in regular_expressions for s in parameter_names if s.decode() == expr
+    ]
     if exact_matches:
         return exact_matches
-    
+
     # Fall back to regex matching
     compiled_expressions = [re.compile(expr) for expr in regular_expressions]
     return [
-        s for s in parameter_names
+        s
+        for s in parameter_names
         if any(regex.match(s.decode()) for regex in compiled_expressions)
     ]