-
Notifications
You must be signed in to change notification settings - Fork 30
Postfit pdf grids #652
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Postfit pdf grids #652
Changes from all commits
a331112
6c66e46
5d6afcb
24e5b30
24d0736
3f99eb2
0e899ad
6382a17
1bda212
def1cc5
84009e6
2a13f56
5d5d5d2
df399aa
a865639
25045e1
cf70c15
acd9e19
690a462
ba9ee0d
d7ea14b
1e4b8dd
cc09007
b8dd970
673debe
316d6d1
51da3c4
274ebb7
0d30683
65eff14
5285640
55c7324
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,155 @@ | ||
| import argparse | ||
|
|
||
| import hist | ||
| import numpy as np | ||
|
|
||
| from rabbit import inputdata, tensorwriter | ||
| from wremnants.utilities import theory_utils | ||
| from wums import logging, output_tools | ||
|
|
||
| parser = argparse.ArgumentParser() | ||
| parser.add_argument("-o", "--output", default="./", help="output directory") | ||
| parser.add_argument("--outname", default="test_tensor", help="output file name") | ||
| parser.add_argument( | ||
| "--postfix", | ||
| default=None, | ||
| type=str, | ||
| help="Postfix to append on output file name", | ||
| ) | ||
| parser.add_argument( | ||
| "--sparse", | ||
| default=False, | ||
| action="store_true", | ||
| help="Make sparse tensor", | ||
| ) | ||
| parser.add_argument( | ||
| "--rabbit-input", | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Our convention was to use camelCase for CLA, also below |
||
| type=str, | ||
| required=True, | ||
| help="Rabbit input file for the reference fit", | ||
| ) | ||
| parser.add_argument( | ||
| "--proc", | ||
| type=str, | ||
| choices=["Zmumu", "Wmunu"], | ||
| required=True, | ||
| help="Process name to use for the PDF fit (should match the signal)", | ||
| ) | ||
| parser.add_argument( | ||
| "--noColorLogger", action="store_true", help="Disable colored logging output." | ||
| ) | ||
| parser.add_argument( | ||
| "-l", "--fit-label", type=str, default="cmsmw", help="Label in the output PDF grids" | ||
| ) | ||
| parser.add_argument( | ||
| "-v", "--verbose", choices=[0, 1, 2, 3, 4], default=3, help="Set verbosity level." | ||
| ) | ||
| args = parser.parse_args() | ||
|
|
||
| logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger) | ||
|
|
||
| indata = inputdata.FitInputData(args.rabbit_input) | ||
|
|
||
| # Build tensor | ||
| writer = tensorwriter.TensorWriter( | ||
| sparse=args.sparse, | ||
| ) | ||
|
|
||
| metadata = indata.metadata | ||
|
|
||
| pdf_input = indata.metadata["meta_info_input"]["args"]["pdfs"][0] | ||
| pdf_scale = metadata["meta_info"]["args"]["scalePdf"] | ||
|
|
||
| pdfInfo = theory_utils.pdf_info_map("Zmumu_2016PostVFP", pdf_input) | ||
| pdf_name = pdfInfo["lha_name"] | ||
|
|
||
| if pdf_scale == -1: | ||
| pdf_scale = theory_utils.pdf_inflation_factor( | ||
| theory_utils.pdfMap[pdf_input], metadata["meta_info"]["args"]["noi"] | ||
| ) | ||
| logger.info(f"Using default inflation factor: {pdf_scale}") | ||
|
|
||
| pdf_scale *= pdfInfo["scale"] if "scale" in pdfInfo else 1.0 | ||
| logger.info(f"Scaling PDF uncertainties by {pdf_scale}") | ||
|
|
||
| symHessian = pdfInfo["combine"] == "symHessian" | ||
| symmetrize = indata.metadata["meta_info"]["args"]["symmetrizePdfUnc"] | ||
| print(f"PDF symmetrization procedure: {symmetrize}") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. logger is defined, use it? |
||
|
|
||
| if not symHessian: | ||
| logger.info(f"Applying {symmetrize} symmetrization procedure") | ||
|
|
||
| labels = np.array( | ||
| [ | ||
| s | ||
| for s in indata.systs | ||
| if "pdf" in s.decode() | ||
| and not any(x in s.decode() for x in ["mcrange", "mbrange", "pdfAlphaS"]) | ||
| ], | ||
| dtype=str, | ||
| ) | ||
|
|
||
| if symmetrize == "quadratic": | ||
| labels[::2] = [ | ||
| s.replace("SymAvg", "Down").replace("SymDiff", "Down") for s in labels[::2] | ||
| ] | ||
| labels[1::2] = [ | ||
| s.replace("SymAvg", "Up").replace("SymDiff", "Up") for s in labels[1::2] | ||
| ] | ||
| elif symmetrize == "average": | ||
| labels = np.array( | ||
| [f"{l}{shift}" for l in labels for shift in ("Down", "Up")], dtype=str | ||
| ) | ||
|
|
||
| x_range = np.logspace(-4, -0.01, 201) | ||
|
|
||
| # Consistency with the incorrect treatment of the central value in setupRabbit | ||
| category_labels = labels if symHessian else ["central", *labels] | ||
|
|
||
| for chan in ["u", "ubar", "d", "dbar", "s", "sbar", "g", "uv", "dv"]: | ||
| pdf_data = theory_utils.pdf_data_from_lhapdf(pdf_name, chan, 80.360, x_range[:-1]) | ||
| pdf_hist = hist.Hist( | ||
| hist.axis.Variable(x_range, name="x"), | ||
| hist.axis.StrCategory(category_labels, name="pdfVar", flow=False), | ||
| data=pdf_data.T, | ||
| ) | ||
|
|
||
| writer.add_channel(pdf_hist.axes[:-1], chan) | ||
|
|
||
| if args.proc.encode("utf-8") not in indata.procs: | ||
| raise ValueError(f"Process {args.proc} not found in input data") | ||
|
|
||
| writer.add_process(pdf_hist[..., 0], args.proc, chan, signal=False) | ||
| writer.add_data(pdf_hist[..., 0], chan) | ||
|
|
||
| if symHessian: | ||
| # This is wrong in setupRabbit (the central val is treated as a variation) so it should also be wrong here... | ||
| for syst in pdf_hist.axes["pdfVar"]: | ||
| writer.add_systematic( | ||
| pdf_hist[..., syst], | ||
| syst, | ||
| args.proc, | ||
| chan, | ||
| kfactor=pdf_scale, | ||
| ) | ||
| else: | ||
| systs = list(pdf_hist.axes["pdfVar"])[1:] | ||
| for systDown, systUp in zip(systs[::2], systs[1::2]): | ||
| writer.add_systematic( | ||
| [pdf_hist[..., systUp], pdf_hist[..., systDown]], | ||
| systUp.replace("Up", ""), | ||
| args.proc, | ||
| chan, | ||
| symmetrize=symmetrize, | ||
| kfactor=pdf_scale, | ||
| ) | ||
|
|
||
| directory = args.output | ||
| if directory == "": | ||
| directory = "./" | ||
| filename = args.outname | ||
| if args.postfix: | ||
| filename += f"_{args.postfix}" | ||
|
|
||
| meta_data = {"meta_info": output_tools.make_meta_info_dict()} | ||
| writer.write(outfolder=directory, outfilename=filename, meta_data_dict=meta_data) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| import numpy as np | ||
|
|
||
| # Add the alias back manually to make mc2hlib work | ||
| if not hasattr(np, "int"): | ||
| np.int = int | ||
| import argparse | ||
|
|
||
| import h5py | ||
|
|
||
| from wremnants.postprocessing.postfit_pdf_helper import ( | ||
| RabbitPostfitPdfHelper, | ||
| SimplePostfitPdfHelper, | ||
| ) | ||
| from wremnants.utilities import theory_utils | ||
| from wums import logging | ||
|
|
||
| parser = argparse.ArgumentParser() | ||
| parser.add_argument( | ||
| "-f", | ||
| "--fitresult", | ||
| type=str, | ||
| required=True, | ||
| help="Path to the fit result file (rabbit HDF5 or simple covariance HDF5).", | ||
| ) | ||
| parser.add_argument( | ||
| "-o", | ||
| "--outfolder", | ||
| type=str, | ||
| required=True, | ||
| help="Output path for the postfit PDF grids (created if it doesn't already exist.", | ||
| ) | ||
| parser.add_argument( | ||
| "-p", | ||
| "--pdf-name", | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use camelCase, also below |
||
| type=str, | ||
| required=False, | ||
| choices=["auto", *theory_utils.pdfMap.keys()], | ||
| default="auto", | ||
| help="Name of the PDF set to use. If 'auto', will use the PDF from the fit result metadata.", | ||
| ) | ||
| parser.add_argument( | ||
| "-v", "--verbose", choices=[0, 1, 2, 3, 4], default=3, help="Set verbosity level." | ||
| ) | ||
| parser.add_argument( | ||
| "-l", "--fit-label", type=str, default="cmsmw", help="Label in the output PDF grids" | ||
| ) | ||
| parser.add_argument( | ||
| "-i", "--lhaid", type=str, required=True, help="LHAPDF ID to give the new set" | ||
| ) | ||
| parser.add_argument( | ||
| "--noColorLogger", action="store_true", help="Disable colored logging output." | ||
| ) | ||
| parser.add_argument( | ||
| "--pseudoData", | ||
| type=str, | ||
| default=None, | ||
| help="Pseudo-data label to use (rabbit format only).", | ||
| ) | ||
| args = parser.parse_args() | ||
|
|
||
| logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger) | ||
|
|
||
|
|
||
| def is_simple_format(path): | ||
| """Return True if the HDF5 file is in the simple covariance format.""" | ||
| with h5py.File(path, "r") as f: | ||
| return "covariance" in f | ||
|
|
||
|
|
||
| if is_simple_format(args.fitresult): | ||
| logger.info("Detected simple covariance HDF5 format.") | ||
| pdf_helper = SimplePostfitPdfHelper(args.fitresult) | ||
| if args.pdf_name != "auto" and args.pdf_name != pdf_helper.pdf_name: | ||
| raise ValueError( | ||
| f"Specified PDF name {args.pdf_name} does not match input PDF {pdf_helper.pdf_name}." | ||
| ) | ||
| else: | ||
| logger.info("Detected rabbit HDF5 format.") | ||
| pdf_helper = RabbitPostfitPdfHelper(args.fitresult, pseudoData=args.pseudoData) | ||
| if pdf_helper.pdf_name is None: | ||
| if args.pdf_name == "auto": | ||
| raise ValueError( | ||
| "PDF name must be specified if not present in fit result metadata." | ||
| ) | ||
| logger.warning( | ||
| "Input metadata does not contain PDF information. Using specified PDF name." | ||
| ) | ||
| pdf_helper.pdf_name = args.pdf_name | ||
| elif args.pdf_name != "auto" and args.pdf_name != pdf_helper.pdf_name: | ||
| raise ValueError( | ||
| f"Specified PDF name {args.pdf_name} does not match input PDF {pdf_helper.pdf_name}." | ||
| ) | ||
|
|
||
| # TODO: Need to scale back at the end to get 95% CL for consistency? | ||
|
|
||
| postfit_matrix, new_central, central_pdf_path = pdf_helper.compute_postfit_matrix() | ||
| pdf_helper.write_grids( | ||
| central_pdf_path, | ||
| args.outfolder, | ||
| args.fit_label, | ||
| args.lhaid, | ||
| postfit_matrix, | ||
| new_central, | ||
| ) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this not go into
base_io? Which files contain the "command" and how was it generated? Could it not be stored in the supported way using a "meta" dict?