Skip to content

Commit 8ce700d

Browse files
committed
Fake factor method: work in progress
1 parent d7ff944 commit 8ce700d

1 file changed

Lines changed: 34 additions & 36 deletions

File tree

columnflow/tasks/data_driven_methods.py

Lines changed: 34 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,9 @@ def requires(self):
309309
}
310310
def output(self):
311311
return {"ff_json": {ff_type: self.target(f"fake_factors_{ff_type}.json")for ff_type in ['qcd','wj']},
312-
"plots": {syst: self.target(f"fake_factor_syst_{syst}.png") for syst in ['nominal', 'up', 'down']},}
312+
"plots": {'_'.join((ff_type, syst)): self.target(f"fake_factor_{ff_type}_{syst}.png")
313+
for syst in ['nominal', 'up', 'down']
314+
for ff_type in ['qcd','wj']},}
313315

314316
@law.decorator.log
315317
def run(self):
@@ -333,7 +335,7 @@ def run(self):
333335
self.publish_message(f"merging Fake factor histograms for {dataset_name}")
334336
ds_single_hist = sum(hists_per_ds[1:], hists_per_ds[0].copy())
335337
hists_by_dataset.append(ds_single_hist)
336-
338+
#Create a dict of histograms indexed by the process
337339
hists_by_proc = {}
338340
for proc_name in self.config_inst.processes.names():
339341
proc = self.config_inst.processes.get(proc_name)
@@ -349,40 +351,39 @@ def run(self):
349351
else:
350352
hists_by_proc[proc] = h
351353

354+
#Divide histograms to data and bkg
352355
mc_hists = [h for p, h in hists_by_proc.items() if p.is_mc and not p.has_tag("signal")]
353356
data_hists = [h for p, h in hists_by_proc.items() if p.is_data]
354357

358+
#Merge histograms to get a joint data and mc histogram
355359
mc_hists = sum(mc_hists[1:], mc_hists[0].copy())
356360
data_hists = sum(data_hists[1:], data_hists[0].copy())
357361

358-
dr_names = ['dr_num_wj','dr_den_wj','dr_num_qcd','dr_den_qcd']
359-
360-
def get_hist(h, category):
361-
return h[{"category": hist.loc(category.id)}]
362-
363-
364-
#Create two dictionaries that contain histograms for different determination regions
365-
data_h_cat ={}
366-
mc_h_cat = {}
367-
for dr_name in dr_names:
368-
cat = self.config_inst.get_category(self.branch_data.category.replace('sr',dr_name))
369-
data_h_cat[dr_name] = get_hist(data_hists, cat)
370-
mc_h_cat[dr_name] = get_hist(mc_hists, cat)
362+
#Function that performs the calculation of th
363+
def get_ff_corr(self, h_data, h_mc, num_reg = 'dr_num_wj', den_reg = 'dr_den_wj', name='ff_hist', label='ff_hist'):
371364

372-
373-
def get_ff_corr(self, h_data, h_mc, num_cat, den_cat, name='ff_hist', label='ff_hist'):
374-
num = h_data[num_cat].values() - h_mc[num_cat].values()
375-
den = h_data[den_cat].values() - h_mc[den_cat].values()
365+
def get_dr_hist(self, h, det_reg):
366+
cat = self.config_inst.get_category(self.branch_data.category.replace('sr',det_reg))
367+
return h[{"category": hist.loc(cat.id)}]
368+
369+
data_num = get_dr_hist(self, h_data, num_reg)
370+
data_den = get_dr_hist(self, h_data, den_reg)
371+
mc_num = get_dr_hist(self, h_mc, num_reg)
372+
mc_den = get_dr_hist(self, h_mc, den_reg)
373+
374+
num = data_num.values() - mc_num.values()
375+
den = data_den.values() - mc_den.values()
376376
ff_val = np.where((num > 0) & (den > 0),
377377
num / np.maximum(den, 1),
378378
1)
379379
def rel_err(x):
380380
return x.variances()/np.maximum(x.values()**2, 1)
381+
381382
ff_err2 = np.where((num > 0) & (den > 0),
382-
np.sqrt(rel_err(h_data[num_cat]) +
383-
+ rel_err(h_mc[den_cat]) +
384-
+ rel_err(h_data[num_cat]) +
385-
+ rel_err(h_mc[den_cat])) * ff_val**2,
383+
np.sqrt(rel_err(data_num) +
384+
+ rel_err(data_den) +
385+
+ rel_err(mc_num) +
386+
+ rel_err(mc_den)) * ff_val**2,
386387
0.5* np.ones_like(ff_val))
387388
h = hist.Hist.new
388389
for (var_name, var_axis) in self.config_inst.x.fake_factor_method.axes.items():
@@ -394,25 +395,23 @@ def rel_err(x):
394395
ff_hist.view().value[...,2] = np.maximum(ff_val - np.sqrt(ff_err2),0)
395396
ff_hist.name = name
396397
ff_hist.label = label
397-
ff_corr = cl_convert.from_histogram(ff_hist) #temporary correction without systematic axis
398+
ff_corr = cl_convert.from_histogram(ff_hist)
398399
ff_corr.data.flow = "clamp"
399400
return ff_corr, ff_hist
400401

401-
import rich
402-
403402
wj_corr, wj_h = get_ff_corr(self,
404-
data_h_cat,
405-
mc_h_cat,
406-
num_cat = 'dr_num_wj',
407-
den_cat = 'dr_den_wj',
403+
data_hists,
404+
mc_hists,
405+
num_reg = 'dr_num_wj',
406+
den_reg = 'dr_den_wj',
408407
name='ff_wjets',
409408
label='Fake factor W+jets')
410409

411410
qcd_corr, qcd_h = get_ff_corr(self,
412-
data_h_cat,
413-
mc_h_cat,
414-
num_cat = 'dr_num_qcd',
415-
den_cat = 'dr_den_qcd',
411+
data_hists,
412+
mc_hists,
413+
num_reg = 'dr_num_qcd',
414+
den_reg = 'dr_den_qcd',
416415
name='ff_qcd',
417416
label='Fake factor QCD')
418417

@@ -422,9 +421,8 @@ def rel_err(x):
422421
for syst in ['nominal','up','down']:
423422
fig, ax = plt.subplots(figsize=(12, 8))
424423
the_hist[...,syst].plot2d(ax=ax)
425-
self.output()['plots'][syst].dump(fig, formatter="mpl")
424+
self.output()['plots']['_'.join((h_name,syst))].dump(fig, formatter="mpl")
426425

427-
428426
self.output()['ff_json']['wj'].dump(wj_corr.json(exclude_unset=True), formatter="json")
429427
self.output()['ff_json']['qcd'].dump(qcd_corr.json(exclude_unset=True), formatter="json")
430428

0 commit comments

Comments
 (0)