Skip to content

Commit f2c1198

Browse files
committed
Fixing 2022 datasets naming due to the update of the folder naming at HLepRare repo
1 parent 21b9a99 commit f2c1198

1 file changed

Lines changed: 96 additions & 256 deletions

File tree

  • cmsdb/campaigns/run3_2022_preEE_nano_tau_skim_2025_v1

cmsdb/campaigns/run3_2022_preEE_nano_tau_skim_2025_v1/cp_signal.py

Lines changed: 96 additions & 256 deletions
Original file line numberDiff line numberDiff line change
@@ -2,259 +2,99 @@
22
CMS TAUPOG skimmed datasets from the 2022 data-taking campaign
33
"""
44
import cmsdb.processes as procs
5-
from cmsdb.campaigns.run3_2022_preEE_nano_tau_skim_2025_v1 import campaign_run3_2022_preEE_nano_tau_skim_2025_v1 as cpn # TODO: adjust if needed
6-
7-
### prod CP-even datasets ###
8-
cpn.add_dataset(
9-
name="h_ggf_htt_sm_prod_sm_filtered",
10-
id=22000000,
11-
processes=[procs.h_ggf_htt_sm_prod_sm],
12-
keys=["/GluGluHTo2Tau_UncorrelatedDecay_SM_Filtered_ProdAndDecay",],
13-
n_files=18,
14-
n_events=6703604,
15-
)
16-
17-
cpn.add_dataset(
18-
name="h_ggf_htt_mm_prod_sm_filtered",
19-
id=22000001,
20-
processes=[procs.h_ggf_htt_mm_prod_sm],
21-
keys=["/GluGluHTo2Tau_UncorrelatedDecay_SM_Filtered_ProdAndDecay",],
22-
n_files=18,
23-
n_events=6703604,
24-
)
25-
26-
cpn.add_dataset(
27-
name="h_ggf_htt_cpo_prod_sm_filtered",
28-
id=22000002,
29-
processes=[procs.h_ggf_htt_cpo_prod_sm],
30-
keys=["/GluGluHTo2Tau_UncorrelatedDecay_SM_Filtered_ProdAndDecay",],
31-
n_files=18,
32-
n_events=6703604,
33-
)
34-
35-
cpn.add_dataset(
36-
name="h_ggf_htt_flat_prod_sm_filtered",
37-
id=22000003,
38-
processes=[procs.h_ggf_htt_flat_prod_sm],
39-
keys=["/GluGluHTo2Tau_UncorrelatedDecay_SM_Filtered_ProdAndDecay",],
40-
n_files=18,
41-
n_events=6703604,
42-
)
43-
44-
### prod CP-odd datasets ###
45-
cpn.add_dataset(
46-
name="h_ggf_htt_sm_prod_cpo_filtered",
47-
id=22000010,
48-
processes=[procs.h_ggf_htt_sm_prod_cpo],
49-
keys=["/GluGluHTo2Tau_UncorrelatedDecay_CPodd_Filtered_ProdAndDecay",],
50-
n_files=19,
51-
n_events=7185840,
52-
)
53-
cpn.add_dataset(
54-
name="h_ggf_htt_mm_prod_cpo_filtered",
55-
id=22000011,
56-
processes=[procs.h_ggf_htt_mm_prod_cpo],
57-
keys=["/GluGluHTo2Tau_UncorrelatedDecay_CPodd_Filtered_ProdAndDecay",],
58-
n_files=19,
59-
n_events=7185840,
60-
)
61-
cpn.add_dataset(
62-
name="h_ggf_htt_cpo_prod_cpo_filtered",
63-
id=22000012,
64-
processes=[procs.h_ggf_htt_cpo_prod_cpo],
65-
keys=["/GluGluHTo2Tau_UncorrelatedDecay_CPodd_Filtered_ProdAndDecay",],
66-
n_files=19,
67-
n_events=7185840,
68-
)
69-
cpn.add_dataset(
70-
name="h_ggf_htt_flat_prod_cpo_filtered",
71-
id=22000013,
72-
processes=[procs.h_ggf_htt_flat_prod_cpo],
73-
keys=["/GluGluHTo2Tau_UncorrelatedDecay_CPodd_Filtered_ProdAndDecay",],
74-
n_files=19,
75-
n_events=7185840,
76-
)
77-
78-
### prod Max. mixing datasets ###
79-
cpn.add_dataset(
80-
name="h_ggf_htt_sm_prod_mm_filtered",
81-
id=22000020,
82-
processes=[procs.h_ggf_htt_sm_prod_mm],
83-
keys=["/GluGluHTo2Tau_UncorrelatedDecay_MM_Filtered_ProdAndDecay",],
84-
n_files=17,
85-
n_events=6424278,
86-
)
87-
cpn.add_dataset(
88-
name="h_ggf_htt_mm_prod_mm_filtered",
89-
id=22000021,
90-
processes=[procs.h_ggf_htt_mm_prod_mm],
91-
keys=["/GluGluHTo2Tau_UncorrelatedDecay_MM_Filtered_ProdAndDecay",],
92-
n_files=17,
93-
n_events=6424278,
94-
)
95-
cpn.add_dataset(
96-
name="h_ggf_htt_cpo_prod_mm_filtered",
97-
id=22000022,
98-
processes=[procs.h_ggf_htt_cpo_prod_mm],
99-
keys=["/GluGluHTo2Tau_UncorrelatedDecay_MM_Filtered_ProdAndDecay",],
100-
n_files=17,
101-
n_events=6424278,
102-
)
103-
104-
cpn.add_dataset(
105-
name="h_ggf_htt_flat_prod_mm_filtered",
106-
id=22000023,
107-
processes=[procs.h_ggf_htt_flat_prod_mm],
108-
keys=["/GluGluHTo2Tau_UncorrelatedDecay_MM_Filtered_ProdAndDecay",],
109-
n_files=17,
110-
n_events=6424278,
111-
)
112-
113-
114-
#VBF signal samples
115-
cpn.add_dataset(
116-
name="h_vbf_htt_sm_filtered",
117-
id=22000030,
118-
processes=[procs.h_vbf_htt_sm],
119-
keys=["/VBFHToTauTau_UncorrelatedDecay_Filtered",],
120-
n_files=13,
121-
n_events=5082505,
122-
)
123-
124-
cpn.add_dataset(
125-
name="h_vbf_htt_cpo_filtered",
126-
id=22000031,
127-
processes=[procs.h_vbf_htt_cpo],
128-
keys=["/VBFHToTauTau_UncorrelatedDecay_Filtered",],
129-
n_files=13,
130-
n_events=5082505,
131-
)
132-
133-
cpn.add_dataset(
134-
name="h_vbf_htt_mm_filtered",
135-
id=22000032,
136-
processes=[procs.h_vbf_htt_mm],
137-
keys=["/VBFHToTauTau_UncorrelatedDecay_Filtered",],
138-
n_files=13,
139-
n_events=5082505,
140-
)
141-
142-
cpn.add_dataset(
143-
name="h_vbf_htt_flat_filtered",
144-
id=22000033,
145-
processes=[procs.h_vbf_htt_flat],
146-
keys=["/VBFHToTauTau_UncorrelatedDecay_Filtered",],
147-
n_files=13,
148-
n_events=5082505,
149-
)
150-
151-
#VH signal samples
152-
### ZH ###
153-
cpn.add_dataset(
154-
name="zh_htt_sm_filtered",
155-
id=22000040,
156-
processes=[procs.zh_htt_sm],
157-
keys=["/ZHToTauTau_UncorrelatedDecay_Filtered",],
158-
n_files=2,
159-
n_events=613598.0,
160-
)
161-
cpn.add_dataset(
162-
name="zh_htt_mm_filtered",
163-
id=22000041,
164-
processes=[procs.zh_htt_mm],
165-
keys=["/ZHToTauTau_UncorrelatedDecay_Filtered",],
166-
n_files=2,
167-
n_events=613598.0,
168-
)
169-
170-
cpn.add_dataset(
171-
name="zh_htt_cpo_filtered",
172-
id=22000042,
173-
processes=[procs.zh_htt_cpo],
174-
keys=["/ZHToTauTau_UncorrelatedDecay_Filtered",],
175-
n_files=2,
176-
n_events=613598.0,
177-
)
178-
179-
cpn.add_dataset(
180-
name="zh_htt_flat_filtered",
181-
id=22000043,
182-
processes=[procs.zh_htt_flat],
183-
keys=["/ZHToTauTau_UncorrelatedDecay_Filtered",],
184-
n_files=2,
185-
n_events=613598.0,
186-
)
187-
188-
### W^+H ###
189-
cpn.add_dataset(
190-
name="wph_htt_sm_filtered",
191-
id=22000050,
192-
processes=[procs.wph_htt_sm],
193-
keys=["/WplusHToTauTau_UncorrelatedDecay_Filtered"],
194-
n_files=2,
195-
n_events=716466.0,
196-
)
197-
198-
cpn.add_dataset(
199-
name="wph_htt_mm_filtered",
200-
id=22000051,
201-
processes=[procs.wph_htt_mm],
202-
keys=["/WplusHToTauTau_UncorrelatedDecay_Filtered"],
203-
n_files=2,
204-
n_events=716466.0,
205-
)
206-
207-
cpn.add_dataset(
208-
name="wph_htt_cpo_filtered",
209-
id=22000052,
210-
processes=[procs.wph_htt_cpo],
211-
keys=["/WplusHToTauTau_UncorrelatedDecay_Filtered"],
212-
n_files=2,
213-
n_events=716466.0,
214-
)
215-
216-
cpn.add_dataset(
217-
name="wph_htt_flat_filtered",
218-
id=22000053,
219-
processes=[procs.wph_htt_flat],
220-
keys=["/WplusHToTauTau_UncorrelatedDecay_Filtered"],
221-
n_files=2,
222-
n_events=716466.0,
223-
)
224-
225-
### W^-H ###
226-
cpn.add_dataset(
227-
name="wmh_htt_sm_filtered",
228-
id=22000054,
229-
processes=[procs.wmh_htt_sm],
230-
keys=["/WminusHToTauTau_UncorrelatedDecay_Filtered"],
231-
n_files=1,
232-
n_events=431839.0,
233-
)
234-
235-
cpn.add_dataset(
236-
name="wmh_htt_mm_filtered",
237-
id=22000055,
238-
processes=[procs.wmh_htt_mm],
239-
keys=["/WminusHToTauTau_UncorrelatedDecay_Filtered"],
240-
n_files=1,
241-
n_events=431839.0,
242-
)
243-
244-
cpn.add_dataset(
245-
name="wmh_htt_cpo_filtered",
246-
id=22000056,
247-
processes=[procs.wmh_htt_cpo],
248-
keys=["/WminusHToTauTau_UncorrelatedDecay_Filtered"],
249-
n_files=1,
250-
n_events=431839.0,
251-
)
252-
253-
cpn.add_dataset(
254-
name="wmh_htt_flat_filtered",
255-
id=22000057,
256-
processes=[procs.wmh_htt_flat],
257-
keys=["/WminusHToTauTau_UncorrelatedDecay_Filtered"],
258-
n_files=1,
259-
n_events=431839.0,
260-
)
5+
from cmsdb.campaigns.run3_2022_preEE_nano_tau_skim_2025_v1 import campaign_run3_2022_preEE_nano_tau_skim_2025_v1 as cpn
6+
7+
8+
9+
import re
10+
from collections import OrderedDict
11+
12+
def _base_name(name: str) -> str:
13+
m = re.match(r'^(.*)_ext\d+$', name)
14+
return m.group(1) if m else name
15+
16+
def _ext_number(s: str) -> int:
17+
m = re.search(r'_ext(\d+)$', s)
18+
return int(m.group(1)) if m else 0
19+
20+
def _key_sort_key(key: str):
    """Return a sort key that orders dataset keys by their extension number.

    Keys for which ``_ext_number`` yields 0 map to ``(0, 0)`` and therefore
    sort ahead of every extension key, which maps to ``(1, N)``.
    """
    ext = _ext_number(key)
    if ext:
        # extension keys come after the base key, in ascending _ext<N> order
        return (1, ext)
    return (0, 0)
24+
25+
def add_merged_datasets(dataset_rows, cpn, procs):
    """
    Register one dataset per base sample on ``cpn``, folding extension rows in.

    dataset_rows: iterable of (name, key_or_keys, n_evt, n_files, pid, proc).
        ``key_or_keys`` is a single string or a list/tuple of strings and
        ``proc`` is the attribute name fetched from ``procs`` with ``getattr``.
    cpn: object whose ``add_dataset`` method is called once per base sample.
    procs: namespace holding the process objects referenced by ``proc``.

    Rows named ``<base>_ext<N>`` are merged into the entry of ``<base>``:
    keys are collected without duplicates, ``n_evt`` and ``n_files`` are
    summed after ``int()`` conversion, and the id of a non-extension row takes
    precedence (an extension row's id is only used while none is recorded).

    Raises ValueError when rows of the same base sample disagree on ``proc``
    and TypeError when a key is not a string.
    """
    merged = {}  # base sample name -> accumulated dataset attributes
    for name, key, n_evt, n_files, pid, proc in dataset_rows:
        base = _base_name(name)
        if base not in merged:
            merged[base] = dict(
                name=base,
                proc=proc,
                id=None,
                keys=OrderedDict(),  # used as an insertion-ordered set
                n_events=0,
                n_files=0,
            )
        entry = merged[base]

        known_proc = entry["proc"]
        if known_proc != proc:
            raise ValueError(f"Process mismatch for {base}: {known_proc} vs {proc}")

        # A non-extension row always dictates the id; an extension row only
        # fills it in when nothing has been stored so far.
        is_extension = re.search(r'_ext\d+$', name) is not None
        if entry["id"] is None or not is_extension:
            entry["id"] = pid

        # A bare string is treated as a one-element list of keys.
        if isinstance(key, (list, tuple)):
            candidate_keys = key
        else:
            candidate_keys = [key]
        for item in candidate_keys:
            if not isinstance(item, str):
                raise TypeError(f"key must be a string, got {type(item).__name__}: {item}")
            entry["keys"][item] = True

        entry["n_events"] += int(n_evt)
        entry["n_files"] += int(n_files)

    # One add_dataset call per base sample, keys ordered base first, then
    # _ext1, _ext2, ...
    for entry in merged.values():
        ordered_keys = sorted(entry["keys"], key=_key_sort_key)
        cpn.add_dataset(
            name=entry["name"],
            id=entry["id"],
            is_data=False,
            processes=[getattr(procs, entry["proc"])],
            keys=ordered_keys,
            n_files=entry["n_files"],
            n_events=entry["n_events"],
        )
76+
77+
# Base samples, one tuple per sample:
# (name, key, n_evt, n_files, pid, proc)
dataset_rows = [
    (
        "h_ggf_htt_sm_prod_sm_filtered",
        ["/GluGluHto2Tau_UncorrelatedDecay_SM_Filtered_ProdAndDecay"],
        6703604,
        18,
        22000000,
        "h_ggf_htt_sm_prod_sm",
    ),
    (
        "h_ggf_htt_sm_prod_cpo_filtered",
        ["/GluGluHto2Tau_UncorrelatedDecay_CPodd_Filtered_ProdAndDecay"],
        7185840,
        19,
        22000010,
        "h_ggf_htt_sm_prod_cpo",
    ),
    (
        "h_ggf_htt_sm_prod_mm_filtered",
        ["/GluGluHto2Tau_UncorrelatedDecay_MM_Filtered_ProdAndDecay"],
        6424278,
        17,
        22000020,
        "h_ggf_htt_sm_prod_mm",
    ),
    (
        "h_vbf_htt_sm_filtered",
        ["/VBFHto2Tau_UncorrelatedDecay_Filtered"],
        5082505,
        13,
        22000030,
        "h_vbf_htt_sm",
    ),
    (
        "zh_htt_sm_filtered",
        ["/ZHto2Tau_UncorrelatedDecay_Filtered"],
        613598,
        2,
        22000040,
        "zh_htt_sm",
    ),
    (
        "wph_htt_sm_filtered",
        ["/WplusHto2Tau_UncorrelatedDecay_Filtered"],
        716466,
        2,
        22000050,
        "wph_htt_sm",
    ),
    (
        "wmh_htt_sm_filtered",
        ["/WminusHto2Tau_UncorrelatedDecay_Filtered"],
        431839,
        2,
        22000060,
        "wmh_htt_sm",
    ),
]

# Each base row is kept as-is and followed by three derived rows in which
# 'htt_sm' is replaced in both the dataset name and the process name. The
# derived rows reuse the base row's key and event/file counts and take the
# ids pid + 1, pid + 2 and pid + 3.
dataset_rows_cp = []
for row in dataset_rows:
    name, key, n_evt, n_files, pid, proc = row
    dataset_rows_cp.append(row)
    for offset, cp_tag in enumerate(['htt_mm', 'htt_cpo', 'htt_flat'], start=1):
        dataset_rows_cp.append(
            (
                name.replace('htt_sm', cp_tag),
                key,
                n_evt,
                n_files,
                pid + offset,
                proc.replace('htt_sm', cp_tag),
            )
        )
99+
100+
# Register every expanded row on the campaign object; rows sharing a base
# name (i.e. *_ext<N> rows) end up in a single add_dataset call.
add_merged_datasets(dataset_rows_cp, cpn, procs)

0 commit comments

Comments
 (0)