diff --git a/analysis/btagMCeff/btagMCeff.py b/analysis/btagMCeff/btagMCeff.py
index 74bd28014..b0153f1a5 100644
--- a/analysis/btagMCeff/btagMCeff.py
+++ b/analysis/btagMCeff/btagMCeff.py
@@ -32,15 +32,15 @@ def __init__(self, samples):
         jeta_axis = hist.axis.Regular(25, -2.5, 2.5, name="eta", label=r"Jet \eta (GeV)")
         jaeta_axis = hist.axis.Variable([0, 1, 1.8, 2.4], name="abseta", label=r"Jet \eta (GeV)")
         Flav_axis = hist.axis.StrCategory([], name="Flav", growth=True)
-        flav_axis = hist.axis.IntCategory([], name="flav", growth=True)
+        flav_axis = hist.axis.Regular(5, 0, 5, name="flav", label="jet flav")
         wp_axis = hist.axis.StrCategory([], name="WP", growth=True)

         self._accumulator = {
             'jetpt' : hist.Hist(wp_axis, Flav_axis, jpt_axis),
             'jeteta' : hist.Hist(wp_axis, Flav_axis, jeta_axis),
             'jetpteta' : hist.Hist(wp_axis, Flav_axis, jpt_axis, jaeta_axis),
-            'jetptetaflav' : hist.Hist(wp_axis, jetpt_axis, jaeta_axis, flav_axis),
+            'jetptetaflav' : hist.Hist(wp_axis, jetpt_axis, flav_axis, jaeta_axis)
         }
-
+        self._bjets = 0

     @property
     def accumulator(self):
         return self._accumulator
@@ -51,6 +51,8 @@ def columns(self):

     # Main function: run on a given dataset
     def process(self, events):
+
+        events = events
         # Dataset parameters
         dataset = events.metadata['dataset']
         year = self._samples[dataset]['year']
@@ -61,18 +63,30 @@ def process(self, events):
         for d in datasets:
             if d in dataset: dataset = dataset.split('_')[0]

+        is_run3 = False
+        if year.startswith("202"):
+            is_run3 = True
+        is_run2 = not is_run3
+
         # Initialize objects
         met = events.MET
         e = events.Electron
         mu = events.Muon
         j = events.Jet

+        if is_run3:
+            leptonSelection = te_os.run3leptonselection()
+            jetsRho = events.Rho["fixedGridRhoFastjetAll"]
+        elif is_run2:
+            leptonSelection = te_os.run2leptonselection()
+            jetsRho = events.fixedGridRhoFastjetAll
+
         # Muon selection
-        mu["conept"] = te_os.coneptMuon(mu.pt, mu.mvaTTHUL, mu.jetRelIso, mu.mediumId)
+        mu["conept"] = leptonSelection.coneptMuon(mu)
         mu["btagDeepFlavB"] = ak.fill_none(mu.matched_jet.btagDeepFlavB, -99)
-        mu["isPres"] = te_os.isPresMuon(mu.dxy, mu.dz, mu.sip3d, mu.eta, mu.pt, mu.miniPFRelIso_all)
-        mu["isFO"] = te_os.isFOMuon(mu.pt, mu.conept, mu.btagDeepFlavB, mu.mvaTTHUL, mu.jetRelIso, year)
-        mu["isTight"]= te_os.tightSelMuon(mu.isFO, mu.mediumId, mu.mvaTTHUL)
+        mu["isPres"] = leptonSelection.isPresMuon(mu)
+        mu["isFO"] = leptonSelection.isFOMuon(mu, year)
+        mu["isTight"]= leptonSelection.tightSelMuon(mu)
         mu['isGood'] = mu['isPres'] & mu['isTight']

         leading_mu = mu[ak.argmax(mu.pt,axis=-1,keepdims=True)]
@@ -83,11 +97,12 @@ def process(self, events):

         # Electron selection
         e["idEmu"] = te_os.ttH_idEmu_cuts_E3(e.hoe, e.eta, e.deltaEtaSC, e.eInvMinusPInv, e.sieie)
-        e["conept"] = te_os.coneptElec(e.pt, e.mvaTTHUL, e.jetRelIso)
+        e["conept"] = leptonSelection.coneptElec(e)
         e["btagDeepFlavB"] = ak.fill_none(e.matched_jet.btagDeepFlavB, -99)
-        e["isPres"] = te_os.isPresElec(e.pt, e.eta, e.dxy, e.dz, e.miniPFRelIso_all, e.sip3d, getattr(e,"mvaFall17V2noIso_WPL"))
-        e["isFO"] = te_os.isFOElec(e.pt, e.conept, e.btagDeepFlavB, e.idEmu, e.convVeto, e.lostHits, e.mvaTTHUL, e.jetRelIso, e.mvaFall17V2noIso_WP90, year)
-        e["isTight"] = te_os.tightSelElec(e.isFO, e.mvaTTHUL)
+        e["isPres"] = leptonSelection.isPresElec(e)
+        e["isFO"] = leptonSelection.isFOElec(e, year)
+        e["isTight"] = leptonSelection.tightSelElec(e)
+        e["isLooseE"] = leptonSelection.isLooseElec(e)
         e['isClean'] = te_os.isClean(e, mu, drmin=0.05)
         e['isGood'] = e['isPres'] & e['isTight'] & e['isClean']
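[Note on the hunk above: the per-era dispatch replaces the old free functions (te_os.coneptMuon(mu.pt, ...), etc.) with methods on a selection object chosen by year. A minimal sketch of the pattern, assuming the run2leptonselection/run3leptonselection interface implied by the call sites (not a documented API):

    # Sketch only: class names and the year convention ("202x" -> Run 3)
    # are taken from the call sites above.
    def build_lepton_selection(year):
        if year.startswith("202"):           # "2022", "2023", ... -> Run 3
            return te_os.run3leptonselection()
        return te_os.run2leptonselection()   # "UL16", "UL17", "UL18", ... -> Run 2

    # leptonSelection = build_lepton_selection(year)
    # mu["isPres"] = leptonSelection.isPresMuon(mu)  # era-specific cuts live inside
]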
@@ -110,9 +125,11 @@ def process(self, events):
         # Jet selection
         jetptname = 'pt_nom' if hasattr(j, 'pt_nom') else 'pt'
-        j["isGood"] = tc_os.is_tight_jet(getattr(j, jetptname), j.eta, j.jetId, pt_cut=30., eta_cut=get_te_param("eta_j_cut"), id_cut=get_te_param("jet_id_cut"))
+        j["isGood"] = tc_os.is_tight_jet(getattr(j, jetptname), j.eta, j.jetId, pt_cut=30., eta_cut=get_te_param("eta_j_cut"), id_cut=0)
         j['isClean'] = te_os.isClean(j, e, drmin=0.4)& te_os.isClean(j, mu, drmin=0.4)
         goodJets = j[(j.isClean)&(j.isGood)]
+        goodJets = goodJets[(goodJets.partonFlavour != 0)]
+
         njets = ak.num(goodJets)
         ht = ak.sum(goodJets.pt,axis=-1)
         j0 = goodJets[ak.argmax(goodJets.pt,axis=-1,keepdims=True)]
@@ -150,7 +167,6 @@ def process(self, events):

         return hout

-
     def postprocess(self, accumulator):
         return accumulator

@@ -160,3 +176,4 @@ def postprocess(self, accumulator):
     samples = load(outpath+'samples.coffea')
     topprocessor = AnalysisProcessor(samples)
+
diff --git a/analysis/topeft_run2/analysis_processor.py b/analysis/topeft_run2/analysis_processor.py
index 57e9712cb..7fb44af02 100644
--- a/analysis/topeft_run2/analysis_processor.py
+++ b/analysis/topeft_run2/analysis_processor.py
@@ -57,7 +57,7 @@ def construct_cat_name(chan_str,njet_str=None,flav_str=None):

 class AnalysisProcessor(processor.ProcessorABC):

-    def __init__(self, samples, wc_names_lst=[], hist_lst=None, ecut_threshold=None, do_errors=False, do_systematics=False, split_by_lepton_flavor=False, skip_signal_regions=False, skip_control_regions=False, muonSyst='nominal', dtype=np.float32, rebin=False, offZ_split=False, tau_h_analysis=False, fwd_analysis=False):
+    def __init__(self, samples, wc_names_lst=[], hist_lst=None, ecut_threshold=None, do_errors=False, do_systematics=False, split_by_lepton_flavor=False, skip_signal_regions=False, skip_control_regions=False, muonSyst='nominal', dtype=np.float32, rebin=False, offZ_split=False, tau_h_analysis=False, fwd_analysis=False, all_analysis=False):

         self._samples = samples
         self._wc_names_lst = wc_names_lst
@@ -65,6 +65,7 @@ def __init__(self, samples, wc_names_lst=[], hist_lst=None, ecut_threshold=None,
         self.offZ_3l_split = offZ_split
         self.tau_h_analysis = tau_h_analysis
         self.fwd_analysis = fwd_analysis
+        self.all_analysis = all_analysis

         proc_axis = hist.axis.StrCategory([], name="process", growth=True)
         chan_axis = hist.axis.StrCategory([], name="channel", growth=True)
@@ -316,7 +317,7 @@ def process(self, events):

         ################### Tau selection ####################

-        if self.tau_h_analysis:
+        if self.tau_h_analysis or self.all_analysis:
             tau["pt"], tau["mass"] = ApplyTES(year, tau, isData)
             tau["isPres"] = te_os.isPresTau(tau.pt, tau.eta, tau.dxy, tau.dz, tau.idDeepTau2017v2p1VSjet, tau.idDeepTau2017v2p1VSe, tau.idDeepTau2017v2p1VSmu, minpt=20)
             tau["isClean"] = te_os.isClean(tau, l_fo, drmin=0.3)
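[Note: every tau- and fwd-related block in this file is now gated on `self.tau_h_analysis or self.all_analysis` (or the fwd equivalent), so the new flag acts as a superset switch. A hypothetical consolidation of the repeated guard, not part of this patch, would keep the call sites shorter:

    # Hypothetical helper properties; the patch instead repeats the "or" inline.
    class _AnalysisFlags:
        def __init__(self, tau_h_analysis=False, fwd_analysis=False, all_analysis=False):
            self.tau_h_analysis = tau_h_analysis
            self.fwd_analysis = fwd_analysis
            self.all_analysis = all_analysis

        @property
        def with_taus(self):
            return self.tau_h_analysis or self.all_analysis

        @property
        def with_fwd(self):
            return self.fwd_analysis or self.all_analysis
]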
"FSRUp","FSRDown","ISRUp","ISRDown","renormUp","renormDown","factUp","factDown", # Theory systs ] - if self.tau_h_analysis: + if self.tau_h_analysis or self.all_analysis: wgt_correction_syst_lst.append("lepSF_taus_realUp") wgt_correction_syst_lst.append("lepSF_taus_realDown") wgt_correction_syst_lst.append("lepSF_taus_fakeUp") @@ -432,7 +433,7 @@ def process(self, events): # Jet cleaning, before any jet selection #vetos_tocleanjets = ak.with_name( ak.concatenate([tau, l_fo], axis=1), "PtEtaPhiMCandidate") - if self.tau_h_analysis: + if self.tau_h_analysis or self.all_analysis: vetos_tocleanjets = ak.with_name( ak.concatenate([cleaning_taus, l_fo], axis=1), "PtEtaPhiMCandidate") else: vetos_tocleanjets = ak.with_name( l_fo, "PtEtaPhiMCandidate") @@ -449,7 +450,7 @@ def process(self, events): # Jet energy corrections if not isData: cleanedJets["pt_gen"] = ak.values_astype(ak.fill_none(cleanedJets.matched_gen.pt, 0), np.float32) - if self.tau_h_analysis: + if self.tau_h_analysis or self.all_analysis: tau["pt"], tau["mass"] = ApplyTESSystematic(year, tau, isData, syst_var) tau["pt"], tau["mass"] = ApplyFESSystematic(year, tau, isData, syst_var) @@ -601,6 +602,8 @@ def process(self, events): import_sr_cat_dict = select_cat_dict["TAU_CH_LST_SR"] elif self.fwd_analysis: import_sr_cat_dict = select_cat_dict["FWD_CH_LST_SR"] + elif self.all_analysis: + import_sr_cat_dict = select_cat_dict["ALL_CH_LST_SR"] else: import_sr_cat_dict = select_cat_dict["TOP22_006_CH_LST_SR"] @@ -657,19 +660,19 @@ def process(self, events): if ch_name.startswith("1l"): weights_dict[ch_name].add("lepSF_muon", events.sf_1l_muon, copy.deepcopy(events.sf_1l_hi_muon), copy.deepcopy(events.sf_1l_lo_muon)) weights_dict[ch_name].add("lepSF_elec", events.sf_1l_elec, copy.deepcopy(events.sf_1l_hi_elec), copy.deepcopy(events.sf_1l_lo_elec)) - if self.tau_h_analysis: + if self.tau_h_analysis or self.all_analysis: weights_dict[ch_name].add("lepSF_taus_real", events.sf_2l_taus_real, copy.deepcopy(events.sf_2l_taus_real_hi), copy.deepcopy(events.sf_2l_taus_real_lo)) weights_dict[ch_name].add("lepSF_taus_fake", events.sf_2l_taus_fake, copy.deepcopy(events.sf_2l_taus_fake_hi), copy.deepcopy(events.sf_2l_taus_fake_lo)) elif ch_name.startswith("2l"): weights_dict[ch_name].add("lepSF_muon", events.sf_2l_muon, copy.deepcopy(events.sf_2l_hi_muon), copy.deepcopy(events.sf_2l_lo_muon)) weights_dict[ch_name].add("lepSF_elec", events.sf_2l_elec, copy.deepcopy(events.sf_2l_hi_elec), copy.deepcopy(events.sf_2l_lo_elec)) - if self.tau_h_analysis: + if self.tau_h_analysis or self.all_analysis: weights_dict[ch_name].add("lepSF_taus_real", events.sf_2l_taus_real, copy.deepcopy(events.sf_2l_taus_real_hi), copy.deepcopy(events.sf_2l_taus_real_lo)) weights_dict[ch_name].add("lepSF_taus_fake", events.sf_2l_taus_fake, copy.deepcopy(events.sf_2l_taus_fake_hi), copy.deepcopy(events.sf_2l_taus_fake_lo)) elif ch_name.startswith("3l"): weights_dict[ch_name].add("lepSF_muon", events.sf_3l_muon, copy.deepcopy(events.sf_3l_hi_muon), copy.deepcopy(events.sf_3l_lo_muon)) weights_dict[ch_name].add("lepSF_elec", events.sf_3l_elec, copy.deepcopy(events.sf_3l_hi_elec), copy.deepcopy(events.sf_3l_lo_elec)) - if self.tau_h_analysis: + if self.tau_h_analysis or self.all_analysis: weights_dict[ch_name].add("lepSF_taus_real", events.sf_2l_taus_real, copy.deepcopy(events.sf_2l_taus_real_hi), copy.deepcopy(events.sf_2l_taus_real_lo)) weights_dict[ch_name].add("lepSF_taus_fake", events.sf_2l_taus_fake, copy.deepcopy(events.sf_2l_taus_fake_hi), 
@@ -684,12 +687,12 @@ def process(self, events):
         # Get mask for events that have two sf os leps close to z peak
         sfosz_3l_OnZ_mask = tc_es.get_Z_peak_mask(l_fo_conept_sorted_padded[:,0:3],pt_window=10.0)
         sfosz_3l_OffZ_mask = ~sfosz_3l_OnZ_mask
-        if self.offZ_3l_split:
+        if self.offZ_3l_split or self.all_analysis:
             sfosz_3l_OffZ_low_mask = tc_es.get_off_Z_mask_low(l_fo_conept_sorted_padded[:,0:3],pt_window=0.0)
             sfosz_3l_OffZ_any_mask = tc_es.get_any_sfos_pair(l_fo_conept_sorted_padded[:,0:3])
         sfosz_2l_mask = tc_es.get_Z_peak_mask(l_fo_conept_sorted_padded[:,0:2],pt_window=10.0)
         sfasz_2l_mask = tc_es.get_Z_peak_mask(l_fo_conept_sorted_padded[:,0:2],pt_window=30.0,flavor="as") # Any sign (do not enforce ss or os here)
-        if self.tau_h_analysis:
+        if self.tau_h_analysis or self.all_analysis:
             tl_zpeak_mask = te_es.lt_Z_mask(l0, l1, tau0, 30.0)

         # Pass trigger mask
@@ -699,6 +702,7 @@ def process(self, events):
         bmask_atleast1med_atleast2loose = ((nbtagsm>=1)&(nbtagsl>=2)) # Used for 2lss and 4l
         bmask_exactly0med = (nbtagsm==0) # Used for 3l CR and 2los Z CR
         bmask_exactly1med = (nbtagsm==1) # Used for 3l SR and 2lss CR
+        bmask_atleast1med = (nbtagsm>=1) # Used for 3l fwd SR
         bmask_exactly2med = (nbtagsm==2) # Used for CRtt
         bmask_atleast2med = (nbtagsm>=2) # Used for 3l SR
         bmask_atmost2med = (nbtagsm< 3) # Used to make 2lss mutually exclusive from tttt enriched
@@ -712,7 +716,8 @@ def process(self, events):
         charge2l_1 = ak.fill_none(((l0.charge+l1.charge)!=0),False)
         charge3l_p = ak.fill_none(((l0.charge+l1.charge+l2.charge)>0),False)
         charge3l_m = ak.fill_none(((l0.charge+l1.charge+l2.charge)<0),False)
-        if self.tau_h_analysis:
+
+        if self.tau_h_analysis or self.all_analysis:
             tau_F_mask = (ak.num(tau[tau["isVLoose"]>0]) >=1)
             tau_L_mask = (ak.num(tau[tau["isLoose"]>0]) >=1)
             no_tau_mask = (ak.num(tau[tau["isLoose"]>0])==0)
@@ -740,14 +745,15 @@ def process(self, events):
             preselections.add("bmask_atmost2m", (bmask_atmost2med))
             preselections.add("fwdjet_mask", (fwdjet_mask))
             preselections.add("~fwdjet_mask", (~fwdjet_mask))
-        if self.tau_h_analysis:
+
+        if self.tau_h_analysis or self.all_analysis:
             preselections.add("1l", (events.is1l & pass_trg))
             preselections.add("1tau", (tau_L_mask))
             preselections.add("1Ftau", (tau_F_mask))
             preselections.add("0tau", (no_tau_mask))
             preselections.add("onZ_tau", (tl_zpeak_mask))
             preselections.add("offZ_tau", (~tl_zpeak_mask))
-        if self.fwd_analysis:
+        if self.fwd_analysis or self.all_analysis:
             preselections.add("2lss_fwd", (events.is2l & pass_trg & fwdjet_mask))
             preselections.add("2l_fwd_p", (chargel0_p & fwdjet_mask))
             preselections.add("2l_fwd_m", (chargel0_m & fwdjet_mask))
@@ -766,8 +772,10 @@ def process(self, events):
         preselections.add("3l_p", (events.is3l & pass_trg & charge3l_p))
         preselections.add("3l_m", (events.is3l & pass_trg & charge3l_m))
         preselections.add("3l_onZ", (sfosz_3l_OnZ_mask))
+        if self.fwd_analysis or self.all_analysis:
+            preselections.add("bmask_atleast1m", (bmask_atleast1med))

-        if self.offZ_3l_split:
+        if self.offZ_3l_split or self.all_analysis:
             preselections.add("3l_offZ_low", (sfosz_3l_OffZ_mask & sfosz_3l_OffZ_any_mask & sfosz_3l_OffZ_low_mask))
             preselections.add("3l_offZ_high", (sfosz_3l_OffZ_mask & sfosz_3l_OffZ_any_mask & ~sfosz_3l_OffZ_low_mask))
             preselections.add("3l_offZ_none", (sfosz_3l_OffZ_mask & ~sfosz_3l_OffZ_any_mask))
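[Note: preselections here is presumably coffea's PackedSelection, which stores each named boolean mask as a bit in a packed integer array; the new "bmask_atleast1m" entry is just another named bit. A short usage example:

    import numpy as np
    from coffea.analysis_tools import PackedSelection

    nbtagsm = np.array([0, 1, 2, 3])
    presel = PackedSelection()
    presel.add("bmask_exactly1m", nbtagsm == 1)
    presel.add("bmask_atleast1m", nbtagsm >= 1)  # as added for the 3l fwd SR above
    mask = presel.all("bmask_atleast1m")         # -> array([False, True, True, True])
]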
selections.add("atleast_1j", (njets>=1)) + selections.add("atleast_3j", (njets>=2)) selections.add("atleast_4j", (njets>=4)) selections.add("atleast_5j", (njets>=5)) selections.add("atleast_6j", (njets>=6)) @@ -851,8 +860,9 @@ def process(self, events): selections.add("isAR_2lSS_OS", ( events.is2l_SR) & charge2l_0) # Sideband for the charge flip selections.add("isSR_2lOS", ( events.is2l_SR) & charge2l_0) selections.add("isAR_2lOS", (~events.is2l_SR) & charge2l_0) - if self.tau_h_analysis: - selections.add("isSR_1l", ( events.is1l_SR)) + + if self.tau_h_analysis or self.all_analysis: + selections.add("isSR_1l", ( events.is1l_SR)) selections.add("isSR_3l", events.is3l_SR) selections.add("isAR_3l", ~events.is3l_SR) @@ -870,11 +880,13 @@ def process(self, events): # Z pt (pt of the ll pair that form the Z for the onZ categories) ptz = te_es.get_Z_pt(l_fo_conept_sorted_padded[:,0:3],10.0) - if self.tau_h_analysis: + + if self.tau_h_analysis or self.all_analysis: ptz_wtau = te_es.get_Zlt_pt(l0, l1, tau0) - if self.offZ_3l_split: + if self.offZ_3l_split or self.all_analysis: ptz = te_es.get_ll_pt(l_fo_conept_sorted_padded[:,0:3],10.0) + # Leading (b+l) pair pt bjetsl = goodJets[isBtagJetsLoose][ak.argsort(goodJets[isBtagJetsLoose].pt, axis=-1, ascending=False)] bl_pairs = ak.cartesian({"b":bjetsl,"l":l_fo_conept_sorted}) @@ -882,7 +894,7 @@ def process(self, events): bl0pt = ak.flatten(blpt[ak.argmax(blpt,axis=-1,keepdims=True)]) # Collection of all objects (leptons and jets) - if self.tau_h_analysis: + if self.tau_h_analysis or self.all_analysis: l_j_collection = ak.with_name(ak.concatenate([l_fo_conept_sorted,goodJets,cleaning_taus], axis=1),"PtEtaPhiMCollection") else: l_j_collection = ak.with_name(ak.concatenate([l_fo_conept_sorted,goodJets], axis=1),"PtEtaPhiMCollection") @@ -930,8 +942,9 @@ def process(self, events): varnames["bl0pt"] = bl0pt varnames["o0pt"] = o0pt varnames["lj0pt"] = lj0pt - varnames["lt"] = lt - if self.tau_h_analysis: + if self.fwd_analysis or self.all_analysis: + varnames["lt"] = lt + if self.tau_h_analysis or self.all_analysis: varnames["ptz_wtau"] = ptz_wtau varnames["tau0pt"] = tau0.pt @@ -1120,7 +1133,14 @@ def process(self, events): elif self.fwd_analysis: if (("ptz" in dense_axis_name) & ("onZ" not in lep_chan)): continue - if (("lt" in dense_axis_name) and ("2lss" not in lep_chan)): continue + if (("lt" in dense_axis_name) and ("fwd" not in lep_chan)): continue + elif self.all_analysis: + if (("ptz" in dense_axis_name) & ("onZ" not in lep_chan)): continue + if (("lt" in dense_axis_name) and ("fwd" not in lep_chan)): continue + if (("ptz" in dense_axis_name) and ("2lss" in lep_chan) and ("ptz_wtau" not in dense_axis_name)): continue + if (("ptz_wtau" in dense_axis_name) and (("1tau" not in lep_chan) or ("onZ" not in lep_chan) or ("2lss" not in lep_chan))): continue + if (("ptz" in dense_axis_name) & ("onZ" not in lep_chan) & ("offZ_high" not in lep_chan) & ("offZ_low" not in lep_chan)):continue + else: if (("ptz" in dense_axis_name) & ("onZ" not in lep_chan)): continue if ((dense_axis_name in ["o0pt","b0pt","bl0pt"]) & ("CR" in ch_name)): continue diff --git a/analysis/topeft_run2/datacards_post_processing.py b/analysis/topeft_run2/datacards_post_processing.py index f1b1df1dc..8267000bb 100644 --- a/analysis/topeft_run2/datacards_post_processing.py +++ b/analysis/topeft_run2/datacards_post_processing.py @@ -1,4 +1,5 @@ import os +import subprocess import shutil import argparse import json @@ -14,6 +15,8 @@ "(Set coffea.deprecations_as_errors = True to get a stack 
trace now.)", "ImportError: coffea.hist is deprecated", "warnings.warn(message, FutureWarning)", + "UserWarning: Numba extension module 'awkward.numba' failed to load due to 'AttributeError(module 'awkward.numba' has no attribute '_register')", + "entrypoints.init_all()", ] # Return list of lines in a file @@ -44,6 +47,7 @@ def main(): parser.add_argument("-z", "--set-up-offZdivision", action="store_true", help = "Copy the ptz and lj0pt cards with 3l offZ division.") parser.add_argument("-t", "--tau-flag", action="store_true", help = "Copy the ptz, lj0pt, and ptz_wtau cards for tau channels.") parser.add_argument("-f", "--fwd-flag", action="store_true", help = "Copy the ptz, lj0pt, and lt cards for forward channels.") + parser.add_argument("-a", "--all-analysis", action="store_true", help = "Copy all channels in with fwd and offZ contributions.") args = parser.parse_args() ###### Check that you run one only type of analysis ###### @@ -54,6 +58,7 @@ def main(): args.set_up_offZdivision, args.tau_flag, args.fwd_flag, + args.all_analysis, ] # check exactly one is True @@ -126,6 +131,8 @@ def main(): import_sr_ch_lst = select_ch_lst["TAU_CH_LST_SR"] elif args.fwd_flag: import_sr_ch_lst = select_ch_lst["FWD_CH_LST_SR"] + elif args.all_analysis: + import_sr_ch_lst = select_ch_lst["ALL_CH_LST_SR"] CATSELECTED = [] @@ -139,15 +146,24 @@ def main(): lep_ch_name = lep_ch[0] for jet in jet_list: # special channels to be binned by ptz instead of lj0pt - if lep_ch_name == "3l_onZ_1b" or (lep_ch_name == "3l_onZ_2b" and (int(jet) == 4 or int(jet) == 5)): + if "3l" in lep_ch_name and int(jet) == 1 and 'fwd' not in lep_ch_name: # 1j for fwd only + continue + #elif "3l_onZ_1b" in lep_ch_name or ("3l_onZ_2b" in lep_ch_name and (int(jet) == 4 or int(jet) == 5)) and 'fwd' not in lep_ch_name: + elif (("3l_onZ_1b" in lep_ch_name) or ("3l_onZ_2b" in lep_ch_name and int(jet) in [4, 5])) and 'fwd' not in lep_ch_name: channelname = lep_ch_name + "_" + jet + "j_ptz" - elif args.set_up_offZdivision and ( "high" in lep_ch_name or "low" in lep_ch_name ): # extra channels from offZ division binned by ptz + elif args.all_analysis and ( + ("3l_onZ_2b" in lep_ch_name and int(jet) == 1) or + ("3l_onZ_1b" in lep_ch_name and int(jet) == 1 and 'fwd' not in lep_ch_name) or + ("offZ_2b_fwd" in lep_ch_name and int(jet) == 1) + ): + continue + elif (args.set_up_offZdivision or args.all_analysis) and ( "high" in lep_ch_name or "low" in lep_ch_name ): # extra channels from offZ division binned by ptz channelname = lep_ch_name + "_" + jet + "j_ptz" - elif args.tau_flag and ("2los" in lep_ch_name): + elif (args.tau_flag or args.all_analysis) and ("2los" in lep_ch_name): channelname = lep_ch_name + "_" + jet + "j_ptz" - elif args.tau_flag and ("1tau_onZ" in lep_ch_name): + elif (args.tau_flag or args.all_analysis) and ("1tau_onZ" in lep_ch_name): channelname = lep_ch_name + "_" + jet + "j_ptz_wtau" - elif args.fwd_flag and ("fwd" in lep_ch_name or "2lss_p" in lep_ch_name or "2lss_m" in lep_ch_name): + elif (args.fwd_flag or args.all_analysis) and ("fwd" in lep_ch_name): channelname = lep_ch_name + "_" + jet + "j_lt" else: channelname = lep_ch_name + "_" + jet + "j_lj0pt" @@ -173,19 +189,31 @@ def main(): file_name_strip_ext = os.path.splitext(fname)[0] for file in CATSELECTED: if file in file_name_strip_ext: + if fname.endswith(".txt"): + bad = subprocess.call([f'grep "observation 0.00" {os.path.join(args.datacards_path,fname)}'], shell=True, stdout=subprocess.DEVNULL) + if bad == 0: + raise Exception(f"Warning: {file} has 0 
observation!") shutil.copyfile(os.path.join(args.datacards_path,fname),os.path.join(ptzlj0pt_path,fname)) if fname.endswith(".txt"): n_txt += 1 if fname.endswith(".root"): n_root += 1 #also copy the selectedWCs.txt file shutil.copyfile(os.path.join(args.datacards_path,"selectedWCs.txt"),os.path.join(ptzlj0pt_path,"selectedWCs.txt")) - for item in scalings_content: - channel_name = item.get("channel") - if channel_name in CATSELECTED: - ch_index = CATSELECTED.index(channel_name) + 1 - item["channel"] = "ch" + str(ch_index) - else: - scalings_content = [d for d in scalings_content if d != item] + new_scalings = [] + for ch_index, channel_name in enumerate(CATSELECTED, start=1): + # find all items that match this channel + matches = [item for item in scalings_content if item.get("channel") == channel_name] + + if not matches: + raise ValueError(f"Channel '{channel_name}' not found in scalings_content") + + # update and append in order + for item in matches: + new_item = item.copy() + new_item["channel"] = f"ch{ch_index}" + new_scalings.append(new_item) + + scalings_content = new_scalings with open(os.path.join(ptzlj0pt_path, 'scalings.json'), 'w') as file: json.dump(scalings_content, file, indent=4) diff --git a/analysis/topeft_run2/fullR2_run.sh b/analysis/topeft_run2/fullR2_run.sh index 0edd457cd..6f48fff9d 100644 --- a/analysis/topeft_run2/fullR2_run.sh +++ b/analysis/topeft_run2/fullR2_run.sh @@ -9,14 +9,15 @@ CFGS="../../input_samples/cfgs/mc_signal_samples_NDSkim.cfg,../../input_samples/ OPTIONS="--hist-list ana --skip-cr --do-systs -s 20000 --do-np -o $OUT_NAME --tau_h_analysis" # For analysis + # Build the run command for filling CR histos #OUT_NAME+="_CRs" #CFGS="../../input_samples/cfgs/mc_signal_samples_NDSkim.cfg,../../input_samples/cfgs/mc_background_samples_NDSkim.cfg,../../input_samples/cfgs/mc_background_samples_cr_NDSkim.cfg,../../input_samples/cfgs/data_samples_NDSkim.cfg" -#OPTIONS="--hist-list cr --skip-sr --do-systs --do-np --wc-list ctG -o $OUT_NAME --split-lep-flavor" # For CR plots +#OPTIONS="--hist-list cr --skip-sr --do-systs --do-np --wc-list ctG -o $OUT_NAME" # For CR plots echo "OUT_NAME:" $OUT_NAME # Run the processor over all Run2 samples RUN_COMMAND="time python run_analysis.py $CFGS $OPTIONS" printf "\nRunning the following command:\n$RUN_COMMAND\n\n" -$RUN_COMMAND \ No newline at end of file +$RUN_COMMAND diff --git a/analysis/topeft_run2/get_datacard_yields.py b/analysis/topeft_run2/get_datacard_yields.py index 9744df096..28be7790b 100644 --- a/analysis/topeft_run2/get_datacard_yields.py +++ b/analysis/topeft_run2/get_datacard_yields.py @@ -33,12 +33,16 @@ "2lss_p_3b", "2lss_m_2b", "2lss_p_2b", - "3l_m_1b", - "3l_p_1b", - "3l_m_2b", - "3l_p_2b", + "2lss_fwd_m", + "2lss_fwd_p", + "3l_m_fwd_1b", + "3l_m_fwd_2b", + "3l_p_fwd_1b", + "3l_p_fwd_2b", "3l_onZ_1b", "3l_onZ_2b", + "3l_onZ_1b_fwd", + "3l_onZ_2b_fwd", "4l_2b", ] @@ -48,12 +52,16 @@ "2lss_4t_p" : "2lss_p_3b", "2lss_m" : "2lss_m_2b", "2lss_p" : "2lss_p_2b", - "3l_m_offZ_1b" : "3l_m_1b", - "3l_m_offZ_2b" : "3l_m_2b", - "3l_p_offZ_1b" : "3l_p_1b", - "3l_p_offZ_2b" : "3l_p_2b", + "2lss_fwd_m" : "2lss_fwd_m", + "2lss_fwd_p" : "2lss_fwd_p", + "3l_m_offZ_1b_fwd" : "3l_m_fwd_1b", + "3l_m_offZ_2b_fwd" : "3l_m_fwd_2b", + "3l_p_offZ_1b_fwd" : "3l_p_fwd_1b", + "3l_p_offZ_2b_fwd" : "3l_p_fwd_2b", "3l_onZ_1b" : "3l_onZ_1b", "3l_onZ_2b" : "3l_onZ_2b", + "3l_onZ_1b_fwd" : "3l_onZ_1b_fwd", + "3l_onZ_2b_fwd" : "3l_onZ_2b_fwd", "4l" : "4l_2b", } @@ -196,7 +204,7 @@ def 
diff --git a/analysis/topeft_run2/fullR2_run.sh b/analysis/topeft_run2/fullR2_run.sh
index 0edd457cd..6f48fff9d 100644
--- a/analysis/topeft_run2/fullR2_run.sh
+++ b/analysis/topeft_run2/fullR2_run.sh
@@ -9,14 +9,15 @@ CFGS="../../input_samples/cfgs/mc_signal_samples_NDSkim.cfg,../../input_samples/
 OPTIONS="--hist-list ana --skip-cr --do-systs -s 20000 --do-np -o $OUT_NAME --tau_h_analysis" # For analysis

+
 # Build the run command for filling CR histos
 #OUT_NAME+="_CRs"
 #CFGS="../../input_samples/cfgs/mc_signal_samples_NDSkim.cfg,../../input_samples/cfgs/mc_background_samples_NDSkim.cfg,../../input_samples/cfgs/mc_background_samples_cr_NDSkim.cfg,../../input_samples/cfgs/data_samples_NDSkim.cfg"
-#OPTIONS="--hist-list cr --skip-sr --do-systs --do-np --wc-list ctG -o $OUT_NAME --split-lep-flavor" # For CR plots
+#OPTIONS="--hist-list cr --skip-sr --do-systs --do-np --wc-list ctG -o $OUT_NAME" # For CR plots

 echo "OUT_NAME:" $OUT_NAME

 # Run the processor over all Run2 samples
 RUN_COMMAND="time python run_analysis.py $CFGS $OPTIONS"
 printf "\nRunning the following command:\n$RUN_COMMAND\n\n"
-$RUN_COMMAND
\ No newline at end of file
+$RUN_COMMAND
diff --git a/analysis/topeft_run2/get_datacard_yields.py b/analysis/topeft_run2/get_datacard_yields.py
index 9744df096..28be7790b 100644
--- a/analysis/topeft_run2/get_datacard_yields.py
+++ b/analysis/topeft_run2/get_datacard_yields.py
@@ -33,12 +33,16 @@
     "2lss_p_3b",
     "2lss_m_2b",
     "2lss_p_2b",
-    "3l_m_1b",
-    "3l_p_1b",
-    "3l_m_2b",
-    "3l_p_2b",
+    "2lss_fwd_m",
+    "2lss_fwd_p",
+    "3l_m_fwd_1b",
+    "3l_m_fwd_2b",
+    "3l_p_fwd_1b",
+    "3l_p_fwd_2b",
     "3l_onZ_1b",
     "3l_onZ_2b",
+    "3l_onZ_1b_fwd",
+    "3l_onZ_2b_fwd",
     "4l_2b",
 ]

@@ -48,12 +52,16 @@
     "2lss_4t_p" : "2lss_p_3b",
     "2lss_m" : "2lss_m_2b",
     "2lss_p" : "2lss_p_2b",
-    "3l_m_offZ_1b" : "3l_m_1b",
-    "3l_m_offZ_2b" : "3l_m_2b",
-    "3l_p_offZ_1b" : "3l_p_1b",
-    "3l_p_offZ_2b" : "3l_p_2b",
+    "2lss_fwd_m" : "2lss_fwd_m",
+    "2lss_fwd_p" : "2lss_fwd_p",
+    "3l_m_offZ_1b_fwd" : "3l_m_fwd_1b",
+    "3l_m_offZ_2b_fwd" : "3l_m_fwd_2b",
+    "3l_p_offZ_1b_fwd" : "3l_p_fwd_1b",
+    "3l_p_offZ_2b_fwd" : "3l_p_fwd_2b",
     "3l_onZ_1b" : "3l_onZ_1b",
     "3l_onZ_2b" : "3l_onZ_2b",
+    "3l_onZ_1b_fwd" : "3l_onZ_1b_fwd",
+    "3l_onZ_2b_fwd" : "3l_onZ_2b_fwd",
     "4l" : "4l_2b",
 }

@@ -196,7 +204,7 @@ def include_skipped_procs_as_zero(rates_dict,proc_lst_to_include):

 # Takes a string (corresponding to a category name), returns the name with the njets and kinematic var info removed
 # E.g. "2lss_4t_p_4j_2b_lj0pt" -> "2lss_4t_p_2b"
-def get_base_cat_name(cat_name,var_lst=["ptz","lj0pt"]):
+def get_base_cat_name(cat_name,var_lst=["ptz","lj0pt","lt"]):
     cat_name_split = cat_name.split("_")
     substr_lst_nojets_novarname = []
     for substr in cat_name_split:
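[Note: with "lt" in the default var_lst, forward-channel datacard names now reduce correctly. A simplified re-implementation showing the intended behavior (illustrative only; the real function lives in this file):

    def base_cat_name(cat_name, var_lst=("ptz", "lj0pt", "lt")):
        keep = []
        for substr in cat_name.split("_"):
            if substr in var_lst: continue                               # drop the kinematic-var token
            if substr.endswith("j") and substr[:-1].isdigit(): continue  # drop njets tokens like "4j"
            keep.append(substr)
        return "_".join(keep)

    # base_cat_name("2lss_4t_p_4j_2b_lj0pt") -> "2lss_4t_p_2b"
    # base_cat_name("2lss_fwd_p_5j_lt")      -> "2lss_fwd_p"
]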
sample_group_map["Conv"]: - scale_name_for_json = "convs" - elif sample_name in sample_group_map["Diboson"]: - scale_name_for_json = "Diboson" - elif sample_name in sample_group_map["Triboson"]: - scale_name_for_json = "Triboson" - elif sample_name in sample_group_map["Signal"]: - for proc_str in ["ttH","tllq","ttlnu","ttll","tHq","tttt"]: - if proc_str in sample_name: - # This should only match once, but maybe we should put a check to enforce this - scale_name_for_json = proc_str + if group_type == "CR": + if sample_name in sample_group_map["Conv"]: + scale_name_for_json = "convs" + elif sample_name in sample_group_map["Diboson"]: + scale_name_for_json = "Diboson" + elif sample_name in sample_group_map["Triboson"]: + scale_name_for_json = "Triboson" + elif sample_name in sample_group_map["Signal"]: + for proc_str in ["ttH","tllq","ttlnu","ttll","tHq","tttt"]: + if proc_str in sample_name: + # This should only match once, but maybe we should put a check to enforce this + scale_name_for_json = proc_str + else: + if sample_name in sample_group_map["Conv"]: + scale_name_for_json = "convs" + elif sample_name in sample_group_map["Diboson"]: + scale_name_for_json = "Diboson" + #elif "Multiboson" in sample_group_map and sample_name in sample_group_map["Multiboson"]: + # scale_name_for_json = "Multiboson" + else: + for proc_str in ["ttH","tllq","ttlnu","ttll","tHq","tttt"]: + if proc_str in sample_name: + # This should only match once, but maybe we should put a check to enforce this + scale_name_for_json = proc_str return scale_name_for_json # This function gets the tag that indicates how a particualr systematic is correlated # - For pdf_scale this corresponds to the initial state (e.g. gg) # - For qcd_scale this corresponds to the process type (e.g. VV) # For any systemaitc or process that is not included in the correlations json we return None -def get_correlation_tag(uncertainty_name,proc_name,sample_group_map): - proc_name_in_json = get_scale_name(proc_name,sample_group_map) +def get_correlation_tag(uncertainty_name,proc_name,sample_group_map,group_type): + proc_name_in_json = get_scale_name(proc_name,sample_group_map,group_type) corr_tag = None # Right now we only have two types of uncorrelated rate systematics if uncertainty_name in ["qcd_scale","pdf_scale"]: @@ -261,10 +338,10 @@ def get_correlation_tag(uncertainty_name,proc_name,sample_group_map): # This function gets all of the the rate systematics from the json file # Returns a dictionary with all of the uncertainties # If the sample does not have an uncertainty in the json, an uncertainty of 0 is returned for that category -def get_rate_systs(sample_name,sample_group_map): +def get_rate_systs(sample_name,sample_group_map,group_type): # Figure out the name of the appropriate sample in the syst rate json (if the proc is in the json) - scale_name_for_json = get_scale_name(sample_name,sample_group_map) + scale_name_for_json = get_scale_name(sample_name,sample_group_map,group_type) # Get the lumi uncty for this sample (same for all samles) lumi_uncty = grs.get_syst("lumi") @@ -299,7 +376,7 @@ def get_rate_systs(sample_name,sample_group_map): # Wrapper for getting plus and minus rate arrs -def get_rate_syst_arrs(base_histo,proc_group_map): +def get_rate_syst_arrs(base_histo,proc_group_map,group_type="CR"): # Fill dictionary with the rate uncertainty arrays (with correlated ones organized together) rate_syst_arr_dict = {} @@ -308,13 +385,13 @@ def get_rate_syst_arrs(base_histo,proc_group_map): for sample_name in 
@@ -261,10 +338,10 @@ def get_correlation_tag(uncertainty_name,proc_name,sample_group_map):
 # This function gets all of the rate systematics from the json file
 # Returns a dictionary with all of the uncertainties
 # If the sample does not have an uncertainty in the json, an uncertainty of 0 is returned for that category
-def get_rate_systs(sample_name,sample_group_map):
+def get_rate_systs(sample_name,sample_group_map,group_type):

     # Figure out the name of the appropriate sample in the syst rate json (if the proc is in the json)
-    scale_name_for_json = get_scale_name(sample_name,sample_group_map)
+    scale_name_for_json = get_scale_name(sample_name,sample_group_map,group_type)

     # Get the lumi uncty for this sample (same for all samples)
     lumi_uncty = grs.get_syst("lumi")
@@ -299,7 +376,7 @@

 # Wrapper for getting plus and minus rate arrs
-def get_rate_syst_arrs(base_histo,proc_group_map):
+def get_rate_syst_arrs(base_histo,proc_group_map,group_type="CR"):

     # Fill dictionary with the rate uncertainty arrays (with correlated ones organized together)
     rate_syst_arr_dict = {}
@@ -308,13 +385,13 @@
         for sample_name in yt.get_cat_lables(base_histo,"process"):

             # Build the plus and minus arrays from the rate uncertainty number and the nominal arr
-            rate_syst_dict = get_rate_systs(sample_name,proc_group_map)
+            rate_syst_dict = get_rate_systs(sample_name,proc_group_map,group_type)
             thissample_nom_arr = base_histo.integrate("process",sample_name).integrate("systematic","nominal").eval({})[()]
             p_arr = thissample_nom_arr*(rate_syst_dict[rate_sys_type][1]) - thissample_nom_arr # Difference between positive fluctuation and nominal
             m_arr = thissample_nom_arr*(rate_syst_dict[rate_sys_type][0]) - thissample_nom_arr # Difference between positive fluctuation and nominal

             # Put the arrays into the correlation dict (organizing correlated ones together)
-            correlation_tag = get_correlation_tag(rate_sys_type,sample_name,proc_group_map)
+            correlation_tag = get_correlation_tag(rate_sys_type,sample_name,proc_group_map,group_type)
             out_key_name = rate_sys_type
             if correlation_tag is not None: out_key_name += "_"+correlation_tag
             if out_key_name not in rate_syst_arr_dict[rate_sys_type]:
@@ -409,9 +486,9 @@ def get_decorrelated_uncty(syst_name,grp_map,relevant_samples_lst,base_histo,tem
     for proc_name in proc_lst:
         if proc_name not in relevant_samples_lst: continue

-        n_arr_proc = base_histo.integrate("process",proc_name)[{"process": sum}].integrate("systematic","nominal").eval({})[()]
-        u_arr_proc = base_histo.integrate("process",proc_name)[{"process": sum}].integrate("systematic",syst_name+"Up").eval({})[()]
-        d_arr_proc = base_histo.integrate("process",proc_name)[{"process": sum}].integrate("systematic",syst_name+"Down").eval({})[()]
+        n_arr_proc = base_histo.integrate("process",proc_name).integrate("systematic","nominal").eval({})[()]
+        u_arr_proc = base_histo.integrate("process",proc_name).integrate("systematic",syst_name+"Up").eval({})[()]
+        d_arr_proc = base_histo.integrate("process",proc_name).integrate("systematic",syst_name+"Down").eval({})[()]

         u_arr_proc_rel = u_arr_proc - n_arr_proc
         d_arr_proc_rel = d_arr_proc - n_arr_proc
@@ -472,7 +549,7 @@ def get_diboson_njets_syst_arr(njets_histo_vals_arr,bin0_njets):

 ######### Plotting functions #########

 # Takes two histograms and makes a plot (with only one sparse axis, which should be "process"), one hist should be mc and one should be data
-def make_cr_fig(h_mc,h_data,unit_norm_bool,axis='process',var='lj0pt',bins=[],group=[],set_x_lim=None,err_p=None,err_m=None,err_ratio_p=None,err_ratio_m=None):
+def make_cr_fig(h_mc,h_data,unit_norm_bool,axis='process',var='lj0pt',bins=[],group=[],set_x_lim=None,err_p=None,err_m=None,err_ratio_p=None,err_ratio_m=None,unblind=False):

     colors = ["tab:blue","darkgreen","tab:orange",'tab:cyan',"tab:purple","tab:pink","tan","mediumseagreen","tab:red","brown"]
@@ -548,7 +625,7 @@ def make_cr_fig(h_mc,h_data,unit_norm_bool,axis='process',var='lj0pt',bins=[],gr
             bins=bins,
             stack=False,
             density=unit_norm_bool,
-            label='Data',
+            label=('Data' if unblind else 'Asimov Data'),
             #flow='show',
             histtype='errorbar',
             **DATA_ERR_OPS,
@@ -571,17 +648,19 @@ def make_cr_fig(h_mc,h_data,unit_norm_bool,axis='process',var='lj0pt',bins=[],gr
     # Plot the syst error
     if plot_syst_err:
         bin_edges_arr = h_mc.axes[var].edges
-        #err_p = np.append(err_p,0) # Work around off by one error
-        #err_m = np.append(err_m,0) # Work around off by one error
-        #err_ratio_p = np.append(err_ratio_p,0) # Work around off by one error
-        #err_ratio_m = np.append(err_ratio_m,0) # Work around off by one error
         ax.fill_between(bin_edges_arr,err_m,err_p, step='post', facecolor='none', edgecolor='gray', label='Syst err', hatch='////')
         rax.fill_between(bin_edges_arr,err_ratio_m,err_ratio_p,step='post', facecolor='none', edgecolor='gray', label='Syst err', hatch='////')
-    err_m = np.append(h_mc[{'process': sum}].as_hist({}).values(flow=True)[1:]-np.sqrt(h_mc[{'process': sum}].as_hist({}).values(flow=True)[1:]), 1)
-    err_p = np.append(h_mc[{'process': sum}].as_hist({}).values(flow=True)[1:]+np.sqrt(h_mc[{'process': sum}].as_hist({}).values(flow=True)[1:]), 1)
-    err_ratio_m = np.append(1-1/np.sqrt(h_mc[{'process': sum}].as_hist({}).values(flow=True)[1:]), 1)
-    err_ratio_p = np.append(1+1/np.sqrt(h_mc[{'process': sum}].as_hist({}).values(flow=True)[1:]), 1)
-    rax.fill_between(bins,err_ratio_m,err_ratio_p,step='post', facecolor='none', edgecolor='gray', label='Stat err', hatch='////')
+    bin_edges_arr = h_mc.axes[var].edges
+    err_m = h_mc[{'process': sum}].eval({})[()][1:]-np.sqrt(h_mc[{'process': sum}].eval({})[()][1:])
+    err_p = h_mc[{'process': sum}].eval({})[()][1:]+np.sqrt(h_mc[{'process': sum}].eval({})[()][1:])
+    err_ratio_m = h_data[{'process':sum}].as_hist({}).values(flow=True)[1:] / err_m
+    err_ratio_p = h_data[{'process':sum}].as_hist({}).values(flow=True)[1:] / err_p
+    err_ratio_m = np.append(err_ratio_m, err_ratio_m[-1])
+    err_ratio_p = np.append(err_ratio_p, err_ratio_p[-1])
+    err_m = np.append(err_m, err_m[-1])
+    err_p = np.append(err_p, err_p[-1])
+    ax.fill_between(bins,err_m,err_p, step='post', facecolor='none', edgecolor='gray', label='Total err', hatch='\\\\')
+    rax.fill_between(bins,err_ratio_m,err_ratio_p,step='post', facecolor='none', edgecolor='gray', label='Stat err', hatch='\\\\')

     # Scale the y axis and labels
     ax.autoscale(axis='y')
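[Note: the replacement band above is a plain ±√N statistical band on the summed MC, with the data/band ratio drawn in the lower panel; each array is padded with a duplicated last entry because fill_between(..., step='post') needs a value at the final bin edge. Condensed, the arithmetic is:

    import numpy as np

    mc = np.array([100.0, 25.0, 4.0])   # summed MC yield per bin
    err_m = mc - np.sqrt(mc)            # [90., 20., 2.]
    err_p = mc + np.sqrt(mc)            # [110., 30., 6.]
    err_m = np.append(err_m, err_m[-1]) # pad so step='post' closes the last bin
    err_p = np.append(err_p, err_p[-1])
]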
@@ -819,7 +898,7 @@ def make_simple_plots(dict_of_hists,year,save_dir_path):

 ###################### Wrapper function for SR data and mc plots (unblind!) ######################
 # Wrapper function to loop over all SR categories and make plots for all variables
-def make_all_sr_data_mc_plots(dict_of_hists,year,save_dir_path):
+def make_all_sr_data_mc_plots(dict_of_hists,year,save_dir_path,unblind=False,skip_syst_errs=False):

     # Construct list of MC samples
     mc_wl = []
@@ -889,6 +968,8 @@ def make_all_sr_data_mc_plots(dict_of_hists,year,save_dir_path):
                 CR_GRP_MAP["Ttbar"].append(proc_name)
             elif "TTG" in proc_name:
                 SR_GRP_MAP["Conv"].append(proc_name)
+            elif "ttg" in proc_name:
+                SR_GRP_MAP["Conv"].append(proc_name)
             elif "WWW" in proc_name or "WWZ" in proc_name or "WZZ" in proc_name or "ZZZ" in proc_name:
                 SR_GRP_MAP["Multiboson"].append(proc_name)
             elif "WWTo2L2Nu" in proc_name or "ZZTo4L" in proc_name or "WZTo3LNu" in proc_name:
@@ -909,19 +990,20 @@ def make_all_sr_data_mc_plots(dict_of_hists,year,save_dir_path):
     #}
     analysis_bins['ptz'] = axes_info['ptz']['variable']
     analysis_bins['lj0pt'] = axes_info['lj0pt']['variable']
+    analysis_bins['l0eta'] = axes_info['l0eta']['variable']
+    analysis_bins['l1eta'] = axes_info['l1eta']['variable']

     # Loop over hists and make plots
-    skip_lst = ['ptz', 'njets'] # Skip this hist
-    #keep_lst = ["njets","lj0pt","ptz","nbtagsl","nbtagsm","l0pt","j0pt"] # Skip all but these hists
+    skip_lst = ['njets'] # Skip this hist
     for idx,var_name in enumerate(dict_of_hists.keys()):
         if 'sumw2' in var_name: continue
         if (var_name in skip_lst): continue
-        #if (var_name not in keep_lst): continue
         print("\nVariable:",var_name)

         # Extract the MC and data hists
         hist_mc_orig = dict_of_hists[var_name].remove("process", samples_to_rm_from_mc_hist)
         hist_data_orig = dict_of_hists[var_name].remove("process", samples_to_rm_from_data_hist)
+        if not unblind: hist_data_orig = hist_mc_orig

         # Loop over channels
         channels_lst = yt.get_cat_lables(dict_of_hists[var_name],"channel")
@@ -938,6 +1020,27 @@ def make_all_sr_data_mc_plots(dict_of_hists,year,save_dir_path):
                 channels = [chan for chan in SR_CHAN_DICT[chan_name] if chan in hist_data_orig.axes['channel']]
                 hist_data = hist_data_orig.integrate("systematic","nominal").integrate("channel",channels)[{'channel': sum}]

+                # Calculate the syst errors
+                p_err_arr = None
+                m_err_arr = None
+                p_err_arr_ratio = None
+                m_err_arr_ratio = None
+                if not skip_syst_errs:
+                    # Get plus and minus rate and shape arrs
+                    rate_systs_summed_arr_m , rate_systs_summed_arr_p = get_rate_syst_arrs(hist_mc_orig[{"channel": channels}][{"channel": sum}], SR_GRP_MAP, group_type="SR")
+                    shape_systs_summed_arr_m , shape_systs_summed_arr_p = get_shape_syst_arrs(hist_mc_orig[{"channel": channels}][{"channel": sum}])
+                    if (var_name == "njets"):
+                        # This is a special case for the diboson jet dependent systematic
+                        db_hist = hist_mc_orig[{"process": SR_GRP_MAP["Diboson"], "channel": channels, "systematic": "nominal"}][{"process": sum, "channel": sum}].eval({})[()]
+                        shape_systs_summed_arr_p = shape_systs_summed_arr_p + get_diboson_njets_syst_arr(db_hist,bin0_njets=0 + (1 if 'fwd' in proc_name else 0)) # Njets histos are assumed to start at njets=0
+                        shape_systs_summed_arr_m = shape_systs_summed_arr_m + get_diboson_njets_syst_arr(db_hist,bin0_njets=0 + (1 if 'fwd' in proc_name else 0)) # Njets histos are assumed to start at njets=0
+                    # Get the arrays we will actually put in the CR plot
+                    nom_arr_all = hist_mc_orig[{"process": sum, "channel":channels, "systematic": "nominal"}][{"channel": sum}].eval({})[()][1:]
+                    p_err_arr = nom_arr_all + np.sqrt(shape_systs_summed_arr_p + rate_systs_summed_arr_p)[1:] # This goes in the main plot
+                    m_err_arr = nom_arr_all - np.sqrt(shape_systs_summed_arr_m + rate_systs_summed_arr_m)[1:] # This goes in the main plot
+                    p_err_arr_ratio = np.where(nom_arr_all>0,p_err_arr/nom_arr_all,1) # This goes in the ratio plot
+                    m_err_arr_ratio = np.where(nom_arr_all>0,m_err_arr/nom_arr_all,1) # This goes in the ratio plot
+
                 #print(var_name, chan_name, f'grouping {SR_GRP_MAP=}')
                 # Using new grouping approach in plot functions
                 #hist_mc = group_bins(hist_mc,SR_GRP_MAP,"process",drop_unspecified=False)
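[Note: as in the CR plotting path, the shape and rate uncertainties above are summed in quadrature around the nominal, and the band is converted to a ratio with a guard against empty bins:

    import numpy as np

    nom = np.array([50.0, 20.0])
    shape_sq = np.array([9.0, 4.0])    # summed squared shape variations per bin
    rate_sq = np.array([16.0, 0.25])   # summed squared rate variations per bin
    p_err = nom + np.sqrt(shape_sq + rate_sq)
    m_err = nom - np.sqrt(shape_sq + rate_sq)
    p_ratio = np.where(nom > 0, p_err / nom, 1)  # ratio-panel band, 1 where nom is empty
]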
@@ -948,19 +1051,6 @@ def make_all_sr_data_mc_plots(dict_of_hists,year,save_dir_path):
                 if not os.path.exists(save_dir_path_tmp):
                     os.mkdir(save_dir_path_tmp)

-                # Rebin into analysis bins
-                if var_name in analysis_bins.keys():
-                    lep_bin = chan_name[:2]
-                    # histEFT doesn't support rebinning for now
-                    '''
-                    if var_name == "njets":
-                        hist_mc = hist_mc.rebin(var_name, hist.Bin(var_name, hist_mc.axes[var_name].label, analysis_bins[var_name][lep_bin]))
-                        hist_data = hist_data.rebin(var_name, hist.Bin(var_name, hist_data.axes[var_name].label, analysis_bins[var_name][lep_bin]))
-                    else:
-                        hist_mc = hist_mc.rebin(var_name, hist.Bin(var_name, hist_mc.axes[var_name].label, analysis_bins[var_name]))
-                        hist_data = hist_data.rebin(var_name, hist.Bin(var_name, hist_data.axes[var_name].label, analysis_bins[var_name]))
-                    '''
-
                 if not hist_mc.eval({}):
                     print("Warning: empty mc histo, continuing")
                     continue
@@ -968,7 +1058,17 @@
                     print("Warning: empty data histo, continuing")
                     continue

-                fig = make_cr_fig(hist_mc, hist_data, var=var_name, unit_norm_bool=False, bins=axes_info[var_name]['variable'],group=SR_GRP_MAP)
+                fig = make_cr_fig(hist_mc,
+                                  hist_data,
+                                  var=var_name,
+                                  unit_norm_bool=False,
+                                  bins=axes_info[var_name]['variable'],
+                                  group=SR_GRP_MAP,
+                                  unblind=unblind,
+                                  err_p = p_err_arr,
+                                  err_m = m_err_arr,
+                                  err_ratio_p = p_err_arr_ratio,
+                                  err_ratio_m = m_err_arr_ratio)
                 if year is not None: year_str = year
                 else: year_str = "ULall"
                 title = chan_name + "_" + var_name + "_" + year_str
@@ -985,7 +1085,7 @@
 # Wrapper function to loop over all SR categories and make plots for all variables
 # Right now this function will only plot the signal samples
 # By default, will make two sets of plots: One with process overlay, one with channel overlay
-def make_all_sr_plots(dict_of_hists,year,unit_norm_bool,save_dir_path,split_by_chan=True,split_by_proc=True):
+def make_all_sr_plots(dict_of_hists,year,unit_norm_bool,save_dir_path,split_by_chan=True,split_by_proc=True,skip_syst_errs=False):

     # If selecting a year, append that year to the weight list
     sig_wl = ["private"]
@@ -1030,6 +1130,28 @@ def make_all_sr_plots(dict_of_hists,year,unit_norm_bool,save_dir_path,split_by_c
     # Make plots for each SR category
     if split_by_chan:
         for hist_cat in SR_CHAN_DICT.keys():
+
+            # Calculate the syst errors
+            p_err_arr = None
+            m_err_arr = None
+            p_err_arr_ratio = None
+            m_err_arr_ratio = None
+            if not skip_syst_errs:
+                # Get plus and minus rate and shape arrs
+                rate_systs_summed_arr_m , rate_systs_summed_arr_p = get_rate_syst_arrs(hist_sig, CR_GRP_MAP)
+                shape_systs_summed_arr_m , shape_systs_summed_arr_p = get_shape_syst_arrs(hist_sig)
+                if (var_name == "njets"):
+                    # This is a special case for the diboson jet dependent systematic
+                    db_hist = hist_sig.integrate("process",CR_GRP_MAP["Diboson"])[{"process": sum}].integrate("systematic","nominal").eval({})[()]
+                    shape_systs_summed_arr_p = shape_systs_summed_arr_p + get_diboson_njets_syst_arr(db_hist,bin0_njets=0 + (1 if 'fwd' in hist_cat else 0)) # Njets histos are assumed to start at njets=0
+                    shape_systs_summed_arr_m = shape_systs_summed_arr_m + get_diboson_njets_syst_arr(db_hist,bin0_njets=0 + (1 if 'fwd' in hist_cat else 0)) # Njets histos are assumed to start at njets=0
+                # Get the arrays we will actually put in the CR plot
+                nom_arr_all = hist_sig[{"process": sum}].integrate("systematic","nominal").eval({})[()][1:]
+                p_err_arr = nom_arr_all + np.sqrt(shape_systs_summed_arr_p + rate_systs_summed_arr_p)[1:] # This goes in the main plot
+                m_err_arr = nom_arr_all - np.sqrt(shape_systs_summed_arr_m + rate_systs_summed_arr_m)[1:] # This goes in the main plot
+                p_err_arr_ratio = np.where(nom_arr_all>0,p_err_arr/nom_arr_all,1) # This goes in the ratio plot
+                m_err_arr_ratio = np.where(nom_arr_all>0,m_err_arr/nom_arr_all,1) # This goes in the ratio plot
+
             if ((var_name == "ptz") and ("3l" not in hist_cat)): continue

             # Make a sub dir for this category
@@ -1238,8 +1360,8 @@ def make_all_cr_plots(dict_of_hists,year,skip_syst_errs,unit_norm_bool,save_dir_
                 if (var_name == "njets"):
                     # This is a special case for the diboson jet dependent systematic
                     db_hist = hist_mc_integrated.integrate("process",CR_GRP_MAP["Diboson"])[{"process": sum}].integrate("systematic","nominal").eval({})[()]
-                    shape_systs_summed_arr_p = shape_systs_summed_arr_p + get_diboson_njets_syst_arr(db_hist,bin0_njets=0) # Njets histos are assumed to start at njets=0
-                    shape_systs_summed_arr_m = shape_systs_summed_arr_m + get_diboson_njets_syst_arr(db_hist,bin0_njets=0) # Njets histos are assumed to start at njets=0
+                    shape_systs_summed_arr_p = shape_systs_summed_arr_p + get_diboson_njets_syst_arr(db_hist,bin0_njets=0 + (1 if 'fwd' in proc_name else 0)) # Njets histos are assumed to start at njets=0
+                    shape_systs_summed_arr_m = shape_systs_summed_arr_m + get_diboson_njets_syst_arr(db_hist,bin0_njets=0 + (1 if 'fwd' in proc_name else 0)) # Njets histos are assumed to start at njets=0
                 # Get the arrays we will actually put in the CR plot
                 nom_arr_all = hist_mc_integrated[{"process": sum}].integrate("systematic","nominal").eval({})[()][1:]
                 p_err_arr = nom_arr_all + np.sqrt(shape_systs_summed_arr_p + rate_systs_summed_arr_p)[1:] # This goes in the main plot
@@ -1330,9 +1452,12 @@ def main():
     #exit()

     # Make the plots
-    make_all_cr_plots(hin_dict,args.year,args.skip_syst,unit_norm_bool,save_dir_path)
-    #make_all_sr_plots(hin_dict,args.year,unit_norm_bool,save_dir_path)
-    #make_all_sr_data_mc_plots(hin_dict,args.year,save_dir_path)
+    #make_all_cr_plots(hin_dict,args.year,args.skip_syst,unit_norm_bool,save_dir_path)
+    make_all_sr_plots(hin_dict,args.year,unit_norm_bool,save_dir_path)
+    # Blinded plots (Asimov data)
+    #make_all_sr_data_mc_plots(hin_dict,args.year,save_dir_path,unblind=False)
+    # Unblinded plots (real data)
+    #make_all_sr_data_mc_plots(hin_dict,args.year,save_dir_path,unblind=True)
     #make_all_sr_sys_plots(hin_dict,args.year,save_dir_path)
     #make_simple_plots(hin_dict,args.year,save_dir_path)
diff --git a/analysis/topeft_run2/make_lambda_plot.py b/analysis/topeft_run2/make_lambda_plot.py
index 06e79118f..abb5e5477 100644
--- a/analysis/topeft_run2/make_lambda_plot.py
+++ b/analysis/topeft_run2/make_lambda_plot.py
@@ -196,7 +196,7 @@ def main():
     make_plot(
         wc_lst=WC_LST,
         range_dict_a = lambda_dict_top22006,
-        tag_a = r"TOP-21-006 2$\sigma$ profiled asimov",
+        tag_a = r"TOP-22-006 2$\sigma$ profiled asimov",
        save_name="lambda_lims_b",
         xlog = True,
     )
diff --git a/analysis/topeft_run2/missing_parton.py b/analysis/topeft_run2/missing_parton.py
index d46ec208b..102219f49 100644
--- a/analysis/topeft_run2/missing_parton.py
+++ b/analysis/topeft_run2/missing_parton.py
@@ -19,14 +19,16 @@
 from topcoffea.modules.get_param_from_jsons import GetParam
 get_tc_param = GetParam(topcoffea_path("params/params.json"))

-files = ['2lss_4t_m', '2lss_4t_p', '2lss_fwd_m', '2lss_fwd_p', '2lss_m', '2lss_p', '3l_m_offZ_1b', '3l_m_offZ_2b', '3l_onZ_1b', '3l_onZ_2b', '3l_p_offZ_1b', '3l_p_offZ_2b', '4l']
+files = ['2lss_4t_m', '2lss_4t_p', '2lss_fwd_m', '2lss_fwd_p', '2lss_m', '2lss_p', '3l_m_offZ_1b', '3l_m_offZ_2b', '3l_onZ_1b', '3l_onZ_2b', '3l_p_offZ_1b', '3l_p_offZ_2b', '3l_m_offZ_fwd_1b', '3l_m_offZ_fwd_2b', '3l_onZ_fwd_1b', '3l_onZ_fwd_2b', '3l_p_offZ_fwd_1b', '3l_p_offZ_fwd_2b', '4l']
+files = ['3l_m_offZ_fwd_1b', '3l_m_offZ_fwd_2b', '3l_onZ_fwd_1b', '3l_onZ_fwd_2b', '3l_p_offZ_fwd_1b', '3l_p_offZ_fwd_2b']
 files = ['2lss_fwd_m', '2lss_fwd_p']
+files = ['2lss_fwd_m', '2lss_fwd_p', '3l_m_offZ_1b_fwd', '3l_m_offZ_2b_fwd', '3l_onZ_1b_fwd', '3l_onZ_2b_fwd', '3l_p_offZ_1b_fwd', '3l_p_offZ_2b_fwd']
 files_diff = ['2lss_4t_m_4j_2b', '2lss_4t_m_5j_2b', '2lss_4t_m_6j_2b', '2lss_4t_m_7j_2b', '2lss_4t_p_4j_2b', '2lss_4t_p_5j_2b', '2lss_4t_p_6j_2b', '2lss_4t_p_7j_2b', '2lss_m_4j_2b', '2lss_m_5j_2b', '2lss_m_6j_2b', '2lss_m_7j_2b', '2lss_p_4j_2b', '2lss_p_5j_2b', '2lss_p_6j_2b', '2lss_p_7j_2b', '3l_m_offZ_1b_2j', '3l_m_offZ_1b_3j', '3l_m_offZ_1b_4j', '3l_m_offZ_1b_5j', '3l_m_offZ_2b_2j', '3l_m_offZ_2b_3j', '3l_m_offZ_2b_4j', '3l_m_offZ_2b_5j', '3l_onZ_1b_2j', '3l_onZ_1b_3j', '3l_onZ_1b_4j', '3l_onZ_1b_5j', '3l_onZ_2b_2j', '3l_onZ_2b_3j', '3l_onZ_2b_4j', '3l_onZ_2b_5j', '3l_p_offZ_1b_2j', '3l_p_offZ_1b_3j', '3l_p_offZ_1b_4j', '3l_p_offZ_1b_5j', '3l_p_offZ_2b_2j', '3l_p_offZ_2b_3j', '3l_p_offZ_2b_4j', '3l_p_offZ_2b_5j', '4l_2j_2b', '4l_3j_2b', '4l_4j_2b']
 files_ptz = ['3l_onZ_1b_2j', '3l_onZ_1b_3j', '3l_onZ_1b_4j', '3l_onZ_1b_5j', '3l_onZ_2b_2j', '3l_onZ_2b_3j', '3l_onZ_2b_4j', '3l_onZ_2b_5j']

 def get_hists(fname, path, process):
-    fin = uproot.open('histos/'+path+'/2lss_fwd/ttx_multileptons-'+fname+'.root')
-    card = strip('histos/'+path+'/2lss_fwd/ttx_multileptons-'+fname+'.txt')
+    fin = uproot.open('/scratch365/byates2/3l_fwd/'+path+'/ttx_multileptons-'+fname+'.root')
+    card = strip('/scratch365/byates2/3l_fwd/'+path+'/ttx_multileptons-'+fname+'.txt')

     sm = [k.split(';')[0] for k in fin.keys() if 'sm' in k]
     nom = {}; up = {}; down = {}
@@ -126,7 +128,6 @@ def get_hists(fname, path, process):
 fout = uproot.open(fout)

 rename = {'tllq': 'tZq', 'ttZ': 'ttll', 'ttW': 'ttlnu'} #Used to rename things like ttZ to ttll and ttHnobb to ttH
-rename = {} #Used to rename things like ttZ to ttll and ttHnobb to ttH
 for proc in ['tllq']:
     for fname in files:
         fname += '_' + var
diff --git a/analysis/topeft_run2/run_analysis.py b/analysis/topeft_run2/run_analysis.py
index e5d6a49e7..1a78497f1 100644
--- a/analysis/topeft_run2/run_analysis.py
+++ b/analysis/topeft_run2/run_analysis.py
@@ -129,6 +129,11 @@
         action="store_true",
         help="Add fwd channels",
     )
+    parser.add_argument(
+        "--all-analysis",
+        action="store_true",
+        help="Add channels from all contributions",
+    )
     parser.add_argument(
         "--skip-sr",
         action="store_true",
@@ -202,6 +207,7 @@
     offZ_split = args.offZ_split
     tau_h_analysis = args.tau_h_analysis
     fwd_analysis = args.fwd_analysis
+    all_analysis = args.all_analysis
     skip_sr = args.skip_sr
     skip_cr = args.skip_cr
     do_np = args.do_np
@@ -232,6 +238,7 @@
         offZ_split = ops.pop("offZ_split",offZ_split)
         tau_h_analysis = ops.pop("tau_h_analysis",tau_h_analysis)
         fwd_analysis = ops.pop("fwd_analysis",fwd_analysis)
+        all_analysis = ops.pop("all_analysis", all_analysis)
         skip_sr = ops.pop("skip_sr",skip_sr)
         skip_cr = ops.pop("skip_cr",skip_cr)
         do_np = ops.pop("do_np",do_np)
@@ -289,9 +296,9 @@
     if hist_list == ["ana"]:
         # Here we hardcode a list of hists used for the analysis
         hist_lst = ["njets", "lj0pt", "ptz"]
-        if tau_h_analysis:
+        if tau_h_analysis or all_analysis:
             hist_lst.append("ptz_wtau")
-        if fwd_analysis:
+        if fwd_analysis or all_analysis:
             hist_lst.append("lt")
     elif args.hist_list == ["cr"]:
         # Here we hardcode a list of hists used for the CRs
@@ -456,6 +463,7 @@ def LoadJsonToSampleName(jsonFile, prefix):
         offZ_split=offZ_split,
         tau_h_analysis=tau_h_analysis,
         fwd_analysis=fwd_analysis,
+        all_analysis=all_analysis
     )

     if executor in ["work_queue", "taskvine"]:
@@ -472,6 +480,7 @@ def LoadJsonToSampleName(jsonFile, prefix):
             ),
             "extra_input_files": ["analysis_processor.py"],
             "retries": 15,
+
             # use mid-range compression for chunks results.
             # Valid values are 0 (minimum compression, less memory
             # usage) to 16 (maximum compression, more memory usage).
@@ -500,7 +509,7 @@
             # 'disk': 8000, #MB
             # 'memory': 10000, #MB
             # control the size of accumulation tasks.
-            "treereduction": 10,
+            #"treereduction": 10,
             #'chunks_per_accum': 25,
             #'chunks_accum_in_mem': 2,
             # terminate workers on which tasks have been running longer than average.
@@ -521,6 +530,8 @@
             # off print_stdout for all tasks.
             "verbose": True,
             "print_stdout": False,
+            'chunks_per_accum': 25,
+            'chunks_accum_in_mem': 2,
         }

     # Run the processor and get the output
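[Note: the accumulation behavior moves from a fixed treereduction to explicit chunk accounting. The relevant executor arguments, as set above (knobs of coffea's work_queue/taskvine executor; the per-knob descriptions below are our reading of them, not official docs):

    executor_args = {
        "retries": 15,
        # "treereduction": 10,     # replaced by the two knobs below
        "chunks_per_accum": 25,    # how many result chunks one accumulation task merges
        "chunks_accum_in_mem": 2,  # how many of those are held in memory at once
    }
]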
diff --git a/input_samples/cfgs/data_samples_NDSkim.cfg b/input_samples/cfgs/data_samples_NDSkim.cfg
index b34c0737c..83c7f9dfe 100644
--- a/input_samples/cfgs/data_samples_NDSkim.cfg
+++ b/input_samples/cfgs/data_samples_NDSkim.cfg
@@ -109,4 +109,4 @@ root://skynet013.crc.nd.edu/
 ../../input_samples/sample_jsons/data_samples/2018/SingleMuon_A-UL2018_NDSkim.json
 ../../input_samples/sample_jsons/data_samples/2018/SingleMuon_B-UL2018_NDSkim.json
 ../../input_samples/sample_jsons/data_samples/2018/SingleMuon_C-UL2018_NDSkim.json
-../../input_samples/sample_jsons/data_samples/2018/SingleMuon_D-UL2018_NDSkim.json
\ No newline at end of file
+../../input_samples/sample_jsons/data_samples/2018/SingleMuon_D-UL2018_NDSkim.json
diff --git a/input_samples/cfgs/mc_background_samples_NDSkim.cfg b/input_samples/cfgs/mc_background_samples_NDSkim.cfg
index 371114732..dd84047a3 100644
--- a/input_samples/cfgs/mc_background_samples_NDSkim.cfg
+++ b/input_samples/cfgs/mc_background_samples_NDSkim.cfg
@@ -106,4 +106,4 @@ root://skynet013.crc.nd.edu/
 ../../input_samples/sample_jsons/background_samples/central_UL/UL18_GluGluToContinToZZTo2mu2tau_NDSkim.json
 ../../input_samples/sample_jsons/background_samples/central_UL/UL18_GluGluToContinToZZTo4e_NDSkim.json
 ../../input_samples/sample_jsons/background_samples/central_UL/UL18_GluGluToContinToZZTo4mu_NDSkim.json
-../../input_samples/sample_jsons/background_samples/central_UL/UL18_GluGluToContinToZZTo4tau_NDSkim.json
\ No newline at end of file
+../../input_samples/sample_jsons/background_samples/central_UL/UL18_GluGluToContinToZZTo4tau_NDSkim.json
diff --git a/input_samples/cfgs/mc_background_samples_cr_NDSkim.cfg b/input_samples/cfgs/mc_background_samples_cr_NDSkim.cfg
index 5ef947e86..a5d8aeb22 100644
--- a/input_samples/cfgs/mc_background_samples_cr_NDSkim.cfg
+++ b/input_samples/cfgs/mc_background_samples_cr_NDSkim.cfg
@@ -50,4 +50,4 @@ root://skynet013.crc.nd.edu/
 ../../input_samples/sample_jsons/background_samples/central_UL/UL18_ST_top_t-channel_NDSkim.json
 ../../input_samples/sample_jsons/background_samples/central_UL/UL18_tbarW_NDSkim.json
 ../../input_samples/sample_jsons/background_samples/central_UL/UL18_tW_NDSkim.json
-../../input_samples/sample_jsons/background_samples/central_UL/UL18_WJetsToLNu_NDSkim.json
\ No newline at end of file
+../../input_samples/sample_jsons/background_samples/central_UL/UL18_WJetsToLNu_NDSkim.json
diff --git a/input_samples/cfgs/mc_signal_samples_NDSkim.cfg b/input_samples/cfgs/mc_signal_samples_NDSkim.cfg
index 14503a83e..d050bc2bf 100644
--- a/input_samples/cfgs/mc_signal_samples_NDSkim.cfg
+++ b/input_samples/cfgs/mc_signal_samples_NDSkim.cfg
@@ -24,7 +24,6 @@ root://skynet013.crc.nd.edu/

 # Private UL16 samples
 root://skynet013.crc.nd.edu/
-
 #file:///cms/cephfs/data/

 ../../input_samples/sample_jsons/signal_samples/private_UL/UL16_tHq_b1_NDSkim.json
 ../../input_samples/sample_jsons/signal_samples/private_UL/UL16_tllq_b1_NDSkim.json
@@ -41,4 +40,4 @@ root://skynet013.crc.nd.edu/
 ../../input_samples/sample_jsons/signal_samples/private_UL/UL16APV_ttHJet_b1_NDSkim.json
 ../../input_samples/sample_jsons/signal_samples/private_UL/UL16APV_ttllJet_b1_NDSkim.json
 ../../input_samples/sample_jsons/signal_samples/private_UL/UL16APV_ttlnuJet_b1_NDSkim.json
-../../input_samples/sample_jsons/signal_samples/private_UL/UL16APV_tttt_b1_NDSkim.json
\ No newline at end of file
+../../input_samples/sample_jsons/signal_samples/private_UL/UL16APV_tttt_b1_NDSkim.json
"~fwdjet_mask", + "0tau" ], [ "3l_p_offZ_none_1b", "3l_p", "3l_offZ_none", - "bmask_exactly1m" + "bmask_exactly1m", + "~fwdjet_mask", + "0tau" ], [ "3l_p_offZ_low_2b", "3l_p", "3l_offZ_low", - "bmask_atleast2m" + "bmask_atleast2m", + "~fwdjet_mask", + "0tau" ], [ "3l_p_offZ_high_2b", "3l_p", "3l_offZ_high", - "bmask_atleast2m" + "bmask_atleast2m", + "~fwdjet_mask", + "0tau" ], [ "3l_p_offZ_none_2b", "3l_p", "3l_offZ_none", - "bmask_atleast2m" + "bmask_atleast2m", + "~fwdjet_mask", + "0tau" ] ], "lep_flav_lst": [ @@ -561,26 +523,6 @@ "=4", ">5" ] - }, - "4l": { - "lep_chan_lst": [ - [ - "4l", - "4l", - "bmask_atleast1m2l" - ] - ], - "lep_flav_lst": [ - "llll" - ], - "appl_lst": [ - "isSR_4l" - ], - "jet_lst": [ - "=2", - "=3", - ">4" - ] } }, "CH_LST_CR": { @@ -700,8 +642,8 @@ ">1" ] } - }, - "FWD_CH_LST_SR": { + }, + "ALL_CH_LST_SR": { "2l": { "lep_chan_lst": [ [ @@ -710,7 +652,8 @@ "2l_p", "bmask_atleast1m2l", "bmask_atmost2m", - "~fwdjet_mask" + "~fwdjet_mask", + "0tau" ], [ "2lss_m", @@ -718,35 +661,42 @@ "2l_m", "bmask_atleast1m2l", "bmask_atmost2m", - "~fwdjet_mask" + "~fwdjet_mask", + "0tau" ], [ "2lss_4t_p", "2lss", "2l_p", "bmask_atleast1m2l", - "bmask_atleast3m" + "bmask_atleast3m", + "0tau" ], [ "2lss_4t_m", "2lss", "2l_m", "bmask_atleast1m2l", - "bmask_atleast3m" + "bmask_atleast3m", + "0tau" ], [ "2lss_fwd_p", - "2lss_fwd", - "2l_fwd_p", + "2lss", + "2l_p", "bmask_atleast1m2l", - "bmask_atmost2m" + "bmask_atmost2m", + "fwdjet_mask", + "0tau" ], [ "2lss_fwd_m", - "2lss_fwd", - "2l_fwd_m", + "2lss", + "2l_m", "bmask_atleast1m2l", - "bmask_atmost2m" + "bmask_atmost2m", + "fwdjet_mask", + "0tau" ] ], "lep_flav_lst": [ @@ -768,62 +718,315 @@ ">7" ] }, + "2lss_1tau": { + "lep_chan_lst": [ + [ + "2lss_m_1tau_onZ", + "2lss", + "2l_m", + "bmask_atleast1m2l", + "1tau", + "onZ_tau" + ], + [ + "2lss_p_1tau_offZ", + "2lss", + "2l_p", + "bmask_atleast1m2l", + "1tau", + "offZ_tau" + ], + [ + "2lss_m_1tau_offZ", + "2lss", + "2l_m", + "bmask_atleast1m2l", + "1tau", + "offZ_tau" + ], + [ + "2lss_p_1tau_onZ", + "2lss", + "2l_p", + "bmask_atleast1m2l", + "1tau", + "onZ_tau" + ] + ], + "lep_flav_lst": [ + "ee", + "em", + "mm" + ], + "appl_lst": [ + "isSR_2lSS", + "isAR_2lSS" + ], + "appl_lst_data": [ + "isAR_2lSS_OS" + ], + "jet_lst": [ + "=3", + "=4", + "=5", + ">6" + ] + }, + "2los_1tau": { + "lep_chan_lst": [ + [ + "2los_onZ_1tau", + "2los", + "bmask_atleast2m", + "1tau", + "2l_onZ" + ] + ], + "lep_flav_lst": [ + "ee", + "em", + "mm" + ], + "appl_lst": [ + "isSR_2lOS", + "isAR_2lOS" + ], + "jet_lst": [ + ">5" + ] + }, "3l": { "lep_chan_lst": [ [ "3l_onZ_1b", "3l", "3l_onZ", - "bmask_exactly1m" + "bmask_exactly1m", + "~fwdjet_mask", + "0tau" ], [ "3l_onZ_2b", "3l", "3l_onZ", - "bmask_atleast2m" + "bmask_atleast2m", + "~fwdjet_mask", + "0tau" ], [ - "3l_m_offZ_1b", + "3l_1tau_1b", + "bmask_exactly1m", + "1tau" + ], + [ + "3l_1tau_2b", + "bmask_atleast2m", + "1tau" + ], + [ + "3l_m_offZ_low_1b", "3l_m", - "3l_offZ", - "bmask_exactly1m" + "3l_offZ_low", + "bmask_exactly1m", + "~fwdjet_mask", + "0tau" ], [ - "3l_m_offZ_2b", + "3l_m_offZ_high_1b", "3l_m", - "3l_offZ", - "bmask_atleast2m" + "3l_offZ_high", + "bmask_exactly1m", + "~fwdjet_mask", + "0tau" ], [ - "3l_p_offZ_1b", + "3l_m_offZ_none_1b", + "3l_m", + "3l_offZ_none", + "bmask_exactly1m", + "~fwdjet_mask", + "0tau" + ], + [ + "3l_m_offZ_low_2b", + "3l_m", + "3l_offZ_low", + "bmask_atleast2m", + "~fwdjet_mask", + "0tau" + ], + [ + "3l_m_offZ_high_2b", + "3l_m", + "3l_offZ_high", + "bmask_atleast2m", + "~fwdjet_mask", + "0tau" + ], + [ + 
"3l_m_offZ_none_2b", + "3l_m", + "3l_offZ_none", + "bmask_atleast2m", + "~fwdjet_mask", + "0tau" + ], + [ + "3l_p_offZ_low_1b", "3l_p", - "3l_offZ", - "bmask_exactly1m" + "3l_offZ_low", + "bmask_exactly1m", + "~fwdjet_mask", + "0tau" ], [ - "3l_p_offZ_2b", + "3l_p_offZ_high_1b", "3l_p", - "3l_offZ", - "bmask_atleast2m" + "3l_offZ_high", + "bmask_exactly1m", + "~fwdjet_mask", + "0tau" + ], + [ + "3l_p_offZ_none_1b", + "3l_p", + "3l_offZ_none", + "bmask_exactly1m", + "~fwdjet_mask", + "0tau" + ], + [ + "3l_p_offZ_low_2b", + "3l_p", + "3l_offZ_low", + "bmask_atleast2m", + "~fwdjet_mask", + "0tau" + ], + [ + "3l_p_offZ_high_2b", + "3l_p", + "3l_offZ_high", + "bmask_atleast2m", + "~fwdjet_mask", + "0tau" + ], + [ + "3l_p_offZ_none_2b", + "3l_p", + "3l_offZ_none", + "bmask_atleast2m", + "~fwdjet_mask", + "0tau" ] ], "lep_flav_lst": [ "eee", "eem", "emm", - "mmm" + "mmm" ], "appl_lst": [ "isSR_3l", "isAR_3l" ], "jet_lst": [ + "=1", "=2", "=3", "=4", ">5" ] }, + "3l_fwd": { + "lep_chan_lst": [ + [ + "3l_onZ_1b_fwd", + "3l", + "3l_onZ", + "bmask_exactly1m", + "fwdjet_mask", + "0tau" + ], + [ + "3l_onZ_2b_fwd", + "3l", + "3l_onZ", + "bmask_atleast2m", + "fwdjet_mask", + "0tau" + ] + ], + "lep_flav_lst": [ + "eee", + "eem", + "emm", + "mmm" + ], + "appl_lst": [ + "isSR_3l", + "isAR_3l" + ], + "jet_lst": [ + "=1", + "=2", + "=3", + ">4" + ] + }, + "3l_offZ_fwd": { + "lep_chan_lst": [ + [ + "3l_m_offZ_1b_fwd", + "3l_m", + "3l_offZ", + "bmask_exactly1m", + "fwdjet_mask", + "0tau" + ], + [ + "3l_p_offZ_1b_fwd", + "3l_p", + "3l_offZ", + "bmask_exactly1m", + "fwdjet_mask", + "0tau" + ], + [ + "3l_m_offZ_2b_fwd", + "3l_m", + "3l_offZ", + "bmask_atleast2m", + "fwdjet_mask", + "0tau" + ], + + [ + "3l_p_offZ_2b_fwd", + "3l_p", + "3l_offZ", + "bmask_atleast2m", + "fwdjet_mask", + "0tau" + ] + ], + "lep_flav_lst": [ + "eee", + "eem", + "emm", + "mmm" + ], + "appl_lst": [ + "isSR_3l", + "isAR_3l" + ], + "jet_lst": [ + "=1", + "=2", + "=3", + ">4" + ] + }, "4l": { "lep_chan_lst": [ [ @@ -845,4 +1048,4 @@ ] } } -} \ No newline at end of file +} diff --git a/topeft/data/missing_parton/missing_parton.root b/topeft/data/missing_parton/missing_parton.root index 5a2cce1a4..6fd562b33 100644 Binary files a/topeft/data/missing_parton/missing_parton.root and b/topeft/data/missing_parton/missing_parton.root differ diff --git a/topeft/modules/datacard_tools.py b/topeft/modules/datacard_tools.py index cc8d4a38d..3e394bce6 100644 --- a/topeft/modules/datacard_tools.py +++ b/topeft/modules/datacard_tools.py @@ -245,9 +245,9 @@ def get_jet_mults(cls,s): For the regular expression, group 1 matches 'njet_bjet', group 2 matches 'bjet_njet' group 3 matches '_njet'. 
""" - rgx = re.compile(r"(_[2-7]j_[1-2]b)|(_[1-2]b_[2-7]j)|(_[2-7]j$)") + rgx = re.compile(r"(_[2-7]j_[1-2]b)|(_[1-2]b_[1-7]j)|(_[1-7]j$)") - m = rgx.search(s) + m = rgx.search(s.replace('_fwd', '')) if m.group(1) and m.group(2) is None and m.group(3) is None: # The order is '_Nj_Mb' _,j,b = m.group(1).split("_") @@ -995,10 +995,10 @@ def analyze(self,km_dist,ch,selected_wcs, crop_negative_bins, wcs_dict): # obtain the scalings for scalings.json file if p in self.SIGNALS: if self.wc_scalings: - scalings = h[{'channel':ch,'process':p,'systematic':'nominal'}].make_scaling(self.wc_scalings) + scalings = h[{'channel':ch,'process':p,'systematic':'nominal'}].make_scaling(flow='show', wc_list=self.wc_scalings) self.scalings_json = self.make_scalings_json(self.scalings,ch,km_dist,p,self.wc_scalings,scalings) else: - scalings = h[{'channel':ch,'process':p,'systematic':'nominal'}].make_scaling() + scalings = h[{'channel':ch,'process':p,'systematic':'nominal'}].make_scaling(flow='show') self.scalings_json = self.make_scalings_json(self.scalings,ch,km_dist,p,h.wc_names,scalings) f["data_obs"] = to_hist(data_obs,"data_obs") @@ -1086,7 +1086,7 @@ def analyze(self,km_dist,ch,selected_wcs, crop_negative_bins, wcs_dict): proc_name = self.get_process(p) # Strips off any "_sm" or "_lin_*" junk # Need to handle certain systematics in a special way if syst_name == "diboson_njets": - v = rate_syst.get_process(proc_name,num_j) + v = rate_syst.get_process(proc_name,min(num_j + (1 if 'fwd' in outf_root_name else 0), 7)) # fwd jet is an extra jet # v = rate_syst.get_process(proc_name) # if isinstance(v,dict): # v = v[str(num_j)] @@ -1112,6 +1112,8 @@ def analyze(self,km_dist,ch,selected_wcs, crop_negative_bins, wcs_dict): ch_key = f"{num_l}l{num_b}b_p" elif "_m_offZ" in ch: ch_key = f"{num_l}l{num_b}b_m" + elif "tau" in ch: + ch_key = f"{num_l}l_sfz_{num_b}b" else: raise ValueError(f"Unable to match {ch} for {syst_name} rate systematic") elif num_l == 4: @@ -1138,6 +1140,12 @@ def analyze(self,km_dist,ch,selected_wcs, crop_negative_bins, wcs_dict): f.write("* autoMCStats 10\n") else: f.write("* autoMCStats -1\n") + + outf_json_name = self.FNAME_TEMPLATE.format(cat=ch,kmvar=km_dist,ext="json") + with open(os.path.join(self.out_dir,f"{outf_json_name}"),"w") as f: + print('making', os.path.join(self.out_dir,f"{outf_json_name}")) + json.dump(self.scalings_json, f, indent=4) + dt = time.time() - tic print(f"File Write Time: {dt:.2f} s") print(f"Total Hists Written: {num_h}") diff --git a/topeft/modules/object_selection.py b/topeft/modules/object_selection.py index 03294f324..45cd31280 100644 --- a/topeft/modules/object_selection.py +++ b/topeft/modules/object_selection.py @@ -46,7 +46,7 @@ def ttH_idEmu_cuts_E3(hoe, eta, deltaEtaSC, eInvMinusPInv, sieie): return (hoe<(0.10-0.00*(abs(eta+deltaEtaSC)>1.479))) & (eInvMinusPInv>-0.04) & (sieie<(0.011+0.019*(abs(eta+deltaEtaSC)>1.479))) def isFwdJet(pt, eta, jet_id, jetPtCut=25.0): - mask = ((pt>jetPtCut) & (abs(eta)>get_te_param("eta_j_cut")) & (jet_id>get_te_param("jet_id_cut"))) + mask = ((pt>jetPtCut) & (abs(eta)>get_te_param("eta_j_cut")) & (abs(eta)get_te_param("jet_id_cut"))) return mask def smoothBFlav(jetpt,ptmin,ptmax,year,scale_loose=1.0): diff --git a/topeft/params/params.json b/topeft/params/params.json index 18819a357..568f1bbaa 100644 --- a/topeft/params/params.json +++ b/topeft/params/params.json @@ -3,6 +3,7 @@ "eta_m_cut" : 2.4, "eta_t_cut" : 2.3, "eta_j_cut" : 2.4, + "eta_j_cut_upper" : 5, "fo_pt_cut" : 10.0, "pres_e_pt_cut" : 7.0,