TopEFT · bryates · Dec 11, 2025 · Aug 15, 2025 · Aug 15, 2025 · Aug 15, 2025
diff --git a/analysis/mc_validation/README.md b/analysis/mc_validation/README.md
@@ -16,5 +16,17 @@ This directory contains scripts from the validation studies of the FullR2 privat
     - Should be run on the output of the topeft processor
     - Was used during the June 2022 MC validation studies (for TOP-22-006 pre approval checks)
 
+:memo: The scripts below were updated in August of 2025
+* `gen_processor.py`:
+    - Updated script that produces gen-level histograms for comparing different samples
 
+* `gen_hist_eventweights_processor.py`:
+    - This script produces distributions of MG weights (if they are saved in the samples)
 
+* `gen_hist_eventweights_plotter.py`:
+    - This script produces plots of MG weights
+    - Should be run on the output of the gen_hist_eventweights_processor.py processor
+
+* `comp_norm.py`:
+    - This script plots variables from two pkl files to compare their shapes and normalizations
+    - Should be run on the output of the gen_processor.py processor
diff --git a/analysis/mc_validation/comp_norm.py b/analysis/mc_validation/comp_norm.py
diff --git a/analysis/mc_validation/djr.py b/analysis/mc_validation/djr.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+'''
+This script produces the DJR plots from nanoGEN files
+(assuming the DJR were also saved)
+Example Run
+python djr.py \
+--input /cms/cephfs/data/store/user/byates/tttt/nanoGEN_Run3/2022/tttt_LO_EFT/crab_tttt_nanoGEN_Run3/250715_223705/0000/ \
+--output /users/byates2/afs/www/EFT/tttt_Run3/weights/weights.pdf
+'''
+
+import uproot
+import os
+import hist
+import awkward as ak
+import numpy as np
+np.seterr(invalid='ignore')
+import matplotlib.pyplot as plt
+import mplhep as hep
+import warnings
+plt.style.use(hep.style.CMS)
+
+if __name__ == '__main__':
+    '''
+    good example:
+    root://cmseos.fnal.gov//store/user/dspitzba/EFT/qcut30.root
+
+    bad example:
+    root://cmseos.fnal.gov//store/user/cmsdas/2023/short_exercises/Generators/wjets_2j/w2jets_qcut10.root
+    '''
+
+    import argparse
+
+    argParser = argparse.ArgumentParser(description = "Argument parser")
+    argParser.add_argument('--input', action='store', default='root://cmseos.fnal.gov//store/user/dspitzba/EFT/qcut80.root', help="Input file")
+    argParser.add_argument('--output', action='store', default='./djr.pdf', help="Output file")
+    argParser.add_argument('--nevents', action='store', default=50e3, help="Number of events generated")
+    args = argParser.parse_args()
+
+    n_events_in = int(args.nevents)
+
+    djr_axis = hist.axis.Regular(40, -0.5, 3.5, name="djr", label=r"$\Delta JR$")
+    parton_axis = hist.axis.Integer(0, 4, name="n", label="Number of partons")
+    #parton_axis = hist.axis.Regular(5, 0, 4, name="n", label="Number of partons")
+    transition_axis = hist.axis.Integer(0, 6, name="t", label="DJR X->Y")
+    djr = hist.Hist(djr_axis, parton_axis, transition_axis)
+
+    files = [f for f in os.listdir(args.input) if '.root' in f and 'COPYING' not in f]
+    #n_events_in *= len(files)
+    tot = 0
+    for fin in files:
+        #print(f"Loading input file {args.input}/{fin}")
+        fin = uproot.open(args.input+fin)
+        events = fin["Events"]
+        #ar = events["GenEventInfoProduct_generator__GEN./GenEventInfoProduct_generator__GEN.obj"].arrays()
+        #djr_values = ar['GenEventInfoProduct_generator__GEN.obj']['DJRValues_']
+        #nMEPartons = ar['GenEventInfoProduct_generator__GEN.obj']['nMEPartons_']
+        djr_values10 = ak.Array(events['LHEWeight_DJR10'].array())
+        djr_values21 = ak.Array(events['LHEWeight_DJR21'].array())
+        djr_values32 = ak.Array(events['LHEWeight_DJR32'].array())
+        #djr_values = ak.concatenate([events['LHEWeight_DJR10'].array(), events['LHEWeight_DJR21'].array(), events['LHEWeight_DJR32'].array()])
+        #nMEPartons = events['LHEWeight_nMEPartons'].array()
+        nMEPartons = events['Generator_nMEPartons'].array()
+
+        djr.fill(
+            djr = np.log10(ak.flatten(djr_values10, axis=0)),
+            n = ak.values_astype(ak.flatten(ak.ones_like(djr_values10)*nMEPartons, axis=0), np.int32),
+            #n = ak.flatten(ak.ones_like(djr_values10)*nMEPartons, axis=0),
+            t = ak.flatten(ak.local_index(djr_values10), axis=0),
+        )
+        djr.fill(
+            djr = np.log10(ak.flatten(djr_values21, axis=0)),
+            n = ak.values_astype(ak.flatten(ak.ones_like(djr_values10)*nMEPartons, axis=0), np.int32),
+            #n = ak.flatten(ak.ones_like(djr_values21)*nMEPartons, axis=0),
+            t = ak.flatten(ak.local_index(djr_values21), axis=0),
+        )
+        djr.fill(
+            djr = np.log10(ak.flatten(djr_values32, axis=0)),
+            n = ak.values_astype(ak.flatten(ak.ones_like(djr_values10)*nMEPartons, axis=0), np.int32),
+            #n = ak.flatten(ak.ones_like(djr_values32)*nMEPartons, axis=0),
+            t = ak.flatten(ak.local_index(djr_values32), axis=0),
+        )
+        n_events = ak.num(djr_values10, axis=0)
+    print(f"Efficiency is {n_events/n_events_in}, assuming {n_events_in} where simulated")
+
+    print("Plotting...")
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore')
+        fig, axs = plt.subplots(3,2, figsize=(15,21))
+
+        for i in range(3):
+            for j in range(2):
+                transition = 2*i+j
+                djr[:, :, transition].plot1d(
+                    overlay='n',
+                    ax=axs[i][j],
+                    label= [f'{k} partons' for k in range(4)]
+                )
+                djr[:, :, transition][{'n':sum}].plot1d(
+                    ax=axs[i][j],
+                    label = ['total'],
+                    color = 'gray',
+                )
+
+                axs[i][j].set_xlabel(r'$DJR\ %s \to %s$'%(transition, transition+1))
+                axs[i][j].set_yscale('log')
+                axs[i][j].set_ylim(0.3,n_events*1000)
+                axs[i][j].legend(
+                    loc='upper right',
+                    bbox_to_anchor=(0.03, 0.88, 0.90, .11),
+                    mode="expand",
+                    ncol=2,
+                )
+
+        fig.savefig(args.output)
+        print(f"Figure saved in {args.output}")
diff --git a/analysis/mc_validation/eft_weights.py b/analysis/mc_validation/eft_weights.py
@@ -0,0 +1,135 @@
+'''
+This script produces the distributions of weights from nanoGEN files
+(assuming the MG weights were also saved)
+Example Run
+python eft_weights.py \
+--input /cms/cephfs/data/store/user/byates/tttt/nanoGEN_Run3/2022/tttt_LO_EFT/crab_tttt_nanoGEN_Run3/250715_223705/0000/ \
+--output /users/byates2/afs/www/EFT/tttt_Run3/weights/weights.pdf
+'''
+
+#!/usr/bin/env python3
+
+import hist
+import os
+import awkward as ak
+from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
+import numpy as np
+import matplotlib.pyplot as plt
+import mplhep as hep
+import topcoffea.modules.utils as utils
+plt.style.use(hep.style.CMS)
+
+NanoAODSchema.warn_missing_crossrefs = False
+
+if __name__ == '__main__':
+
+    import argparse
+    # 'EFTrwgt66_ctW_0.0_ctq1_0.0_cQq81_0.0_ctZ_0.0_cQq83_0.0_ctG_0.0_ctq8_0.0_cQq13_0.0_cQq11_0.0'
+
+    argParser = argparse.ArgumentParser(description = "Argument parser")
+    argParser.add_argument('--input', action='store', default='root://cmseos.fnal.gov//store/user/dspitzba/EFT/nanogen_small.root', help="Input file")
+    argParser.add_argument('--output', action='store', default='./weights.pdf', help="Output file")
+    args = argParser.parse_args()
+
+    path = args.input
+    #files = ['nanogen_12.root', 'nanogen_49.root', 'nanogen_51.root', 'nanogen_54.root', 'nanogen_5.root', 'nanogen_82.root', 'nanogen_13.root', 'nanogen_4.root', 'nanogen_52.root', 'nanogen_55.root', 'nanogen_63.root', 'nanogen_9.root', 'nanogen_2.root', 'nanogen_50.root', 'nanogen_53.root', 'nanogen_56.root', 'nanogen_7.root']
+    files = [f for f in os.listdir(path) if '.root' in f]
+    #files = [files[0]] # Only process a single file
+    #files = [files[x] for x in range(10)] # Only process the first 10 files
+    #files = ['nanogen_123_220.root']
+
+    weight_axis = hist.axis.Regular(22, -20, 2, name="weight_ax", label="weight", underflow=True, overflow=True)
+    #weight_axis = hist.axis.Regular(100, -1, 1e1, name="weight_ax", label="weight", underflow=True, overflow=True)
+    h_SM = hist.Hist(weight_axis)
+    events = NanoEventsFactory.from_root(
+        #args.input,
+        path+files[0],
+        schemaclass=NanoAODSchema,
+    ).events()
+
+    w = events.LHEWeight
+    eft_weight_names = [ x for x in w.fields if x.startswith('EFTrwgt') ]
+    #h_ttG = hist.Hist(weight_axis)
+    h_ttG = []
+    eft_coeffs = ak.to_numpy(events["EFTfitCoefficients"]) if hasattr(events, "EFTfitCoefficients") else None
+    #h_ttG_rwgt = HistEFT(weight_axis, wc_names=['ctZ', 'cpt', 'cpQM', 'cpQ3', 'ctW', 'ctp', 'ctG'], label=r"Events")
+    for weight in eft_weight_names:
+        h_ttG.append(hist.Hist(weight_axis))
+
+    print('[', end='')
+    for fin in files:
+        events = NanoEventsFactory.from_root(
+            #args.input,
+            path+fin,
+            schemaclass=NanoAODSchema,
+        ).events()
+
+        w = events.LHEWeight
+        eft_weight_names = [ x for x in w.fields if x.startswith('EFTrwgt') ]
+        if not eft_weight_names:
+            eft_weight_names = utils.get_list_of_wc_names(path+fin)
+            eft_weight_names = ['_0.0_'.join(eft_weight_names)]
+            print(fin, eft_weight_names)
+
+        sm_wgt = getattr(events.LHEWeight, eft_weight_names[-1])
+        print('.', end='')
+        #print(f'Smallest non-zero SM weight: {np.min(sm_wgt[sm_wgt != 0.0])}')
+        h_SM.fill(weight_ax=np.log10(getattr(events.LHEWeight, eft_weight_names[-1])))
+        #h_SM.fill(weight_ax=getattr(events.LHEWeight, eft_weight_names[-1]))
+
+        #h_ttG = [(w,hist.Hist(weight_axis)) for w in ttG]
+        #h_ctg1 = hist.Hist(weight_axis)
+        #h_ctg1.fill(weight_ax=getattr(events.LHEWeight, 'EFTrwgt66_ctW_0.0_ctq1_0.0_cQq81_0.0_ctZ_0.0_cQq83_0.0_ctG_0.0_ctq8_0.0_cQq13_0.0_cQq11_0.0'))
+
+        # EFTrwgt10_ctGRe_2.0_ctGIm_0.0_ctWRe_0.0_ctWIm_0.0_ctBRe_0.0_ctBIm_0.0_cHtbRe_0.0_cHtbIm_0.0_cHt_0.0
+        #h_ctg2 = hist.Hist(weight_axis)
+        #h_ctg2.fill(weight_ax=getattr(events.LHEWeight, 'EFTrwgt0_ctW_-1.722436_ctq1_1.171197_cQq81_1.34397_ctZ_-6.408086_cQq83_1.555205_ctG_0.2893_ctq8_-0.625025_cQq13_-1.305265_cQq11_1.762244'))
+        #for (weight,h) in h_ttG:
+        #    h.fill(weight_ax=getattr(events.LHEWeight, weight))
+
+
+        #h_ctg1.plot1d(ax=ax, label=r'$C_{tG}=1$')
+        #h_ctg2.plot1d(ax=ax, label=r'$C_{tG}=2$')
+        #for i,(_,h) in enumerate(h_ttG):
+        #    h.plot1d(ax=ax, label=f'wgt_{i}')
+
+        for nw,weight in enumerate(eft_weight_names[:-1]):
+            h_ttG[nw].fill(weight_ax=np.log10(getattr(events.LHEWeight, weight)))
+            #h_ttG[nw].fill(weight_ax=getattr(events.LHEWeight, weight))
+    #wgts = getattr(events.LHEWeight, 'EFTrwgt_ctZ_0.0_cpt_0.0_cpQM_0.0_cpQ3_0.0_ctW_0.0_ctp_0.0_ctG_0.0')
+    #h_ttG_rwgt.fill(weight_ax=wgts, eft_coeff=eft_coeffs)
+    print(']')
+
+    for iweight,weight in enumerate(eft_weight_names):
+        fig, ax = plt.subplots()
+
+        hep.histplot(h_SM, ax=ax, label=r'$SM=0$', flow='show', histtype='errorbar', yerr=False)
+        label = '$tt\gamma$'
+        label = 'EFT'
+        hep.histplot(h_ttG[iweight], ax=ax, label=label, flow='show', histtype='errorbar', yerr=False)
+        # 'EFTrwgt66_ctW_0.0_ctq1_0.0_cQq81_0.0_ctZ_0.0_cQq83_0.0_ctG_0.0_ctq8_0.0_cQq13_0.0_cQq11_0.0'
+        eft_weight = weight.split('_')[1:]
+        wcs  = eft_weight[:-1:2]
+        vals = eft_weight[1::2]
+        vals = [float(v) for v in vals]
+        eft_pt = dict(zip(wcs,vals))
+        #hep.histplot(h_ttG_rwgt.as_hist(eft_pt), ax=ax, label=label+' reweight', flow='show', histtype='errorbar', yerr=False)
+        ax.set_ylabel(r'# Events')
+        ax.set_xlabel(r'log(weight)')
+
+        #ax.set_xscale("log")
+        ax.set_yscale("log")
+        #ax.set_xlim([1e-2, 1e2])
+        #ax.set_xlim([1e-6, 1e2])
+        #ax.set_xlim([1e-2, 1e2])
+
+        plt.legend()
+
+        fig.savefig(args.output)
+        fig.savefig(args.output.replace('.pdf', '_rwgt{}.pdf'.format(iweight)))
+        fig.savefig(args.output.replace('.pdf', '_rwgt{}.png'.format(iweight)))
+        print(f"Figure saved in {args.output.replace('.pdf', '_rwgt{}.pdf'.format(iweight))}")
+        #fig.savefig(args.output.replace('.pdf', '_{}.pdf'.format(eft_weight_names[iweight])))
+        #fig.savefig(args.output.replace('.pdf', '_{}.png'.format(eft_weight_names[iweight])))
+        #print(f"Figure saved in {args.output.replace('.pdf', '_rwgt{}.pdf'.format(iweight))}")
+        plt.close()
diff --git a/analysis/mc_validation/gen_hist_eventweights_plotter.py b/analysis/mc_validation/gen_hist_eventweights_plotter.py
@@ -0,0 +1,66 @@
+'''
+This script plots weights produced by `gen_hist_eventweights_processor.py`
+The second argument is the output directory; `weights.pdf` and `weights.png` are written inside it
+Example:
+python gen_hist_eventweights_plotter.py 2022_tllq_NewStPt4.pkl.gz /users/byates2/afs/www/EFT/tllq_NewStPt4_Run3/weights/
+'''
+import os
+import pickle
+import gzip
+#import numpy as np
+import matplotlib.pyplot as plt
+import argparse
+from topeft.modules import axes
+BINNING = {k: v['variable'] for k,v in axes.info.items() if 'variable' in v}
+
+#Load hists from pickle file created by TopCoffea
+hists={}
+
+parser = argparse.ArgumentParser(description='You can select which file to run over')
+parser.add_argument('fin'   , default='analysis/topEFT/histos/mar03_central17_pdf_np.pkl.gz' , help = 'File to run over')
+parser.add_argument('output'   , default='/users/byates2/afs/www/EFT/tllq_NewStPt4_Run3/weights/' , help = 'Output path')
+args  = parser.parse_args()
+fin   = args.fin
+
+#hin = pickle.load(gzip.open(fin))
+#for k in hin.keys():
+#  if k in hists: hists[k]+=hin[k]
+#  else:               hists[k]=hin[k]
+with gzip.open(fin) as fin:
+    hin = pickle.load(fin)
+    for k in hin.keys():
+        if isinstance(hin[k], dict):
+            continue
+        if k in hists: hists[k]+=hin[k]
+        else:               hists[k]=hin[k]
+
+for h_name in hists:
+    ls = '-'
+    if 'coeff' in h_name: continue
+    if 'efth' in h_name: continue
+    if 'SM' in h_name and False:
+        label = 'SM'
+    elif 'neg' in h_name:
+        ls = '--'
+    elif 'abs' in h_name:
+        ls = '-.'
+    if 'pt' in h_name:
+        label = 'EFT' + h_name.split('_')[1]
+    else:
+        label = h_name.split('_')[1]
+    hists[h_name].plot1d(label=label, yerr=False, ls=ls, flow='show')
+    #(hists[h_name]/np.sum(hists['weights_SMabs_log'].values(flow=True))).plot1d(label=label, yerr=False, ls=ls, flow='show')
+    #plt.gca().set_ylabel('log(weights) / sum(SMpos)')
+    if 'coeff' in h_name: continue
+    #if 'coeff' in h_name: hists[h_name].plot1d(label=label, yerr=False)#, flow='show', ls='--')
+    elif 'efth' in h_name: continue
+    #elif 'efth' in h_name: hists[h_name].plot1d(label=label, yerr=False, flow='show', ls='-.')
+    #else: hists[h_name].plot1d(label=label, yerr=False)#, flow='show')
+
+plt.legend(ncol=3)
+plt.gca().set_yscale('log')
+plt.gca().set_xlabel('log(event weights)')
+plt.tight_layout()
+os.makedirs(f'{args.output}', exist_ok=True)
+plt.savefig(f'{args.output}/weights.pdf')
+plt.savefig(f'{args.output}/weights.png')
+#plt.savefig(args.output.replace('.pdf', '.png'))