This is the Python package containing schemas and helper functions that enable analyzers to work with ATLAS datasets (Monte Carlo and data) using coffea.
The simplest way to get started is to process a file and inspect the fields it provides:
```python
from atlas_schema.schema import NtupleSchema
from coffea import dataset_tools
import awkward as ak

fileset = {"ttbar": {"files": {"path/to/ttbar.root": "tree_name"}}}
samples, report = dataset_tools.preprocess(fileset)


def noop(events):
    return ak.fields(events)


fields = dataset_tools.apply_to_fileset(noop, samples, schemaclass=NtupleSchema)
print(fields)
```
which produces something similar to
```
{
    "ttbar": [
        "dataTakingYear",
        "mcChannelNumber",
        "runNumber",
        "eventNumber",
        "lumiBlock",
        "actualInteractionsPerCrossing",
        "averageInteractionsPerCrossing",
        "truthjet",
        "PileupWeight",
        "RandomRunNumber",
        "met",
        "recojet",
        "truth",
        "generatorWeight",
        "beamSpotWeight",
        "trigPassed",
        "jvt",
    ]
}
```
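Each name in this list is either a plain event-level branch or a collection that the schema builds from branches sharing a common prefix, so per-object variables are read as attributes of the collection. A minimal sketch, assuming this particular ntuple stores a `recojet_pt` branch in MeV (so the schema exposes it as `recojet.pt`):

```python
import awkward as ak


def first_jet_pt(events):
    # "recojet" is one of the collections listed above; the "pt" field and its
    # units (MeV) are assumptions about this particular ntuple
    return ak.firsts(events.recojet.pt / 1.0e3)  # pT of the first jet per event, in GeV


# this can be applied over the fileset just like `noop` above:
# jet_pts = dataset_tools.apply_to_fileset(first_jet_pt, samples, schemaclass=NtupleSchema)
```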
A more involved example, which applies a selection and fills a histogram, looks like this:
```python
import awkward as ak
import dask
import hist.dask as had
import matplotlib.pyplot as plt
from coffea import processor
from coffea.nanoevents import NanoEventsFactory
from distributed import Client

from atlas_schema.schema import NtupleSchema


class MyFirstProcessor(processor.ProcessorABC):
    def __init__(self):
        pass

    def process(self, events):
        dataset = events.metadata["dataset"]
        h_ph_pt = (
            had.Hist.new.StrCat(["all", "pass", "fail"], name="isEM")
            .Regular(200, 0.0, 2000.0, name="pt", label=r"$pt_{\gamma}$ [GeV]")
            .Int64()
        )

        # select events in which every photon passes the isEM requirement
        cut = ak.all(events.ph.isEM, axis=1)
        h_ph_pt.fill(isEM="all", pt=ak.firsts(events.ph.pt / 1.0e3))
        h_ph_pt.fill(isEM="pass", pt=ak.firsts(events[cut].ph.pt / 1.0e3))
        h_ph_pt.fill(isEM="fail", pt=ak.firsts(events[~cut].ph.pt / 1.0e3))

        return {
            dataset: {
                "entries": ak.num(events, axis=0),
                "ph_pt": h_ph_pt,
            }
        }

    def postprocess(self, accumulator):
        pass


if __name__ == "__main__":
    client = Client()  # start a local dask client for distributed execution

    fname = "ntuple.root"
    events = NanoEventsFactory.from_root(
        {fname: "analysis"},
        schemaclass=NtupleSchema,
        metadata={"dataset": "700352.Zqqgamma.mc20d.v1"},
    ).events()

    p = MyFirstProcessor()
    out = p.process(events)
    (computed,) = dask.compute(out)
    print(computed)

    fig, ax = plt.subplots()
    computed["700352.Zqqgamma.mc20d.v1"]["ph_pt"].plot1d(ax=ax)
    ax.set_xscale("log")
    ax.legend(title="Photon pT for Zqqgamma")
    fig.savefig("ph_pt.pdf")
```
which produces a plot of the photon pT spectrum, saved to `ph_pt.pdf`.
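To scale this up beyond a single file, the same processor can be applied across the preprocessed fileset from the first example instead of calling `process` by hand. A minimal sketch, assuming `samples` and `MyFirstProcessor` are defined as above:

```python
import dask
from coffea import dataset_tools

from atlas_schema.schema import NtupleSchema

# apply the processor to every dataset in the preprocessed fileset; each
# dataset's name is expected to show up as events.metadata["dataset"]
out = dataset_tools.apply_to_fileset(
    MyFirstProcessor(),
    samples,
    schemaclass=NtupleSchema,
)
(computed,) = dask.compute(out)
print(computed)
```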
When converting plain `NAME = value,` enum entries into `Annotated[int, ...]` constants, this vim substitution helps:

```vim
%s/ \([A-Za-z]\+\)\s\+= \(\d\+\),\?/ \1: Annotated[int, "\1"] = \2
```
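For example, a hypothetical enum entry such as `PhotonLoose   = 0,` is rewritten into an annotated constant:

```python
from typing import Annotated

# result of applying the substitution to the hypothetical entry "PhotonLoose   = 0,"
PhotonLoose: Annotated[int, "PhotonLoose"] = 0
```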