Skip to content

scipp-atlas/atlas-schema

Repository files navigation

atlas-schema v0.2.4

Actions Status Documentation Status

PyPI version Conda-Forge PyPI platforms

GitHub Discussion

This is the Python package containing schemas and helper functions that enable analyzers to work with ATLAS datasets (Monte Carlo and data) using coffea.

Hello World

The simplest example is to just get started processing the file as expected:

from atlas_schema.schema import NtupleSchema
from coffea import dataset_tools
import awkward as ak

# Map each dataset name to its input ROOT files (file path -> tree name).
fileset = {"ttbar": {"files": {"path/to/ttbar.root": "tree_name"}}}
# NOTE(review): `preprocess` appears to normalize the fileset into runnable
# sample specs plus a per-file report — confirm against coffea.dataset_tools docs.
samples, report = dataset_tools.preprocess(fileset)


def noop(events):
    """Return the list of top-level field names present in *events*."""
    field_names = ak.fields(events)
    return field_names


# Run `noop` over every sample, interpreting the trees with the ATLAS
# NtupleSchema; the result maps each dataset name to its field names.
fields = dataset_tools.apply_to_fileset(noop, samples, schemaclass=NtupleSchema)
print(fields)

which produces output similar to:

{
    "ttbar": [
        "dataTakingYear",
        "mcChannelNumber",
        "runNumber",
        "eventNumber",
        "lumiBlock",
        "actualInteractionsPerCrossing",
        "averageInteractionsPerCrossing",
        "truthjet",
        "PileupWeight",
        "RandomRunNumber",
        "met",
        "recojet",
        "truth",
        "generatorWeight",
        "beamSpotWeight",
        "trigPassed",
        "jvt",
    ]
}

However, a more involved example that applies a selection and fills a histogram is shown below:

import awkward as ak
import dask
import hist.dask as had
import matplotlib.pyplot as plt
from coffea import processor
from coffea.nanoevents import NanoEventsFactory
from distributed import Client

from atlas_schema.schema import NtupleSchema


class MyFirstProcessor(processor.ProcessorABC):
    """Minimal coffea processor: histogram the leading-photon pT in three
    categories — all events, events where every photon passes the isEM
    flag, and events where that requirement is inverted."""

    def __init__(self):
        pass

    def process(self, events):
        """Fill an (isEM category x pt) histogram for one chunk of events.

        Returns a dict keyed by the dataset name, holding the number of
        processed events and the filled histogram.
        """
        dataset = events.metadata["dataset"]
        h_ph_pt = (
            had.Hist.new.StrCat(["all", "pass", "fail"], name="isEM")
            # Raw string so that "\g" in the LaTeX label is not parsed as a
            # (deprecated, SyntaxWarning in Python 3.12+) string escape.
            .Regular(200, 0.0, 2000.0, name="pt", label=r"$pt_{\gamma}$ [GeV]")
            .Int64()
        )

        # An event passes only if every photon in it satisfies the isEM flag.
        cut = ak.all(events.ph.isEM, axis=1)
        # ak.firsts picks the leading photon; /1.0e3 presumably converts
        # MeV -> GeV (standard ATLAS units) — confirm against the ntuples.
        h_ph_pt.fill(isEM="all", pt=ak.firsts(events.ph.pt / 1.0e3))
        h_ph_pt.fill(isEM="pass", pt=ak.firsts(events[cut].ph.pt / 1.0e3))
        h_ph_pt.fill(isEM="fail", pt=ak.firsts(events[~cut].ph.pt / 1.0e3))

        return {
            dataset: {
                "entries": ak.num(events, axis=0),
                "ph_pt": h_ph_pt,
            }
        }

    def postprocess(self, accumulator):
        # No cross-chunk merging is needed for this simple example.
        pass


if __name__ == "__main__":
    # Start a local Dask distributed client to execute the task graph.
    client = Client()

    source_file = "ntuple.root"
    events = NanoEventsFactory.from_root(
        {source_file: "analysis"},
        schemaclass=NtupleSchema,
        metadata={"dataset": "700352.Zqqgamma.mc20d.v1"},
    ).events()

    my_processor = MyFirstProcessor()
    lazy_result = my_processor.process(events)
    (computed,) = dask.compute(lazy_result)
    print(computed)

    # Draw the photon-pT histogram and save it to disk.
    fig, ax = plt.subplots()
    computed["700352.Zqqgamma.mc20d.v1"]["ph_pt"].plot1d(ax=ax)
    ax.set_xscale("log")
    ax.legend(title="Photon pT for Zqqgamma")

    fig.savefig("ph_pt.pdf")

which produces

three stacked histograms of photon pT, with each stack corresponding to: no selection, requiring the isEM flag, and inverting the isEM requirement

Developer Notes

Converting Enums from C++ to Python

This useful vim substitution helps:

%s/    \([A-Za-z]\+\)\s\+=  \(\d\+\),\?/    \1: Annotated[int, "\1"] = \2