|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "markdown", |
| 5 | + "id": "67e17aad-909a-486e-b85d-9ad6d7756dd4", |
| 6 | + "metadata": {}, |
| 7 | + "source": [ |
| 8 | + "# Importing and file opening" |
| 9 | + ] |
| 10 | + }, |
| 11 | + { |
| 12 | + "cell_type": "code", |
| 13 | + "execution_count": 36, |
| 14 | + "id": "b6522b4c-28ac-454e-a61f-56821cee4fb6", |
| 15 | + "metadata": {}, |
| 16 | + "outputs": [ |
| 17 | + { |
| 18 | + "name": "stdout", |
| 19 | + "output_type": "stream", |
| 20 | + "text": [ |
| 21 | + "<TTree 'Events' (1628 branches) at 0x2b030b5961d0>\n" |
| 22 | + ] |
| 23 | + } |
| 24 | + ], |
| 25 | + "source": [ |
| 26 | + "import pyarrow.parquet as pq\n", |
| 27 | + "import numpy as np\n", |
| 28 | + "import awkward as ak\n", |
| 29 | + "import uproot\n", |
| 30 | + "# import particle\n", |
| 31 | + "import vector\n", |
| 32 | + "vector.register_awkward()\n", |
| 33 | + "\n", |
| 34 | + "fname = \"/nfs/dust/cms/user/matsch/for_Artak/0520A050-AF68-EF43-AA5B-5AA77C74ED73.root\"\n", |
| 35 | + "\n", |
| 36 | + "fname_HH = \"/nfs/dust/cms/user/frahmmat/tutorial_files/HHtobbVV_4B83EAAD-1DEF-1641-BF6E-E9D72832F33A.root\"\n", |
| 37 | + "fname_tt = \"/nfs/dust/cms/user/frahmmat/tutorial_files/tt_sl_0520A050-AF68-EF43-AA5B-5AA77C74ED73.root\"\n", |
| 38 | + "\n", |
| 39 | + "file = uproot.open(fname)\n", |
| 40 | + "tree = file[\"Events\"]\n", |
| 41 | + "print(tree)\n", |
| 42 | + "# print(tree.keys()) # 1628 keys" |
| 43 | + ] |
| 44 | + }, |
| 45 | + { |
| 46 | + "cell_type": "markdown", |
| 47 | + "id": "c118fe2c-f633-4ffa-8a50-ca5f5098dc75", |
| 48 | + "metadata": {}, |
| 49 | + "source": [ |
| 50 | + "# directly accessing fields via filter" |
| 51 | + ] |
| 52 | + }, |
| 53 | + { |
| 54 | + "cell_type": "code", |
| 55 | + "execution_count": 37, |
| 56 | + "id": "942b0427-e2a3-43ab-80be-20b40410fb20", |
| 57 | + "metadata": {}, |
| 58 | + "outputs": [], |
| 59 | + "source": [ |
| 60 | + "tree = file[\"Events\"]\n", |
| 61 | + "\n", |
| 62 | + "fields = [\"pt\", \"eta\", \"phi\", \"mass\", \"charge\", \"btagDeepB\"]\n", |
| 63 | + "\n", |
| 64 | + "electrons = tree.arrays(filter_name=[\"Electron_\" + f for f in fields], entry_stop=100_000)\n", |
| 65 | + "muons = tree.arrays(filter_name=[\"Muon_\" + f for f in fields], entry_stop=100_000)\n", |
| 66 | + "jets = tree.arrays(filter_name=[\"Jet_\" + f for f in fields], entry_stop=100_000)" |
| 67 | + ] |
| 68 | + }, |
| 69 | + { |
| 70 | + "cell_type": "code", |
| 71 | + "execution_count": 38, |
| 72 | + "id": "8ff7f950-b36e-4eae-8c82-f5aec25f52c7", |
| 73 | + "metadata": {}, |
| 74 | + "outputs": [ |
| 75 | + { |
| 76 | + "name": "stdout", |
| 77 | + "output_type": "stream", |
| 78 | + "text": [ |
| 79 | + "Muon fields: ['eta', 'mass', 'phi', 'pt', 'charge']\n", |
| 80 | + "Jet fields: ['btagDeepB', 'eta', 'mass', 'phi', 'pt']\n", |
| 81 | + "<class 'vector._backends.awkward_.MomentumArray4D'>\n", |
| 82 | + "None\n", |
| 83 | + "<class 'vector._backends.awkward_.MomentumArray4D'>\n", |
| 84 | + "event fields: ['Electron', 'Muon', 'Jet']\n", |
| 85 | + "None\n" |
| 86 | + ] |
| 87 | + } |
| 88 | + ], |
| 89 | + "source": [ |
| 90 | + "# switch naming of fields from \"{Object}_{field}\" to \"{field}\"\n", |
| 91 | + "electrons = ak.zip({key.replace(\"Electron_\",\"\"):electrons[key] for key in electrons.fields}, with_name=\"Momentum4D\")\n", |
| 92 | + "muons = ak.zip({key.replace(\"Muon_\",\"\"):muons[key] for key in muons.fields}, with_name=\"Momentum4D\")\n", |
| 93 | + "jets = ak.zip({key.replace(\"Jet_\",\"\"):jets[key] for key in jets.fields}, with_name=\"Momentum4D\")\n", |
| 94 | + "\n", |
| 95 | + "# combine fields into a single awkward array via ak.zip\n", |
| 96 | + "events = ak.zip({\"Electron\": electrons, \"Muon\": muons, \"Jet\": jets}, depth_limit=1)\n", |
| 97 | + "\n", |
| 98 | + "# save output as parquet file\n", |
| 99 | + "f_out_name = \"tt_sl_test.parquet\"\n", |
| 100 | + "ak.to_parquet(events, f_out_name)" |
| 101 | + ] |
| 102 | + }, |
| 103 | + { |
| 104 | + "cell_type": "markdown", |
| 105 | + "id": "6b258600-6afc-4e46-8bb5-4360e901bc6b", |
| 106 | + "metadata": {}, |
| 107 | + "source": [ |
| 108 | + "# opening parquet with awkward" |
| 109 | + ] |
| 110 | + }, |
| 111 | + { |
| 112 | + "cell_type": "code", |
| 113 | + "execution_count": 42, |
| 114 | + "id": "7d047a49-a980-480f-8dab-b3cb21bf9630", |
| 115 | + "metadata": {}, |
| 116 | + "outputs": [ |
| 117 | + { |
| 118 | + "name": "stdout", |
| 119 | + "output_type": "stream", |
| 120 | + "text": [ |
| 121 | + "<class 'awkward.highlevel.Array'>\n", |
| 122 | + "['Electron', 'Muon', 'Jet']\n", |
| 123 | + "<class 'vector._backends.awkward_.MomentumArray4D'>\n" |
| 124 | + ] |
| 125 | + } |
| 126 | + ], |
| 127 | + "source": [ |
| 128 | + "events_new = ak.from_parquet(\"tt_sl_test.parquet\")\n", |
| 129 | + "\n", |
| 130 | + "print(type(events_new.Electron))\n", |
| 131 | + "# enable 4-vector behavior (pt,eta,phi,mass)\n", |
| 132 | + "behaviors = {\n", |
| 133 | + " \"Jet\": \"Momentum4D\",\n", |
| 134 | + " \"Electron\": \"Momentum4D\",\n", |
| 135 | + " \"Muon\": \"Momentum4D\",\n", |
| 136 | + "}\n", |
| 137 | + "for f in events_new.fields:\n", |
| 138 | + " if f in behaviors:\n", |
| 139 | + " events_new[f] = ak.with_name(events_new[f], behaviors[f])\n", |
| 140 | + " \n", |
| 141 | + "print(events_new.fields)\n", |
| 142 | + "print(type(events_new.Electron))" |
| 143 | + ] |
| 144 | + }, |
| 145 | + { |
| 146 | + "cell_type": "code", |
| 147 | + "execution_count": 43, |
| 148 | + "id": "77c094fc-c0b0-4e7a-87f1-90559038de6b", |
| 149 | + "metadata": {}, |
| 150 | + "outputs": [ |
| 151 | + { |
| 152 | + "name": "stdout", |
| 153 | + "output_type": "stream", |
| 154 | + "text": [ |
| 155 | + "[6, 7, 7, 9, 7, 8, 6, 5, 6, 8, 6, 10, 9, ... 6, 11, 7, 7, 8, 11, 8, 5, 9, 16, 11, 8]\n", |
| 156 | + "[[11.3, 9.84, 7.24, 4.14, 4.92, 3.79], ... 8.74, 5.56, 5.16, 6.71, 4.8, 6.57, 4.31]]\n", |
| 157 | + "[86.9, 303, 203, 542, 108, 246, 105, 309, ... 271, 149, 184, 412, 59.7, 327, 147]\n", |
| 158 | + "[[101, 62.8, 47.9, 22.3, 19.2, 15.5], ... 42.2, 40.2, 35.8, 33.5, 27, 21.2, 15.1]]\n", |
| 159 | + "[157, 56.7, 146, 84.8, 108, 83.2, 35.4, ... 78.4, 76, 37.1, 67.9, 194, 23.3, 38.2]\n", |
| 160 | + "[[118, 66.1, 49.1, 274, 49.1, 30.9], ... 43.2, 79.3, 48.1, 35.8, 182, 156, 105]]\n", |
| 161 | + "[[118, 66.1, 49.1, 274, 49.1, 30.9], ... 43.2, 79.3, 48.1, 35.8, 182, 156, 105]]\n" |
| 162 | + ] |
| 163 | + } |
| 164 | + ], |
| 165 | + "source": [ |
| 166 | + "# playing around with fields, testing vector behaviour\n", |
| 167 | + "jets = events_new.Jet\n", |
| 168 | + "\n", |
| 169 | + "print(ak.num(jets))\n", |
| 170 | + "\n", |
| 171 | + "print(jets.mass)\n", |
| 172 | + "jets = jets[ak.num(jets)>1]\n", |
| 173 | + "print((jets[:,0]+jets[:,1]).mass)\n", |
| 174 | + "print(jets.pt)\n", |
| 175 | + "print((jets[:,0]+jets[:,1]).pt)\n", |
| 176 | + "print(jets.E)\n", |
| 177 | + "print(np.sqrt(jets.pt**2 + jets.pz**2 + jets.mass**2))\n" |
| 178 | + ] |
| 179 | + }, |
| 180 | + { |
| 181 | + "cell_type": "markdown", |
| 182 | + "id": "3f1a4116-180e-453a-8d40-543c71dc0909", |
| 183 | + "metadata": {}, |
| 184 | + "source": [ |
| 185 | + "# alternatively: accessing fields via coffea" |
| 186 | + ] |
| 187 | + }, |
| 188 | + { |
| 189 | + "cell_type": "code", |
| 190 | + "execution_count": 78, |
| 191 | + "id": "58c8b7fd-d7d5-4131-9cfe-0270f36bf9be", |
| 192 | + "metadata": {}, |
| 193 | + "outputs": [], |
| 194 | + "source": [ |
| 195 | + "from coffea.nanoevents import NanoEventsFactory, BaseSchema\n", |
| 196 | + "from coffea.nanoevents.methods import candidate\n", |
| 197 | + "\n", |
| 198 | + "events = NanoEventsFactory.from_root(\n", |
| 199 | + " file,\n", |
| 200 | + " entry_stop=100000,\n", |
| 201 | + " metadata={\"dataset\": \"tt_sl\"},\n", |
| 202 | + " schemaclass=BaseSchema,\n", |
| 203 | + ").events()\n", |
| 204 | + "\n", |
| 205 | + "muons = ak.zip(\n", |
| 206 | + " {\n", |
| 207 | + " \"pt\": events.Muon_pt,\n", |
| 208 | + " \"eta\": events.Muon_eta,\n", |
| 209 | + " \"phi\": events.Muon_phi,\n", |
| 210 | + " \"mass\": events.Muon_mass,\n", |
| 211 | + " \"charge\": events.Muon_charge,\n", |
| 212 | + " },\n", |
| 213 | + " with_name=\"PtEtaPhiMCandidate\",\n", |
| 214 | + " behavior=candidate.behavior,\n", |
| 215 | + ")\n", |
| 216 | + "jets = ak.zip(\n", |
| 217 | + " {\n", |
| 218 | + " \"pt\": events.Muon_pt,\n", |
| 219 | + " \"eta\": events.Muon_eta,\n", |
| 220 | + " \"phi\": events.Muon_phi,\n", |
| 221 | + " \"mass\": events.Muon_mass,\n", |
| 222 | + " \"charge\": events.Muon_charge,\n", |
| 223 | + " },\n", |
| 224 | + " with_name=\"PtEtaPhiMCandidate\",\n", |
| 225 | + " behavior=candidate.behavior,\n", |
| 226 | + ")" |
| 227 | + ] |
| 228 | + } |
| 229 | + ], |
| 230 | + "metadata": { |
| 231 | + "kernelspec": { |
| 232 | + "display_name": "myenv", |
| 233 | + "language": "python", |
| 234 | + "name": "myenv" |
| 235 | + }, |
| 236 | + "language_info": { |
| 237 | + "codemirror_mode": { |
| 238 | + "name": "ipython", |
| 239 | + "version": 3 |
| 240 | + }, |
| 241 | + "file_extension": ".py", |
| 242 | + "mimetype": "text/x-python", |
| 243 | + "name": "python", |
| 244 | + "nbconvert_exporter": "python", |
| 245 | + "pygments_lexer": "ipython3", |
| 246 | + "version": "3.7.13" |
| 247 | + } |
| 248 | + }, |
| 249 | + "nbformat": 4, |
| 250 | + "nbformat_minor": 5 |
| 251 | +} |
0 commit comments