Skip to content

Commit cf7a120

Browse files
authored
summary-warnings for normalize step; extend summary warnings for errors (#57)
* summary-warnings for normalize step; extend summary warnings for errors Signed-off-by: Lars Schneidenbach <schneidenbach@us.ibm.com>
1 parent 81333d3 commit cf7a120

File tree

2 files changed

+28
-11
lines changed

2 files changed

+28
-11
lines changed

src/aiu_trace_analyzer/pipeline/normalize.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
import re
66

77
import aiu_trace_analyzer.logger as aiulog
8+
from aiu_trace_analyzer.types import TraceEvent, GlobalIngestData, TraceWarning
89
from aiu_trace_analyzer.pipeline.context import AbstractContext
910
from aiu_trace_analyzer.pipeline.hashqueue import AbstractHashQueueContext
10-
from aiu_trace_analyzer.types import TraceEvent, GlobalIngestData
1111
from aiu_trace_analyzer.pipeline.tools import FlexEventMapToTS
1212

1313

@@ -59,7 +59,20 @@ class NormalizationContext(AbstractHashQueueContext):
5959

6060
def __init__(self, soc_frequency: float, ignore_crit: bool = False,
6161
filterstr: str = "") -> None:
62-
super().__init__()
62+
super().__init__(warnings=[
63+
TraceWarning(
64+
name="long_dur",
65+
text="OVC: Detected {d[count]} event(s) with long duration and"
66+
" thus potential undetected overflow in TSx counter.",
67+
data={"count": 0}
68+
),
69+
TraceWarning(
70+
name="ts_seq_err",
71+
text="OVC: local_correction fix has missed a spot in TS-sequence of {d[count]} event(s).",
72+
data={"count": 0},
73+
is_error=True
74+
)
75+
])
6376
self.soc_frequency = soc_frequency
6477
self.frequency_minmax = (1e99, 0.0, 0, 0.0, 0.0)
6578
self.OVERFLOW_TIME_SPAN_US = float(1 << 32) / self.soc_frequency
@@ -195,9 +208,7 @@ def tsx_32bit_local_correction(self, event: TraceEvent) -> dict:
195208
prev = curr
196209

197210
if event["dur"] > self.OVERFLOW_TIME_SPAN_US:
198-
aiulog.log(aiulog.WARN,
199-
"OVC: Detected event with long duration and"
200-
" thus potential undetected overflow in TSx counter.")
211+
self.warnings["long_dur"].update()
201212

202213
if "Cmpt Exec" not in event["name"]:
203214
return args
@@ -249,7 +260,7 @@ def tsx_32bit_global_correction(self, qid, event: TraceEvent) -> dict:
249260
curr = int(args[ts], 0)
250261
curr += (ovc * 1 << 32)
251262
if curr < prev:
252-
aiulog.log(aiulog.ERROR, "attempt of local_correction fix has missed a spot in TS-sequence.")
263+
self.warnings["ts_seq_err"].update()
253264
if not self.ignore_crit:
254265
assert curr >= prev, "local_correction of TS-sequence incomplete."
255266
args[ts] = str(curr)

src/aiu_trace_analyzer/types.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -180,24 +180,28 @@ def get_dialect(cls, jobhash: int) -> InputDialect:
180180
class TraceWarning:
181181
"""
182182
Keep track of warnings to allow accumulated warning at the end of a run
183-
Usage:
183+
Example:
184184
185185
w = TraceWarning(
186186
name="MyWarning",
187187
text="This stage has detected {d[count]} issues with max {d[max]}.",
188188
data={"count": 0, "max": 0.0},
189189
update_fn={"count": int.__add__, "max": max},
190-
autolog=True
190+
auto_log=True,
191+
is_error=True,
191192
)
192193
193194
name: a key that can be used to manage multiple warnings in e.g. a dictionary
194195
text: the warning text with variables (always use 'd' as the dictionary name)
195196
data: dictionary with entries that match the text variables
196197
update_fn: functions to run when the update function is called with data
197-
autolog: automatically print the warning at destruction time
198+
auto_log: automatically print the warning at destruction time (default: True)
199+
is_error: print the summary warning at ERROR level instead of WARN (default: False, i.e. WARN level)
198200
199201
Whenever a warning should be added:
202+
200203
w.update({"count": 1, "max": 100.0})
204+
201205
this calls the preset update_fn for each item to update the values
202206
if update_fn is e.g. int.__add__, then the new_val entry for count will be increased by 1
203207
@@ -213,14 +217,16 @@ def __init__(
213217
text: str,
214218
data: dict[str, any],
215219
update_fn: dict[str, callable] = {},
216-
auto_log: bool = True):
220+
auto_log: bool = True,
221+
is_error: bool = False):
217222
self.occurred = False
218223
self.name = name
219224
# format-string with {d[key]} placeholders
220225
self.text: str = text
221226
self.args_list: dict[str, any] = {k: v for k, v in data.items()}
222227
self.update_fn: dict[str, callable] = {k: v for k, v in update_fn.items()}
223228
self.auto_log = auto_log
229+
self.warn_level = aiulog.WARN if not is_error else aiulog.ERROR
224230

225231
text_keys = re.findall(r"{d\[([.\w]+)\]}", self.text)
226232
if len(text_keys) != len(self.args_list):
@@ -249,7 +255,7 @@ def __init__(
249255

250256
def __del__(self) -> None:
251257
if self.auto_log is True and self.has_warning():
252-
aiulog.log(aiulog.WARN, self)
258+
aiulog.log(self.warn_level, self)
253259

254260
def get_name(self) -> str:
255261
return self.name

0 commit comments

Comments
 (0)