Skip to content

Commit 8c1e7fb

Browse files
Fix for: Only zero values in "increased problem patterns" section
- Closes #3
1 parent cb26504 commit 8c1e7fb

File tree

2 files changed

+140
-5
lines changed

2 files changed

+140
-5
lines changed

src/platform_problem_monitoring_core/step8_compare_normalizations.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import argparse
55
import json
66
import sys
7-
from typing import List, TypedDict
7+
from typing import List, NotRequired, TypedDict
88

99
from platform_problem_monitoring_core.utils import load_json, logger, save_json
1010

@@ -19,6 +19,11 @@ class PatternDict(TypedDict):
1919
last_seen: str
2020
sample_log_lines: List[str]
2121
sample_doc_references: List[str]
22+
# Fields required by step9_generate_email_bodies
23+
current_count: NotRequired[int] # Current count (same as count for clarity)
24+
previous_count: NotRequired[int] # Count from previous run
25+
absolute_change: NotRequired[int] # Absolute difference between counts
26+
percent_change: NotRequired[float] # Percentage difference
2227

2328

2429
# Define a function to get the count safely with a proper return type
@@ -127,11 +132,20 @@ def _find_increased_patterns(current_dict: dict, previous_dict: dict) -> List[Pa
127132
previous_count = previous_patterns[pattern_text]["count"]
128133
current_count = current_pattern["count"]
129134

135+
# Only include patterns with actual increases
130136
if current_count > previous_count:
137+
# Calculate absolute and percentage change
138+
absolute_change = current_count - previous_count
139+
percent_change = round((absolute_change / previous_count) * 100, 1) if previous_count > 0 else 100
140+
131141
# Ensure all required fields are present
132142
increased_pattern: PatternDict = {
133143
"cluster_id": current_pattern["cluster_id"],
134144
"count": current_count,
145+
"current_count": current_count,
146+
"previous_count": previous_count,
147+
"absolute_change": absolute_change,
148+
"percent_change": percent_change,
135149
"pattern": pattern_text,
136150
"first_seen": current_pattern.get("first_seen", ""),
137151
"last_seen": current_pattern.get("last_seen", ""),
@@ -140,8 +154,8 @@ def _find_increased_patterns(current_dict: dict, previous_dict: dict) -> List[Pa
140154
}
141155
increased_patterns.append(increased_pattern)
142156

143-
# Sort by count (descending)
144-
increased_patterns.sort(key=get_count, reverse=True)
157+
# Sort by percent change (descending), then by absolute change for ties, then by count
158+
increased_patterns.sort(key=lambda p: (p["percent_change"], p["absolute_change"], p["count"]), reverse=True)
145159
return increased_patterns
146160

147161

@@ -168,11 +182,20 @@ def _find_decreased_patterns(current_dict: dict, previous_dict: dict) -> List[Pa
168182
previous_count = previous_patterns[pattern_text]["count"]
169183
current_count = current_pattern["count"]
170184

185+
# Only include patterns with actual decreases
171186
if current_count < previous_count:
187+
# Calculate absolute and percentage change
188+
absolute_change = previous_count - current_count
189+
percent_change = round((absolute_change / previous_count) * 100, 1) if previous_count > 0 else 0
190+
172191
# Ensure all required fields are present
173192
decreased_pattern: PatternDict = {
174193
"cluster_id": current_pattern["cluster_id"],
175194
"count": current_count,
195+
"current_count": current_count,
196+
"previous_count": previous_count,
197+
"absolute_change": absolute_change,
198+
"percent_change": percent_change,
176199
"pattern": pattern_text,
177200
"first_seen": current_pattern.get("first_seen", ""),
178201
"last_seen": current_pattern.get("last_seen", ""),
@@ -181,8 +204,8 @@ def _find_decreased_patterns(current_dict: dict, previous_dict: dict) -> List[Pa
181204
}
182205
decreased_patterns.append(decreased_pattern)
183206

184-
# Sort by count (descending)
185-
decreased_patterns.sort(key=get_count, reverse=True)
207+
# Sort by percent change (descending), then by absolute change for ties, then by count
208+
decreased_patterns.sort(key=lambda p: (p["percent_change"], p["absolute_change"], p["count"]), reverse=True)
186209
return decreased_patterns
187210

188211

src/tests/test_step8_compare_normalizations.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,3 +314,115 @@ def test_compare_normalizations_with_invalid_json(self) -> None:
314314
Path(current_file.name).unlink(missing_ok=True)
315315
Path(previous_file.name).unlink(missing_ok=True)
316316
Path(output_file.name).unlink(missing_ok=True)
317+
318+
def test_increased_patterns_contains_required_fields(
319+
self, sample_current_data: Dict[str, Any], sample_previous_data: Dict[str, Any]
320+
) -> None:
321+
"""Test that increased patterns contain all fields required by the email generation."""
322+
increased_patterns = _find_increased_patterns(sample_current_data, sample_previous_data)
323+
324+
# There should be one increased pattern based on our test data
325+
assert len(increased_patterns) == 1
326+
pattern = increased_patterns[0]
327+
328+
# The increased pattern should have proper pattern text
329+
assert pattern["pattern"] == "Error message 1"
330+
331+
# Verify it contains all the fields required by email generation
332+
assert "count" in pattern, "Pattern is missing 'count' field"
333+
assert pattern["count"] == 10, "Pattern has incorrect current count"
334+
335+
# These fields are expected by step9_generate_email_bodies.py but are missing
336+
assert "current_count" in pattern, "Pattern is missing 'current_count' field"
337+
assert pattern["current_count"] == 10, "Pattern has incorrect current count"
338+
assert "previous_count" in pattern, "Pattern is missing 'previous_count' field"
339+
assert pattern["previous_count"] == 5, "Pattern has incorrect previous count"
340+
assert "absolute_change" in pattern, "Pattern is missing 'absolute_change' field"
341+
assert pattern["absolute_change"] == 5, "Pattern has incorrect absolute change"
342+
assert "percent_change" in pattern, "Pattern is missing 'percent_change' field"
343+
assert pattern["percent_change"] == 100, "Pattern has incorrect percent change"
344+
345+
def test_decreased_patterns_contains_required_fields(
346+
self, sample_current_data: Dict[str, Any], sample_previous_data: Dict[str, Any]
347+
) -> None:
348+
"""Test that decreased patterns contain all fields required by the email generation."""
349+
decreased_patterns = _find_decreased_patterns(sample_current_data, sample_previous_data)
350+
351+
# There should be one decreased pattern based on our test data
352+
assert len(decreased_patterns) == 1
353+
pattern = decreased_patterns[0]
354+
355+
# The decreased pattern should have proper pattern text
356+
assert pattern["pattern"] == "Error message 2"
357+
358+
# Verify it contains all the fields required by email generation
359+
assert "count" in pattern, "Pattern is missing 'count' field"
360+
assert pattern["count"] == 5, "Pattern has incorrect current count"
361+
362+
# These fields are expected by step9_generate_email_bodies.py but are missing
363+
assert "current_count" in pattern, "Pattern is missing 'current_count' field"
364+
assert pattern["current_count"] == 5, "Pattern has incorrect current count"
365+
assert "previous_count" in pattern, "Pattern is missing 'previous_count' field"
366+
assert pattern["previous_count"] == 10, "Pattern has incorrect previous count"
367+
assert "absolute_change" in pattern, "Pattern is missing 'absolute_change' field"
368+
assert pattern["absolute_change"] == 5, "Pattern has incorrect absolute change"
369+
assert "percent_change" in pattern, "Pattern is missing 'percent_change' field"
370+
assert pattern["percent_change"] == 50, "Pattern has incorrect percent change"
371+
372+
def test_no_patterns_with_zero_change(self) -> None:
373+
"""Test that patterns with zero change are not included in increased or decreased lists."""
374+
# Create test data with patterns that have the same count
375+
current_data = {
376+
"patterns": [
377+
{"cluster_id": 1, "count": 10, "pattern": "Same count pattern", "sample_doc_references": ["index:id1"]}
378+
]
379+
}
380+
381+
previous_data = {
382+
"patterns": [
383+
{"cluster_id": 1, "count": 10, "pattern": "Same count pattern", "sample_doc_references": ["index:id1"]}
384+
]
385+
}
386+
387+
# There should be no increased patterns
388+
increased_patterns = _find_increased_patterns(current_data, previous_data)
389+
assert len(increased_patterns) == 0, "Patterns with no increase should not be in increased_patterns"
390+
391+
# There should be no decreased patterns
392+
decreased_patterns = _find_decreased_patterns(current_data, previous_data)
393+
assert len(decreased_patterns) == 0, "Patterns with no decrease should not be in decreased_patterns"
394+
395+
def test_zero_counts_handled_correctly(self) -> None:
396+
"""Test that zero counts are handled correctly in comparison."""
397+
# Create test data with patterns that have zero count
398+
current_data = {
399+
"patterns": [
400+
{"cluster_id": 1, "count": 0, "pattern": "Zero count current", "sample_doc_references": ["index:id1"]},
401+
{"cluster_id": 2, "count": 5, "pattern": "Zero count previous", "sample_doc_references": ["index:id2"]},
402+
]
403+
}
404+
405+
previous_data = {
406+
"patterns": [
407+
{"cluster_id": 1, "count": 5, "pattern": "Zero count current", "sample_doc_references": ["index:id1"]},
408+
{"cluster_id": 2, "count": 0, "pattern": "Zero count previous", "sample_doc_references": ["index:id2"]},
409+
]
410+
}
411+
412+
# "Zero count current" should be in decreased patterns
413+
decreased_patterns = _find_decreased_patterns(current_data, previous_data)
414+
assert len(decreased_patterns) == 1
415+
assert decreased_patterns[0]["pattern"] == "Zero count current"
416+
assert "current_count" in decreased_patterns[0], "Pattern is missing 'current_count' field"
417+
assert "previous_count" in decreased_patterns[0], "Pattern is missing 'previous_count' field"
418+
assert "absolute_change" in decreased_patterns[0], "Pattern is missing 'absolute_change' field"
419+
assert "percent_change" in decreased_patterns[0], "Pattern is missing 'percent_change' field"
420+
421+
# "Zero count previous" should be in increased patterns
422+
increased_patterns = _find_increased_patterns(current_data, previous_data)
423+
assert len(increased_patterns) == 1
424+
assert increased_patterns[0]["pattern"] == "Zero count previous"
425+
assert "current_count" in increased_patterns[0], "Pattern is missing 'current_count' field"
426+
assert "previous_count" in increased_patterns[0], "Pattern is missing 'previous_count' field"
427+
assert "absolute_change" in increased_patterns[0], "Pattern is missing 'absolute_change' field"
428+
assert "percent_change" in increased_patterns[0], "Pattern is missing 'percent_change' field"

0 commit comments

Comments
 (0)