Skip to content

Commit 70c5c40

Browse files
Adrien Bestel authored and metalmatze committed
slo: fix latency burnrate recording rules with regexp matchers
Same as #327. Ratio SLOs were the only ones creating these "grouping" on regexp filters. Fixes #1050.
1 parent 6bf8659 commit 70c5c40

File tree

3 files changed

+75
-13
lines changed

3 files changed

+75
-13
lines changed

slo/promql_test.go

+5
Original file line number | Diff line number | Diff line change
@@ -264,6 +264,11 @@ var (
264264
},
265265
}
266266
}
267+
objectiveAPIServerRatioGrouping = func() Objective {
268+
o := objectiveAPIServerRatio()
269+
o.Indicator.Ratio.Grouping = []string{"verb"}
270+
return o
271+
}
267272
objectiveAPIServerRatioAlertingDisabled = func() Objective {
268273
o := objectiveAPIServerRatio()
269274
o.Alerting.Disabled = true

slo/rules.go

+6 -5
Original file line number | Diff line number | Diff line change
@@ -104,6 +104,12 @@ func (o Objective) Burnrates() (monitoringv1.RuleGroup, error) {
104104
if m.Name == labels.MetricName {
105105
continue
106106
}
107+
if _, ok := groupingMap[m.Name]; !ok {
108+
if m.Type == labels.MatchRegexp || m.Type == labels.MatchNotRegexp {
109+
continue
110+
}
111+
}
112+
107113
alertMatchers = append(alertMatchers, m.String())
108114
}
109115
alertMatchers = append(alertMatchers, fmt.Sprintf(`slo="%s"`, sloName))
@@ -452,11 +458,6 @@ func (o Objective) Burnrate(timerange time.Duration) string {
452458
for _, s := range o.Indicator.Ratio.Grouping {
453459
groupingMap[s] = struct{}{}
454460
}
455-
for _, m := range o.Indicator.Ratio.Total.LabelMatchers {
456-
if m.Type == labels.MatchRegexp || m.Type == labels.MatchNotRegexp {
457-
groupingMap[m.Name] = struct{}{}
458-
}
459-
}
460461

461462
grouping := make([]string, 0, len(groupingMap))
462463
for s := range groupingMap {

slo/rules_test.go

+64 -8
Original file line number | Diff line number | Diff line change
@@ -745,6 +745,62 @@ func TestObjective_Burnrates(t *testing.T) {
745745
}, {
746746
name: "apiserver-write-response-errors",
747747
slo: objectiveAPIServerRatio(),
748+
rules: monitoringv1.RuleGroup{
749+
Name: "apiserver-write-response-errors",
750+
Interval: monitoringDuration("30s"),
751+
Rules: []monitoringv1.Rule{{
752+
Record: "apiserver_request:burnrate3m",
753+
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3m])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3m]))`),
754+
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
755+
}, {
756+
Record: "apiserver_request:burnrate15m",
757+
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[15m])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[15m]))`),
758+
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
759+
}, {
760+
Record: "apiserver_request:burnrate30m",
761+
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))`),
762+
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
763+
}, {
764+
Record: "apiserver_request:burnrate1h",
765+
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))`),
766+
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
767+
}, {
768+
Record: "apiserver_request:burnrate3h",
769+
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3h])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3h]))`),
770+
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
771+
}, {
772+
Record: "apiserver_request:burnrate12h",
773+
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[12h])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[12h]))`),
774+
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
775+
}, {
776+
Record: "apiserver_request:burnrate2d",
777+
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d]))`),
778+
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
779+
}, {
780+
Alert: "ErrorBudgetBurn",
781+
Expr: intstr.FromString(`apiserver_request:burnrate3m{job="apiserver",slo="apiserver-write-response-errors"} > (14 * (1-0.99)) and apiserver_request:burnrate30m{job="apiserver",slo="apiserver-write-response-errors"} > (14 * (1-0.99))`),
782+
For: monitoringDuration("1m0s"),
783+
Labels: map[string]string{"severity": "critical", "long": "30m", "short": "3m", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "1d"},
784+
}, {
785+
Alert: "ErrorBudgetBurn",
786+
Expr: intstr.FromString(`apiserver_request:burnrate15m{job="apiserver",slo="apiserver-write-response-errors"} > (7 * (1-0.99)) and apiserver_request:burnrate3h{job="apiserver",slo="apiserver-write-response-errors"} > (7 * (1-0.99))`),
787+
For: monitoringDuration("8m0s"),
788+
Labels: map[string]string{"severity": "critical", "long": "3h", "short": "15m", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "2d"},
789+
}, {
790+
Alert: "ErrorBudgetBurn",
791+
Expr: intstr.FromString(`apiserver_request:burnrate1h{job="apiserver",slo="apiserver-write-response-errors"} > (2 * (1-0.99)) and apiserver_request:burnrate12h{job="apiserver",slo="apiserver-write-response-errors"} > (2 * (1-0.99))`),
792+
For: monitoringDuration("30m0s"),
793+
Labels: map[string]string{"severity": "warning", "long": "12h", "short": "1h", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "1w"},
794+
}, {
795+
Alert: "ErrorBudgetBurn",
796+
Expr: intstr.FromString(`apiserver_request:burnrate3h{job="apiserver",slo="apiserver-write-response-errors"} > (1 * (1-0.99)) and apiserver_request:burnrate2d{job="apiserver",slo="apiserver-write-response-errors"} > (1 * (1-0.99))`),
797+
For: monitoringDuration("1h30m0s"),
798+
Labels: map[string]string{"severity": "warning", "long": "2d", "short": "3h", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "2w"},
799+
}},
800+
},
801+
}, {
802+
name: "apiserver-write-response-errors-grouping",
803+
slo: objectiveAPIServerRatioGrouping(),
748804
rules: monitoringv1.RuleGroup{
749805
Name: "apiserver-write-response-errors",
750806
Interval: monitoringDuration("30s"),
@@ -806,31 +862,31 @@ func TestObjective_Burnrates(t *testing.T) {
806862
Interval: monitoringDuration("30s"),
807863
Rules: []monitoringv1.Rule{{
808864
Record: "apiserver_request:burnrate3m",
809-
Expr: intstr.FromString(`sum by (verb) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3m])) / sum by (verb) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3m]))`),
865+
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3m])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3m]))`),
810866
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
811867
}, {
812868
Record: "apiserver_request:burnrate15m",
813-
Expr: intstr.FromString(`sum by (verb) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[15m])) / sum by (verb) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[15m]))`),
869+
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[15m])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[15m]))`),
814870
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
815871
}, {
816872
Record: "apiserver_request:burnrate30m",
817-
Expr: intstr.FromString(`sum by (verb) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) / sum by (verb) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))`),
873+
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))`),
818874
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
819875
}, {
820876
Record: "apiserver_request:burnrate1h",
821-
Expr: intstr.FromString(`sum by (verb) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) / sum by (verb) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))`),
877+
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))`),
822878
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
823879
}, {
824880
Record: "apiserver_request:burnrate3h",
825-
Expr: intstr.FromString(`sum by (verb) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3h])) / sum by (verb) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3h]))`),
881+
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3h])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3h]))`),
826882
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
827883
}, {
828884
Record: "apiserver_request:burnrate12h",
829-
Expr: intstr.FromString(`sum by (verb) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[12h])) / sum by (verb) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[12h]))`),
885+
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[12h])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[12h]))`),
830886
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
831887
}, {
832888
Record: "apiserver_request:burnrate2d",
833-
Expr: intstr.FromString(`sum by (verb) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d])) / sum by (verb) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d]))`),
889+
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d]))`),
834890
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
835891
}},
836892
},
@@ -1096,7 +1152,7 @@ func TestObjective_Burnrates(t *testing.T) {
10961152
},
10971153
}}
10981154

1099-
require.Len(t, testcases, 20)
1155+
require.Len(t, testcases, 21)
11001156

11011157
for _, tc := range testcases {
11021158
t.Run(tc.name, func(t *testing.T) {

0 commit comments

Comments
 (0)