Skip to content

Commit bb9544b

Browse files
authored
Merge pull request #117 from pyrra-dev/fix-latency-burnrates
slo: Fix latency burnrate recording rules
2 parents c45329f + 073b4bf commit bb9544b

File tree

4 files changed

+59
-50
lines changed

4 files changed

+59
-50
lines changed

slo/promql.go

+1 -1
Original file line number | Diff line number | Diff line change
@@ -377,7 +377,7 @@ func (o Objective) ErrorsRange(timerange time.Duration) string {
377377
return expr.String()
378378
}
379379
if o.Indicator.Latency != nil && o.Indicator.Latency.Total.Name != "" {
380-
expr, err := parser.ParseExpr(`sum(rate(metric{matchers="total"}[1s])) - sum(rate(errorMetric{matchers="errors"}[1s]))`)
380+
expr, err := parser.ParseExpr(`(sum(rate(metric{matchers="total"}[1s])) - sum(rate(errorMetric{matchers="errors"}[1s]))) / sum(rate(metric{matchers="total"}[1s]))`)
381381
if err != nil {
382382
return err.Error()
383383
}

slo/promql_test.go

+6 -6
Original file line number | Diff line number | Diff line change
@@ -578,27 +578,27 @@ func TestObjective_ErrorsRange(t *testing.T) {
578578
name: "http-latency",
579579
objective: objectiveHTTPLatency(),
580580
timerange: time.Hour,
581-
expected: `sum(rate(http_request_duration_seconds_count{code=~"2..",job="metrics-service-thanos-receive-default"}[1h])) - sum(rate(http_request_duration_seconds_bucket{code=~"2..",job="metrics-service-thanos-receive-default",le="1"}[1h]))`,
581+
expected: `(sum(rate(http_request_duration_seconds_count{code=~"2..",job="metrics-service-thanos-receive-default"}[1h])) - sum(rate(http_request_duration_seconds_bucket{code=~"2..",job="metrics-service-thanos-receive-default",le="1"}[1h]))) / sum(rate(http_request_duration_seconds_count{code=~"2..",job="metrics-service-thanos-receive-default"}[1h]))`,
582582
}, {
583583
name: "http-latency-grouping",
584584
objective: objectiveHTTPLatencyGrouping(),
585585
timerange: time.Hour,
586-
expected: `sum(rate(http_request_duration_seconds_count{code=~"2..",job="metrics-service-thanos-receive-default"}[1h])) - sum(rate(http_request_duration_seconds_bucket{code=~"2..",job="metrics-service-thanos-receive-default",le="1"}[1h]))`,
586+
expected: `(sum(rate(http_request_duration_seconds_count{code=~"2..",job="metrics-service-thanos-receive-default"}[1h])) - sum(rate(http_request_duration_seconds_bucket{code=~"2..",job="metrics-service-thanos-receive-default",le="1"}[1h]))) / sum(rate(http_request_duration_seconds_count{code=~"2..",job="metrics-service-thanos-receive-default"}[1h]))`,
587587
}, {
588588
name: "http-latency-grouping-regex",
589589
objective: objectiveHTTPLatencyGroupingRegex(),
590590
timerange: time.Hour,
591-
expected: `sum(rate(http_request_duration_seconds_count{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default"}[1h])) - sum(rate(http_request_duration_seconds_bucket{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default",le="1"}[1h]))`,
591+
expected: `(sum(rate(http_request_duration_seconds_count{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default"}[1h])) - sum(rate(http_request_duration_seconds_bucket{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default",le="1"}[1h]))) / sum(rate(http_request_duration_seconds_count{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default"}[1h]))`,
592592
}, {
593593
name: "grpc-latency",
594594
objective: objectiveGRPCLatency(),
595595
timerange: time.Hour,
596-
expected: `sum(rate(grpc_server_handling_seconds_count{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api"}[1h])) - sum(rate(grpc_server_handling_seconds_bucket{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",le="0.6"}[1h]))`,
596+
expected: `(sum(rate(grpc_server_handling_seconds_count{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api"}[1h])) - sum(rate(grpc_server_handling_seconds_bucket{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",le="0.6"}[1h]))) / sum(rate(grpc_server_handling_seconds_count{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api"}[1h]))`,
597597
}, {
598598
name: "grpc-latency-grouping",
599599
objective: objectiveGRPCLatencyGrouping(),
600600
timerange: time.Hour,
601-
expected: `sum(rate(grpc_server_handling_seconds_count{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api"}[1h])) - sum(rate(grpc_server_handling_seconds_bucket{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",le="0.6"}[1h]))`,
601+
expected: `(sum(rate(grpc_server_handling_seconds_count{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api"}[1h])) - sum(rate(grpc_server_handling_seconds_bucket{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",le="0.6"}[1h]))) / sum(rate(grpc_server_handling_seconds_count{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api"}[1h]))`,
602602
}, {
603603
name: "operator-ratio",
604604
objective: objectiveOperator(),
@@ -618,7 +618,7 @@ func TestObjective_ErrorsRange(t *testing.T) {
618618
name: "apiserver-read-resource-latency",
619619
objective: objectiveAPIServerLatency(),
620620
timerange: 2 * time.Hour,
621-
expected: `sum(rate(apiserver_request_duration_seconds_count{job="apiserver",resource=~"resource|",verb=~"LIST|GET"}[2h])) - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",le="0.1",resource=~"resource|",verb=~"LIST|GET"}[2h]))`,
621+
expected: `(sum(rate(apiserver_request_duration_seconds_count{job="apiserver",resource=~"resource|",verb=~"LIST|GET"}[2h])) - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",le="0.1",resource=~"resource|",verb=~"LIST|GET"}[2h]))) / sum(rate(apiserver_request_duration_seconds_count{job="apiserver",resource=~"resource|",verb=~"LIST|GET"}[2h]))`,
622622
}}
623623

624624
for _, tc := range testcases {

slo/rules.go

+10 -1
Original file line number | Diff line number | Diff line change
@@ -310,7 +310,16 @@ func (o Objective) Burnrate(timerange time.Duration) string {
310310
return expr.String()
311311
}
312312
if o.Indicator.Latency != nil && o.Indicator.Latency.Total.Name != "" {
313-
expr, err := parser.ParseExpr(`sum by(grouping) (rate(metric{matchers="total"}[1s])) - sum by(grouping) (rate(errorMetric{matchers="errors"}[1s]))`)
313+
query := `
314+
(
315+
sum by(grouping) (rate(metric{matchers="total"}[1s]))
316+
-
317+
sum by(grouping) (rate(errorMetric{matchers="errors"}[1s]))
318+
)
319+
/
320+
sum by(grouping) (rate(metric{matchers="total"}[1s]))
321+
`
322+
expr, err := parser.ParseExpr(query)
314323
if err != nil {
315324
return err.Error()
316325
}

0 commit comments

Comments
 (0)