From 49638b074e86d1a93340ea497f01f2579669174a Mon Sep 17 00:00:00 2001 From: Sebastian Gaiser Date: Wed, 29 Jan 2025 12:58:35 +0000 Subject: [PATCH] fix(alerts): set severity of 'etcdMembersDown' from 'critical' to 'warning' Downgraded severity of 'etcdMembersDown' from 'critical' to 'warning' as a single etcd member being unavailable should not be a problem for etcd's quorum. If the quorum were not fulfilled, 'etcdInsufficientMembers' should fire. In addition, the 'for' interval was extended from '10m' to '20m' because, e.g., rebooting a big physical node usually takes longer than 10 minutes. Signed-off-by: Sebastian Gaiser --- contrib/mixin/alerts/alerts.libsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/mixin/alerts/alerts.libsonnet b/contrib/mixin/alerts/alerts.libsonnet index 856fe4c1e38b..0a41632e6771 100644 --- a/contrib/mixin/alerts/alerts.libsonnet +++ b/contrib/mixin/alerts/alerts.libsonnet @@ -16,9 +16,9 @@ ) > 0 ||| % { etcd_instance_labels: $._config.etcd_instance_labels, etcd_selector: $._config.etcd_selector, network_failure_range: $._config.scrape_interval_seconds * 4 }, - 'for': '10m', + 'for': '20m', labels: { - severity: 'critical', + severity: 'warning', }, annotations: { description: 'etcd cluster "{{ $labels.%s }}": members are down ({{ $value }}).' % $._config.clusterLabel,