diff --git a/server/etcdserver/metrics.go b/server/etcdserver/metrics.go
index 7af0e7354d0..ea1fce461b7 100644
--- a/server/etcdserver/metrics.go
+++ b/server/etcdserver/metrics.go
@@ -118,6 +118,16 @@ var (
 		Name:      "lease_expired_total",
 		Help:      "The total number of expired leases.",
 	})
+	rangeResponseKvCount = prometheus.NewHistogramVec(prometheus.HistogramOpts{
+		Namespace: "etcd",
+		Subsystem: "server",
+		Name:      "range_response_kv_count",
+		Help:      "The number of KVs returned by range calls.",
+
+		// only responses with more than 1000 KVs are observed, so the default
+		// buckets (which top out at 10) would put every sample into +Inf.
+		Buckets: prometheus.ExponentialBuckets(1024, 2, 12),
+	}, []string{"range_begin"})
 
 	currentVersion = prometheus.NewGaugeVec(prometheus.GaugeOpts{
 		Namespace: "etcd",
@@ -168,6 +178,7 @@ func init() {
 	prometheus.MustRegister(slowReadIndex)
 	prometheus.MustRegister(readIndexFailed)
 	prometheus.MustRegister(leaseExpired)
+	prometheus.MustRegister(rangeResponseKvCount)
 	prometheus.MustRegister(currentVersion)
 	prometheus.MustRegister(currentGoVersion)
 	prometheus.MustRegister(serverID)
diff --git a/server/etcdserver/v3_server.go b/server/etcdserver/v3_server.go
index 07d1f546c9f..d32c52a6a1c 100644
--- a/server/etcdserver/v3_server.go
+++ b/server/etcdserver/v3_server.go
@@ -116,6 +116,15 @@ func (s *EtcdServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeRe
 				traceutil.Field{Key: "response_count", Value: len(resp.Kvs)},
 				traceutil.Field{Key: "response_revision", Value: resp.Header.Revision},
 			)
+
+			// Collect the metric on very large responses only, because the key
+			// dimensionality might blow up Prometheus.
+			if len(resp.Kvs) > 1000 {
+				// Keys are arbitrary bytes and WithLabelValues panics on label values that are
+				// not valid UTF-8, so use the error-returning lookup and skip such keys.
+				if h, lerr := rangeResponseKvCount.GetMetricWithLabelValues(string(r.Key)); lerr == nil {
+					h.Observe(float64(len(resp.Kvs)))
+				}
+			}
 		}
 		trace.LogIfLong(traceThreshold)
 	}(time.Now())