From f98ab3aa1f0958fd4cff18cda03c74454187cd63 Mon Sep 17 00:00:00 2001 From: Angela Dietz Date: Fri, 28 Nov 2025 11:03:14 -0500 Subject: [PATCH] spanlatch: add cluster setting for slow latch request threshold This adds a new cluster setting kv.concurrency.slow_latch_request_duration which configures the threshold for logging slow latch acquisitions. Previously, logging would occur after 15 seconds, but now the default is reduced to 5s. Informs #154271 Release note: None Epic: None Part of: CRDB-54843 --- pkg/kv/kvserver/spanlatch/BUILD.bazel | 1 - pkg/kv/kvserver/spanlatch/manager.go | 15 ++++++++++++--- pkg/kv/kvserver/spanlatch/settings.go | 11 +++++++++++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/pkg/kv/kvserver/spanlatch/BUILD.bazel b/pkg/kv/kvserver/spanlatch/BUILD.bazel index 48fbe44c8dae..3bccf896f7c6 100644 --- a/pkg/kv/kvserver/spanlatch/BUILD.bazel +++ b/pkg/kv/kvserver/spanlatch/BUILD.bazel @@ -14,7 +14,6 @@ go_library( importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanlatch", visibility = ["//visibility:public"], deps = [ - "//pkg/base", "//pkg/kv/kvpb", "//pkg/kv/kvserver/concurrency/poison", "//pkg/kv/kvserver/spanset", diff --git a/pkg/kv/kvserver/spanlatch/manager.go b/pkg/kv/kvserver/spanlatch/manager.go index 222f66be82ac..c0aa821e66fe 100644 --- a/pkg/kv/kvserver/spanlatch/manager.go +++ b/pkg/kv/kvserver/spanlatch/manager.go @@ -11,7 +11,6 @@ import ( "time" "unsafe" - "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/kv/kvpb" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/poison" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset" @@ -585,7 +584,9 @@ func (m *Manager) waitForSignal( } log.Eventf(ctx, "waiting to acquire %s latch %s, held by %s latch %s", waitType, wait, heldType, held) poisonCh := held.g.poison.signalChan() - t.Reset(base.SlowRequestThreshold) + + slowThreshold := m.slowLatchRequestThreshold() + t.Reset(slowThreshold) for { select { case <-held.g.done.signalChan(): @@ -608,7 +609,7 @@ func (m *Manager) waitForSignal( } case <-t.C: log.Warningf(ctx, "have been waiting %s to acquire %s latch %s, held by %s latch %s", - base.SlowRequestThreshold, waitType, wait, heldType, held) + slowThreshold, waitType, wait, heldType, held) if m.slowReqs != nil { m.slowReqs.Inc(1) defer m.slowReqs.Dec(1) //nolint:deferloop @@ -709,6 +710,14 @@ func (m *Manager) longLatchHoldThreshold() time.Duration { return LongLatchHoldThreshold.Get(&m.settings.SV) } +// slowLatchRequestThreshold returns the threshold for logging slow latch requests. +func (m *Manager) slowLatchRequestThreshold() time.Duration { + if m.settings == nil { + return math.MaxInt64 // disable + } + return SlowLatchRequestThreshold.Get(&m.settings.SV) +} + // Metrics holds information about the state of a Manager. type Metrics struct { ReadCount int64 diff --git a/pkg/kv/kvserver/spanlatch/settings.go b/pkg/kv/kvserver/spanlatch/settings.go index 45098b4a6b3b..0752488fbe04 100644 --- a/pkg/kv/kvserver/spanlatch/settings.go +++ b/pkg/kv/kvserver/spanlatch/settings.go @@ -22,3 +22,14 @@ var LongLatchHoldThreshold = settings.RegisterDurationSetting( "the threshold for logging long latch holds", 3*time.Second, ) + +// SlowLatchRequestThreshold controls when we will log slow latch acquisition +// attempts. When a latch acquisition has been waiting for this duration, a +// warning is logged and the slow request metric is incremented. +var SlowLatchRequestThreshold = settings.RegisterDurationSettingWithExplicitUnit( + settings.SystemOnly, + "kv.concurrency.slow_latch_request_duration", + "the threshold for logging slow latch acquisition attempts", + 5*time.Second, + settings.DurationWithMinimum(10*time.Millisecond), +)