diff --git a/client/v3/client.go b/client/v3/client.go index c712b76c38ab..02b316a0a184 100644 --- a/client/v3/client.go +++ b/client/v3/client.go @@ -264,10 +264,20 @@ func (c *Client) dialSetupOpts(creds grpccredentials.TransportCredentials, dopts backoffJitterFraction = c.cfg.BackoffJitterFraction } + backoffExponent := defaultBackoffExponent + if c.cfg.BackoffExponent > 0 { + backoffExponent = c.cfg.BackoffExponent + } + + backoffMaxWaitBetween := defaultBackoffMaxWaitBetween + if c.cfg.BackoffMaxWaitBetween > 0 { + backoffMaxWaitBetween = c.cfg.BackoffMaxWaitBetween + } + // Interceptor retry and backoff. // TODO: Replace all of clientv3/retry.go with RetryPolicy: // https://github.com/grpc/grpc-proto/blob/cdd9ed5c3d3f87aef62f373b93361cf7bddc620d/grpc/service_config/service_config.proto#L130 - rrBackoff := withBackoff(c.roundRobinQuorumBackoff(backoffWaitBetween, backoffJitterFraction)) + rrBackoff := withBackoff(c.roundRobinQuorumBackoff(backoffWaitBetween, backoffJitterFraction, backoffExponent, backoffMaxWaitBetween)) opts = append(opts, // Disable stream retry by default since go-grpc-middleware/retry does not support client streams. // Streams that are safe to retry are enabled individually. @@ -502,14 +512,22 @@ func newClient(cfg *Config) (*Client, error) { // roundRobinQuorumBackoff retries against quorum between each backoff. // This is intended for use with a round robin load balancer. -func (c *Client) roundRobinQuorumBackoff(waitBetween time.Duration, jitterFraction float64) backoffFunc { +func (c *Client) roundRobinQuorumBackoff(waitBetween time.Duration, jitterFraction float64, backoffExponent float64, maxWaitBetween time.Duration) backoffFunc { return func(attempt uint) time.Duration { // after each round robin across quorum, backoff for our wait between duration n := uint(len(c.Endpoints())) quorum := (n/2 + 1) if attempt%quorum == 0 { - c.lg.Debug("backoff", zap.Uint("attempt", attempt), zap.Uint("quorum", quorum), zap.Duration("waitBetween", waitBetween), zap.Float64("jitterFraction", jitterFraction)) - return jitterUp(waitBetween, jitterFraction) + c.lg.Debug( + "backoff", + zap.Uint("attempt", attempt), + zap.Uint("quorum", quorum), + zap.Duration("waitBetween", waitBetween), + zap.Float64("jitterFraction", jitterFraction), + zap.Float64("backoffExponent", backoffExponent), + zap.Duration("maxWaitBetween", maxWaitBetween), + ) + return jitterUp(expBackoff(attempt, backoffExponent, waitBetween, maxWaitBetween), jitterFraction) } c.lg.Debug("backoff skipped", zap.Uint("attempt", attempt), zap.Uint("quorum", quorum)) return 0 diff --git a/client/v3/client_test.go b/client/v3/client_test.go index 151c24166920..9915e356f3f4 100644 --- a/client/v3/client_test.go +++ b/client/v3/client_test.go @@ -194,6 +194,46 @@ func TestBackoffJitterFraction(t *testing.T) { require.InDelta(t, backoffJitterFraction, c.cfg.BackoffJitterFraction, 0.01) } +func TestBackoffExponent(t *testing.T) { + backoffExponent := float64(2.0) + cfg := Config{ + Endpoints: []string{"127.0.0.1:12345"}, + BackoffExponent: backoffExponent, + } + c, err := NewClient(t, cfg) + require.NoError(t, err) + require.NotNil(t, c) + defer c.Close() + + require.InDelta(t, backoffExponent, c.cfg.BackoffExponent, 0.01) + + backoffExponent = float64(1.0) + cfg = Config{ + Endpoints: []string{"127.0.0.1:12345"}, + BackoffExponent: backoffExponent, + } + c, err = NewClient(t, cfg) + require.NoError(t, err) + require.NotNil(t, c) + defer c.Close() + + require.InDelta(t, backoffExponent, c.cfg.BackoffExponent, 0.01) +} + +func TestMaxBackoff(t *testing.T) { + backoffMaxWaitBetween := 100 * time.Millisecond + cfg := Config{ + Endpoints: []string{"127.0.0.1:12345"}, + BackoffMaxWaitBetween: backoffMaxWaitBetween, + } + c, err := NewClient(t, cfg) + require.NoError(t, err) + require.NotNil(t, c) + defer c.Close() + + require.Equal(t, backoffMaxWaitBetween, c.cfg.BackoffMaxWaitBetween) +} + func TestIsHaltErr(t *testing.T) { assert.Truef(t, isHaltErr(t.Context(), errors.New("etcdserver: some etcdserver error")), diff --git a/client/v3/config.go b/client/v3/config.go index 61d093760749..8868230d7022 100644 --- a/client/v3/config.go +++ b/client/v3/config.go @@ -102,6 +102,12 @@ type Config struct { // BackoffJitterFraction is the jitter fraction to randomize backoff wait time. BackoffJitterFraction float64 `json:"backoff-jitter-fraction"` + // BackoffExponent is the exponential backoff factor for retries. + BackoffExponent float64 `json:"backoff-exponent"` + + // BackoffMaxWaitBetween is the max wait time before retrying an RPC after exponential backoff. + BackoffMaxWaitBetween time.Duration `json:"backoff-max-wait-between"` + // TODO: support custom balancer picker } diff --git a/client/v3/options.go b/client/v3/options.go index cc10a03d76d1..f8002461b610 100644 --- a/client/v3/options.go +++ b/client/v3/options.go @@ -54,6 +54,12 @@ var ( // client-side retry backoff default jitter fraction. defaultBackoffJitterFraction = 0.10 + + // client-side retry backoff exponential factor. Default of 1.0 which is no exponential backoff. + defaultBackoffExponent = 1.0 + + // client-side retry backoff exponential max wait between requests. + defaultBackoffMaxWaitBetween = 5 * time.Second ) // defaultCallOpts defines a list of default "gRPC.CallOption". diff --git a/client/v3/utils.go b/client/v3/utils.go index 850275877d32..d7d3d00bc45a 100644 --- a/client/v3/utils.go +++ b/client/v3/utils.go @@ -15,6 +15,7 @@ package clientv3 import ( + "math" "math/rand" "time" ) @@ -29,3 +30,13 @@ func jitterUp(duration time.Duration, jitter float64) time.Duration { multiplier := jitter * (rand.Float64()*2 - 1) return time.Duration(float64(duration) * (1 + multiplier)) } + +// expBackoff returns an exponential backoff duration. +// +// This will calculate exponential backoff based upon generation and exponent. The backoff is within [minDelay, maxDelay]. +// For example, an exponent of 2.0 will double the backoff duration every subsequent generation. A generation of 0 will +// return minDelay. +func expBackoff(generation uint, exponent float64, minDelay, maxDelay time.Duration) time.Duration { + delay := math.Min(math.Pow(exponent, float64(generation))*float64(minDelay), float64(maxDelay)) + return time.Duration(delay) +} diff --git a/client/v3/utils_test.go b/client/v3/utils_test.go new file mode 100644 index 000000000000..16e3cb270c03 --- /dev/null +++ b/client/v3/utils_test.go @@ -0,0 +1,56 @@ +// Copyright 2025 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package clientv3 + +import ( + "fmt" + "math" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestExpBackoff(t *testing.T) { + testCases := []struct { + generation uint + exponent float64 + minDelay time.Duration + maxDelay time.Duration + expectedBackoff time.Duration + }{ + // exponential backoff with 2.0 exponent + {generation: 0, exponent: 2.0, minDelay: 100 * time.Millisecond, maxDelay: 500 * time.Millisecond, expectedBackoff: 100 * time.Millisecond}, + {generation: 1, exponent: 2.0, minDelay: 100 * time.Millisecond, maxDelay: 500 * time.Millisecond, expectedBackoff: 200 * time.Millisecond}, + {generation: 2, exponent: 2.0, minDelay: 100 * time.Millisecond, maxDelay: 500 * time.Millisecond, expectedBackoff: 400 * time.Millisecond}, + {generation: 3, exponent: 2.0, minDelay: 100 * time.Millisecond, maxDelay: 500 * time.Millisecond, expectedBackoff: 500 * time.Millisecond}, + {generation: math.MaxUint, exponent: 2.0, minDelay: 100 * time.Millisecond, maxDelay: 500 * time.Millisecond, expectedBackoff: 500 * time.Millisecond}, + + // exponential backoff with 1.0 exponent + {generation: 0, exponent: 1.0, minDelay: 100 * time.Millisecond, maxDelay: 500 * time.Millisecond, expectedBackoff: 100 * time.Millisecond}, + {generation: 1, exponent: 1.0, minDelay: 100 * time.Millisecond, maxDelay: 500 * time.Millisecond, expectedBackoff: 100 * time.Millisecond}, + {generation: 2, exponent: 1.0, minDelay: 100 * time.Millisecond, maxDelay: 500 * time.Millisecond, expectedBackoff: 100 * time.Millisecond}, + {generation: 3, exponent: 1.0, minDelay: 100 * time.Millisecond, maxDelay: 500 * time.Millisecond, expectedBackoff: 100 * time.Millisecond}, + {generation: math.MaxUint, exponent: 1.0, minDelay: 100 * time.Millisecond, maxDelay: 500 * time.Millisecond, expectedBackoff: 100 * time.Millisecond}, + } + + for _, testCase := range testCases { + testName := fmt.Sprintf("%+v", testCase) + t.Run(testName, func(t *testing.T) { + backoff := expBackoff(testCase.generation, testCase.exponent, testCase.minDelay, testCase.maxDelay) + require.InDelta(t, testCase.expectedBackoff, backoff, 0.01) + }) + } +}