From 2213f06df31268d6610a92ea102008ad2902a8c2 Mon Sep 17 00:00:00 2001
From: Thomas Jungblut
Date: Fri, 19 Jan 2024 15:17:31 +0100
Subject: [PATCH] Add failpoint for nospace on puts

This CR introduces a new failpoint that will trigger a member to report no space.

Signed-off-by: Thomas Jungblut
---
 server/etcdserver/apply/apply.go        |   2 +
 tests/robustness/failpoint/failpoint.go |   1 +
 tests/robustness/failpoint/gofail.go    | 102 +++++++++++++++++++++++++
 tests/robustness/makefile.mk            |   4 +-
 4 files changed, 107 insertions(+), 2 deletions(-)

diff --git a/server/etcdserver/apply/apply.go b/server/etcdserver/apply/apply.go
index 2380f741017..0dee87fbe36 100644
--- a/server/etcdserver/apply/apply.go
+++ b/server/etcdserver/apply/apply.go
@@ -457,6 +457,8 @@ func newQuotaApplierV3(lg *zap.Logger, quotaBackendBytesCfg int64, be backend.Ba
 func (a *quotaApplierV3) Put(ctx context.Context, p *pb.PutRequest) (*pb.PutResponse, *traceutil.Trace, error) {
 	ok := a.q.Available(p)
 	resp, trace, err := a.applierV3.Put(ctx, p)
+	// gofail: var putReturnErrNoSpace struct{}
+	// return resp, trace, errors.ErrNoSpace
 	if err == nil && !ok {
 		err = errors.ErrNoSpace
 	}
diff --git a/tests/robustness/failpoint/failpoint.go b/tests/robustness/failpoint/failpoint.go
index 12a72b69d96..750f8e2ae9f 100644
--- a/tests/robustness/failpoint/failpoint.go
+++ b/tests/robustness/failpoint/failpoint.go
@@ -51,6 +51,7 @@ var (
 		RaftBeforeSaveSleep,
 		RaftAfterSaveSleep,
 		ApplyBeforeOpenSnapshot,
+		PutReturnErrNoSpace,
 	}
 )
 
diff --git a/tests/robustness/failpoint/gofail.go b/tests/robustness/failpoint/gofail.go
index 3d90c5ddd8f..ea9a47832eb 100644
--- a/tests/robustness/failpoint/gofail.go
+++ b/tests/robustness/failpoint/gofail.go
@@ -22,6 +22,9 @@ import (
 	"testing"
 	"time"
 
+	"go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
+	"go.etcd.io/etcd/tests/v3/framework/config"
+
 	"go.uber.org/zap"
 
 	"go.etcd.io/etcd/tests/v3/framework/e2e"
@@ -57,6 +60,7 @@ var (
 	BeforeApplyOneConfChangeSleep Failpoint = killAndGofailSleep{"beforeApplyOneConfChange", time.Second}
 	RaftBeforeSaveSleep Failpoint = gofailSleepAndDeactivate{"raftBeforeSave", time.Second}
 	RaftAfterSaveSleep Failpoint = gofailSleepAndDeactivate{"raftAfterSave", time.Second}
+	PutReturnErrNoSpace Failpoint = noSpaceFailpoint{alarmDuration: time.Second * 5}
 )
 
 type goPanicFailpoint struct {
@@ -233,3 +237,101 @@ func (f gofailSleepAndDeactivate) Available(config e2e.EtcdProcessClusterConfig,
 	}
 	return memberFailpoints.Available(f.failpoint)
 }
+
+// noSpaceFailpoint makes a member answer puts with ErrNoSpace through the
+// putReturnErrNoSpace gofail failpoint and deactivates it after alarmDuration.
+type noSpaceFailpoint struct {
+	// alarmDuration is how long the failpoint stays active once it has triggered.
+	alarmDuration time.Duration
+}
+
+// Name returns the gofail failpoint name used to set up and deactivate it.
+func (f noSpaceFailpoint) Name() string {
+	return "putReturnErrNoSpace"
+}
+
+// Available reports whether the failpoint can be used: it is skipped for
+// single-member clusters and requires the member to expose the failpoint.
+func (f noSpaceFailpoint) Available(config e2e.EtcdProcessClusterConfig, clus e2e.EtcdProcess) bool {
+	if config.ClusterSize == 1 {
+		return false
+	}
+	memberFailpoints := clus.Failpoints()
+	if memberFailpoints == nil {
+		return false
+	}
+	return memberFailpoints.Available(f.Name())
+}
+
+// Inject enables the failpoint on a randomly chosen member, issues puts until
+// one is rejected with ErrNoSpace, keeps the failpoint active for
+// alarmDuration, then deactivates it and disarms alarms.
+//
+// It returns an error if the cluster has no members, if the chosen member is
+// not running, if the failpoint cannot be set up or deactivated, if no put is
+// rejected with ErrNoSpace within the retry budget, or if disarming fails.
+func (f noSpaceFailpoint) Inject(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2e.EtcdProcessCluster) error {
+	if len(clus.Procs) == 0 {
+		return fmt.Errorf("cluster has no members to inject %s into", f.Name())
+	}
+	memberIdx := rand.Intn(len(clus.Procs))
+	member := clus.Procs[memberIdx]
+	if !member.IsRunning() {
+		return fmt.Errorf("member at index %d is not running", memberIdx)
+	}
+
+	lg.Info("Setting up gofailpoint", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
+	err := member.Failpoints().SetupHTTP(ctx, f.Name(), "return")
+	if err != nil {
+		lg.Info("goFailpoint setup failed", zap.String("failpoint", f.Name()), zap.Error(err))
+		return err
+	}
+
+	// Every attempt counts against the retry budget, including puts that
+	// unexpectedly succeed; otherwise a failpoint that never fires would make
+	// this loop spin forever.
+	const maxRetries = 10
+	for numRetries := 0; ; numRetries++ {
+		if numRetries > maxRetries {
+			lg.Info(fmt.Sprintf("goFailpoint giving up after %d retries", maxRetries), zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
+			if err == nil {
+				// The last put succeeded, so there is no error to forward.
+				err = fmt.Errorf("failpoint %s did not trigger after %d retries", f.Name(), maxRetries)
+			}
+			return err
+		}
+
+		// Use the caller's context so cancelling the test also aborts the put.
+		err = member.Etcdctl().Put(ctx, "failpoint_"+member.Config().Name, "", config.PutOptions{})
+		if err != nil && strings.Contains(err.Error(), rpctypes.ErrGRPCNoSpace.Error()) {
+			lg.Info("goFailpoint triggered successfully", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
+			break
+		}
+		if err != nil {
+			lg.Info("goFailpoint received unrelated error, retrying", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
+		} else {
+			lg.Info("goFailpoint put succeeded unexpectedly, retrying", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
+		}
+		time.Sleep(1 * time.Second)
+	}
+
+	// Keep the failpoint active so the member keeps rejecting puts for a while.
+	time.Sleep(f.alarmDuration)
+
+	lg.Info("goFailpoint starting to disable", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
+	err = member.Failpoints().DeactivateHTTP(ctx, f.Name())
+	if err != nil {
+		lg.Info("goFailpoint disable failed", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
+		return err
+	}
+
+	lg.Info("goFailpoint disabled successfully", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
+
+	_, err = member.Etcdctl().AlarmDisarm(ctx, nil)
+	if err != nil {
+		lg.Info("goFailpoint alarm disarm failed", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
+		return err
+	}
+
+	return nil
+}
diff --git a/tests/robustness/makefile.mk b/tests/robustness/makefile.mk
index 957b58f920e..c3f2d875738 100644
--- a/tests/robustness/makefile.mk
+++ b/tests/robustness/makefile.mk
@@ -36,7 +36,7 @@ GOFAIL_VERSION = $(shell cd tools/mod && go list -m -f {{.Version}} go.etcd.io/g
 
 .PHONY: gofail-enable
 gofail-enable: install-gofail
-	gofail enable server/etcdserver/ server/storage/backend/ server/storage/mvcc/ server/storage/wal/
+	gofail enable server/etcdserver/ server/etcdserver/apply server/storage/backend/ server/storage/mvcc/ server/storage/wal/
 	cd ./server && go get go.etcd.io/gofail@${GOFAIL_VERSION}
 	cd ./etcdutl && go get go.etcd.io/gofail@${GOFAIL_VERSION}
 	cd ./etcdctl && go get go.etcd.io/gofail@${GOFAIL_VERSION}
@@ -44,7 +44,7 @@ gofail-enable: install-gofail
 
 .PHONY: gofail-disable
 gofail-disable: install-gofail
-	gofail disable server/etcdserver/ server/storage/backend/ server/storage/mvcc/ server/storage/wal/
+	gofail disable server/etcdserver/ server/etcdserver/apply server/storage/backend/ server/storage/mvcc/ server/storage/wal/
 	cd ./server && go mod tidy
 	cd ./etcdutl && go mod tidy
 	cd ./etcdctl && go mod tidy