Skip to content

Commit

Permalink
Add failpoint for nospace on puts
Browse files Browse the repository at this point in the history
This CR introduces a new failpoint that will trigger a member to report
no space.

Signed-off-by: Thomas Jungblut <[email protected]>
  • Loading branch information
tjungblu committed Jan 19, 2024
1 parent e5665a7 commit 2213f06
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 2 deletions.
2 changes: 2 additions & 0 deletions server/etcdserver/apply/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,8 @@ func newQuotaApplierV3(lg *zap.Logger, quotaBackendBytesCfg int64, be backend.Ba
func (a *quotaApplierV3) Put(ctx context.Context, p *pb.PutRequest) (*pb.PutResponse, *traceutil.Trace, error) {
ok := a.q.Available(p)
resp, trace, err := a.applierV3.Put(ctx, p)
// gofail: var putReturnErrNoSpace struct{}
// return resp, trace, errors.ErrNoSpace
if err == nil && !ok {
err = errors.ErrNoSpace
}
Expand Down
1 change: 1 addition & 0 deletions tests/robustness/failpoint/failpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ var (
RaftBeforeSaveSleep,
RaftAfterSaveSleep,
ApplyBeforeOpenSnapshot,
PutReturnErrNoSpace,
}
)

Expand Down
79 changes: 79 additions & 0 deletions tests/robustness/failpoint/gofail.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ import (
"testing"
"time"

"go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
"go.etcd.io/etcd/tests/v3/framework/config"

"go.uber.org/zap"

"go.etcd.io/etcd/tests/v3/framework/e2e"
Expand Down Expand Up @@ -57,6 +60,7 @@ var (
BeforeApplyOneConfChangeSleep Failpoint = killAndGofailSleep{"beforeApplyOneConfChange", time.Second}
RaftBeforeSaveSleep Failpoint = gofailSleepAndDeactivate{"raftBeforeSave", time.Second}
RaftAfterSaveSleep Failpoint = gofailSleepAndDeactivate{"raftAfterSave", time.Second}
PutReturnErrNoSpace Failpoint = noSpaceFailpoint{alarmDuration: time.Second * 5}
)

type goPanicFailpoint struct {
Expand Down Expand Up @@ -233,3 +237,78 @@ func (f gofailSleepAndDeactivate) Available(config e2e.EtcdProcessClusterConfig,
}
return memberFailpoints.Available(f.failpoint)
}

// noSpaceFailpoint is the Failpoint registered as PutReturnErrNoSpace. It
// drives the "putReturnErrNoSpace" gofail failpoint on a cluster member.
type noSpaceFailpoint struct {
	// alarmDuration is how long Inject sleeps after the failpoint has been
	// observed to trigger, before the failpoint is deactivated again.
	alarmDuration time.Duration
}

// Name returns the gofail failpoint identifier that this failpoint checks
// for, activates and deactivates on a member.
func (f noSpaceFailpoint) Name() string {
	const failpointName = "putReturnErrNoSpace"
	return failpointName
}

// Available reports whether this failpoint can be used against the given
// process. It is never available when the configured cluster size is 1;
// otherwise the process must expose failpoints and list Name() among them.
func (f noSpaceFailpoint) Available(config e2e.EtcdProcessClusterConfig, clus e2e.EtcdProcess) bool {
	if config.ClusterSize != 1 {
		// Only query the process once the cluster size check has passed.
		if fps := clus.Failpoints(); fps != nil {
			return fps.Available(f.Name())
		}
	}
	return false
}

// Inject activates the failpoint on one randomly chosen member, issues puts
// against that member until one fails with the gRPC "no space" error, keeps
// the failpoint active for f.alarmDuration, then deactivates it and disarms
// alarms through the same member.
//
// It returns an error when the cluster has no members, the chosen member is
// not running, failpoint setup/deactivation or the alarm disarm fails, ctx is
// cancelled while waiting, or the no-space error is not observed within the
// retry budget.
func (f noSpaceFailpoint) Inject(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2e.EtcdProcessCluster) error {
	// Guard the modulo below: an empty process list would panic.
	if len(clus.Procs) == 0 {
		return fmt.Errorf("cluster has no members to inject failpoint %s into", f.Name())
	}
	memberIdx := rand.Int() % len(clus.Procs)
	member := clus.Procs[memberIdx]
	if !member.IsRunning() {
		return fmt.Errorf("member at index %d is not running", memberIdx)
	}

	lg.Info("Setting up gofailpoint", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
	err := member.Failpoints().SetupHTTP(ctx, f.Name(), "return")
	if err != nil {
		lg.Info("goFailpoint setup failed", zap.String("failpoint", f.Name()), zap.Error(err))
		return err
	}

	const (
		maxRetries    = 10
		retryInterval = time.Second
	)
	for numRetries := 0; ; numRetries++ {
		if numRetries > maxRetries {
			lg.Info(fmt.Sprintf("goFailpoint giving up after %d retries", maxRetries), zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
			if err == nil {
				// The last put succeeded, so there is no underlying error to
				// hand back; report the missing no-space error explicitly.
				return fmt.Errorf("failpoint %s on member %s did not report no space after %d retries", f.Name(), member.Config().Name, maxRetries)
			}
			return err
		}

		// Use the caller's context so that cancellation aborts the put.
		err = member.Etcdctl().Put(ctx, "failpoint_"+member.Config().Name, "", config.PutOptions{})
		if err != nil && strings.Contains(err.Error(), rpctypes.ErrGRPCNoSpace.Error()) {
			lg.Info("goFailpoint triggered successfully", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
			break
		}

		// Both an unrelated error and a successful put mean the failpoint has
		// not been observed yet. Each counts against the retry budget and
		// backs off, so the loop can neither spin nor run forever.
		if err != nil {
			lg.Info("goFailpoint received unrelated error, retrying", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
		} else {
			lg.Info("goFailpoint not triggered by put, retrying", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(retryInterval):
		}
	}

	// Keep the failpoint active for the configured duration, unless the
	// caller cancels first.
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-time.After(f.alarmDuration):
	}

	lg.Info("goFailpoint starting to disable", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
	err = member.Failpoints().DeactivateHTTP(ctx, f.Name())
	if err != nil {
		lg.Info("goFailpoint disable failed", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
		return err
	}

	lg.Info("goFailpoint disabled successfully", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))

	_, err = member.Etcdctl().AlarmDisarm(ctx, nil)
	if err != nil {
		lg.Info("goFailpoint alarm disarm failed", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
		return err
	}

	return nil
}
4 changes: 2 additions & 2 deletions tests/robustness/makefile.mk
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,15 @@ GOFAIL_VERSION = $(shell cd tools/mod && go list -m -f {{.Version}} go.etcd.io/g

.PHONY: gofail-enable
gofail-enable: install-gofail
gofail enable server/etcdserver/ server/storage/backend/ server/storage/mvcc/ server/storage/wal/
gofail enable server/etcdserver/ server/etcdserver/apply server/storage/backend/ server/storage/mvcc/ server/storage/wal/
cd ./server && go get go.etcd.io/gofail@${GOFAIL_VERSION}
cd ./etcdutl && go get go.etcd.io/gofail@${GOFAIL_VERSION}
cd ./etcdctl && go get go.etcd.io/gofail@${GOFAIL_VERSION}
cd ./tests && go get go.etcd.io/gofail@${GOFAIL_VERSION}

.PHONY: gofail-disable
gofail-disable: install-gofail
gofail disable server/etcdserver/ server/storage/backend/ server/storage/mvcc/ server/storage/wal/
gofail disable server/etcdserver/ server/etcdserver/apply server/storage/backend/ server/storage/mvcc/ server/storage/wal/
cd ./server && go mod tidy
cd ./etcdutl && go mod tidy
cd ./etcdctl && go mod tidy
Expand Down

0 comments on commit 2213f06

Please sign in to comment.