Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add failpoint for nospace on puts #16018

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions server/etcdserver/apply/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,8 @@ func newQuotaApplierV3(lg *zap.Logger, quotaBackendBytesCfg int64, be backend.Ba
func (a *quotaApplierV3) Put(ctx context.Context, p *pb.PutRequest) (*pb.PutResponse, *traceutil.Trace, error) {
ok := a.q.Available(p)
resp, trace, err := a.applierV3.Put(ctx, p)
// gofail: var putReturnErrNoSpace struct{}
// return resp, trace, errors.ErrNoSpace
if err == nil && !ok {
err = errors.ErrNoSpace
}
Expand Down
1 change: 1 addition & 0 deletions tests/robustness/failpoint/failpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ var (
RaftBeforeSaveSleep,
RaftAfterSaveSleep,
ApplyBeforeOpenSnapshot,
PutReturnErrNoSpace,
}
)

Expand Down
79 changes: 79 additions & 0 deletions tests/robustness/failpoint/gofail.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ import (
"testing"
"time"

"go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
"go.etcd.io/etcd/tests/v3/framework/config"

"go.uber.org/zap"

"go.etcd.io/etcd/tests/v3/framework/e2e"
Expand Down Expand Up @@ -57,6 +60,7 @@ var (
BeforeApplyOneConfChangeSleep Failpoint = killAndGofailSleep{"beforeApplyOneConfChange", time.Second}
RaftBeforeSaveSleep Failpoint = gofailSleepAndDeactivate{"raftBeforeSave", time.Second}
RaftAfterSaveSleep Failpoint = gofailSleepAndDeactivate{"raftAfterSave", time.Second}
PutReturnErrNoSpace Failpoint = noSpaceFailpoint{alarmDuration: time.Second * 5}
)

type goPanicFailpoint struct {
Expand Down Expand Up @@ -233,3 +237,78 @@ func (f gofailSleepAndDeactivate) Available(config e2e.EtcdProcessClusterConfig,
}
return memberFailpoints.Available(f.failpoint)
}

// noSpaceFailpoint is the Failpoint implementation used for
// PutReturnErrNoSpace. It drives the "putReturnErrNoSpace" gofail failpoint
// on a single cluster member.
type noSpaceFailpoint struct {
// alarmDuration is how long Inject sleeps after it has observed the
// no-space error and before it deactivates the failpoint again.
alarmDuration time.Duration
}

// Name returns the identifier of the gofail failpoint this type drives.
// The same string is used when checking availability and when activating
// or deactivating the failpoint over HTTP.
func (f noSpaceFailpoint) Name() string {
	const failpointName = "putReturnErrNoSpace"
	return failpointName
}

// Available reports whether this failpoint can be injected into the given
// member. It is never available for single-member clusters, and otherwise
// only when the member exposes failpoints and lists this one as available.
func (f noSpaceFailpoint) Available(config e2e.EtcdProcessClusterConfig, clus e2e.EtcdProcess) bool {
	if config.ClusterSize == 1 {
		return false
	}
	if fps := clus.Failpoints(); fps != nil {
		return fps.Available(f.Name())
	}
	return false
}

// Inject activates the putReturnErrNoSpace failpoint on one randomly chosen
// member, issues puts against that member until one is rejected with the
// no-space error, keeps the failpoint active for f.alarmDuration, and then
// deactivates the failpoint and disarms alarms.
//
// It returns an error if the chosen member is not running, if the failpoint
// cannot be set up or deactivated, if ctx is done while waiting to retry, if
// the no-space error is not observed within the retry budget, or if
// disarming the alarm fails.
func (f noSpaceFailpoint) Inject(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2e.EtcdProcessCluster) error {
	memberIdx := rand.Int() % len(clus.Procs)
	member := clus.Procs[memberIdx]
	if !member.IsRunning() {
		return fmt.Errorf("member at index %d is not running", memberIdx)
	}

	lg.Info("Setting up gofailpoint", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
	err := member.Failpoints().SetupHTTP(ctx, f.Name(), "return")
	if err != nil {
		lg.Info("goFailpoint setup failed", zap.String("failpoint", f.Name()), zap.Error(err))
		return err
	}

	const maxRetries = 10
	for numRetries := 0; ; numRetries++ {
		if numRetries > maxRetries {
			lg.Info(fmt.Sprintf("goFailpoint giving up after %d retries", maxRetries), zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
			// Best effort: do not leave the member rejecting puts after we
			// have given up on observing the failure.
			if derr := member.Failpoints().DeactivateHTTP(ctx, f.Name()); derr != nil {
				lg.Info("goFailpoint disable failed", zap.String("failpoint", f.Name()), zap.Error(derr), zap.String("target", member.Config().Name))
			}
			return err
		}

		// Use the caller's context so the probe honours the test's deadline
		// and cancellation like every other call in this method.
		err = member.Etcdctl().Put(ctx, "failpoint_"+member.Config().Name, "", config.PutOptions{})
		if err != nil && strings.Contains(err.Error(), rpctypes.ErrGRPCNoSpace.Error()) {
			lg.Info("goFailpoint triggered successfully", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
			break
		}

		if err == nil {
			// A successful put means the failpoint has not taken effect yet.
			// Treat it as a failed attempt so the loop backs off and stays
			// bounded instead of spinning.
			err = fmt.Errorf("put succeeded while failpoint %q is expected to be active", f.Name())
			lg.Info("goFailpoint not triggered yet, retrying", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
		} else {
			lg.Info("goFailpoint received unrelated error, retrying", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
		}

		// Back off before the next attempt, but stop early if ctx is done.
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(1 * time.Second):
		}
	}

	// Keep the failpoint active for the configured duration.
	time.Sleep(f.alarmDuration)

	lg.Info("goFailpoint starting to disable", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
	err = member.Failpoints().DeactivateHTTP(ctx, f.Name())
	if err != nil {
		lg.Info("goFailpoint disable failed", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
		return err
	}

	lg.Info("goFailpoint disabled successfully", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))

	_, err = member.Etcdctl().AlarmDisarm(ctx, nil)
	if err != nil {
		lg.Info("goFailpoint alarm disarm failed", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
		return err
	}

	return nil
}
4 changes: 2 additions & 2 deletions tests/robustness/makefile.mk
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,15 @@ GOFAIL_VERSION = $(shell cd tools/mod && go list -m -f {{.Version}} go.etcd.io/g

.PHONY: gofail-enable
gofail-enable: install-gofail
gofail enable server/etcdserver/ server/storage/backend/ server/storage/mvcc/ server/storage/wal/
gofail enable server/etcdserver/ server/etcdserver/apply server/storage/backend/ server/storage/mvcc/ server/storage/wal/
cd ./server && go get go.etcd.io/gofail@${GOFAIL_VERSION}
cd ./etcdutl && go get go.etcd.io/gofail@${GOFAIL_VERSION}
cd ./etcdctl && go get go.etcd.io/gofail@${GOFAIL_VERSION}
cd ./tests && go get go.etcd.io/gofail@${GOFAIL_VERSION}

.PHONY: gofail-disable
gofail-disable: install-gofail
gofail disable server/etcdserver/ server/storage/backend/ server/storage/mvcc/ server/storage/wal/
gofail disable server/etcdserver/ server/etcdserver/apply server/storage/backend/ server/storage/mvcc/ server/storage/wal/
cd ./server && go mod tidy
cd ./etcdutl && go mod tidy
cd ./etcdctl && go mod tidy
Expand Down
Loading