Skip to content

Commit

Permalink
triggering once is enough
Browse files (browse the repository at this point in the history)
  • Loading branch information
tjungblu committed Jun 9, 2023
1 parent aeaf52b commit a030901
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 44 deletions.
70 changes: 31 additions & 39 deletions tests/robustness/failpoints.go
Original file line number Diff line number Diff line change
Expand Up @@ -531,58 +531,50 @@ type noSpaceFailpoint struct {
}

func (f noSpaceFailpoint) Inject(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2e.EtcdProcessCluster) error {
quorumSize := (len(clus.Procs) / 2) + 1
for i := 0; i < quorumSize; i++ {
member := clus.Procs[i]
if member.IsRunning() {
lg.Info("Setting up gofailpoint", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
err := member.Failpoints().Setup(ctx, f.Name(), "return")
if err != nil {
lg.Info("goFailpoint setup failed", zap.String("failpoint", f.Name()), zap.Error(err))
memberIdx := rand.Int() % len(clus.Procs)
member := clus.Procs[memberIdx]
if member.IsRunning() {
lg.Info("Setting up gofailpoint", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
err := member.Failpoints().Setup(ctx, f.Name(), "return")
if err != nil {
lg.Info("goFailpoint setup failed", zap.String("failpoint", f.Name()), zap.Error(err))
return err
}

numRetries := 0
for {
if numRetries > 5 {
lg.Info("goFailpoint giving up after 5 retries", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
return err
}

numRetries := 0
for {
if numRetries > 5 {
lg.Info("goFailpoint giving up after 5 retries", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
return err
err = member.Etcdctl().Put(context.Background(), "failpoint_"+member.Config().Name, "", config.PutOptions{})
if err != nil {
if strings.Contains(err.Error(), rpctypes.ErrGRPCNoSpace.Error()) {
lg.Info("goFailpoint triggered successfully", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
break
}

err = member.Etcdctl().Put(context.Background(), "failpoint_"+member.Config().Name, "", config.PutOptions{})
if err != nil {
if strings.Contains(err.Error(), rpctypes.ErrGRPCNoSpace.Error()) {
lg.Info("goFailpoint triggered successfully", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
break
}

lg.Info("goFailpoint received unrelated error, retrying", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
time.Sleep(1 * time.Second)
numRetries++
continue
}
lg.Info("goFailpoint received unrelated error, retrying", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
time.Sleep(1 * time.Second)
numRetries++
continue
}
}
}

time.Sleep(f.alarmDuration)

for i := 0; i < quorumSize; i++ {
member := clus.Procs[i]
lg.Info("goFailpoint starting to disable", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))

err := member.Failpoints().Disable(ctx, f.Name())
if err != nil {
lg.Info("goFailpoint disable failed", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
return err
}

lg.Info("goFailpoint disabled successfully", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
lg.Info("goFailpoint starting to disable", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))
err := member.Failpoints().Disable(ctx, f.Name())
if err != nil {
lg.Info("goFailpoint disable failed", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
return err
}

// disarming can be done once at the end, clearing all alarms
member := clus.Procs[0]
_, err := member.Etcdctl().AlarmDisarm(ctx, nil)
lg.Info("goFailpoint disabled successfully", zap.String("failpoint", f.Name()), zap.String("target", member.Config().Name))

_, err = member.Etcdctl().AlarmDisarm(ctx, nil)
if err != nil {
lg.Info("goFailpoint alarm disarm failed", zap.String("failpoint", f.Name()), zap.Error(err), zap.String("target", member.Config().Name))
return err
Expand Down
6 changes: 1 addition & 5 deletions tests/robustness/linearizability_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,10 @@ func TestRobustness(t *testing.T) {
scenarios = append(scenarios, testScenario{
name: "IssueXYZ",
failpoint: PutReturnErrNoSpace,
traffic: traffic.KubernetesTraffic,
traffic: traffic.LowTraffic,
clusterOpts: []e2e.EPClusterOption{
e2e.WithIsPeerTLS(true),
e2e.WithSnapshotCount(100),
e2e.WithPeerProxy(true),
e2e.WithGoFailEnabled(true),
e2e.WithWatchProcessNotifyInterval(100 * time.Millisecond),
e2e.WithLogLevel("debug"),
},
})
if v.Compare(version.V3_5) >= 0 {
Expand Down

0 comments on commit a030901

Please sign in to comment.