From b599235dc674258d9fbc38eb3880826e022fea2d Mon Sep 17 00:00:00 2001 From: Thomas Jungblut Date: Wed, 14 Jun 2023 10:34:35 +0200 Subject: [PATCH] Retain process logs on robustness tests Adding a new flag to retain e2e etcd process logs after stop and saving next to the visualized model. Spun out of etcd-io#16018 where I used it for easier local debugging on model violations. Signed-off-by: Thomas Jungblut --- tests/framework/e2e/cluster.go | 5 +++ tests/framework/e2e/cluster_proxy.go | 4 +++ tests/framework/e2e/etcd_process.go | 19 +++++++++++ tests/robustness/linearizability_test.go | 42 +++++++++++++----------- tests/robustness/model/history.go | 4 ++- tests/robustness/report.go | 11 +++++++ 6 files changed, 64 insertions(+), 21 deletions(-) diff --git a/tests/framework/e2e/cluster.go b/tests/framework/e2e/cluster.go index 0eafc4579104..e2f31a9db3df 100644 --- a/tests/framework/e2e/cluster.go +++ b/tests/framework/e2e/cluster.go @@ -175,6 +175,7 @@ type EtcdProcessClusterConfig struct { DiscoveryEndpoints []string // v3 discovery DiscoveryToken string LogLevel string + KeepLogs bool MaxConcurrentStreams uint32 // default is math.MaxUint32 CorruptCheckTime time.Duration @@ -311,6 +312,10 @@ func WithLogLevel(level string) EPClusterOption { return func(c *EtcdProcessClusterConfig) { c.LogLevel = level } } +func WithKeepLogs(keep bool) EPClusterOption { + return func(c *EtcdProcessClusterConfig) { c.KeepLogs = keep } +} + func WithCorruptCheckTime(time time.Duration) EPClusterOption { return func(c *EtcdProcessClusterConfig) { c.CorruptCheckTime = time } } diff --git a/tests/framework/e2e/cluster_proxy.go b/tests/framework/e2e/cluster_proxy.go index f090d1c6e7d1..93db357dd12b 100644 --- a/tests/framework/e2e/cluster_proxy.go +++ b/tests/framework/e2e/cluster_proxy.go @@ -112,6 +112,10 @@ func (p *proxyEtcdProcess) Logs() LogsExpect { return p.etcdProc.Logs() } +func (p *proxyEtcdProcess) LogLines() []string { + return p.etcdProc.LogLines() +} + func (p *proxyEtcdProcess) Kill() error { return p.etcdProc.Kill() } diff --git a/tests/framework/e2e/etcd_process.go b/tests/framework/e2e/etcd_process.go index 38934f861b69..803833266b37 100644 --- a/tests/framework/e2e/etcd_process.go +++ b/tests/framework/e2e/etcd_process.go @@ -57,6 +57,7 @@ type EtcdProcess interface { PeerProxy() proxy.Server Failpoints() *BinaryFailpoints Logs() LogsExpect + LogLines() []string Kill() error } @@ -72,6 +73,8 @@ type EtcdServerProcess struct { proxy proxy.Server failpoints *BinaryFailpoints donec chan struct{} // closed when Interact() terminates + + keptLogLines []string // only filled when KeepLogs=true } type EtcdServerProcessConfig struct { @@ -96,6 +99,8 @@ type EtcdServerProcessConfig struct { InitialCluster string GoFailPort int + KeepLogs bool + Proxy *proxy.ServerConfig } @@ -177,6 +182,9 @@ func (ep *EtcdServerProcess) Stop() (err error) { return nil } defer func() { + if ep.cfg.KeepLogs { + ep.keptLogLines = ep.proc.Lines() + } ep.proc = nil }() @@ -235,6 +243,17 @@ func (ep *EtcdServerProcess) Logs() LogsExpect { return ep.proc } +func (ep *EtcdServerProcess) LogLines() []string { + if ep.proc == nil { + if ep.cfg.KeepLogs { + return ep.keptLogLines + } + + ep.cfg.lg.Panic("Please grab logs before process is stopped or enable KeepLogs to retain them.") + } + return ep.proc.Lines() +} + func (ep *EtcdServerProcess) Kill() error { ep.cfg.lg.Info("killing server...", zap.String("name", ep.cfg.Name)) return ep.proc.Signal(syscall.SIGKILL) diff --git a/tests/robustness/linearizability_test.go b/tests/robustness/linearizability_test.go index 1065b0f27935..3c84f3a99d5d 100644 --- a/tests/robustness/linearizability_test.go +++ b/tests/robustness/linearizability_test.go @@ -42,13 +42,12 @@ func TestRobustness(t *testing.T) { name: "ClusterOfSize1/" + traffic.Name, failpoint: RandomFailpoint, traffic: traffic, - cluster: *e2e.NewConfig( - e2e.WithClusterSize(1), + clusterOpts: []e2e.EPClusterOption{e2e.WithClusterSize(1), e2e.WithSnapshotCount(100), e2e.WithGoFailEnabled(true), e2e.WithCompactionBatchLimit(100), // required for compactBeforeCommitBatch and compactAfterCommitBatch failpoints - e2e.WithWatchProcessNotifyInterval(100*time.Millisecond), - ), + e2e.WithWatchProcessNotifyInterval(100 * time.Millisecond), + }, }) clusterOfSize3Options := []e2e.EPClusterOption{ e2e.WithIsPeerTLS(true), @@ -68,32 +67,32 @@ func TestRobustness(t *testing.T) { watch: watchConfig{ expectUniqueRevision: traffic.Traffic.ExpectUniqueRevision(), }, - cluster: *e2e.NewConfig(clusterOfSize3Options...), + clusterOpts: clusterOfSize3Options, }) } scenarios = append(scenarios, testScenario{ name: "Issue14370", failpoint: RaftBeforeSavePanic, - cluster: *e2e.NewConfig( + clusterOpts: []e2e.EPClusterOption{ e2e.WithClusterSize(1), e2e.WithGoFailEnabled(true), - ), + }, }) scenarios = append(scenarios, testScenario{ name: "Issue14685", failpoint: DefragBeforeCopyPanic, - cluster: *e2e.NewConfig( + clusterOpts: []e2e.EPClusterOption{ e2e.WithClusterSize(1), e2e.WithGoFailEnabled(true), - ), + }, }) scenarios = append(scenarios, testScenario{ name: "Issue13766", failpoint: KillFailpoint, traffic: traffic.HighTraffic, - cluster: *e2e.NewConfig( + clusterOpts: []e2e.EPClusterOption{ e2e.WithSnapshotCount(100), - ), + }, }) scenarios = append(scenarios, testScenario{ name: "Issue15220", @@ -101,20 +100,20 @@ func TestRobustness(t *testing.T) { watch: watchConfig{ requestProgress: true, }, - cluster: *e2e.NewConfig( + clusterOpts: []e2e.EPClusterOption{ e2e.WithClusterSize(1), - ), + }, }) if v.Compare(version.V3_5) >= 0 { scenarios = append(scenarios, testScenario{ name: "Issue15271", failpoint: BlackholeUntilSnapshot, traffic: traffic.HighTraffic, - cluster: *e2e.NewConfig( + clusterOpts: []e2e.EPClusterOption{ e2e.WithSnapshotCount(100), e2e.WithPeerProxy(true), e2e.WithIsPeerTLS(true), - ), + }, }) } for _, scenario := range scenarios { @@ -124,6 +123,8 @@ func TestRobustness(t *testing.T) { t.Run(scenario.name, func(t *testing.T) { lg := zaptest.NewLogger(t) + scenario.clusterOpts = append(scenario.clusterOpts, e2e.WithKeepLogs(true)) + scenario.cluster = *e2e.NewConfig(scenario.clusterOpts...) scenario.cluster.Logger = lg ctx := context.Background() testRobustness(ctx, t, lg, scenario) @@ -132,11 +133,12 @@ func TestRobustness(t *testing.T) { } type testScenario struct { - name string - failpoint Failpoint - cluster e2e.EtcdProcessClusterConfig - traffic traffic.Config - watch watchConfig + name string + failpoint Failpoint + clusterOpts []e2e.EPClusterOption + cluster e2e.EtcdProcessClusterConfig + traffic traffic.Config + watch watchConfig } func testRobustness(ctx context.Context, t *testing.T, lg *zap.Logger, s testScenario) { diff --git a/tests/robustness/model/history.go b/tests/robustness/model/history.go index 4f5d52c98749..e9e9fa4449aa 100644 --- a/tests/robustness/model/history.go +++ b/tests/robustness/model/history.go @@ -30,7 +30,9 @@ import ( // ValidateOperationHistoryAndReturnVisualize return visualize as porcupine.linearizationInfo used to generate visualization is private. func ValidateOperationHistoryAndReturnVisualize(t *testing.T, lg *zap.Logger, operations []porcupine.Operation) (visualize func(basepath string)) { - linearizable, info := porcupine.CheckOperationsVerbose(NonDeterministicModel, operations, 5*time.Minute) + const timeout = 5 * time.Minute + lg.Info("Running porcupine to check operations", zap.String("model", "NonDeterministicModel"), zap.Duration("timeout", timeout)) + linearizable, info := porcupine.CheckOperationsVerbose(NonDeterministicModel, operations, timeout) if linearizable == porcupine.Illegal { t.Error("Model is not linearizable") } diff --git a/tests/robustness/report.go b/tests/robustness/report.go index 4e995acbdb07..299238e2353c 100644 --- a/tests/robustness/report.go +++ b/tests/robustness/report.go @@ -67,6 +67,9 @@ func (r *report) Report(t *testing.T, force bool) { for _, member := range r.clus.Procs { memberDataDir := filepath.Join(path, fmt.Sprintf("server-%s", member.Config().Name)) persistMemberDataDir(t, r.lg, member, memberDataDir) + + logsFile := filepath.Join(path, fmt.Sprintf("server-%s.log", member.Config().Name)) + persistMemberLogs(t, r.lg, member, logsFile) } if r.clientReports != nil { sort.Slice(r.clientReports, func(i, j int) bool { @@ -118,6 +121,14 @@ func persistWatchResponses(t *testing.T, lg *zap.Logger, path string, responses } } +func persistMemberLogs(t *testing.T, lg *zap.Logger, member e2e.EtcdProcess, path string) { + lg.Info("Saving member logs dir", zap.String("member", member.Config().Name), zap.String("path", path)) + err := os.WriteFile(path, []byte(strings.Join(member.LogLines(), "")), 0644) + if err != nil { + t.Fatal(err) + } +} + func persistWatchEvents(t *testing.T, lg *zap.Logger, path string, events []traffic.TimedWatchEvent) { lg.Info("Saving watch events", zap.String("path", path)) file, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0755)