Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 48 additions & 46 deletions op-acceptor/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,30 @@ import (

// Config holds the application configuration. It is assembled by NewConfig
// from the CLI context plus the logger and paths handed to that constructor.
type Config struct {
TestDir string // Test directory; NewConfig assigns its absTestDir value here
ValidatorConfig string // Validator config location; NewConfig assigns its absValidatorConfig value here
TargetGate string // Gate selected for the run; NewConfig assigns its gate value here
GatelessMode bool // Set from gatelessMode in NewConfig; presumably true when running without a gate — confirm
GoBinary string // Value of flags.GoBinary; presumably the Go executable used to run tests — confirm
RunInterval time.Duration // Interval between test runs
RunOnce bool // Indicates if the service should exit after one test run
AllowSkips bool // Allow tests to be skipped instead of failing when preconditions are not met
DefaultTimeout time.Duration // Default timeout for individual tests, can be overridden by test config
Timeout time.Duration // Timeout for gateless mode tests (if specified)
LogDir string // Directory to store test logs
OutputRealtimeLogs bool // If enabled, test logs will be outputted in realtime
TestLogLevel string // Log level to be used for the tests
Orchestrator flags.OrchestratorType // Devstack orchestrator type
DevnetEnvURL string // URL or path to the devnet environment file
Serial bool // Whether to run tests serially instead of in parallel
Concurrency int // Number of concurrent test workers (0 = auto-determine)
ShowProgress bool // Whether to show periodic progress updates during test execution
ProgressInterval time.Duration // Interval between progress updates when ShowProgress is 'true'
FlakeShake bool // Enable flake-shake mode for test stability validation
FlakeShakeIterations int // Number of times to run each test in flake-shake mode
Log log.Logger // Logger passed to NewConfig
ExcludeGates []string // List of gate IDs whose tests should be excluded
TestDir string // Test directory; NewConfig assigns its absTestDir value here
ValidatorConfig string // Validator config location; NewConfig assigns its absValidatorConfig value here
TargetGate string // Gate selected for the run; NewConfig assigns its gate value here
GatelessMode bool // Set from gatelessMode in NewConfig; presumably true when running without a gate — confirm
GoBinary string // Value of flags.GoBinary; presumably the Go executable used to run tests — confirm
RunInterval time.Duration // Interval between test runs
RunOnce bool // Indicates if the service should exit after one test run
AllowSkips bool // Allow tests to be skipped instead of failing when preconditions are not met
DefaultTimeout time.Duration // Default timeout for individual tests, can be overridden by test config
Timeout time.Duration // Timeout for gateless mode tests (if specified)
LogDir string // Directory to store test logs
OutputRealtimeLogs bool // If enabled, test logs will be outputted in realtime
TestLogLevel string // Log level to be used for the tests
Orchestrator flags.OrchestratorType // Devstack orchestrator type
DevnetEnvURL string // URL or path to the devnet environment file
Serial bool // Whether to run tests serially instead of in parallel
Concurrency int // Number of concurrent test workers (0 = auto-determine)
ShowProgress bool // Whether to show periodic progress updates during test execution
ProgressInterval time.Duration // Interval between progress updates when ShowProgress is 'true'
FlakeShake bool // Enable flake-shake mode for test stability validation
FlakeShakeIterations int // Number of times to run each test in flake-shake mode
Log log.Logger // Logger passed to NewConfig
ExcludeGates []string // List of gate IDs whose tests should be excluded
StripFileLinePrefixes bool // If enabled, strip file:line prefixes from test output logs
}

// NewConfig creates a new Config from cli context
Expand Down Expand Up @@ -114,29 +115,30 @@ func NewConfig(ctx *cli.Context, log log.Logger, testDir string, validatorConfig
}

return &Config{
TestDir: absTestDir,
ValidatorConfig: absValidatorConfig,
TargetGate: gate,
GatelessMode: gatelessMode,
GoBinary: ctx.String(flags.GoBinary.Name),
RunInterval: runInterval,
RunOnce: runOnce,
AllowSkips: ctx.Bool(flags.AllowSkips.Name),
DefaultTimeout: ctx.Duration(flags.DefaultTimeout.Name),
Timeout: ctx.Duration(flags.Timeout.Name),
OutputRealtimeLogs: ctx.Bool(flags.OutputRealtimeLogs.Name),
TestLogLevel: ctx.String(flags.TestLogLevel.Name),
Orchestrator: orchestrator,
DevnetEnvURL: devnetEnvURL,
Serial: ctx.Bool(flags.Serial.Name),
Concurrency: ctx.Int(flags.Concurrency.Name),
ShowProgress: ctx.Bool(flags.ShowProgress.Name),
ProgressInterval: ctx.Duration(flags.ProgressInterval.Name),
FlakeShake: ctx.Bool(flags.FlakeShake.Name),
FlakeShakeIterations: ctx.Int(flags.FlakeShakeIterations.Name),
LogDir: logDir,
Log: log,
ExcludeGates: excludeGates,
TestDir: absTestDir,
ValidatorConfig: absValidatorConfig,
TargetGate: gate,
GatelessMode: gatelessMode,
GoBinary: ctx.String(flags.GoBinary.Name),
RunInterval: runInterval,
RunOnce: runOnce,
AllowSkips: ctx.Bool(flags.AllowSkips.Name),
DefaultTimeout: ctx.Duration(flags.DefaultTimeout.Name),
Timeout: ctx.Duration(flags.Timeout.Name),
OutputRealtimeLogs: ctx.Bool(flags.OutputRealtimeLogs.Name),
TestLogLevel: ctx.String(flags.TestLogLevel.Name),
Orchestrator: orchestrator,
DevnetEnvURL: devnetEnvURL,
Serial: ctx.Bool(flags.Serial.Name),
Concurrency: ctx.Int(flags.Concurrency.Name),
ShowProgress: ctx.Bool(flags.ShowProgress.Name),
ProgressInterval: ctx.Duration(flags.ProgressInterval.Name),
FlakeShake: ctx.Bool(flags.FlakeShake.Name),
FlakeShakeIterations: ctx.Int(flags.FlakeShakeIterations.Name),
LogDir: logDir,
Log: log,
ExcludeGates: excludeGates,
StripFileLinePrefixes: ctx.Bool(flags.StripFileLinePrefixes.Name),
}, nil
}

Expand Down
7 changes: 7 additions & 0 deletions op-acceptor/flags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,12 @@ var (
EnvVars: opservice.PrefixEnvVar(EnvVarPrefix, "OUTPUT_REALTIME_LOGS"),
Usage: "If enabled, test logs will be outputted to the console in realtime. Defaults to false.",
}
// StripFileLinePrefixes toggles removal of file:line prefixes (for example
// 'system.go:28:') from test output logs. It is enabled by default; the
// environment variable name is built by opservice.PrefixEnvVar from
// EnvVarPrefix and "STRIP_FILE_LINE_PREFIXES".
// NOTE(review): because the default is true, turning this off presumably
// needs an explicit false value on the flag or env var — confirm against the
// cli library version in use.
StripFileLinePrefixes = &cli.BoolFlag{
Name: "strip-file-line-prefixes",
Value: true,
EnvVars: opservice.PrefixEnvVar(EnvVarPrefix, "STRIP_FILE_LINE_PREFIXES"),
Usage: "Strip file:line prefixes (e.g., 'system.go:28:') from test output logs. Defaults to true.",
}
ShowProgress = &cli.BoolFlag{
Name: "show-progress",
Value: false,
Expand Down Expand Up @@ -196,6 +202,7 @@ var optionalFlags = []cli.Flag{
LogDir,
TestLogLevel,
OutputRealtimeLogs,
StripFileLinePrefixes,
ShowProgress,
ProgressInterval,
Orchestrator,
Expand Down
118 changes: 118 additions & 0 deletions op-acceptor/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,99 @@ var (
"gate",
"suite",
})

// testDurationHistogram tracks the distribution of test execution times in
// seconds, labelled by network_name, gate and suite. It is fed by
// RecordTestDurationHistogram. The explicit bucket upper bounds run from 0.1s
// to 600s; durations above the last bound fall into the implicit +Inf bucket.
testDurationHistogram = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: MetricsNamespace,
Name: "test_duration_histogram_seconds",
Help: "Histogram of test execution durations in seconds",
Buckets: []float64{0.1, 0.5, 1, 2, 5, 10, 30, 60, 120, 300, 600}, // 100ms to 10min
}, []string{
"network_name",
"gate",
"suite",
})

// testTimeouts counts tests that timed out, labelled by network_name, run_id,
// gate and suite. It is incremented by RecordTestTimeout.
// NOTE(review): run_id presumably changes on every run, which would create a
// new time series per run — confirm the resulting cardinality is acceptable.
testTimeouts = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: MetricsNamespace,
Name: "test_timeouts_total",
Help: "Total number of tests that timed out",
}, []string{
"network_name",
"run_id",
"gate",
"suite",
})

// Gate-level aggregated metrics

// gateTestsTotal accumulates the number of tests per gate, labelled by
// network_name and gate. RecordGateMetrics adds each gate's total to it.
gateTestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: MetricsNamespace,
Name: "gate_tests_total",
Help: "Total number of tests per gate",
}, []string{
"network_name",
"gate",
})

// gateTestsPassed accumulates the number of passed tests per gate, labelled by
// network_name and gate. RecordGateMetrics adds each gate's passed count to it.
gateTestsPassed = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: MetricsNamespace,
Name: "gate_tests_passed_total",
Help: "Total number of passed tests per gate",
}, []string{
"network_name",
"gate",
})

// gateTestsFailed accumulates the number of failed tests per gate, labelled by
// network_name and gate. RecordGateMetrics adds each gate's failed count to it.
gateTestsFailed = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: MetricsNamespace,
Name: "gate_tests_failed_total",
Help: "Total number of failed tests per gate",
}, []string{
"network_name",
"gate",
})

// gateDurationSeconds is a gauge holding the duration of a gate execution in
// seconds, labelled by network_name, run_id and gate. RecordGateMetrics
// overwrites it with Set, so each label combination holds the last value written.
// NOTE(review): run_id presumably changes on every run, which would create a
// new time series per run — confirm the resulting cardinality is acceptable.
gateDurationSeconds = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "gate_duration_seconds",
Help: "Duration of gate execution in seconds",
}, []string{
"network_name",
"run_id",
"gate",
})

// Suite-level metrics

// suiteTestsTotal accumulates the number of tests per suite, labelled by
// network_name, gate and suite. RecordSuiteMetrics adds each suite's total to it.
suiteTestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: MetricsNamespace,
Name: "suite_tests_total",
Help: "Total number of tests per suite",
}, []string{
"network_name",
"gate",
"suite",
})

// suiteTestsPassed accumulates the number of passed tests per suite, labelled
// by network_name, gate and suite. RecordSuiteMetrics adds each suite's passed
// count to it.
suiteTestsPassed = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: MetricsNamespace,
Name: "suite_tests_passed_total",
Help: "Total number of passed tests per suite",
}, []string{
"network_name",
"gate",
"suite",
})

// suiteTestsFailed accumulates the number of failed tests per suite, labelled
// by network_name, gate and suite. RecordSuiteMetrics adds each suite's failed
// count to it.
suiteTestsFailed = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: MetricsNamespace,
Name: "suite_tests_failed_total",
Help: "Total number of failed tests per suite",
}, []string{
"network_name",
"gate",
"suite",
})
)

// errToLabel tries to make the error string a more valid Prometheus label
Expand Down Expand Up @@ -270,3 +363,28 @@ func RecordIndividualTest(
// isValidResult reports whether result is one of the statuses listed in the
// package-level validResults collection.
func isValidResult(result types.TestStatus) bool {
	recognized := slices.Contains(validResults, result)
	return recognized
}

// RecordTestDurationHistogram adds one observation, expressed in seconds, to
// the test duration histogram series identified by network, gate and suite.
func RecordTestDurationHistogram(network string, gate string, suite string, duration time.Duration) {
	series := testDurationHistogram.WithLabelValues(network, gate, suite)
	series.Observe(duration.Seconds())
}

// RecordTestTimeout increments the timeout counter for the series identified
// by network, runID, gate and suite. Call it once per test that timed out.
func RecordTestTimeout(network string, runID string, gate string, suite string) {
	timeoutCounter := testTimeouts.WithLabelValues(network, runID, gate, suite)
	timeoutCounter.Inc()
}

// RecordGateMetrics publishes the aggregated results of one gate execution.
//
// total, passed and failed are added to the per-gate counters keyed by
// (network, gate); duration is stored, in seconds, in the per-run gauge keyed
// by (network, runID, gate).
// NOTE(review): Prometheus counters panic on negative increments, so callers
// are expected to pass non-negative counts.
func RecordGateMetrics(network string, runID string, gate string, total int, passed int, failed int, duration time.Duration) {
	totalCounter := gateTestsTotal.WithLabelValues(network, gate)
	totalCounter.Add(float64(total))

	passedCounter := gateTestsPassed.WithLabelValues(network, gate)
	passedCounter.Add(float64(passed))

	failedCounter := gateTestsFailed.WithLabelValues(network, gate)
	failedCounter.Add(float64(failed))

	// The gauge is overwritten rather than accumulated: it holds the latest
	// duration written for this label combination.
	durationGauge := gateDurationSeconds.WithLabelValues(network, runID, gate)
	durationGauge.Set(duration.Seconds())
}

// RecordSuiteMetrics publishes the aggregated results of one suite by adding
// total, passed and failed to the per-suite counters keyed by
// (network, gate, suite).
// NOTE(review): Prometheus counters panic on negative increments, so callers
// are expected to pass non-negative counts.
func RecordSuiteMetrics(network string, gate string, suite string, total int, passed int, failed int) {
	totalCounter := suiteTestsTotal.WithLabelValues(network, gate, suite)
	totalCounter.Add(float64(total))

	passedCounter := suiteTestsPassed.WithLabelValues(network, gate, suite)
	passedCounter.Add(float64(passed))

	failedCounter := suiteTestsFailed.WithLabelValues(network, gate, suite)
	failedCounter.Add(float64(failed))
}
Loading