@@ -44,6 +44,9 @@ import Glibc
4444 let skipLoadingBenchmarks = argumentExtractor. extractFlag ( named: " skip-loading-benchmark-targets " )
4545 let checkAbsoluteThresholds =
4646 checkAbsoluteThresholdsPath. count > 0 ? 1 : argumentExtractor. extractFlag ( named: " check-absolute " )
47+ let runCount = argumentExtractor. extractOption ( named: " run-count " )
48+ let relative = argumentExtractor. extractFlag ( named: " relative " )
49+ let range = argumentExtractor. extractFlag ( named: " range " )
4750 let groupingToUse = argumentExtractor. extractOption ( named: " grouping " )
4851 let metricsToUse = argumentExtractor. extractOption ( named: " metric " )
4952 let timeUnits = argumentExtractor. extractOption ( named: " time-units " )
@@ -234,6 +237,7 @@ import Glibc
234237 throw MyError . invalidArgument
235238 }
236239
240+ var totalRunCount = 1
237241 var skipLoadingBenchmarksFlagIsValid = skipLoadingBenchmarks == 0
238242 if commandToPerform == . thresholds {
239243 guard positionalArguments. count > 0 ,
@@ -264,10 +268,27 @@ import Glibc
264268 )
265269 throw MyError . invalidArgument
266270 }
267- if positionalArguments. count > 0 {
271+ let usesExistingBaseline = positionalArguments. count > 0
272+ if usesExistingBaseline {
268273 shouldBuildTargets = false
269274 }
270- break
275+ let requestedRunCount = runCount. first. flatMap { Int ( $0) } ?? 1
276+ /// These update the run count to 5 by default if it's set to 1.
277+ /// Using relative/range flags doesn't mean anything if we're not running multiple times.
278+ /// The benchmarks will need to be run multiple times in order to be able to calculate a
279+ /// relative/range of thresholds which satisfy all benchmark runs.
280+ if relative > 0 {
281+ args. append ( " --wants-relative-thresholds " )
282+ if !usesExistingBaseline {
283+ totalRunCount = requestedRunCount < 2 ? 5 : requestedRunCount
284+ }
285+ }
286+ if range > 0 {
287+ args. append ( " --wants-range-thresholds " )
288+ if !usesExistingBaseline {
289+ totalRunCount = requestedRunCount < 2 ? 5 : requestedRunCount
290+ }
291+ }
271292 case . check:
272293 skipLoadingBenchmarksFlagIsValid = true
273294 shouldBuildTargets = skipLoadingBenchmarks == 0
@@ -477,53 +498,108 @@ import Glibc
477498
478499 var failedBenchmarkCount = 0
479500
480- try withCStrings ( args ) { cArgs in
481- if debug > 0 {
482- print ( " To debug, start \( benchmarkToolName ) in LLDB using: " )
483- print ( " lldb \( benchmarkTool . string ) " )
484- print ( " " )
485- print ( " Then launch \( benchmarkToolName ) with: " )
486- print ( " run \( args . dropFirst ( ) . joined ( separator : " " ) ) " )
487- print ( " " )
488- return
489- }
501+ var allFailureCount = 0
502+ let results : [ Result < Void , Error > ] = ( 0 ..< max ( totalRunCount , 1 ) )
503+ . map { runIdx in
504+ // If we're running multiple times, we need to add the run count to the arguments
505+ var args = args
506+ if totalRunCount > 1 {
507+ args += [ " -- run-number " , " \( runIdx + 1 ) " ]
508+ if quietRunning == 0 {
509+ print (
510+ """
490511
491- var pid : pid_t = 0
492- var status = posix_spawn ( & pid, benchmarkTool. string, nil , nil , cArgs, environ)
493-
494- if status == 0 {
495- if waitpid ( pid, & status, 0 ) != - 1 {
496- // Ok, this sucks, but there is no way to get a C support target for plugins and
497- // the way the status is extracted portably is with macros - so we just need to
498- // reimplement the logic here in Swift according to the waitpid man page to
499- // get some nicer feedback on failure reason.
500- guard let waitStatus = ExitCode ( rawValue: ( status & 0xFF00 ) >> 8 ) else {
501- print ( " One or more benchmarks returned an unexpected return code \( status) " )
502- throw MyError . benchmarkUnexpectedReturnCode
512+ Running the command multiple times, round \( runIdx + 1 ) of \( totalRunCount) ...
513+ """
514+ )
503515 }
504- switch waitStatus {
505- case . success:
506- break
507- case . baselineNotFound:
508- throw MyError . baselineNotFound
509- case . genericFailure:
510- print ( " One or more benchmark suites crashed during runtime. " )
511- throw MyError . benchmarkCrashed
512- case . thresholdRegression:
513- throw MyError . benchmarkThresholdRegression
514- case . thresholdImprovement:
515- throw MyError . benchmarkThresholdImprovement
516- case . benchmarkJobFailed:
517- failedBenchmarkCount += 1
518- case . noPermissions:
519- throw MyError . noPermissions
516+ }
517+
518+ return Result < Void , Error > {
519+ try withCStrings ( args) { cArgs in
520+ /// We'll decrement this in the success path
521+ allFailureCount += 1
522+
523+ if debug > 0 {
524+ print ( " To debug, start \( benchmarkToolName) in LLDB using: " )
525+ print ( " lldb \( benchmarkTool. string) " )
526+ print ( " " )
527+ print ( " Then launch \( benchmarkToolName) with: " )
528+ print ( " run \( args. dropFirst ( ) . joined ( separator: " " ) ) " )
529+ print ( " " )
530+ return
531+ }
532+
533+ var pid : pid_t = 0
534+ var status = posix_spawn ( & pid, benchmarkTool. string, nil , nil , cArgs, environ)
535+
536+ if status == 0 {
537+ if waitpid ( pid, & status, 0 ) != - 1 {
538+ // Ok, this sucks, but there is no way to get a C support target for plugins and
539+ // the way the status is extracted portably is with macros - so we just need to
540+ // reimplement the logic here in Swift according to the waitpid man page to
541+ // get some nicer feedback on failure reason.
542+ guard let waitStatus = ExitCode ( rawValue: ( status & 0xFF00 ) >> 8 ) else {
543+ print ( " One or more benchmarks returned an unexpected return code \( status) " )
544+ throw MyError . benchmarkUnexpectedReturnCode
545+ }
546+ switch waitStatus {
547+ case . success:
548+ allFailureCount -= 1
549+ case . baselineNotFound:
550+ throw MyError . baselineNotFound
551+ case . genericFailure:
552+ print ( " One or more benchmark suites crashed during runtime. " )
553+ throw MyError . benchmarkCrashed
554+ case . thresholdRegression:
555+ throw MyError . benchmarkThresholdRegression
556+ case . thresholdImprovement:
557+ throw MyError . benchmarkThresholdImprovement
558+ case . benchmarkJobFailed:
559+ failedBenchmarkCount += 1
560+ case . noPermissions:
561+ throw MyError . noPermissions
562+ }
563+ } else {
564+ print (
565+ " waitpid() for pid \( pid) returned a non-zero exit code \( status) , errno = \( errno) "
566+ )
567+ exit ( errno)
568+ }
569+ } else {
570+ print ( " Failed to run BenchmarkTool, posix_spawn() returned [ \( status) ] " )
571+ }
520572 }
521- } else {
522- print ( " waitpid() for pid \( pid) returned a non-zero exit code \( status) , errno = \( errno) " )
523- exit ( errno)
524573 }
525- } else {
526- print ( " Failed to run BenchmarkTool, posix_spawn() returned [ \( status) ] " )
574+ }
575+
576+ switch results. count {
577+ case ... 0 :
578+ throw MyError . unknownFailure
579+ case 1 :
580+ try results [ 0 ] . get ( )
581+ default :
582+ if allFailureCount > 0 {
583+ print (
584+ """
585+ Ran BenchmarkTool \( results. count) times, but it failed \( allFailureCount) times.
586+ Will exit with the first failure.
587+
588+ """
589+ )
590+ guard
591+ let failure = results. first ( where: { result in
592+ switch result {
593+ case . failure:
594+ return true
595+ case . success:
596+ return false
597+ }
598+ } )
599+ else {
600+ throw MyError . unknownFailure
601+ }
602+ try failure. get ( )
527603 }
528604 }
529605
@@ -543,5 +619,6 @@ import Glibc
543619 case noPermissions = 6
544620 case invalidArgument = 101
545621 case buildFailed = 102
622+ case unknownFailure = 103
546623 }
547624}
0 commit comments