From 895db5a808224ae158beeaaf65ea19a0a865a378 Mon Sep 17 00:00:00 2001
From: Antoine du Hamel
Date: Sat, 23 Jul 2022 13:38:02 +0200
Subject: [PATCH 1/3] tools: add more options to track flaky tests

Refs: https://github.com/nodejs/node/pull/43929#issuecomment-1193104729
---
 .github/workflows/build-tarball.yml    |  4 +--
 .github/workflows/build-windows.yml    |  2 +-
 .github/workflows/coverage-linux.yml   |  4 +--
 .github/workflows/coverage-windows.yml |  2 +-
 .github/workflows/doc.yml              |  2 +-
 .github/workflows/test-asan.yml        |  4 +--
 .github/workflows/test-internet.yml    |  2 +-
 .github/workflows/test-linux.yml       |  4 +--
 .github/workflows/test-macos.yml       |  4 +--
 tools/test.py                          | 35 ++++++++++++++++++--------
 10 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/build-tarball.yml b/.github/workflows/build-tarball.yml
index 4509450172f277..6f7100bc8f050e 100644
--- a/.github/workflows/build-tarball.yml
+++ b/.github/workflows/build-tarball.yml
@@ -29,7 +29,7 @@ concurrency:
 
 env:
   PYTHON_VERSION: '3.10'
-  FLAKY_TESTS: dontcare
+  FLAKY_TESTS: keep_retrying
 
 permissions:
   contents: read
@@ -94,4 +94,4 @@ jobs:
       - name: Test
         run: |
           cd $TAR_DIR
-          make run-ci -j2 V=1 TEST_CI_ARGS="-p dots"
+          make run-ci -j2 V=1 TEST_CI_ARGS="-p dots --measure-flakiness 10"
diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml
index 6ef65b9c64fbea..af376d3aa5c533 100644
--- a/.github/workflows/build-windows.yml
+++ b/.github/workflows/build-windows.yml
@@ -24,7 +24,7 @@ concurrency:
 
 env:
   PYTHON_VERSION: '3.10'
-  FLAKY_TESTS: dontcare
+  FLAKY_TESTS: keep_retrying
 
 permissions:
   contents: read
diff --git a/.github/workflows/coverage-linux.yml b/.github/workflows/coverage-linux.yml
index abd69801f60045..310ed03a66e80d 100644
--- a/.github/workflows/coverage-linux.yml
+++ b/.github/workflows/coverage-linux.yml
@@ -27,7 +27,7 @@ concurrency:
 
 env:
   PYTHON_VERSION: '3.10'
-  FLAKY_TESTS: dontcare
+  FLAKY_TESTS: keep_retrying
 
 permissions:
   contents: read
@@ -53,7 +53,7 @@ jobs:
      # TODO(bcoe): fix the couple tests that fail with the inspector enabled.
      # The cause is most likely coverage's use of the inspector.
      - name: Test
-        run: NODE_V8_COVERAGE=coverage/tmp make test-cov -j2 V=1 TEST_CI_ARGS="-p dots" || exit 0
+        run: NODE_V8_COVERAGE=coverage/tmp make test-cov -j2 V=1 TEST_CI_ARGS="-p dots --measure-flakiness 10" || exit 0
       - name: Report JS
         run: npx c8 report --check-coverage
         env:
diff --git a/.github/workflows/coverage-windows.yml b/.github/workflows/coverage-windows.yml
index d9c5bfb58d61e6..78465acb17566d 100644
--- a/.github/workflows/coverage-windows.yml
+++ b/.github/workflows/coverage-windows.yml
@@ -29,7 +29,7 @@ concurrency:
 
 env:
   PYTHON_VERSION: '3.10'
-  FLAKY_TESTS: dontcare
+  FLAKY_TESTS: keep_retrying
 
 permissions:
   contents: read
diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml
index 72abb16ad5083c..cdbd44062831c5 100644
--- a/.github/workflows/doc.yml
+++ b/.github/workflows/doc.yml
@@ -40,4 +40,4 @@ jobs:
           name: docs
           path: out/doc
       - name: Test
-        run: NODE=$(command -v node) make test-doc-ci TEST_CI_ARGS="-p actions"
+        run: NODE=$(command -v node) make test-doc-ci TEST_CI_ARGS="-p actions --measure-flakiness 10"
diff --git a/.github/workflows/test-asan.yml b/.github/workflows/test-asan.yml
index ba30449e90ba40..d724e320d385c5 100644
--- a/.github/workflows/test-asan.yml
+++ b/.github/workflows/test-asan.yml
@@ -31,7 +31,7 @@ concurrency:
 env:
   ASAN_OPTIONS: intercept_tls_get_addr=0
   PYTHON_VERSION: '3.10'
-  FLAKY_TESTS: dontcare
+  FLAKY_TESTS: keep_retrying
 
 permissions:
   contents: read
@@ -58,4 +58,4 @@ jobs:
       - name: Build
         run: make build-ci -j2 V=1
       - name: Test
-        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions -t 300"
+        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions -t 300 --measure-flakiness 10"
diff --git a/.github/workflows/test-internet.yml b/.github/workflows/test-internet.yml
index 297ceea987a76a..4fcb18a14f5ce9 100644
--- a/.github/workflows/test-internet.yml
+++ b/.github/workflows/test-internet.yml
@@ -22,7 +22,7 @@ concurrency:
 
 env:
   PYTHON_VERSION: '3.10'
-  FLAKY_TESTS: dontcare
+  FLAKY_TESTS: keep_retrying
 
 permissions:
   contents: read
diff --git a/.github/workflows/test-linux.yml b/.github/workflows/test-linux.yml
index 4cc09f22c25105..10248a2100a2c8 100644
--- a/.github/workflows/test-linux.yml
+++ b/.github/workflows/test-linux.yml
@@ -24,7 +24,7 @@ concurrency:
 
 env:
   PYTHON_VERSION: '3.10'
-  FLAKY_TESTS: dontcare
+  FLAKY_TESTS: keep_retrying
 
 permissions:
   contents: read
@@ -46,4 +46,4 @@ jobs:
       - name: Build
         run: make build-ci -j2 V=1 CONFIG_FLAGS="--error-on-warn"
       - name: Test
-        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions"
+        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions --measure-flakiness 10"
diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml
index 5f93730d666fc0..a878326aebc92f 100644
--- a/.github/workflows/test-macos.yml
+++ b/.github/workflows/test-macos.yml
@@ -30,7 +30,7 @@ concurrency:
 
 env:
   PYTHON_VERSION: '3.10'
-  FLAKY_TESTS: dontcare
+  FLAKY_TESTS: keep_retrying
 
 permissions:
   contents: read
@@ -60,4 +60,4 @@ jobs:
       - name: Build
         run: make build-ci -j2 V=1 CONFIG_FLAGS="--error-on-warn"
       - name: Test
-        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions"
+        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions --measure-flakiness 10"
diff --git a/tools/test.py b/tools/test.py
index 9a7de5ed24d706..293549166ff002 100755
--- a/tools/test.py
+++ b/tools/test.py
@@ -96,10 +96,11 @@ def get_module(name, path):
 
 class ProgressIndicator(object):
 
-  def __init__(self, cases, flaky_tests_mode):
+  def __init__(self, cases, flaky_tests_mode, measure_flakiness):
     self.cases = cases
     self.serial_id = 0
     self.flaky_tests_mode = flaky_tests_mode
+    self.measure_flakiness = measure_flakiness
     self.parallel_queue = Queue(len(cases))
     self.sequential_queue = Queue(len(cases))
     for case in cases:
@@ -211,10 +212,20 @@ def RunSingle(self, parallel, thread_id):
       if output.UnexpectedOutput():
         if FLAKY in output.test.outcomes and self.flaky_tests_mode == DONTCARE:
           self.flaky_failed.append(output)
+        elif FLAKY in output.test.outcomes and self.flaky_tests_mode == KEEP_RETRYING:
+          for _ in range(99):
+            if not case.Run().UnexpectedOutput():
+              self.flaky_failed.append(output)
+              break
+          # If after 100 tries, the test is not passing, it's not flaky.
+          self.failed.append(output)
         else:
           self.failed.append(output)
           if output.HasCrashed():
             self.crashed += 1
+          if self.measure_flakiness:
+            outputs = [case.Run() for _ in range(self.measure_flakiness)]
+            print(f" failed {len([i for i in outputs if i.UnexpectedOutput()])} out of {self.measure_flakiness}")
       else:
         self.succeeded += 1
       self.remaining -= 1
@@ -436,8 +447,8 @@ def Done(self):
 
 class CompactProgressIndicator(ProgressIndicator):
 
-  def __init__(self, cases, flaky_tests_mode, templates):
-    super(CompactProgressIndicator, self).__init__(cases, flaky_tests_mode)
+  def __init__(self, cases, flaky_tests_mode, measure_flakiness, templates):
+    super(CompactProgressIndicator, self).__init__(cases, flaky_tests_mode, measure_flakiness)
     self.templates = templates
     self.last_status_length = 0
     self.start_time = time.time()
@@ -492,13 +503,13 @@ def PrintProgress(self, name):
 
 class ColorProgressIndicator(CompactProgressIndicator):
 
-  def __init__(self, cases, flaky_tests_mode):
+  def __init__(self, cases, flaky_tests_mode, measure_flakiness):
     templates = {
       'status_line': "[%(mins)02i:%(secs)02i|\033[34m%%%(remaining) 4d\033[0m|\033[32m+%(passed) 4d\033[0m|\033[31m-%(failed) 4d\033[0m]: %(test)s",
       'stdout': "\033[1m%s\033[0m",
       'stderr': "\033[31m%s\033[0m",
     }
-    super(ColorProgressIndicator, self).__init__(cases, flaky_tests_mode, templates)
+    super(ColorProgressIndicator, self).__init__(cases, flaky_tests_mode, measure_flakiness, templates)
 
   def ClearLine(self, last_line_length):
     print("\033[1K\r", end='')
@@ -506,7 +517,7 @@ def ClearLine(self, last_line_length):
 
 class MonochromeProgressIndicator(CompactProgressIndicator):
 
-  def __init__(self, cases, flaky_tests_mode):
+  def __init__(self, cases, flaky_tests_mode, measure_flakiness):
     templates = {
       'status_line': "[%(mins)02i:%(secs)02i|%%%(remaining) 4d|+%(passed) 4d|-%(failed) 4d]: %(test)s",
       'stdout': '%s',
@@ -514,7 +525,7 @@ def __init__(self, cases, flaky_tests_mode):
       'clear': lambda last_line_length: ("\r" + (" " * last_line_length) + "\r"),
       'max_length': 78
     }
-    super(MonochromeProgressIndicator, self).__init__(cases, flaky_tests_mode, templates)
+    super(MonochromeProgressIndicator, self).__init__(cases, flaky_tests_mode, measure_flakiness, templates)
 
   def ClearLine(self, last_line_length):
     print(("\r" + (" " * last_line_length) + "\r"), end='')
@@ -948,8 +959,8 @@ def GetTimeout(self, mode, section=''):
       timeout = timeout * 6
     return timeout
 
-def RunTestCases(cases_to_run, progress, tasks, flaky_tests_mode):
-  progress = PROGRESS_INDICATORS[progress](cases_to_run, flaky_tests_mode)
+def RunTestCases(cases_to_run, progress, tasks, flaky_tests_mode, measure_flakiness):
+  progress = PROGRESS_INDICATORS[progress](cases_to_run, flaky_tests_mode, measure_flakiness)
   return progress.Run(tasks)
 
 # -------------------------------------------
@@ -967,6 +978,7 @@ def RunTestCases(cases_to_run, progress, tasks, flaky_tests_mode):
 SLOW = 'slow'
 FLAKY = 'flaky'
 DONTCARE = 'dontcare'
+KEEP_RETRYING = 'keep_retrying'
 
 class Expression(object):
   pass
@@ -1357,6 +1369,9 @@ def BuildOptions():
   result.add_option("--flaky-tests",
                     help="Regard tests marked as flaky (run|skip|dontcare)",
                     default="run")
+  result.add_option("--measure-flakiness",
+                    help="When a test fails, re-run it x number of times",
+                    default=0, type="int")
   result.add_option("--skip-tests",
                     help="Tests that should not be executed (comma-separated)",
                     default="")
@@ -1733,7 +1748,7 @@ def should_keep(case):
   else:
     try:
       start = time.time()
-      if RunTestCases(cases_to_run, options.progress, options.j, options.flaky_tests):
+      if RunTestCases(cases_to_run, options.progress, options.j, options.flaky_tests, options.measure_flakiness):
         result = 0
       else:
         result = 1

From 6a816a205bd8ec00f9254784283a0f084c8f076d Mon Sep 17 00:00:00 2001
From: Antoine du Hamel
Date: Sat, 23 Jul 2022 22:22:21 +0200
Subject: [PATCH 2/3] fixup! tools: add more options to track flaky tests

---
 tools/test.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tools/test.py b/tools/test.py
index 293549166ff002..c3fc5cf56740a9 100755
--- a/tools/test.py
+++ b/tools/test.py
@@ -217,8 +217,9 @@ def RunSingle(self, parallel, thread_id):
           if not case.Run().UnexpectedOutput():
             self.flaky_failed.append(output)
             break
-          # If after 100 tries, the test is not passing, it's not flaky.
-          self.failed.append(output)
+          else:
+            # If after 100 tries, the test is not passing, it's not flaky.
+            self.failed.append(output)
         else:
           self.failed.append(output)
           if output.HasCrashed():
@@ -1367,7 +1368,7 @@ def BuildOptions():
   result.add_option("--cat", help="Print the source of the tests",
                     default=False, action="store_true")
   result.add_option("--flaky-tests",
-                    help="Regard tests marked as flaky (run|skip|dontcare)",
+                    help="Regard tests marked as flaky (run|skip|dontcare|keep_retrying)",
                     default="run")
   result.add_option("--measure-flakiness",
                     help="When a test fails, re-run it x number of times",
                     default=0, type="int")
@@ -1448,7 +1449,7 @@ def ProcessOptions(options):
     # -j and ignoring -J, which is the opposite of what we used to do before -J
     # became a legacy no-op.
     print('Warning: Legacy -J option is ignored. Using the -j option.')
-  if options.flaky_tests not in [RUN, SKIP, DONTCARE]:
+  if options.flaky_tests not in [RUN, SKIP, DONTCARE, KEEP_RETRYING]:
     print("Unknown flaky-tests mode %s" % options.flaky_tests)
     return False
   return True

From a189bff1e34787c61445af3a09a7f65754f5958e Mon Sep 17 00:00:00 2001
From: Antoine du Hamel
Date: Sun, 24 Jul 2022 01:26:27 +0200
Subject: [PATCH 3/3] fixup! tools: add more options to track flaky tests

---
 .github/workflows/build-tarball.yml  | 2 +-
 .github/workflows/coverage-linux.yml | 2 +-
 .github/workflows/doc.yml            | 2 +-
 .github/workflows/test-asan.yml      | 2 +-
 .github/workflows/test-linux.yml     | 2 +-
 .github/workflows/test-macos.yml     | 2 +-
 tools/test.py                        | 3 ++-
 7 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/build-tarball.yml b/.github/workflows/build-tarball.yml
index 6f7100bc8f050e..2ced884f35c539 100644
--- a/.github/workflows/build-tarball.yml
+++ b/.github/workflows/build-tarball.yml
@@ -94,4 +94,4 @@ jobs:
       - name: Test
         run: |
           cd $TAR_DIR
-          make run-ci -j2 V=1 TEST_CI_ARGS="-p dots --measure-flakiness 10"
+          make run-ci -j2 V=1 TEST_CI_ARGS="-p dots --measure-flakiness 9"
diff --git a/.github/workflows/coverage-linux.yml b/.github/workflows/coverage-linux.yml
index 310ed03a66e80d..db0a82c9e21d73 100644
--- a/.github/workflows/coverage-linux.yml
+++ b/.github/workflows/coverage-linux.yml
@@ -53,7 +53,7 @@ jobs:
       # TODO(bcoe): fix the couple tests that fail with the inspector enabled.
      # The cause is most likely coverage's use of the inspector.
      - name: Test
-        run: NODE_V8_COVERAGE=coverage/tmp make test-cov -j2 V=1 TEST_CI_ARGS="-p dots --measure-flakiness 10" || exit 0
+        run: NODE_V8_COVERAGE=coverage/tmp make test-cov -j2 V=1 TEST_CI_ARGS="-p dots --measure-flakiness 9" || exit 0
       - name: Report JS
         run: npx c8 report --check-coverage
         env:
diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml
index cdbd44062831c5..76660343ca2f46 100644
--- a/.github/workflows/doc.yml
+++ b/.github/workflows/doc.yml
@@ -40,4 +40,4 @@ jobs:
           name: docs
           path: out/doc
       - name: Test
-        run: NODE=$(command -v node) make test-doc-ci TEST_CI_ARGS="-p actions --measure-flakiness 10"
+        run: NODE=$(command -v node) make test-doc-ci TEST_CI_ARGS="-p actions --measure-flakiness 9"
diff --git a/.github/workflows/test-asan.yml b/.github/workflows/test-asan.yml
index d724e320d385c5..c8e7a09e2e4efd 100644
--- a/.github/workflows/test-asan.yml
+++ b/.github/workflows/test-asan.yml
@@ -58,4 +58,4 @@ jobs:
       - name: Build
         run: make build-ci -j2 V=1
       - name: Test
-        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions -t 300 --measure-flakiness 10"
+        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions -t 300 --measure-flakiness 9"
diff --git a/.github/workflows/test-linux.yml b/.github/workflows/test-linux.yml
index 10248a2100a2c8..24196849d1bac8 100644
--- a/.github/workflows/test-linux.yml
+++ b/.github/workflows/test-linux.yml
@@ -46,4 +46,4 @@ jobs:
       - name: Build
         run: make build-ci -j2 V=1 CONFIG_FLAGS="--error-on-warn"
       - name: Test
-        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions --measure-flakiness 10"
+        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions --measure-flakiness 9"
diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml
index a878326aebc92f..7faddc8eaedaec 100644
--- a/.github/workflows/test-macos.yml
+++ b/.github/workflows/test-macos.yml
@@ -60,4 +60,4 @@ jobs:
       - name: Build
         run: make build-ci -j2 V=1 CONFIG_FLAGS="--error-on-warn"
       - name: Test
-        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions --measure-flakiness 10"
+        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions --measure-flakiness 9"
diff --git a/tools/test.py b/tools/test.py
index c3fc5cf56740a9..35153057f0b9f2 100755
--- a/tools/test.py
+++ b/tools/test.py
@@ -226,7 +226,8 @@ def RunSingle(self, parallel, thread_id):
             self.crashed += 1
           if self.measure_flakiness:
             outputs = [case.Run() for _ in range(self.measure_flakiness)]
-            print(f" failed {len([i for i in outputs if i.UnexpectedOutput()])} out of {self.measure_flakiness}")
+            # +1s are there because the test already failed once at this point.
+            print(f" failed {len([i for i in outputs if i.UnexpectedOutput()]) + 1} out of {self.measure_flakiness + 1}")
       else:
         self.succeeded += 1
       self.remaining -= 1
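Illustrative usage (not part of the patches themselves): once the series is applied, the two new knobs can also be exercised locally through tools/test.py. The flag names, the keep_retrying mode, and the value 9 come straight from the diffs above; invoking the runner with python3 against the default suites of an already-built tree is an assumption.

    # Retry tests marked flaky until one run passes (up to 99 extra attempts),
    # and re-run any other unexpectedly failing test 9 more times to report
    # how often it fails.
    python3 tools/test.py --flaky-tests keep_retrying --measure-flakiness 9

With --flaky-tests=keep_retrying, a test marked flaky is only counted as failed if every retry fails; with --measure-flakiness=N, an unexpectedly failing test is re-run N more times and the printed count includes the initial failure (the "+1"s added in the last fixup).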