add retries to testament, use it for GC tests (#24279)

Testament now retries a test by a specified amount if it fails in any way other than an invalid spec. This is to deal with the flaky GC tests on Windows CI that fail in many different ways, from the linker randomly erroring, segfaults, etc. Unfortunately I couldn't do this cleanly in testament's current code. The proc `addResult`, which is the "final" proc called in a test run's lifetime, is now wrapped in a proc `finishTest` that returns a bool `true` if the test failed and has to be retried. This result is propagated up from `cmpMsgs` and `compilerOutputTests` until it reaches `testSpecHelper`, which handles these results by recursing if the test has to be retried. Since calling `testSpecHelper` means "run this test with one given configuration", this means every single matrix option/target etc. receive an equal amount of retries each. The result of `finishTest` is ignored in cases where it's known that it won't be retried due to passing, being skipped, having an invalid spec etc. It's also ignored in `testNimblePackages` because it's not necessary for those specific tests yet and similar retry behavior is already implemented for part of it. This was a last resort for the flaky GC tests but they've been a problem for years at this point, they give us more work to do and turn off contributors. Ideally GC tests failing should mark as "needs review" in the CI rather than "failed" but I don't know if Github supports something like this. (cherry picked from commit 720d0aee5c)
2025-12-28 17:04:41 +00:00 · 2024-10-12 23:48:44 +03:00
parent bffd2e0330
commit 660a9cecf0
28 changed files with 126 additions and 33 deletions
--- a/testament/categories.nim
+++ b/testament/categories.nim
@@ -449,7 +449,7 @@ proc testNimblePackages(r: var TResults; cat: Category; packageFilter: string) =
          if pkg.allowFailure:
            inc r.passed
            inc r.failedButAllowed
-          addResult(r, test, targetC, "", "", cmd & "\n" & outp, reFailed, allowFailure = pkg.allowFailure)
+          discard r.finishTest(test, targetC, "", "", cmd & "\n" & outp, reFailed, allowFailure = pkg.allowFailure)
          continue
        outp

@@ -465,21 +465,21 @@ proc testNimblePackages(r: var TResults; cat: Category; packageFilter: string) =
        discard tryCommand(cmds[i], maxRetries = 3)
      discard tryCommand(cmds[^1], reFailed = reBuildFailed)
      inc r.passed
-      r.addResult(test, targetC, "", "", "", reSuccess, allowFailure = pkg.allowFailure)
+      discard r.finishTest(test, targetC, "", "", "", reSuccess, allowFailure = pkg.allowFailure)

    errors = r.total - r.passed
    if errors == 0:
-      r.addResult(packageFileTest, targetC, "", "", "", reSuccess)
+      discard r.finishTest(packageFileTest, targetC, "", "", "", reSuccess)
    else:
-      r.addResult(packageFileTest, targetC, "", "", "", reBuildFailed)
+      discard r.finishTest(packageFileTest, targetC, "", "", "", reBuildFailed)

  except JsonParsingError:
    errors = 1
-    r.addResult(packageFileTest, targetC, "", "", "Invalid package file", reBuildFailed)
+    discard r.finishTest(packageFileTest, targetC, "", "", "Invalid package file", reBuildFailed)
    raise
  except ValueError:
    errors = 1
-    r.addResult(packageFileTest, targetC, "", "", "Unknown package", reBuildFailed)
+    discard r.finishTest(packageFileTest, targetC, "", "", "Unknown package", reBuildFailed)
    raise # bug #18805
  finally:
    if errors == 0: removeDir(packagesDir)
@@ -568,6 +568,7 @@ proc isJoinableSpec(spec: TSpec): bool =
    spec.err != reDisabled and
    not spec.unjoinable and
    spec.exitCode == 0 and
+    spec.retries == 0 and
    spec.input.len == 0 and
    spec.nimout.len == 0 and
    spec.nimoutFull == false and
--- a/testament/specs.nim
+++ b/testament/specs.nim
@@ -56,6 +56,7 @@ type
    reJoined,          # test is disabled because it was joined into the megatest
    reSuccess          # test was successful
    reInvalidSpec      # test had problems to parse the spec
+    reRetry            # test is being retried

  TTarget* = enum
    targetC = "c"
@@ -102,6 +103,7 @@ type
                      # but don't rely on much precision
    inlineErrors*: seq[InlineError] # line information to error message
    debugInfo*: string # debug info to give more context
+    retries*: int # number of retry attempts after the test fails

 proc getCmd*(s: TSpec): string =
  if s.cmd.len == 0:
@@ -477,6 +479,8 @@ proc parseSpec*(filename: string): TSpec =
          result.timeout = parseFloat(e.value)
        except ValueError:
          result.parseErrors.addLine "cannot interpret as a float: ", e.value
+      of "retries":
+        discard parseInt(e.value, result.retries)
      of "targets", "target":
        try:
          result.targets.incl parseTargets(e.value)
--- a/testament/testament.nim
+++ b/testament/testament.nim
@@ -275,16 +275,13 @@ proc testName(test: TTest, target: TTarget, extraOptions: string, allowFailure:
  name.strip()

 proc addResult(r: var TResults, test: TTest, target: TTarget,
-               extraOptions, expected, given: string, successOrig: TResultEnum,
+               extraOptions, expected, given: string, success: TResultEnum, duration: float,
               allowFailure = false, givenSpec: ptr TSpec = nil) =
  # instead of `ptr TSpec` we could also use `Option[TSpec]`; passing `givenSpec` makes it easier to get what we need
  # instead of having to pass individual fields, or abusing existing ones like expected vs given.
  # test.name is easier to find than test.name.extractFilename
  # A bit hacky but simple and works with tests/testament/tshould_not_work.nim
  let name = testName(test, target, extraOptions, allowFailure)
-  let duration = epochTime() - test.startTime
-  let success = if test.spec.timeout > 0.0 and duration > test.spec.timeout: reTimeout
-                else: successOrig

  let durationStr = duration.formatFloat(ffDecimal, precision = 2).align(5)
  if backendLogging:
@@ -345,6 +342,18 @@ proc addResult(r: var TResults, test: TTest, target: TTarget,
      discard waitForExit(p)
      close(p)

+proc finishTest(r: var TResults, test: TTest, target: TTarget,
+                extraOptions, expected, given: string, successOrig: TResultEnum,
+                allowFailure = false, givenSpec: ptr TSpec = nil): bool =
+  result = false
+  let duration = epochTime() - test.startTime
+  let success = if test.spec.timeout > 0.0 and duration > test.spec.timeout: reTimeout
+                else: successOrig
+  if test.spec.retries > 0 and success notin {reSuccess, reDisabled, reJoined, reInvalidSpec}:
+    return true
+  else:
+    addResult(r, test, target, extraOptions, expected, given, success, duration, allowFailure, givenSpec)
+
 proc toString(inlineError: InlineError, filename: string): string =
  result.add "$file($line, $col) $kind: $msg" % [
    "file", filename,
@@ -373,23 +382,23 @@ proc nimoutCheck(expected, given: TSpec): bool =
    result = false

 proc cmpMsgs(r: var TResults, expected, given: TSpec, test: TTest,
-             target: TTarget, extraOptions: string) =
+             target: TTarget, extraOptions: string): bool =
  if not checkForInlineErrors(expected, given) or
    (not expected.nimoutFull and not nimoutCheck(expected, given)):
-      r.addResult(test, target, extraOptions, expected.nimout & inlineErrorsMsgs(expected), given.nimout, reMsgsDiffer)
+      result = r.finishTest(test, target, extraOptions, expected.nimout & inlineErrorsMsgs(expected), given.nimout, reMsgsDiffer)
  elif strip(expected.msg) notin strip(given.msg):
-    r.addResult(test, target, extraOptions, expected.msg, given.msg, reMsgsDiffer)
+    result = r.finishTest(test, target, extraOptions, expected.msg, given.msg, reMsgsDiffer)
  elif not nimoutCheck(expected, given):
-    r.addResult(test, target, extraOptions, expected.nimout, given.nimout, reMsgsDiffer)
+    result = r.finishTest(test, target, extraOptions, expected.nimout, given.nimout, reMsgsDiffer)
  elif extractFilename(expected.file) != extractFilename(given.file) and
      "internal error:" notin expected.msg:
-    r.addResult(test, target, extraOptions, expected.file, given.file, reFilesDiffer)
+    result = r.finishTest(test, target, extraOptions, expected.file, given.file, reFilesDiffer)
  elif expected.line != given.line and expected.line != 0 or
       expected.column != given.column and expected.column != 0:
-    r.addResult(test, target, extraOptions, $expected.line & ':' & $expected.column,
+    result = r.finishTest(test, target, extraOptions, $expected.line & ':' & $expected.column,
                      $given.line & ':' & $given.column, reLinesDiffer)
  else:
-    r.addResult(test, target, extraOptions, expected.msg, given.msg, reSuccess)
+    result = r.finishTest(test, target, extraOptions, expected.msg, given.msg, reSuccess)
    inc(r.passed)

 proc generatedFile(test: TTest, target: TTarget): string =
@@ -428,7 +437,7 @@ proc codegenCheck(test: TTest, target: TTarget, spec: TSpec, expectedMsg: var st
    echo getCurrentExceptionMsg()

 proc compilerOutputTests(test: TTest, target: TTarget, extraOptions: string,
-                         given: var TSpec, expected: TSpec; r: var TResults) =
+                         given: var TSpec, expected: TSpec; r: var TResults): bool =
  var expectedmsg: string = ""
  var givenmsg: string = ""
  if given.err == reSuccess:
@@ -443,7 +452,7 @@ proc compilerOutputTests(test: TTest, target: TTarget, extraOptions: string,
  else:
    givenmsg = "$ " & given.cmd & '\n' & given.nimout
  if given.err == reSuccess: inc(r.passed)
-  r.addResult(test, target, extraOptions, expectedmsg, givenmsg, given.err)
+  result = r.finishTest(test, target, extraOptions, expectedmsg, givenmsg, given.err)

 proc getTestSpecTarget(): TTarget =
  if getEnv("NIM_COMPILE_TO_CPP", "false") == "true":
@@ -459,31 +468,38 @@ proc equalModuloLastNewline(a, b: string): bool =

 proc testSpecHelper(r: var TResults, test: var TTest, expected: TSpec,
                    target: TTarget, extraOptions: string, nimcache: string) =
-  test.startTime = epochTime()
+  template maybeRetry(x: bool) =
+    if x:
+      test.spec.err = reRetry
+      dec test.spec.retries
+      testSpecHelper(r, test, expected, target, extraOptions, nimcache)
+      return
+  if test.spec.err != reRetry:
+    test.startTime = epochTime()
  if testName(test, target, extraOptions, false) in skips:
    test.spec.err = reDisabled

  if test.spec.err in {reDisabled, reJoined}:
-    r.addResult(test, target, extraOptions, "", "", test.spec.err)
+    discard r.finishTest(test, target, extraOptions, "", "", test.spec.err)
    inc(r.skipped)
    return
  var given = callNimCompiler(expected.getCmd, test.name, test.options, nimcache, target, extraOptions)
  case expected.action
  of actionCompile:
-    compilerOutputTests(test, target, extraOptions, given, expected, r)
+    maybeRetry compilerOutputTests(test, target, extraOptions, given, expected, r)
  of actionRun:
    if given.err != reSuccess:
-      r.addResult(test, target, extraOptions, "", "$ " & given.cmd & '\n' & given.nimout, given.err, givenSpec = given.addr)
+      maybeRetry r.finishTest(test, target, extraOptions, "", "$ " & given.cmd & '\n' & given.nimout, given.err, givenSpec = given.addr)
    else:
      let isJsTarget = target == targetJS
      var exeFile = changeFileExt(test.name, if isJsTarget: "js" else: ExeExt)
      if not fileExists(exeFile):
-        r.addResult(test, target, extraOptions, expected.output,
+        maybeRetry r.finishTest(test, target, extraOptions, expected.output,
                    "executable not found: " & exeFile, reExeNotFound)
      else:
        let nodejs = if isJsTarget: findNodeJs() else: ""
        if isJsTarget and nodejs == "":
-          r.addResult(test, target, extraOptions, expected.output, "nodejs binary not in PATH",
+          maybeRetry r.finishTest(test, target, extraOptions, expected.output, "nodejs binary not in PATH",
                      reExeNotFound)
        else:
          var exeCmd: string
@@ -515,19 +531,19 @@ proc testSpecHelper(r: var TResults, test: var TTest, expected: TSpec,
              buf
          if exitCode != expected.exitCode:
            given.err = reExitcodesDiffer
-            r.addResult(test, target, extraOptions, "exitcode: " & $expected.exitCode,
+            maybeRetry r.finishTest(test, target, extraOptions, "exitcode: " & $expected.exitCode,
                              "exitcode: " & $exitCode & "\n\nOutput:\n" &
                              bufB, reExitcodesDiffer)
          elif (expected.outputCheck == ocEqual and not expected.output.equalModuloLastNewline(bufB)) or
              (expected.outputCheck == ocSubstr and expected.output notin bufB):
            given.err = reOutputsDiffer
-            r.addResult(test, target, extraOptions, expected.output, bufB, reOutputsDiffer)
-          compilerOutputTests(test, target, extraOptions, given, expected, r)
+            maybeRetry r.finishTest(test, target, extraOptions, expected.output, bufB, reOutputsDiffer)
+          maybeRetry compilerOutputTests(test, target, extraOptions, given, expected, r)
  of actionReject:
    # Make sure its the compiler rejecting and not the system (e.g. segfault)
-    cmpMsgs(r, expected, given, test, target, extraOptions)
+    maybeRetry cmpMsgs(r, expected, given, test, target, extraOptions)
    if given.exitCode != QuitFailure:
-      r.addResult(test, target, extraOptions, "exitcode: " & $QuitFailure,
+      maybeRetry r.finishTest(test, target, extraOptions, "exitcode: " & $QuitFailure,
                        "exitcode: " & $given.exitCode & "\n\nOutput:\n" &
                        given.nimout, reExitcodesDiffer)

@@ -552,7 +568,7 @@ proc targetHelper(r: var TResults, test: TTest, expected: TSpec, extraOptions: s
  for target in expected.targets:
    inc(r.total)
    if target notin gTargets:
-      r.addResult(test, target, extraOptions, "", "", reDisabled)
+      discard r.finishTest(test, target, extraOptions, "", "", reDisabled)
      inc(r.skipped)
    elif simulate:
      inc count
@@ -567,7 +583,7 @@ proc testSpec(r: var TResults, test: TTest, targets: set[TTarget] = {}) =
  var expected = test.spec
  if expected.parseErrors.len > 0:
    # targetC is a lie, but a parameter is required
-    r.addResult(test, targetC, "", "", expected.parseErrors, reInvalidSpec)
+    discard r.finishTest(test, targetC, "", "", expected.parseErrors, reInvalidSpec)
    inc(r.total)
    return

--- a/testament/tests/shouldfail/tnotenoughretries.nim
+++ b/testament/tests/shouldfail/tnotenoughretries.nim
@@ -0,0 +1,20 @@
+discard """
+  retries: 1
+"""
+
+import os
+
+const tempFile = "tnotenoughretries_temp"
+
+if not fileExists(tempFile):
+  writeFile(tempFile, "abc")
+  quit(1)
+else:
+  let content = readFile(tempFile)
+  if content == "abc":
+    writeFile(tempFile, "def")
+    quit(1)
+  else:
+    # success
+    removeFile(tempFile)
+    discard
--- a/tests/gc/bintrees.nim
+++ b/tests/gc/bintrees.nim
@@ -1,3 +1,7 @@
+discard """
+  retries: 2
+"""
+
 # -*- nim -*-

 import os, strutils
--- a/tests/gc/closureleak.nim
+++ b/tests/gc/closureleak.nim
@@ -1,6 +1,7 @@
 discard """
  outputsub: "true"
  disabled: "32bit"
+  retries: 2
 """

 type
--- a/tests/gc/cyclecollector.nim
+++ b/tests/gc/cyclecollector.nim
@@ -1,3 +1,6 @@
+discard """
+  retries: 2
+"""

 # Program to detect bug #1796 reliably

--- a/tests/gc/cycleleak.nim
+++ b/tests/gc/cycleleak.nim
@@ -1,5 +1,6 @@
 discard """
  outputsub: "no leak: "
+  retries: 2
 """

 type
--- a/tests/gc/foreign_thr.nim
+++ b/tests/gc/foreign_thr.nim
@@ -6,6 +6,7 @@ Hello from thread
 Hello from thread
 '''
  cmd: "nim $target --hints:on --threads:on --tlsEmulation:off $options $file"
+  retries: 2
 """
 # Copied from stdlib
 import strutils
--- a/tests/gc/gcbench.nim
+++ b/tests/gc/gcbench.nim
@@ -1,5 +1,6 @@
 discard """
  outputsub: "Success!"
+  retries: 2
 """

 # This is adapted from a benchmark written by John Ellis and Pete Kovac
--- a/tests/gc/gcemscripten.nim
+++ b/tests/gc/gcemscripten.nim
@@ -1,5 +1,6 @@
 discard """
  outputsub: "77\n77"
+  retries: 2
 """

 ## Check how GC/Alloc works in Emscripten
--- a/tests/gc/gcleak.nim
+++ b/tests/gc/gcleak.nim
@@ -1,5 +1,6 @@
 discard """
  outputsub: "no leak: "
+  retries: 2
 """

 when defined(GC_setMaxPause):
--- a/tests/gc/gcleak2.nim
+++ b/tests/gc/gcleak2.nim
@@ -1,5 +1,6 @@
 discard """
  outputsub: "no leak: "
+  retries: 2
 """

 when defined(GC_setMaxPause):
--- a/tests/gc/gcleak3.nim
+++ b/tests/gc/gcleak3.nim
@@ -1,5 +1,6 @@
 discard """
  outputsub: "no leak: "
+  retries: 2
 """

 when defined(GC_setMaxPause):
--- a/tests/gc/gcleak4.nim
+++ b/tests/gc/gcleak4.nim
@@ -1,5 +1,6 @@
 discard """
  outputsub: "no leak: "
+  retries: 2
 """

 type
--- a/tests/gc/gcleak5.nim
+++ b/tests/gc/gcleak5.nim
@@ -1,5 +1,6 @@
 discard """
  output: "success"
+  retries: 2
 """

 import os, times
--- a/tests/gc/gctest.nim
+++ b/tests/gc/gctest.nim
@@ -1,5 +1,6 @@
 discard """
  outputsub: "finished"
+  retries: 2
 """

 # Test the garbage collector.
--- a/tests/gc/growobjcrash.nim
+++ b/tests/gc/growobjcrash.nim
@@ -1,3 +1,7 @@
+discard """
+  retries: 2
+"""
+
 import std/[cgi, strtabs]

 proc handleRequest(query: string): StringTableRef =
--- a/tests/gc/refarrayleak.nim
+++ b/tests/gc/refarrayleak.nim
@@ -1,5 +1,6 @@
 discard """
  outputsub: "no leak: "
+  retries: 2
 """

 type
--- a/tests/gc/stackrefleak.nim
+++ b/tests/gc/stackrefleak.nim
@@ -1,5 +1,6 @@
 discard """
  outputsub: "no leak: "
+  retries: 2
 """

 type
--- a/tests/gc/tdisable_orc.nim
+++ b/tests/gc/tdisable_orc.nim
@@ -1,5 +1,6 @@
 discard """
  joinable: false
+  retries: 2
 """

 import std/asyncdispatch
--- a/tests/gc/thavlak.nim
+++ b/tests/gc/thavlak.nim
@@ -8,6 +8,7 @@ Performing Loop Recognition
 Another 3 iterations...
 ...
 Found 1 loops (including artificial root node) (3)'''
+  retries: 2
 """

 # bug #3184
--- a/tests/gc/tlists.nim
+++ b/tests/gc/tlists.nim
@@ -1,5 +1,6 @@
 discard """
    output: '''Success'''
+    retries: 2
 """

 # bug #3793
--- a/tests/gc/trace_globals.nim
+++ b/tests/gc/trace_globals.nim
@@ -3,6 +3,7 @@ discard """
 10000000
 10000000
 10000000'''
+  retries: 2
 """

 # bug #17085
--- a/tests/gc/tregionleak.nim
+++ b/tests/gc/tregionleak.nim
@@ -4,6 +4,7 @@ discard """
 finalized
 finalized
 '''
+  retries: 2
 """

 proc finish(o: RootRef) =
--- a/tests/gc/weakrefs.nim
+++ b/tests/gc/weakrefs.nim
@@ -1,5 +1,6 @@
 discard """
  output: "true"
+  retries: 2
 """

 import intsets
--- a/tests/testament/tretries.nim
+++ b/tests/testament/tretries.nim
@@ -0,0 +1,20 @@
+discard """
+  retries: 2
+"""
+
+import os
+
+const tempFile = "tretries_temp"
+
+if not fileExists(tempFile):
+  writeFile(tempFile, "abc")
+  quit(1)
+else:
+  let content = readFile(tempFile)
+  if content == "abc":
+    writeFile(tempFile, "def")
+    quit(1)
+  else:
+    # success
+    removeFile(tempFile)
+    discard
--- a/tests/testament/tshould_not_work.nim
+++ b/tests/testament/tshould_not_work.nim
@@ -24,6 +24,8 @@ FAIL: tests/shouldfail/tnimout.nim
 Failure: reMsgsDiffer
 FAIL: tests/shouldfail/tnimoutfull.nim
 Failure: reMsgsDiffer
+FAIL: tests/shouldfail/tnotenoughretries.nim
+Failure: reExitcodesDiffer
 FAIL: tests/shouldfail/toutput.nim
 Failure: reOutputsDiffer
 FAIL: tests/shouldfail/toutputsub.nim
@@ -43,7 +45,7 @@ import stdtest/testutils

 proc main =
  const nim = getCurrentCompilerExe()
-  let testamentExe = "bin/testament"
+  let testamentExe = "testament/testament"
  let cmd = fmt"{testamentExe} --directory:testament --colors:off --backendLogging:off --nim:{nim} category shouldfail"
  let (outp, status) = execCmdEx(cmd)
  doAssert status == 1, $status