From 69dc7485a9792908ca97df6ebe8f0000e8ceb7f7 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Tue, 15 Jul 2025 12:15:51 +0200 Subject: [PATCH 01/36] Proposal #74609: goroutine leak detection by using the garbage collector. --- .../goexperiment/exp_deadlockgc_off.go | 8 + .../goexperiment/exp_deadlockgc_on.go | 8 + src/internal/goexperiment/flags.go | 3 + src/runtime/export_test.go | 2 +- src/runtime/mgc.go | 308 +++++++++++++++++- src/runtime/mgcmark.go | 145 ++++++++- src/runtime/preempt.go | 3 +- src/runtime/proc.go | 14 +- src/runtime/runtime2.go | 19 ++ src/runtime/sema.go | 50 ++- src/runtime/traceback.go | 17 +- src/runtime/tracestatus.go | 2 +- 12 files changed, 534 insertions(+), 45 deletions(-) create mode 100644 src/internal/goexperiment/exp_deadlockgc_off.go create mode 100644 src/internal/goexperiment/exp_deadlockgc_on.go diff --git a/src/internal/goexperiment/exp_deadlockgc_off.go b/src/internal/goexperiment/exp_deadlockgc_off.go new file mode 100644 index 00000000000000..185171f4414c6c --- /dev/null +++ b/src/internal/goexperiment/exp_deadlockgc_off.go @@ -0,0 +1,8 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build !goexperiment.deadlockgc + +package goexperiment + +const DeadlockGC = false +const DeadlockGCInt = 0 diff --git a/src/internal/goexperiment/exp_deadlockgc_on.go b/src/internal/goexperiment/exp_deadlockgc_on.go new file mode 100644 index 00000000000000..5c2b07ca55653b --- /dev/null +++ b/src/internal/goexperiment/exp_deadlockgc_on.go @@ -0,0 +1,8 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build goexperiment.deadlockgc + +package goexperiment + +const DeadlockGC = true +const DeadlockGCInt = 1 diff --git a/src/internal/goexperiment/flags.go b/src/internal/goexperiment/flags.go index dd7a4f446c1a57..dff301cd74d439 100644 --- a/src/internal/goexperiment/flags.go +++ b/src/internal/goexperiment/flags.go @@ -120,4 +120,7 @@ type Flags struct { // RandomizedHeapBase enables heap base address randomization on 64-bit // platforms. RandomizedHeapBase64 bool + + // DeadlockGC enables the Deadlock GC implementation. + DeadlockGC bool } diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 1f55717f0a1a60..466e6eb7ab31ae 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -1221,7 +1221,7 @@ func (t *SemTable) Enqueue(addr *uint32) { s.releasetime = 0 s.acquiretime = 0 s.ticket = 0 - t.semTable.rootFor(addr).queue(addr, s, false) + t.semTable.rootFor(addr).queue(addr, s, false, false) } // Dequeue simulates dequeuing a waiter for a semaphore (or lock) at addr. diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index f2df1a00e0c683..b7105aa965f62a 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -373,17 +373,32 @@ type workType struct { // Number of roots of various root types. Set by gcPrepareMarkRoots. // - // nStackRoots == len(stackRoots), but we have nStackRoots for - // consistency. - nDataRoots, nBSSRoots, nSpanRoots, nStackRoots int + // During normal GC cycle, nStackRoots == nLiveStackRoots == len(stackRoots) + // during deadlock detection GC, nLiveStackRoots is the number of stackRoots + // to examine, and nStackRoots == len(stackRoots), which include goroutines that are + // unmarked / not runnable + nDataRoots, nBSSRoots, nSpanRoots, nStackRoots, nLiveStackRoots int + + // The GC has performed deadlock detection during this GC cycle. + detectedDeadlocks bool + + // Is set to true by DetectDeadlocks(), instructing the next GC cycle to perform deadlock detection. 
+ pendingDeadlockDetection bool + + // When set, the GC is running in deadlock detection mode. + // This can be triggered with a runtime flag. + deadlockDetectionMode bool // Base indexes of each root type. Set by gcPrepareMarkRoots. baseData, baseBSS, baseSpans, baseStacks, baseEnd uint32 - // stackRoots is a snapshot of all of the Gs that existed - // before the beginning of concurrent marking. The backing - // store of this must not be modified because it might be - // shared with allgs. + // stackRoots is a snapshot of all of the Gs that existed before the + // beginning of concurrent marking. During deadlock detection GC, stackRoots + // is partitioned into two sets; to the left of nLiveStackRoots are stackRoots + // of running / runnable goroutines and to the right of nLiveStackRoots are + // stackRoots of unmarked / not runnable goroutines + // gcDiscoverMoreStackRoots modifies the stackRoots array to redo the partition + // after each marking phase stackRoots []*g // Each type of GC state transition is protected by a lock. @@ -550,6 +565,29 @@ func GC() { releasem(mp) } +// DetectDeadlocks instructs the Go garbage collector to attempt +// partial deadlock detection. +// +// Only operates if deadlockgc is enabled in GOEXPERIMENT. +// Otherwise, it just runs runtime.GC(). +func DetectDeadlocks() { + if !goexperiment.DeadlockGC { + GC() + return + } + + // This write should be thread-safe, as the overwritten value is true. + // pendingDeadlockDetection is only set to false under STW at the start + // of the GC cycle that picks it up. + work.pendingDeadlockDetection = true + + // This read should be thread-safe for the same reason as the write above above. + // At most, we trigger the GC an additional time. + for work.pendingDeadlockDetection { + GC() + } +} + // gcWaitOnMark blocks until GC finishes the Nth mark phase. If GC has // already completed this mark phase, it returns immediately. func gcWaitOnMark(n uint32) { @@ -695,6 +733,11 @@ func gcStart(trigger gcTrigger) { mode = gcForceMode } else if debug.gcstoptheworld == 2 { mode = gcForceBlockMode + } else if goexperiment.DeadlockGC { + if work.pendingDeadlockDetection { + // Fully stop the world if running deadlock detection. + mode = gcForceBlockMode + } } // Ok, we're doing it! Stop everybody else @@ -757,6 +800,7 @@ func gcStart(trigger gcTrigger) { clearpools() work.cycles.Add(1) + work.detectedDeadlocks = false // Assists and workers can start the moment we start // the world. @@ -788,6 +832,14 @@ func gcStart(trigger gcTrigger) { // possible. setGCPhase(_GCmark) + if goexperiment.DeadlockGC { + if work.pendingDeadlockDetection { + // Write is thread-safe because the world is stopped + work.deadlockDetectionMode = true + work.pendingDeadlockDetection = false + } + } + gcBgMarkPrepare() // Must happen before assists are enabled. gcPrepareMarkRoots() @@ -888,6 +940,11 @@ func gcMarkDone() { // Ensure only one thread is running the ragged barrier at a // time. semacquire(&work.markDoneSema) + if goexperiment.DeadlockGC { + if work.deadlockDetectionMode { + gcDiscoverMoreStackRoots() + } + } top: // Re-check transition condition under transition lock. @@ -947,8 +1004,7 @@ top: // communicated work since we took markDoneSema. Therefore // there are no grey objects and no more objects can be // shaded. Transition to mark termination. 
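To make the intended workflow concrete, here is a minimal sketch of how the API introduced by this patch could be exercised from user code, assuming the toolchain was built with GOEXPERIMENT=deadlockgc; everything except runtime.DetectDeadlocks is illustrative example code, not part of the patch:

package main

import (
	"runtime"
	"time"
)

// leak starts a goroutine that blocks on a channel which becomes unreachable
// once leak returns, so no other goroutine can ever unblock it.
func leak() {
	ch := make(chan int)
	go func() {
		<-ch // waits forever
	}()
}

func main() {
	leak()
	time.Sleep(100 * time.Millisecond) // let the leaked goroutine park

	// With GOEXPERIMENT=deadlockgc this runs a GC cycle in deadlock detection
	// mode and prints a "goroutine leak!" report for the goroutine above;
	// without the experiment it simply falls back to runtime.GC().
	runtime.DetectDeadlocks()
}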
- now := nanotime() - work.tMarkTerm = now + var now int64 getg().m.preemptoff = "gcing" var stw worldStop systemstack(func() { @@ -994,6 +1050,54 @@ top: }) semrelease(&worldsema) goto top + } else if goexperiment.DeadlockGC { + // Otherwise, do a deadlock detection round. + // Only do one deadlock detection round per GC cycle. + if work.deadlockDetectionMode && !work.detectedDeadlocks { + work.detectedDeadlocks = detectDeadlocks() + + getg().m.preemptoff = "" + systemstack(func() { + // Accumulate the time we were stopped before we had to start again. + work.cpuStats.accumulateGCPauseTime(nanotime()-stw.finishedStopping, work.maxprocs) + + now := startTheWorldWithSema(0, stw) + work.pauseNS += now - stw.startedStopping + }) + semrelease(&worldsema) + goto top + } + + now = nanotime() + work.tMarkTerm = now + // Check again whether any P needs to flush its write barrier + // to the GC work queue. + systemstack(func() { + for _, p := range allp { + wbBufFlush1(p) + if !p.gcw.empty() { + restart = true + break + } + } + }) + + // If that is the case, restart again. Once restarts are no longer needed, + // run this without deadlock detection. + if restart { + gcDebugMarkDone.restartedDueTo27993 = true + + getg().m.preemptoff = "" + systemstack(func() { + // Accumulate the time we were stopped before we had to start again. + work.cpuStats.accumulateGCPauseTime(nanotime()-stw.finishedStopping, work.maxprocs) + + now := startTheWorldWithSema(0, stw) + work.pauseNS += now - stw.startedStopping + }) + semrelease(&worldsema) + goto top + } } gcComputeStartingStackSize() @@ -1032,6 +1136,171 @@ top: gcMarkTermination(stw) } +// Check if an object is marked in the heap. +func checkIfMarked(p unsafe.Pointer) bool { + obj, span, objIndex := findObject(uintptr(p), 0, 0) + if obj != 0 { + mbits := span.markBitsForIndex(objIndex) + return mbits.isMarked() + } + // if we fall through to get here, we are within the stack ranges of reachable goroutines + return true +} + +// maybeLive checks whether a goroutine may still be semantically runnable. +// This returns true if the goroutine is waiting on at least one concurrency primitive +// which is reachable in memory, i.e., has been by the GC. +// +// For goroutines which are semantically runnable, this will eventually return true +// as the GC marking phase progresses. +func (gp *g) maybeLive() bool { + // Unmask the goroutine address to ensure we are not + // dereferencing a masked address. + gp = gp.unmask() + + switch gp.waitreason { + case waitReasonSelectNoCases, + waitReasonChanSendNilChan, + waitReasonChanReceiveNilChan: + // Select with no cases or communicating on nil channels + // make goroutines unrunnable by definition. + return false + case waitReasonChanReceive, + waitReasonSelect, + waitReasonChanSend: + // Cycle all through all *sudog to check whether + // the goroutine is waiting on a marked channel. + for sg := gp.waiting; sg != nil; sg = sg.waitlink { + if checkIfMarked(unsafe.Pointer(sg.c)) { + return true + } + } + return false + case waitReasonSyncCondWait, + waitReasonSyncWaitGroupWait, + waitReasonSyncMutexLock, + waitReasonSyncRWMutexLock, + waitReasonSyncRWMutexRLock: + // If waiting on mutexes, wait groups, or condition variables, + // check if the synchronization primitive attached to the sudog is marked. + if gp.waiting != nil { + // Unmask the sema address and check if it's marked. + return checkIfMarked(gcUnmask(gp.waiting.elem)) + } + } + return true +} + +// unmask returns a *g object with an unmasked address. 
+//
+//go:nosplit
+func (gp *g) unmask() *g {
+	return (*g)(gcUnmask(unsafe.Pointer(gp)))
+}
+
+// mask returns a *g object with a masked address.
+//
+//go:nosplit
+func (gp *g) mask() *g {
+	return (*g)(gcMask(unsafe.Pointer(gp)))
+}
+
+// gcDiscoverMoreStackRoots checks whether any blocked goroutines have become
+// reachable again (see maybeLive); if so, it moves them into the live portion
+// of stackRoots and raises work.markrootJobs so that their stacks get scanned.
+func gcDiscoverMoreStackRoots() {
+	// To begin with, we have a set of unchecked stackRoots between
+	// vIndex and ivIndex. During the loop, anything < vIndex is a
+	// maybe-live stackRoot and anything >= ivIndex is a still-blocked
+	// stackRoot; the loop terminates when the two indices meet.
+	var vIndex, ivIndex int = work.nLiveStackRoots, work.nStackRoots
+
+	// Reorder the goroutine list.
+	for vIndex < ivIndex {
+		gp := work.stackRoots[vIndex]
+		if gp.maybeLive() {
+			work.stackRoots[vIndex] = gp
+			vIndex = vIndex + 1
+			continue
+		}
+		for ivIndex = ivIndex - 1; ivIndex != vIndex; ivIndex = ivIndex - 1 {
+			if swapGp := work.stackRoots[ivIndex]; swapGp.maybeLive() {
+				work.stackRoots[ivIndex] = gp
+				work.stackRoots[vIndex] = swapGp.unmask()
+				vIndex = vIndex + 1
+				break
+			}
+		}
+	}
+
+	var oldRootJobs int32 = int32(atomic.Load(&work.markrootJobs))
+	var newRootJobs int32 = int32(work.baseStacks) + int32(vIndex)
+
+	if newRootJobs > oldRootJobs {
+		// Advance markrootJobs to publish the newly discovered live stack roots as root jobs.
+		work.nLiveStackRoots = vIndex
+		atomic.Store(&work.markrootJobs, uint32(newRootJobs))
+	}
+}
+
+// detectDeadlocks scans the remaining stackRoots and marks any which are
+// blocked over exclusively unreachable concurrency primitives as leaked (deadlocked).
+// Returns true if goroutine leak was performed (or unnecessary).
+// Returns false if the GC cycle has not yet reached a fix point for reachable goroutines.
+func detectDeadlocks() bool {
+	// Report deadlocks and mark them unreachable, and resume marking
+	// we still need to mark these unreachable *g structs as they
+	// get reused, but their stack won't get scanned
+	if work.nLiveStackRoots == work.nStackRoots {
+		// nStackRoots == nLiveStackRoots means that all goroutines are marked.
+		return true
+	}
+
+	// Try to reach another fix point here. Keep scouting for runnable goroutines until
+	// none are left.
+	// Valid goroutines may be found after all GC work is drained.
+	// Make sure these are pushed to the runnable set and ready to be marked.
+	var foundMoreWork bool
+	for i := work.nLiveStackRoots; i < work.nStackRoots; i++ {
+		gp := work.stackRoots[i].unmask()
+		if readgstatus(gp) == _Gwaiting && !gp.maybeLive() {
+			// Goroutines that are still blocked and not maybe-live are skipped.
+			continue
+		}
+		work.stackRoots[i] = work.stackRoots[work.nLiveStackRoots]
+		work.stackRoots[work.nLiveStackRoots] = gp
+		work.nLiveStackRoots += 1
+		// We now have one more markroot job.
+		work.markrootJobs += 1
+		// We might still have some work to do.
+		// Make sure the next iteration re-checks for new runnable goroutines.
+		foundMoreWork = true
+	}
+	if foundMoreWork {
+		// We found more work, so we need to resume the marking phase.
+		return false
+	}
+
+	// For the remaining goroutines, mark them as unreachable and deadlocking.
+	for i := work.nLiveStackRoots; i < work.nStackRoots; i++ {
+		gp := work.stackRoots[i].unmask()
+		casgstatus(gp, _Gwaiting, _Gdeadlocked)
+		fn := findfunc(gp.startpc)
+		if fn.valid() {
+			print("goroutine leak! 
goroutine ", gp.goid, ": ", funcname(fn), " Stack size: ", gp.stack.hi-gp.stack.lo, " bytes\n") + } else { + print("goroutine leak! goroutine ", gp.goid, ": !unnamed goroutine!", " Stack size: ", gp.stack.hi-gp.stack.lo, " bytes\n") + } + traceback(gp.sched.pc, gp.sched.sp, gp.sched.lr, gp) + println() + work.stackRoots[i] = gp + } + // Put the remaining roots as ready for marking and drain them. + work.markrootJobs += uint32(work.nStackRoots - work.nLiveStackRoots) + work.nLiveStackRoots = work.nStackRoots + return true +} + // World must be stopped and mark assists and background workers must be // disabled. func gcMarkTermination(stw worldStop) { @@ -1185,6 +1454,13 @@ func gcMarkTermination(stw worldStop) { } systemstack(func() { + if goexperiment.DeadlockGC { + // Pull the GC out of deadlock detection mode. + // Write is thread-safe because the world is stopped, and only one + // GC cycle can run at a time. + work.deadlockDetectionMode = false + } + // The memstats updated above must be updated with the world // stopped to ensure consistency of some values, such as // sched.idleTime and sched.totaltime. memstats also include @@ -1612,10 +1888,12 @@ func gcMarkWorkAvailable(p *p) bool { if !work.full.empty() || !work.spanq.empty() { return true // global work available } - if work.markrootNext < work.markrootJobs { - return true // root scan work available + if !work.deadlockDetectionMode { + return work.markrootNext < work.markrootJobs } - return false + rootNext := atomic.Load(&work.markrootNext) + rootJobs := atomic.Load(&work.markrootJobs) + return rootNext < rootJobs } // gcMark runs the mark (or, for concurrent GC, mark termination) @@ -1628,8 +1906,10 @@ func gcMark(startTime int64) { work.tstart = startTime // Check that there's no marking work remaining. - if work.full != 0 || work.markrootNext < work.markrootJobs || !work.spanq.empty() { - print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, " spanq.n=", work.spanq.size(), "\n") + rootNext := atomic.Load(&work.markrootNext) + rootJobs := atomic.Load(&work.markrootJobs) + if work.full != 0 || rootNext < rootJobs { + print("runtime: full=", hex(work.full), " next=", rootNext, " jobs=", rootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n") panic("non-empty mark queue after concurrent mark") } diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 8b306045c5da21..d7e6daeaadb646 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -51,8 +51,86 @@ const ( // Must be a multiple of the pageInUse bitmap element size and // must also evenly divide pagesPerArena. pagesPerSpanRoot = 512 + + gcUndoBitMask = uintptr(uintptrMask >> 2) // This constant reserves some bits of the address space for the GC to use in order to mask addresses. + gcBitMask = ^gcUndoBitMask // This flips every bit in gcUndoBitMask of uinptr width ) +// gcMask masks addresses that should not be automatically marked during the GC. +// +//go:nosplit +func gcMask(p unsafe.Pointer) unsafe.Pointer { + if goexperiment.DeadlockGC { + return unsafe.Pointer(uintptr(p) | gcBitMask) + } + return p +} + +// gcUnmask undoes the bit-mask applied to a pointer. 
+// +//go:nosplit +func gcUnmask(p unsafe.Pointer) unsafe.Pointer { + if goexperiment.DeadlockGC { + return unsafe.Pointer(uintptr(p) & gcUndoBitMask) + } + return p +} + +// internalBlocked returns true if the goroutine is blocked due to a +// non-deadlocking waitReason, e.g. waiting for the netpoller or garbage collector. +// Such goroutines should never be considered for deadlock detection. +// +//go:nosplit +func (gp *g) internalBlocked() bool { + reason := gp.waitreason + return reason != waitReasonChanReceive && + reason != waitReasonSyncWaitGroupWait && + reason != waitReasonChanSend && + reason != waitReasonChanReceiveNilChan && + reason != waitReasonChanSendNilChan && + reason != waitReasonSelect && + reason != waitReasonSelectNoCases && + reason != waitReasonSyncMutexLock && + reason != waitReasonSyncRWMutexRLock && + reason != waitReasonSyncRWMutexLock && + reason != waitReasonSyncCondWait +} + +// The world must be stopped or allglock must be held. +// go through the snapshot of allgs, putting them into an arrays, +// separated by index, where [0:blockedIndex] contains only running Gs +// allGs[blockedIndex:] contain only blocking Gs +// To avoid GC from marking and scanning the blocked Gs by scanning +// the returned array (which is heap allocated), we mask the highest +// bit of the pointers to Gs with gcBitMask. +func allGsSnapshotSortedForGC() ([]*g, int) { + assertWorldStoppedOrLockHeld(&allglock) + + allgsSorted := make([]*g, len(allgs)) + + // Indices cutting off runnable and blocked Gs. + var currIndex, blockedIndex = 0, len(allgsSorted) - 1 + for _, gp := range allgs { + gp = gp.unmask() + // not sure if we need atomic load because we are stopping the world, + // but do it just to be safe for now + if status := readgstatus(gp); status != _Gwaiting || gp.internalBlocked() { + allgsSorted[currIndex] = gp + currIndex++ + } else { + allgsSorted[blockedIndex] = gp.mask() + blockedIndex-- + } + } + + // Because the world is stopped or allglock is held, allgadd + // cannot happen concurrently with this. allgs grows + // monotonically and existing entries never change, so we can + // simply return a copy of the slice header. For added safety, + // we trim everything past len because that can still change. + return allgsSorted, blockedIndex + 1 +} + // gcPrepareMarkRoots queues root scanning jobs (stacks, globals, and // some miscellany) and initializes scanning-related state. // @@ -102,11 +180,23 @@ func gcPrepareMarkRoots() { // ignore them because they begin life without any roots, so // there's nothing to scan, and any roots they create during // the concurrent phase will be caught by the write barrier. 
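For contrast, a sketch (illustrative user code, not part of the patch) of a goroutine that blocks but must never be reported: the channel it waits on stays reachable from a global, so the GC marks it, maybeLive eventually returns true, and the goroutine is moved back into the live root set instead of being flagged:

package main

var requests = make(chan int) // globally reachable, so always marked by the GC

// worker blocks in "chan receive", but it is not a leak: any producer can
// still reach requests and send to it.
func worker() {
	for r := range requests {
		_ = r
	}
}

func main() {
	go worker()
	requests <- 1 // the worker is live and consumes the value
}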
- work.stackRoots = allGsSnapshot() + if goexperiment.DeadlockGC { + if work.deadlockDetectionMode { + work.stackRoots, work.nLiveStackRoots = allGsSnapshotSortedForGC() + } else { + // regular GC --- scan every go routine + work.stackRoots = allGsSnapshot() + work.nLiveStackRoots = len(work.stackRoots) + } + } else { + // regular GC --- scan every go routine + work.stackRoots = allGsSnapshot() + work.nLiveStackRoots = len(work.stackRoots) + } work.nStackRoots = len(work.stackRoots) work.markrootNext = 0 - work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots) + work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nLiveStackRoots) // Calculate base indexes of each root type work.baseData = uint32(fixedRootCount) @@ -119,8 +209,10 @@ func gcPrepareMarkRoots() { // gcMarkRootCheck checks that all roots have been scanned. It is // purely for debugging. func gcMarkRootCheck() { - if work.markrootNext < work.markrootJobs { - print(work.markrootNext, " of ", work.markrootJobs, " markroot jobs done\n") + rootNext := atomic.Load(&work.markrootNext) + rootJobs := atomic.Load(&work.markrootJobs) + if rootNext < rootJobs { + print(rootNext, " of ", rootJobs, " markroot jobs done\n") throw("left over markroot jobs") } @@ -868,7 +960,7 @@ func scanstack(gp *g, gcw *gcWork) int64 { case _Grunning: print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") throw("scanstack: goroutine not stopped") - case _Grunnable, _Gsyscall, _Gwaiting: + case _Grunnable, _Gsyscall, _Gwaiting, _Gdeadlocked: // ok } @@ -1136,6 +1228,32 @@ func gcDrainMarkWorkerFractional(gcw *gcWork) { gcDrain(gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) } +func gcUpdateMarkrootNext() (uint32, bool) { + var success bool + var next uint32 = atomic.Load(&work.markrootNext) + var jobs uint32 = atomic.Load(&work.markrootJobs) + + if next < jobs { + // still work available at the moment + for !success { + success = atomic.Cas(&work.markrootNext, next, next+1) + // We manage to snatch a root job. Return the root index. + if success { + return next, true + } + + // Get the latest value of markrootNext. + next = atomic.Load(&work.markrootNext) + jobs := atomic.Load(&work.markrootJobs) + // We are out of markroot jobs. + if next >= jobs { + break + } + } + } + return 0, false +} + // gcDrain scans roots and objects in work buffers, blackening grey // objects until it is unable to get more work. It may return before // GC is done; it's the caller's responsibility to balance work from @@ -1194,13 +1312,16 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { } } - // Drain root marking jobs. - if work.markrootNext < work.markrootJobs { + rootNext := atomic.Load(&work.markrootNext) + rootJobs := atomic.Load(&work.markrootJobs) + if rootNext < rootJobs { // Stop if we're preemptible, if someone wants to STW, or if // someone is calling forEachP. + // + // Continue unconditionally if we're draining partial deadlocks. for !(gp.preempt && (preemptible || sched.gcwaiting.Load() || pp.runSafePointFn != 0)) { - job := atomic.Xadd(&work.markrootNext, +1) - 1 - if job >= work.markrootJobs { + job, success := gcUpdateMarkrootNext() + if !success { break } markroot(gcw, job, flushBgCredit) @@ -1346,9 +1467,9 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 { wbBufFlush() if b = gcw.tryGetObj(); b == 0 { // Try to do a root job. 
- if work.markrootNext < work.markrootJobs { - job := atomic.Xadd(&work.markrootNext, +1) - 1 - if job < work.markrootJobs { + if atomic.Load(&work.markrootNext) < atomic.Load(&work.markrootJobs) { + job, success := gcUpdateMarkrootNext() + if success { workFlushed += markroot(gcw, job, false) continue } diff --git a/src/runtime/preempt.go b/src/runtime/preempt.go index 22727df74eead2..728f5952b4a5f4 100644 --- a/src/runtime/preempt.go +++ b/src/runtime/preempt.go @@ -160,7 +160,7 @@ func suspendG(gp *g) suspendGState { s = _Gwaiting fallthrough - case _Grunnable, _Gsyscall, _Gwaiting: + case _Grunnable, _Gsyscall, _Gwaiting, _Gdeadlocked: // Claim goroutine by setting scan bit. // This may race with execution or readying of gp. // The scan bit keeps it from transition state. @@ -269,6 +269,7 @@ func resumeG(state suspendGState) { case _Grunnable | _Gscan, _Gwaiting | _Gscan, + _Gdeadlocked | _Gscan, _Gsyscall | _Gscan: casfrom_Gscanstatus(gp, s, s&^_Gscan) } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 25d39d9ba389ad..ec30acaf5e3c2f 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -8,6 +8,7 @@ import ( "internal/abi" "internal/cpu" "internal/goarch" + "internal/goexperiment" "internal/goos" "internal/runtime/atomic" "internal/runtime/exithook" @@ -689,7 +690,7 @@ func allgadd(gp *g) { } lock(&allglock) - allgs = append(allgs, gp) + allgs = append(allgs, gp.mask()) if &allgs[0] != allgptr { atomicstorep(unsafe.Pointer(&allgptr), unsafe.Pointer(&allgs[0])) } @@ -708,6 +709,11 @@ func allGsSnapshot() []*g { // monotonically and existing entries never change, so we can // simply return a copy of the slice header. For added safety, // we trim everything past len because that can still change. + if goexperiment.DeadlockGC { + for i, gp := range allgs { + allgs[i] = gp.unmask() + } + } return allgs[:len(allgs):len(allgs)] } @@ -729,7 +735,7 @@ func atomicAllGIndex(ptr **g, i uintptr) *g { func forEachG(fn func(gp *g)) { lock(&allglock) for _, gp := range allgs { - fn(gp) + fn(gp.unmask()) } unlock(&allglock) } @@ -742,7 +748,7 @@ func forEachGRace(fn func(gp *g)) { ptr, length := atomicAllG() for i := uintptr(0); i < length; i++ { gp := atomicAllGIndex(ptr, i) - fn(gp) + fn(gp.unmask()) } return } @@ -1208,6 +1214,7 @@ func casfrom_Gscanstatus(gp *g, oldval, newval uint32) { _Gscanwaiting, _Gscanrunning, _Gscansyscall, + _Gscandeadlocked, _Gscanpreempted: if newval == oldval&^_Gscan { success = gp.atomicstatus.CompareAndSwap(oldval, newval) @@ -1228,6 +1235,7 @@ func castogscanstatus(gp *g, oldval, newval uint32) bool { case _Grunnable, _Grunning, _Gwaiting, + _Gdeadlocked, _Gsyscall: if newval == oldval|_Gscan { r := gp.atomicstatus.CompareAndSwap(oldval, newval) diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index b5d2dcefaded99..4abb18ba2625bb 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -87,6 +87,9 @@ const ( // ready()ing this G. _Gpreempted // 9 + // _Gdeadlocked represents a deadlocked goroutine caught by the GC. + _Gdeadlocked // 10 + // _Gscan combined with one of the above states other than // _Grunning indicates that GC is scanning the stack. 
The // goroutine is not executing user code and the stack is owned @@ -104,6 +107,8 @@ const ( _Gscansyscall = _Gscan + _Gsyscall // 0x1003 _Gscanwaiting = _Gscan + _Gwaiting // 0x1004 _Gscanpreempted = _Gscan + _Gpreempted // 0x1009 + + _Gscandeadlocked = _Gscan + _Gdeadlocked // 0x100a ) const ( @@ -1167,12 +1172,26 @@ func (w waitReason) String() string { return waitReasonStrings[w] } +// isMutexWait returns true if the goroutine is blocked because of +// sync.Mutex.Lock or sync.RWMutex.[R]Lock. +// +//go:nosplit func (w waitReason) isMutexWait() bool { return w == waitReasonSyncMutexLock || w == waitReasonSyncRWMutexRLock || w == waitReasonSyncRWMutexLock } +// isSyncWait returns true if the goroutine is blocked because of +// sync library primitive operations. +// +//go:nosplit +func (w waitReason) isSyncWait() bool { + return w == waitReasonSyncWaitGroupWait || + w == waitReasonSyncCondWait || + w.isMutexWait() +} + func (w waitReason) isWaitingForSuspendG() bool { return isWaitingForSuspendG[w] } diff --git a/src/runtime/sema.go b/src/runtime/sema.go index 6af49b1b0c42d9..8927dfe262160e 100644 --- a/src/runtime/sema.go +++ b/src/runtime/sema.go @@ -21,6 +21,7 @@ package runtime import ( "internal/cpu" + "internal/goexperiment" "internal/runtime/atomic" "unsafe" ) @@ -188,7 +189,7 @@ func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags, skipframes i } // Any semrelease after the cansemacquire knows we're waiting // (we set nwait above), so go to sleep. - root.queue(addr, s, lifo) + root.queue(addr, s, lifo, reason.isSyncWait()) goparkunlock(&root.lock, reason, traceBlockSync, 4+skipframes) if s.ticket != 0 || cansemacquire(addr) { break @@ -301,9 +302,18 @@ func cansemacquire(addr *uint32) bool { } // queue adds s to the blocked goroutines in semaRoot. -func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { +func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { s.g = getg() - s.elem = unsafe.Pointer(addr) + pAddr := unsafe.Pointer(addr) + if goexperiment.DeadlockGC { + if syncSema { + // Mask the addr so it doesn't get marked during GC + // through marking of the treap or marking of the blocked goroutine + pAddr = gcMask(unsafe.Pointer(addr)) + s.g.waiting = s + } + } + s.elem = pAddr s.next = nil s.prev = nil s.waiters = 0 @@ -311,7 +321,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { var last *sudog pt := &root.treap for t := *pt; t != nil; t = *pt { - if t.elem == unsafe.Pointer(addr) { + if t.elem == pAddr { // Already have addr in list. if lifo { // Substitute s in t's place in treap. @@ -357,7 +367,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { return } last = t - if uintptr(unsafe.Pointer(addr)) < uintptr(t.elem) { + if uintptr(pAddr) < uintptr(t.elem) { pt = &t.prev } else { pt = &t.next @@ -402,6 +412,25 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) { ps := &root.treap s := *ps + + if goexperiment.DeadlockGC { + // First try to find a masked address. + var pAddr unsafe.Pointer = gcMask(unsafe.Pointer(addr)) + for ; s != nil; s = *ps { + if s.elem == pAddr { + goto Found + } + if uintptr(pAddr) < uintptr(s.elem) { + ps = &s.prev + } else { + ps = &s.next + } + } + // Otherwise, try to find an unmasked address. 
+ ps = &root.treap + s = *ps + } + for ; s != nil; s = *ps { if s.elem == unsafe.Pointer(addr) { goto Found @@ -470,6 +499,9 @@ Found: } tailtime = s.acquiretime } + if goexperiment.DeadlockGC { + s.g.waiting = nil + } s.parent = nil s.elem = nil s.next = nil @@ -590,6 +622,10 @@ func notifyListWait(l *notifyList, t uint32) { // Enqueue itself. s := acquireSudog() s.g = getg() + if goexperiment.DeadlockGC { + s.elem = gcMask(unsafe.Pointer(l)) + s.g.waiting = s + } s.ticket = t s.releasetime = 0 t0 := int64(0) @@ -607,6 +643,10 @@ func notifyListWait(l *notifyList, t uint32) { if t0 != 0 { blockevent(s.releasetime-t0, 2) } + if goexperiment.DeadlockGC { + s.g.waiting = nil + s.elem = nil + } releaseSudog(s) } diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 00c0f08e5593c8..44f28971f89de0 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -1199,14 +1199,15 @@ func elideWrapperCalling(id abi.FuncID) bool { } var gStatusStrings = [...]string{ - _Gidle: "idle", - _Grunnable: "runnable", - _Grunning: "running", - _Gsyscall: "syscall", - _Gwaiting: "waiting", - _Gdead: "dead", - _Gcopystack: "copystack", - _Gpreempted: "preempted", + _Gidle: "idle", + _Grunnable: "runnable", + _Grunning: "running", + _Gsyscall: "syscall", + _Gwaiting: "waiting", + _Gdead: "dead", + _Gcopystack: "copystack", + _Gdeadlocked: "deadlocked", + _Gpreempted: "preempted", } func goroutineheader(gp *g) { diff --git a/src/runtime/tracestatus.go b/src/runtime/tracestatus.go index 03ec81fc0262a1..e04f012001dd60 100644 --- a/src/runtime/tracestatus.go +++ b/src/runtime/tracestatus.go @@ -122,7 +122,7 @@ func goStatusToTraceGoStatus(status uint32, wr waitReason) tracev2.GoStatus { tgs = tracev2.GoRunning case _Gsyscall: tgs = tracev2.GoSyscall - case _Gwaiting, _Gpreempted: + case _Gwaiting, _Gpreempted, _Gdeadlocked: // There are a number of cases where a G might end up in // _Gwaiting but it's actually running in a non-preemptive // state but needs to present itself as preempted to the From c902ec6e5ef4c89bd979f9b2aad2ba8e3f6d9294 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Wed, 16 Jul 2025 14:12:21 +0200 Subject: [PATCH 02/36] Corrected sema dequeue implementation. --- src/runtime/sema.go | 49 +++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/src/runtime/sema.go b/src/runtime/sema.go index 8927dfe262160e..d7288580c96611 100644 --- a/src/runtime/sema.go +++ b/src/runtime/sema.go @@ -321,7 +321,13 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { var last *sudog pt := &root.treap for t := *pt; t != nil; t = *pt { - if t.elem == pAddr { + var cmp bool + if goexperiment.DeadlockGC { + cmp = uintptr(gcUnmask(pAddr)) == uintptr(gcUnmask(t.elem)) + } else { + cmp = uintptr(pAddr) == uintptr(t.elem) + } + if cmp { // Already have addr in list. if lifo { // Substitute s in t's place in treap. @@ -367,7 +373,12 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { return } last = t - if uintptr(pAddr) < uintptr(t.elem) { + if goexperiment.DeadlockGC { + cmp = uintptr(gcUnmask(pAddr)) < uintptr(gcUnmask(t.elem)) + } else { + cmp = uintptr(pAddr) < uintptr(t.elem) + } + if cmp { pt = &t.prev } else { pt = &t.next @@ -413,29 +424,23 @@ func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) ps := &root.treap s := *ps - if goexperiment.DeadlockGC { - // First try to find a masked address. 
- var pAddr unsafe.Pointer = gcMask(unsafe.Pointer(addr)) - for ; s != nil; s = *ps { - if s.elem == pAddr { - goto Found - } - if uintptr(pAddr) < uintptr(s.elem) { - ps = &s.prev - } else { - ps = &s.next - } - } - // Otherwise, try to find an unmasked address. - ps = &root.treap - s = *ps - } - for ; s != nil; s = *ps { - if s.elem == unsafe.Pointer(addr) { + var cmp bool + if goexperiment.DeadlockGC { + cmp = gcUnmask(unsafe.Pointer(addr)) == gcUnmask(s.elem) + } else { + cmp = unsafe.Pointer(addr) == s.elem + } + if cmp { goto Found } - if uintptr(unsafe.Pointer(addr)) < uintptr(s.elem) { + + if goexperiment.DeadlockGC { + cmp = uintptr(gcUnmask(unsafe.Pointer(addr))) < uintptr(gcUnmask(s.elem)) + } else { + cmp = uintptr(unsafe.Pointer(addr)) < uintptr(s.elem) + } + if cmp { ps = &s.prev } else { ps = &s.next From a672db89d7ab3893854d008a6b84d83deaaa6d55 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Wed, 16 Jul 2025 14:47:02 +0200 Subject: [PATCH 03/36] Ordered wait reasons for easier checks. --- src/runtime/mgcmark.go | 12 +----------- src/runtime/runtime2.go | 16 +++++++--------- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index d7e6daeaadb646..8bc624c5b698cc 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -83,17 +83,7 @@ func gcUnmask(p unsafe.Pointer) unsafe.Pointer { //go:nosplit func (gp *g) internalBlocked() bool { reason := gp.waitreason - return reason != waitReasonChanReceive && - reason != waitReasonSyncWaitGroupWait && - reason != waitReasonChanSend && - reason != waitReasonChanReceiveNilChan && - reason != waitReasonChanSendNilChan && - reason != waitReasonSelect && - reason != waitReasonSelectNoCases && - reason != waitReasonSyncMutexLock && - reason != waitReasonSyncRWMutexRLock && - reason != waitReasonSyncRWMutexLock && - reason != waitReasonSyncCondWait + return reason < waitReasonChanReceiveNilChan || waitReasonSyncWaitGroupWait < reason } // The world must be stopped or allglock must be held. 
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 4abb18ba2625bb..30bfa0fe99f934 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -1069,24 +1069,24 @@ const ( waitReasonZero waitReason = iota // "" waitReasonGCAssistMarking // "GC assist marking" waitReasonIOWait // "IO wait" - waitReasonChanReceiveNilChan // "chan receive (nil chan)" - waitReasonChanSendNilChan // "chan send (nil chan)" waitReasonDumpingHeap // "dumping heap" waitReasonGarbageCollection // "garbage collection" waitReasonGarbageCollectionScan // "garbage collection scan" waitReasonPanicWait // "panicwait" - waitReasonSelect // "select" - waitReasonSelectNoCases // "select (no cases)" waitReasonGCAssistWait // "GC assist wait" waitReasonGCSweepWait // "GC sweep wait" waitReasonGCScavengeWait // "GC scavenge wait" - waitReasonChanReceive // "chan receive" - waitReasonChanSend // "chan send" waitReasonFinalizerWait // "finalizer wait" waitReasonForceGCIdle // "force gc (idle)" waitReasonUpdateGOMAXPROCSIdle // "GOMAXPROCS updater (idle)" waitReasonSemacquire // "semacquire" waitReasonSleep // "sleep" + waitReasonChanReceiveNilChan // "chan receive (nil chan)" + waitReasonChanSendNilChan // "chan send (nil chan)" + waitReasonSelect // "select" + waitReasonSelectNoCases // "select (no cases)" + waitReasonChanReceive // "chan receive" + waitReasonChanSend // "chan send" waitReasonSyncCondWait // "sync.Cond.Wait" waitReasonSyncMutexLock // "sync.Mutex.Lock" waitReasonSyncRWMutexRLock // "sync.RWMutex.RLock" @@ -1187,9 +1187,7 @@ func (w waitReason) isMutexWait() bool { // //go:nosplit func (w waitReason) isSyncWait() bool { - return w == waitReasonSyncWaitGroupWait || - w == waitReasonSyncCondWait || - w.isMutexWait() + return waitReasonSyncCondWait <= w && w <= waitReasonSyncWaitGroupWait } func (w waitReason) isWaitingForSuspendG() bool { From 8de03688d313678a59f7431a844a362035dc529d Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Thu, 17 Jul 2025 16:31:19 +0200 Subject: [PATCH 04/36] Renamed deadlockgc to golfgc and deadlocks to goroutine leaks to avoid confusion with global deadlock. --- .../goexperiment/exp_deadlockgc_off.go | 8 -- .../goexperiment/exp_deadlockgc_on.go | 8 -- src/internal/goexperiment/exp_golfgc_off.go | 8 ++ src/internal/goexperiment/exp_golfgc_on.go | 8 ++ src/internal/goexperiment/flags.go | 4 +- src/runtime/mgc.go | 97 ++++++++++--------- src/runtime/mgcmark.go | 18 ++-- src/runtime/preempt.go | 4 +- src/runtime/proc.go | 2 +- src/runtime/runtime2.go | 6 +- src/runtime/sema.go | 17 ++-- src/runtime/traceback.go | 18 ++-- src/runtime/tracestatus.go | 2 +- 13 files changed, 102 insertions(+), 98 deletions(-) delete mode 100644 src/internal/goexperiment/exp_deadlockgc_off.go delete mode 100644 src/internal/goexperiment/exp_deadlockgc_on.go create mode 100644 src/internal/goexperiment/exp_golfgc_off.go create mode 100644 src/internal/goexperiment/exp_golfgc_on.go diff --git a/src/internal/goexperiment/exp_deadlockgc_off.go b/src/internal/goexperiment/exp_deadlockgc_off.go deleted file mode 100644 index 185171f4414c6c..00000000000000 --- a/src/internal/goexperiment/exp_deadlockgc_off.go +++ /dev/null @@ -1,8 +0,0 @@ -// Code generated by mkconsts.go. DO NOT EDIT. 
- -//go:build !goexperiment.deadlockgc - -package goexperiment - -const DeadlockGC = false -const DeadlockGCInt = 0 diff --git a/src/internal/goexperiment/exp_deadlockgc_on.go b/src/internal/goexperiment/exp_deadlockgc_on.go deleted file mode 100644 index 5c2b07ca55653b..00000000000000 --- a/src/internal/goexperiment/exp_deadlockgc_on.go +++ /dev/null @@ -1,8 +0,0 @@ -// Code generated by mkconsts.go. DO NOT EDIT. - -//go:build goexperiment.deadlockgc - -package goexperiment - -const DeadlockGC = true -const DeadlockGCInt = 1 diff --git a/src/internal/goexperiment/exp_golfgc_off.go b/src/internal/goexperiment/exp_golfgc_off.go new file mode 100644 index 00000000000000..547d1b4ffcda95 --- /dev/null +++ b/src/internal/goexperiment/exp_golfgc_off.go @@ -0,0 +1,8 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build !goexperiment.golfgc + +package goexperiment + +const GolfGC = false +const GolfGCInt = 0 diff --git a/src/internal/goexperiment/exp_golfgc_on.go b/src/internal/goexperiment/exp_golfgc_on.go new file mode 100644 index 00000000000000..830b9015ccbf07 --- /dev/null +++ b/src/internal/goexperiment/exp_golfgc_on.go @@ -0,0 +1,8 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build goexperiment.golfgc + +package goexperiment + +const GolfGC = true +const GolfGCInt = 1 diff --git a/src/internal/goexperiment/flags.go b/src/internal/goexperiment/flags.go index dff301cd74d439..17e29a63f2b1fb 100644 --- a/src/internal/goexperiment/flags.go +++ b/src/internal/goexperiment/flags.go @@ -121,6 +121,6 @@ type Flags struct { // platforms. RandomizedHeapBase64 bool - // DeadlockGC enables the Deadlock GC implementation. - DeadlockGC bool + // GolfGC enables the Deadlock GC implementation. + GolfGC bool } diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index b7105aa965f62a..709bda103c37e5 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -373,32 +373,35 @@ type workType struct { // Number of roots of various root types. Set by gcPrepareMarkRoots. // - // During normal GC cycle, nStackRoots == nLiveStackRoots == len(stackRoots) - // during deadlock detection GC, nLiveStackRoots is the number of stackRoots + // During normal GC cycle, nStackRoots == nLiveStackRoots == len(stackRoots); + // during goroutine leak detection, nLiveStackRoots is the number of stackRoots // to examine, and nStackRoots == len(stackRoots), which include goroutines that are // unmarked / not runnable nDataRoots, nBSSRoots, nSpanRoots, nStackRoots, nLiveStackRoots int - // The GC has performed deadlock detection during this GC cycle. - detectedDeadlocks bool - - // Is set to true by DetectDeadlocks(), instructing the next GC cycle to perform deadlock detection. - pendingDeadlockDetection bool - - // When set, the GC is running in deadlock detection mode. - // This can be triggered with a runtime flag. - deadlockDetectionMode bool + // The following fields monitor the GC phase of the current cycle during + // goroutine leak detection. + // + // - pendingGoleakDetection: The GC has been instructed to perform goroutine leak + // detection during the next GC cycle; it is set by DetectGoroutineLeaks() + // and unset during gcStart(). + // - detectingGoleaks: The GC is running in goroutine leak detection mode; it is set + // during gcStart() and unset during gcMarkTermination(). + // - detectedGoleaks: The GC has performed goroutine leak detection during the current + // GC cycle; it is set during gcMarkDone(), right after goroutine leak detection has concluded, + // and unset during gcStart(). 
+ pendingGoleakDetection, detectingGoleaks, detectedGoleaks bool // Base indexes of each root type. Set by gcPrepareMarkRoots. baseData, baseBSS, baseSpans, baseStacks, baseEnd uint32 // stackRoots is a snapshot of all of the Gs that existed before the - // beginning of concurrent marking. During deadlock detection GC, stackRoots + // beginning of concurrent marking. During goroutine leak detection, stackRoots // is partitioned into two sets; to the left of nLiveStackRoots are stackRoots // of running / runnable goroutines and to the right of nLiveStackRoots are // stackRoots of unmarked / not runnable goroutines // gcDiscoverMoreStackRoots modifies the stackRoots array to redo the partition - // after each marking phase + // after each marking phase iteration. stackRoots []*g // Each type of GC state transition is protected by a lock. @@ -565,25 +568,25 @@ func GC() { releasem(mp) } -// DetectDeadlocks instructs the Go garbage collector to attempt -// partial deadlock detection. +// FindGoleaks instructs the Go garbage collector to attempt +// goroutine leak detection during the next GC cycle. // -// Only operates if deadlockgc is enabled in GOEXPERIMENT. +// Only operates if golfgc is enabled in GOEXPERIMENT. // Otherwise, it just runs runtime.GC(). -func DetectDeadlocks() { - if !goexperiment.DeadlockGC { +func FindGoLeaks() { + if !goexperiment.GolfGC { GC() return } // This write should be thread-safe, as the overwritten value is true. - // pendingDeadlockDetection is only set to false under STW at the start + // pendingGoleakDetection is only set to false under STW at the start // of the GC cycle that picks it up. - work.pendingDeadlockDetection = true + work.pendingGoleakDetection = true // This read should be thread-safe for the same reason as the write above above. // At most, we trigger the GC an additional time. - for work.pendingDeadlockDetection { + for work.pendingGoleakDetection { GC() } } @@ -733,8 +736,8 @@ func gcStart(trigger gcTrigger) { mode = gcForceMode } else if debug.gcstoptheworld == 2 { mode = gcForceBlockMode - } else if goexperiment.DeadlockGC { - if work.pendingDeadlockDetection { + } else if goexperiment.GolfGC { + if work.pendingGoleakDetection { // Fully stop the world if running deadlock detection. mode = gcForceBlockMode } @@ -800,7 +803,7 @@ func gcStart(trigger gcTrigger) { clearpools() work.cycles.Add(1) - work.detectedDeadlocks = false + work.detectedGoleaks = false // Assists and workers can start the moment we start // the world. @@ -832,11 +835,11 @@ func gcStart(trigger gcTrigger) { // possible. setGCPhase(_GCmark) - if goexperiment.DeadlockGC { - if work.pendingDeadlockDetection { + if goexperiment.GolfGC { + if work.pendingGoleakDetection { // Write is thread-safe because the world is stopped - work.deadlockDetectionMode = true - work.pendingDeadlockDetection = false + work.detectingGoleaks = true + work.pendingGoleakDetection = false } } @@ -940,8 +943,8 @@ func gcMarkDone() { // Ensure only one thread is running the ragged barrier at a // time. semacquire(&work.markDoneSema) - if goexperiment.DeadlockGC { - if work.deadlockDetectionMode { + if goexperiment.GolfGC { + if work.detectingGoleaks { gcDiscoverMoreStackRoots() } } @@ -1050,11 +1053,13 @@ top: }) semrelease(&worldsema) goto top - } else if goexperiment.DeadlockGC { - // Otherwise, do a deadlock detection round. - // Only do one deadlock detection round per GC cycle. 
- if work.deadlockDetectionMode && !work.detectedDeadlocks { - work.detectedDeadlocks = detectDeadlocks() + } else if goexperiment.GolfGC { + // If we are detecting goroutine leaks, do so now. + if work.detectingGoleaks && !work.detectedGoleaks { + // Detect goroutine leaks. If the returned value is true, then + // detection was performed during this cycle. Otherwise, more mark work is needed, + // or live goroutines were found. + work.detectedGoleaks = findGoleaks() getg().m.preemptoff = "" systemstack(func() { @@ -1243,12 +1248,12 @@ func gcDiscoverMoreStackRoots() { } } -// detectDeadlocks scans the remaining stackRoots and marks any which are +// findGoleaks scans the remaining stackRoots and marks any which are // blocked over exclusively unreachable concurrency primitives as leaked (deadlocked). -// Returns true if goroutine leak was performed (or unnecessary). -// Returns false if the GC cycle has not yet reached a fix point for reachable goroutines. -func detectDeadlocks() bool { - // Report deadlocks and mark them unreachable, and resume marking +// Returns true if the goroutine leak check was performed (or unnecessary). +// Returns false if the GC cycle has not yet computed all (maybe-)live goroutines. +func findGoleaks() bool { + // Report goroutine leaks and mark them unreachable, and resume marking // we still need to mark these unreachable *g structs as they // get reused, but their stack won't get scanned if work.nLiveStackRoots == work.nStackRoots { @@ -1281,10 +1286,10 @@ func detectDeadlocks() bool { return false } - // For the remaining goroutines, mark them as unreachable and deadlocking. + // For the remaining goroutines, mark them as unreachable and leaked. for i := work.nLiveStackRoots; i < work.nStackRoots; i++ { gp := work.stackRoots[i].unmask() - casgstatus(gp, _Gwaiting, _Gdeadlocked) + casgstatus(gp, _Gwaiting, _Gleaked) fn := findfunc(gp.startpc) if fn.valid() { print("goroutine leak! goroutine ", gp.goid, ": ", funcname(fn), " Stack size: ", gp.stack.hi-gp.stack.lo, " bytes\n") @@ -1454,11 +1459,11 @@ func gcMarkTermination(stw worldStop) { } systemstack(func() { - if goexperiment.DeadlockGC { - // Pull the GC out of deadlock detection mode. + if goexperiment.GolfGC { + // Pull the GC out of goroutine leak detection mode. // Write is thread-safe because the world is stopped, and only one // GC cycle can run at a time. - work.deadlockDetectionMode = false + work.detectingGoleaks = false } // The memstats updated above must be updated with the world @@ -1888,7 +1893,7 @@ func gcMarkWorkAvailable(p *p) bool { if !work.full.empty() || !work.spanq.empty() { return true // global work available } - if !work.deadlockDetectionMode { + if !work.detectingGoleaks { return work.markrootNext < work.markrootJobs } rootNext := atomic.Load(&work.markrootNext) diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 8bc624c5b698cc..bf5510fc643554 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -60,7 +60,7 @@ const ( // //go:nosplit func gcMask(p unsafe.Pointer) unsafe.Pointer { - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { return unsafe.Pointer(uintptr(p) | gcBitMask) } return p @@ -70,15 +70,15 @@ func gcMask(p unsafe.Pointer) unsafe.Pointer { // //go:nosplit func gcUnmask(p unsafe.Pointer) unsafe.Pointer { - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { return unsafe.Pointer(uintptr(p) & gcUndoBitMask) } return p } -// internalBlocked returns true if the goroutine is blocked due to a -// non-deadlocking waitReason, e.g. 
waiting for the netpoller or garbage collector. -// Such goroutines should never be considered for deadlock detection. +// internalBlocked returns true if the goroutine is blocked due to an +// internal (non-leaking) waitReason, e.g. waiting for the netpoller or garbage collector. +// Such goroutines are never leak detection candidates according to the GC. // //go:nosplit func (gp *g) internalBlocked() bool { @@ -170,8 +170,8 @@ func gcPrepareMarkRoots() { // ignore them because they begin life without any roots, so // there's nothing to scan, and any roots they create during // the concurrent phase will be caught by the write barrier. - if goexperiment.DeadlockGC { - if work.deadlockDetectionMode { + if goexperiment.GolfGC { + if work.detectingGoleaks { work.stackRoots, work.nLiveStackRoots = allGsSnapshotSortedForGC() } else { // regular GC --- scan every go routine @@ -950,7 +950,7 @@ func scanstack(gp *g, gcw *gcWork) int64 { case _Grunning: print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") throw("scanstack: goroutine not stopped") - case _Grunnable, _Gsyscall, _Gwaiting, _Gdeadlocked: + case _Grunnable, _Gsyscall, _Gwaiting, _Gleaked: // ok } @@ -1307,8 +1307,6 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { if rootNext < rootJobs { // Stop if we're preemptible, if someone wants to STW, or if // someone is calling forEachP. - // - // Continue unconditionally if we're draining partial deadlocks. for !(gp.preempt && (preemptible || sched.gcwaiting.Load() || pp.runSafePointFn != 0)) { job, success := gcUpdateMarkrootNext() if !success { diff --git a/src/runtime/preempt.go b/src/runtime/preempt.go index 728f5952b4a5f4..5367f66213804b 100644 --- a/src/runtime/preempt.go +++ b/src/runtime/preempt.go @@ -160,7 +160,7 @@ func suspendG(gp *g) suspendGState { s = _Gwaiting fallthrough - case _Grunnable, _Gsyscall, _Gwaiting, _Gdeadlocked: + case _Grunnable, _Gsyscall, _Gwaiting, _Gleaked: // Claim goroutine by setting scan bit. // This may race with execution or readying of gp. // The scan bit keeps it from transition state. @@ -269,7 +269,7 @@ func resumeG(state suspendGState) { case _Grunnable | _Gscan, _Gwaiting | _Gscan, - _Gdeadlocked | _Gscan, + _Gleaked | _Gscan, _Gsyscall | _Gscan: casfrom_Gscanstatus(gp, s, s&^_Gscan) } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index ec30acaf5e3c2f..ec8f2a1859768f 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -709,7 +709,7 @@ func allGsSnapshot() []*g { // monotonically and existing entries never change, so we can // simply return a copy of the slice header. For added safety, // we trim everything past len because that can still change. - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { for i, gp := range allgs { allgs[i] = gp.unmask() } diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 30bfa0fe99f934..f9be7d468a04d4 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -87,8 +87,8 @@ const ( // ready()ing this G. _Gpreempted // 9 - // _Gdeadlocked represents a deadlocked goroutine caught by the GC. - _Gdeadlocked // 10 + // _Gleaked represents a deadlocked goroutine caught by the GC. + _Gleaked // 10 // _Gscan combined with one of the above states other than // _Grunning indicates that GC is scanning the stack. 
The @@ -108,7 +108,7 @@ const ( _Gscanwaiting = _Gscan + _Gwaiting // 0x1004 _Gscanpreempted = _Gscan + _Gpreempted // 0x1009 - _Gscandeadlocked = _Gscan + _Gdeadlocked // 0x100a + _Gscandeadlocked = _Gscan + _Gleaked // 0x100a ) const ( diff --git a/src/runtime/sema.go b/src/runtime/sema.go index d7288580c96611..69a98a24cfe316 100644 --- a/src/runtime/sema.go +++ b/src/runtime/sema.go @@ -305,7 +305,7 @@ func cansemacquire(addr *uint32) bool { func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { s.g = getg() pAddr := unsafe.Pointer(addr) - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { if syncSema { // Mask the addr so it doesn't get marked during GC // through marking of the treap or marking of the blocked goroutine @@ -322,7 +322,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { pt := &root.treap for t := *pt; t != nil; t = *pt { var cmp bool - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { cmp = uintptr(gcUnmask(pAddr)) == uintptr(gcUnmask(t.elem)) } else { cmp = uintptr(pAddr) == uintptr(t.elem) @@ -373,7 +373,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { return } last = t - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { cmp = uintptr(gcUnmask(pAddr)) < uintptr(gcUnmask(t.elem)) } else { cmp = uintptr(pAddr) < uintptr(t.elem) @@ -426,7 +426,7 @@ func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) for ; s != nil; s = *ps { var cmp bool - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { cmp = gcUnmask(unsafe.Pointer(addr)) == gcUnmask(s.elem) } else { cmp = unsafe.Pointer(addr) == s.elem @@ -435,7 +435,7 @@ func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) goto Found } - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { cmp = uintptr(gcUnmask(unsafe.Pointer(addr))) < uintptr(gcUnmask(s.elem)) } else { cmp = uintptr(unsafe.Pointer(addr)) < uintptr(s.elem) @@ -504,7 +504,7 @@ Found: } tailtime = s.acquiretime } - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { s.g.waiting = nil } s.parent = nil @@ -627,7 +627,8 @@ func notifyListWait(l *notifyList, t uint32) { // Enqueue itself. 
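As a concrete case of what the masking in notifyListWait enables, here is a sketch (illustrative user code, assuming a toolchain built with GOEXPERIMENT=golfgc, the experiment name at this point in the series) of a sync.Cond wait that can be classified as leaked once the cond is unreachable from any running goroutine:

package main

import (
	"runtime"
	"sync"
	"time"
)

// leakCond parks a goroutine in sync.Cond.Wait (waitReasonSyncCondWait).
// Nothing ever calls Signal or Broadcast, and once leakCond returns the only
// remaining reference to c is from the blocked goroutine itself, which the
// GC deliberately does not treat as a live root.
func leakCond() {
	c := sync.NewCond(&sync.Mutex{})
	go func() {
		c.L.Lock()
		c.Wait()
	}()
}

func main() {
	leakCond()
	time.Sleep(100 * time.Millisecond) // let the goroutine park
	runtime.FindGoLeaks()              // reports the goroutine stuck in c.Wait
}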
s := acquireSudog() s.g = getg() - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { + // Storing this pointer is s.elem = gcMask(unsafe.Pointer(l)) s.g.waiting = s } @@ -648,7 +649,7 @@ func notifyListWait(l *notifyList, t uint32) { if t0 != 0 { blockevent(s.releasetime-t0, 2) } - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { s.g.waiting = nil s.elem = nil } diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 44f28971f89de0..bbfd169a4c2260 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -1199,15 +1199,15 @@ func elideWrapperCalling(id abi.FuncID) bool { } var gStatusStrings = [...]string{ - _Gidle: "idle", - _Grunnable: "runnable", - _Grunning: "running", - _Gsyscall: "syscall", - _Gwaiting: "waiting", - _Gdead: "dead", - _Gcopystack: "copystack", - _Gdeadlocked: "deadlocked", - _Gpreempted: "preempted", + _Gidle: "idle", + _Grunnable: "runnable", + _Grunning: "running", + _Gsyscall: "syscall", + _Gwaiting: "waiting", + _Gdead: "dead", + _Gcopystack: "copystack", + _Gleaked: "deadlocked", + _Gpreempted: "preempted", } func goroutineheader(gp *g) { diff --git a/src/runtime/tracestatus.go b/src/runtime/tracestatus.go index e04f012001dd60..8b5eafd170f488 100644 --- a/src/runtime/tracestatus.go +++ b/src/runtime/tracestatus.go @@ -122,7 +122,7 @@ func goStatusToTraceGoStatus(status uint32, wr waitReason) tracev2.GoStatus { tgs = tracev2.GoRunning case _Gsyscall: tgs = tracev2.GoSyscall - case _Gwaiting, _Gpreempted, _Gdeadlocked: + case _Gwaiting, _Gpreempted, _Gleaked: // There are a number of cases where a G might end up in // _Gwaiting but it's actually running in a non-preemptive // state but needs to present itself as preempted to the From a150f635713c1557fd6818ef719b15eb4514a51b Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Fri, 18 Jul 2025 12:21:18 +0200 Subject: [PATCH 05/36] Fixed status text for leaked goroutines. --- src/runtime/runtime2.go | 4 ++-- src/runtime/traceback.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index f9be7d468a04d4..75273aae420e96 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -87,7 +87,7 @@ const ( // ready()ing this G. _Gpreempted // 9 - // _Gleaked represents a deadlocked goroutine caught by the GC. + // _Gleaked represents a leaked goroutine caught by the GC. _Gleaked // 10 // _Gscan combined with one of the above states other than @@ -108,7 +108,7 @@ const ( _Gscanwaiting = _Gscan + _Gwaiting // 0x1004 _Gscanpreempted = _Gscan + _Gpreempted // 0x1009 - _Gscandeadlocked = _Gscan + _Gleaked // 0x100a + _Gscanleaked = _Gscan + _Gleaked // 0x100a ) const ( diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index bbfd169a4c2260..e8fef35da7d104 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -1206,7 +1206,7 @@ var gStatusStrings = [...]string{ _Gwaiting: "waiting", _Gdead: "dead", _Gcopystack: "copystack", - _Gleaked: "deadlocked", + _Gleaked: "leaked", _Gpreempted: "preempted", } From 305bef914e58e9a5063350f8c940c3f5329f38a6 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Fri, 18 Jul 2025 12:23:09 +0200 Subject: [PATCH 06/36] Fixed bad goroutine status. 
--- src/runtime/proc.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/proc.go b/src/runtime/proc.go index ec8f2a1859768f..c58e9283a98d8c 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -1214,7 +1214,7 @@ func casfrom_Gscanstatus(gp *g, oldval, newval uint32) { _Gscanwaiting, _Gscanrunning, _Gscansyscall, - _Gscandeadlocked, + _Gscanleaked, _Gscanpreempted: if newval == oldval&^_Gscan { success = gp.atomicstatus.CompareAndSwap(oldval, newval) @@ -1235,7 +1235,7 @@ func castogscanstatus(gp *g, oldval, newval uint32) bool { case _Grunnable, _Grunning, _Gwaiting, - _Gdeadlocked, + _Gleaked, _Gsyscall: if newval == oldval|_Gscan { r := gp.atomicstatus.CompareAndSwap(oldval, newval) From 6a5cd909f8e0008fccb8d1440e9e946207dcf836 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Mon, 21 Jul 2025 16:11:30 +0200 Subject: [PATCH 07/36] Addressing some of the comments. --- .../goexperiment/exp_goleakfindergc_off.go | 8 + .../goexperiment/exp_goleakfindergc_on.go | 8 + src/internal/goexperiment/exp_golfgc_off.go | 8 - src/internal/goexperiment/exp_golfgc_on.go | 8 - src/internal/goexperiment/flags.go | 4 +- src/runtime/mbitmap.go | 22 +++ src/runtime/mgc.go | 143 ++++++------------ src/runtime/mgcmark.go | 15 +- src/runtime/proc.go | 2 +- src/runtime/runtime2.go | 3 +- src/runtime/sema.go | 23 +-- 11 files changed, 105 insertions(+), 139 deletions(-) create mode 100644 src/internal/goexperiment/exp_goleakfindergc_off.go create mode 100644 src/internal/goexperiment/exp_goleakfindergc_on.go delete mode 100644 src/internal/goexperiment/exp_golfgc_off.go delete mode 100644 src/internal/goexperiment/exp_golfgc_on.go diff --git a/src/internal/goexperiment/exp_goleakfindergc_off.go b/src/internal/goexperiment/exp_goleakfindergc_off.go new file mode 100644 index 00000000000000..1a141fd5b7cfc7 --- /dev/null +++ b/src/internal/goexperiment/exp_goleakfindergc_off.go @@ -0,0 +1,8 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build !goexperiment.goroutineleakfindergc + +package goexperiment + +const GoroutineLeakFinderGC = false +const GoroutineLeakFinderGCInt = 0 diff --git a/src/internal/goexperiment/exp_goleakfindergc_on.go b/src/internal/goexperiment/exp_goleakfindergc_on.go new file mode 100644 index 00000000000000..8c816645927656 --- /dev/null +++ b/src/internal/goexperiment/exp_goleakfindergc_on.go @@ -0,0 +1,8 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build goexperiment.goroutineleakfindergc + +package goexperiment + +const GoroutineLeakFinderGC = true +const GoroutineLeakFinderGCInt = 1 diff --git a/src/internal/goexperiment/exp_golfgc_off.go b/src/internal/goexperiment/exp_golfgc_off.go deleted file mode 100644 index 547d1b4ffcda95..00000000000000 --- a/src/internal/goexperiment/exp_golfgc_off.go +++ /dev/null @@ -1,8 +0,0 @@ -// Code generated by mkconsts.go. DO NOT EDIT. - -//go:build !goexperiment.golfgc - -package goexperiment - -const GolfGC = false -const GolfGCInt = 0 diff --git a/src/internal/goexperiment/exp_golfgc_on.go b/src/internal/goexperiment/exp_golfgc_on.go deleted file mode 100644 index 830b9015ccbf07..00000000000000 --- a/src/internal/goexperiment/exp_golfgc_on.go +++ /dev/null @@ -1,8 +0,0 @@ -// Code generated by mkconsts.go. DO NOT EDIT. 
- -//go:build goexperiment.golfgc - -package goexperiment - -const GolfGC = true -const GolfGCInt = 1 diff --git a/src/internal/goexperiment/flags.go b/src/internal/goexperiment/flags.go index 17e29a63f2b1fb..4261ab9ca99679 100644 --- a/src/internal/goexperiment/flags.go +++ b/src/internal/goexperiment/flags.go @@ -121,6 +121,6 @@ type Flags struct { // platforms. RandomizedHeapBase64 bool - // GolfGC enables the Deadlock GC implementation. - GolfGC bool + // GoroutineLeakFinderGC enables the Deadlock GC implementation. + GoroutineLeakFinderGC bool } diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 9872e5297fb4b7..05c04e4007ec22 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -1247,6 +1247,28 @@ func markBitsForSpan(base uintptr) (mbits markBits) { return mbits } +// isMarkedOrNotInHeap returns true if a pointer is in the heap and marked, +// or if the pointer is not in the heap. Used by goroutine leak detection +// to determine if concurrency resources are reachable in memory. +func isMarkedOrNotInHeap(p unsafe.Pointer) bool { + obj, span, objIndex := findObject(uintptr(p), 0, 0) + if obj != 0 { + mbits := span.markBitsForIndex(objIndex) + return mbits.isMarked() + } + + // If we fall through to get here, the object is not in the heap. + // In this case, it is either a pointer to a stack object or a global resource. + // Treat it as reachable in memory by default, to be safe. + // + // (vsaioc) TODO: we could possibly be more precise by only checking against the stacks + // of runnable goroutines. I don't think this is necessary, based on what we've seen, but + // let's keep the option open in case the runtime evolves. + // This will (naively) lead to quadratic blow-up for goroutine leak detection, + // but if it is only run on demand, maybe the extra cost is not a show-stopper. + return true +} + // advance advances the markBits to the next object in the span. func (m *markBits) advance() { if m.mask == 1<<7 { diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 709bda103c37e5..0f09ca067bcad3 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -381,16 +381,18 @@ type workType struct { // The following fields monitor the GC phase of the current cycle during // goroutine leak detection. - // - // - pendingGoleakDetection: The GC has been instructed to perform goroutine leak - // detection during the next GC cycle; it is set by DetectGoroutineLeaks() - // and unset during gcStart(). - // - detectingGoleaks: The GC is running in goroutine leak detection mode; it is set - // during gcStart() and unset during gcMarkTermination(). - // - detectedGoleaks: The GC has performed goroutine leak detection during the current - // GC cycle; it is set during gcMarkDone(), right after goroutine leak detection has concluded, - // and unset during gcStart(). - pendingGoleakDetection, detectingGoleaks, detectedGoleaks bool + goroutineLeakFinder struct { + // The GC has been instructed to perform goroutine leak detection during the next GC cycle; + // it is set by DetectGoroutineLeaks() and unset during gcStart(). + pending atomic.Bool + // The GC is running in goroutine leak detection mode; it is set during gcStart() + // and unset during gcMarkTermination(). Is protected by STW. + enabled bool + // The GC has performed goroutine leak detection during the current GC cycle; it is set + // during gcMarkDone(), right after goroutine leak detection has concluded, and unset during + // gcStart(). Is protected by STW. 
+ done bool + } // Base indexes of each root type. Set by gcPrepareMarkRoots. baseData, baseBSS, baseSpans, baseStacks, baseEnd uint32 @@ -571,22 +573,17 @@ func GC() { // FindGoleaks instructs the Go garbage collector to attempt // goroutine leak detection during the next GC cycle. // -// Only operates if golfgc is enabled in GOEXPERIMENT. +// Only operates if goroutineleakfindergc is enabled in GOEXPERIMENT. // Otherwise, it just runs runtime.GC(). func FindGoLeaks() { - if !goexperiment.GolfGC { + if !goexperiment.GoroutineLeakFinderGC { GC() return } - // This write should be thread-safe, as the overwritten value is true. - // pendingGoleakDetection is only set to false under STW at the start - // of the GC cycle that picks it up. - work.pendingGoleakDetection = true + work.goroutineLeakFinder.pending.Store(true) - // This read should be thread-safe for the same reason as the write above above. - // At most, we trigger the GC an additional time. - for work.pendingGoleakDetection { + for work.goroutineLeakFinder.pending.Load() { GC() } } @@ -736,8 +733,8 @@ func gcStart(trigger gcTrigger) { mode = gcForceMode } else if debug.gcstoptheworld == 2 { mode = gcForceBlockMode - } else if goexperiment.GolfGC { - if work.pendingGoleakDetection { + } else if goexperiment.GoroutineLeakFinderGC { + if work.goroutineLeakFinder.pending.Load() { // Fully stop the world if running deadlock detection. mode = gcForceBlockMode } @@ -803,7 +800,7 @@ func gcStart(trigger gcTrigger) { clearpools() work.cycles.Add(1) - work.detectedGoleaks = false + work.goroutineLeakFinder.done = false // Assists and workers can start the moment we start // the world. @@ -835,12 +832,9 @@ func gcStart(trigger gcTrigger) { // possible. setGCPhase(_GCmark) - if goexperiment.GolfGC { - if work.pendingGoleakDetection { - // Write is thread-safe because the world is stopped - work.detectingGoleaks = true - work.pendingGoleakDetection = false - } + if work.goroutineLeakFinder.pending.Load() { + work.goroutineLeakFinder.enabled = true + work.goroutineLeakFinder.pending.Store(false) } gcBgMarkPrepare() // Must happen before assists are enabled. @@ -943,10 +937,8 @@ func gcMarkDone() { // Ensure only one thread is running the ragged barrier at a // time. semacquire(&work.markDoneSema) - if goexperiment.GolfGC { - if work.detectingGoleaks { - gcDiscoverMoreStackRoots() - } + if work.goroutineLeakFinder.enabled { + gcDiscoverMoreStackRoots() } top: @@ -1007,7 +999,8 @@ top: // communicated work since we took markDoneSema. Therefore // there are no grey objects and no more objects can be // shaded. Transition to mark termination. - var now int64 + now := nanotime() + work.tMarkTerm = now getg().m.preemptoff = "gcing" var stw worldStop systemstack(func() { @@ -1053,44 +1046,13 @@ top: }) semrelease(&worldsema) goto top - } else if goexperiment.GolfGC { + } else if goexperiment.GoroutineLeakFinderGC { // If we are detecting goroutine leaks, do so now. - if work.detectingGoleaks && !work.detectedGoleaks { + if work.goroutineLeakFinder.enabled && !work.goroutineLeakFinder.done { // Detect goroutine leaks. If the returned value is true, then // detection was performed during this cycle. Otherwise, more mark work is needed, // or live goroutines were found. - work.detectedGoleaks = findGoleaks() - - getg().m.preemptoff = "" - systemstack(func() { - // Accumulate the time we were stopped before we had to start again. 
- work.cpuStats.accumulateGCPauseTime(nanotime()-stw.finishedStopping, work.maxprocs) - - now := startTheWorldWithSema(0, stw) - work.pauseNS += now - stw.startedStopping - }) - semrelease(&worldsema) - goto top - } - - now = nanotime() - work.tMarkTerm = now - // Check again whether any P needs to flush its write barrier - // to the GC work queue. - systemstack(func() { - for _, p := range allp { - wbBufFlush1(p) - if !p.gcw.empty() { - restart = true - break - } - } - }) - - // If that is the case, restart again. Once restarts are no longer needed, - // run this without deadlock detection. - if restart { - gcDebugMarkDone.restartedDueTo27993 = true + work.goroutineLeakFinder.done = findGoleaks() getg().m.preemptoff = "" systemstack(func() { @@ -1141,24 +1103,11 @@ top: gcMarkTermination(stw) } -// Check if an object is marked in the heap. -func checkIfMarked(p unsafe.Pointer) bool { - obj, span, objIndex := findObject(uintptr(p), 0, 0) - if obj != 0 { - mbits := span.markBitsForIndex(objIndex) - return mbits.isMarked() - } - // if we fall through to get here, we are within the stack ranges of reachable goroutines - return true -} - -// maybeLive checks whether a goroutine may still be semantically runnable. -// This returns true if the goroutine is waiting on at least one concurrency primitive -// which is reachable in memory, i.e., has been by the GC. -// +// checkIfMaybeRunnable checks whether a goroutine may still be semantically runnable. // For goroutines which are semantically runnable, this will eventually return true -// as the GC marking phase progresses. -func (gp *g) maybeLive() bool { +// as the GC marking phase progresses. It returns false for leaked goroutines, or for +// goroutines which are not yet computed as possibly runnable by the GC. +func (gp *g) checkIfMaybeRunnable() bool { // Unmask the goroutine address to ensure we are not // dereferencing a masked address. gp = gp.unmask() @@ -1176,7 +1125,7 @@ func (gp *g) maybeLive() bool { // Cycle all through all *sudog to check whether // the goroutine is waiting on a marked channel. for sg := gp.waiting; sg != nil; sg = sg.waitlink { - if checkIfMarked(unsafe.Pointer(sg.c)) { + if isMarkedOrNotInHeap(unsafe.Pointer(sg.c)) { return true } } @@ -1190,7 +1139,7 @@ func (gp *g) maybeLive() bool { // check if the synchronization primitive attached to the sudog is marked. if gp.waiting != nil { // Unmask the sema address and check if it's marked. - return checkIfMarked(gcUnmask(gp.waiting.elem)) + return isMarkedOrNotInHeap(gcUnmask(gp.waiting.elem)) } } return true @@ -1223,13 +1172,13 @@ func gcDiscoverMoreStackRoots() { // Reorder goroutine list for vIndex < ivIndex { gp := work.stackRoots[vIndex] - if gp.maybeLive() { + if gp.checkIfMaybeRunnable() { work.stackRoots[vIndex] = gp vIndex = vIndex + 1 continue } for ivIndex = ivIndex - 1; ivIndex != vIndex; ivIndex = ivIndex - 1 { - if swapGp := work.stackRoots[ivIndex]; swapGp.maybeLive() { + if swapGp := work.stackRoots[ivIndex]; swapGp.checkIfMaybeRunnable() { work.stackRoots[ivIndex] = gp work.stackRoots[vIndex] = swapGp.unmask() vIndex = vIndex + 1 @@ -1268,7 +1217,7 @@ func findGoleaks() bool { var foundMoreWork bool for i := work.nLiveStackRoots; i < work.nStackRoots; i++ { gp := work.stackRoots[i].unmask() - if readgstatus(gp) == _Gwaiting && !gp.maybeLive() { + if readgstatus(gp) == _Gwaiting && !gp.checkIfMaybeRunnable() { // Blocking unrunnable goroutines will be skipped. 
continue } @@ -1459,12 +1408,8 @@ func gcMarkTermination(stw worldStop) { } systemstack(func() { - if goexperiment.GolfGC { - // Pull the GC out of goroutine leak detection mode. - // Write is thread-safe because the world is stopped, and only one - // GC cycle can run at a time. - work.detectingGoleaks = false - } + // Pull the GC out of goroutine leak detection mode. + work.goroutineLeakFinder.enabled = false // The memstats updated above must be updated with the world // stopped to ensure consistency of some values, such as @@ -1893,12 +1838,12 @@ func gcMarkWorkAvailable(p *p) bool { if !work.full.empty() || !work.spanq.empty() { return true // global work available } - if !work.detectingGoleaks { - return work.markrootNext < work.markrootJobs - } rootNext := atomic.Load(&work.markrootNext) rootJobs := atomic.Load(&work.markrootJobs) - return rootNext < rootJobs + if rootNext < rootJobs { + return true // root scan work available + } + return false } // gcMark runs the mark (or, for concurrent GC, mark termination) diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index bf5510fc643554..c64c34807e9ff0 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -60,7 +60,7 @@ const ( // //go:nosplit func gcMask(p unsafe.Pointer) unsafe.Pointer { - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { return unsafe.Pointer(uintptr(p) | gcBitMask) } return p @@ -70,7 +70,7 @@ func gcMask(p unsafe.Pointer) unsafe.Pointer { // //go:nosplit func gcUnmask(p unsafe.Pointer) unsafe.Pointer { - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { return unsafe.Pointer(uintptr(p) & gcUndoBitMask) } return p @@ -170,19 +170,14 @@ func gcPrepareMarkRoots() { // ignore them because they begin life without any roots, so // there's nothing to scan, and any roots they create during // the concurrent phase will be caught by the write barrier. - if goexperiment.GolfGC { - if work.detectingGoleaks { - work.stackRoots, work.nLiveStackRoots = allGsSnapshotSortedForGC() - } else { - // regular GC --- scan every go routine - work.stackRoots = allGsSnapshot() - work.nLiveStackRoots = len(work.stackRoots) - } + if work.goroutineLeakFinder.enabled { + work.stackRoots, work.nLiveStackRoots = allGsSnapshotSortedForGC() } else { // regular GC --- scan every go routine work.stackRoots = allGsSnapshot() work.nLiveStackRoots = len(work.stackRoots) } + work.nStackRoots = len(work.stackRoots) work.markrootNext = 0 diff --git a/src/runtime/proc.go b/src/runtime/proc.go index c58e9283a98d8c..2e1895d6e683ad 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -709,7 +709,7 @@ func allGsSnapshot() []*g { // monotonically and existing entries never change, so we can // simply return a copy of the slice header. For added safety, // we trim everything past len because that can still change. 
- if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { for i, gp := range allgs { allgs[i] = gp.unmask() } diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 75273aae420e96..fb1984596c9f8c 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -107,8 +107,7 @@ const ( _Gscansyscall = _Gscan + _Gsyscall // 0x1003 _Gscanwaiting = _Gscan + _Gwaiting // 0x1004 _Gscanpreempted = _Gscan + _Gpreempted // 0x1009 - - _Gscanleaked = _Gscan + _Gleaked // 0x100a + _Gscanleaked = _Gscan + _Gleaked // 0x100a ) const ( diff --git a/src/runtime/sema.go b/src/runtime/sema.go index 69a98a24cfe316..08167ff217a6a6 100644 --- a/src/runtime/sema.go +++ b/src/runtime/sema.go @@ -305,7 +305,7 @@ func cansemacquire(addr *uint32) bool { func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { s.g = getg() pAddr := unsafe.Pointer(addr) - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { if syncSema { // Mask the addr so it doesn't get marked during GC // through marking of the treap or marking of the blocked goroutine @@ -322,7 +322,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { pt := &root.treap for t := *pt; t != nil; t = *pt { var cmp bool - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { cmp = uintptr(gcUnmask(pAddr)) == uintptr(gcUnmask(t.elem)) } else { cmp = uintptr(pAddr) == uintptr(t.elem) @@ -373,7 +373,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { return } last = t - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { cmp = uintptr(gcUnmask(pAddr)) < uintptr(gcUnmask(t.elem)) } else { cmp = uintptr(pAddr) < uintptr(t.elem) @@ -426,7 +426,7 @@ func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) for ; s != nil; s = *ps { var cmp bool - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { cmp = gcUnmask(unsafe.Pointer(addr)) == gcUnmask(s.elem) } else { cmp = unsafe.Pointer(addr) == s.elem @@ -435,7 +435,7 @@ func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) goto Found } - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { cmp = uintptr(gcUnmask(unsafe.Pointer(addr))) < uintptr(gcUnmask(s.elem)) } else { cmp = uintptr(unsafe.Pointer(addr)) < uintptr(s.elem) @@ -504,7 +504,8 @@ Found: } tailtime = s.acquiretime } - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { + // Goroutine is no longer blocked. Clear the waiting pointer. s.g.waiting = nil } s.parent = nil @@ -627,8 +628,10 @@ func notifyListWait(l *notifyList, t uint32) { // Enqueue itself. s := acquireSudog() s.g = getg() - if goexperiment.GolfGC { - // Storing this pointer is + if goexperiment.GoroutineLeakFinderGC { + // Storing this pointer (masked) so that we can trace + // the condvar address from the blocked goroutine when + // checking for goroutine leaks. s.elem = gcMask(unsafe.Pointer(l)) s.g.waiting = s } @@ -649,7 +652,9 @@ func notifyListWait(l *notifyList, t uint32) { if t0 != 0 { blockevent(s.releasetime-t0, 2) } - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { + // Goroutine is no longer blocked. Clear up its waiting pointer, + // and clean up the sudog before releasing it. 
s.g.waiting = nil s.elem = nil } From 328c0c482918117ead7c33b659ccaa9116d1df4d Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Thu, 24 Jul 2025 12:18:16 +0200 Subject: [PATCH 08/36] Removed bitmask and switched to tristate pointers (thank you @mknyszek). Reset leaked goroutines to waiting when restarting GC cycle. --- src/runtime/chan.go | 32 +++++++-------- src/runtime/mgc.go | 80 +++++++++++++++++++++--------------- src/runtime/mgcmark.go | 32 ++++----------- src/runtime/proc.go | 18 +++------ src/runtime/runtime2.go | 83 +++++++++++++++++++++++++++++++++++--- src/runtime/select.go | 12 +++--- src/runtime/sema.go | 55 ++++++++----------------- src/runtime/sizeof_test.go | 2 +- src/runtime/stack.go | 17 ++++---- 9 files changed, 187 insertions(+), 144 deletions(-) diff --git a/src/runtime/chan.go b/src/runtime/chan.go index bb554ebfdb1f3a..3fe5d635333a3f 100644 --- a/src/runtime/chan.go +++ b/src/runtime/chan.go @@ -263,11 +263,11 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { } // No stack splits between assigning elem and enqueuing mysg // on gp.waiting where copystack can find it. - mysg.elem = ep + mysg.elem.set(ep) mysg.waitlink = nil mysg.g = gp mysg.isSelect = false - mysg.c = c + mysg.c.set(c) gp.waiting = mysg gp.param = nil c.sendq.enqueue(mysg) @@ -298,7 +298,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { if mysg.releasetime > 0 { blockevent(mysg.releasetime-t0, 2) } - mysg.c = nil + mysg.c.set(nil) releaseSudog(mysg) if closed { if c.closed == 0 { @@ -336,9 +336,9 @@ func send(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) { c.sendx = c.recvx // c.sendx = (c.sendx+1) % c.dataqsiz } } - if sg.elem != nil { + if sg.elem.get() != nil { sendDirect(c.elemtype, sg, ep) - sg.elem = nil + sg.elem.set(nil) } gp := sg.g unlockf() @@ -395,7 +395,7 @@ func sendDirect(t *_type, sg *sudog, src unsafe.Pointer) { // Once we read sg.elem out of sg, it will no longer // be updated if the destination's stack gets copied (shrunk). // So make sure that no preemption points can happen between read & use. - dst := sg.elem + dst := sg.elem.get() typeBitsBulkBarrier(t, uintptr(dst), uintptr(src), t.Size_) // No need for cgo write barrier checks because dst is always // Go memory. @@ -406,7 +406,7 @@ func recvDirect(t *_type, sg *sudog, dst unsafe.Pointer) { // dst is on our stack or the heap, src is on another stack. // The channel is locked, so src will not move during this // operation. - src := sg.elem + src := sg.elem.get() typeBitsBulkBarrier(t, uintptr(dst), uintptr(src), t.Size_) memmove(dst, src, t.Size_) } @@ -441,9 +441,9 @@ func closechan(c *hchan) { if sg == nil { break } - if sg.elem != nil { - typedmemclr(c.elemtype, sg.elem) - sg.elem = nil + if sg.elem.get() != nil { + typedmemclr(c.elemtype, sg.elem.get()) + sg.elem.set(nil) } if sg.releasetime != 0 { sg.releasetime = cputicks() @@ -463,7 +463,7 @@ func closechan(c *hchan) { if sg == nil { break } - sg.elem = nil + sg.elem.set(nil) if sg.releasetime != 0 { sg.releasetime = cputicks() } @@ -642,13 +642,13 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) } // No stack splits between assigning elem and enqueuing mysg // on gp.waiting where copystack can find it. 
- mysg.elem = ep + mysg.elem.set(ep) mysg.waitlink = nil gp.waiting = mysg mysg.g = gp mysg.isSelect = false - mysg.c = c + mysg.c.set(c) gp.param = nil c.recvq.enqueue(mysg) if c.timer != nil { @@ -680,7 +680,7 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) } success := mysg.success gp.param = nil - mysg.c = nil + mysg.c.set(nil) releaseSudog(mysg) return true, success } @@ -727,14 +727,14 @@ func recv(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) { typedmemmove(c.elemtype, ep, qp) } // copy data from sender to queue - typedmemmove(c.elemtype, qp, sg.elem) + typedmemmove(c.elemtype, qp, sg.elem.get()) c.recvx++ if c.recvx == c.dataqsiz { c.recvx = 0 } c.sendx = c.recvx // c.sendx = (c.sendx+1) % c.dataqsiz } - sg.elem = nil + sg.elem.set(nil) gp := sg.g unlockf() gp.param = unsafe.Pointer(sg) diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 0f09ca067bcad3..11883263ff8753 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -816,6 +816,12 @@ func gcStart(trigger gcTrigger) { schedEnableUser(false) } + if work.goroutineLeakFinder.pending.Load() { + work.goroutineLeakFinder.enabled = true + work.goroutineLeakFinder.pending.Store(false) + gcUntrackSyncObjects() + } + // Enter concurrent mark phase and enable // write barriers. // @@ -832,11 +838,6 @@ func gcStart(trigger gcTrigger) { // possible. setGCPhase(_GCmark) - if work.goroutineLeakFinder.pending.Load() { - work.goroutineLeakFinder.enabled = true - work.goroutineLeakFinder.pending.Store(false) - } - gcBgMarkPrepare() // Must happen before assists are enabled. gcPrepareMarkRoots() @@ -1110,8 +1111,6 @@ top: func (gp *g) checkIfMaybeRunnable() bool { // Unmask the goroutine address to ensure we are not // dereferencing a masked address. - gp = gp.unmask() - switch gp.waitreason { case waitReasonSelectNoCases, waitReasonChanSendNilChan, @@ -1125,7 +1124,7 @@ func (gp *g) checkIfMaybeRunnable() bool { // Cycle all through all *sudog to check whether // the goroutine is waiting on a marked channel. for sg := gp.waiting; sg != nil; sg = sg.waitlink { - if isMarkedOrNotInHeap(unsafe.Pointer(sg.c)) { + if isMarkedOrNotInHeap(unsafe.Pointer(sg.c.get())) { return true } } @@ -1139,26 +1138,12 @@ func (gp *g) checkIfMaybeRunnable() bool { // check if the synchronization primitive attached to the sudog is marked. if gp.waiting != nil { // Unmask the sema address and check if it's marked. - return isMarkedOrNotInHeap(gcUnmask(gp.waiting.elem)) + return isMarkedOrNotInHeap(gp.waiting.elem.get()) } } return true } -// unmask returns a *g object with an unmasked address. -// -//go:nosplit -func (gp *g) unmask() *g { - return (*g)(gcUnmask(unsafe.Pointer(gp))) -} - -// mask returns a *g object with a masked address. 
-//
-//go:nosplit
-func (gp *g) mask() *g {
-	return (*g)(gcMask(unsafe.Pointer(gp)))
-}
-
 // Check to see if more blocked but marked goroutines exist;
 // if so add them into root set and increment work.markrootJobs accordingly
 // return true if we need to run another phase of markroots; return false otherwise
@@ -1171,16 +1156,14 @@ func gcDiscoverMoreStackRoots() {
 
 	// Reorder goroutine list
 	for vIndex < ivIndex {
-		gp := work.stackRoots[vIndex]
-		if gp.checkIfMaybeRunnable() {
-			work.stackRoots[vIndex] = gp
+		if work.stackRoots[vIndex].checkIfMaybeRunnable() {
 			vIndex = vIndex + 1
 			continue
 		}
 		for ivIndex = ivIndex - 1; ivIndex != vIndex; ivIndex = ivIndex - 1 {
-			if swapGp := work.stackRoots[ivIndex]; swapGp.checkIfMaybeRunnable() {
-				work.stackRoots[ivIndex] = gp
-				work.stackRoots[vIndex] = swapGp.unmask()
+			if gp := work.stackRoots[ivIndex]; gp.checkIfMaybeRunnable() {
+				work.stackRoots[ivIndex] = work.stackRoots[vIndex]
+				work.stackRoots[vIndex] = gp
 				vIndex = vIndex + 1
 				break
 			}
@@ -1197,6 +1180,35 @@ func gcDiscoverMoreStackRoots() {
 	}
 }
 
+// gcUntrackSyncObjects scans allgs and sets the elem and c fields of all sudogs to
+// an untrackable pointer. This prevents the GC from marking these objects as live in memory
+// by following these pointers when running deadlock detection.
+func gcUntrackSyncObjects() {
+	assertWorldStopped()
+
+	forEachGRace(func(gp *g) {
+		for sg := gp.waiting; sg != nil; sg = sg.waitlink {
+			sg.elem.untrack()
+			sg.c.untrack()
+		}
+	})
+}
+
+// gcRestoreSyncObjects restores the elem and c fields of all sudogs to their original values.
+// Should be invoked after the goroutine leak detection phase.
+//
+//go:nosplit
+func gcRestoreSyncObjects() {
+	assertWorldStopped()
+
+	forEachGRace(func(gp *g) {
+		for sg := gp.waiting; sg != nil; sg = sg.waitlink {
+			sg.elem.track()
+			sg.c.track()
+		}
+	})
+}
+
 // findGoleaks scans the remaining stackRoots and marks any which are
 // blocked over exclusively unreachable concurrency primitives as leaked (deadlocked).
 // Returns true if the goroutine leak check was performed (or unnecessary).
@@ -1216,7 +1228,7 @@ func findGoleaks() bool {
 	// Make sure these are pushed to the runnable set and ready to be marked.
 	var foundMoreWork bool
 	for i := work.nLiveStackRoots; i < work.nStackRoots; i++ {
-		gp := work.stackRoots[i].unmask()
+		gp := work.stackRoots[i]
 		if readgstatus(gp) == _Gwaiting && !gp.checkIfMaybeRunnable() {
 			// Blocking unrunnable goroutines will be skipped.
 			continue
 		}
@@ -1237,7 +1249,7 @@ func findGoleaks() bool {
 
 	// For the remaining goroutines, mark them as unreachable and leaked.
 	for i := work.nLiveStackRoots; i < work.nStackRoots; i++ {
-		gp := work.stackRoots[i].unmask()
+		gp := work.stackRoots[i]
 		casgstatus(gp, _Gwaiting, _Gleaked)
 		fn := findfunc(gp.startpc)
 		if fn.valid() {
@@ -1247,7 +1259,6 @@ func findGoleaks() bool {
 		}
 		traceback(gp.sched.pc, gp.sched.sp, gp.sched.lr, gp)
 		println()
-		work.stackRoots[i] = gp
 	}
 	// Put the remaining roots as ready for marking and drain them.
 	work.markrootJobs += uint32(work.nStackRoots - work.nLiveStackRoots)
 	work.nLiveStackRoots = work.nStackRoots
 	return true
 }
@@ -1407,6 +1418,11 @@ func gcMarkTermination(stw worldStop) {
 		throw("non-concurrent sweep failed to drain all sweep queues")
 	}
 
+	if work.goroutineLeakFinder.enabled {
+		// Restore the elem and c fields of all sudogs to their original values.
+		gcRestoreSyncObjects()
+	}
+
 	systemstack(func() {
 		// Pull the GC out of goroutine leak detection mode.
work.goroutineLeakFinder.enabled = false diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index c64c34807e9ff0..e16d7c35da48ba 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -51,31 +51,8 @@ const ( // Must be a multiple of the pageInUse bitmap element size and // must also evenly divide pagesPerArena. pagesPerSpanRoot = 512 - - gcUndoBitMask = uintptr(uintptrMask >> 2) // This constant reserves some bits of the address space for the GC to use in order to mask addresses. - gcBitMask = ^gcUndoBitMask // This flips every bit in gcUndoBitMask of uinptr width ) -// gcMask masks addresses that should not be automatically marked during the GC. -// -//go:nosplit -func gcMask(p unsafe.Pointer) unsafe.Pointer { - if goexperiment.GoroutineLeakFinderGC { - return unsafe.Pointer(uintptr(p) | gcBitMask) - } - return p -} - -// gcUnmask undoes the bit-mask applied to a pointer. -// -//go:nosplit -func gcUnmask(p unsafe.Pointer) unsafe.Pointer { - if goexperiment.GoroutineLeakFinderGC { - return unsafe.Pointer(uintptr(p) & gcUndoBitMask) - } - return p -} - // internalBlocked returns true if the goroutine is blocked due to an // internal (non-leaking) waitReason, e.g. waiting for the netpoller or garbage collector. // Such goroutines are never leak detection candidates according to the GC. @@ -96,19 +73,24 @@ func (gp *g) internalBlocked() bool { func allGsSnapshotSortedForGC() ([]*g, int) { assertWorldStoppedOrLockHeld(&allglock) + // Reset the status of leaked goroutines in order to improve + // the precision of goroutine leak detection. + for _, gp := range allgs { + gp.atomicstatus.CompareAndSwap(_Gleaked, _Gwaiting) + } + allgsSorted := make([]*g, len(allgs)) // Indices cutting off runnable and blocked Gs. var currIndex, blockedIndex = 0, len(allgsSorted) - 1 for _, gp := range allgs { - gp = gp.unmask() // not sure if we need atomic load because we are stopping the world, // but do it just to be safe for now if status := readgstatus(gp); status != _Gwaiting || gp.internalBlocked() { allgsSorted[currIndex] = gp currIndex++ } else { - allgsSorted[blockedIndex] = gp.mask() + allgsSorted[blockedIndex] = gp blockedIndex-- } } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 2e1895d6e683ad..2be179dbfad794 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -8,7 +8,6 @@ import ( "internal/abi" "internal/cpu" "internal/goarch" - "internal/goexperiment" "internal/goos" "internal/runtime/atomic" "internal/runtime/exithook" @@ -514,7 +513,7 @@ func acquireSudog() *sudog { s := pp.sudogcache[n-1] pp.sudogcache[n-1] = nil pp.sudogcache = pp.sudogcache[:n-1] - if s.elem != nil { + if s.elem.get() != nil { throw("acquireSudog: found s.elem != nil in cache") } releasem(mp) @@ -523,7 +522,7 @@ func acquireSudog() *sudog { //go:nosplit func releaseSudog(s *sudog) { - if s.elem != nil { + if s.elem.get() != nil { throw("runtime: sudog with non-nil elem") } if s.isSelect { @@ -538,7 +537,7 @@ func releaseSudog(s *sudog) { if s.waitlink != nil { throw("runtime: sudog with non-nil waitlink") } - if s.c != nil { + if s.c.get() != nil { throw("runtime: sudog with non-nil c") } gp := getg() @@ -690,7 +689,7 @@ func allgadd(gp *g) { } lock(&allglock) - allgs = append(allgs, gp.mask()) + allgs = append(allgs, gp) if &allgs[0] != allgptr { atomicstorep(unsafe.Pointer(&allgptr), unsafe.Pointer(&allgs[0])) } @@ -709,11 +708,6 @@ func allGsSnapshot() []*g { // monotonically and existing entries never change, so we can // simply return a copy of the slice header. 
For added safety, // we trim everything past len because that can still change. - if goexperiment.GoroutineLeakFinderGC { - for i, gp := range allgs { - allgs[i] = gp.unmask() - } - } return allgs[:len(allgs):len(allgs)] } @@ -735,7 +729,7 @@ func atomicAllGIndex(ptr **g, i uintptr) *g { func forEachG(fn func(gp *g)) { lock(&allglock) for _, gp := range allgs { - fn(gp.unmask()) + fn(gp) } unlock(&allglock) } @@ -748,7 +742,7 @@ func forEachGRace(fn func(gp *g)) { ptr, length := atomicAllG() for i := uintptr(0); i < length; i++ { gp := atomicAllGIndex(ptr, i) - fn(gp.unmask()) + fn(gp) } return } diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index fb1984596c9f8c..e1aaaa40a16b6c 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -319,6 +319,78 @@ type gobuf struct { bp uintptr // for framepointer-enabled architectures } +// maybeLivePtr is a special pointer that is conditionally trackable +// by the GC. It consists of an address as a uintptr (vu) and a pointer +// to a data element (vp). +// +// maybeLivePtr values can be in one of three states: +// 1. Unset: vu == 0 && vp == nil +// 2. Untracked: vu != 0 && vp == nil +// 3. Tracked: vu != 0 && vp != nil +// +// Do not set fields manually. Use methods instead. +// Extend this type with additional methods if needed. +type maybeLivePtr struct { + vp unsafe.Pointer // For liveness only. + vu uintptr // Source of truth. +} + +// untrack unsets the pointer but preserves the address. +// This is used to hide the pointer from the GC. +// +//go:nosplit +func (p *maybeLivePtr) untrack() { + p.vp = nil +} + +// track resets the pointer to the stored address. +// This is used to make the pointer visible to the GC. +// +//go:nosplit +func (p *maybeLivePtr) track() { + p.vp = unsafe.Pointer(p.vu) +} + +// set sets the pointer to the data element and updates the address. +// +//go:nosplit +func (p *maybeLivePtr) set(v unsafe.Pointer) { + p.vp = v + p.vu = uintptr(v) +} + +// get retrieves the pointer to the data element. +// +//go:nosplit +func (p *maybeLivePtr) get() unsafe.Pointer { + return unsafe.Pointer(p.vu) +} + +// uintptr returns the uintptr address of the pointer. +// +//go:nosplit +func (p *maybeLivePtr) uintptr() uintptr { + return p.vu +} + +// maybeLiveChan extends conditionally trackable pointers (maybeLivePtr) +// to track hchan pointers. +// +// Do not set fields manually. Use methods instead. +type maybeLiveChan struct { + maybeLivePtr +} + +//go:nosplit +func (p *maybeLiveChan) set(c *hchan) { + p.maybeLivePtr.set(unsafe.Pointer(c)) +} + +//go:nosplit +func (p *maybeLiveChan) get() *hchan { + return (*hchan)(p.maybeLivePtr.get()) +} + // sudog (pseudo-g) represents a g in a wait list, such as for sending/receiving // on a channel. // @@ -338,7 +410,8 @@ type sudog struct { next *sudog prev *sudog - elem unsafe.Pointer // data element (may point to stack) + + elem maybeLivePtr // data element (may point to stack) // The following fields are never accessed concurrently. // For channels, waitlink is only accessed by g. @@ -366,10 +439,10 @@ type sudog struct { // in the second entry in the list.) 
waiters uint16 - parent *sudog // semaRoot binary tree - waitlink *sudog // g.waiting list or semaRoot - waittail *sudog // semaRoot - c *hchan // channel + parent *sudog // semaRoot binary tree + waitlink *sudog // g.waiting list or semaRoot + waittail *sudog // semaRoot + c maybeLiveChan // channel } type libcall struct { diff --git a/src/runtime/select.go b/src/runtime/select.go index ae7754b17377dd..d94a08c2651082 100644 --- a/src/runtime/select.go +++ b/src/runtime/select.go @@ -83,7 +83,7 @@ func selparkcommit(gp *g, _ unsafe.Pointer) bool { // channels in lock order. var lastc *hchan for sg := gp.waiting; sg != nil; sg = sg.waitlink { - if sg.c != lastc && lastc != nil { + if sg.c.get() != lastc && lastc != nil { // As soon as we unlock the channel, fields in // any sudog with that channel may change, // including c and waitlink. Since multiple @@ -92,7 +92,7 @@ func selparkcommit(gp *g, _ unsafe.Pointer) bool { // of a channel. unlock(&lastc.lock) } - lastc = sg.c + lastc = sg.c.get() } if lastc != nil { unlock(&lastc.lock) @@ -320,12 +320,12 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, nsends, nrecvs int, blo sg.isSelect = true // No stack splits between assigning elem and enqueuing // sg on gp.waiting where copystack can find it. - sg.elem = cas.elem + sg.elem.set(cas.elem) sg.releasetime = 0 if t0 != 0 { sg.releasetime = -1 } - sg.c = c + sg.c.set(c) // Construct waiting list in lock order. *nextp = sg nextp = &sg.waitlink @@ -368,8 +368,8 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, nsends, nrecvs int, blo // Clear all elem before unlinking from gp.waiting. for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink { sg1.isSelect = false - sg1.elem = nil - sg1.c = nil + sg1.elem.set(nil) + sg1.c.set(nil) } gp.waiting = nil diff --git a/src/runtime/sema.go b/src/runtime/sema.go index 08167ff217a6a6..e240d336202ac5 100644 --- a/src/runtime/sema.go +++ b/src/runtime/sema.go @@ -304,16 +304,14 @@ func cansemacquire(addr *uint32) bool { // queue adds s to the blocked goroutines in semaRoot. func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { s.g = getg() - pAddr := unsafe.Pointer(addr) - if goexperiment.GoroutineLeakFinderGC { - if syncSema { - // Mask the addr so it doesn't get marked during GC - // through marking of the treap or marking of the blocked goroutine - pAddr = gcMask(unsafe.Pointer(addr)) - s.g.waiting = s - } + s.elem.set(unsafe.Pointer(addr)) + if goexperiment.GoroutineLeakFinderGC && syncSema { + s.g.waiting = s + // When dealing with sync semaphores, hide the elem field from the GC + // to prevent it from prematurely marking the semaphore when running + // goroutine leak detection. + s.elem.untrack() } - s.elem = pAddr s.next = nil s.prev = nil s.waiters = 0 @@ -321,13 +319,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { var last *sudog pt := &root.treap for t := *pt; t != nil; t = *pt { - var cmp bool - if goexperiment.GoroutineLeakFinderGC { - cmp = uintptr(gcUnmask(pAddr)) == uintptr(gcUnmask(t.elem)) - } else { - cmp = uintptr(pAddr) == uintptr(t.elem) - } - if cmp { + if uintptr(unsafe.Pointer(addr)) == t.elem.uintptr() { // Already have addr in list. if lifo { // Substitute s in t's place in treap. 
@@ -373,12 +365,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { return } last = t - if goexperiment.GoroutineLeakFinderGC { - cmp = uintptr(gcUnmask(pAddr)) < uintptr(gcUnmask(t.elem)) - } else { - cmp = uintptr(pAddr) < uintptr(t.elem) - } - if cmp { + if uintptr(unsafe.Pointer(addr)) < t.elem.uintptr() { pt = &t.prev } else { pt = &t.next @@ -425,22 +412,11 @@ func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) s := *ps for ; s != nil; s = *ps { - var cmp bool - if goexperiment.GoroutineLeakFinderGC { - cmp = gcUnmask(unsafe.Pointer(addr)) == gcUnmask(s.elem) - } else { - cmp = unsafe.Pointer(addr) == s.elem - } - if cmp { + if uintptr(unsafe.Pointer(addr)) == s.elem.uintptr() { goto Found } - if goexperiment.GoroutineLeakFinderGC { - cmp = uintptr(gcUnmask(unsafe.Pointer(addr))) < uintptr(gcUnmask(s.elem)) - } else { - cmp = uintptr(unsafe.Pointer(addr)) < uintptr(s.elem) - } - if cmp { + if uintptr(unsafe.Pointer(addr)) < s.elem.uintptr() { ps = &s.prev } else { ps = &s.next @@ -509,7 +485,7 @@ Found: s.g.waiting = nil } s.parent = nil - s.elem = nil + s.elem.set(nil) s.next = nil s.prev = nil s.ticket = 0 @@ -629,10 +605,11 @@ func notifyListWait(l *notifyList, t uint32) { s := acquireSudog() s.g = getg() if goexperiment.GoroutineLeakFinderGC { - // Storing this pointer (masked) so that we can trace + // Storing this pointer (invisible to GC) so that we can trace // the condvar address from the blocked goroutine when // checking for goroutine leaks. - s.elem = gcMask(unsafe.Pointer(l)) + s.elem.set(unsafe.Pointer(l)) + s.elem.untrack() s.g.waiting = s } s.ticket = t @@ -656,7 +633,7 @@ func notifyListWait(l *notifyList, t uint32) { // Goroutine is no longer blocked. Clear up its waiting pointer, // and clean up the sudog before releasing it. s.g.waiting = nil - s.elem = nil + s.elem.set(nil) } releaseSudog(s) } diff --git a/src/runtime/sizeof_test.go b/src/runtime/sizeof_test.go index de859866a5adb2..5888177f0ea7a1 100644 --- a/src/runtime/sizeof_test.go +++ b/src/runtime/sizeof_test.go @@ -22,7 +22,7 @@ func TestSizeof(t *testing.T) { _64bit uintptr // size on 64bit platforms }{ {runtime.G{}, 280 + xreg, 440 + xreg}, // g, but exported for testing - {runtime.Sudog{}, 56, 88}, // sudog, but exported for testing + {runtime.Sudog{}, 64, 104}, // sudog, but exported for testing } if xreg > runtime.PtrSize { diff --git a/src/runtime/stack.go b/src/runtime/stack.go index a338708d76fca8..8c78965d372223 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -821,7 +821,8 @@ func adjustsudogs(gp *g, adjinfo *adjustinfo) { // the data elements pointed to by a SudoG structure // might be in the stack. for s := gp.waiting; s != nil; s = s.waitlink { - adjustpointer(adjinfo, unsafe.Pointer(&s.elem)) + adjustpointer(adjinfo, unsafe.Pointer(&s.elem.vu)) + adjustpointer(adjinfo, unsafe.Pointer(&s.elem.vp)) } } @@ -834,7 +835,7 @@ func fillstack(stk stack, b byte) { func findsghi(gp *g, stk stack) uintptr { var sghi uintptr for sg := gp.waiting; sg != nil; sg = sg.waitlink { - p := uintptr(sg.elem) + uintptr(sg.c.elemsize) + p := sg.elem.uintptr() + uintptr(sg.c.get().elemsize) if stk.lo <= p && p < stk.hi && p > sghi { sghi = p } @@ -853,7 +854,7 @@ func syncadjustsudogs(gp *g, used uintptr, adjinfo *adjustinfo) uintptr { // Lock channels to prevent concurrent send/receive. 
 	var lastc *hchan
 	for sg := gp.waiting; sg != nil; sg = sg.waitlink {
-		if sg.c != lastc {
+		if sg.c.get() != lastc {
 			// There is a ranking cycle here between gscan bit and
 			// hchan locks. Normally, we only allow acquiring hchan
 			// locks and then getting a gscan bit. In this case, we
@@ -863,9 +864,9 @@ func syncadjustsudogs(gp *g, used uintptr, adjinfo *adjustinfo) uintptr {
 			// suspended. So, we get a special hchan lock rank here
 			// that is lower than gscan, but doesn't allow acquiring
 			// any other locks other than hchan.
-			lockWithRank(&sg.c.lock, lockRankHchanLeaf)
+			lockWithRank(&sg.c.get().lock, lockRankHchanLeaf)
 		}
-		lastc = sg.c
+		lastc = sg.c.get()
 	}
 
 	// Adjust sudogs.
@@ -885,10 +886,10 @@ func syncadjustsudogs(gp *g, used uintptr, adjinfo *adjustinfo) uintptr {
 	// Unlock channels.
 	lastc = nil
 	for sg := gp.waiting; sg != nil; sg = sg.waitlink {
-		if sg.c != lastc {
-			unlock(&sg.c.lock)
+		if sg.c.get() != lastc {
+			unlock(&sg.c.get().lock)
 		}
-		lastc = sg.c
+		lastc = sg.c.get()
 	}
 
 	return sgsize

From aadb7649151c016f2355bd289db33b2111d94577 Mon Sep 17 00:00:00 2001
From: Vlad Saioc
Date: Thu, 24 Jul 2025 13:56:23 +0200
Subject: [PATCH 09/36] Switched markrootNext and markrootJobs to atomic.Uint32. Added GODEBUG flag that makes the GC continuously run leak detection.

---
 src/runtime/mgc.go      | 30 +++++++++++++-----------------
 src/runtime/mgcmark.go  | 24 +++++++++---------------
 src/runtime/runtime1.go |  2 ++
 3 files changed, 24 insertions(+), 32 deletions(-)

diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 11883263ff8753..3d56c11862d9f0 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -364,8 +364,8 @@ type workType struct {
 	// (and thus 8-byte alignment even on 32-bit architectures).
 	bytesMarked uint64
 
-	markrootNext uint32 // next markroot job
-	markrootJobs uint32 // number of markroot jobs
+	markrootNext atomic.Uint32 // next markroot job
+	markrootJobs atomic.Uint32 // number of markroot jobs
 
 	nproc  uint32
 	tstart int64
@@ -734,7 +734,8 @@ func gcStart(trigger gcTrigger) {
 	} else if debug.gcstoptheworld == 2 {
 		mode = gcForceBlockMode
 	} else if goexperiment.GoroutineLeakFinderGC {
-		if work.goroutineLeakFinder.pending.Load() {
+		if work.goroutineLeakFinder.pending.Load() ||
+			debug.gcgoroutineleaks > 0 {
 			// Fully stop the world if running deadlock detection.
 			mode = gcForceBlockMode
 		}
@@ -816,7 +817,8 @@ func gcStart(trigger gcTrigger) {
 		schedEnableUser(false)
 	}
 
-	if work.goroutineLeakFinder.pending.Load() {
+	if work.goroutineLeakFinder.pending.Load() ||
+		debug.gcgoroutineleaks > 0 {
 		work.goroutineLeakFinder.enabled = true
 		work.goroutineLeakFinder.pending.Store(false)
 		gcUntrackSyncObjects()
@@ -1170,13 +1172,11 @@ func gcDiscoverMoreStackRoots() {
 		}
 	}
 
-	var oldRootJobs int32 = int32(atomic.Load(&work.markrootJobs))
 	var newRootJobs int32 = int32(work.baseStacks) + int32(vIndex)
-
-	if newRootJobs > oldRootJobs {
+	if newRootJobs > int32(work.markrootJobs.Load()) {
 		// reset markrootNext as it could have been incremented past markrootJobs
 		work.nLiveStackRoots = vIndex
-		atomic.Store(&work.markrootJobs, uint32(newRootJobs))
+		work.markrootJobs.Store(uint32(newRootJobs))
 	}
 }
 
@@ -1237,7 +1237,7 @@ func findGoleaks() bool {
 		work.stackRoots[work.nLiveStackRoots] = gp
 		work.nLiveStackRoots += 1
 		// We now have one more markroot job.
-		work.markrootJobs += 1
+		work.markrootJobs.Add(1)
 		// We might still have some work to do.
 		// Make sure in the next iteration we will check re-check for new runnable goroutines.
foundMoreWork = true @@ -1261,7 +1261,7 @@ func findGoleaks() bool { println() } // Put the remaining roots as ready for marking and drain them. - work.markrootJobs += uint32(work.nStackRoots - work.nLiveStackRoots) + work.markrootJobs.Add(int32(work.nStackRoots - work.nLiveStackRoots)) work.nLiveStackRoots = work.nStackRoots return true } @@ -1854,9 +1854,7 @@ func gcMarkWorkAvailable(p *p) bool { if !work.full.empty() || !work.spanq.empty() { return true // global work available } - rootNext := atomic.Load(&work.markrootNext) - rootJobs := atomic.Load(&work.markrootJobs) - if rootNext < rootJobs { + if work.markrootNext.Load() < work.markrootJobs.Load() { return true // root scan work available } return false @@ -1872,10 +1870,8 @@ func gcMark(startTime int64) { work.tstart = startTime // Check that there's no marking work remaining. - rootNext := atomic.Load(&work.markrootNext) - rootJobs := atomic.Load(&work.markrootJobs) - if work.full != 0 || rootNext < rootJobs { - print("runtime: full=", hex(work.full), " next=", rootNext, " jobs=", rootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n") + if work.full != 0 || work.markrootNext.Load() < work.markrootJobs.Load() { + print("runtime: full=", hex(work.full), " next=", work.markrootNext.Load(), " jobs=", work.markrootJobs.Load(), " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n") panic("non-empty mark queue after concurrent mark") } diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index e16d7c35da48ba..05e9b3e0f7f2bb 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -162,8 +162,8 @@ func gcPrepareMarkRoots() { work.nStackRoots = len(work.stackRoots) - work.markrootNext = 0 - work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nLiveStackRoots) + work.markrootNext.Store(0) + work.markrootJobs.Store(uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nLiveStackRoots)) // Calculate base indexes of each root type work.baseData = uint32(fixedRootCount) @@ -176,10 +176,8 @@ func gcPrepareMarkRoots() { // gcMarkRootCheck checks that all roots have been scanned. It is // purely for debugging. func gcMarkRootCheck() { - rootNext := atomic.Load(&work.markrootNext) - rootJobs := atomic.Load(&work.markrootJobs) - if rootNext < rootJobs { - print(rootNext, " of ", rootJobs, " markroot jobs done\n") + if work.markrootNext.Load() < work.markrootJobs.Load() { + print(work.markrootNext.Load(), " of ", work.markrootJobs.Load(), " markroot jobs done\n") throw("left over markroot jobs") } @@ -1197,21 +1195,19 @@ func gcDrainMarkWorkerFractional(gcw *gcWork) { func gcUpdateMarkrootNext() (uint32, bool) { var success bool - var next uint32 = atomic.Load(&work.markrootNext) - var jobs uint32 = atomic.Load(&work.markrootJobs) + next, jobs := work.markrootNext.Load(), work.markrootJobs.Load() if next < jobs { // still work available at the moment for !success { - success = atomic.Cas(&work.markrootNext, next, next+1) + success = work.markrootNext.CompareAndSwap(next, next+1) // We manage to snatch a root job. Return the root index. if success { return next, true } // Get the latest value of markrootNext. - next = atomic.Load(&work.markrootNext) - jobs := atomic.Load(&work.markrootJobs) + next = work.markrootNext.Load() // We are out of markroot jobs. 
if next >= jobs { break @@ -1279,9 +1275,7 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { } } - rootNext := atomic.Load(&work.markrootNext) - rootJobs := atomic.Load(&work.markrootJobs) - if rootNext < rootJobs { + if work.markrootNext.Load() < work.markrootJobs.Load() { // Stop if we're preemptible, if someone wants to STW, or if // someone is calling forEachP. for !(gp.preempt && (preemptible || sched.gcwaiting.Load() || pp.runSafePointFn != 0)) { @@ -1432,7 +1426,7 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 { wbBufFlush() if b = gcw.tryGetObj(); b == 0 { // Try to do a root job. - if atomic.Load(&work.markrootNext) < atomic.Load(&work.markrootJobs) { + if work.markrootNext.Load() < work.markrootJobs.Load() { job, success := gcUpdateMarkrootNext() if success { workFlushed += markroot(gcw, job, false) diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go index 424745d2357dc9..0ebef6d6e2adb1 100644 --- a/src/runtime/runtime1.go +++ b/src/runtime/runtime1.go @@ -316,6 +316,7 @@ var debug struct { dontfreezetheworld int32 efence int32 gccheckmark int32 + gcgoroutineleaks int32 gcpacertrace int32 gcshrinkstackoff int32 gcstoptheworld int32 @@ -381,6 +382,7 @@ var dbgvars = []*dbgVar{ {name: "efence", value: &debug.efence}, {name: "gccheckmark", value: &debug.gccheckmark}, {name: "gcpacertrace", value: &debug.gcpacertrace}, + {name: "gcgoroutineleaks", value: &debug.gcgoroutineleaks}, {name: "gcshrinkstackoff", value: &debug.gcshrinkstackoff}, {name: "gcstoptheworld", value: &debug.gcstoptheworld}, {name: "gctrace", value: &debug.gctrace}, From b5a4ae9af1d541262eebee564845e5d2dbd58d36 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Fri, 25 Jul 2025 11:52:25 +0200 Subject: [PATCH 10/36] Tests for goroutine leak finder GC --- src/runtime/crash_test.go | 17 ++ src/runtime/gc_test.go | 138 +++++++++++++ src/runtime/mgc.go | 12 +- .../testdata/testprog/goroutineleakgc.go | 186 ++++++++++++++++++ 4 files changed, 350 insertions(+), 3 deletions(-) create mode 100644 src/runtime/testdata/testprog/goroutineleakgc.go diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go index 2db86e0562d6ae..156e072a12a6d0 100644 --- a/src/runtime/crash_test.go +++ b/src/runtime/crash_test.go @@ -186,6 +186,23 @@ func buildTestProg(t *testing.T, binary string, flags ...string) (string, error) t.Logf("running %v", cmd) cmd.Dir = "testdata/" + binary cmd = testenv.CleanCmdEnv(cmd) + + // Add the goroutineleakfindergc GOEXPERIMENT unconditionally since some tests depend on it. + // TODO(61405): Remove this once it's enabled by default. + // + // FIXME: Remove this once profiling is enabled and goroutineleakfindergc experiment is phased out. 
+ edited := false + for i := range cmd.Env { + e := cmd.Env[i] + if _, vars, ok := strings.Cut(e, "GOEXPERIMENT="); ok { + cmd.Env[i] = "GOEXPERIMENT=" + vars + ",goroutineleakfindergc" + edited = true + } + } + if !edited { + cmd.Env = append(cmd.Env, "GOEXPERIMENT=goroutineleakfindergc") + } + out, err := cmd.CombinedOutput() if err != nil { target.err = fmt.Errorf("building %s %v: %v\n%s", binary, flags, err, out) diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 0a1e01cbcf9d7c..6143e6708326ca 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -14,6 +14,7 @@ import ( "math/rand" "os" "reflect" + "regexp" "runtime" "runtime/debug" "slices" @@ -1095,3 +1096,140 @@ func TestDetectFinalizerAndCleanupLeaks(t *testing.T) { t.Fatalf("expected %d symbolized locations, got:\n%s", wantSymbolizedLocations, got) } } + +func TestGoroutineLeakGC(t *testing.T) { + type testCase struct { + tname string + funcName string + expectedLeaks map[*regexp.Regexp]int + } + + testCases := []testCase{{ + tname: "ChanReceiveNil", + funcName: "GoroutineLeakNilRecv", + expectedLeaks: map[*regexp.Regexp]int{ + regexp.MustCompile(`\[chan receive \(nil chan\)\]`): 0, + }, + }, { + tname: "ChanSendNil", + funcName: "GoroutineLeakNilSend", + expectedLeaks: map[*regexp.Regexp]int{ + regexp.MustCompile(`\[chan send \(nil chan\)\]`): 0, + }, + }, { + tname: "SelectNoCases", + funcName: "GoroutineLeakSelectNoCases", + expectedLeaks: map[*regexp.Regexp]int{ + regexp.MustCompile(`\[select \(no cases\)\]`): 0, + }, + }, { + tname: "ChanRecv", + funcName: "GoroutineLeakChanRecv", + expectedLeaks: map[*regexp.Regexp]int{ + regexp.MustCompile(`\[chan receive\]`): 0, + }, + }, { + tname: "ChanSend", + funcName: "GoroutineLeakChanSend", + expectedLeaks: map[*regexp.Regexp]int{ + regexp.MustCompile(`\[chan send\]`): 0, + }, + }, { + tname: "Select", + funcName: "GoroutineLeakSelect", + expectedLeaks: map[*regexp.Regexp]int{ + regexp.MustCompile(`\[select\]`): 0, + }, + }, { + tname: "WaitGroup", + funcName: "GoroutineLeakWaitGroup", + expectedLeaks: map[*regexp.Regexp]int{ + regexp.MustCompile(`\[sync\.WaitGroup\.Wait\]`): 0, + }, + }, { + tname: "MutexStack", + funcName: "GoroutineLeakMutexStack", + expectedLeaks: map[*regexp.Regexp]int{ + regexp.MustCompile(`\[sync\.Mutex\.Lock\]`): 0, + }, + }, { + tname: "MutexHeap", + funcName: "GoroutineLeakMutexHeap", + expectedLeaks: map[*regexp.Regexp]int{ + regexp.MustCompile(`\[sync\.Mutex\.Lock\]`): 0, + }, + }, { + tname: "Cond", + funcName: "GoroutineLeakCond", + expectedLeaks: map[*regexp.Regexp]int{ + regexp.MustCompile(`\[sync\.Cond\.Wait\]`): 0, + }, + }, { + tname: "RWMutexRLock", + funcName: "GoroutineLeakRWMutexRLock", + expectedLeaks: map[*regexp.Regexp]int{ + regexp.MustCompile(`\[sync\.RWMutex\.RLock\]`): 0, + }, + }, { + tname: "RWMutexLock", + funcName: "GoroutineLeakRWMutexLock", + expectedLeaks: map[*regexp.Regexp]int{ + // Invoking Lock on a RWMutex may either put a goroutine a waiting state + // of either sync.RWMutex.Lock or sync.Mutex.Lock. 
+ regexp.MustCompile(`\[sync\.(RW)?Mutex\.Lock\]`): 0, + }, + }, { + tname: "Mixed", + funcName: "GoroutineLeakMixed", + expectedLeaks: map[*regexp.Regexp]int{ + regexp.MustCompile(`\[sync\.WaitGroup\.Wait\]`): 0, + regexp.MustCompile(`\[chan send\]`): 0, + }, + }, { + tname: "NoLeakGlobal", + funcName: "NoGoroutineLeakGlobal", + }} + + failStates := regexp.MustCompile(`fatal|panic`) + + for _, tcase := range testCases { + t.Run(tcase.tname, func(t *testing.T) { + exe, err := buildTestProg(t, "testprog") + if err != nil { + t.Fatal(fmt.Sprintf("building testprog failed: %v", err)) + } + output := runBuiltTestProg(t, exe, tcase.funcName, "GODEBUG=gctrace=1,gcgoroutineleaks=1") + + if len(tcase.expectedLeaks) == 0 && strings.Contains(output, "goroutine leak!") { + t.Fatalf("output:\n%s\n\nunexpected goroutines leaks detected", output) + return + } + + if failStates.MatchString(output) { + t.Fatalf("output:\n%s\n\nunexpected fatal exception or panic", output) + return + } + + for _, line := range strings.Split(output, "\n") { + if strings.Contains(line, "goroutine leak!") { + for expectedLeak, count := range tcase.expectedLeaks { + if expectedLeak.MatchString(line) { + tcase.expectedLeaks[expectedLeak] = count + 1 + } + } + } + } + + missingLeakStrs := make([]string, 0, len(tcase.expectedLeaks)) + for expectedLeak, count := range tcase.expectedLeaks { + if count == 0 { + missingLeakStrs = append(missingLeakStrs, expectedLeak.String()) + } + } + + if len(missingLeakStrs) > 0 { + t.Fatalf("output:\n%s\n\nnot enough goroutines leaks detected. Missing:\n%s", output, strings.Join(missingLeakStrs, ", ")) + } + }) + } +} diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 3d56c11862d9f0..c9a86470b0e99a 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1253,9 +1253,11 @@ func findGoleaks() bool { casgstatus(gp, _Gwaiting, _Gleaked) fn := findfunc(gp.startpc) if fn.valid() { - print("goroutine leak! goroutine ", gp.goid, ": ", funcname(fn), " Stack size: ", gp.stack.hi-gp.stack.lo, " bytes\n") + print("goroutine leak! goroutine ", gp.goid, ": ", funcname(fn), " Stack size: ", gp.stack.hi-gp.stack.lo, " bytes ", + "[", waitReasonStrings[gp.waitreason], "]\n") } else { - print("goroutine leak! goroutine ", gp.goid, ": !unnamed goroutine!", " Stack size: ", gp.stack.hi-gp.stack.lo, " bytes\n") + print("goroutine leak! 
goroutine ", gp.goid, ": !unnamed goroutine!", " Stack size: ", gp.stack.hi-gp.stack.lo, " bytes ", + "[", waitReasonStrings[gp.waitreason], "]\n") } traceback(gp.sched.pc, gp.sched.sp, gp.sched.lr, gp) println() @@ -1500,7 +1502,11 @@ func gcMarkTermination(stw worldStop) { printlock() print("gc ", memstats.numgc, " @", string(itoaDiv(sbuf[:], uint64(work.tSweepTerm-runtimeInitTime)/1e6, 3)), "s ", - util, "%: ") + util, "%") + if work.goroutineLeakFinder.done { + print(" (goroutine leak finder GC)") + } + print(": ") prev := work.tSweepTerm for i, ns := range []int64{work.tMark, work.tMarkTerm, work.tEnd} { if i != 0 { diff --git a/src/runtime/testdata/testprog/goroutineleakgc.go b/src/runtime/testdata/testprog/goroutineleakgc.go new file mode 100644 index 00000000000000..fb255afa41cab8 --- /dev/null +++ b/src/runtime/testdata/testprog/goroutineleakgc.go @@ -0,0 +1,186 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("GoroutineLeakNilRecv", GoroutineLeakNilRecv) + register("GoroutineLeakNilSend", GoroutineLeakNilSend) + register("GoroutineLeakSelectNoCases", GoroutineLeakSelectNoCases) + register("GoroutineLeakChanRecv", GoroutineLeakChanRecv) + register("GoroutineLeakChanSend", GoroutineLeakChanSend) + register("GoroutineLeakSelect", GoroutineLeakSelect) + register("GoroutineLeakWaitGroup", GoroutineLeakWaitGroup) + register("GoroutineLeakMutexStack", GoroutineLeakMutexStack) + register("GoroutineLeakMutexHeap", GoroutineLeakMutexHeap) + register("GoroutineLeakRWMutexRLock", GoroutineLeakRWMutexRLock) + register("GoroutineLeakRWMutexLock", GoroutineLeakRWMutexLock) + register("GoroutineLeakCond", GoroutineLeakCond) + register("GoroutineLeakMixed", GoroutineLeakMixed) + register("NoGoroutineLeakGlobal", NoGoroutineLeakGlobal) +} + +func GoroutineLeakNilRecv() { + go func() { + var c chan int + <-c + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func GoroutineLeakNilSend() { + go func() { + var c chan int + c <- 0 + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func GoroutineLeakChanRecv() { + go func() { + <-make(chan int) + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func GoroutineLeakSelectNoCases() { + go func() { + select {} + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func GoroutineLeakChanSend() { + go func() { + make(chan int) <- 0 + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func GoroutineLeakSelect() { + go func() { + select { + case make(chan int) <- 0: + case <-make(chan int): + } + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func GoroutineLeakWaitGroup() { + go func() { + var wg sync.WaitGroup + wg.Add(1) + wg.Wait() + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func GoroutineLeakMutexStack() { + for i := 0; i < 1000; i++ { + go func() { + var mu sync.Mutex + mu.Lock() + mu.Lock() + panic("should not be reached") + }() + } + time.Sleep(10 * time.Millisecond) + runtime.GC() + time.Sleep(10 * time.Millisecond) +} + +func GoroutineLeakMutexHeap() { + for i := 0; i < 1000; i++ { + go func() { + mu := &sync.Mutex{} + go func() { + mu.Lock() + mu.Lock() + panic("should not be reached") + }() + }() + } + time.Sleep(10 * time.Millisecond) + runtime.GC() + time.Sleep(10 * time.Millisecond) +} + +func 
GoroutineLeakRWMutexRLock() { + go func() { + mu := &sync.RWMutex{} + mu.Lock() + mu.RLock() + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func GoroutineLeakRWMutexLock() { + go func() { + mu := &sync.RWMutex{} + mu.Lock() + mu.Lock() + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func GoroutineLeakCond() { + go func() { + cond := sync.NewCond(&sync.Mutex{}) + cond.L.Lock() + cond.Wait() + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func GoroutineLeakMixed() { + go func() { + ch := make(chan int) + wg := sync.WaitGroup{} + wg.Add(1) + go func() { + ch <- 0 + wg.Done() + panic("should not be reached") + }() + wg.Wait() + <-ch + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +var ch = make(chan int) + +// No leak should be reported by this test +func NoGoroutineLeakGlobal() { + go func() { + <-ch + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} From a3d96f8546ff317ed3ed33f062e107fffe0d86a5 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Fri, 25 Jul 2025 13:34:42 +0200 Subject: [PATCH 11/36] Renamed maybeLivePtr to maybeTraceablePtr. --- src/runtime/mgc.go | 8 ++++---- src/runtime/runtime2.go | 42 ++++++++++++++++++++--------------------- src/runtime/sema.go | 4 ++-- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index c9a86470b0e99a..53a041eaf7eaf6 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1188,8 +1188,8 @@ func gcUntrackSyncObjects() { forEachGRace(func(gp *g) { for sg := gp.waiting; sg != nil; sg = sg.waitlink { - sg.elem.untrack() - sg.c.untrack() + sg.elem.setUntraceable() + sg.c.setUntraceable() } }) } @@ -1203,8 +1203,8 @@ func gcRestoreSyncObjects() { forEachGRace(func(gp *g) { for sg := gp.waiting; sg != nil; sg = sg.waitlink { - sg.elem.track() - sg.c.track() + sg.elem.setTraceable() + sg.c.setTraceable() } }) } diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index e1aaaa40a16b6c..7193d97c9e75f6 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -319,18 +319,18 @@ type gobuf struct { bp uintptr // for framepointer-enabled architectures } -// maybeLivePtr is a special pointer that is conditionally trackable +// maybeTraceablePtr is a special pointer that is conditionally trackable // by the GC. It consists of an address as a uintptr (vu) and a pointer // to a data element (vp). // -// maybeLivePtr values can be in one of three states: +// maybeTraceablePtr values can be in one of three states: // 1. Unset: vu == 0 && vp == nil // 2. Untracked: vu != 0 && vp == nil // 3. Tracked: vu != 0 && vp != nil // // Do not set fields manually. Use methods instead. // Extend this type with additional methods if needed. -type maybeLivePtr struct { +type maybeTraceablePtr struct { vp unsafe.Pointer // For liveness only. vu uintptr // Source of truth. } @@ -339,22 +339,22 @@ type maybeLivePtr struct { // This is used to hide the pointer from the GC. // //go:nosplit -func (p *maybeLivePtr) untrack() { +func (p *maybeTraceablePtr) setUntraceable() { p.vp = nil } -// track resets the pointer to the stored address. +// setTraceable resets the pointer to the stored address. // This is used to make the pointer visible to the GC. 
// //go:nosplit -func (p *maybeLivePtr) track() { +func (p *maybeTraceablePtr) setTraceable() { p.vp = unsafe.Pointer(p.vu) } // set sets the pointer to the data element and updates the address. // //go:nosplit -func (p *maybeLivePtr) set(v unsafe.Pointer) { +func (p *maybeTraceablePtr) set(v unsafe.Pointer) { p.vp = v p.vu = uintptr(v) } @@ -362,33 +362,33 @@ func (p *maybeLivePtr) set(v unsafe.Pointer) { // get retrieves the pointer to the data element. // //go:nosplit -func (p *maybeLivePtr) get() unsafe.Pointer { +func (p *maybeTraceablePtr) get() unsafe.Pointer { return unsafe.Pointer(p.vu) } // uintptr returns the uintptr address of the pointer. // //go:nosplit -func (p *maybeLivePtr) uintptr() uintptr { +func (p *maybeTraceablePtr) uintptr() uintptr { return p.vu } -// maybeLiveChan extends conditionally trackable pointers (maybeLivePtr) +// maybeTraceableChan extends conditionally trackable pointers (maybeTraceablePtr) // to track hchan pointers. // // Do not set fields manually. Use methods instead. -type maybeLiveChan struct { - maybeLivePtr +type maybeTraceableChan struct { + maybeTraceablePtr } //go:nosplit -func (p *maybeLiveChan) set(c *hchan) { - p.maybeLivePtr.set(unsafe.Pointer(c)) +func (p *maybeTraceableChan) set(c *hchan) { + p.maybeTraceablePtr.set(unsafe.Pointer(c)) } //go:nosplit -func (p *maybeLiveChan) get() *hchan { - return (*hchan)(p.maybeLivePtr.get()) +func (p *maybeTraceableChan) get() *hchan { + return (*hchan)(p.maybeTraceablePtr.get()) } // sudog (pseudo-g) represents a g in a wait list, such as for sending/receiving @@ -411,7 +411,7 @@ type sudog struct { next *sudog prev *sudog - elem maybeLivePtr // data element (may point to stack) + elem maybeTraceablePtr // data element (may point to stack) // The following fields are never accessed concurrently. // For channels, waitlink is only accessed by g. @@ -439,10 +439,10 @@ type sudog struct { // in the second entry in the list.) waiters uint16 - parent *sudog // semaRoot binary tree - waitlink *sudog // g.waiting list or semaRoot - waittail *sudog // semaRoot - c maybeLiveChan // channel + parent *sudog // semaRoot binary tree + waitlink *sudog // g.waiting list or semaRoot + waittail *sudog // semaRoot + c maybeTraceableChan // channel } type libcall struct { diff --git a/src/runtime/sema.go b/src/runtime/sema.go index e240d336202ac5..833829f70ed219 100644 --- a/src/runtime/sema.go +++ b/src/runtime/sema.go @@ -310,7 +310,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { // When dealing with sync semaphores, hide the elem field from the GC // to prevent it from prematurely marking the semaphore when running // goroutine leak detection. - s.elem.untrack() + s.elem.setUntraceable() } s.next = nil s.prev = nil @@ -609,7 +609,7 @@ func notifyListWait(l *notifyList, t uint32) { // the condvar address from the blocked goroutine when // checking for goroutine leaks. s.elem.set(unsafe.Pointer(l)) - s.elem.untrack() + s.elem.setUntraceable() s.g.waiting = s } s.ticket = t From fb905bb2c0c664a35aee5be681074e2ac7e17684 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Fri, 25 Jul 2025 15:00:02 +0200 Subject: [PATCH 12/36] Cleaned up experimental flags some more. 
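
Goroutine leak detection is now requested purely through runtime state: either
work.goroutineLeakFinder.pending (set when detection is explicitly requested)
or the GODEBUG=gcgoroutineleaks=1 setting consulted in gcStart, so gcStart no
longer needs a separate goexperiment guard around that check.

As an illustrative sketch only (it mirrors the testdata programs used by the
tests in this series and is not part of the diff below), a leaking program is
exercised like so when run with GODEBUG=gcgoroutineleaks=1:

	package main

	import (
		"runtime"
		"time"
	)

	func main() {
		go func() {
			<-make(chan int) // blocks forever: nothing ever sends on this channel
		}()
		time.Sleep(10 * time.Millisecond) // let the goroutine block
		runtime.GC()                      // this GC cycle reports the goroutine as leaked
	}
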
--- src/runtime/mgc.go | 46 +++++++++++++++++++----------------------- src/runtime/mgcmark.go | 4 +++- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 53a041eaf7eaf6..d733a78ae0a09b 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -733,12 +733,10 @@ func gcStart(trigger gcTrigger) { mode = gcForceMode } else if debug.gcstoptheworld == 2 { mode = gcForceBlockMode - } else if goexperiment.GoroutineLeakFinderGC { - if work.goroutineLeakFinder.pending.Load() || - debug.gcgoroutineleaks > 0 { - // Fully stop the world if running deadlock detection. - mode = gcForceBlockMode - } + } else if work.goroutineLeakFinder.pending.Load() || debug.gcgoroutineleaks > 0 { + // If goroutine leak detection has been enabled (via GODEBUG=gcgoroutineleaks=1), + // or via profiling, fully stop the world. + mode = gcForceBlockMode } // Ok, we're doing it! Stop everybody else @@ -1035,7 +1033,8 @@ top: } } }) - if restart { + switch { + case restart: gcDebugMarkDone.restartedDueTo27993 = true getg().m.preemptoff = "" @@ -1049,25 +1048,22 @@ top: }) semrelease(&worldsema) goto top - } else if goexperiment.GoroutineLeakFinderGC { - // If we are detecting goroutine leaks, do so now. - if work.goroutineLeakFinder.enabled && !work.goroutineLeakFinder.done { - // Detect goroutine leaks. If the returned value is true, then - // detection was performed during this cycle. Otherwise, more mark work is needed, - // or live goroutines were found. - work.goroutineLeakFinder.done = findGoleaks() - - getg().m.preemptoff = "" - systemstack(func() { - // Accumulate the time we were stopped before we had to start again. - work.cpuStats.accumulateGCPauseTime(nanotime()-stw.finishedStopping, work.maxprocs) + case work.goroutineLeakFinder.enabled && !work.goroutineLeakFinder.done: + // Detect goroutine leaks. If the returned value is true, then detection was + // performed during this cycle. Otherwise, more runnable goroutines were discovered, + // requiring additional mark work. + work.goroutineLeakFinder.done = findGoleaks() - now := startTheWorldWithSema(0, stw) - work.pauseNS += now - stw.startedStopping - }) - semrelease(&worldsema) - goto top - } + getg().m.preemptoff = "" + systemstack(func() { + // Accumulate the time we were stopped before we had to start again. + work.cpuStats.accumulateGCPauseTime(nanotime()-stw.finishedStopping, work.maxprocs) + + now := startTheWorldWithSema(0, stw) + work.pauseNS += now - stw.startedStopping + }) + semrelease(&worldsema) + goto top } gcComputeStartingStackSize() diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 05e9b3e0f7f2bb..66af09e309d30a 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -153,9 +153,11 @@ func gcPrepareMarkRoots() { // there's nothing to scan, and any roots they create during // the concurrent phase will be caught by the write barrier. if work.goroutineLeakFinder.enabled { + // goroutine leak finder GC --- only prepare runnable + // goroutines for marking. work.stackRoots, work.nLiveStackRoots = allGsSnapshotSortedForGC() } else { - // regular GC --- scan every go routine + // regular GC --- scan every goroutine work.stackRoots = allGsSnapshot() work.nLiveStackRoots = len(work.stackRoots) } From 710b419b5312d1dd83049f07de30eca8439eb50b Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Wed, 30 Jul 2025 11:25:36 +0200 Subject: [PATCH 13/36] Renamed nMaybeLiveStackRoots to nMaybeRunnableStackRoots. Refactoring. 
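
The stackRoots snapshot is kept partitioned: entries below
nMaybeRunnableStackRoots belong to goroutines that are (or may become)
runnable and are scheduled for marking, while the remaining entries belong to
goroutines that are still blocked. findMaybeRunnableGoroutines re-establishes
this invariant after each marking phase with a two-index sweep.

A standalone sketch of the same partition scheme (illustrative only; partition
and pred are stand-ins, not runtime identifiers):

	// partition moves the elements of s that satisfy pred to the front of s
	// and returns how many there are; elements at and beyond the returned
	// index do not satisfy pred. Relative order is not preserved.
	func partition[T any](s []T, pred func(T) bool) int {
		v, iv := 0, len(s)
		for v < iv {
			if pred(s[v]) {
				v++ // s[v] belongs to the left partition; leave it in place
				continue
			}
			iv--
			s[v], s[iv] = s[iv], s[v] // move s[v] to the right partition
		}
		return v
	}
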
--- src/runtime/mgc.go | 126 +++++++++++++++++------------------------ src/runtime/mgcmark.go | 6 +- 2 files changed, 54 insertions(+), 78 deletions(-) diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index d733a78ae0a09b..5e0fa8a2581521 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -373,11 +373,11 @@ type workType struct { // Number of roots of various root types. Set by gcPrepareMarkRoots. // - // During normal GC cycle, nStackRoots == nLiveStackRoots == len(stackRoots); - // during goroutine leak detection, nLiveStackRoots is the number of stackRoots - // to examine, and nStackRoots == len(stackRoots), which include goroutines that are - // unmarked / not runnable - nDataRoots, nBSSRoots, nSpanRoots, nStackRoots, nLiveStackRoots int + // During normal GC cycle, nStackRoots == nMaybeRunnableStackRoots == len(stackRoots); + // during goroutine leak detection, nMaybeRunnableStackRoots is the number of stackRoots + // scheduled for marking. + // In both variants, nStackRoots == len(stackRoots). + nDataRoots, nBSSRoots, nSpanRoots, nStackRoots, nMaybeRunnableStackRoots int // The following fields monitor the GC phase of the current cycle during // goroutine leak detection. @@ -390,7 +390,7 @@ type workType struct { enabled bool // The GC has performed goroutine leak detection during the current GC cycle; it is set // during gcMarkDone(), right after goroutine leak detection has concluded, and unset during - // gcStart(). Is protected by STW. + // gcMarkTermination(). Is protected by STW. done bool } @@ -399,11 +399,10 @@ type workType struct { // stackRoots is a snapshot of all of the Gs that existed before the // beginning of concurrent marking. During goroutine leak detection, stackRoots - // is partitioned into two sets; to the left of nLiveStackRoots are stackRoots - // of running / runnable goroutines and to the right of nLiveStackRoots are + // is partitioned into two sets; to the left of nMaybeRunnableStackRoots are stackRoots + // of running / runnable goroutines and to the right of nMaybeRunnableStackRoots are // stackRoots of unmarked / not runnable goroutines - // gcDiscoverMoreStackRoots modifies the stackRoots array to redo the partition - // after each marking phase iteration. + // The stackRoots array is re-partitioned after each marking phase iteration. stackRoots []*g // Each type of GC state transition is protected by a lock. @@ -735,8 +734,8 @@ func gcStart(trigger gcTrigger) { mode = gcForceBlockMode } else if work.goroutineLeakFinder.pending.Load() || debug.gcgoroutineleaks > 0 { // If goroutine leak detection has been enabled (via GODEBUG=gcgoroutineleaks=1), - // or via profiling, fully stop the world. - mode = gcForceBlockMode + // or via profiling, stop the world during the marking phase. + mode = gcForceMode } // Ok, we're doing it! Stop everybody else @@ -799,7 +798,6 @@ func gcStart(trigger gcTrigger) { clearpools() work.cycles.Add(1) - work.goroutineLeakFinder.done = false // Assists and workers can start the moment we start // the world. @@ -939,7 +937,7 @@ func gcMarkDone() { // time. semacquire(&work.markDoneSema) if work.goroutineLeakFinder.enabled { - gcDiscoverMoreStackRoots() + findMaybeRunnableGoroutines() } top: @@ -1033,32 +1031,27 @@ top: } } }) - switch { - case restart: - gcDebugMarkDone.restartedDueTo27993 = true - getg().m.preemptoff = "" - systemstack(func() { - // Accumulate the time we were stopped before we had to start again. 
- work.cpuStats.accumulateGCPauseTime(nanotime()-stw.finishedStopping, work.maxprocs) - - // Start the world again. - now := startTheWorldWithSema(0, stw) - work.pauseNS += now - stw.startedStopping - }) - semrelease(&worldsema) - goto top - case work.goroutineLeakFinder.enabled && !work.goroutineLeakFinder.done: - // Detect goroutine leaks. If the returned value is true, then detection was - // performed during this cycle. Otherwise, more runnable goroutines were discovered, - // requiring additional mark work. - work.goroutineLeakFinder.done = findGoleaks() + // Check whether we need to resume the marking phase because of issue #27993 + // or because of goroutine leak detection. + if restart || (work.goroutineLeakFinder.enabled && !work.goroutineLeakFinder.done) { + if restart { + // Restart because of issue #27993. + gcDebugMarkDone.restartedDueTo27993 = true + } else { + // Marking has reached a fixed-point. Attempt to detect goroutine leaks. + // + // If the returned value is true, then detection was performed during this cycle. + // Otherwise, more runnable goroutines were discovered, requiring additional mark work. + work.goroutineLeakFinder.done = findGoleaks() + } getg().m.preemptoff = "" systemstack(func() { // Accumulate the time we were stopped before we had to start again. work.cpuStats.accumulateGCPauseTime(nanotime()-stw.finishedStopping, work.maxprocs) + // Start the world again. now := startTheWorldWithSema(0, stw) work.pauseNS += now - stw.startedStopping }) @@ -1142,16 +1135,17 @@ func (gp *g) checkIfMaybeRunnable() bool { return true } -// Check to see if more blocked but marked goroutines exist; -// if so add them into root set and increment work.markrootJobs accordingly -// return true if we need to run another phase of markroots; return false otherwise -func gcDiscoverMoreStackRoots() { - // to begin with we have a set of unchecked stackRoots between - // vIndex and ivIndex. During the loop, anything < vIndex should be - // valid stackRoots and anything >= ivIndex should be invalid stackRoots - // and the loop terminates when the two indices meet - var vIndex, ivIndex int = work.nLiveStackRoots, work.nStackRoots - +// findMaybeRunnableGoroutines checks to see if more blocked but maybe-runnable goroutines exist. +// If so, it adds them into root set and increments work.markrootJobs accordingly. +// Returns true if we need to run another phase of markroots; returns false otherwise. +func findMaybeRunnableGoroutines() (moreWork bool) { + oldRootJobs := work.markrootJobs.Load() + + // To begin with we have a set of unchecked stackRoots between + // vIndex and ivIndex. During the loop, anything < vIndex should be + // valid stackRoots and anything >= ivIndex should be invalid stackRoots. + // The loop terminates when the two indices meet. 
+ var vIndex, ivIndex int = work.nMaybeRunnableStackRoots, work.nStackRoots // Reorder goroutine list for vIndex < ivIndex { if work.stackRoots[vIndex].checkIfMaybeRunnable() { @@ -1168,12 +1162,12 @@ func gcDiscoverMoreStackRoots() { } } - var newRootJobs int32 = int32(work.baseStacks) + int32(vIndex) - if newRootJobs > int32(work.markrootJobs.Load()) { - // reset markrootNext as it could have been incremented past markrootJobs - work.nLiveStackRoots = vIndex - work.markrootJobs.Store(uint32(newRootJobs)) + newRootJobs := work.baseStacks + uint32(vIndex) + if newRootJobs > oldRootJobs { + work.nMaybeRunnableStackRoots = vIndex + work.markrootJobs.Store(newRootJobs) } + return newRootJobs > oldRootJobs } // getSyncObjectsUnreachable scans allgs and sets the elem and c fields of all sudogs to @@ -1208,43 +1202,24 @@ func gcRestoreSyncObjects() { // findGoleaks scans the remaining stackRoots and marks any which are // blocked over exclusively unreachable concurrency primitives as leaked (deadlocked). // Returns true if the goroutine leak check was performed (or unnecessary). -// Returns false if the GC cycle has not yet computed all (maybe-)live goroutines. +// Returns false if the GC cycle has not yet computed all maybe-runnable goroutines. func findGoleaks() bool { // Report goroutine leaks and mark them unreachable, and resume marking // we still need to mark these unreachable *g structs as they // get reused, but their stack won't get scanned - if work.nLiveStackRoots == work.nStackRoots { - // nStackRoots == nLiveStackRoots means that all goroutines are marked. + if work.nMaybeRunnableStackRoots == work.nStackRoots { + // nMaybeRunnableStackRoots == nStackRoots means that all goroutines are marked. return true } - // Try to reach another fix point here. Keep scouting for runnable goroutines until - // none are left. - // Valid goroutines may be found after all GC work is drained. - // Make sure these are pushed to the runnable set and ready to be marked. - var foundMoreWork bool - for i := work.nLiveStackRoots; i < work.nStackRoots; i++ { - gp := work.stackRoots[i] - if readgstatus(gp) == _Gwaiting && !gp.checkIfMaybeRunnable() { - // Blocking unrunnable goroutines will be skipped. - continue - } - work.stackRoots[i] = work.stackRoots[work.nLiveStackRoots] - work.stackRoots[work.nLiveStackRoots] = gp - work.nLiveStackRoots += 1 - // We now have one more markroot job. - work.markrootJobs.Add(1) - // We might still have some work to do. - // Make sure in the next iteration we will check re-check for new runnable goroutines. - foundMoreWork = true - } - if foundMoreWork { + // Check whether any more maybe-runnable goroutines can be found by the GC. + if findMaybeRunnableGoroutines() { // We found more work, so we need to resume the marking phase. return false } // For the remaining goroutines, mark them as unreachable and leaked. - for i := work.nLiveStackRoots; i < work.nStackRoots; i++ { + for i := work.nMaybeRunnableStackRoots; i < work.nStackRoots; i++ { gp := work.stackRoots[i] casgstatus(gp, _Gwaiting, _Gleaked) fn := findfunc(gp.startpc) @@ -1259,8 +1234,8 @@ func findGoleaks() bool { println() } // Put the remaining roots as ready for marking and drain them. 
- work.markrootJobs.Add(int32(work.nStackRoots - work.nLiveStackRoots)) - work.nLiveStackRoots = work.nStackRoots + work.markrootJobs.Add(int32(work.nStackRoots - work.nMaybeRunnableStackRoots)) + work.nMaybeRunnableStackRoots = work.nStackRoots return true } @@ -1424,6 +1399,7 @@ func gcMarkTermination(stw worldStop) { systemstack(func() { // Pull the GC out of goroutine leak detection mode. work.goroutineLeakFinder.enabled = false + work.goroutineLeakFinder.done = false // The memstats updated above must be updated with the world // stopped to ensure consistency of some values, such as diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 66af09e309d30a..d7424a1970c811 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -155,17 +155,17 @@ func gcPrepareMarkRoots() { if work.goroutineLeakFinder.enabled { // goroutine leak finder GC --- only prepare runnable // goroutines for marking. - work.stackRoots, work.nLiveStackRoots = allGsSnapshotSortedForGC() + work.stackRoots, work.nMaybeRunnableStackRoots = allGsSnapshotSortedForGC() } else { // regular GC --- scan every goroutine work.stackRoots = allGsSnapshot() - work.nLiveStackRoots = len(work.stackRoots) + work.nMaybeRunnableStackRoots = len(work.stackRoots) } work.nStackRoots = len(work.stackRoots) work.markrootNext.Store(0) - work.markrootJobs.Store(uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nLiveStackRoots)) + work.markrootJobs.Store(uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nMaybeRunnableStackRoots)) // Calculate base indexes of each root type work.baseData = uint32(fixedRootCount) From 7da26f44355c22476b9d806c0bd699f70978acf4 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Fri, 1 Aug 2025 17:55:13 +0200 Subject: [PATCH 14/36] Test for goroutine leak GC. 
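
The test driver matches each expected leak against the report lines printed by
findGoleaks, which have the shape

	goroutine leak! goroutine <id>: <start function> Stack size: <n> bytes [<wait reason>]

A minimal sketch of how one table entry classifies such a line (the sample
line is fabricated for illustration; the pattern is taken from the test
table):

	package main

	import (
		"fmt"
		"regexp"
	)

	func main() {
		line := "goroutine leak! goroutine 42: main.NoCloseRange.gowrap1 Stack size: 8192 bytes [chan send]"
		pattern := regexp.MustCompile(`main\.NoCloseRange\.gowrap1 .* \[chan send\]`)
		fmt.Println(pattern.MatchString(line)) // prints: true
	}
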
--- src/runtime/gc_test.go | 505 ++++++++++++++---- .../testgoroutineleakgc/cockroach10214.go | 133 +++++ .../testgoroutineleakgc/cockroach1055.go | 148 +++++ .../testgoroutineleakgc/cockroach10790.go | 125 +++++ .../testgoroutineleakgc/cockroach13197.go | 71 +++ .../testgoroutineleakgc/cockroach13755.go | 58 ++ .../testgoroutineleakgc/cockroach1462.go | 184 +++++++ .../testgoroutineleakgc/cockroach16167.go | 121 +++++ .../testgoroutineleakgc/cockroach18101.go | 73 +++ .../testgoroutineleakgc/cockroach2448.go | 136 +++++ .../testgoroutineleakgc/cockroach24808.go | 81 +++ .../testgoroutineleakgc/cockroach25456.go | 91 ++++ .../testgoroutineleakgc/cockroach35073.go | 117 ++++ .../testgoroutineleakgc/cockroach35931.go | 127 +++++ .../testgoroutineleakgc/cockroach3710.go | 132 +++++ .../testgoroutineleakgc/cockroach584.go | 58 ++ .../testgoroutineleakgc/cockroach6181.go | 100 ++++ .../testgoroutineleakgc/cockroach7504.go | 196 +++++++ .../testgoroutineleakgc/cockroach9935.go | 58 ++ .../testdata/testgoroutineleakgc/etcd10492.go | 65 +++ .../testdata/testgoroutineleakgc/etcd5509.go | 120 +++++ .../testdata/testgoroutineleakgc/etcd6708.go | 94 ++++ .../testdata/testgoroutineleakgc/etcd6857.go | 91 ++++ .../testdata/testgoroutineleakgc/etcd6873.go | 92 ++++ .../testdata/testgoroutineleakgc/etcd7492.go | 178 ++++++ .../testdata/testgoroutineleakgc/etcd7902.go | 113 ++++ .../goroutineleakgc.go | 59 +- .../testdata/testgoroutineleakgc/grpc1275.go | 116 ++++ .../testdata/testgoroutineleakgc/grpc1424.go | 112 ++++ .../testdata/testgoroutineleakgc/grpc1460.go | 86 +++ .../testdata/testgoroutineleakgc/grpc3017.go | 146 +++++ .../testdata/testgoroutineleakgc/grpc660.go | 69 +++ .../testdata/testgoroutineleakgc/grpc795.go | 70 +++ .../testdata/testgoroutineleakgc/grpc862.go | 109 ++++ .../testdata/testgoroutineleakgc/hugo3251.go | 99 ++++ .../testdata/testgoroutineleakgc/hugo5379.go | 315 +++++++++++ .../testgoroutineleakgc/istio16224.go | 125 +++++ .../testgoroutineleakgc/istio17860.go | 139 +++++ .../testgoroutineleakgc/istio18454.go | 149 ++++++ .../testgoroutineleakgc/kubernetes10182.go | 97 ++++ .../testgoroutineleakgc/kubernetes11298.go | 114 ++++ .../testgoroutineleakgc/kubernetes13135.go | 183 +++++++ .../testgoroutineleakgc/kubernetes1321.go | 121 +++++ .../testgoroutineleakgc/kubernetes25331.go | 81 +++ .../testgoroutineleakgc/kubernetes26980.go | 83 +++ .../testgoroutineleakgc/kubernetes30872.go | 256 +++++++++ .../testgoroutineleakgc/kubernetes38669.go | 75 +++ .../testgoroutineleakgc/kubernetes5316.go | 69 +++ .../testgoroutineleakgc/kubernetes58107.go | 134 +++++ .../testgoroutineleakgc/kubernetes62464.go | 119 +++++ .../testgoroutineleakgc/kubernetes6632.go | 99 ++++ .../testgoroutineleakgc/kubernetes70277.go | 92 ++++ .../testdata/testgoroutineleakgc/main.go | 35 ++ .../testdata/testgoroutineleakgc/moby17176.go | 74 +++ .../testdata/testgoroutineleakgc/moby21233.go | 167 ++++++ .../testdata/testgoroutineleakgc/moby25348.go | 58 ++ .../testdata/testgoroutineleakgc/moby27782.go | 266 +++++++++ .../testdata/testgoroutineleakgc/moby28462.go | 139 +++++ .../testdata/testgoroutineleakgc/moby29733.go | 74 +++ .../testdata/testgoroutineleakgc/moby30408.go | 62 +++ .../testdata/testgoroutineleakgc/moby33781.go | 84 +++ .../testdata/testgoroutineleakgc/moby36114.go | 52 ++ .../testdata/testgoroutineleakgc/moby4395.go | 49 ++ .../testdata/testgoroutineleakgc/moby4951.go | 102 ++++ .../testdata/testgoroutineleakgc/moby7559.go | 52 ++ .../testdata/testgoroutineleakgc/patterns.go | 262 +++++++++ 
.../testgoroutineleakgc/serving2137.go | 143 +++++ .../testgoroutineleakgc/syncthing4829.go | 85 +++ .../testgoroutineleakgc/syncthing5795.go | 123 +++++ 69 files changed, 8071 insertions(+), 140 deletions(-) create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach10214.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach1055.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach10790.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach13197.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach13755.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach1462.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach16167.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach18101.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach2448.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach24808.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach25456.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach35073.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach35931.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach3710.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach584.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach6181.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach7504.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/cockroach9935.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/etcd10492.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/etcd5509.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/etcd6708.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/etcd6857.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/etcd6873.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/etcd7492.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/etcd7902.go rename src/runtime/testdata/{testprog => testgoroutineleakgc}/goroutineleakgc.go (65%) create mode 100644 src/runtime/testdata/testgoroutineleakgc/grpc1275.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/grpc1424.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/grpc1460.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/grpc3017.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/grpc660.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/grpc795.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/grpc862.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/hugo3251.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/hugo5379.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/istio16224.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/istio17860.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/istio18454.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/kubernetes10182.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/kubernetes11298.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/kubernetes1321.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/kubernetes26980.go create mode 100644 
src/runtime/testdata/testgoroutineleakgc/kubernetes30872.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/kubernetes58107.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/kubernetes62464.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/kubernetes6632.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/kubernetes70277.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/main.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/moby17176.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/moby21233.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/moby25348.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/moby27782.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/moby28462.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/moby29733.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/moby30408.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/moby33781.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/moby36114.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/moby4395.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/moby4951.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/moby7559.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/patterns.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/serving2137.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/syncthing4829.go create mode 100644 src/runtime/testdata/testgoroutineleakgc/syncthing5795.go diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 6143e6708326ca..ba53487625df72 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -18,6 +18,7 @@ import ( "runtime" "runtime/debug" "slices" + "strconv" "strings" "sync" "sync/atomic" @@ -1097,132 +1098,412 @@ func TestDetectFinalizerAndCleanupLeaks(t *testing.T) { } } +// This tests the goroutine leak garbage collector. func TestGoroutineLeakGC(t *testing.T) { + // Goroutine leak test case. + // + // Test cases can be configured with test name, the name of the entry point function, + // a set of expected leaks identified by regular expressions, and the number of times + // the test should be repeated. + // + // Repetitions are used to amortize flakiness in some tests. 
type testCase struct { - tname string - funcName string - expectedLeaks map[*regexp.Regexp]int - } - - testCases := []testCase{{ - tname: "ChanReceiveNil", - funcName: "GoroutineLeakNilRecv", - expectedLeaks: map[*regexp.Regexp]int{ - regexp.MustCompile(`\[chan receive \(nil chan\)\]`): 0, - }, - }, { - tname: "ChanSendNil", - funcName: "GoroutineLeakNilSend", - expectedLeaks: map[*regexp.Regexp]int{ - regexp.MustCompile(`\[chan send \(nil chan\)\]`): 0, - }, - }, { - tname: "SelectNoCases", - funcName: "GoroutineLeakSelectNoCases", - expectedLeaks: map[*regexp.Regexp]int{ - regexp.MustCompile(`\[select \(no cases\)\]`): 0, - }, - }, { - tname: "ChanRecv", - funcName: "GoroutineLeakChanRecv", - expectedLeaks: map[*regexp.Regexp]int{ - regexp.MustCompile(`\[chan receive\]`): 0, - }, - }, { - tname: "ChanSend", - funcName: "GoroutineLeakChanSend", - expectedLeaks: map[*regexp.Regexp]int{ - regexp.MustCompile(`\[chan send\]`): 0, - }, - }, { - tname: "Select", - funcName: "GoroutineLeakSelect", - expectedLeaks: map[*regexp.Regexp]int{ - regexp.MustCompile(`\[select\]`): 0, - }, - }, { - tname: "WaitGroup", - funcName: "GoroutineLeakWaitGroup", - expectedLeaks: map[*regexp.Regexp]int{ - regexp.MustCompile(`\[sync\.WaitGroup\.Wait\]`): 0, - }, - }, { - tname: "MutexStack", - funcName: "GoroutineLeakMutexStack", - expectedLeaks: map[*regexp.Regexp]int{ - regexp.MustCompile(`\[sync\.Mutex\.Lock\]`): 0, - }, - }, { - tname: "MutexHeap", - funcName: "GoroutineLeakMutexHeap", - expectedLeaks: map[*regexp.Regexp]int{ - regexp.MustCompile(`\[sync\.Mutex\.Lock\]`): 0, - }, - }, { - tname: "Cond", - funcName: "GoroutineLeakCond", - expectedLeaks: map[*regexp.Regexp]int{ - regexp.MustCompile(`\[sync\.Cond\.Wait\]`): 0, - }, - }, { - tname: "RWMutexRLock", - funcName: "GoroutineLeakRWMutexRLock", - expectedLeaks: map[*regexp.Regexp]int{ - regexp.MustCompile(`\[sync\.RWMutex\.RLock\]`): 0, - }, - }, { - tname: "RWMutexLock", - funcName: "GoroutineLeakRWMutexLock", - expectedLeaks: map[*regexp.Regexp]int{ - // Invoking Lock on a RWMutex may either put a goroutine a waiting state - // of either sync.RWMutex.Lock or sync.Mutex.Lock. - regexp.MustCompile(`\[sync\.(RW)?Mutex\.Lock\]`): 0, - }, - }, { - tname: "Mixed", - funcName: "GoroutineLeakMixed", - expectedLeaks: map[*regexp.Regexp]int{ - regexp.MustCompile(`\[sync\.WaitGroup\.Wait\]`): 0, - regexp.MustCompile(`\[chan send\]`): 0, - }, - }, { - tname: "NoLeakGlobal", - funcName: "NoGoroutineLeakGlobal", - }} + name string + repetitions int + expectedLeaks map[*regexp.Regexp]bool - failStates := regexp.MustCompile(`fatal|panic`) + // flakyLeaks are goroutine leaks that are too flaky to be reliably detected. + // Still, they might pop up every once in a while. + // If these occur, do not fail the test due to unexpected leaks. + flakyLeaks map[*regexp.Regexp]struct{} + } - for _, tcase := range testCases { - t.Run(tcase.tname, func(t *testing.T) { - exe, err := buildTestProg(t, "testprog") - if err != nil { - t.Fatal(fmt.Sprintf("building testprog failed: %v", err)) + // makeTest is a short-hand for creating test cases. + // Each of the leaks in the list is identified by a regular expression. + // + // If a leak is the string "FLAKY", it notifies makeTest that any remaining + // leak patterns should be added to the flakyLeaks map. 
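+	//
+	// For example, a hypothetical entry (illustrative only, not one of the
+	// registered tests) that requires a send leak and merely tolerates a
+	// flaky receive leak would be declared as:
+	//
+	//	makeTest(testCase{name: "Example"},
+	//		`\[chan send\]`,
+	//		"FLAKY",
+	//		`\[chan receive\]`)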
+ makeTest := func( + cfg testCase, + leaks ...string) testCase { + tc := testCase{ + name: cfg.name, + expectedLeaks: make(map[*regexp.Regexp]bool, len(leaks)), + flakyLeaks: make(map[*regexp.Regexp]struct{}, len(leaks)), + } + // Default to 1 repetition if not specified. + // One extra rep for configured tests is irrelevant. + tc.repetitions = cfg.repetitions | 1 + + const ( + EXPECTED int = iota + FLAKY + ) + + mode := EXPECTED + for _, leak := range leaks { + if leak == "FLAKY" { + mode = FLAKY + continue } - output := runBuiltTestProg(t, exe, tcase.funcName, "GODEBUG=gctrace=1,gcgoroutineleaks=1") - if len(tcase.expectedLeaks) == 0 && strings.Contains(output, "goroutine leak!") { - t.Fatalf("output:\n%s\n\nunexpected goroutines leaks detected", output) - return + switch mode { + case EXPECTED: + tc.expectedLeaks[regexp.MustCompile(leak)] = false + case FLAKY: + tc.flakyLeaks[regexp.MustCompile(leak)] = struct{}{} } + } + return tc + } + + // Micro tests involve very simple leaks for each type of concurrency primitive operation. + microTests := []testCase{ + makeTest(testCase{name: "NilRecv"}, `\[chan receive \(nil chan\)\]`), + makeTest(testCase{name: "NilSend"}, `\[chan send \(nil chan\)\]`), + makeTest(testCase{name: "SelectNoCases"}, `\[select \(no cases\)\]`), + makeTest(testCase{name: "ChanRecv"}, `\[chan receive\]`), + makeTest(testCase{name: "ChanSend"}, `\[chan send\]`), + makeTest(testCase{name: "Select"}, `\[select\]`), + makeTest(testCase{name: "WaitGroup"}, `\[sync\.WaitGroup\.Wait\]`), + makeTest(testCase{name: "MutexStack"}, `\[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "MutexHeap"}, `\[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Cond"}, `\[sync\.Cond\.Wait\]`), + makeTest(testCase{name: "RWMutexRLock"}, `\[sync\.RWMutex\.RLock\]`), + makeTest(testCase{name: "RWMutexLock"}, `\[sync\.(RW)?Mutex\.Lock\]`), + makeTest(testCase{name: "Mixed"}, `\[sync\.WaitGroup\.Wait\]`, `\[chan send\]`), + makeTest(testCase{name: "NoLeakGlobal"}), + } + + // Common goroutine leak patterns. + // Extracted from "Unveiling and Vanquishing Goroutine Leaks in Enterprise Microservices: A Dynamic Analysis Approach" + // doi:10.1109/CGO57630.2024.10444835 + patternTestCases := []testCase{ + makeTest(testCase{name: "NoCloseRange"}, + `main\.NoCloseRange\.gowrap1 .* \[chan send\]`, + `main\.noCloseRange\.func1 .* \[chan receive\]`), + makeTest(testCase{name: "MethodContractViolation"}, + `main\.worker\.Start\.func1 .* \[select\]`), + makeTest(testCase{name: "DoubleSend"}, + `main\.DoubleSend\.func3 .* \[chan send\]`), + makeTest(testCase{name: "EarlyReturn"}, + `main\.earlyReturn\.func1 .* \[chan send\]`), + makeTest(testCase{name: "NCastLeak"}, + `main\.nCastLeak\.func1 .* \[chan send\]`, + `main\.NCastLeak\.func2 .* \[chan receive\]`), + makeTest(testCase{name: "Timeout"}, + `main\.timeout\.func1 .* \[chan send\]`), + } + + // GoKer tests from "GoBench: A Benchmark Suite of Real-World Go Concurrency Bugs". + // White paper found at https://lujie.ac.cn/files/papers/GoBench.pdf + // doi:10.1109/CGO51591.2021.9370317. + // + // This list is curated for tests that are not excessively flaky. 
+ gokerTestCases := []testCase{ + makeTest(testCase{name: "Cockroach584"}, + `main\.Cockroach584\.func2\.1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Cockroach1055"}, + `main\.Cockroach1055\.func2 .* \[chan receive\]`, + `main\.Cockroach1055\.func2\.1 .* \[chan receive\]`, + `main\.Cockroach1055\.func2\.2 .* \[sync\.WaitGroup\.Wait\]`), + makeTest(testCase{name: "Cockroach1462"}, + `main\.\(\*Stopper_cockroach1462\)\.RunWorker\.func1 .* \[chan send\]`, + `main\.Cockroach1462\.func2 .* \[sync\.WaitGroup\.Wait\]`), + makeTest(testCase{name: "Cockroach2448"}, + `main\.Cockroach2448\.func2\.gowrap1 .* \[select\]`, + `main\.Cockroach2448\.func2\.gowrap2 .* \[select\]`), + makeTest(testCase{name: "Cockroach3710"}, + `main\.Cockroach3710\.func2\.gowrap1 .* \[sync\.RWMutex\.RLock\]`, + `main\.\(\*Store_cockroach3710\)\.processRaft\.func1 .* \[sync\.RWMutex\.Lock\]`), + makeTest(testCase{name: "Cockroach6181", repetitions: 50}, + `main\.testRangeCacheCoalescedRequests_cockroach6181 .* \[sync\.WaitGroup\.Wait\]`, + `main\.testRangeCacheCoalescedRequests_cockroach6181\.func1\.1 .* \[sync\.Mutex\.Lock\]`, + `main\.testRangeCacheCoalescedRequests_cockroach6181\.func1\.1 .* \[sync\.RWMutex\.Lock\]`, + `main\.testRangeCacheCoalescedRequests_cockroach6181\.func1\.1 .* \[sync\.RWMutex\.RLock\]`), + makeTest(testCase{name: "Cockroach7504", repetitions: 100}, + `main\.Cockroach7504\.func2\.1 .* \[sync\.Mutex\.Lock\]`, + `main\.Cockroach7504\.func2\.2 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Cockroach9935"}, + `main\.Cockroach9935\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Cockroach10214"}, + `main\.Cockroach10214\.func2\.1 .* \[sync\.Mutex\.Lock\]`, + `main\.Cockroach10214\.func2\.2 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Cockroach10790"}, + `main\.\(\*Replica_cockroach10790\)\.beginCmds\.func1 .* \[chan receive\]`), + makeTest(testCase{name: "Cockroach13197"}, + `main\.\(\*DB_cockroach13197\)\.begin\.gowrap1 .* \[chan receive\]`), + makeTest(testCase{name: "Cockroach13755"}, + `main\.\(\*Rows_cockroach13755\)\.initContextClose\.gowrap1 .* \[chan receive\]`), + makeTest(testCase{name: "Cockroach16167"}, + `main\.Cockroach16167\.func2 .* \[sync\.RWMutex\.RLock\]`, + `main\.Cockroach16167\.func2\.gowrap1 .* \[sync\.RWMutex\.Lock\]`), + makeTest(testCase{name: "Cockroach10790"}, + `main\.\(\*Replica_cockroach10790\)\.beginCmds\.func1 .* \[chan receive\]`), + makeTest(testCase{name: "Cockroach13197"}, + `main\.\(\*DB_cockroach13197\)\.begin\.gowrap1 .* \[chan receive\]`), + makeTest(testCase{name: "Cockroach13755"}, + `main\.\(\*Rows_cockroach13755\)\.initContextClose\.gowrap1 .* \[chan receive\]`), + makeTest(testCase{name: "Cockroach16167"}, + `main\.Cockroach16167\.func2 .* \[sync\.RWMutex\.RLock\]`, + `main\.Cockroach16167\.func2\.gowrap1 .* \[sync\.RWMutex\.Lock\]`), + makeTest(testCase{name: "Cockroach18101"}, + `main\.restore_cockroach18101\.func1 .* \[chan send\]`), + makeTest(testCase{name: "Cockroach24808"}, + `main\.Cockroach24808\.func2 .* \[chan send\]`), + makeTest(testCase{name: "Cockroach25456"}, + `main\.Cockroach25456\.func2 .* \[chan receive\]`), + makeTest(testCase{name: "Cockroach35073"}, + `main\.Cockroach35073\.func2.1 .* \[chan send\]`, + `main\.Cockroach35073\.func2 .* \[chan send\]`), + makeTest(testCase{name: "Cockroach35931"}, + `main\.Cockroach35931\.func2 .* \[chan send\]`), + makeTest(testCase{name: "Etcd5509"}, + `main\.Etcd5509\.func2 .* \[sync\.RWMutex\.Lock\]`), + makeTest(testCase{name: "Etcd6857"}, + 
`main\.Etcd6857\.func2\.gowrap2 .* \[chan send\]`), + makeTest(testCase{name: "Etcd6873"}, + `main\.Etcd6873\.func2\.gowrap1 .* \[chan receive\]`, + `main\.newWatchBroadcasts_etcd6873\.func1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Etcd7492"}, + `main\.Etcd7492\.func2 .* \[sync\.WaitGroup\.Wait\]`, + `main\.Etcd7492\.func2\.1 .* \[chan send\]`, + `main\.NewSimpleTokenTTLKeeper_etcd7492\.gowrap1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Etcd7902"}, + `main\.doRounds_etcd7902\.gowrap1 .* \[chan receive\]`, + `main\.doRounds_etcd7902\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + `main\.runElectionFunc_etcd7902 .* \[sync\.WaitGroup\.Wait\]`), + makeTest(testCase{name: "Etcd10492"}, + `main\.Etcd10492\.func2 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Grpc660"}, + `main\.\(\*benchmarkClient_grpc660\)\.doCloseLoopUnary\.func1 .* \[chan send\]`), + makeTest(testCase{name: "Grpc795"}, + `main\.\(\*test_grpc795\)\.startServer\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + `main\.testServerGracefulStopIdempotent_grpc795 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Grpc862"}, + `main\.DialContext_grpc862\.func2 .* \[chan receive\]`), + makeTest(testCase{name: "Grpc1275"}, + `main\.testInflightStreamClosing_grpc1275\.func1 .* \[chan receive\]`), + makeTest(testCase{name: "Grpc1424"}, + `main\.DialContext_grpc1424\.func1 .* \[chan receive\]`), + makeTest(testCase{name: "Grpc1460"}, + `main\.Grpc1460\.func2\.gowrap1 .* \[chan receive\]`, + `main\.Grpc1460\.func2\.gowrap2 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Grpc3017", repetitions: 50}, + // grpc/3017 involves a goroutine leak that also simultaneously engages many GC assists. + // Testing runtime behaviour when pivoting between regular and goroutine leak detection modes. + `main\.Grpc3017\.func2 .* \[chan receive\]`, + `main\.Grpc3017\.func2\.1 .* \[sync\.Mutex\.Lock\]`, + `main\.\(\*lbCacheClientConn_grpc3017\)\.RemoveSubConn\.func1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Hugo3251", repetitions: 20}, + `main\.Hugo3251\.func2 .* \[sync\.WaitGroup\.Wait\]`, + `main\.Hugo3251\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + `main\.Hugo3251\.func2\.gowrap1 .* \[sync\.RWMutex\.RLock\]`), + makeTest(testCase{name: "Hugo5379"}, + `main\.\(\*Page_hugo5379\)\.initContent\.func1\.1 .* \[sync\.Mutex\.Lock\]`, + `main\.\(\*Site_hugo5379\)\.renderPages\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + `main\.Hugo5379\.func2 .* \[sync\.WaitGroup\.Wait\]`), + makeTest(testCase{name: "Istio16224"}, + `main\.Istio16224\.func2 .* \[sync\.Mutex\.Lock\]`, + `main\.Istio16224\.func2\.gowrap1 .* \[chan send\]`, + // This is also a leak, but it is too flaky to be reliably detected. 
+ `FLAKY`, + `main\.Istio16224\.func2\.gowrap1 .* \[chan receive\]`), + makeTest(testCase{name: "Istio17860"}, + `main\.\(\*agent_istio17860\)\.Restart\.gowrap2 .* \[chan send\]`), + makeTest(testCase{name: "Istio18454"}, + `main\.\(\*Worker_istio18454\)\.Start\.func1 .* \[chan receive\]`, + `main\.\(\*Worker_istio18454\)\.Start\.func1 .* \[chan send\]`), + makeTest(testCase{name: "Kubernetes1321"}, + `main\.NewMux_kubernetes1321\.gowrap1 .* \[chan send\]`, + `main\.testMuxWatcherClose_kubernetes1321 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Kubernetes5316"}, + `main\.finishRequest_kubernetes5316\.func1 .* \[chan send\]`), + makeTest(testCase{name: "Kubernetes6632"}, + `main\.Kubernetes6632\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + `main\.Kubernetes6632\.func2\.gowrap2 .* \[chan send\]`), + makeTest(testCase{name: "Kubernetes10182"}, + `main\.\(\*statusManager_kubernetes10182\)\.Start\.func1 .* \[sync\.Mutex\.Lock\]`, + `main\.Kubernetes10182\.func2\.gowrap2 .* \[chan send\]`, + `main\.Kubernetes10182\.func2\.gowrap3 .* \[chan send\]`), + makeTest(testCase{name: "Kubernetes11298"}, + `main\.After_kubernetes11298\.func1 .* \[chan receive\]`, + `main\.After_kubernetes11298\.func1 .* \[sync\.Cond\.Wait\]`, + `main\.Kubernetes11298\.func2 .* \[chan receive\]`), + makeTest(testCase{name: "Kubernetes13135"}, + `main\.Kubernetes13135\.func2 .* \[sync\.WaitGroup\.Wait\]`), + makeTest(testCase{name: "Kubernetes25331"}, + `main\.Kubernetes25331\.func2\.gowrap1 .* \[chan send\]`), + makeTest(testCase{name: "Kubernetes26980"}, + `main\.Kubernetes26980\.func2 .* \[chan receive\]`, + `main\.Kubernetes26980\.func2\.1 .* \[sync\.Mutex\.Lock\]`, + `main\.Kubernetes26980\.func2\.gowrap2 .* \[chan receive\]`), + makeTest(testCase{name: "Kubernetes30872"}, + `main\.\(\*DelayingDeliverer_kubernetes30872\)\.StartWithHandler\.func1 .* \[sync\.Mutex\.Lock\]`, + `main\.\(\*federatedInformerImpl_kubernetes30872\)\.Start\.gowrap2 .* \[sync\.Mutex\.Lock\]`, + `main\.\(\*NamespaceController_kubernetes30872\)\.Run\.func1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Kubernetes38669"}, + `main\.newCacheWatcher_kubernetes38669\.gowrap1 .* \[chan send\]`), + makeTest(testCase{name: "Kubernetes58107"}, + `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap1 .* \[sync\.Cond\.Wait\]`, + `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap1 .* \[sync\.RWMutex\.RLock\]`, + `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap2 .* \[sync\.Cond\.Wait\]`, + `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap2 .* \[sync\.RWMutex\.RLock\]`, + `main\.startResourceQuotaController_kubernetes58107\.gowrap2 .* \[sync\.RWMutex\.Lock\]`), + makeTest(testCase{name: "Kubernetes62464"}, + `main\.Kubernetes62464\.func2\.gowrap1 .* \[sync\.RWMutex\.RLock\]`, + `main\.Kubernetes62464\.func2\.gowrap2 .* \[sync\.RWMutex\.Lock\]`), + makeTest(testCase{name: "Kubernetes70277"}, + `main\.Kubernetes70277\.func2 .* \[chan receive\]`), + makeTest(testCase{name: "Moby4395"}, + `main\.Go_moby4395\.func1 .* \[chan send\]`), + makeTest(testCase{name: "Moby4951"}, + `main\.Moby4951\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + `main\.Moby4951\.func2\.gowrap2 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Moby7559"}, + `main\.Moby7559\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Moby17176"}, + `main\.testDevmapperLockReleasedDeviceDeletion_moby17176\.func1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Moby21233"}, + 
`main\.\(\*Transfer_moby21233\)\.Watch\.func1 .* \[chan send\]`, + `main\.\(\*Transfer_moby21233\)\.Watch\.func1 .* \[select\]`, + `main\.testTransfer_moby21233 .* \[chan receive\]`), + makeTest(testCase{name: "Moby25348"}, + `main\.Moby25348\.func2\.gowrap1 .* \[sync\.WaitGroup\.Wait\]`), + makeTest(testCase{name: "Moby27782"}, + `main\.\(\*JSONFileLogger_moby27782\)\.ReadLogs\.gowrap1 .* \[sync\.Cond\.Wait\]`, + `main\.NewWatcher_moby27782\.gowrap1 .* \[select\]`), + makeTest(testCase{name: "Moby28462"}, + `main\.Moby28462\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + `main\.Moby28462\.func2\.gowrap2 .* \[chan send\]`), + makeTest(testCase{name: "Moby29733"}, + `main\.Moby29733\.func2 .* \[chan receive\]`, + `main\.testActive_moby29733\.func1 .* \[sync\.Cond\.Wait\]`), + makeTest(testCase{name: "Moby30408"}, + `main\.Moby30408\.func2 .* \[chan receive\]`, + `main\.testActive_moby30408\.func1 .* \[sync\.Cond\.Wait\]`), + makeTest(testCase{name: "Moby33781"}, + `main\.monitor_moby33781\.func1 .* \[chan send\]`), + makeTest(testCase{name: "Moby36114"}, + `main\.Moby36114\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Serving2137"}, + `main\.\(\*Breaker_serving2137\)\.concurrentRequest\.func1 .* \[chan send\]`, + `main\.\(\*Breaker_serving2137\)\.concurrentRequest\.func1 .* \[sync\.Mutex\.Lock\]`, + `main\.Serving2137\.func2 .* \[chan receive\]`), + makeTest(testCase{name: "Syncthing4829"}, + `main\.Syncthing4829\.func2 .* \[sync\.RWMutex\.RLock\]`), + makeTest(testCase{name: "Syncthing5795"}, + `main\.\(\*rawConnection_syncthing5795\)\.Start\.func1 .* \[chan receive\]`, + `main\.Syncthing5795\.func2 .* \[chan receive\]`), + } + + // Combine all test cases into a single list. + testCases := append(microTests, patternTestCases...) + testCases = append(testCases, gokerTestCases...) + + // Test cases must not panic or cause fatal exceptions. + failStates := regexp.MustCompile(`fatal|panic`) - if failStates.MatchString(output) { - t.Fatalf("output:\n%s\n\nunexpected fatal exception or panic", output) - return - } + // Build the test program once. + exe, err := buildTestProg(t, "testgoroutineleakgc") + if err != nil { + t.Fatal(fmt.Sprintf("building testgoroutineleakgc failed: %v", err)) + } - for _, line := range strings.Split(output, "\n") { - if strings.Contains(line, "goroutine leak!") { - for expectedLeak, count := range tcase.expectedLeaks { - if expectedLeak.MatchString(line) { - tcase.expectedLeaks[expectedLeak] = count + 1 + for _, tcase := range testCases { + t.Run(tcase.name, func(t *testing.T) { + // Run tests in parallel. + t.Parallel() + + // Default to 1 repetition if not specified. + // One extra rep for tests with a specified number of repetitions + // is irrelevant. + repetitions := tcase.repetitions | 1 + + // Output trace. Aggregated across all repetitions. + var output string + // Output and trace are protected by separate mutexes to reduce contention. + var outputMu sync.Mutex + var traceMu sync.RWMutex + // Wait group coordinates across all repetitions. + var wg sync.WaitGroup + + wg.Add(repetitions) + for i := 0; i < repetitions; i++ { + go func() { + defer wg.Done() + + // FIXME: Use GODEBUG flag only temporarily until we can use pprof/goroutineleaks. + repOutput := runBuiltTestProg(t, exe, tcase.name, "GODEBUG=gctrace=1,gcgoroutineleaks=1") + + // If the test case was not expected to produce leaks, but some were reported, + // stop the test immediately. Zero tolerance policy for false positives. 
+ if len(tcase.expectedLeaks)+len(tcase.flakyLeaks) == 0 && strings.Contains(repOutput, "goroutine leak!") { + t.Errorf("output:\n%s\n\ngoroutines leaks detected in case with no leaks", repOutput) + } + + // Zero tolerance policy for fatal exceptions or panics. + if failStates.MatchString(repOutput) { + t.Errorf("output:\n%s\n\nunexpected fatal exception or panic", repOutput) + } + + // Parse the output line by line and look for the `goroutine leak!` message. + LINES: + for _, line := range strings.Split(repOutput, "\n") { + // We are not interested in anything else. + if !strings.Contains(line, "goroutine leak!") { + continue + } + + // Check if the leak is expected. + // If it is, check whether it has been encountered before. + var foundNew bool + var leakPattern *regexp.Regexp + traceMu.RLock() + for expectedLeak, ok := range tcase.expectedLeaks { + if expectedLeak.MatchString(line) { + if !ok { + foundNew = true + } + + leakPattern = expectedLeak + break + } + } + traceMu.RUnlock() + + if foundNew { + // Only bother writing if we found a new leak. + traceMu.Lock() + tcase.expectedLeaks[leakPattern] = true + traceMu.Unlock() + } + + if leakPattern == nil { + // We are dealing with a leak not marked as expected. + // Check if it is a flaky leak. + for flakyLeak := range tcase.flakyLeaks { + if flakyLeak.MatchString(line) { + // The leak is flaky. Carry on to the next line. + continue LINES + } + } + + t.Errorf("output:\n%s\n\nunexpected goroutine leak: %s", repOutput, line) } } - } + + outputMu.Lock() + output += "\nRepetition " + strconv.Itoa(i) + ":\n" + repOutput + "\n--------------------------\n" + outputMu.Unlock() + }() } + // Coordinate across all repetitions. + wg.Wait() missingLeakStrs := make([]string, 0, len(tcase.expectedLeaks)) - for expectedLeak, count := range tcase.expectedLeaks { - if count == 0 { + for expectedLeak, found := range tcase.expectedLeaks { + if !found { missingLeakStrs = append(missingLeakStrs, expectedLeak.String()) } } diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach10214.go b/src/runtime/testdata/testgoroutineleakgc/cockroach10214.go new file mode 100644 index 00000000000000..cc06c4b77541ca --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach10214.go @@ -0,0 +1,133 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/10214 + * Buggy version: 7207111aa3a43df0552509365fdec741a53f873f + * fix commit-id: 27e863d90ab0660494778f1c35966cc5ddc38e32 + * Flaky: 3/100 + * Description: This deadlock is caused by different order when acquiring + * coalescedMu.Lock() and raftMu.Lock(). The fix is to refactor sendQueuedHeartbeats() + * so that cockroachdb can unlock coalescedMu before locking raftMu. 
+ */ +package main + +import ( + "runtime" + "sync" + "time" + "unsafe" +) + +func init() { + register("Cockroach10214", Cockroach10214) +} + +type Store_cockroach10214 struct { + coalescedMu struct { + sync.Mutex + heartbeatResponses []int + } + mu struct { + replicas map[int]*Replica_cockroach10214 + } +} + +func (s *Store_cockroach10214) sendQueuedHeartbeats() { + s.coalescedMu.Lock() // LockA acquire + runtime.Gosched() + defer s.coalescedMu.Unlock() + for i := 0; i < len(s.coalescedMu.heartbeatResponses); i++ { + s.sendQueuedHeartbeatsToNode() // LockB + } + // LockA release +} + +func (s *Store_cockroach10214) sendQueuedHeartbeatsToNode() { + for i := 0; i < len(s.mu.replicas); i++ { + r := s.mu.replicas[i] + r.reportUnreachable() // LockB + } +} + +type Replica_cockroach10214 struct { + raftMu sync.Mutex + mu sync.Mutex + store *Store_cockroach10214 +} + +func (r *Replica_cockroach10214) reportUnreachable() { + r.raftMu.Lock() // LockB acquire + runtime.Gosched() + //+time.Sleep(time.Nanosecond) + defer r.raftMu.Unlock() + // LockB release +} + +func (r *Replica_cockroach10214) tick() { + r.raftMu.Lock() // LockB acquire + runtime.Gosched() + defer r.raftMu.Unlock() + r.tickRaftMuLocked() + // LockB release +} + +func (r *Replica_cockroach10214) tickRaftMuLocked() { + r.mu.Lock() + defer r.mu.Unlock() + if r.maybeQuiesceLocked() { + return + } +} +func (r *Replica_cockroach10214) maybeQuiesceLocked() bool { + for i := 0; i < 2; i++ { + if !r.maybeCoalesceHeartbeat() { + return true + } + } + return false +} +func (r *Replica_cockroach10214) maybeCoalesceHeartbeat() bool { + msgtype := uintptr(unsafe.Pointer(r)) % 3 + switch msgtype { + case 0, 1, 2: + r.store.coalescedMu.Lock() // LockA acquire + default: + return false + } + r.store.coalescedMu.Unlock() // LockA release + return true +} + +func Cockroach10214() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 1000; i++ { + go func() { + store := &Store_cockroach10214{} + responses := &store.coalescedMu.heartbeatResponses + *responses = append(*responses, 1, 2) + store.mu.replicas = make(map[int]*Replica_cockroach10214) + + rp1 := &Replica_cockroach10214{ + store: store, + } + rp2 := &Replica_cockroach10214{ + store: store, + } + store.mu.replicas[0] = rp1 + store.mu.replicas[1] = rp2 + + go func() { + // deadlocks: x > 0 + store.sendQueuedHeartbeats() + }() + + go func() { + // deadlocks: x > 0 + rp1.tick() + }() + + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach1055.go b/src/runtime/testdata/testgoroutineleakgc/cockroach1055.go new file mode 100644 index 00000000000000..f479a29dc36855 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach1055.go @@ -0,0 +1,148 @@ +package main + +import ( + "runtime" + "sync" + "sync/atomic" + "time" +) + +func init() { + register("Cockroach1055", Cockroach1055) +} + +type Stopper_cockroach1055 struct { + stopper chan struct{} + stop sync.WaitGroup + mu sync.Mutex + draining int32 + drain sync.WaitGroup +} + +func (s *Stopper_cockroach1055) AddWorker() { + s.stop.Add(1) +} + +func (s *Stopper_cockroach1055) ShouldStop() <-chan struct{} { + if s == nil { + return nil + } + return s.stopper +} + +func (s *Stopper_cockroach1055) SetStopped() { + if s != nil { + s.stop.Done() + } +} + +func (s *Stopper_cockroach1055) Quiesce() { + s.mu.Lock() + defer s.mu.Unlock() + s.draining = 1 + s.drain.Wait() + s.draining = 0 +} + +func (s *Stopper_cockroach1055) Stop() { + s.mu.Lock() // L1 + defer s.mu.Unlock() + 
atomic.StoreInt32(&s.draining, 1) + s.drain.Wait() + close(s.stopper) + s.stop.Wait() +} + +func (s *Stopper_cockroach1055) StartTask() bool { + if atomic.LoadInt32(&s.draining) == 0 { + s.mu.Lock() + defer s.mu.Unlock() + s.drain.Add(1) + return true + } + return false +} + +func NewStopper_cockroach1055() *Stopper_cockroach1055 { + return &Stopper_cockroach1055{ + stopper: make(chan struct{}), + } +} + +func Cockroach1055() { + defer func() { + time.Sleep(1 * time.Second) + runtime.GC() + }() + + for i := 0; i <= 1000; i++ { + go func() { // G1 + // deadlocks: x > 0 + var stoppers []*Stopper_cockroach1055 + for i := 0; i < 2; i++ { + stoppers = append(stoppers, NewStopper_cockroach1055()) + } + + for i := range stoppers { + s := stoppers[i] + s.AddWorker() + go func() { // G2 + // deadlocks: x > 0 + s.StartTask() + <-s.ShouldStop() + s.SetStopped() + }() + } + + done := make(chan struct{}) + go func() { // G3 + // deadlocks: x > 0 + for _, s := range stoppers { + s.Quiesce() + } + for _, s := range stoppers { + s.Stop() + } + close(done) + }() + + <-done + }() + } +} + +// Example deadlock trace: +// +// G1 G2.0 G2.1 G2.2 G3 +// --------------------------------------------------------------------------------------------------------------------- +// s[0].stop.Add(1) [1] +// go func() [G2.0] +// s[1].stop.Add(1) [1] . +// go func() [G2.1] . +// s[2].stop.Add(1) [1] . . +// go func() [G2.2] . . +// go func() [G3] . . . +// <-done . . . . +// . s[0].StartTask() . . . +// . s[0].draining == 0 . . . +// . . s[1].StartTask() . . +// . . s[1].draining == 0 . . +// . . . s[2].StartTask() . +// . . . s[2].draining == 0 . +// . . . . s[0].Quiesce() +// . . . . s[0].mu.Lock() [L1[0]] +// . s[0].mu.Lock() [L1[0]] . . . +// . s[0].drain.Add(1) [1] . . . +// . s[0].mu.Unlock() [L1[0]] . . . +// . <-s[0].ShouldStop() . . . +// . . . . s[0].draining = 1 +// . . . . s[0].drain.Wait() +// . . s[0].mu.Lock() [L1[1]] . . +// . . s[1].drain.Add(1) [1] . . +// . . s[1].mu.Unlock() [L1[1]] . . +// . . <-s[1].ShouldStop() . . +// . . . s[2].mu.Lock() [L1[2]] . +// . . . s[2].drain.Add() [1] . +// . . . s[2].mu.Unlock() [L1[2]] . +// . . . <-s[2].ShouldStop() . +// ----------------------------------------------------G1, G2.[0..2], G3 leak------------------------------------------------ diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach10790.go b/src/runtime/testdata/testgoroutineleakgc/cockroach10790.go new file mode 100644 index 00000000000000..57c0d0c81bb4d4 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach10790.go @@ -0,0 +1,125 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/10790 + * Buggy version: 96b5452557ebe26bd9d85fe7905155009204d893 + * fix commit-id: f1a5c19125c65129b966fbdc0e6408e8df214aba + * Flaky: 28/100 + * Description: + * It is possible that a message from ctxDone will make the function beginCmds + * returns without draining the channel ch, so that goroutines created by anonymous + * function will leak. 
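+ * In the reproduction below, once cancel() fires, sendChans() returns on
+ * ctx.Done() and stops draining the channels, so the goroutine spawned
+ * inside beginCmds() blocks forever on <-ch (see the trace comment further
+ * down for the full interleaving).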
+ */ + +package main + +import ( + "context" + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach10790", Cockroach10790) +} + +type Stopper_cockroach10790 struct { + quiescer chan struct{} + mu struct { + sync.Mutex + quiescing bool + } +} + +func (s *Stopper_cockroach10790) ShouldQuiesce() <-chan struct{} { + if s == nil { + return nil + } + return s.quiescer +} + +func (s *Stopper_cockroach10790) Quiesce() { + s.mu.Lock() + defer s.mu.Unlock() + if !s.mu.quiescing { + s.mu.quiescing = true + close(s.quiescer) + } +} + +func (s *Stopper_cockroach10790) Stop() { + s.Quiesce() +} + +type Replica_cockroach10790 struct { + chans []chan bool + stopper *Stopper_cockroach10790 +} + +func (r *Replica_cockroach10790) beginCmds(ctx context.Context) { + ctxDone := ctx.Done() + for _, ch := range r.chans { + select { + case <-ch: + case <-ctxDone: + go func() { + // deadlocks: x > 0 + for _, ch := range r.chans { + <-ch + } + }() + } + } +} + +func (r *Replica_cockroach10790) sendChans(ctx context.Context) { + for _, ch := range r.chans { + select { + case ch <- true: + case <-ctx.Done(): + return + } + } +} + +func NewReplica_cockroach10790() *Replica_cockroach10790 { + r := &Replica_cockroach10790{ + stopper: &Stopper_cockroach10790{ + quiescer: make(chan struct{}), + }, + } + r.chans = append(r.chans, make(chan bool)) + r.chans = append(r.chans, make(chan bool)) + return r +} + +/// +/// G1 G2 helper goroutine +/// r.sendChans() +/// r.beginCmds() +/// ch1 <- true +/// <- ch1 +/// ch2 <- true +/// ... ... ... +/// cancel() +/// <- ch1 +/// ------------------G1 leak-------------------------- +/// + +func Cockroach10790() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + r := NewReplica_cockroach10790() + ctx, cancel := context.WithCancel(context.Background()) + go r.sendChans(ctx) // helper goroutine + go r.beginCmds(ctx) // G1 + go cancel() // G2 + r.stopper.Stop() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go b/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go new file mode 100644 index 00000000000000..1745df4dbc66a0 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go @@ -0,0 +1,71 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/13197 + * Buggy version: fff27aedabafe20cef57f75905fe340cab48c2a4 + * fix commit-id: 9bf770cd8f6eaff5441b80d3aec1a5614e8747e1 + * Flaky: 100/100 + * Description: One goroutine executing (*Tx).awaitDone() blocks and + * waiting for a signal context.Done(). 
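+ * In the reproduction below, Rollback() is never called, so cancel() never
+ * runs and the goroutine started in begin() stays blocked on
+ * <-tx.ctx.Done() (see the trace comment above Cockroach13197).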
+ */ +package main + +import ( + "context" + "runtime" + "time" +) + +func init() { + register("Cockroach13197", Cockroach13197) +} + +type DB_cockroach13197 struct{} + +func (db *DB_cockroach13197) begin(ctx context.Context) *Tx_cockroach13197 { + ctx, cancel := context.WithCancel(ctx) + tx := &Tx_cockroach13197{ + cancel: cancel, + ctx: ctx, + } + // deadlocks: 1 + go tx.awaitDone() // G2 + return tx +} + +type Tx_cockroach13197 struct { + cancel context.CancelFunc + ctx context.Context +} + +func (tx *Tx_cockroach13197) awaitDone() { + <-tx.ctx.Done() +} + +func (tx *Tx_cockroach13197) Rollback() { + tx.rollback() +} + +func (tx *Tx_cockroach13197) rollback() { + tx.close() +} + +func (tx *Tx_cockroach13197) close() { + tx.cancel() +} + +/// G1 G2 +/// begin() +/// awaitDone() +/// <-tx.ctx.Done() +/// return +/// -----------G2 leak------------- + +func Cockroach13197() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + db := &DB_cockroach13197{} + db.begin(context.Background()) // G1 +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go b/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go new file mode 100644 index 00000000000000..690a5586f2df32 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go @@ -0,0 +1,58 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/13755 + * Buggy version: 7acb881bbb8f23e87b69fce9568d9a3316b5259c + * fix commit-id: ef906076adc1d0e3721944829cfedfed51810088 + * Flaky: 100/100 + * Description: The buggy code does not close the db query result (rows), + * so that one goroutine running (*Rows).awaitDone is blocked forever. + * The blocking goroutine is waiting for cancel signal from context. + */ + +package main + +import ( + "context" + "runtime" + "time" +) + +func init() { + register("Cockroach13755", Cockroach13755) +} + +type Rows_cockroach13755 struct { + cancel context.CancelFunc +} + +func (rs *Rows_cockroach13755) initContextClose(ctx context.Context) { + ctx, rs.cancel = context.WithCancel(ctx) + // deadlocks: 1 + go rs.awaitDone(ctx) +} + +func (rs *Rows_cockroach13755) awaitDone(ctx context.Context) { + <-ctx.Done() + rs.close(ctx.Err()) +} + +func (rs *Rows_cockroach13755) close(err error) { + rs.cancel() +} + +/// G1 G2 +/// initContextClose() +/// awaitDone() +/// <-tx.ctx.Done() +/// return +/// ---------------G2 leak----------------- + +func Cockroach13755() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + rs := &Rows_cockroach13755{} + rs.initContextClose(context.Background()) +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach1462.go b/src/runtime/testdata/testgoroutineleakgc/cockroach1462.go new file mode 100644 index 00000000000000..b2365a8b95efdc --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach1462.go @@ -0,0 +1,184 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach1462", Cockroach1462) +} + +type Stopper_cockroach1462 struct { + stopper chan struct{} + stopped chan struct{} + stop sync.WaitGroup + mu sync.Mutex + drain *sync.Cond + draining bool + numTasks int +} + +func NewStopper_cockroach1462() *Stopper_cockroach1462 { + s := &Stopper_cockroach1462{ + stopper: make(chan struct{}), + stopped: make(chan struct{}), + } + s.drain = sync.NewCond(&s.mu) + return s +} + +func (s *Stopper_cockroach1462) RunWorker(f func()) { + s.AddWorker() + go func() { // G2, G3 + defer s.SetStopped() + // 
deadlocks: x > 0 + f() + }() +} + +func (s *Stopper_cockroach1462) AddWorker() { + s.stop.Add(1) +} +func (s *Stopper_cockroach1462) StartTask() bool { + s.mu.Lock() + runtime.Gosched() + defer s.mu.Unlock() + if s.draining { + return false + } + s.numTasks++ + return true +} + +func (s *Stopper_cockroach1462) FinishTask() { + s.mu.Lock() + runtime.Gosched() + defer s.mu.Unlock() + s.numTasks-- + s.drain.Broadcast() +} +func (s *Stopper_cockroach1462) SetStopped() { + if s != nil { + s.stop.Done() + } +} +func (s *Stopper_cockroach1462) ShouldStop() <-chan struct{} { + if s == nil { + return nil + } + return s.stopper +} + +func (s *Stopper_cockroach1462) Quiesce() { + s.mu.Lock() + runtime.Gosched() + defer s.mu.Unlock() + s.draining = true + for s.numTasks > 0 { + // Unlock s.mu, wait for the signal, and lock s.mu. + s.drain.Wait() + } +} + +func (s *Stopper_cockroach1462) Stop() { + s.Quiesce() + close(s.stopper) + s.stop.Wait() + s.mu.Lock() + runtime.Gosched() + defer s.mu.Unlock() + close(s.stopped) +} + +type interceptMessage_cockroach1462 int + +type localInterceptableTransport_cockroach1462 struct { + mu sync.Mutex + Events chan interceptMessage_cockroach1462 + stopper *Stopper_cockroach1462 +} + +func (lt *localInterceptableTransport_cockroach1462) Close() {} + +type Transport_cockroach1462 interface { + Close() +} + +func NewLocalInterceptableTransport_cockroach1462(stopper *Stopper_cockroach1462) Transport_cockroach1462 { + lt := &localInterceptableTransport_cockroach1462{ + Events: make(chan interceptMessage_cockroach1462), + stopper: stopper, + } + lt.start() + return lt +} + +func (lt *localInterceptableTransport_cockroach1462) start() { + lt.stopper.RunWorker(func() { + for { + select { + case <-lt.stopper.ShouldStop(): + return + default: + lt.Events <- interceptMessage_cockroach1462(0) + } + } + }) +} + +func processEventsUntil_cockroach1462(ch <-chan interceptMessage_cockroach1462, stopper *Stopper_cockroach1462) { + for { + select { + case _, ok := <-ch: + runtime.Gosched() + if !ok { + return + } + case <-stopper.ShouldStop(): + return + } + } +} + +func Cockroach1462() { + defer func() { + time.Sleep(2000 * time.Millisecond) + runtime.GC() + }() + for i := 0; i <= 1000; i++ { + go func() { // G1 + // deadlocks: x > 0 + stopper := NewStopper_cockroach1462() + transport := NewLocalInterceptableTransport_cockroach1462(stopper).(*localInterceptableTransport_cockroach1462) + stopper.RunWorker(func() { + processEventsUntil_cockroach1462(transport.Events, stopper) + }) + stopper.Stop() + }() + } +} + +// Example of a deadlocking trace +// G1 G2 G3 +// --------------------------------------------------------------------------------------------------------------------- +// NewLocalInterceptableTransport() +// lt.start() +// lt.stopper.RunWorker() +// s.AddWorker() +// s.stop.Add(1) [1] +// go func() [G2] +// stopper.RunWorker() . +// s.AddWorker() . +// s.stop.Add(1) [2] . +// go func() [G3] . +// s.Stop() . . +// s.Quiesce() . . +// . select [default] . +// . lt.Events <- interceptMessage(0) . +// close(s.stopper) . . +// . . select [<-stopper.ShouldStop()] +// . . <<>> +// s.stop.Wait() . 
+// -----------------------------------------------------G1,G2 leak------------------------------------------------------ diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach16167.go b/src/runtime/testdata/testgoroutineleakgc/cockroach16167.go new file mode 100644 index 00000000000000..7860b74b437850 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach16167.go @@ -0,0 +1,121 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/16167 + * Buggy version: 36fa784aa846b46c29e077634c4e362635f6e74a + * fix commit-id: d064942b067ab84628f79cbfda001fa3138d8d6e + * Flaky: 1/100 + * Description: + * This is another example for deadlock caused by recursively + * acquiring RWLock. There are two lock variables (systemConfigCond and systemConfigMu) + * involved in this bug, but they are actually the same lock, which can be found from + * the following code. + * There are two goroutine involved in this deadlock. The first goroutine acquires + * systemConfigMu.Lock() firstly, then tries to acquire systemConfigMu.RLock(). The + * second goroutine tries to acquire systemConfigMu.Lock(). If the second goroutine + * interleaves in between the two lock operations of the first goroutine, deadlock will happen. + */ + +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach16167", Cockroach16167) +} + +type PreparedStatements_cockroach16167 struct { + session *Session_cockroach16167 +} + +func (ps PreparedStatements_cockroach16167) New(e *Executor_cockroach16167) { + e.Prepare(ps.session) +} + +type Session_cockroach16167 struct { + PreparedStatements PreparedStatements_cockroach16167 +} + +func (s *Session_cockroach16167) resetForBatch(e *Executor_cockroach16167) { + e.getDatabaseCache() +} + +type Executor_cockroach16167 struct { + systemConfigCond *sync.Cond + systemConfigMu sync.RWMutex +} + +func (e *Executor_cockroach16167) Start() { + e.updateSystemConfig() +} + +func (e *Executor_cockroach16167) execParsed(session *Session_cockroach16167) { + e.systemConfigCond.L.Lock() // Same as e.systemConfigMu.RLock() + runtime.Gosched() + defer e.systemConfigCond.L.Unlock() + runTxnAttempt_cockroach16167(e, session) +} + +func (e *Executor_cockroach16167) execStmtsInCurrentTxn(session *Session_cockroach16167) { + e.execStmtInOpenTxn(session) +} + +func (e *Executor_cockroach16167) execStmtInOpenTxn(session *Session_cockroach16167) { + session.PreparedStatements.New(e) +} + +func (e *Executor_cockroach16167) Prepare(session *Session_cockroach16167) { + session.resetForBatch(e) +} + +func (e *Executor_cockroach16167) getDatabaseCache() { + e.systemConfigMu.RLock() + defer e.systemConfigMu.RUnlock() +} + +func (e *Executor_cockroach16167) updateSystemConfig() { + e.systemConfigMu.Lock() + runtime.Gosched() + defer e.systemConfigMu.Unlock() +} + +func runTxnAttempt_cockroach16167(e *Executor_cockroach16167, session *Session_cockroach16167) { + e.execStmtsInCurrentTxn(session) +} + +func NewExectorAndSession_cockroach16167() (*Executor_cockroach16167, *Session_cockroach16167) { + session := &Session_cockroach16167{} + session.PreparedStatements = PreparedStatements_cockroach16167{session} + e := &Executor_cockroach16167{} + return e, session +} + +/// G1 G2 +/// e.Start() +/// e.updateSystemConfig() +/// e.execParsed() +/// e.systemConfigCond.L.Lock() +/// e.systemConfigMu.Lock() +/// e.systemConfigMu.RLock() +/// ----------------------G1,G2 deadlock-------------------- + +func Cockroach16167() { + defer func() { + 
time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + // deadlocks: x > 0 + e, s := NewExectorAndSession_cockroach16167() + e.systemConfigCond = sync.NewCond(e.systemConfigMu.RLocker()) + // deadlocks: x > 0 + go e.Start() // G1 + e.execParsed(s) // G2 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach18101.go b/src/runtime/testdata/testgoroutineleakgc/cockroach18101.go new file mode 100644 index 00000000000000..e85007a675d1e4 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach18101.go @@ -0,0 +1,73 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/18101 + * Buggy version: f7a8e2f57b6bcf00b9abaf3da00598e4acd3a57f + * fix commit-id: 822bd176cc725c6b50905ea615023200b395e14f + * Flaky: 100/100 + * Description: + * context.Done() signal only stops the goroutine who pulls data + * from a channel, while does not stops goroutines which send data + * to the channel. This causes all goroutines trying to send data + * through the channel to block. + */ + +package main + +import ( + "context" + "runtime" + "time" +) + +func init() { + register("Cockroach18101", Cockroach18101) +} + +const chanSize_cockroach18101 = 6 + +func restore_cockroach18101(ctx context.Context) bool { + readyForImportCh := make(chan bool, chanSize_cockroach18101) + go func() { // G2 + defer close(readyForImportCh) + // deadlocks: x > 0 + splitAndScatter_cockroach18101(ctx, readyForImportCh) + }() + for readyForImportSpan := range readyForImportCh { + select { + case <-ctx.Done(): + return readyForImportSpan + } + } + return true +} + +func splitAndScatter_cockroach18101(ctx context.Context, readyForImportCh chan bool) { + for i := 0; i < chanSize_cockroach18101+2; i++ { + readyForImportCh <- (false || i != 0) + } +} + +/// +/// G1 G2 helper goroutine +/// restore() +/// splitAndScatter() +/// <-readyForImportCh +/// readyForImportCh<- +/// ... ... 
+/// cancel() +/// return +/// readyForImportCh<- +/// -----------------------G2 leak------------------------- +/// + +func Cockroach18101() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + ctx, cancel := context.WithCancel(context.Background()) + go restore_cockroach18101(ctx) // G1 + go cancel() // helper goroutine + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach2448.go b/src/runtime/testdata/testgoroutineleakgc/cockroach2448.go new file mode 100644 index 00000000000000..7ab60bd11b2a60 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach2448.go @@ -0,0 +1,136 @@ +package main + +import ( + "runtime" + "time" +) + +func init() { + register("Cockroach2448", Cockroach2448) +} + +type Stopper_cockroach2448 struct { + Done chan bool +} + +func (s *Stopper_cockroach2448) ShouldStop() <-chan bool { + return s.Done +} + +type EventMembershipChangeCommitted_cockroach2448 struct { + Callback func() +} +type MultiRaft_cockroach2448 struct { + stopper *Stopper_cockroach2448 + Events chan interface{} + callbackChan chan func() +} + +// sendEvent can be invoked many times +func (m *MultiRaft_cockroach2448) sendEvent(event interface{}) { + select { + case m.Events <- event: // Waiting for events consumption + case <-m.stopper.ShouldStop(): + } +} + +type state_cockroach2448 struct { + *MultiRaft_cockroach2448 +} + +func (s *state_cockroach2448) start() { + for { + select { + case <-s.stopper.ShouldStop(): + return + case cb := <-s.callbackChan: + cb() + default: + s.handleWriteResponse() + time.Sleep(time.Millisecond) + } + } +} + +func (s *state_cockroach2448) handleWriteResponse() { + s.sendEvent(&EventMembershipChangeCommitted_cockroach2448{ + Callback: func() { + select { + case s.callbackChan <- func() { // Waiting for callbackChan consumption + time.Sleep(time.Nanosecond) + }: + case <-s.stopper.ShouldStop(): + } + }, + }) +} + +type Store_cockroach2448 struct { + multiraft *MultiRaft_cockroach2448 +} + +func (s *Store_cockroach2448) processRaft() { + for { + select { + case e := <-s.multiraft.Events: + switch e := e.(type) { + case *EventMembershipChangeCommitted_cockroach2448: + callback := e.Callback + runtime.Gosched() + if callback != nil { + callback() // Waiting for callbackChan consumption + } + } + case <-s.multiraft.stopper.ShouldStop(): + return + } + } +} + +func NewStoreAndState_cockroach2448() (*Store_cockroach2448, *state_cockroach2448) { + stopper := &Stopper_cockroach2448{ + Done: make(chan bool), + } + mltrft := &MultiRaft_cockroach2448{ + stopper: stopper, + Events: make(chan interface{}), + callbackChan: make(chan func()), + } + st := &state_cockroach2448{mltrft} + s := &Store_cockroach2448{mltrft} + return s, st +} + +func Cockroach2448() { + defer func() { + time.Sleep(time.Second) + runtime.GC() + }() + for i := 0; i < 1000; i++ { + go func() { + s, st := NewStoreAndState_cockroach2448() + // deadlocks: x > 0 + go s.processRaft() // G1 + // deadlocks: x > 0 + go st.start() // G2 + }() + } +} + +// Example of deadlock trace: +// +// G1 G2 +// -------------------------------------------------------------------------------------------------- +// s.processRaft() st.start() +// select . +// . select [default] +// . s.handleWriteResponse() +// . s.sendEvent() +// . select +// <-s.multiraft.Events <----> m.Events <- event +// . select [default] +// . s.handleWriteResponse() +// . s.sendEvent() +// . 
select [m.Events<-, <-s.stopper.ShouldStop()] +// callback() +// select [m.callbackChan<-,<-s.stopper.ShouldStop()] . diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go b/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go new file mode 100644 index 00000000000000..b16d4db25dd1a7 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go @@ -0,0 +1,81 @@ +package main + +import ( + "context" + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach24808", Cockroach24808) +} + +type Compactor_cockroach24808 struct { + ch chan struct{} +} + +type Stopper_cockroach24808 struct { + stop sync.WaitGroup + stopper chan struct{} +} + +func (s *Stopper_cockroach24808) RunWorker(ctx context.Context, f func(context.Context)) { + s.stop.Add(1) + go func() { + defer s.stop.Done() + f(ctx) + }() +} + +func (s *Stopper_cockroach24808) ShouldStop() <-chan struct{} { + if s == nil { + return nil + } + return s.stopper +} + +func (s *Stopper_cockroach24808) Stop() { + close(s.stopper) +} + +func NewStopper_cockroach24808() *Stopper_cockroach24808 { + s := &Stopper_cockroach24808{ + stopper: make(chan struct{}), + } + return s +} + +func NewCompactor_cockroach24808() *Compactor_cockroach24808 { + return &Compactor_cockroach24808{ch: make(chan struct{}, 1)} +} + +func (c *Compactor_cockroach24808) Start(ctx context.Context, stopper *Stopper_cockroach24808) { + c.ch <- struct{}{} + stopper.RunWorker(ctx, func(ctx context.Context) { + for { + select { + case <-stopper.ShouldStop(): + return + case <-c.ch: + } + } + }) +} + +func Cockroach24808() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + // deadlocks: 1 + stopper := NewStopper_cockroach24808() + defer stopper.Stop() + + compactor := NewCompactor_cockroach24808() + compactor.ch <- struct{}{} + + compactor.Start(context.Background(), stopper) + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go b/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go new file mode 100644 index 00000000000000..961c2fe7c065f3 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go @@ -0,0 +1,91 @@ +package main + +import ( + "runtime" + "time" +) + +func init() { + register("Cockroach25456", Cockroach25456) +} + +type Stopper_cockroach25456 struct { + quiescer chan struct{} +} + +func (s *Stopper_cockroach25456) ShouldQuiesce() <-chan struct{} { + if s == nil { + return nil + } + return s.quiescer +} + +func NewStopper_cockroach25456() *Stopper_cockroach25456 { + return &Stopper_cockroach25456{quiescer: make(chan struct{})} +} + +type Store_cockroach25456 struct { + stopper *Stopper_cockroach25456 + consistencyQueue *consistencyQueue_cockroach25456 +} + +func (s *Store_cockroach25456) Stopper() *Stopper_cockroach25456 { + return s.stopper +} +func (s *Store_cockroach25456) Start(stopper *Stopper_cockroach25456) { + s.stopper = stopper +} + +func NewStore_cockroach25456() *Store_cockroach25456 { + return &Store_cockroach25456{ + consistencyQueue: newConsistencyQueue_cockroach25456(), + } +} + +type Replica_cockroach25456 struct { + store *Store_cockroach25456 +} + +func NewReplica_cockroach25456(store *Store_cockroach25456) *Replica_cockroach25456 { + return &Replica_cockroach25456{store: store} +} + +type consistencyQueue_cockroach25456 struct{} + +func (q *consistencyQueue_cockroach25456) process(repl *Replica_cockroach25456) { + <-repl.store.Stopper().ShouldQuiesce() +} + +func newConsistencyQueue_cockroach25456() 
*consistencyQueue_cockroach25456 { + return &consistencyQueue_cockroach25456{} +} + +type testContext_cockroach25456 struct { + store *Store_cockroach25456 + repl *Replica_cockroach25456 +} + +func (tc *testContext_cockroach25456) StartWithStoreConfig(stopper *Stopper_cockroach25456) { + if tc.store == nil { + tc.store = NewStore_cockroach25456() + } + tc.store.Start(stopper) + tc.repl = NewReplica_cockroach25456(tc.store) +} + +func Cockroach25456() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + // deadlocks: 1 + stopper := NewStopper_cockroach25456() + tc := testContext_cockroach25456{} + tc.StartWithStoreConfig(stopper) + + for i := 0; i < 2; i++ { + tc.store.consistencyQueue.process(tc.repl) + } + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go b/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go new file mode 100644 index 00000000000000..9ca074f83f279a --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go @@ -0,0 +1,117 @@ +package main + +import ( + "runtime" + "sync" + "sync/atomic" + "time" +) + +func init() { + register("Cockroach35073", Cockroach35073) +} + +type ConsumerStatus_cockroach35073 uint32 + +const ( + NeedMoreRows_cockroach35073 ConsumerStatus_cockroach35073 = iota + DrainRequested_cockroach35073 + ConsumerClosed_cockroach35073 +) + +const rowChannelBufSize_cockroach35073 = 16 +const outboxBufRows_cockroach35073 = 16 + +type rowSourceBase_cockroach35073 struct { + consumerStatus ConsumerStatus_cockroach35073 +} + +func (rb *rowSourceBase_cockroach35073) consumerClosed() { + atomic.StoreUint32((*uint32)(&rb.consumerStatus), uint32(ConsumerClosed_cockroach35073)) +} + +type RowChannelMsg_cockroach35073 int + +type RowChannel_cockroach35073 struct { + rowSourceBase_cockroach35073 + dataChan chan RowChannelMsg_cockroach35073 +} + +func (rc *RowChannel_cockroach35073) ConsumerClosed() { + rc.consumerClosed() + select { + case <-rc.dataChan: + default: + } +} + +func (rc *RowChannel_cockroach35073) Push() ConsumerStatus_cockroach35073 { + consumerStatus := ConsumerStatus_cockroach35073( + atomic.LoadUint32((*uint32)(&rc.consumerStatus))) + switch consumerStatus { + case NeedMoreRows_cockroach35073: + rc.dataChan <- RowChannelMsg_cockroach35073(0) + case DrainRequested_cockroach35073: + case ConsumerClosed_cockroach35073: + } + return consumerStatus +} + +func (rc *RowChannel_cockroach35073) InitWithNumSenders() { + rc.initWithBufSizeAndNumSenders(rowChannelBufSize_cockroach35073) +} + +func (rc *RowChannel_cockroach35073) initWithBufSizeAndNumSenders(chanBufSize int) { + rc.dataChan = make(chan RowChannelMsg_cockroach35073, chanBufSize) +} + +type outbox_cockroach35073 struct { + RowChannel_cockroach35073 +} + +func (m *outbox_cockroach35073) init() { + m.RowChannel_cockroach35073.InitWithNumSenders() +} + +func (m *outbox_cockroach35073) start(wg *sync.WaitGroup) { + if wg != nil { + wg.Add(1) + } + go m.run(wg) +} + +func (m *outbox_cockroach35073) run(wg *sync.WaitGroup) { + if wg != nil { + wg.Done() + } +} + +func Cockroach35073() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + // deadlocks: 1 + outbox := &outbox_cockroach35073{} + outbox.init() + + var wg sync.WaitGroup + for i := 0; i < outboxBufRows_cockroach35073; i++ { + outbox.Push() + } + + var blockedPusherWg sync.WaitGroup + blockedPusherWg.Add(1) + go func() { + // deadlocks: 1 + outbox.Push() + blockedPusherWg.Done() + }() + + outbox.start(&wg) + + 
wg.Wait() + outbox.RowChannel_cockroach35073.Push() + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go b/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go new file mode 100644 index 00000000000000..587c1dfc58f4bc --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go @@ -0,0 +1,127 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach35931", Cockroach35931) +} + +type RowReceiver_cockroach35931 interface { + Push() +} + +type inboundStreamInfo_cockroach35931 struct { + receiver RowReceiver_cockroach35931 +} + +type RowChannel_cockroach35931 struct { + dataChan chan struct{} +} + +func (rc *RowChannel_cockroach35931) Push() { + // The buffer size can be either 0 or 1 when this function is entered. + // We need context sensitivity or a path-condition on the buffer size + // to find this bug. + rc.dataChan <- struct{}{} +} + +func (rc *RowChannel_cockroach35931) initWithBufSizeAndNumSenders(chanBufSize int) { + rc.dataChan = make(chan struct{}, chanBufSize) +} + +type flowEntry_cockroach35931 struct { + flow *Flow_cockroach35931 + inboundStreams map[int]*inboundStreamInfo_cockroach35931 +} + +type flowRegistry_cockroach35931 struct { + sync.Mutex + flows map[int]*flowEntry_cockroach35931 +} + +func (fr *flowRegistry_cockroach35931) getEntryLocked(id int) *flowEntry_cockroach35931 { + entry, ok := fr.flows[id] + if !ok { + entry = &flowEntry_cockroach35931{} + fr.flows[id] = entry + } + return entry +} + +func (fr *flowRegistry_cockroach35931) cancelPendingStreamsLocked(id int) []RowReceiver_cockroach35931 { + entry := fr.flows[id] + pendingReceivers := make([]RowReceiver_cockroach35931, 0) + for _, is := range entry.inboundStreams { + pendingReceivers = append(pendingReceivers, is.receiver) + } + return pendingReceivers +} + +type Flow_cockroach35931 struct { + id int + flowRegistry *flowRegistry_cockroach35931 + inboundStreams map[int]*inboundStreamInfo_cockroach35931 +} + +func (f *Flow_cockroach35931) cancel() { + f.flowRegistry.Lock() + timedOutReceivers := f.flowRegistry.cancelPendingStreamsLocked(f.id) + f.flowRegistry.Unlock() + + for _, receiver := range timedOutReceivers { + receiver.Push() + } +} + +func (fr *flowRegistry_cockroach35931) RegisterFlow(f *Flow_cockroach35931, inboundStreams map[int]*inboundStreamInfo_cockroach35931) { + entry := fr.getEntryLocked(f.id) + entry.flow = f + entry.inboundStreams = inboundStreams +} + +func makeFlowRegistry_cockroach35931() *flowRegistry_cockroach35931 { + return &flowRegistry_cockroach35931{ + flows: make(map[int]*flowEntry_cockroach35931), + } +} + +func Cockroach35931() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + // deadlocks: 1 + fr := makeFlowRegistry_cockroach35931() + + left := &RowChannel_cockroach35931{} + left.initWithBufSizeAndNumSenders(1) + right := &RowChannel_cockroach35931{} + right.initWithBufSizeAndNumSenders(1) + + inboundStreams := map[int]*inboundStreamInfo_cockroach35931{ + 0: { + receiver: left, + }, + 1: { + receiver: right, + }, + } + + left.Push() + + flow := &Flow_cockroach35931{ + id: 0, + flowRegistry: fr, + inboundStreams: inboundStreams, + } + + fr.RegisterFlow(flow, inboundStreams) + + flow.cancel() + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach3710.go b/src/runtime/testdata/testgoroutineleakgc/cockroach3710.go new file mode 100644 index 00000000000000..c1fe8abec02678 --- /dev/null +++ 
b/src/runtime/testdata/testgoroutineleakgc/cockroach3710.go @@ -0,0 +1,132 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/3710 + * Buggy version: 4afdd4860fd7c3bd9e92489f84a95e5cc7d11a0d + * fix commit-id: cb65190f9caaf464723e7d072b1f1b69a044ef7b + * Flaky: 2/100 + * Description: This deadlock is casued by acquiring a RLock twice in a call chain. + * ForceRaftLogScanAndProcess(acquire s.mu.RLock()) ->MaybeAdd()->shouldQueue()-> + * getTruncatableIndexes()->RaftStatus(acquire s.mu.Rlock()) + */ + +package main + +import ( + "runtime" + "sync" + "time" + "unsafe" +) + +func init() { + register("Cockroach3710", Cockroach3710) +} + +type Store_cockroach3710 struct { + raftLogQueue *baseQueue + replicas map[int]*Replica_cockroach3710 + + mu struct { + sync.RWMutex + } +} + +func (s *Store_cockroach3710) ForceRaftLogScanAndProcess() { + s.mu.RLock() + runtime.Gosched() + for _, r := range s.replicas { + s.raftLogQueue.MaybeAdd(r) + } + s.mu.RUnlock() +} + +func (s *Store_cockroach3710) RaftStatus() { + s.mu.RLock() + defer s.mu.RUnlock() +} + +func (s *Store_cockroach3710) processRaft() { + go func() { + // deadlocks: x > 0 + for { + var replicas []*Replica_cockroach3710 + s.mu.Lock() + for _, r := range s.replicas { + replicas = append(replicas, r) + } + s.mu.Unlock() + break + } + }() +} + +type Replica_cockroach3710 struct { + store *Store_cockroach3710 +} + +type baseQueue struct { + sync.Mutex + impl *raftLogQueue +} + +func (bq *baseQueue) MaybeAdd(repl *Replica_cockroach3710) { + bq.Lock() + defer bq.Unlock() + bq.impl.shouldQueue(repl) +} + +type raftLogQueue struct{} + +func (*raftLogQueue) shouldQueue(r *Replica_cockroach3710) { + getTruncatableIndexes(r) +} + +func getTruncatableIndexes(r *Replica_cockroach3710) { + r.store.RaftStatus() +} + +func NewStore_cockroach3710() *Store_cockroach3710 { + rlq := &raftLogQueue{} + bq := &baseQueue{impl: rlq} + store := &Store_cockroach3710{ + raftLogQueue: bq, + replicas: make(map[int]*Replica_cockroach3710), + } + r1 := &Replica_cockroach3710{store} + r2 := &Replica_cockroach3710{store} + + makeKey := func(r *Replica_cockroach3710) int { + return int((uintptr(unsafe.Pointer(r)) >> 1) % 7) + } + store.replicas[makeKey(r1)] = r1 + store.replicas[makeKey(r2)] = r2 + + return store +} + +/// G1 G2 +/// store.ForceRaftLogScanAndProcess() +/// s.mu.RLock() +/// s.raftLogQueue.MaybeAdd() +/// bq.impl.shouldQueue() +/// getTruncatableIndexes() +/// r.store.RaftStatus() +/// store.processRaft() +/// s.mu.Lock() +/// s.mu.RLock() +/// ----------------------G1,G2 deadlock--------------------- + +func Cockroach3710() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 10000; i++ { + go func() { + store := NewStore_cockroach3710() + // deadlocks: x > 0 + go store.ForceRaftLogScanAndProcess() // G1 + go store.processRaft() // G2 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach584.go b/src/runtime/testdata/testgoroutineleakgc/cockroach584.go new file mode 100644 index 00000000000000..89602804ca9727 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach584.go @@ -0,0 +1,58 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach584", Cockroach584) +} + +type gossip_cockroach584 struct { + mu sync.Mutex + closed bool +} + +func (g *gossip_cockroach584) bootstrap() { + for { + g.mu.Lock() + if g.closed { + /// Missing g.mu.Unlock + break + } + g.mu.Unlock() + } +} + +func (g 
*gossip_cockroach584) manage() { + for { + g.mu.Lock() + if g.closed { + /// Missing g.mu.Unlock + break + } + g.mu.Unlock() + } +} + +func Cockroach584() { + defer func() { + time.Sleep(10 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + g := &gossip_cockroach584{ + closed: true, + } + go func() { + // deadlocks: x > 0 + g.bootstrap() + g.manage() + }() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach6181.go b/src/runtime/testdata/testgoroutineleakgc/cockroach6181.go new file mode 100644 index 00000000000000..20c440678e76a3 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach6181.go @@ -0,0 +1,100 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/6181 + * Buggy version: c0a232b5521565904b851699853bdbd0c670cf1e + * fix commit-id: d5814e4886a776bf7789b3c51b31f5206480d184 + * Flaky: 57/100 + */ +package main + +import ( + "fmt" + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach6181", Cockroach6181) +} + +type testDescriptorDB_cockroach6181 struct { + cache *rangeDescriptorCache_cockroach6181 +} + +func initTestDescriptorDB_cockroach6181() *testDescriptorDB_cockroach6181 { + return &testDescriptorDB_cockroach6181{&rangeDescriptorCache_cockroach6181{}} +} + +type rangeDescriptorCache_cockroach6181 struct { + rangeCacheMu sync.RWMutex +} + +func (rdc *rangeDescriptorCache_cockroach6181) LookupRangeDescriptor() { + rdc.rangeCacheMu.RLock() + runtime.Gosched() + fmt.Println("lookup range descriptor:", rdc) + rdc.rangeCacheMu.RUnlock() + rdc.rangeCacheMu.Lock() + rdc.rangeCacheMu.Unlock() +} + +func (rdc *rangeDescriptorCache_cockroach6181) String() string { + rdc.rangeCacheMu.RLock() + defer rdc.rangeCacheMu.RUnlock() + return rdc.stringLocked() +} + +func (rdc *rangeDescriptorCache_cockroach6181) stringLocked() string { + return "something here" +} + +func doLookupWithToken_cockroach6181(rc *rangeDescriptorCache_cockroach6181) { + rc.LookupRangeDescriptor() +} + +func testRangeCacheCoalescedRequests_cockroach6181() { + // deadlocks: x > 0 + db := initTestDescriptorDB_cockroach6181() + pauseLookupResumeAndAssert := func() { + var wg sync.WaitGroup + for i := 0; i < 3; i++ { + wg.Add(1) + go func() { // G2,G3,... + // deadlocks: x > 0 + doLookupWithToken_cockroach6181(db.cache) + wg.Done() + }() + } + wg.Wait() + } + pauseLookupResumeAndAssert() +} + +/// G1 G2 G3 ... +/// testRangeCacheCoalescedRquests() +/// initTestDescriptorDB() +/// pauseLookupResumeAndAssert() +/// return +/// doLookupWithToken() +/// doLookupWithToken() +/// rc.LookupRangeDescriptor() +/// rc.LookupRangeDescriptor() +/// rdc.rangeCacheMu.RLock() +/// rdc.String() +/// rdc.rangeCacheMu.RLock() +/// fmt.Printf() +/// rdc.rangeCacheMu.RUnlock() +/// rdc.rangeCacheMu.Lock() +/// rdc.rangeCacheMu.RLock() +/// -------------------------------------G2,G3,... 
deadlock-------------------------------------- + +func Cockroach6181() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + go testRangeCacheCoalescedRequests_cockroach6181() // G1 + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach7504.go b/src/runtime/testdata/testgoroutineleakgc/cockroach7504.go new file mode 100644 index 00000000000000..9fd36f243b9ef9 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach7504.go @@ -0,0 +1,196 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/7504 + * Buggy version: bc963b438cdc3e0ad058a5282358e5aee0595e17 + * fix commit-id: cab761b9f5ee5dee1448bc5d6b1d9f5a0ff0bad5 + * Flaky: 1/100 + * Description: There are locking leaseState, tableNameCache in Release(), but + * tableNameCache,LeaseState in AcquireByName. It is AB and BA deadlock. + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach7504", Cockroach7504) +} + +func MakeCacheKey_cockroach7504(lease *LeaseState_cockroach7504) int { + return lease.id +} + +type LeaseState_cockroach7504 struct { + mu sync.Mutex // L1 + id int +} +type LeaseSet_cockroach7504 struct { + data []*LeaseState_cockroach7504 +} + +func (l *LeaseSet_cockroach7504) find(id int) *LeaseState_cockroach7504 { + return l.data[id] +} + +func (l *LeaseSet_cockroach7504) remove(s *LeaseState_cockroach7504) { + for i := 0; i < len(l.data); i++ { + if s == l.data[i] { + l.data = append(l.data[:i], l.data[i+1:]...) + break + } + } +} + +type tableState_cockroach7504 struct { + tableNameCache *tableNameCache_cockroach7504 + mu sync.Mutex // L3 + active *LeaseSet_cockroach7504 +} + +func (t *tableState_cockroach7504) release(lease *LeaseState_cockroach7504) { + t.mu.Lock() // L3 + defer t.mu.Unlock() // L3 + + s := t.active.find(MakeCacheKey_cockroach7504(lease)) + s.mu.Lock() // L1 + runtime.Gosched() + defer s.mu.Unlock() // L1 + + t.removeLease(s) +} +func (t *tableState_cockroach7504) removeLease(lease *LeaseState_cockroach7504) { + t.active.remove(lease) + t.tableNameCache.remove(lease) // L1 acquire/release +} + +type tableNameCache_cockroach7504 struct { + mu sync.Mutex // L2 + tables map[int]*LeaseState_cockroach7504 +} + +func (c *tableNameCache_cockroach7504) get(id int) { + c.mu.Lock() // L2 + defer c.mu.Unlock() // L2 + lease, ok := c.tables[id] + if !ok { + return + } + if lease == nil { + panic("nil lease in name cache") + } + lease.mu.Lock() // L1 + defer lease.mu.Unlock() // L1 +} + +func (c *tableNameCache_cockroach7504) remove(lease *LeaseState_cockroach7504) { + c.mu.Lock() // L2 + runtime.Gosched() + defer c.mu.Unlock() // L2 + key := MakeCacheKey_cockroach7504(lease) + existing, ok := c.tables[key] + if !ok { + return + } + if existing == lease { + delete(c.tables, key) + } +} + +type LeaseManager_cockroach7504 struct { + _ [64]byte + tableNames *tableNameCache_cockroach7504 + tables map[int]*tableState_cockroach7504 +} + +func (m *LeaseManager_cockroach7504) AcquireByName(id int) { + m.tableNames.get(id) +} + +func (m *LeaseManager_cockroach7504) findTableState(lease *LeaseState_cockroach7504) *tableState_cockroach7504 { + existing, ok := m.tables[lease.id] + if !ok { + return nil + } + return existing +} + +func (m *LeaseManager_cockroach7504) Release(lease *LeaseState_cockroach7504) { + t := m.findTableState(lease) + t.release(lease) +} +func NewLeaseManager_cockroach7504(tname *tableNameCache_cockroach7504, ts *tableState_cockroach7504) 
*LeaseManager_cockroach7504 { + mgr := &LeaseManager_cockroach7504{ + tableNames: tname, + tables: make(map[int]*tableState_cockroach7504), + } + mgr.tables[0] = ts + return mgr +} +func NewLeaseSet_cockroach7504(n int) *LeaseSet_cockroach7504 { + lset := &LeaseSet_cockroach7504{} + for i := 0; i < n; i++ { + lease := new(LeaseState_cockroach7504) + lset.data = append(lset.data, lease) + } + return lset +} + +func Cockroach7504() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + go func() { + leaseNum := 2 + lset := NewLeaseSet_cockroach7504(leaseNum) + + nc := &tableNameCache_cockroach7504{ + tables: make(map[int]*LeaseState_cockroach7504), + } + for i := 0; i < leaseNum; i++ { + nc.tables[i] = lset.find(i) + } + + ts := &tableState_cockroach7504{ + tableNameCache: nc, + active: lset, + } + + mgr := NewLeaseManager_cockroach7504(nc, ts) + + // G1 + go func() { + // deadlocks: x > 0 + // lock L2-L1 + mgr.AcquireByName(0) + }() + + // G2 + go func() { + // deadlocks: x > 0 + // lock L1-L2 + mgr.Release(lset.find(0)) + }() + }() + } +} + +// Example deadlock trace: +// +// G1 G2 +// ------------------------------------------------------------------------------------------------ +// mgr.AcquireByName(0) mgr.Release(lset.find(0)) +// m.tableNames.get(id) . +// c.mu.Lock() [L2] . +// . t.release(lease) +// . t.mu.Lock() [L3] +// . s.mu.Lock() [L1] +// lease.mu.Lock() [L1] . +// . t.removeLease(s) +// . t.tableNameCache.remove(lease) +// . c.mu.Lock() [L2] +// ---------------------------------------G1, G2 leak---------------------------------------------- diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach9935.go b/src/runtime/testdata/testgoroutineleakgc/cockroach9935.go new file mode 100644 index 00000000000000..e0d1a44a2b4ef9 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach9935.go @@ -0,0 +1,58 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/9935 + * Buggy version: 4df302cc3f03328395dc3fefbfba58b7718e4f2f + * fix commit-id: ed6a100ba38dd51b0888b9a3d3ac6bdbb26c528c + * Flaky: 100/100 + * Description: This bug is caused by acquiring l.mu.Lock() twice. The fix is + * to release l.mu.Lock() before acquiring l.mu.Lock for the second time. 
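+ * In the code below, createFile() fails pseudo-randomly; when it does,
+ * outputLogEntry() still holds l.mu and calls exit(), which tries to
+ * acquire l.mu a second time, so the goroutine deadlocks with itself.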
+ */ +package main + +import ( + "errors" + "math/rand" + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach9935", Cockroach9935) +} + +type loggingT_cockroach9935 struct { + mu sync.Mutex +} + +func (l *loggingT_cockroach9935) outputLogEntry() { + l.mu.Lock() + if err := l.createFile(); err != nil { + l.exit(err) + } + l.mu.Unlock() +} +func (l *loggingT_cockroach9935) createFile() error { + if rand.Intn(8)%4 > 0 { + return errors.New("") + } + return nil +} +func (l *loggingT_cockroach9935) exit(err error) { + l.mu.Lock() + defer l.mu.Unlock() +} +func Cockroach9935() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + l := &loggingT_cockroach9935{} + // deadlocks: x > 0 + go l.outputLogEntry() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd10492.go b/src/runtime/testdata/testgoroutineleakgc/etcd10492.go new file mode 100644 index 00000000000000..ed9165f751ed3c --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd10492.go @@ -0,0 +1,65 @@ +package main + +import ( + "context" + "runtime" + "sync" + "time" +) + +func init() { + register("Etcd10492", Etcd10492) +} + +type Checkpointer_etcd10492 func(ctx context.Context) + +type lessor_etcd10492 struct { + mu sync.RWMutex + cp Checkpointer_etcd10492 + checkpointInterval time.Duration +} + +func (le *lessor_etcd10492) Checkpoint() { + le.mu.Lock() + defer le.mu.Unlock() +} + +func (le *lessor_etcd10492) SetCheckpointer(cp Checkpointer_etcd10492) { + le.mu.Lock() + defer le.mu.Unlock() + + le.cp = cp +} + +func (le *lessor_etcd10492) Renew() { + le.mu.Lock() + unlock := func() { le.mu.Unlock() } + defer func() { unlock() }() + + if le.cp != nil { + le.cp(context.Background()) + } +} +func Etcd10492() { + defer func() { + time.Sleep(10 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + // deadlocks: x > 0 + + le := &lessor_etcd10492{ + checkpointInterval: 0, + } + fakerCheckerpointer_etcd10492 := func(ctx context.Context) { + le.Checkpoint() + } + le.SetCheckpointer(fakerCheckerpointer_etcd10492) + le.mu.Lock() + le.mu.Unlock() + le.Renew() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd5509.go b/src/runtime/testdata/testgoroutineleakgc/etcd5509.go new file mode 100644 index 00000000000000..3b8e8ed1cafcaa --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd5509.go @@ -0,0 +1,120 @@ +package main + +import ( + "context" + "fmt" + "runtime" + "sync" + "time" +) + +func init() { + register("Etcd5509", Etcd5509) +} + +var ErrConnClosed_etcd5509 error + +type Client_etcd5509 struct { + mu sync.RWMutex + ctx context.Context + cancel context.CancelFunc +} + +func (c *Client_etcd5509) Close() { + c.mu.Lock() + defer c.mu.Unlock() + if c.cancel == nil { + return + } + c.cancel() + c.cancel = nil + c.mu.Unlock() + c.mu.Lock() +} + +type remoteClient_etcd5509 struct { + client *Client_etcd5509 + mu sync.Mutex +} + +func (r *remoteClient_etcd5509) acquire(ctx context.Context) error { + for { + r.client.mu.RLock() + closed := r.client.cancel == nil + r.mu.Lock() + r.mu.Unlock() + if closed { + return ErrConnClosed_etcd5509 // Missing RUnlock before return + } + r.client.mu.RUnlock() + } +} + +type kv_etcd5509 struct { + rc *remoteClient_etcd5509 +} + +func (kv *kv_etcd5509) Get(ctx context.Context) error { + return kv.Do(ctx) +} + +func (kv *kv_etcd5509) Do(ctx context.Context) error { + for { + err := kv.do(ctx) + if err == nil { + return nil + } + return 
err + } +} + +func (kv *kv_etcd5509) do(ctx context.Context) error { + err := kv.getRemote(ctx) + return err +} + +func (kv *kv_etcd5509) getRemote(ctx context.Context) error { + return kv.rc.acquire(ctx) +} + +type KV interface { + Get(ctx context.Context) error + Do(ctx context.Context) error +} + +func NewKV_etcd5509(c *Client_etcd5509) KV { + return &kv_etcd5509{rc: &remoteClient_etcd5509{ + client: c, + }} +} + +func Etcd5509() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 10; i++ { + go func() { + // deadlocks: x > 0 + ctx, _ := context.WithCancel(context.TODO()) + cli := &Client_etcd5509{ + ctx: ctx, + } + kv := NewKV_etcd5509(cli) + donec := make(chan struct{}) + go func() { + defer close(donec) + err := kv.Get(context.TODO()) + if err != nil && err != ErrConnClosed_etcd5509 { + fmt.Println("Expect ErrConnClosed") + } + }() + + runtime.Gosched() + cli.Close() + + <-donec + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd6708.go b/src/runtime/testdata/testgoroutineleakgc/etcd6708.go new file mode 100644 index 00000000000000..40690395e5589d --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd6708.go @@ -0,0 +1,94 @@ +package main + +import ( + "context" + "runtime" + "sync" + "time" +) + +func init() { + register("Etcd6708", Etcd6708) +} + +type EndpointSelectionMode_etcd6708 int + +const ( + EndpointSelectionRandom_etcd6708 EndpointSelectionMode_etcd6708 = iota + EndpointSelectionPrioritizeLeader_etcd6708 +) + +type MembersAPI_etcd6708 interface { + Leader(ctx context.Context) +} + +type Client_etcd6708 interface { + Sync(ctx context.Context) + SetEndpoints() + httpClient_etcd6708 +} + +type httpClient_etcd6708 interface { + Do(context.Context) +} + +type httpClusterClient_etcd6708 struct { + sync.RWMutex + selectionMode EndpointSelectionMode_etcd6708 +} + +func (c *httpClusterClient_etcd6708) getLeaderEndpoint() { + mAPI := NewMembersAPI_etcd6708(c) + mAPI.Leader(context.Background()) +} + +func (c *httpClusterClient_etcd6708) SetEndpoints() { + switch c.selectionMode { + case EndpointSelectionRandom_etcd6708: + case EndpointSelectionPrioritizeLeader_etcd6708: + c.getLeaderEndpoint() + } +} + +func (c *httpClusterClient_etcd6708) Do(ctx context.Context) { + c.RLock() + c.RUnlock() +} + +func (c *httpClusterClient_etcd6708) Sync(ctx context.Context) { + c.Lock() + defer c.Unlock() + + c.SetEndpoints() +} + +type httpMembersAPI_etcd6708 struct { + client httpClient_etcd6708 +} + +func (m *httpMembersAPI_etcd6708) Leader(ctx context.Context) { + m.client.Do(ctx) +} + +func NewMembersAPI_etcd6708(c Client_etcd6708) MembersAPI_etcd6708 { + return &httpMembersAPI_etcd6708{ + client: c, + } +} + +func Etcd6708() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + // deadlocks: x > 0 + hc := &httpClusterClient_etcd6708{ + selectionMode: EndpointSelectionPrioritizeLeader_etcd6708, + } + hc.Sync(context.Background()) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd6857.go b/src/runtime/testdata/testgoroutineleakgc/etcd6857.go new file mode 100644 index 00000000000000..7411a3216143d0 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd6857.go @@ -0,0 +1,91 @@ +/* + * Project: etcd + * Issue or PR : https://github.com/etcd-io/etcd/pull/6857 + * Buggy version: 7c8f13aed7fe251e7066ed6fc1a090699c2cae0e + * fix commit-id: 7afc490c95789c408fbc256d8e790273d331c984 + * Flaky: 19/100 + */ +package main + +import 
( + "runtime" + "time" +) + +func init() { + register("Etcd6857", Etcd6857) +} + +type Status_etcd6857 struct{} + +type node_etcd6857 struct { + status chan chan Status_etcd6857 + stop chan struct{} + done chan struct{} +} + +func (n *node_etcd6857) Status() Status_etcd6857 { + c := make(chan Status_etcd6857) + n.status <- c + return <-c +} + +func (n *node_etcd6857) run() { + for { + select { + case c := <-n.status: + c <- Status_etcd6857{} + case <-n.stop: + close(n.done) + return + } + } +} + +func (n *node_etcd6857) Stop() { + select { + case n.stop <- struct{}{}: + case <-n.done: + return + } + <-n.done +} + +func NewNode_etcd6857() *node_etcd6857 { + return &node_etcd6857{ + status: make(chan chan Status_etcd6857), + stop: make(chan struct{}), + done: make(chan struct{}), + } +} + +/// +/// G1 G2 G3 +/// n.run() +/// n.Stop() +/// n.stop<- +/// <-n.stop +/// <-n.done +/// close(n.done) +/// return +/// return +/// n.Status() +/// n.status<- +///----------------G2 leak------------------- +/// + +func Etcd6857() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i <= 100; i++ { + go func() { + n := NewNode_etcd6857() + go n.run() // G1 + // deadlocks: x > 0 + go n.Status() // G2 + go n.Stop() // G3 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd6873.go b/src/runtime/testdata/testgoroutineleakgc/etcd6873.go new file mode 100644 index 00000000000000..848c56d801492a --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd6873.go @@ -0,0 +1,92 @@ +/* + * Project: etcd + * Issue or PR : https://github.com/etcd-io/etcd/commit/7618fdd1d642e47cac70c03f637b0fd798a53a6e + * Buggy version: 377f19b0031f9c0aafe2aec28b6f9019311f52f9 + * fix commit-id: 7618fdd1d642e47cac70c03f637b0fd798a53a6e + * Flaky: 9/100 + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Etcd6873", Etcd6873) +} + +type watchBroadcast_etcd6873 struct{} + +type watchBroadcasts_etcd6873 struct { + mu sync.Mutex + updatec chan *watchBroadcast_etcd6873 + donec chan struct{} +} + +func newWatchBroadcasts_etcd6873() *watchBroadcasts_etcd6873 { + wbs := &watchBroadcasts_etcd6873{ + updatec: make(chan *watchBroadcast_etcd6873, 1), + donec: make(chan struct{}), + } + go func() { // G2 + defer close(wbs.donec) + // deadlocks: x > 0 + for wb := range wbs.updatec { + wbs.coalesce(wb) + } + }() + return wbs +} + +func (wbs *watchBroadcasts_etcd6873) coalesce(wb *watchBroadcast_etcd6873) { + wbs.mu.Lock() + wbs.mu.Unlock() +} + +func (wbs *watchBroadcasts_etcd6873) stop() { + wbs.mu.Lock() + defer wbs.mu.Unlock() + close(wbs.updatec) + <-wbs.donec +} + +func (wbs *watchBroadcasts_etcd6873) update(wb *watchBroadcast_etcd6873) { + select { + case wbs.updatec <- wb: + default: + } +} + +/// +/// G1 G2 G3 +/// newWatchBroadcasts() +/// wbs.update() +/// wbs.updatec <- +/// return +/// <-wbs.updatec +/// wbs.coalesce() +/// wbs.stop() +/// wbs.mu.Lock() +/// close(wbs.updatec) +/// <-wbs.donec +/// wbs.mu.Lock() +///---------------------G2,G3 deadlock------------------------- +/// + +func Etcd6873() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + wbs := newWatchBroadcasts_etcd6873() // G1 + wbs.update(&watchBroadcast_etcd6873{}) + // deadlocks: x > 0 + go wbs.stop() // G3 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd7492.go b/src/runtime/testdata/testgoroutineleakgc/etcd7492.go new file mode 100644 index 00000000000000..feb497f9b4512a 
--- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd7492.go @@ -0,0 +1,178 @@ +/* + * Project: etcd + * Issue or PR : https://github.com/etcd-io/etcd/pull/7492 + * Buggy version: 51939650057d602bb5ab090633138fffe36854dc + * fix commit-id: 1b1fabef8ffec606909f01c3983300fff539f214 + * Flaky: 40/100 + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Etcd7492", Etcd7492) +} + +type TokenProvider_etcd7492 interface { + assign() + enable() + disable() +} + +type simpleTokenTTLKeeper_etcd7492 struct { + tokens map[string]time.Time + addSimpleTokenCh chan struct{} + stopCh chan chan struct{} + deleteTokenFunc func(string) +} + +type authStore_etcd7492 struct { + tokenProvider TokenProvider_etcd7492 +} + +func (as *authStore_etcd7492) Authenticate() { + as.tokenProvider.assign() +} + +func NewSimpleTokenTTLKeeper_etcd7492(deletefunc func(string)) *simpleTokenTTLKeeper_etcd7492 { + stk := &simpleTokenTTLKeeper_etcd7492{ + tokens: make(map[string]time.Time), + addSimpleTokenCh: make(chan struct{}, 1), + stopCh: make(chan chan struct{}), + deleteTokenFunc: deletefunc, + } + // deadlocks: x > 0 + go stk.run() // G1 + return stk +} + +func (tm *simpleTokenTTLKeeper_etcd7492) run() { + tokenTicker := time.NewTicker(time.Nanosecond) + defer tokenTicker.Stop() + for { + select { + case <-tm.addSimpleTokenCh: + runtime.Gosched() + /// Make tm.tokens not empty is enough + tm.tokens["1"] = time.Now() + case <-tokenTicker.C: + runtime.Gosched() + for t, _ := range tm.tokens { + tm.deleteTokenFunc(t) + delete(tm.tokens, t) + } + case waitCh := <-tm.stopCh: + waitCh <- struct{}{} + return + } + } +} + +func (tm *simpleTokenTTLKeeper_etcd7492) addSimpleToken() { + tm.addSimpleTokenCh <- struct{}{} + runtime.Gosched() +} + +func (tm *simpleTokenTTLKeeper_etcd7492) stop() { + waitCh := make(chan struct{}) + tm.stopCh <- waitCh + <-waitCh + close(tm.stopCh) +} + +type tokenSimple_etcd7492 struct { + simpleTokenKeeper *simpleTokenTTLKeeper_etcd7492 + simpleTokensMu sync.RWMutex +} + +func (t *tokenSimple_etcd7492) assign() { + t.assignSimpleTokenToUser() +} + +func (t *tokenSimple_etcd7492) assignSimpleTokenToUser() { + t.simpleTokensMu.Lock() + runtime.Gosched() + t.simpleTokenKeeper.addSimpleToken() + t.simpleTokensMu.Unlock() +} +func newDeleterFunc(t *tokenSimple_etcd7492) func(string) { + return func(tk string) { + t.simpleTokensMu.Lock() + defer t.simpleTokensMu.Unlock() + } +} + +func (t *tokenSimple_etcd7492) enable() { + t.simpleTokenKeeper = NewSimpleTokenTTLKeeper_etcd7492(newDeleterFunc(t)) +} + +func (t *tokenSimple_etcd7492) disable() { + if t.simpleTokenKeeper != nil { + t.simpleTokenKeeper.stop() + t.simpleTokenKeeper = nil + } + t.simpleTokensMu.Lock() + t.simpleTokensMu.Unlock() +} + +func newTokenProviderSimple_etcd7492() *tokenSimple_etcd7492 { + return &tokenSimple_etcd7492{} +} + +func setupAuthStore_etcd7492() (store *authStore_etcd7492, teardownfunc func()) { + as := &authStore_etcd7492{ + tokenProvider: newTokenProviderSimple_etcd7492(), + } + as.tokenProvider.enable() + tearDown := func() { + as.tokenProvider.disable() + } + return as, tearDown +} + +/// +/// G2 G1 +/// stk.run() +/// ts.assignSimpleTokenToUser() +/// t.simpleTokensMu.Lock() +/// t.simpleTokenKeeper.addSimpleToken() +/// tm.addSimpleTokenCh <- true +/// <-tm.addSimpleTokenCh +/// t.simpleTokensMu.Unlock() +/// ts.assignSimpleTokenToUser() +/// ... ... 
+/// t.simpleTokensMu.Lock() +/// <-tokenTicker.C +/// tm.addSimpleTokenCh <- true +/// tm.deleteTokenFunc() +/// t.simpleTokensMu.Lock() +///------------------------------------G1,G2 deadlock--------------------------------------------- +/// + +func Etcd7492() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + go func() { + // deadlocks: x > 0 + as, tearDown := setupAuthStore_etcd7492() + defer tearDown() + var wg sync.WaitGroup + wg.Add(3) + for i := 0; i < 3; i++ { + go func() { // G2 + // deadlocks: x > 0 + as.Authenticate() + defer wg.Done() + }() + } + wg.Wait() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd7902.go b/src/runtime/testdata/testgoroutineleakgc/etcd7902.go new file mode 100644 index 00000000000000..72f713ed44cc7b --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd7902.go @@ -0,0 +1,113 @@ +/* + * Project: etcd + * Issue or PR : https://github.com/coreos/etcd/pull/7902 + * Buggy version: dfdaf082c51ba14861267f632f6af795a27eb4ef + * fix commit-id: 87d99fe0387ee1df1cf1811d88d37331939ef4ae + * Flaky: 100/100 + * Description: + * At least two goroutines are needed to trigger this bug, + * one is leader and the other is follower. Both the leader + * and the follower execute the code above. If the follower + * acquires mu.Lock() firstly and enter rc.release(), it will + * be blocked at <- rcNextc (nextc). Only the leader can execute + * close(nextc) to unblock the follower inside rc.release(). + * However, in order to invoke rc.release(), the leader needs + * to acquires mu.Lock(). + * The fix is to remove the lock and unlock around rc.release(). + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Etcd7902", Etcd7902) +} + +type roundClient_etcd7902 struct { + progress int + acquire func() + validate func() + release func() +} + +func runElectionFunc_etcd7902() { + // deadlocks: x > 0 + rcs := make([]roundClient_etcd7902, 3) + nextc := make(chan bool) + for i := range rcs { + var rcNextc chan bool + setRcNextc := func() { + rcNextc = nextc + } + rcs[i].acquire = func() {} + rcs[i].validate = func() { + setRcNextc() + } + rcs[i].release = func() { + if i == 0 { // Assume the first roundClient is the leader + close(nextc) + nextc = make(chan bool) + } + <-rcNextc // Follower is blocking here + } + } + doRounds_etcd7902(rcs, 100) +} + +func doRounds_etcd7902(rcs []roundClient_etcd7902, rounds int) { + var mu sync.Mutex + var wg sync.WaitGroup + wg.Add(len(rcs)) + for i := range rcs { + go func(rc *roundClient_etcd7902) { // G2,G3 + // deadlocks: x > 0 + defer wg.Done() + for rc.progress < rounds || rounds <= 0 { + rc.acquire() + mu.Lock() + rc.validate() + mu.Unlock() + time.Sleep(10 * time.Millisecond) + rc.progress++ + mu.Lock() + rc.release() + mu.Unlock() + } + }(&rcs[i]) + } + wg.Wait() +} + +/// +/// G1 G2 (leader) G3 (follower) +/// runElectionFunc() +/// doRounds() +/// wg.Wait() +/// ... +/// mu.Lock() +/// rc.validate() +/// rcNextc = nextc +/// mu.Unlock() ... 
+/// mu.Lock() +/// rc.validate() +/// mu.Unlock() +/// mu.Lock() +/// rc.release() +/// <-rcNextc +/// mu.Lock() +/// -------------------------G1,G2,G3 deadlock-------------------------- +/// + +func Etcd7902() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + go runElectionFunc_etcd7902() // G1 + } +} diff --git a/src/runtime/testdata/testprog/goroutineleakgc.go b/src/runtime/testdata/testgoroutineleakgc/goroutineleakgc.go similarity index 65% rename from src/runtime/testdata/testprog/goroutineleakgc.go rename to src/runtime/testdata/testgoroutineleakgc/goroutineleakgc.go index fb255afa41cab8..e7daebcb14f74c 100644 --- a/src/runtime/testdata/testprog/goroutineleakgc.go +++ b/src/runtime/testdata/testgoroutineleakgc/goroutineleakgc.go @@ -6,24 +6,27 @@ import ( "time" ) +// This is a set of micro-tests with obvious goroutine leaks that +// ensures goroutine leak detection works. + func init() { - register("GoroutineLeakNilRecv", GoroutineLeakNilRecv) - register("GoroutineLeakNilSend", GoroutineLeakNilSend) - register("GoroutineLeakSelectNoCases", GoroutineLeakSelectNoCases) - register("GoroutineLeakChanRecv", GoroutineLeakChanRecv) - register("GoroutineLeakChanSend", GoroutineLeakChanSend) - register("GoroutineLeakSelect", GoroutineLeakSelect) - register("GoroutineLeakWaitGroup", GoroutineLeakWaitGroup) - register("GoroutineLeakMutexStack", GoroutineLeakMutexStack) - register("GoroutineLeakMutexHeap", GoroutineLeakMutexHeap) - register("GoroutineLeakRWMutexRLock", GoroutineLeakRWMutexRLock) - register("GoroutineLeakRWMutexLock", GoroutineLeakRWMutexLock) - register("GoroutineLeakCond", GoroutineLeakCond) - register("GoroutineLeakMixed", GoroutineLeakMixed) - register("NoGoroutineLeakGlobal", NoGoroutineLeakGlobal) + register("NilRecv", NilRecv) + register("NilSend", NilSend) + register("SelectNoCases", SelectNoCases) + register("ChanRecv", ChanRecv) + register("ChanSend", ChanSend) + register("Select", Select) + register("WaitGroup", WaitGroup) + register("MutexStack", MutexStack) + register("MutexHeap", MutexHeap) + register("RWMutexRLock", RWMutexRLock) + register("RWMutexLock", RWMutexLock) + register("Cond", Cond) + register("Mixed", Mixed) + register("NoLeakGlobal", NoLeakGlobal) } -func GoroutineLeakNilRecv() { +func NilRecv() { go func() { var c chan int <-c @@ -33,7 +36,7 @@ func GoroutineLeakNilRecv() { runtime.GC() } -func GoroutineLeakNilSend() { +func NilSend() { go func() { var c chan int c <- 0 @@ -43,7 +46,7 @@ func GoroutineLeakNilSend() { runtime.GC() } -func GoroutineLeakChanRecv() { +func ChanRecv() { go func() { <-make(chan int) panic("should not be reached") @@ -52,7 +55,7 @@ func GoroutineLeakChanRecv() { runtime.GC() } -func GoroutineLeakSelectNoCases() { +func SelectNoCases() { go func() { select {} panic("should not be reached") @@ -61,7 +64,7 @@ func GoroutineLeakSelectNoCases() { runtime.GC() } -func GoroutineLeakChanSend() { +func ChanSend() { go func() { make(chan int) <- 0 panic("should not be reached") @@ -70,7 +73,7 @@ func GoroutineLeakChanSend() { runtime.GC() } -func GoroutineLeakSelect() { +func Select() { go func() { select { case make(chan int) <- 0: @@ -82,7 +85,7 @@ func GoroutineLeakSelect() { runtime.GC() } -func GoroutineLeakWaitGroup() { +func WaitGroup() { go func() { var wg sync.WaitGroup wg.Add(1) @@ -93,7 +96,7 @@ func GoroutineLeakWaitGroup() { runtime.GC() } -func GoroutineLeakMutexStack() { +func MutexStack() { for i := 0; i < 1000; i++ { go func() { var mu sync.Mutex @@ -107,7 
+110,7 @@ func GoroutineLeakMutexStack() { time.Sleep(10 * time.Millisecond) } -func GoroutineLeakMutexHeap() { +func MutexHeap() { for i := 0; i < 1000; i++ { go func() { mu := &sync.Mutex{} @@ -123,7 +126,7 @@ func GoroutineLeakMutexHeap() { time.Sleep(10 * time.Millisecond) } -func GoroutineLeakRWMutexRLock() { +func RWMutexRLock() { go func() { mu := &sync.RWMutex{} mu.Lock() @@ -134,7 +137,7 @@ func GoroutineLeakRWMutexRLock() { runtime.GC() } -func GoroutineLeakRWMutexLock() { +func RWMutexLock() { go func() { mu := &sync.RWMutex{} mu.Lock() @@ -145,7 +148,7 @@ func GoroutineLeakRWMutexLock() { runtime.GC() } -func GoroutineLeakCond() { +func Cond() { go func() { cond := sync.NewCond(&sync.Mutex{}) cond.L.Lock() @@ -156,7 +159,7 @@ func GoroutineLeakCond() { runtime.GC() } -func GoroutineLeakMixed() { +func Mixed() { go func() { ch := make(chan int) wg := sync.WaitGroup{} @@ -177,7 +180,7 @@ func GoroutineLeakMixed() { var ch = make(chan int) // No leak should be reported by this test -func NoGoroutineLeakGlobal() { +func NoLeakGlobal() { go func() { <-ch }() diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc1275.go b/src/runtime/testdata/testgoroutineleakgc/grpc1275.go new file mode 100644 index 00000000000000..f9a876cb9d0e61 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc1275.go @@ -0,0 +1,116 @@ +/* + * Project: grpc-go + * Issue or PR : https://github.com/grpc/grpc-go/pull/1275 + * Buggy version: (missing) + * fix commit-id: 0669f3f89e0330e94bb13fa1ce8cc704aab50c9c + * Flaky: 100/100 + * Description: + * Two goroutines are invovled in this deadlock. The first goroutine + * is the main goroutine. It is blocked at case <- donec, and it is + * waiting for the second goroutine to close the channel. + * The second goroutine is created by the main goroutine. It is blocked + * when calling stream.Read(). stream.Read() invokes recvBufferRead.Read(). + * The second goroutine is blocked at case i := r.recv.get(), and it is + * waiting for someone to send a message to this channel. + * It is the client.CloseSream() method called by the main goroutine that + * should send the message, but it is not. The patch is to send out this message. + */ +package main + +import ( + "io" + "runtime" + "time" +) + +func init() { + register("Grpc1275", Grpc1275) +} + +type recvBuffer_grpc1275 struct { + c chan bool +} + +func (b *recvBuffer_grpc1275) get() <-chan bool { + return b.c +} + +type recvBufferReader_grpc1275 struct { + recv *recvBuffer_grpc1275 +} + +func (r *recvBufferReader_grpc1275) Read(p []byte) (int, error) { + select { + case <-r.recv.get(): + } + return 0, nil +} + +type Stream_grpc1275 struct { + trReader io.Reader +} + +func (s *Stream_grpc1275) Read(p []byte) (int, error) { + return io.ReadFull(s.trReader, p) +} + +type http2Client_grpc1275 struct{} + +func (t *http2Client_grpc1275) CloseStream(s *Stream_grpc1275) { + // It is the client.CloseSream() method called by the + // main goroutine that should send the message, but it + // is not. The patch is to send out this message. 
+} + +func (t *http2Client_grpc1275) NewStream() *Stream_grpc1275 { + return &Stream_grpc1275{ + trReader: &recvBufferReader_grpc1275{ + recv: &recvBuffer_grpc1275{ + c: make(chan bool), + }, + }, + } +} + +func testInflightStreamClosing_grpc1275() { + client := &http2Client_grpc1275{} + stream := client.NewStream() + donec := make(chan bool) + go func() { // G2 + defer close(donec) + // deadlocks: 1 + stream.Read([]byte{1}) + }() + + client.CloseStream(stream) + + timeout := time.NewTimer(300 * time.Nanosecond) + select { + case <-donec: + if !timeout.Stop() { + <-timeout.C + } + case <-timeout.C: + } +} + +/// +/// G1 G2 +/// testInflightStreamClosing() +/// stream.Read() +/// io.ReadFull() +/// <- r.recv.get() +/// CloseStream() +/// <- donec +/// ------------G1 timeout, G2 leak--------------------- +/// + +func Grpc1275() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + testInflightStreamClosing_grpc1275() // G1 + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc1424.go b/src/runtime/testdata/testgoroutineleakgc/grpc1424.go new file mode 100644 index 00000000000000..262335b1d1a1d7 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc1424.go @@ -0,0 +1,112 @@ +/* + * Project: grpc-go + * Issue or PR : https://github.com/grpc/grpc-go/pull/1424 + * Buggy version: 39c8c3866d926d95e11c03508bf83d00f2963f91 + * fix commit-id: 64bd0b04a7bb1982078bae6a2ab34c226125fbc1 + * Flaky: 100/100 + * Description: + * The parent function could return without draining the done channel. + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Grpc1424", Grpc1424) +} + +type Balancer_grpc1424 interface { + Notify() <-chan bool +} + +type roundRobin_grpc1424 struct { + mu sync.Mutex + addrCh chan bool +} + +func (rr *roundRobin_grpc1424) Notify() <-chan bool { + return rr.addrCh +} + +type addrConn_grpc1424 struct { + mu sync.Mutex +} + +func (ac *addrConn_grpc1424) tearDown() { + ac.mu.Lock() + defer ac.mu.Unlock() +} + +type dialOption_grpc1424 struct { + balancer Balancer_grpc1424 +} + +type ClientConn_grpc1424 struct { + dopts dialOption_grpc1424 + conns []*addrConn_grpc1424 +} + +func (cc *ClientConn_grpc1424) lbWatcher(doneChan chan bool) { + for addr := range cc.dopts.balancer.Notify() { + if addr { + // nop, make compiler happy + } + var ( + del []*addrConn_grpc1424 + ) + for _, a := range cc.conns { + del = append(del, a) + } + for _, c := range del { + c.tearDown() + } + } +} + +func NewClientConn_grpc1424() *ClientConn_grpc1424 { + cc := &ClientConn_grpc1424{ + dopts: dialOption_grpc1424{ + &roundRobin_grpc1424{addrCh: make(chan bool)}, + }, + } + return cc +} + +func DialContext_grpc1424() { + cc := NewClientConn_grpc1424() + waitC := make(chan error, 1) + go func() { // G2 + defer close(waitC) + // deadlocks: 1 + ch := cc.dopts.balancer.Notify() + if ch != nil { + doneChan := make(chan bool) + go cc.lbWatcher(doneChan) // G3 + <-doneChan + } + }() + /// close addrCh + close(cc.dopts.balancer.(*roundRobin_grpc1424).addrCh) +} + +/// +/// G1 G2 G3 +/// DialContext() +/// cc.dopts.balancer.Notify() +/// cc.lbWatcher() +/// <-doneChan +/// close() +/// -----------------------G2 leak------------------------------------ +/// + +func Grpc1424() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go DialContext_grpc1424() // G1 +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc1460.go b/src/runtime/testdata/testgoroutineleakgc/grpc1460.go new file mode 100644 
index 00000000000000..44e761c7f37e26 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc1460.go @@ -0,0 +1,86 @@ +/* + * Project: grpc + * Issue or PR : https://github.com/grpc/grpc-go/pull/1460 + * Buggy version: 7db1564ba1229bc42919bb1f6d9c4186f3aa8678 + * fix commit-id: e605a1ecf24b634f94f4eefdab10a9ada98b70dd + * Flaky: 100/100 + * Description: + * When gRPC keepalives are enabled (which isn't the case + * by default at this time) and PermitWithoutStream is false + * (the default), the client can deadlock when transitioning + * between having no active stream and having one active + * stream.The keepalive() goroutine is stuck at “<-t.awakenKeepalive”, + * while the main goroutine is stuck in NewStream() on t.mu.Lock(). + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Grpc1460", Grpc1460) +} + +type Stream_grpc1460 struct{} + +type http2Client_grpc1460 struct { + mu sync.Mutex + awakenKeepalive chan struct{} + activeStream []*Stream_grpc1460 +} + +func (t *http2Client_grpc1460) keepalive() { + t.mu.Lock() + if len(t.activeStream) < 1 { + <-t.awakenKeepalive + runtime.Gosched() + t.mu.Unlock() + } else { + t.mu.Unlock() + } +} + +func (t *http2Client_grpc1460) NewStream() { + t.mu.Lock() + runtime.Gosched() + t.activeStream = append(t.activeStream, &Stream_grpc1460{}) + if len(t.activeStream) == 1 { + select { + case t.awakenKeepalive <- struct{}{}: + default: + } + } + t.mu.Unlock() +} + +/// +/// G1 G2 +/// client.keepalive() +/// client.NewStream() +/// t.mu.Lock() +/// <-t.awakenKeepalive +/// t.mu.Lock() +/// ---------------G1, G2 deadlock-------------- +/// + +func Grpc1460() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 1000; i++ { + go func() { + client := &http2Client_grpc1460{ + awakenKeepalive: make(chan struct{}), + } + // deadlocks: x > 0 + go client.keepalive() //G1 + // deadlocks: x > 0 + go client.NewStream() //G2 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc3017.go b/src/runtime/testdata/testgoroutineleakgc/grpc3017.go new file mode 100644 index 00000000000000..47cea8068cd781 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc3017.go @@ -0,0 +1,146 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +// This test case is a reproduction of grpc/3017. +// +// It is a goroutine leak that also simultaneously engages many GC assists. +// Testing runtime behaviour when pivoting between regular and goroutine leak detection modes. 
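+// The leak mechanism, as reproduced below: the time.AfterFunc callback in RemoveSubConn
+// can return while still holding ccc.mu (the "Missing unlock" path), so every later
+// Lock() on that mutex blocks forever and the spawning goroutines leak.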
+ +func init() { + register("Grpc3017", Grpc3017) +} + +type Address_grpc3017 int +type SubConn_grpc3017 int + +type subConnCacheEntry_grpc3017 struct { + sc SubConn_grpc3017 + cancel func() + abortDeleting bool +} + +type lbCacheClientConn_grpc3017 struct { + mu sync.Mutex // L1 + timeout time.Duration + subConnCache map[Address_grpc3017]*subConnCacheEntry_grpc3017 + subConnToAddr map[SubConn_grpc3017]Address_grpc3017 +} + +func (ccc *lbCacheClientConn_grpc3017) NewSubConn(addrs []Address_grpc3017) SubConn_grpc3017 { + if len(addrs) != 1 { + return SubConn_grpc3017(1) + } + addrWithoutMD := addrs[0] + ccc.mu.Lock() // L1 + defer ccc.mu.Unlock() + if entry, ok := ccc.subConnCache[addrWithoutMD]; ok { + entry.cancel() + delete(ccc.subConnCache, addrWithoutMD) + return entry.sc + } + scNew := SubConn_grpc3017(1) + ccc.subConnToAddr[scNew] = addrWithoutMD + return scNew +} + +func (ccc *lbCacheClientConn_grpc3017) RemoveSubConn(sc SubConn_grpc3017) { + ccc.mu.Lock() // L1 + defer ccc.mu.Unlock() + addr, ok := ccc.subConnToAddr[sc] + if !ok { + return + } + + if entry, ok := ccc.subConnCache[addr]; ok { + if entry.sc != sc { + delete(ccc.subConnToAddr, sc) + } + return + } + + entry := &subConnCacheEntry_grpc3017{ + sc: sc, + } + ccc.subConnCache[addr] = entry + + timer := time.AfterFunc(ccc.timeout, func() { // G3 + runtime.Gosched() + ccc.mu.Lock() // L1 + // deadlocks: x > 0 + if entry.abortDeleting { + return // Missing unlock + } + delete(ccc.subConnToAddr, sc) + delete(ccc.subConnCache, addr) + ccc.mu.Unlock() + }) + + entry.cancel = func() { + if !timer.Stop() { + entry.abortDeleting = true + } + } +} + +func Grpc3017() { + defer func() { + time.Sleep(100 * time.Millisecond) + }() + + for i := 0; i < 100; i++ { + go func() { //G1 + done := make(chan struct{}) + + // deadlocks: x > 0 + ccc := &lbCacheClientConn_grpc3017{ + timeout: time.Nanosecond, + subConnCache: make(map[Address_grpc3017]*subConnCacheEntry_grpc3017), + subConnToAddr: make(map[SubConn_grpc3017]Address_grpc3017), + } + + sc := ccc.NewSubConn([]Address_grpc3017{Address_grpc3017(1)}) + go func() { // G2 + // deadlocks: x > 0 + for i := 0; i < 10000; i++ { + ccc.RemoveSubConn(sc) + sc = ccc.NewSubConn([]Address_grpc3017{Address_grpc3017(1)}) + } + close(done) + }() + <-done + }() + } +} + +// Example of a deadlocking trace +// +// G1 G2 G3 +// ------------------------------------------------------------------------------------------------ +// NewSubConn([1]) +// ccc.mu.Lock() [L1] +// sc = 1 +// ccc.subConnToAddr[1] = 1 +// go func() [G2] +// <-done . +// . ccc.RemoveSubConn(1) +// . ccc.mu.Lock() +// . addr = 1 +// . entry = &subConnCacheEntry_grpc3017{sc: 1} +// . cc.subConnCache[1] = entry +// . timer = time.AfterFunc() [G3] +// . entry.cancel = func() . +// . sc = ccc.NewSubConn([1]) . +// . ccc.mu.Lock() [L1] . +// . entry.cancel() . +// . !timer.Stop() [true] . +// . entry.abortDeleting = true . +// . . ccc.mu.Lock() +// . . <<>> +// . ccc.RemoveSubConn(1) +// . 
ccc.mu.Lock() [L1] +// -------------------------------------------G1, G2 leak----------------------------------------- diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc660.go b/src/runtime/testdata/testgoroutineleakgc/grpc660.go new file mode 100644 index 00000000000000..08a1ee502ca47e --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc660.go @@ -0,0 +1,69 @@ +/* + * Project: grpc-go + * Issue or PR : https://github.com/grpc/grpc-go/pull/660 + * Buggy version: db85417dd0de6cc6f583672c6175a7237e5b5dd2 + * fix commit-id: ceacfbcbc1514e4e677932fd55938ac455d182fb + * Flaky: 100/100 + * Description: + * The parent function could return without draining the done channel. + */ +package main + +import ( + "math/rand" + "runtime" + "time" +) + +func init() { + register("Grpc660", Grpc660) +} + +type benchmarkClient_grpc660 struct { + stop chan bool +} + +func (bc *benchmarkClient_grpc660) doCloseLoopUnary() { + for { + done := make(chan bool) + go func() { // G2 + // deadlocks: 1 + if rand.Intn(10) > 7 { + done <- false + return + } + done <- true + }() + select { + case <-bc.stop: + return + case <-done: + } + } +} + +/// +/// G1 G2 helper goroutine +/// doCloseLoopUnary() +/// bc.stop <- true +/// <-bc.stop +/// return +/// done <- +/// ----------------------G2 leak-------------------------- +/// + +func Grpc660() { + defer func() { + time.Sleep(1 * time.Second) + runtime.GC() + }() + go func() { + bc := &benchmarkClient_grpc660{ + stop: make(chan bool), + } + go bc.doCloseLoopUnary() // G1 + go func() { // helper goroutine + bc.stop <- true + }() + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc795.go b/src/runtime/testdata/testgoroutineleakgc/grpc795.go new file mode 100644 index 00000000000000..c97f9c96c67d71 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc795.go @@ -0,0 +1,70 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Grpc795", Grpc795) +} + +type Server_grpc795 struct { + mu sync.Mutex + drain bool +} + +func (s *Server_grpc795) GracefulStop() { + s.mu.Lock() + if s.drain { + s.mu.Lock() + return + } + s.drain = true + s.mu.Unlock() +} +func (s *Server_grpc795) Serve() { + s.mu.Lock() + s.mu.Unlock() +} + +func NewServer_grpc795() *Server_grpc795 { + return &Server_grpc795{} +} + +type test_grpc795 struct { + srv *Server_grpc795 +} + +func (te *test_grpc795) startServer() { + s := NewServer_grpc795() + te.srv = s + // deadlocks: x > 0 + go s.Serve() +} + +func newTest_grpc795() *test_grpc795 { + return &test_grpc795{} +} + +func testServerGracefulStopIdempotent_grpc795() { + // deadlocks: x > 0 + te := newTest_grpc795() + + te.startServer() + + for i := 0; i < 3; i++ { + te.srv.GracefulStop() + } +} + +func Grpc795() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + go testServerGracefulStopIdempotent_grpc795() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc862.go b/src/runtime/testdata/testgoroutineleakgc/grpc862.go new file mode 100644 index 00000000000000..15f787cf6e53d9 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc862.go @@ -0,0 +1,109 @@ +/* + * Project: grpc-go + * Issue or PR : https://github.com/grpc/grpc-go/pull/862 + * Buggy version: d8f4ebe77f6b7b6403d7f98626de8a534f9b93a7 + * fix commit-id: dd5645bebff44f6b88780bb949022a09eadd7dae + * Flaky: 100/100 + * Description: + * When return value conn is nil, cc (ClientConn) is not closed. 
+ * The goroutine executing resetAddrConn is leaked. The patch is to + * close ClientConn in the defer func(). + */ +package main + +import ( + "context" + "runtime" + "time" +) + +func init() { + register("Grpc862", Grpc862) +} + +type ClientConn_grpc862 struct { + ctx context.Context + cancel context.CancelFunc + conns []*addrConn_grpc862 +} + +func (cc *ClientConn_grpc862) Close() { + cc.cancel() + conns := cc.conns + cc.conns = nil + for _, ac := range conns { + ac.tearDown() + } +} + +func (cc *ClientConn_grpc862) resetAddrConn() { + ac := &addrConn_grpc862{ + cc: cc, + } + cc.conns = append(cc.conns, ac) + ac.ctx, ac.cancel = context.WithCancel(cc.ctx) + ac.resetTransport() +} + +type addrConn_grpc862 struct { + cc *ClientConn_grpc862 + ctx context.Context + cancel context.CancelFunc +} + +func (ac *addrConn_grpc862) resetTransport() { + for retries := 1; ; retries++ { + _ = 2 * time.Nanosecond * time.Duration(retries) + timeout := 10 * time.Nanosecond + _, cancel := context.WithTimeout(ac.ctx, timeout) + _ = time.Now() + cancel() + <-ac.ctx.Done() + return + } +} + +func (ac *addrConn_grpc862) tearDown() { + ac.cancel() +} + +func DialContext_grpc862(ctx context.Context) (conn *ClientConn_grpc862) { + cc := &ClientConn_grpc862{} + cc.ctx, cc.cancel = context.WithCancel(context.Background()) + defer func() { + select { + case <-ctx.Done(): + if conn != nil { + conn.Close() + } + conn = nil + default: + } + }() + go func() { // G2 + // deadlocks: 1 + cc.resetAddrConn() + }() + return conn +} + +/// +/// G1 G2 +/// DialContext() +/// cc.resetAddrConn() +/// resetTransport() +/// <-ac.ctx.Done() +/// --------------G2 leak------------------ +/// + +func Grpc862() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + ctx, cancel := context.WithCancel(context.Background()) + go DialContext_grpc862(ctx) // G1 + go cancel() // helper goroutine + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/hugo3251.go b/src/runtime/testdata/testgoroutineleakgc/hugo3251.go new file mode 100644 index 00000000000000..1c1f598c4578cc --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/hugo3251.go @@ -0,0 +1,99 @@ +package main + +import ( + "fmt" + "runtime" + "sync" + "time" +) + +func init() { + register("Hugo3251", Hugo3251) +} + +type remoteLock_hugo3251 struct { + sync.RWMutex // L1 + m map[string]*sync.Mutex // L2 +} + +func (l *remoteLock_hugo3251) URLLock(url string) { + l.Lock() // L1 + if _, ok := l.m[url]; !ok { + l.m[url] = &sync.Mutex{} + } + l.m[url].Lock() // L2 + runtime.Gosched() + l.Unlock() // L1 + // runtime.Gosched() +} + +func (l *remoteLock_hugo3251) URLUnlock(url string) { + l.RLock() // L1 + defer l.RUnlock() // L1 + if um, ok := l.m[url]; ok { + um.Unlock() // L2 + } +} + +func resGetRemote_hugo3251(remoteURLLock *remoteLock_hugo3251, url string) error { + remoteURLLock.URLLock(url) + defer func() { remoteURLLock.URLUnlock(url) }() + + return nil +} + +func Hugo3251() { + defer func() { + time.Sleep(time.Second) + runtime.GC() + }() + + for i := 0; i < 10; i++ { + go func() { // G1 + // deadlocks: x > 0 + url := "http://Foo.Bar/foo_Bar-Foo" + remoteURLLock := &remoteLock_hugo3251{m: make(map[string]*sync.Mutex)} + for range []bool{false, true} { + var wg sync.WaitGroup + for i := 0; i < 100; i++ { + wg.Add(1) + go func(gor int) { // G2 + // deadlocks: x > 0 + defer wg.Done() + for j := 0; j < 200; j++ { + err := resGetRemote_hugo3251(remoteURLLock, url) + if err != nil { + fmt.Errorf("Error getting resource content: %s", 
err) + } + time.Sleep(300 * time.Nanosecond) + } + }(i) + } + wg.Wait() + } + }() + } +} + +// Example of deadlocking trace: +// +// G1 G2 G3 +// ------------------------------------------------------------------------------------------------ +// wg.Add(1) [W1: 1] +// go func() [G2] +// go func() [G3] +// . resGetRemote() +// . remoteURLLock.URLLock(url) +// . l.Lock() [L1] +// . l.m[url] = &sync.Mutex{} [L2] +// . l.m[url].Lock() [L2] +// . l.Unlock() [L1] +// . . resGetRemote() +// . . remoteURLLock.URLLock(url) +// . . l.Lock() [L1] +// . . l.m[url].Lock() [L2] +// . remoteURLLock.URLUnlock(url) +// . l.RLock() [L1] +// ... +// wg.Wait() [W1] +// ----------------------------------------G1,G2,G3 leak------------------------------------------- diff --git a/src/runtime/testdata/testgoroutineleakgc/hugo5379.go b/src/runtime/testdata/testgoroutineleakgc/hugo5379.go new file mode 100644 index 00000000000000..cee52801319d04 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/hugo5379.go @@ -0,0 +1,315 @@ +package main + +import ( + "context" + "runtime" + + "log" + "sync" + "time" +) + +func init() { + register("Hugo5379", Hugo5379) +} + +type shortcodeHandler_hugo5379 struct { + p *PageWithoutContent_hugo5379 + contentShortcodes map[int]func() error + contentShortcodesDelta map[int]func() error + init sync.Once // O1 +} + +func (s *shortcodeHandler_hugo5379) executeShortcodesForDelta(p *PageWithoutContent_hugo5379) error { + for k, _ := range s.contentShortcodesDelta { + render := s.contentShortcodesDelta[k] + if err := render(); err != nil { + continue + } + } + return nil +} + +func (s *shortcodeHandler_hugo5379) updateDelta() { + s.init.Do(func() { + s.contentShortcodes = createShortcodeRenderers_hugo5379(s.p.withoutContent()) + }) + + delta := make(map[int]func() error) + + for k, v := range s.contentShortcodes { + if _, ok := delta[k]; !ok { + delta[k] = v + } + } + + s.contentShortcodesDelta = delta +} + +type Page_hugo5379 struct { + *pageInit_hugo5379 + *pageContentInit_hugo5379 + pageWithoutContent *PageWithoutContent_hugo5379 + contentInit sync.Once // O2 + contentInitMu sync.Mutex // L1 + shortcodeState *shortcodeHandler_hugo5379 +} + +func (p *Page_hugo5379) WordCount() { + p.initContentPlainAndMeta() +} + +func (p *Page_hugo5379) initContentPlainAndMeta() { + p.initContent() + p.initPlain(true) +} + +func (p *Page_hugo5379) initPlain(lock bool) { + p.plainInit.Do(func() { + if lock { + /// Double locking here. 
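+				/// The goroutine spawned in initContent already holds contentInitMu when it
+				/// reaches this point via prepareForRender -> WordCount -> initPlain, so the
+				/// second Lock blocks it on itself and every later caller queues behind it.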
+ p.contentInitMu.Lock() + defer p.contentInitMu.Unlock() + } + }) +} + +func (p *Page_hugo5379) withoutContent() *PageWithoutContent_hugo5379 { + p.pageInit_hugo5379.withoutContentInit.Do(func() { + p.pageWithoutContent = &PageWithoutContent_hugo5379{Page_hugo5379: p} + }) + return p.pageWithoutContent +} + +func (p *Page_hugo5379) prepareForRender() error { + var err error + if err = handleShortcodes_hugo5379(p.withoutContent()); err != nil { + return err + } + return nil +} + +func (p *Page_hugo5379) setContentInit() { + p.shortcodeState.updateDelta() +} + +func (p *Page_hugo5379) initContent() { + p.contentInit.Do(func() { + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) + defer cancel() + c := make(chan error, 1) + + go func() { // G2 + // deadlocks: x > 0 + var err error + p.contentInitMu.Lock() // first lock here + defer p.contentInitMu.Unlock() + + err = p.prepareForRender() + if err != nil { + c <- err + return + } + c <- err + }() + + select { + case <-ctx.Done(): + case <-c: + } + }) +} + +type PageWithoutContent_hugo5379 struct { + *Page_hugo5379 +} + +type pageInit_hugo5379 struct { + withoutContentInit sync.Once +} + +type pageContentInit_hugo5379 struct { + contentInit sync.Once // O3 + plainInit sync.Once // O4 +} + +type HugoSites_hugo5379 struct { + Sites []*Site_hugo5379 +} + +func (h *HugoSites_hugo5379) render() { + for _, s := range h.Sites { + for _, s2 := range h.Sites { + s2.preparePagesForRender() + } + s.renderPages() + } +} + +func (h *HugoSites_hugo5379) Build() { + h.render() +} + +type Pages_hugo5379 []*Page_hugo5379 + +type PageCollections_hugo5379 struct { + Pages Pages_hugo5379 +} + +type Site_hugo5379 struct { + *PageCollections_hugo5379 +} + +func (s *Site_hugo5379) preparePagesForRender() { + for _, p := range s.Pages { + p.setContentInit() + } +} + +func (s *Site_hugo5379) renderForLayouts() { + /// Omit reflections + for _, p := range s.Pages { + p.WordCount() + } +} + +func (s *Site_hugo5379) renderAndWritePage() { + s.renderForLayouts() +} + +func (s *Site_hugo5379) renderPages() { + numWorkers := 2 + wg := &sync.WaitGroup{} + + for i := 0; i < numWorkers; i++ { + wg.Add(1) + // deadlocks: x > 0 + go pageRenderer_hugo5379(s, wg) // G3 + } + + wg.Wait() +} + +type sitesBuilder_hugo5379 struct { + H *HugoSites_hugo5379 +} + +func (s *sitesBuilder_hugo5379) Build() *sitesBuilder_hugo5379 { + return s.build() +} + +func (s *sitesBuilder_hugo5379) build() *sitesBuilder_hugo5379 { + s.H.Build() + return s +} + +func (s *sitesBuilder_hugo5379) CreateSitesE() error { + sites, err := NewHugoSites_hugo5379() + if err != nil { + return err + } + s.H = sites + return nil +} + +func (s *sitesBuilder_hugo5379) CreateSites() *sitesBuilder_hugo5379 { + if err := s.CreateSitesE(); err != nil { + log.Fatalf("Failed to create sites: %s", err) + } + return s +} + +func newHugoSites_hugo5379(sites ...*Site_hugo5379) (*HugoSites_hugo5379, error) { + h := &HugoSites_hugo5379{Sites: sites} + return h, nil +} + +func newSite_hugo5379() *Site_hugo5379 { + c := &PageCollections_hugo5379{} + s := &Site_hugo5379{ + PageCollections_hugo5379: c, + } + return s +} + +func createSitesFromConfig_hugo5379() []*Site_hugo5379 { + var ( + sites []*Site_hugo5379 + ) + + var s *Site_hugo5379 = newSite_hugo5379() + sites = append(sites, s) + return sites +} + +func NewHugoSites_hugo5379() (*HugoSites_hugo5379, error) { + sites := createSitesFromConfig_hugo5379() + return newHugoSites_hugo5379(sites...) 
+} + +func prepareShortcodeForPage_hugo5379(p *PageWithoutContent_hugo5379) map[int]func() error { + m := make(map[int]func() error) + m[0] = func() error { + return renderShortcode_hugo5379(p) + } + return m +} + +func renderShortcode_hugo5379(p *PageWithoutContent_hugo5379) error { + return renderShortcodeWithPage_hugo5379(p) +} + +func renderShortcodeWithPage_hugo5379(p *PageWithoutContent_hugo5379) error { + /// Omit reflections + p.WordCount() + return nil +} + +func createShortcodeRenderers_hugo5379(p *PageWithoutContent_hugo5379) map[int]func() error { + return prepareShortcodeForPage_hugo5379(p) +} + +func newShortcodeHandler_hugo5379(p *Page_hugo5379) *shortcodeHandler_hugo5379 { + return &shortcodeHandler_hugo5379{ + p: p.withoutContent(), + contentShortcodes: make(map[int]func() error), + contentShortcodesDelta: make(map[int]func() error), + } +} + +func handleShortcodes_hugo5379(p *PageWithoutContent_hugo5379) error { + return p.shortcodeState.executeShortcodesForDelta(p) +} + +func pageRenderer_hugo5379(s *Site_hugo5379, wg *sync.WaitGroup) { + defer wg.Done() + s.renderAndWritePage() +} + +func Hugo5379() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { // G1 + // deadlocks: x > 0 + b := &sitesBuilder_hugo5379{} + s := b.CreateSites() + for _, site := range s.H.Sites { + p := &Page_hugo5379{ + pageInit_hugo5379: &pageInit_hugo5379{}, + pageContentInit_hugo5379: &pageContentInit_hugo5379{}, + pageWithoutContent: &PageWithoutContent_hugo5379{}, + contentInit: sync.Once{}, + contentInitMu: sync.Mutex{}, + shortcodeState: nil, + } + p.shortcodeState = newShortcodeHandler_hugo5379(p) + site.Pages = append(site.Pages, p) + } + s.Build() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/istio16224.go b/src/runtime/testdata/testgoroutineleakgc/istio16224.go new file mode 100644 index 00000000000000..f3d4cfd1b1f6a4 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/istio16224.go @@ -0,0 +1,125 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Istio16224", Istio16224) +} + +type ConfigStoreCache_istio16224 interface { + RegisterEventHandler(handler func()) + Run() +} + +type Event_istio16224 int + +type Handler_istio16224 func(Event_istio16224) + +type configstoreMonitor_istio16224 struct { + handlers []Handler_istio16224 + eventCh chan Event_istio16224 +} + +func (m *configstoreMonitor_istio16224) Run(stop <-chan struct{}) { + for { + select { + case <-stop: + // This bug is not descibed, but is a true positive (in our eyes) + // In a real run main exits when the goro is blocked here. 
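+			// If no further event is ever sent, the receive below blocks forever and the
+			// goroutine running Run leaks.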
+ if _, ok := <-m.eventCh; ok { + close(m.eventCh) + } + return + case ce, ok := <-m.eventCh: + if ok { + m.processConfigEvent(ce) + } + } + } +} + +func (m *configstoreMonitor_istio16224) processConfigEvent(ce Event_istio16224) { + m.applyHandlers(ce) +} + +func (m *configstoreMonitor_istio16224) AppendEventHandler(h Handler_istio16224) { + m.handlers = append(m.handlers, h) +} + +func (m *configstoreMonitor_istio16224) applyHandlers(e Event_istio16224) { + for _, f := range m.handlers { + f(e) + } +} +func (m *configstoreMonitor_istio16224) ScheduleProcessEvent(configEvent Event_istio16224) { + m.eventCh <- configEvent +} + +type Monitor_istio16224 interface { + Run(<-chan struct{}) + AppendEventHandler(Handler_istio16224) + ScheduleProcessEvent(Event_istio16224) +} + +type controller_istio16224 struct { + monitor Monitor_istio16224 +} + +func (c *controller_istio16224) RegisterEventHandler(f func(Event_istio16224)) { + c.monitor.AppendEventHandler(f) +} + +func (c *controller_istio16224) Run(stop <-chan struct{}) { + c.monitor.Run(stop) +} + +func (c *controller_istio16224) Create() { + c.monitor.ScheduleProcessEvent(Event_istio16224(0)) +} + +func NewMonitor_istio16224() Monitor_istio16224 { + return NewBufferedMonitor_istio16224() +} + +func NewBufferedMonitor_istio16224() Monitor_istio16224 { + return &configstoreMonitor_istio16224{ + eventCh: make(chan Event_istio16224), + } +} + +func Istio16224() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + // deadlocks: x > 0 + controller := &controller_istio16224{monitor: NewMonitor_istio16224()} + done := make(chan bool) + lock := sync.Mutex{} + controller.RegisterEventHandler(func(event Event_istio16224) { + lock.Lock() + defer lock.Unlock() + done <- true + }) + + stop := make(chan struct{}) + // deadlocks: x > 0 + go controller.Run(stop) + + controller.Create() + + lock.Lock() // blocks + lock.Unlock() + <-done + + close(stop) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/istio17860.go b/src/runtime/testdata/testgoroutineleakgc/istio17860.go new file mode 100644 index 00000000000000..a8aba84cd3f15f --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/istio17860.go @@ -0,0 +1,139 @@ +package main + +import ( + "context" + "runtime" + + "sync" + "time" +) + +func init() { + register("Istio17860", Istio17860) +} + +type Proxy_istio17860 interface { + IsLive() bool +} + +type TestProxy_istio17860 struct { + live func() bool +} + +func (tp TestProxy_istio17860) IsLive() bool { + if tp.live == nil { + return true + } + return tp.live() +} + +type Agent_istio17860 interface { + Run(ctx context.Context) + Restart() +} + +type exitStatus_istio17860 int + +type agent_istio17860 struct { + proxy Proxy_istio17860 + mu *sync.Mutex + statusCh chan exitStatus_istio17860 + currentEpoch int + activeEpochs map[int]struct{} +} + +func (a *agent_istio17860) Run(ctx context.Context) { + for { + select { + case status := <-a.statusCh: + a.mu.Lock() + delete(a.activeEpochs, int(status)) + active := len(a.activeEpochs) + a.mu.Unlock() + if active == 0 { + return + } + case <-ctx.Done(): + return + } + } +} + +func (a *agent_istio17860) Restart() { + a.mu.Lock() + defer a.mu.Unlock() + + a.waitUntilLive() + a.currentEpoch++ + a.activeEpochs[a.currentEpoch] = struct{}{} + + // deadlocks: x > 0 + go a.runWait(a.currentEpoch) +} + +func (a *agent_istio17860) runWait(epoch int) { + a.statusCh <- exitStatus_istio17860(epoch) +} + +func (a *agent_istio17860) 
waitUntilLive() { + if len(a.activeEpochs) == 0 { + return + } + + interval := time.NewTicker(30 * time.Nanosecond) + timer := time.NewTimer(100 * time.Nanosecond) + defer func() { + interval.Stop() + timer.Stop() + }() + + if a.proxy.IsLive() { + return + } + + for { + select { + case <-timer.C: + return + case <-interval.C: + if a.proxy.IsLive() { + return + } + } + } +} + +func NewAgent_istio17860(proxy Proxy_istio17860) Agent_istio17860 { + return &agent_istio17860{ + proxy: proxy, + mu: &sync.Mutex{}, + statusCh: make(chan exitStatus_istio17860), + activeEpochs: make(map[int]struct{}), + } +} + +func Istio17860() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + neverLive := func() bool { + return false + } + + a := NewAgent_istio17860(TestProxy_istio17860{live: neverLive}) + go func() { a.Run(ctx) }() + + a.Restart() + go a.Restart() + + time.Sleep(200 * time.Nanosecond) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/istio18454.go b/src/runtime/testdata/testgoroutineleakgc/istio18454.go new file mode 100644 index 00000000000000..a795e7e8f0ff9a --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/istio18454.go @@ -0,0 +1,149 @@ +package main + +import ( + "context" + "runtime" + + "sync" + "time" +) + +func init() { + register("Istio18454", Istio18454) +} + +const eventChCap_istio18454 = 1024 + +type Worker_istio18454 struct { + ctx context.Context + ctxCancel context.CancelFunc +} + +func (w *Worker_istio18454) Start(setupFn func(), runFn func(c context.Context)) { + if setupFn != nil { + setupFn() + } + go func() { + // deadlocks: x > 0 + runFn(w.ctx) + }() +} + +func (w *Worker_istio18454) Stop() { + w.ctxCancel() +} + +type Strategy_istio18454 struct { + timer *time.Timer + timerFrequency time.Duration + stateLock sync.Mutex + resetChan chan struct{} + worker *Worker_istio18454 + startTimerFn func() +} + +func (s *Strategy_istio18454) OnChange() { + s.stateLock.Lock() + if s.timer != nil { + s.stateLock.Unlock() + s.resetChan <- struct{}{} + return + } + s.startTimerFn() + s.stateLock.Unlock() +} + +func (s *Strategy_istio18454) startTimer() { + s.timer = time.NewTimer(s.timerFrequency) + eventLoop := func(ctx context.Context) { + for { + select { + case <-s.timer.C: + case <-s.resetChan: + if !s.timer.Stop() { + <-s.timer.C + } + s.timer.Reset(s.timerFrequency) + case <-ctx.Done(): + s.timer.Stop() + return + } + } + } + s.worker.Start(nil, eventLoop) +} + +func (s *Strategy_istio18454) Close() { + s.worker.Stop() +} + +type Event_istio18454 int + +type Processor_istio18454 struct { + stateStrategy *Strategy_istio18454 + worker *Worker_istio18454 + eventCh chan Event_istio18454 +} + +func (p *Processor_istio18454) processEvent() { + p.stateStrategy.OnChange() +} + +func (p *Processor_istio18454) Start() { + setupFn := func() { + for i := 0; i < eventChCap_istio18454; i++ { + p.eventCh <- Event_istio18454(0) + } + } + runFn := func(ctx context.Context) { + defer func() { + p.stateStrategy.Close() + }() + for { + select { + case <-ctx.Done(): + return + case <-p.eventCh: + p.processEvent() + } + } + } + p.worker.Start(setupFn, runFn) +} + +func (p *Processor_istio18454) Stop() { + p.worker.Stop() +} + +func NewWorker_istio18454() *Worker_istio18454 { + worker := &Worker_istio18454{} + worker.ctx, worker.ctxCancel = context.WithCancel(context.Background()) + return worker +} + +func Istio18454() { + defer 
func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + stateStrategy := &Strategy_istio18454{ + timerFrequency: time.Nanosecond, + resetChan: make(chan struct{}, 1), + worker: NewWorker_istio18454(), + } + stateStrategy.startTimerFn = stateStrategy.startTimer + + p := &Processor_istio18454{ + stateStrategy: stateStrategy, + worker: NewWorker_istio18454(), + eventCh: make(chan Event_istio18454, eventChCap_istio18454), + } + + p.Start() + defer p.Stop() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes10182.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes10182.go new file mode 100644 index 00000000000000..eed829866640fd --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes10182.go @@ -0,0 +1,97 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/10182 + * Buggy version: 4b990d128a17eea9058d28a3b3688ab8abafbd94 + * fix commit-id: 64ad3e17ad15cd0f9a4fd86706eec1c572033254 + * Flaky: 15/100 + * Description: + * This is a lock-channel bug. goroutine 1 is blocked on a lock + * held by goroutine 3, while goroutine 3 is blocked on sending + * message to ch, which is read by goroutine 1. + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes10182", Kubernetes10182) +} + +type statusManager_kubernetes10182 struct { + podStatusesLock sync.RWMutex + podStatusChannel chan bool +} + +func (s *statusManager_kubernetes10182) Start() { + go func() { + // deadlocks: x > 0 + for i := 0; i < 2; i++ { + s.syncBatch() + } + }() +} + +func (s *statusManager_kubernetes10182) syncBatch() { + runtime.Gosched() + <-s.podStatusChannel + s.DeletePodStatus() +} + +func (s *statusManager_kubernetes10182) DeletePodStatus() { + s.podStatusesLock.Lock() + defer s.podStatusesLock.Unlock() +} + +func (s *statusManager_kubernetes10182) SetPodStatus() { + s.podStatusesLock.Lock() + defer s.podStatusesLock.Unlock() + s.podStatusChannel <- true +} + +func NewStatusManager_kubernetes10182() *statusManager_kubernetes10182 { + return &statusManager_kubernetes10182{ + podStatusChannel: make(chan bool), + } +} + +// Example of deadlock trace: +// +// G1 G2 G3 +// -------------------------------------------------------------------------------- +// s.Start() +// s.syncBatch() +// s.SetPodStatus() +// <-s.podStatusChannel +// s.podStatusesLock.Lock() +// s.podStatusChannel <- true +// s.podStatusesLock.Unlock() +// return +// s.DeletePodStatus() +// s.podStatusesLock.Lock() +// s.podStatusChannel <- true +// s.podStatusesLock.Lock() +// -----------------------------------G1,G3 leak------------------------------------- + +func Kubernetes10182() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 1000; i++ { + go func() { + // deadlocks: 0 + s := NewStatusManager_kubernetes10182() + // deadlocks: 0 + go s.Start() + // deadlocks: x > 0 + go s.SetPodStatus() + // deadlocks: x > 0 + go s.SetPodStatus() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes11298.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes11298.go new file mode 100644 index 00000000000000..056495a6597b35 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes11298.go @@ -0,0 +1,114 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes11298", Kubernetes11298) +} + +type Signal_kubernetes11298 <-chan struct{} + +func 
After_kubernetes11298(f func()) Signal_kubernetes11298 { + ch := make(chan struct{}) + go func() { + // deadlocks: x > 0 + defer close(ch) + if f != nil { + f() + } + }() + return Signal_kubernetes11298(ch) +} + +func Until_kubernetes11298(f func(), period time.Duration, stopCh <-chan struct{}) { + if f == nil { + return + } + for { + select { + case <-stopCh: + return + default: + } + f() + select { + case <-stopCh: + case <-time.After(period): + } + } + +} + +type notifier_kubernetes11298 struct { + lock sync.Mutex + cond *sync.Cond +} + +// abort will be closed no matter what +func (n *notifier_kubernetes11298) serviceLoop(abort <-chan struct{}) { + n.lock.Lock() + defer n.lock.Unlock() + for { + select { + case <-abort: + return + default: + ch := After_kubernetes11298(func() { + n.cond.Wait() + }) + select { + case <-abort: + n.cond.Signal() + <-ch + return + case <-ch: + } + } + } +} + +// abort will be closed no matter what +func Notify_kubernetes11298(abort <-chan struct{}) { + n := ¬ifier_kubernetes11298{} + n.cond = sync.NewCond(&n.lock) + finished := After_kubernetes11298(func() { + Until_kubernetes11298(func() { + for { + select { + case <-abort: + return + default: + func() { + n.lock.Lock() + defer n.lock.Unlock() + n.cond.Signal() + }() + } + } + }, 0, abort) + }) + Until_kubernetes11298(func() { n.serviceLoop(finished) }, 0, abort) +} +func Kubernetes11298() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 1000; i++ { + go func() { + // deadlocks: x > 0 + done := make(chan struct{}) + notifyDone := After_kubernetes11298(func() { Notify_kubernetes11298(done) }) + go func() { + defer close(done) + time.Sleep(300 * time.Nanosecond) + }() + <-notifyDone + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go new file mode 100644 index 00000000000000..d0f445a0583fe2 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go @@ -0,0 +1,183 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/13135 + * Buggy version: 6ced66249d4fd2a81e86b4a71d8df0139fe5ceae + * fix commit-id: a12b7edc42c5c06a2e7d9f381975658692951d5a + * Flaky: 93/100 + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes13135", Kubernetes13135) +} + +var ( + StopChannel_kubernetes13135 chan struct{} +) + +func Util_kubernetes13135(f func(), period time.Duration, stopCh <-chan struct{}) { + for { + select { + case <-stopCh: + return + default: + } + func() { + f() + }() + time.Sleep(period) + } +} + +type Store_kubernetes13135 interface { + Add(obj interface{}) + Replace(obj interface{}) +} + +type Reflector_kubernetes13135 struct { + store Store_kubernetes13135 +} + +func (r *Reflector_kubernetes13135) ListAndWatch(stopCh <-chan struct{}) error { + r.syncWith() + return nil +} + +func NewReflector_kubernetes13135(store Store_kubernetes13135) *Reflector_kubernetes13135 { + return &Reflector_kubernetes13135{ + store: store, + } +} + +func (r *Reflector_kubernetes13135) syncWith() { + r.store.Replace(nil) +} + +type Cacher_kubernetes13135 struct { + sync.Mutex + initialized sync.WaitGroup + initOnce sync.Once + watchCache *WatchCache_kubernetes13135 + reflector *Reflector_kubernetes13135 +} + +func (c *Cacher_kubernetes13135) processEvent() { + c.Lock() + defer c.Unlock() +} + +func (c *Cacher_kubernetes13135) startCaching(stopChannel <-chan struct{}) { + c.Lock() + for 
{ + err := c.reflector.ListAndWatch(stopChannel) + if err == nil { + break + } + } +} + +type WatchCache_kubernetes13135 struct { + sync.RWMutex + onReplace func() + onEvent func() +} + +func (w *WatchCache_kubernetes13135) SetOnEvent(onEvent func()) { + w.Lock() + defer w.Unlock() + w.onEvent = onEvent +} + +func (w *WatchCache_kubernetes13135) SetOnReplace(onReplace func()) { + w.Lock() + defer w.Unlock() + w.onReplace = onReplace +} + +func (w *WatchCache_kubernetes13135) processEvent() { + w.Lock() + defer w.Unlock() + if w.onEvent != nil { + w.onEvent() + } +} + +func (w *WatchCache_kubernetes13135) Add(obj interface{}) { + w.processEvent() +} + +func (w *WatchCache_kubernetes13135) Replace(obj interface{}) { + w.Lock() + defer w.Unlock() + if w.onReplace != nil { + w.onReplace() + } +} + +func NewCacher_kubernetes13135() *Cacher_kubernetes13135 { + watchCache := &WatchCache_kubernetes13135{} + cacher := &Cacher_kubernetes13135{ + initialized: sync.WaitGroup{}, + watchCache: watchCache, + reflector: NewReflector_kubernetes13135(watchCache), + } + cacher.initialized.Add(1) + watchCache.SetOnReplace(func() { + cacher.initOnce.Do(func() { cacher.initialized.Done() }) + cacher.Unlock() + }) + watchCache.SetOnEvent(cacher.processEvent) + stopCh := StopChannel_kubernetes13135 + go Util_kubernetes13135(func() { cacher.startCaching(stopCh) }, 0, stopCh) // G2 + cacher.initialized.Wait() + return cacher +} + +/// +/// G1 G2 G3 +/// NewCacher() +/// watchCache.SetOnReplace() +/// watchCache.SetOnEvent() +/// cacher.startCaching() +/// c.Lock() +/// c.reflector.ListAndWatch() +/// r.syncWith() +/// r.store.Replace() +/// w.Lock() +/// w.onReplace() +/// cacher.initOnce.Do() +/// cacher.Unlock() +/// return cacher +/// c.watchCache.Add() +/// w.processEvent() +/// w.Lock() +/// cacher.startCaching() +/// c.Lock() +/// ... +/// c.Lock() +/// w.Lock() +///--------------------------------G2,G3 deadlock------------------------------------- +/// + +func Kubernetes13135() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + StopChannel_kubernetes13135 = make(chan struct{}) + for i := 0; i < 50; i++ { + go func() { + // deadlocks: x > 0 + c := NewCacher_kubernetes13135() // G1 + go c.watchCache.Add(nil) // G3 + }() + } + go close(StopChannel_kubernetes13135) +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes1321.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes1321.go new file mode 100644 index 00000000000000..4ca72c65ddb045 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes1321.go @@ -0,0 +1,121 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/1321 + * Buggy version: 9cd0fc70f1ca852c903b18b0933991036b3b2fa1 + * fix commit-id: 435e0b73bb99862f9dedf56a50260ff3dfef14ff + * Flaky: 1/100 + * Description: + * This is a lock-channel bug. The first goroutine invokes + * distribute() function. distribute() function holds m.lock.Lock(), + * while blocking at sending message to w.result. The second goroutine + * invokes stopWatching() funciton, which can unblock the first + * goroutine by closing w.result. However, in order to close w.result, + * stopWatching() function needs to acquire m.lock.Lock() firstly. + * The fix is to introduce another channel and put receive message + * from the second channel in the same select as the w.result. Close + * the second channel can unblock the first goroutine, while no need + * to hold m.lock.Lock(). 
+ */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes1321", Kubernetes1321) +} + +type muxWatcher_kubernetes1321 struct { + result chan struct{} + m *Mux_kubernetes1321 + id int64 +} + +func (mw *muxWatcher_kubernetes1321) Stop() { + mw.m.stopWatching(mw.id) +} + +type Mux_kubernetes1321 struct { + lock sync.Mutex + watchers map[int64]*muxWatcher_kubernetes1321 +} + +func NewMux_kubernetes1321() *Mux_kubernetes1321 { + m := &Mux_kubernetes1321{ + watchers: map[int64]*muxWatcher_kubernetes1321{}, + } + // deadlocks: x > 0 + go m.loop() // G2 + return m +} + +func (m *Mux_kubernetes1321) Watch() *muxWatcher_kubernetes1321 { + mw := &muxWatcher_kubernetes1321{ + result: make(chan struct{}), + m: m, + id: int64(len(m.watchers)), + } + m.watchers[mw.id] = mw + runtime.Gosched() + return mw +} + +func (m *Mux_kubernetes1321) loop() { + for i := 0; i < 100; i++ { + m.distribute() + } +} + +func (m *Mux_kubernetes1321) distribute() { + m.lock.Lock() + defer m.lock.Unlock() + for _, w := range m.watchers { + w.result <- struct{}{} + runtime.Gosched() + } +} + +func (m *Mux_kubernetes1321) stopWatching(id int64) { + m.lock.Lock() + defer m.lock.Unlock() + w, ok := m.watchers[id] + if !ok { + return + } + delete(m.watchers, id) + close(w.result) +} + +func testMuxWatcherClose_kubernetes1321() { + // deadlocks: x > 0 + m := NewMux_kubernetes1321() + m.watchers[m.Watch().id].Stop() +} + +/// +/// G1 G2 +/// testMuxWatcherClose() +/// NewMux() +/// m.loop() +/// m.distribute() +/// m.lock.Lock() +/// w.result <- true +/// w := m.Watch() +/// w.Stop() +/// mw.m.stopWatching() +/// m.lock.Lock() +/// ---------------G1,G2 deadlock--------------- +/// + +func Kubernetes1321() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 1000; i++ { + go testMuxWatcherClose_kubernetes1321() // G1 + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go new file mode 100644 index 00000000000000..e393ce9d5e0a40 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go @@ -0,0 +1,81 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/25331 + * Buggy version: 5dd087040bb13434f1ddf2f0693d0203c30f28cb + * fix commit-id: 97f4647dc3d8cf46c2b66b89a31c758a6edfb57c + * Flaky: 100/100 + * Description: + * In reflector.go, it could probably call Stop() without retrieving + * all results from ResultChan(). See here. A potential leak is that + * when an error has happened, it could block on resultChan, and then + * cancelling context in Stop() wouldn't unblock it. 
+ */ +package main + +import ( + "context" + "errors" + "runtime" + "time" +) + +func init() { + register("Kubernetes25331", Kubernetes25331) +} + +type watchChan_kubernetes25331 struct { + ctx context.Context + cancel context.CancelFunc + resultChan chan bool + errChan chan error +} + +func (wc *watchChan_kubernetes25331) Stop() { + wc.errChan <- errors.New("Error") + wc.cancel() +} + +func (wc *watchChan_kubernetes25331) run() { + select { + case err := <-wc.errChan: + errResult := len(err.Error()) != 0 + wc.cancel() // Removed in fix + wc.resultChan <- errResult + case <-wc.ctx.Done(): + } +} + +func NewWatchChan_kubernetes25331() *watchChan_kubernetes25331 { + ctx, cancel := context.WithCancel(context.Background()) + return &watchChan_kubernetes25331{ + ctx: ctx, + cancel: cancel, + resultChan: make(chan bool), + errChan: make(chan error), + } +} + +/// +/// G1 G2 +/// wc.run() +/// wc.Stop() +/// wc.errChan <- +/// wc.cancel() +/// <-wc.errChan +/// wc.cancel() +/// wc.resultChan <- +/// -------------G1 leak---------------- +/// + +func Kubernetes25331() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + wc := NewWatchChan_kubernetes25331() + // deadlocks: 1 + go wc.run() // G1 + go wc.Stop() // G2 + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes26980.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes26980.go new file mode 100644 index 00000000000000..cf1b79cd57d8e5 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes26980.go @@ -0,0 +1,83 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes26980", Kubernetes26980) +} + +type processorListener_kubernetes26980 struct { + lock sync.RWMutex + cond sync.Cond + + pendingNotifications []interface{} +} + +func (p *processorListener_kubernetes26980) add(notification interface{}) { + p.lock.Lock() + defer p.lock.Unlock() + + p.pendingNotifications = append(p.pendingNotifications, notification) + p.cond.Broadcast() +} + +func (p *processorListener_kubernetes26980) pop(stopCh <-chan struct{}) { + p.lock.Lock() + runtime.Gosched() + defer p.lock.Unlock() + for { + for len(p.pendingNotifications) == 0 { + select { + case <-stopCh: + return + default: + } + p.cond.Wait() + } + select { + case <-stopCh: + return + } + } +} + +func newProcessListener_kubernetes26980() *processorListener_kubernetes26980 { + ret := &processorListener_kubernetes26980{ + pendingNotifications: []interface{}{}, + } + ret.cond.L = &ret.lock + return ret +} +func Kubernetes26980() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 3000; i++ { + go func() { + // deadlocks: x > 0 + pl := newProcessListener_kubernetes26980() + stopCh := make(chan struct{}) + defer close(stopCh) + pl.add(1) + runtime.Gosched() + // deadlocks: x > 0 + go pl.pop(stopCh) + + resultCh := make(chan struct{}) + go func() { + // deadlocks: x > 0 + pl.lock.Lock() + close(resultCh) + }() + runtime.Gosched() + <-resultCh + pl.lock.Unlock() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes30872.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes30872.go new file mode 100644 index 00000000000000..4b306e40794337 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes30872.go @@ -0,0 +1,256 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes30872", Kubernetes30872) +} + +type PopProcessFunc_kubernetes30872 func() + +type 
ProcessFunc_kubernetes30872 func() + +func Util_kubernetes30872(f func(), stopCh <-chan struct{}) { + JitterUntil_kubernetes30872(f, stopCh) +} + +func JitterUntil_kubernetes30872(f func(), stopCh <-chan struct{}) { + for { + select { + case <-stopCh: + return + default: + } + func() { + f() + }() + } +} + +type Queue_kubernetes30872 interface { + HasSynced() + Pop(PopProcessFunc_kubernetes30872) +} + +type Config_kubernetes30872 struct { + Queue Queue_kubernetes30872 + Process ProcessFunc_kubernetes30872 +} + +type Controller_kubernetes30872 struct { + config Config_kubernetes30872 +} + +func (c *Controller_kubernetes30872) Run(stopCh <-chan struct{}) { + Util_kubernetes30872(c.processLoop, stopCh) +} + +func (c *Controller_kubernetes30872) HasSynced() { + c.config.Queue.HasSynced() +} + +func (c *Controller_kubernetes30872) processLoop() { + c.config.Queue.Pop(PopProcessFunc_kubernetes30872(c.config.Process)) +} + +type ControllerInterface_kubernetes30872 interface { + Run(<-chan struct{}) + HasSynced() +} + +type ResourceEventHandler_kubernetes30872 interface { + OnAdd() +} + +type ResourceEventHandlerFuncs_kubernetes30872 struct { + AddFunc func() +} + +func (r ResourceEventHandlerFuncs_kubernetes30872) OnAdd() { + if r.AddFunc != nil { + r.AddFunc() + } +} + +type informer_kubernetes30872 struct { + controller ControllerInterface_kubernetes30872 + + stopChan chan struct{} +} + +type federatedInformerImpl_kubernetes30872 struct { + sync.Mutex + clusterInformer informer_kubernetes30872 +} + +func (f *federatedInformerImpl_kubernetes30872) ClustersSynced() { + f.Lock() // L1 + defer f.Unlock() + f.clusterInformer.controller.HasSynced() +} + +func (f *federatedInformerImpl_kubernetes30872) addCluster() { + f.Lock() // L1 + defer f.Unlock() +} + +func (f *federatedInformerImpl_kubernetes30872) Start() { + f.Lock() // L1 + defer f.Unlock() + + f.clusterInformer.stopChan = make(chan struct{}) + // deadlocks: x > 0 + go f.clusterInformer.controller.Run(f.clusterInformer.stopChan) // G2 + runtime.Gosched() +} + +func (f *federatedInformerImpl_kubernetes30872) Stop() { + f.Lock() // L1 + defer f.Unlock() + close(f.clusterInformer.stopChan) +} + +type DelayingDeliverer_kubernetes30872 struct{} + +func (d *DelayingDeliverer_kubernetes30872) StartWithHandler(handler func()) { + go func() { // G4 + // deadlocks: x > 0 + handler() + }() +} + +type FederationView_kubernetes30872 interface { + ClustersSynced() +} + +type FederatedInformer_kubernetes30872 interface { + FederationView_kubernetes30872 + Start() + Stop() +} + +type NamespaceController_kubernetes30872 struct { + namespaceDeliverer *DelayingDeliverer_kubernetes30872 + namespaceFederatedInformer FederatedInformer_kubernetes30872 +} + +func (nc *NamespaceController_kubernetes30872) isSynced() { + nc.namespaceFederatedInformer.ClustersSynced() +} + +func (nc *NamespaceController_kubernetes30872) reconcileNamespace() { + nc.isSynced() +} + +func (nc *NamespaceController_kubernetes30872) Run(stopChan <-chan struct{}) { + nc.namespaceFederatedInformer.Start() + go func() { // G3 + // deadlocks: x > 0 + <-stopChan + nc.namespaceFederatedInformer.Stop() + }() + nc.namespaceDeliverer.StartWithHandler(func() { + nc.reconcileNamespace() + }) +} + +type DeltaFIFO_kubernetes30872 struct { + lock sync.RWMutex +} + +func (f *DeltaFIFO_kubernetes30872) HasSynced() { + f.lock.Lock() // L2 + defer f.lock.Unlock() +} + +func (f *DeltaFIFO_kubernetes30872) Pop(process PopProcessFunc_kubernetes30872) { + f.lock.Lock() // L2 + defer f.lock.Unlock() + process() 
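+	// Note: process() is invoked while f.lock (L2) is still held, so the handler
+	// acquires L1 (via addCluster) under L2; a goroutine that holds L1 and then
+	// needs L2 (see the trace at the end of this file) completes the deadlock cycle.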
+} + +func NewFederatedInformer_kubernetes30872() FederatedInformer_kubernetes30872 { + federatedInformer := &federatedInformerImpl_kubernetes30872{} + federatedInformer.clusterInformer.controller = NewInformer_kubernetes30872( + ResourceEventHandlerFuncs_kubernetes30872{ + AddFunc: func() { + federatedInformer.addCluster() + }, + }) + return federatedInformer +} + +func NewInformer_kubernetes30872(h ResourceEventHandler_kubernetes30872) *Controller_kubernetes30872 { + fifo := &DeltaFIFO_kubernetes30872{} + cfg := &Config_kubernetes30872{ + Queue: fifo, + Process: func() { + h.OnAdd() + }, + } + return &Controller_kubernetes30872{config: *cfg} +} + +func NewNamespaceController_kubernetes30872() *NamespaceController_kubernetes30872 { + nc := &NamespaceController_kubernetes30872{} + nc.namespaceDeliverer = &DelayingDeliverer_kubernetes30872{} + nc.namespaceFederatedInformer = NewFederatedInformer_kubernetes30872() + return nc +} + +func Kubernetes30872() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { // G1 + namespaceController := NewNamespaceController_kubernetes30872() + stop := make(chan struct{}) + namespaceController.Run(stop) + close(stop) + }() + } +} + +/// Example of deadlocking trace. +/// +/// G1 G2 G3 G4 +/// --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +/// namespaceController.Run() +/// nc.namespaceFederatedInformer.Start() +/// f.Lock() [L1] +/// go f.clusterInformer.controller.Run()[G2] +/// <<>> +/// . Util(c.processLoop, stopCh) +/// . c.config.Queue.Pop() +/// . f.lock.Lock() [L2] +/// . process() +/// . h.OnAdd() +/// . r.AddFunc() +/// . federatedInformer.addCluster() +/// . f.Lock() [L1] +/// f.Unlock() [L1] . +/// go func()[G3] . +/// nc.namespaceDeliverer.StartWithHandler() . . +/// go func()[G4] . . +/// close(stop) . . . +/// <<>> . . . +/// . <-stopChan . +/// . nc.namespaceFederatedInformer.Stop() . +/// . f.Lock() [L1] . +/// . . handler() +/// . . nc.reconcileNamespace() +/// . . nc.isSynced() +/// . . nc.namespaceFederatedInformer.ClustersSynced() +/// . . f.Lock() [L1] +/// . . f.clusterInformer.controller.HasSynced() +/// . . c.config.Queue.HasSynced() +/// . . 
f.lock.Lock() [L2] +///----------------------------------------------------------------------------G2,G3,G4 leak---------------------------------------------------------------------------------------------- +/// diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go new file mode 100644 index 00000000000000..b204230c29a3a5 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go @@ -0,0 +1,75 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes38669", Kubernetes38669) +} + +type Event_kubernetes38669 int +type watchCacheEvent_kubernetes38669 int + +type cacheWatcher_kubernetes38669 struct { + sync.Mutex + input chan watchCacheEvent_kubernetes38669 + result chan Event_kubernetes38669 + stopped bool +} + +func (c *cacheWatcher_kubernetes38669) process(initEvents []watchCacheEvent_kubernetes38669) { + for _, event := range initEvents { + c.sendWatchCacheEvent(&event) + } + defer close(c.result) + defer c.Stop() + for { + _, ok := <-c.input + if !ok { + return + } + } +} + +func (c *cacheWatcher_kubernetes38669) sendWatchCacheEvent(event *watchCacheEvent_kubernetes38669) { + c.result <- Event_kubernetes38669(*event) +} + +func (c *cacheWatcher_kubernetes38669) Stop() { + c.stop() +} + +func (c *cacheWatcher_kubernetes38669) stop() { + c.Lock() + defer c.Unlock() + if !c.stopped { + c.stopped = true + close(c.input) + } +} + +func newCacheWatcher_kubernetes38669(chanSize int, initEvents []watchCacheEvent_kubernetes38669) *cacheWatcher_kubernetes38669 { + watcher := &cacheWatcher_kubernetes38669{ + input: make(chan watchCacheEvent_kubernetes38669, chanSize), + result: make(chan Event_kubernetes38669, chanSize), + stopped: false, + } + // deadlocks: 1 + go watcher.process(initEvents) + return watcher +} + +func Kubernetes38669() { + defer func() { + time.Sleep(1 * time.Second) + runtime.GC() + }() + go func() { + initEvents := []watchCacheEvent_kubernetes38669{1, 2} + w := newCacheWatcher_kubernetes38669(0, initEvents) + w.Stop() + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go new file mode 100644 index 00000000000000..540988ba774c66 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go @@ -0,0 +1,69 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/5316 + * Buggy version: c868b0bbf09128960bc7c4ada1a77347a464d876 + * fix commit-id: cc3a433a7abc89d2f766d4c87eaae9448e3dc091 + * Flaky: 100/100 + * Description: + * If the main goroutine selects a case that doesn’t consumes + * the channels, the anonymous goroutine will be blocked on sending + * to channel. 
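+ * A common remedy (a sketch, not the actual kubernetes patch) is to give the
+ * channels capacity 1 so the sender in G2 can always complete and exit:
+ *
+ *   ch := make(chan bool, 1)
+ *   errCh := make(chan error, 1)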
+ */ + +package main + +import ( + "errors" + "math/rand" + "runtime" + "time" +) + +func init() { + register("Kubernetes5316", Kubernetes5316) +} + +func finishRequest_kubernetes5316(timeout time.Duration, fn func() error) { + ch := make(chan bool) + errCh := make(chan error) + go func() { // G2 + // deadlocks: 1 + if err := fn(); err != nil { + errCh <- err + } else { + ch <- true + } + }() + + select { + case <-ch: + case <-errCh: + case <-time.After(timeout): + } +} + +/// +/// G1 G2 +/// finishRequest() +/// fn() +/// time.After() +/// errCh<-/ch<- +/// --------------G2 leak---------------- +/// + +func Kubernetes5316() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + fn := func() error { + time.Sleep(2 * time.Millisecond) + if rand.Intn(10) > 5 { + return errors.New("Error") + } + return nil + } + go finishRequest_kubernetes5316(time.Millisecond, fn) // G1 + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes58107.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes58107.go new file mode 100644 index 00000000000000..9e328fc5330e2f --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes58107.go @@ -0,0 +1,134 @@ +/* + * Project: kubernetes + * Tag: Reproduce misbehavior + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/58107 + * Buggy version: 2f17d782eb2772d6401da7ddced9ac90656a7a79 + * fix commit-id: 010a127314a935d8d038f8dd4559fc5b249813e4 + * Flaky: 53/100 + * Description: + * The rules for read and write lock: allows concurrent read lock; + * write lock has higher priority than read lock. + * There are two queues (queue 1 and queue 2) involved in this bug, + * and the two queues are protected by the same read-write lock + * (rq.workerLock.RLock()). Before getting an element from queue 1 or + * queue 2, rq.workerLock.RLock() is acquired. If the queue is empty, + * cond.Wait() will be invoked. There is another goroutine (goroutine D), + * which will periodically invoke rq.workerLock.Lock(). Under the following + * situation, deadlock will happen. Queue 1 is empty, so that some goroutines + * hold rq.workerLock.RLock(), and block at cond.Wait(). Goroutine D is + * blocked when acquiring rq.workerLock.Lock(). Some goroutines try to process + * jobs in queue 2, but they are blocked when acquiring rq.workerLock.RLock(), + * since write lock has a higher priority. + * The fix is to not acquire rq.workerLock.RLock(), while pulling data + * from any queue. Therefore, when a goroutine is blocked at cond.Wait(), + * rq.workLock.RLock() is not held. 
+ */ + +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes58107", Kubernetes58107) +} + +type RateLimitingInterface_kubernetes58107 interface { + Get() + Put() +} + +type Type_kubernetes58107 struct { + cond *sync.Cond +} + +func (q *Type_kubernetes58107) Get() { + q.cond.L.Lock() + defer q.cond.L.Unlock() + q.cond.Wait() +} + +func (q *Type_kubernetes58107) Put() { + q.cond.Signal() +} + +type ResourceQuotaController_kubernetes58107 struct { + workerLock sync.RWMutex + queue RateLimitingInterface_kubernetes58107 + missingUsageQueue RateLimitingInterface_kubernetes58107 +} + +func (rq *ResourceQuotaController_kubernetes58107) worker(queue RateLimitingInterface_kubernetes58107, _ string) { + workFunc := func() bool { + rq.workerLock.RLock() + defer rq.workerLock.RUnlock() + queue.Get() + return true + } + for { + if quit := workFunc(); quit { + return + } + } +} + +func (rq *ResourceQuotaController_kubernetes58107) Run() { + // deadlocks: x > 0 + go rq.worker(rq.queue, "G1") // G3 + // deadlocks: x > 0 + go rq.worker(rq.missingUsageQueue, "G2") // G4 +} + +func (rq *ResourceQuotaController_kubernetes58107) Sync() { + for i := 0; i < 100000; i++ { + rq.workerLock.Lock() + runtime.Gosched() + rq.workerLock.Unlock() + } +} + +func (rq *ResourceQuotaController_kubernetes58107) HelperSignals() { + for i := 0; i < 100000; i++ { + rq.queue.Put() + rq.missingUsageQueue.Put() + } +} + +func startResourceQuotaController_kubernetes58107() { + resourceQuotaController := &ResourceQuotaController_kubernetes58107{ + queue: &Type_kubernetes58107{sync.NewCond(&sync.Mutex{})}, + missingUsageQueue: &Type_kubernetes58107{sync.NewCond(&sync.Mutex{})}, + } + + go resourceQuotaController.Run() // G2 + // deadlocks: x > 0 + go resourceQuotaController.Sync() // G5 + resourceQuotaController.HelperSignals() +} + +func Kubernetes58107() { + defer func() { + time.Sleep(1000 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 1000; i++ { + go startResourceQuotaController_kubernetes58107() // G1 + } +} + +// Example of deadlock: +// +// G1 G3 G4 G5 +// ------------------------------------------------------------------------------------------------------------ +// <<>> (no more signals) ... ... Sync() +// rq.workerLock.RLock() . . +// q.cond.L.Lock() . . +// q.cond.Wait() . . +// . . rq.workerLock.Lock() +// . rq.workerLock.RLock() . +// . q.cond.L.Lock() . +// --------------------------------------------G3, G4, G5 leak------------------------------------------------- diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes62464.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes62464.go new file mode 100644 index 00000000000000..48bcb7efe6fe95 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes62464.go @@ -0,0 +1,119 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/62464 + * Buggy version: a048ca888ad27367b1a7b7377c67658920adbf5d + * fix commit-id: c1b19fce903675b82e9fdd1befcc5f5d658bfe78 + * Flaky: 8/100 + * Description: + * This is another example for recursive read lock bug. It has + * been noticed by the go developers that RLock should not be + * recursively used in the same thread. 
+ */ + +package main + +import ( + "math/rand" + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes62464", Kubernetes62464) +} + +type State_kubernetes62464 interface { + GetCPUSetOrDefault() + GetCPUSet() bool + GetDefaultCPUSet() + SetDefaultCPUSet() +} + +type stateMemory_kubernetes62464 struct { + sync.RWMutex +} + +func (s *stateMemory_kubernetes62464) GetCPUSetOrDefault() { + s.RLock() + defer s.RUnlock() + if ok := s.GetCPUSet(); ok { + return + } + s.GetDefaultCPUSet() +} + +func (s *stateMemory_kubernetes62464) GetCPUSet() bool { + runtime.Gosched() + s.RLock() + defer s.RUnlock() + + if rand.Intn(10) > 5 { + return true + } + return false +} + +func (s *stateMemory_kubernetes62464) GetDefaultCPUSet() { + s.RLock() + defer s.RUnlock() +} + +func (s *stateMemory_kubernetes62464) SetDefaultCPUSet() { + s.Lock() + runtime.Gosched() + defer s.Unlock() +} + +type staticPolicy_kubernetes62464 struct{} + +func (p *staticPolicy_kubernetes62464) RemoveContainer(s State_kubernetes62464) { + s.GetDefaultCPUSet() + s.SetDefaultCPUSet() +} + +type manager_kubernetes62464 struct { + state *stateMemory_kubernetes62464 +} + +func (m *manager_kubernetes62464) reconcileState() { + m.state.GetCPUSetOrDefault() +} + +func NewPolicyAndManager_kubernetes62464() (*staticPolicy_kubernetes62464, *manager_kubernetes62464) { + s := &stateMemory_kubernetes62464{} + m := &manager_kubernetes62464{s} + p := &staticPolicy_kubernetes62464{} + return p, m +} + +/// +/// G1 G2 +/// m.reconcileState() +/// m.state.GetCPUSetOrDefault() +/// s.RLock() +/// s.GetCPUSet() +/// p.RemoveContainer() +/// s.GetDefaultCPUSet() +/// s.SetDefaultCPUSet() +/// s.Lock() +/// s.RLock() +/// ---------------------G1,G2 deadlock--------------------- +/// + +func Kubernetes62464() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 1000; i++ { + go func() { + p, m := NewPolicyAndManager_kubernetes62464() + // deadlocks: x > 0 + go m.reconcileState() + // deadlocks: x > 0 + go p.RemoveContainer(m.state) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes6632.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes6632.go new file mode 100644 index 00000000000000..26fc0f3db03596 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes6632.go @@ -0,0 +1,99 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/6632 + * Buggy version: e597b41d939573502c8dda1dde7bf3439325fb5d + * fix commit-id: 82afb7ab1fe12cf2efceede2322d082eaf5d5adc + * Flaky: 4/100 + * Description: + * This is a lock-channel bug. When resetChan is full, WriteFrame + * holds the lock and blocks on the channel. Then monitor() fails + * to close the resetChan because lock is already held by WriteFrame. 
+ * Fix: create a goroutine to drain the channel + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes6632", Kubernetes6632) +} + +type Connection_kubernetes6632 struct { + closeChan chan bool +} + +type idleAwareFramer_kubernetes6632 struct { + resetChan chan bool + writeLock sync.Mutex + conn *Connection_kubernetes6632 +} + +func (i *idleAwareFramer_kubernetes6632) monitor() { + var resetChan = i.resetChan +Loop: + for { + select { + case <-i.conn.closeChan: + i.writeLock.Lock() + close(resetChan) + i.resetChan = nil + i.writeLock.Unlock() + break Loop + } + } +} + +func (i *idleAwareFramer_kubernetes6632) WriteFrame() { + i.writeLock.Lock() + defer i.writeLock.Unlock() + if i.resetChan == nil { + return + } + i.resetChan <- true +} + +func NewIdleAwareFramer_kubernetes6632() *idleAwareFramer_kubernetes6632 { + return &idleAwareFramer_kubernetes6632{ + resetChan: make(chan bool), + conn: &Connection_kubernetes6632{ + closeChan: make(chan bool), + }, + } +} + +/// +/// G1 G2 helper goroutine +/// i.monitor() +/// <-i.conn.closeChan +/// i.WriteFrame() +/// i.writeLock.Lock() +/// i.resetChan <- +/// i.conn.closeChan<- +/// i.writeLock.Lock() +/// ----------------------G1,G2 deadlock------------------------ +/// + +func Kubernetes6632() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + i := NewIdleAwareFramer_kubernetes6632() + + go func() { // helper goroutine + i.conn.closeChan <- true + }() + // deadlocks: x > 0 + go i.monitor() // G1 + // deadlocks: x > 0 + go i.WriteFrame() // G2 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes70277.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes70277.go new file mode 100644 index 00000000000000..234f4145695a8c --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes70277.go @@ -0,0 +1,92 @@ +package main + +import ( + "runtime" + "time" +) + +func init() { + register("Kubernetes70277", Kubernetes70277) +} + +type WaitFunc_kubernetes70277 func(done <-chan struct{}) <-chan struct{} + +type ConditionFunc_kubernetes70277 func() (done bool, err error) + +func WaitFor_kubernetes70277(wait WaitFunc_kubernetes70277, fn ConditionFunc_kubernetes70277, done <-chan struct{}) error { + c := wait(done) + for { + _, open := <-c + ok, err := fn() + if err != nil { + return err + } + if ok { + return nil + } + if !open { + break + } + } + return nil +} + +func poller_kubernetes70277(interval, timeout time.Duration) WaitFunc_kubernetes70277 { + return WaitFunc_kubernetes70277(func(done <-chan struct{}) <-chan struct{} { + ch := make(chan struct{}) + go func() { + defer close(ch) + + tick := time.NewTicker(interval) + defer tick.Stop() + + var after <-chan time.Time + if timeout != 0 { + timer := time.NewTimer(timeout) + after = timer.C + defer timer.Stop() + } + for { + select { + case <-tick.C: + select { + case ch <- struct{}{}: + default: + } + case <-after: + return + case <-done: + return + } + } + }() + + return ch + }) +} + +func Kubernetes70277() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 1000; i++ { + go func() { + // deadlocks: x > 0 + stopCh := make(chan struct{}) + defer close(stopCh) + waitFunc := poller_kubernetes70277(time.Millisecond, 80*time.Millisecond) + var doneCh <-chan struct{} + + WaitFor_kubernetes70277(func(done <-chan struct{}) <-chan struct{} { + doneCh = done + return waitFunc(done) + }, func() (bool, error) { + 
time.Sleep(10 * time.Millisecond) + return true, nil + }, stopCh) + + <-doneCh // block here + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/main.go b/src/runtime/testdata/testgoroutineleakgc/main.go new file mode 100644 index 00000000000000..ae491a2a978043 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/main.go @@ -0,0 +1,35 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import "os" + +var cmds = map[string]func(){} + +func register(name string, f func()) { + if cmds[name] != nil { + panic("duplicate registration: " + name) + } + cmds[name] = f +} + +func registerInit(name string, f func()) { + if len(os.Args) >= 2 && os.Args[1] == name { + f() + } +} + +func main() { + if len(os.Args) < 2 { + println("usage: " + os.Args[0] + " name-of-test") + return + } + f := cmds[os.Args[1]] + if f == nil { + println("unknown function: " + os.Args[1]) + return + } + f() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby17176.go b/src/runtime/testdata/testgoroutineleakgc/moby17176.go new file mode 100644 index 00000000000000..e3cc10a2276da6 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby17176.go @@ -0,0 +1,74 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/17176 + * Buggy version: d295dc66521e2734390473ec1f1da8a73ad3288a + * fix commit-id: 2f16895ee94848e2d8ad72bc01968b4c88d84cb8 + * Flaky: 100/100 + * Description: + * devices.nrDeletedDevices takes devices.Lock() but does + * not drop it if there are no deleted devices. This will block + * other goroutines trying to acquire devices.Lock(). + * In general, the reason is that when device deletion is happening, + * we can retry deletion/deactivation in a loop, and at that time + * we don't want to block the rest of the device operations in parallel. + * So we drop the inner devices lock while continuing to hold the + * per-device lock. + * A test is added for this bug, and we need to try whether + * this bug can be reproduced.
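+ * A minimal fix is to release the lock on the early-return path as well
+ * (for example by unlocking before "return nil", or using defer), which is
+ * exactly the spot marked "Missing devices.Unlock()" in the code below.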
+ */ +package main + +import ( + "errors" + "runtime" + "sync" + "time" +) + +func init() { + register("Moby17176", Moby17176) +} + +type DeviceSet_moby17176 struct { + sync.Mutex + nrDeletedDevices int +} + +func (devices *DeviceSet_moby17176) cleanupDeletedDevices() error { + devices.Lock() + if devices.nrDeletedDevices == 0 { + /// Missing devices.Unlock() + return nil + } + devices.Unlock() + return errors.New("Error") +} + +func testDevmapperLockReleasedDeviceDeletion_moby17176() { + ds := &DeviceSet_moby17176{ + nrDeletedDevices: 0, + } + ds.cleanupDeletedDevices() + doneChan := make(chan bool) + go func() { + // deadlocks: x > 0 + ds.Lock() + defer ds.Unlock() + doneChan <- true + }() + + select { + case <-time.After(time.Millisecond): + case <-doneChan: + } +} +func Moby17176() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go testDevmapperLockReleasedDeviceDeletion_moby17176() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby21233.go b/src/runtime/testdata/testgoroutineleakgc/moby21233.go new file mode 100644 index 00000000000000..837bd6cb344328 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby21233.go @@ -0,0 +1,167 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/21233 + * Buggy version: cc12d2bfaae135e63b1f962ad80e6943dd995337 + * fix commit-id: 2f4aa9658408ac72a598363c6e22eadf93dbb8a7 + * Flaky:100/100 + * Description: + * This test was checking that it received every progress update that was + * produced. But delivery of these intermediate progress updates is not + * guaranteed. A new update can overwrite the previous one if the previous + * one hasn't been sent to the channel yet. + * The call to t.Fatalf exited the current goroutine which was consuming + * the channel, which caused a deadlock and eventual test timeout rather + * than a proper failure message.
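+ * The implied fix is for the goroutine draining progressChan to keep running
+ * and report the mismatch instead of exiting via t.Fatalf, so producers are
+ * never left blocked on the channel.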
+ */ +package main + +import ( + "math/rand" + "runtime" + "sync" + "time" +) + +func init() { + register("Moby21233", Moby21233) +} + +type Progress_moby21233 struct{} + +type Output_moby21233 interface { + WriteProgress(Progress_moby21233) error +} + +type chanOutput_moby21233 chan<- Progress_moby21233 + +type TransferManager_moby21233 struct { + mu sync.Mutex +} + +type Transfer_moby21233 struct { + mu sync.Mutex +} + +type Watcher_moby21233 struct { + signalChan chan struct{} + releaseChan chan struct{} + running chan struct{} +} + +func ChanOutput_moby21233(progressChan chan<- Progress_moby21233) Output_moby21233 { + return chanOutput_moby21233(progressChan) +} +func (out chanOutput_moby21233) WriteProgress(p Progress_moby21233) error { + out <- p + return nil +} +func NewTransferManager_moby21233() *TransferManager_moby21233 { + return &TransferManager_moby21233{} +} +func NewTransfer_moby21233() *Transfer_moby21233 { + return &Transfer_moby21233{} +} +func (t *Transfer_moby21233) Release(watcher *Watcher_moby21233) { + t.mu.Lock() + t.mu.Unlock() + close(watcher.releaseChan) + <-watcher.running +} +func (t *Transfer_moby21233) Watch(progressOutput Output_moby21233) *Watcher_moby21233 { + t.mu.Lock() + defer t.mu.Unlock() + lastProgress := Progress_moby21233{} + w := &Watcher_moby21233{ + releaseChan: make(chan struct{}), + signalChan: make(chan struct{}), + running: make(chan struct{}), + } + go func() { // G2 + // deadlocks: x > 0 + defer func() { + close(w.running) + }() + done := false + for { + t.mu.Lock() + t.mu.Unlock() + if rand.Int31n(2) >= 1 { + progressOutput.WriteProgress(lastProgress) + } + if done { + return + } + select { + case <-w.signalChan: + case <-w.releaseChan: + done = true + } + } + }() + return w +} +func (tm *TransferManager_moby21233) Transfer(progressOutput Output_moby21233) (*Transfer_moby21233, *Watcher_moby21233) { + tm.mu.Lock() + defer tm.mu.Unlock() + t := NewTransfer_moby21233() + return t, t.Watch(progressOutput) +} + +func testTransfer_moby21233() { // G1 + // deadlocks: x > 0 + tm := NewTransferManager_moby21233() + progressChan := make(chan Progress_moby21233) + progressDone := make(chan struct{}) + go func() { // G3 + time.Sleep(1 * time.Millisecond) + for p := range progressChan { /// Chan consumer + if rand.Int31n(2) >= 1 { + return + } + _ = p + } + close(progressDone) + }() + time.Sleep(1 * time.Millisecond) + ids := []string{"id1", "id2", "id3"} + xrefs := make([]*Transfer_moby21233, len(ids)) + watchers := make([]*Watcher_moby21233, len(ids)) + for i := range ids { + xrefs[i], watchers[i] = tm.Transfer(ChanOutput_moby21233(progressChan)) /// Chan producer + time.Sleep(2 * time.Millisecond) + } + + for i := range xrefs { + xrefs[i].Release(watchers[i]) + } + + close(progressChan) + <-progressDone +} + +func Moby21233() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + go testTransfer_moby21233() // G1 + } +} + +// Example deadlock trace: +// +// G1 G2 G3 +// ------------------------------------------------------------------------------------------------ +// testTransfer() +// tm.Transfer() +// t.Watch() +// . WriteProgress() +// . ProgressChan<- +// . . <-progressChan +// . . rand.Int31n(2) >= 1 +// . . return +// . ProgressChan<- . 
+// <-watcher.running +// ----------------------G1, G2 leak-------------------------- +// diff --git a/src/runtime/testdata/testgoroutineleakgc/moby25348.go b/src/runtime/testdata/testgoroutineleakgc/moby25348.go new file mode 100644 index 00000000000000..4370779e9885c2 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby25348.go @@ -0,0 +1,58 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/25384 + * Buggy version: 58befe3081726ef74ea09198cd9488fb42c51f51 + * fix commit-id: 42360d164b9f25fb4b150ef066fcf57fa39559a7 + * Flaky: 100/100 + * Description: + * When n=1 (len(pm.plugins)), the location of group.Wait() doesn’t matter. + * When n is larger than 1, group.Wait() is invoked in each iteration. Whenever + * group.Wait() is invoked, it waits for group.Done() to be executed n times. + * However, group.Done() is only executed once in one iteration. + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Moby25348", Moby25348) +} + +type plugin_moby25348 struct{} + +type Manager_moby25348 struct { + plugins []*plugin_moby25348 +} + +func (pm *Manager_moby25348) init() { + var group sync.WaitGroup + group.Add(len(pm.plugins)) + for _, p := range pm.plugins { + go func(p *plugin_moby25348) { + defer group.Done() + }(p) + group.Wait() // Block here + } +} + +func Moby25348() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + go func() { + p1 := &plugin_moby25348{} + p2 := &plugin_moby25348{} + pm := &Manager_moby25348{ + plugins: []*plugin_moby25348{p1, p2}, + } + // deadlocks: 100 + go pm.init() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby27782.go b/src/runtime/testdata/testgoroutineleakgc/moby27782.go new file mode 100644 index 00000000000000..36c715c33ed8de --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby27782.go @@ -0,0 +1,266 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/27782 + * Buggy version: 18768fdc2e76ec6c600c8ab57d2d487ee7877794 + * fix commit-id: a69a59ffc7e3d028a72d1195c2c1535f447eaa84 + * Flaky: 2/100 + */ +package main + +import ( + "errors" + "runtime" + "sync" + "time" +) + +func init() { + register("Moby27782", Moby27782) +} + +type Event_moby27782 struct { + Op Op_moby27782 +} + +type Op_moby27782 uint32 + +const ( + Create_moby27782 Op_moby27782 = 1 << iota + Write_moby27782 + Remove_moby27782 + Rename_moby27782 + Chmod_moby27782 +) + +func newEvent(op Op_moby27782) Event_moby27782 { + return Event_moby27782{op} +} + +func (e *Event_moby27782) ignoreLinux(w *Watcher_moby27782) bool { + if e.Op != Write_moby27782 { + w.mu.Lock() + defer w.mu.Unlock() + w.cv.Broadcast() + return true + } + runtime.Gosched() + return false +} + +type Watcher_moby27782 struct { + Events chan Event_moby27782 + mu sync.Mutex // L1 + cv *sync.Cond // C1 + done chan struct{} +} + +func NewWatcher_moby27782() *Watcher_moby27782 { + w := &Watcher_moby27782{ + Events: make(chan Event_moby27782), + done: make(chan struct{}), + } + w.cv = sync.NewCond(&w.mu) + // deadlocks: x > 0 + go w.readEvents() // G3 + return w +} + +func (w *Watcher_moby27782) readEvents() { + defer close(w.Events) + for { + if w.isClosed() { + return + } + event := newEvent(Write_moby27782) // MODIFY event + if !event.ignoreLinux(w) { + runtime.Gosched() + select { + case w.Events <- event: + case <-w.done: + return + } + } + } +} + +func (w *Watcher_moby27782) isClosed() bool { + select { + case <-w.done: + return true + 
default: + return false + } +} + +func (w *Watcher_moby27782) Close() { + if w.isClosed() { + return + } + close(w.done) +} + +func (w *Watcher_moby27782) Remove() { + w.mu.Lock() + defer w.mu.Unlock() + exists := true + for exists { + w.cv.Wait() + runtime.Gosched() + } +} + +type FileWatcher_moby27782 interface { + Events() <-chan Event_moby27782 + Remove() + Close() +} + +func New_moby27782() FileWatcher_moby27782 { + return NewEventWatcher_moby27782() +} + +func NewEventWatcher_moby27782() FileWatcher_moby27782 { + return &fsNotifyWatcher_moby27782{NewWatcher_moby27782()} +} + +type fsNotifyWatcher_moby27782 struct { + *Watcher_moby27782 +} + +func (w *fsNotifyWatcher_moby27782) Events() <-chan Event_moby27782 { + return w.Watcher_moby27782.Events +} + +func watchFile_moby27782() FileWatcher_moby27782 { + fileWatcher := New_moby27782() + return fileWatcher +} + +type LogWatcher_moby27782 struct { + closeOnce sync.Once + closeNotifier chan struct{} +} + +func (w *LogWatcher_moby27782) Close() { + w.closeOnce.Do(func() { + close(w.closeNotifier) + }) +} + +func (w *LogWatcher_moby27782) WatchClose() <-chan struct{} { + return w.closeNotifier +} + +func NewLogWatcher_moby27782() *LogWatcher_moby27782 { + return &LogWatcher_moby27782{ + closeNotifier: make(chan struct{}), + } +} + +func followLogs_moby27782(logWatcher *LogWatcher_moby27782) { + fileWatcher := watchFile_moby27782() + defer func() { + fileWatcher.Close() + }() + waitRead := func() { + runtime.Gosched() + select { + case <-fileWatcher.Events(): + case <-logWatcher.WatchClose(): + fileWatcher.Remove() + return + } + } + handleDecodeErr := func() { + waitRead() + } + handleDecodeErr() +} + +type Container_moby27782 struct { + LogDriver *JSONFileLogger_moby27782 +} + +func (container *Container_moby27782) InitializeStdio() { + if err := container.startLogging(); err != nil { + container.Reset() + } +} + +func (container *Container_moby27782) startLogging() error { + l := &JSONFileLogger_moby27782{ + readers: make(map[*LogWatcher_moby27782]struct{}), + } + container.LogDriver = l + l.ReadLogs() + return errors.New("Some error") +} + +func (container *Container_moby27782) Reset() { + if container.LogDriver != nil { + container.LogDriver.Close() + } +} + +type JSONFileLogger_moby27782 struct { + readers map[*LogWatcher_moby27782]struct{} +} + +func (l *JSONFileLogger_moby27782) ReadLogs() *LogWatcher_moby27782 { + logWatcher := NewLogWatcher_moby27782() + // deadlocks: x > 0 + go l.readLogs(logWatcher) // G2 + return logWatcher +} + +func (l *JSONFileLogger_moby27782) readLogs(logWatcher *LogWatcher_moby27782) { + l.readers[logWatcher] = struct{}{} + followLogs_moby27782(logWatcher) +} + +func (l *JSONFileLogger_moby27782) Close() { + for r := range l.readers { + r.Close() + delete(l.readers, r) + } +} + +func Moby27782() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 10000; i++ { + go (&Container_moby27782{}).InitializeStdio() // G1 + } +} + +// +// Example deadlock trace: +// +// G1 G2 G3 +// ------------------------------------------------------------------------------- +// InitializeStdio() +// startLogging() +// l.ReadLogs() +// NewLogWatcher() +// go l.readLogs() [G2] l.readLogs() +// container.Reset() . +// LogDriver.Close() . +// r.Close() . +// close(w.closeNotifier) . +// . followLogs(logWatcher) +// . watchFile() +// . New() +// . NewEventWatcher() +// . NewWatcher() +// . . w.readEvents() +// . . event.ignoreLinux() +// . . return false +// . <-logWatcher.WatchClose() . 
+// . fileWatcher.Remove() . +// . w.cv.Wait() . +// . . w.Events <- event +// --------------------------------G2,G3 leak------------------------------------- +// diff --git a/src/runtime/testdata/testgoroutineleakgc/moby28462.go b/src/runtime/testdata/testgoroutineleakgc/moby28462.go new file mode 100644 index 00000000000000..af7bc8a4722db3 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby28462.go @@ -0,0 +1,139 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/28462 + * Buggy version: b184bdabf7a01c4b802304ac64ac133743c484be + * fix commit-id: 89b123473774248fc3a0356dd3ce5b116cc69b29 + * Flaky: 69/100 + * Description: + * There are three goroutines mentioned in the bug report Moby#28405. + * Actually, only two goroutines are needed to trigger this bug. This bug + * is another example where lock and channel are mixed with each other. + * + * Moby#28405 : https://github.com/moby/moby/issues/28405 + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Moby28462", Moby28462) +} + +type State_moby28462 struct { + Health *Health_moby28462 +} + +type Container_moby28462 struct { + sync.Mutex + State *State_moby28462 +} + +func (ctr *Container_moby28462) start() { + go ctr.waitExit() +} +func (ctr *Container_moby28462) waitExit() { + +} + +type Store_moby28462 struct { + ctr *Container_moby28462 +} + +func (s *Store_moby28462) Get() *Container_moby28462 { + return s.ctr +} + +type Daemon_moby28462 struct { + containers Store_moby28462 +} + +func (d *Daemon_moby28462) StateChanged() { + c := d.containers.Get() + c.Lock() + d.updateHealthMonitorElseBranch(c) + defer c.Unlock() +} + +func (d *Daemon_moby28462) updateHealthMonitorIfBranch(c *Container_moby28462) { + h := c.State.Health + if stop := h.OpenMonitorChannel(); stop != nil { + go monitor_moby28462(c, stop) + } +} +func (d *Daemon_moby28462) updateHealthMonitorElseBranch(c *Container_moby28462) { + h := c.State.Health + h.CloseMonitorChannel() +} + +type Health_moby28462 struct { + stop chan struct{} +} + +func (s *Health_moby28462) OpenMonitorChannel() chan struct{} { + return s.stop +} + +func (s *Health_moby28462) CloseMonitorChannel() { + if s.stop != nil { + s.stop <- struct{}{} + } +} + +func monitor_moby28462(c *Container_moby28462, stop chan struct{}) { + for { + select { + case <-stop: + return + default: + handleProbeResult_moby28462(c) + } + } +} + +func handleProbeResult_moby28462(c *Container_moby28462) { + runtime.Gosched() + c.Lock() + defer c.Unlock() +} + +func NewDaemonAndContainer_moby28462() (*Daemon_moby28462, *Container_moby28462) { + c := &Container_moby28462{ + State: &State_moby28462{&Health_moby28462{make(chan struct{})}}, + } + d := &Daemon_moby28462{Store_moby28462{c}} + return d, c +} + +/// +/// G1 G2 +/// monitor() +/// handleProbeResult() +/// d.StateChanged() +/// c.Lock() +/// d.updateHealthMonitorElseBranch() +/// h.CloseMonitorChannel() +/// s.stop <- struct{}{} +/// c.Lock() +/// ----------------------G1,G2 deadlock------------------------ +/// + +func Moby28462() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 10000; i++ { + go func() { + d, c := NewDaemonAndContainer_moby28462() + // deadlocks: x > 0 + go monitor_moby28462(c, c.State.Health.OpenMonitorChannel()) // G1 + // deadlocks: x > 0 + go d.StateChanged() // G2 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby29733.go b/src/runtime/testdata/testgoroutineleakgc/moby29733.go new file mode 100644 
index 00000000000000..65807d6ee7f1ac --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby29733.go @@ -0,0 +1,74 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Moby29733", Moby29733) +} + +type Plugin_moby29733 struct { + activated bool + activateWait *sync.Cond +} + +type plugins_moby29733 struct { + sync.Mutex + plugins map[int]*Plugin_moby29733 +} + +func (p *Plugin_moby29733) waitActive() { + p.activateWait.L.Lock() + for !p.activated { + p.activateWait.Wait() + } + p.activateWait.L.Unlock() +} + +type extpointHandlers_moby29733 struct { + sync.RWMutex + extpointHandlers map[int]struct{} +} + +func Handle_moby29733(storage plugins_moby29733, handlers extpointHandlers_moby29733) { + handlers.Lock() + for _, p := range storage.plugins { + p.activated = false + } + handlers.Unlock() +} + +func testActive_moby29733(p *Plugin_moby29733) { + done := make(chan struct{}) + go func() { + // deadlocks: x > 0 + p.waitActive() + close(done) + }() + <-done +} + +func Moby29733() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 1; i++ { + go func() { + // deadlocks: x > 0 + storage := plugins_moby29733{plugins: make(map[int]*Plugin_moby29733)} + handlers := extpointHandlers_moby29733{extpointHandlers: make(map[int]struct{})} + + p := &Plugin_moby29733{activateWait: sync.NewCond(&sync.Mutex{})} + storage.plugins[0] = p + + testActive_moby29733(p) + Handle_moby29733(storage, handlers) + testActive_moby29733(p) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby30408.go b/src/runtime/testdata/testgoroutineleakgc/moby30408.go new file mode 100644 index 00000000000000..2ca03184260954 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby30408.go @@ -0,0 +1,62 @@ +package main + +import ( + "errors" + "runtime" + "sync" + "time" +) + +func init() { + register("Moby30408", Moby30408) +} + +type Manifest_moby30408 struct { + Implements []string +} + +type Plugin_moby30408 struct { + activateWait *sync.Cond + activateErr error + Manifest *Manifest_moby30408 +} + +func (p *Plugin_moby30408) waitActive() error { + p.activateWait.L.Lock() + for !p.activated() { + p.activateWait.Wait() + } + p.activateWait.L.Unlock() + return p.activateErr +} + +func (p *Plugin_moby30408) activated() bool { + return p.Manifest != nil +} + +func testActive_moby30408(p *Plugin_moby30408) { + done := make(chan struct{}) + go func() { + // deadlocks: 100 + p.waitActive() + close(done) + }() + <-done +} + +func Moby30408() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + // deadlocks: 100 + p := &Plugin_moby30408{activateWait: sync.NewCond(&sync.Mutex{})} + p.activateErr = errors.New("some junk happened") + + testActive_moby30408(p) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby33781.go b/src/runtime/testdata/testgoroutineleakgc/moby33781.go new file mode 100644 index 00000000000000..115313fa3c9acb --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby33781.go @@ -0,0 +1,84 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/33781 + * Buggy version: 33fd3817b0f5ca4b87f0a75c2bd583b4425d392b + * fix commit-id: 67297ba0051d39be544009ba76abea14bc0be8a4 + * Flaky: 25/100 + * Description: + * The goroutine created using anonymous function is blocked at + * sending message to a unbuffered channel. 
However there exists a + * path in the parent goroutine where the parent function will + * return without draining the channel. + */ + +package main + +import ( + "context" + "runtime" + "time" +) + +func init() { + register("Moby33781", Moby33781) +} + +func monitor_moby33781(stop chan bool) { + probeInterval := time.Millisecond + probeTimeout := time.Millisecond + for { + select { + case <-stop: + return + case <-time.After(probeInterval): + results := make(chan bool) + ctx, cancelProbe := context.WithTimeout(context.Background(), probeTimeout) + go func() { // G3 + // deadlocks: x > 0 + results <- true + close(results) + }() + select { + case <-stop: + // results should be drained here + cancelProbe() + return + case <-results: + cancelProbe() + case <-ctx.Done(): + cancelProbe() + <-results + } + } + } +} + +/// +/// G1 G2 G3 +/// monitor() +/// <-time.After() +/// stop <- +/// <-stop +/// return +/// cancelProbe() +/// return +/// result<- +///----------------G3 leak------------------ +/// + +func Moby33781() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + go func(i int) { + stop := make(chan bool) + go monitor_moby33781(stop) // G1 + go func() { // G2 + time.Sleep(time.Duration(i) * time.Millisecond) + stop <- true + }() + }(i) + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby36114.go b/src/runtime/testdata/testgoroutineleakgc/moby36114.go new file mode 100644 index 00000000000000..1f4c3ba8533dbd --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby36114.go @@ -0,0 +1,52 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/36114 + * Buggy version: 6d4d3c52ae7c3f910bfc7552a2a673a8338e5b9f + * fix commit-id: a44fcd3d27c06aaa60d8d1cbce169f0d982e74b1 + * Flaky: 100/100 + * Description: + * This is a double lock bug. The the lock for the + * struct svm has already been locked when calling + * svm.hotRemoveVHDsAtStart() + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Moby36114", Moby36114) +} + +type serviceVM_moby36114 struct { + sync.Mutex +} + +func (svm *serviceVM_moby36114) hotAddVHDsAtStart() { + svm.Lock() + defer svm.Unlock() + svm.hotRemoveVHDsAtStart() +} + +func (svm *serviceVM_moby36114) hotRemoveVHDsAtStart() { + svm.Lock() + defer svm.Unlock() +} + +func Moby36114() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + s := &serviceVM_moby36114{} + // deadlocks: x > 0 + go s.hotAddVHDsAtStart() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby4395.go b/src/runtime/testdata/testgoroutineleakgc/moby4395.go new file mode 100644 index 00000000000000..f77cc883025608 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby4395.go @@ -0,0 +1,49 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/4395 + * Buggy version: 6d6ec5e0051ad081be3d71e20b39a25c711b4bc3 + * fix commit-id: d3a6ee1e55a53ee54b91ffb6c53ba674768cf9de + * Flaky: 100/100 + * Description: + * The anonyous goroutine could be waiting on sending to + * the channel which might never be drained. 
+ */ + +package main + +import ( + "errors" + "runtime" + "time" +) + +func init() { + register("Moby4395", Moby4395) +} + +func Go_moby4395(f func() error) chan error { + ch := make(chan error) + go func() { + // deadlocks: 1 + ch <- f() // G2 + }() + return ch +} + +/// +/// G1 G2 +/// Go() +/// return ch +/// ch <- f() +/// ----------G2 leak------------- +/// + +func Moby4395() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + Go_moby4395(func() error { // G1 + return errors.New("") + }) +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby4951.go b/src/runtime/testdata/testgoroutineleakgc/moby4951.go new file mode 100644 index 00000000000000..91d18fd73e8ecd --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby4951.go @@ -0,0 +1,102 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/4951 + * Buggy version: 81f148be566ab2b17810ad4be61a5d8beac8330f + * fix commit-id: 2ffef1b7eb618162673c6ffabccb9ca57c7dfce3 + * Flaky: 100/100 + * Description: + * The root cause and patch is clearly explained in the commit + * description. The global lock is devices.Lock(), and the device + * lock is baseInfo.lock.Lock(). It is very likely that this bug + * can be reproduced. + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Moby4951", Moby4951) +} + +type DeviceSet_moby4951 struct { + sync.Mutex + infos map[string]*DevInfo_moby4951 + nrDeletedDevices int +} + +func (devices *DeviceSet_moby4951) DeleteDevice(hash string) { + devices.Lock() + defer devices.Unlock() + + info := devices.lookupDevice(hash) + + info.lock.Lock() + defer info.lock.Unlock() + + devices.deleteDevice(info) +} + +func (devices *DeviceSet_moby4951) lookupDevice(hash string) *DevInfo_moby4951 { + existing, ok := devices.infos[hash] + if !ok { + return nil + } + return existing +} + +func (devices *DeviceSet_moby4951) deleteDevice(info *DevInfo_moby4951) { + devices.removeDeviceAndWait(info.Name()) +} + +func (devices *DeviceSet_moby4951) removeDeviceAndWait(devname string) { + /// remove devices by devname + devices.Unlock() + time.Sleep(300 * time.Nanosecond) + devices.Lock() +} + +type DevInfo_moby4951 struct { + lock sync.Mutex + name string +} + +func (info *DevInfo_moby4951) Name() string { + return info.name +} + +func NewDeviceSet_moby4951() *DeviceSet_moby4951 { + devices := &DeviceSet_moby4951{ + infos: make(map[string]*DevInfo_moby4951), + } + info1 := &DevInfo_moby4951{ + name: "info1", + } + info2 := &DevInfo_moby4951{ + name: "info2", + } + devices.infos[info1.name] = info1 + devices.infos[info2.name] = info2 + return devices +} + +func Moby4951() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + ds := NewDeviceSet_moby4951() + /// Delete devices by the same info + // deadlocks: x > 0 + go ds.DeleteDevice("info1") + // deadlocks: x > 0 + go ds.DeleteDevice("info1") + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby7559.go b/src/runtime/testdata/testgoroutineleakgc/moby7559.go new file mode 100644 index 00000000000000..d9c94b2c1f4d06 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby7559.go @@ -0,0 +1,52 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/7559 + * Buggy version: 64579f51fcb439c36377c0068ccc9a007b368b5a + * fix commit-id: 6cbb8e070d6c3a66bf48fbe5cbf689557eee23db + * Flaky: 100/100 + */ +package main + +import ( + "net" + "runtime" + "sync" + "time" +) + +func 
init() { + register("Moby7559", Moby7559) +} + +type UDPProxy_moby7559 struct { + connTrackLock sync.Mutex +} + +func (proxy *UDPProxy_moby7559) Run() { + for i := 0; i < 2; i++ { + proxy.connTrackLock.Lock() + _, err := net.DialUDP("udp", nil, nil) + if err != nil { + /// Missing unlock here + continue + } + if i == 0 { + break + } + } + proxy.connTrackLock.Unlock() +} +func Moby7559() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + proxy := &UDPProxy_moby7559{} + // deadlocks: x > 0 + go proxy.Run() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/patterns.go b/src/runtime/testdata/testgoroutineleakgc/patterns.go new file mode 100644 index 00000000000000..0849216683349f --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/patterns.go @@ -0,0 +1,262 @@ +package main + +import ( + "context" + "fmt" + "runtime" + "time" +) + +func init() { + register("NoCloseRange", NoCloseRange) + register("MethodContractViolation", MethodContractViolation) + register("DoubleSend", DoubleSend) + register("EarlyReturn", EarlyReturn) + register("NCastLeak", NCastLeak) + register("Timeout", Timeout) +} + +// Incoming list of items and the number of workers. +func noCloseRange(list []any, workers int) { + ch := make(chan any) + + // Create each worker + for i := 0; i < workers; i++ { + go func() { + // deadlocks: 10 + + // Each worker waits for an item and processes it. + for item := range ch { + // Process each item + _ = item + } + }() + } + + // Send each item to one of the workers. + for _, item := range list { + // Sending can deadlock if workers == 0 or if one of the workers panics + ch <- item + } + // The channel is never closed, so workers deadlock once there are no more + // items left to process. +} + +func NoCloseRange() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + go noCloseRange([]any{1, 2, 3}, 0) + go noCloseRange([]any{1, 2, 3}, 3) +} + +// A worker processes items pushed to `ch` one by one in the background. +// When the worker is no longer needed, it must be closed with `Stop`. +// +// Specifications: +// +// A worker may be started any number of times, but must be stopped only once. +// Stopping a worker multiple times will lead to a close panic. +// Any worker that is started must eventually be stopped. +// Failing to stop a worker results in a goroutine leak +type worker struct { + ch chan any + done chan any +} + +// Start spawns a background goroutine that extracts items pushed to the queue. +func (w worker) Start() { + go func() { + // deadlocks: 1 + + for { + select { + case <-w.ch: // Normal workflow + case <-w.done: + return // Shut down + } + } + }() +} + +func (w worker) Stop() { + // Allows goroutine created by Start to terminate + close(w.done) +} + +func (w worker) AddToQueue(item any) { + w.ch <- item +} + +// worker limited in scope by workerLifecycle +func workerLifecycle(items []any) { + // Create a new worker + w := worker{ + ch: make(chan any), + done: make(chan any), + } + // Start worker + w.Start() + + // Operate on worker + for _, item := range items { + w.AddToQueue(item) + } + + runtime.Gosched() + // Exits without calling ’Stop’. Goroutine created by `Start` eventually deadlocks. 
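+	// A leak-free variant would honour the contract above and stop the worker,
+	// e.g. by deferring w.Stop() right after w.Start(), letting the goroutine
+	// created by Start return via the done channel.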
+} + +func MethodContractViolation() { + defer func() { + time.Sleep(10 * time.Millisecond) + runtime.GC() + }() + + workerLifecycle(make([]any, 10)) + runtime.Gosched() +} + +// doubleSend incoming channel must send a message (incoming error simulates an error generated internally). +func doubleSend(ch chan any, err error) { + if err != nil { + // In case of an error, send nil. + ch <- nil + // Return is missing here. + } + // Otherwise, continue with normal behaviour + // This send is still executed in the error case, which may lead to deadlock. + ch <- struct{}{} +} + +func DoubleSend() { + ch := make(chan any) + defer func() { + time.Sleep(1000 * time.Millisecond) + runtime.GC() + }() + + go func() { + // deadlocks: 0 + doubleSend(ch, nil) + }() + <-ch + + go func() { + // deadlocks: 1 + doubleSend(ch, fmt.Errorf("error")) + }() + <-ch + + ch1 := make(chan any, 1) + go func() { + // deadlocks: 0 + doubleSend(ch1, fmt.Errorf("error")) + }() + <-ch1 +} + +// earlyReturn demonstrates a common pattern of goroutine leaks. +// A return statement interrupts the evaluation of the parent goroutine before it can consume a message. +// Incoming error simulates an error produced internally. +func earlyReturn(err error) { + // Create a synchronous channel + ch := make(chan any) + + go func() { + // deadlocks: 1 + + // Send something to the channel. + // Deadlocks if the parent goroutine terminates early. + ch <- struct{}{} + }() + + if err != nil { + // Interrupt evaluation of parent early in case of error. + // Sender deadlocks. + return + } + + // Only receive if there is no error. + <-ch +} + +func EarlyReturn() { + defer func() { + time.Sleep(10 * time.Millisecond) + runtime.GC() + }() + + go earlyReturn(nil) + go earlyReturn(fmt.Errorf("error")) +} + +// nCastLeak processes a number of items. First result to pass the post is retrieved from the channel queue. +func nCastLeak(items []any) { + // Channel is synchronous. + ch := make(chan any) + + // Iterate over every item + for range items { + go func() { + // deadlocks: 99 + + // Process item and send result to channel + ch <- struct{}{} + // Channel is synchronous: only one sender will synchronise + }() + } + // Retrieve first result. All other senders block. + // Receiver blocks if there are no senders. + <-ch +} + +func NCastLeak() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + go func() { + // deadlocks: 1 + nCastLeak(nil) + }() + + go func() { + nCastLeak(make([]any, 100)) + }() +} + +// A context is provided to short-circuit evaluation. 
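+// If the context is already done, the sender below may never be received from
+// and leaks; one conventional way to avoid this (a sketch, not part of this
+// test) is a buffered channel, ch := make(chan any, 1), so the send always
+// completes even when ctx.Done() wins the select.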
+func timeout(ctx context.Context) { + ch := make(chan any) + + go func() { + // deadlocks: x > 0 + ch <- struct{}{} + }() + + runtime.Gosched() + select { + case <-ch: // Receive message + // Sender is released + case <-ctx.Done(): // Context was cancelled or timed out + // Sender is stuck + } +} + +func Timeout() { + defer func() { + time.Sleep(10 * time.Millisecond) + runtime.GC() + }() + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + for i := 0; i < 100; i++ { + go timeout(ctx) + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/serving2137.go b/src/runtime/testdata/testgoroutineleakgc/serving2137.go new file mode 100644 index 00000000000000..b655585926b268 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/serving2137.go @@ -0,0 +1,143 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Serving2137", Serving2137) +} + +type token_serving2137 struct{} + +type request_serving2137 struct { + lock *sync.Mutex + accepted chan bool +} + +type Breaker_serving2137 struct { + pendingRequests chan token_serving2137 + activeRequests chan token_serving2137 +} + +func (b *Breaker_serving2137) Maybe(thunk func()) bool { + var t token_serving2137 + select { + default: + // Pending request queue is full. Report failure. + return false + case b.pendingRequests <- t: + // Pending request has capacity. + // Wait for capacity in the active queue. + b.activeRequests <- t + // Defer releasing capacity in the active and pending request queue. + defer func() { + <-b.activeRequests + runtime.Gosched() + <-b.pendingRequests + }() + // Do the thing. + thunk() + // Report success + return true + } +} + +func (b *Breaker_serving2137) concurrentRequest() request_serving2137 { + r := request_serving2137{lock: &sync.Mutex{}, accepted: make(chan bool, 1)} + r.lock.Lock() + var start sync.WaitGroup + start.Add(1) + go func() { // G2, G3 + // deadlocks: x > 0 + start.Done() + runtime.Gosched() + ok := b.Maybe(func() { + // Will block on locked mutex. + r.lock.Lock() + runtime.Gosched() + r.lock.Unlock() + }) + r.accepted <- ok + }() + start.Wait() // Ensure that the go func has had a chance to execute. + return r +} + +// Perform n requests against the breaker, returning mutexes for each +// request which succeeded, and a slice of bools for all requests. 
+func (b *Breaker_serving2137) concurrentRequests(n int) []request_serving2137 { + requests := make([]request_serving2137, n) + for i := range requests { + requests[i] = b.concurrentRequest() + } + return requests +} + +func NewBreaker_serving2137(queueDepth, maxConcurrency int32) *Breaker_serving2137 { + return &Breaker_serving2137{ + pendingRequests: make(chan token_serving2137, queueDepth+maxConcurrency), + activeRequests: make(chan token_serving2137, maxConcurrency), + } +} + +func unlock_serving2137(req request_serving2137) { + req.lock.Unlock() + runtime.Gosched() + // Verify that function has completed + ok := <-req.accepted + runtime.Gosched() + // Requeue for next usage + req.accepted <- ok +} + +func unlockAll_serving2137(requests []request_serving2137) { + for _, lc := range requests { + unlock_serving2137(lc) + } +} + +func Serving2137() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 1000; i++ { + go func() { + // deadlocks: x > 0 + b := NewBreaker_serving2137(1, 1) + + locks := b.concurrentRequests(2) // G1 + unlockAll_serving2137(locks) + }() + } +} + +// +// Example deadlock trace: +// G1 G2 G3 +// ------------------------------------------------------------------------------- +// b.concurrentRequests(2) +// b.concurrentRequest() +// r.lock.Lock() +// start.Done() +// start.Wait() +// b.concurrentRequest() +// r.lock.Lock() +// start.Done() +// start.Wait() +// unlockAll(locks) +// unlock(lc) +// req.lock.Unlock() +// ok := <-req.accepted +// b.Maybe() +// b.activeRequests <- t +// thunk() +// r.lock.Lock() +// b.Maybe() +// b.activeRequests <- t +// ----------------------------G1,G2,G3 deadlock----------------------------- +// diff --git a/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go b/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go new file mode 100644 index 00000000000000..16eb2800da3b6f --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go @@ -0,0 +1,85 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Syncthing4829", Syncthing4829) +} + +type Address_syncthing4829 int + +type Mapping_syncthing4829 struct { + mut sync.RWMutex + + extAddresses map[string]Address_syncthing4829 +} + +func (m *Mapping_syncthing4829) clearAddresses() { + m.mut.Lock() // First locking + var removed []Address_syncthing4829 + for id, addr := range m.extAddresses { + removed = append(removed, addr) + delete(m.extAddresses, id) + } + if len(removed) > 0 { + m.notify(nil, removed) + } + m.mut.Unlock() +} + +func (m *Mapping_syncthing4829) notify(added, remove []Address_syncthing4829) { + m.mut.RLock() + m.mut.RUnlock() +} + +type Service_syncthing4829 struct { + mut sync.RWMutex + + mappings []*Mapping_syncthing4829 +} + +func (s *Service_syncthing4829) NewMapping() *Mapping_syncthing4829 { + mapping := &Mapping_syncthing4829{ + extAddresses: make(map[string]Address_syncthing4829), + } + s.mut.Lock() + s.mappings = append(s.mappings, mapping) + s.mut.Unlock() + return mapping +} + +func (s *Service_syncthing4829) RemoveMapping(mapping *Mapping_syncthing4829) { + s.mut.Lock() + defer s.mut.Unlock() + for _, existing := range s.mappings { + if existing == mapping { + mapping.clearAddresses() + } + } +} + +func NewService_syncthing4829() *Service_syncthing4829 { + return &Service_syncthing4829{} +} + +func Syncthing4829() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + // deadlocks: x > 0 + 
natSvc := NewService_syncthing4829() + m := natSvc.NewMapping() + m.extAddresses["test"] = 0 + + natSvc.RemoveMapping(m) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go b/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go new file mode 100644 index 00000000000000..5ff25ca268a09b --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go @@ -0,0 +1,123 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Syncthing5795", Syncthing5795) +} + +type message_syncthing5795 interface{} + +type ClusterConfig_syncthing5795 struct{} + +type Model_syncthing5795 interface { + ClusterConfig(message_syncthing5795) +} + +type TestModel_syncthing5795 struct { + ccFn func() +} + +func (t *TestModel_syncthing5795) ClusterConfig(msg message_syncthing5795) { + if t.ccFn != nil { + t.ccFn() + } +} + +func newTestModel_syncthing5795() *TestModel_syncthing5795 { + return &TestModel_syncthing5795{} +} + +type Connection_syncthing5795 interface { + Start() + Close() +} + +type rawConnection_syncthing5795 struct { + receiver Model_syncthing5795 + + inbox chan message_syncthing5795 + dispatcherLoopStopped chan struct{} + closed chan struct{} + closeOnce sync.Once +} + +func (c *rawConnection_syncthing5795) Start() { + go c.readerLoop() + go func() { + // deadlocks: 1 + c.dispatcherLoop() + }() +} + +func (c *rawConnection_syncthing5795) readerLoop() { + for { + select { + case <-c.closed: + return + default: + } + } +} + +func (c *rawConnection_syncthing5795) dispatcherLoop() { + defer close(c.dispatcherLoopStopped) + var msg message_syncthing5795 + for { + select { + case msg = <-c.inbox: + case <-c.closed: + return + } + switch msg := msg.(type) { + case *ClusterConfig_syncthing5795: + c.receiver.ClusterConfig(msg) + default: + return + } + } +} + +func (c *rawConnection_syncthing5795) internalClose() { + c.closeOnce.Do(func() { + close(c.closed) + <-c.dispatcherLoopStopped + }) +} + +func (c *rawConnection_syncthing5795) Close() { + c.internalClose() +} + +func NewConnection_syncthing5795(receiver Model_syncthing5795) Connection_syncthing5795 { + return &rawConnection_syncthing5795{ + dispatcherLoopStopped: make(chan struct{}), + closed: make(chan struct{}), + inbox: make(chan message_syncthing5795), + receiver: receiver, + } +} + +func Syncthing5795() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + // deadlocks: 1 + m := newTestModel_syncthing5795() + c := NewConnection_syncthing5795(m).(*rawConnection_syncthing5795) + m.ccFn = func() { + c.Close() + } + + c.Start() + c.inbox <- &ClusterConfig_syncthing5795{} + + <-c.dispatcherLoopStopped + }() +} From 3f28a8c39d90da6f9f3081ee862b9693affc603b Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Mon, 4 Aug 2025 09:58:47 +0200 Subject: [PATCH 15/36] Removed FindGoLeaks API, addressed nits, and cleaned comments. --- src/runtime/mgc.go | 31 ++++--------------------------- src/runtime/mgcmark.go | 25 ++++++++++++++----------- 2 files changed, 18 insertions(+), 38 deletions(-) diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 5e0fa8a2581521..8975ebf77a9571 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -569,24 +569,6 @@ func GC() { releasem(mp) } -// FindGoleaks instructs the Go garbage collector to attempt -// goroutine leak detection during the next GC cycle. -// -// Only operates if goroutineleakfindergc is enabled in GOEXPERIMENT. -// Otherwise, it just runs runtime.GC(). 
-func FindGoLeaks() { - if !goexperiment.GoroutineLeakFinderGC { - GC() - return - } - - work.goroutineLeakFinder.pending.Store(true) - - for work.goroutineLeakFinder.pending.Load() { - GC() - } -} - // gcWaitOnMark blocks until GC finishes the Nth mark phase. If GC has // already completed this mark phase, it returns immediately. func gcWaitOnMark(n uint32) { @@ -1095,13 +1077,11 @@ top: gcMarkTermination(stw) } -// checkIfMaybeRunnable checks whether a goroutine may still be semantically runnable. +// isMaybeRunnable checks whether a goroutine may still be semantically runnable. // For goroutines which are semantically runnable, this will eventually return true // as the GC marking phase progresses. It returns false for leaked goroutines, or for // goroutines which are not yet computed as possibly runnable by the GC. -func (gp *g) checkIfMaybeRunnable() bool { - // Unmask the goroutine address to ensure we are not - // dereferencing a masked address. +func (gp *g) isMaybeRunnable() bool { switch gp.waitreason { case waitReasonSelectNoCases, waitReasonChanSendNilChan, @@ -1128,7 +1108,6 @@ func (gp *g) checkIfMaybeRunnable() bool { // If waiting on mutexes, wait groups, or condition variables, // check if the synchronization primitive attached to the sudog is marked. if gp.waiting != nil { - // Unmask the sema address and check if it's marked. return isMarkedOrNotInHeap(gp.waiting.elem.get()) } } @@ -1148,12 +1127,12 @@ func findMaybeRunnableGoroutines() (moreWork bool) { var vIndex, ivIndex int = work.nMaybeRunnableStackRoots, work.nStackRoots // Reorder goroutine list for vIndex < ivIndex { - if work.stackRoots[vIndex].checkIfMaybeRunnable() { + if work.stackRoots[vIndex].isMaybeRunnable() { vIndex = vIndex + 1 continue } for ivIndex = ivIndex - 1; ivIndex != vIndex; ivIndex = ivIndex - 1 { - if gp := work.stackRoots[ivIndex]; gp.checkIfMaybeRunnable() { + if gp := work.stackRoots[ivIndex]; gp.isMaybeRunnable() { work.stackRoots[ivIndex] = work.stackRoots[vIndex] work.stackRoots[vIndex] = gp vIndex = vIndex + 1 @@ -1186,8 +1165,6 @@ func gcUntrackSyncObjects() { // gcRestoreSyncObjects restores the elem and c fields of all sudogs to their original values. // Should be invoked after the goroutine leak detection phase. -// -//go:nosplit func gcRestoreSyncObjects() { assertWorldStopped() diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index d7424a1970c811..f9ceaaf0f9f950 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -63,13 +63,12 @@ func (gp *g) internalBlocked() bool { return reason < waitReasonChanReceiveNilChan || waitReasonSyncWaitGroupWait < reason } +// allGsSnapshotSortedForGC takes a snapshot of allgs and returns a sorted +// array of Gs. The array is sorted by the G's status, with running Gs +// first, followed by blocked Gs. The returned index indicates the cutoff +// between runnable and blocked Gs. +// // The world must be stopped or allglock must be held. -// go through the snapshot of allgs, putting them into an arrays, -// separated by index, where [0:blockedIndex] contains only running Gs -// allGs[blockedIndex:] contain only blocking Gs -// To avoid GC from marking and scanning the blocked Gs by scanning -// the returned array (which is heap allocated), we mask the highest -// bit of the pointers to Gs with gcBitMask. 
func allGsSnapshotSortedForGC() ([]*g, int) { assertWorldStoppedOrLockHeld(&allglock) @@ -1195,7 +1194,11 @@ func gcDrainMarkWorkerFractional(gcw *gcWork) { gcDrain(gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) } -func gcUpdateMarkrootNext() (uint32, bool) { +// gcNextMarkRoot safely increments work.markrootNext and returns the +// index of the next root job. The returned boolean is true if the root job +// is valid, and false if there are no more root jobs to be claimed, +// i.e. work.markrootNext >= work.markrootJobs. +func gcNextMarkRoot() (uint32, bool) { var success bool next, jobs := work.markrootNext.Load(), work.markrootJobs.Load() @@ -1281,8 +1284,8 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { // Stop if we're preemptible, if someone wants to STW, or if // someone is calling forEachP. for !(gp.preempt && (preemptible || sched.gcwaiting.Load() || pp.runSafePointFn != 0)) { - job, success := gcUpdateMarkrootNext() - if !success { + job, ok := gcNextMarkRoot() + if !ok { break } markroot(gcw, job, flushBgCredit) @@ -1429,8 +1432,8 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 { if b = gcw.tryGetObj(); b == 0 { // Try to do a root job. if work.markrootNext.Load() < work.markrootJobs.Load() { - job, success := gcUpdateMarkrootNext() - if success { + job, ok := gcNextMarkRoot() + if ok { workFlushed += markroot(gcw, job, false) continue } From bbb1d40642fcbfda21a830774668d34b4f60e9dc Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Mon, 4 Aug 2025 10:09:44 +0200 Subject: [PATCH 16/36] Added missing expected leak in GC test. --- src/runtime/gc_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index ba53487625df72..7e7349aaef8ec0 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -1206,6 +1206,7 @@ func TestGoroutineLeakGC(t *testing.T) { makeTest(testCase{name: "Cockroach1055"}, `main\.Cockroach1055\.func2 .* \[chan receive\]`, `main\.Cockroach1055\.func2\.1 .* \[chan receive\]`, + `main\.Cockroach1055\.func2\.1 .* \[sync\.Mutex\.Lock\]`, `main\.Cockroach1055\.func2\.2 .* \[sync\.WaitGroup\.Wait\]`), makeTest(testCase{name: "Cockroach1462"}, `main\.\(\*Stopper_cockroach1462\)\.RunWorker\.func1 .* \[chan send\]`, From 0237928b08489c3786f0b31bdbcbfa0f408f6c74 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Tue, 5 Aug 2025 10:38:26 +0200 Subject: [PATCH 17/36] Addressed flakiness on some tests. 
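For context on the changes below: several of these test programs leak only under particular schedulings, so their goroutines are sometimes collected normally and only sometimes reported as leaked. A minimal hypothetical sketch of such a timing-dependent leak (not one of the testdata programs; all names here are invented):

package main

import "time"

func maybeLeak() {
	ch := make(chan struct{})
	go func() {
		ch <- struct{}{} // blocks forever if the timeout branch below wins
	}()
	select {
	case <-ch: // sender released; no leak on this run
	case <-time.After(time.Microsecond): // sender left blocked; leaked on this run
	}
}

func main() {
	for i := 0; i < 100; i++ {
		maybeLeak()
	}
	time.Sleep(100 * time.Millisecond)
}

Which branch the select takes depends on scheduling, which is why the changes below either relax the expected leak patterns or adjust the number of repetitions for such cases.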
--- src/runtime/gc_test.go | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 7e7349aaef8ec0..5c1084fc81c8a9 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -1217,10 +1217,9 @@ func TestGoroutineLeakGC(t *testing.T) { makeTest(testCase{name: "Cockroach3710"}, `main\.Cockroach3710\.func2\.gowrap1 .* \[sync\.RWMutex\.RLock\]`, `main\.\(\*Store_cockroach3710\)\.processRaft\.func1 .* \[sync\.RWMutex\.Lock\]`), - makeTest(testCase{name: "Cockroach6181", repetitions: 50}, + makeTest(testCase{name: "Cockroach6181", repetitions: 20}, `main\.testRangeCacheCoalescedRequests_cockroach6181 .* \[sync\.WaitGroup\.Wait\]`, - `main\.testRangeCacheCoalescedRequests_cockroach6181\.func1\.1 .* \[sync\.Mutex\.Lock\]`, - `main\.testRangeCacheCoalescedRequests_cockroach6181\.func1\.1 .* \[sync\.RWMutex\.Lock\]`, + `main\.testRangeCacheCoalescedRequests_cockroach6181\.func1\.1 .* \[sync\.(RW)?Mutex\.Lock\]`, `main\.testRangeCacheCoalescedRequests_cockroach6181\.func1\.1 .* \[sync\.RWMutex\.RLock\]`), makeTest(testCase{name: "Cockroach7504", repetitions: 100}, `main\.Cockroach7504\.func2\.1 .* \[sync\.Mutex\.Lock\]`, @@ -1303,6 +1302,7 @@ func TestGoroutineLeakGC(t *testing.T) { makeTest(testCase{name: "Hugo5379"}, `main\.\(\*Page_hugo5379\)\.initContent\.func1\.1 .* \[sync\.Mutex\.Lock\]`, `main\.\(\*Site_hugo5379\)\.renderPages\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + `main\.\(\*Page_hugo5379\)\.initContentPlainAndMeta(\.\(\*Page_hugo5379\)\.initContent)?\.func1\.1 .* \[sync\.Mutex\.Lock\]`, `main\.Hugo5379\.func2 .* \[sync\.WaitGroup\.Wait\]`), makeTest(testCase{name: "Istio16224"}, `main\.Istio16224\.func2 .* \[sync\.Mutex\.Lock\]`, @@ -1315,9 +1315,14 @@ func TestGoroutineLeakGC(t *testing.T) { makeTest(testCase{name: "Istio18454"}, `main\.\(\*Worker_istio18454\)\.Start\.func1 .* \[chan receive\]`, `main\.\(\*Worker_istio18454\)\.Start\.func1 .* \[chan send\]`), - makeTest(testCase{name: "Kubernetes1321"}, - `main\.NewMux_kubernetes1321\.gowrap1 .* \[chan send\]`, - `main\.testMuxWatcherClose_kubernetes1321 .* \[sync\.Mutex\.Lock\]`), + // NOTE (vsaioc): + // Kubernetes/1321 is excluded due to a race condition in the original program + // that may, in very rare cases, lead to nil pointer dereference crashes. + // (Reproducible even with regular GC). 
+ // + // makeTest(testCase{name: "Kubernetes1321"}, + // `main\.NewMux_kubernetes1321\.gowrap1 .* \[chan send\]`, + // `main\.testMuxWatcherClose_kubernetes1321 .* \[sync\.Mutex\.Lock\]`), makeTest(testCase{name: "Kubernetes5316"}, `main\.finishRequest_kubernetes5316\.func1 .* \[chan send\]`), makeTest(testCase{name: "Kubernetes6632"}, @@ -1327,7 +1332,7 @@ func TestGoroutineLeakGC(t *testing.T) { `main\.\(\*statusManager_kubernetes10182\)\.Start\.func1 .* \[sync\.Mutex\.Lock\]`, `main\.Kubernetes10182\.func2\.gowrap2 .* \[chan send\]`, `main\.Kubernetes10182\.func2\.gowrap3 .* \[chan send\]`), - makeTest(testCase{name: "Kubernetes11298"}, + makeTest(testCase{name: "Kubernetes11298", repetitions: 10}, `main\.After_kubernetes11298\.func1 .* \[chan receive\]`, `main\.After_kubernetes11298\.func1 .* \[sync\.Cond\.Wait\]`, `main\.Kubernetes11298\.func2 .* \[chan receive\]`), @@ -1345,11 +1350,9 @@ func TestGoroutineLeakGC(t *testing.T) { `main\.\(\*NamespaceController_kubernetes30872\)\.Run\.func1 .* \[sync\.Mutex\.Lock\]`), makeTest(testCase{name: "Kubernetes38669"}, `main\.newCacheWatcher_kubernetes38669\.gowrap1 .* \[chan send\]`), - makeTest(testCase{name: "Kubernetes58107"}, - `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap1 .* \[sync\.Cond\.Wait\]`, - `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap1 .* \[sync\.RWMutex\.RLock\]`, - `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap2 .* \[sync\.Cond\.Wait\]`, - `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap2 .* \[sync\.RWMutex\.RLock\]`, + makeTest(testCase{name: "Kubernetes58107", repetitions: 5}, + `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap[12] .* \[sync\.Cond\.Wait\]`, + `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap[12] .* \[sync\.RWMutex\.RLock\]`, `main\.startResourceQuotaController_kubernetes58107\.gowrap2 .* \[sync\.RWMutex\.Lock\]`), makeTest(testCase{name: "Kubernetes62464"}, `main\.Kubernetes62464\.func2\.gowrap1 .* \[sync\.RWMutex\.RLock\]`, @@ -1358,7 +1361,7 @@ func TestGoroutineLeakGC(t *testing.T) { `main\.Kubernetes70277\.func2 .* \[chan receive\]`), makeTest(testCase{name: "Moby4395"}, `main\.Go_moby4395\.func1 .* \[chan send\]`), - makeTest(testCase{name: "Moby4951"}, + makeTest(testCase{name: "Moby4951", repetitions: 10}, `main\.Moby4951\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, `main\.Moby4951\.func2\.gowrap2 .* \[sync\.Mutex\.Lock\]`), makeTest(testCase{name: "Moby7559"}, From 4de988f78b23e03fcead090ba9cab34392860bbc Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Tue, 5 Aug 2025 15:03:33 +0200 Subject: [PATCH 18/36] Addressed more flakiness issues. --- src/runtime/gc_test.go | 222 +++++++++++++++++++++++++++-------------- 1 file changed, 148 insertions(+), 74 deletions(-) diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 5c1084fc81c8a9..dbb69d2f676504 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -1118,10 +1118,14 @@ func TestGoroutineLeakGC(t *testing.T) { flakyLeaks map[*regexp.Regexp]struct{} } + // FLAKYBOUNDARY is a special string that delineates flaky leaks from expected leaks + // when using makeTest (below) to construct test cases. + const FLAKYBOUNDARY = "$flaky-leaks$" + // makeTest is a short-hand for creating test cases. // Each of the leaks in the list is identified by a regular expression. 
// - // If a leak is the string "FLAKY", it notifies makeTest that any remaining + // If a leak is FLAKYBOUNDARY, it notifies makeTest that any remaining // leak patterns should be added to the flakyLeaks map. makeTest := func( cfg testCase, @@ -1142,7 +1146,7 @@ func TestGoroutineLeakGC(t *testing.T) { mode := EXPECTED for _, leak := range leaks { - if leak == "FLAKY" { + if leak == FLAKYBOUNDARY { mode = FLAKY continue } @@ -1202,84 +1206,115 @@ func TestGoroutineLeakGC(t *testing.T) { // This list is curated for tests that are not excessively flaky. gokerTestCases := []testCase{ makeTest(testCase{name: "Cockroach584"}, - `main\.Cockroach584\.func2\.1 .* \[sync\.Mutex\.Lock\]`), + `main\.Cockroach584\.func2\.1 .* \[sync\.Mutex\.Lock\]`, + ), makeTest(testCase{name: "Cockroach1055"}, `main\.Cockroach1055\.func2 .* \[chan receive\]`, + `main\.Cockroach1055\.func2\.2 .* \[sync\.WaitGroup\.Wait\]`, `main\.Cockroach1055\.func2\.1 .* \[chan receive\]`, + FLAKYBOUNDARY, `main\.Cockroach1055\.func2\.1 .* \[sync\.Mutex\.Lock\]`, - `main\.Cockroach1055\.func2\.2 .* \[sync\.WaitGroup\.Wait\]`), + ), makeTest(testCase{name: "Cockroach1462"}, `main\.\(\*Stopper_cockroach1462\)\.RunWorker\.func1 .* \[chan send\]`, - `main\.Cockroach1462\.func2 .* \[sync\.WaitGroup\.Wait\]`), + `main\.Cockroach1462\.func2 .* \[sync\.WaitGroup\.Wait\]`, + ), makeTest(testCase{name: "Cockroach2448"}, `main\.Cockroach2448\.func2\.gowrap1 .* \[select\]`, - `main\.Cockroach2448\.func2\.gowrap2 .* \[select\]`), + `main\.Cockroach2448\.func2\.gowrap2 .* \[select\]`, + ), makeTest(testCase{name: "Cockroach3710"}, `main\.Cockroach3710\.func2\.gowrap1 .* \[sync\.RWMutex\.RLock\]`, - `main\.\(\*Store_cockroach3710\)\.processRaft\.func1 .* \[sync\.RWMutex\.Lock\]`), + `main\.\(\*Store_cockroach3710\)\.processRaft\.func1 .* \[sync\.RWMutex\.Lock\]`, + ), makeTest(testCase{name: "Cockroach6181", repetitions: 20}, `main\.testRangeCacheCoalescedRequests_cockroach6181 .* \[sync\.WaitGroup\.Wait\]`, `main\.testRangeCacheCoalescedRequests_cockroach6181\.func1\.1 .* \[sync\.(RW)?Mutex\.Lock\]`, - `main\.testRangeCacheCoalescedRequests_cockroach6181\.func1\.1 .* \[sync\.RWMutex\.RLock\]`), + `main\.testRangeCacheCoalescedRequests_cockroach6181\.func1\.1 .* \[sync\.RWMutex\.RLock\]`, + ), makeTest(testCase{name: "Cockroach7504", repetitions: 100}, `main\.Cockroach7504\.func2\.1 .* \[sync\.Mutex\.Lock\]`, - `main\.Cockroach7504\.func2\.2 .* \[sync\.Mutex\.Lock\]`), + `main\.Cockroach7504\.func2\.2 .* \[sync\.Mutex\.Lock\]`, + ), makeTest(testCase{name: "Cockroach9935"}, - `main\.Cockroach9935\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`), + `main\.Cockroach9935\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + ), makeTest(testCase{name: "Cockroach10214"}, `main\.Cockroach10214\.func2\.1 .* \[sync\.Mutex\.Lock\]`, - `main\.Cockroach10214\.func2\.2 .* \[sync\.Mutex\.Lock\]`), + `main\.Cockroach10214\.func2\.2 .* \[sync\.Mutex\.Lock\]`, + ), makeTest(testCase{name: "Cockroach10790"}, - `main\.\(\*Replica_cockroach10790\)\.beginCmds\.func1 .* \[chan receive\]`), + `main\.\(\*Replica_cockroach10790\)\.beginCmds\.func1 .* \[chan receive\]`, + ), makeTest(testCase{name: "Cockroach13197"}, - `main\.\(\*DB_cockroach13197\)\.begin\.gowrap1 .* \[chan receive\]`), + `main\.\(\*DB_cockroach13197\)\.begin\.gowrap1 .* \[chan receive\]`, + ), makeTest(testCase{name: "Cockroach13755"}, - `main\.\(\*Rows_cockroach13755\)\.initContextClose\.gowrap1 .* \[chan receive\]`), + `main\.\(\*Rows_cockroach13755\)\.initContextClose\.gowrap1 .* \[chan receive\]`, + ), 
makeTest(testCase{name: "Cockroach16167"}, `main\.Cockroach16167\.func2 .* \[sync\.RWMutex\.RLock\]`, - `main\.Cockroach16167\.func2\.gowrap1 .* \[sync\.RWMutex\.Lock\]`), + `main\.Cockroach16167\.func2\.gowrap1 .* \[sync\.RWMutex\.Lock\]`, + ), makeTest(testCase{name: "Cockroach10790"}, - `main\.\(\*Replica_cockroach10790\)\.beginCmds\.func1 .* \[chan receive\]`), + `main\.\(\*Replica_cockroach10790\)\.beginCmds\.func1 .* \[chan receive\]`, + ), makeTest(testCase{name: "Cockroach13197"}, - `main\.\(\*DB_cockroach13197\)\.begin\.gowrap1 .* \[chan receive\]`), + `main\.\(\*DB_cockroach13197\)\.begin\.gowrap1 .* \[chan receive\]`, + ), makeTest(testCase{name: "Cockroach13755"}, - `main\.\(\*Rows_cockroach13755\)\.initContextClose\.gowrap1 .* \[chan receive\]`), + `main\.\(\*Rows_cockroach13755\)\.initContextClose\.gowrap1 .* \[chan receive\]`, + ), makeTest(testCase{name: "Cockroach16167"}, `main\.Cockroach16167\.func2 .* \[sync\.RWMutex\.RLock\]`, - `main\.Cockroach16167\.func2\.gowrap1 .* \[sync\.RWMutex\.Lock\]`), + `main\.Cockroach16167\.func2\.gowrap1 .* \[sync\.RWMutex\.Lock\]`, + ), makeTest(testCase{name: "Cockroach18101"}, - `main\.restore_cockroach18101\.func1 .* \[chan send\]`), + `main\.restore_cockroach18101\.func1 .* \[chan send\]`, + ), makeTest(testCase{name: "Cockroach24808"}, - `main\.Cockroach24808\.func2 .* \[chan send\]`), + `main\.Cockroach24808\.func2 .* \[chan send\]`, + ), makeTest(testCase{name: "Cockroach25456"}, - `main\.Cockroach25456\.func2 .* \[chan receive\]`), + `main\.Cockroach25456\.func2 .* \[chan receive\]`, + ), makeTest(testCase{name: "Cockroach35073"}, `main\.Cockroach35073\.func2.1 .* \[chan send\]`, - `main\.Cockroach35073\.func2 .* \[chan send\]`), + `main\.Cockroach35073\.func2 .* \[chan send\]`, + ), makeTest(testCase{name: "Cockroach35931"}, - `main\.Cockroach35931\.func2 .* \[chan send\]`), + `main\.Cockroach35931\.func2 .* \[chan send\]`, + ), makeTest(testCase{name: "Etcd5509"}, - `main\.Etcd5509\.func2 .* \[sync\.RWMutex\.Lock\]`), + `main\.Etcd5509\.func2 .* \[sync\.RWMutex\.Lock\]`, + ), makeTest(testCase{name: "Etcd6857"}, - `main\.Etcd6857\.func2\.gowrap2 .* \[chan send\]`), + `main\.Etcd6857\.func2\.gowrap2 .* \[chan send\]`, + ), makeTest(testCase{name: "Etcd6873"}, `main\.Etcd6873\.func2\.gowrap1 .* \[chan receive\]`, - `main\.newWatchBroadcasts_etcd6873\.func1 .* \[sync\.Mutex\.Lock\]`), + `main\.newWatchBroadcasts_etcd6873\.func1 .* \[sync\.Mutex\.Lock\]`, + ), makeTest(testCase{name: "Etcd7492"}, `main\.Etcd7492\.func2 .* \[sync\.WaitGroup\.Wait\]`, `main\.Etcd7492\.func2\.1 .* \[chan send\]`, - `main\.NewSimpleTokenTTLKeeper_etcd7492\.gowrap1 .* \[sync\.Mutex\.Lock\]`), + `main\.NewSimpleTokenTTLKeeper_etcd7492\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + ), makeTest(testCase{name: "Etcd7902"}, `main\.doRounds_etcd7902\.gowrap1 .* \[chan receive\]`, `main\.doRounds_etcd7902\.gowrap1 .* \[sync\.Mutex\.Lock\]`, - `main\.runElectionFunc_etcd7902 .* \[sync\.WaitGroup\.Wait\]`), + `main\.runElectionFunc_etcd7902 .* \[sync\.WaitGroup\.Wait\]`, + ), makeTest(testCase{name: "Etcd10492"}, - `main\.Etcd10492\.func2 .* \[sync\.Mutex\.Lock\]`), + `main\.Etcd10492\.func2 .* \[sync\.Mutex\.Lock\]`, + ), makeTest(testCase{name: "Grpc660"}, - `main\.\(\*benchmarkClient_grpc660\)\.doCloseLoopUnary\.func1 .* \[chan send\]`), + `main\.\(\*benchmarkClient_grpc660\)\.doCloseLoopUnary\.func1 .* \[chan send\]`, + ), makeTest(testCase{name: "Grpc795"}, `main\.\(\*test_grpc795\)\.startServer\.gowrap1 .* \[sync\.Mutex\.Lock\]`, - 
`main\.testServerGracefulStopIdempotent_grpc795 .* \[sync\.Mutex\.Lock\]`), + `main\.testServerGracefulStopIdempotent_grpc795 .* \[sync\.Mutex\.Lock\]`, + ), makeTest(testCase{name: "Grpc862"}, `main\.DialContext_grpc862\.func2 .* \[chan receive\]`), makeTest(testCase{name: "Grpc1275"}, @@ -1288,117 +1323,156 @@ func TestGoroutineLeakGC(t *testing.T) { `main\.DialContext_grpc1424\.func1 .* \[chan receive\]`), makeTest(testCase{name: "Grpc1460"}, `main\.Grpc1460\.func2\.gowrap1 .* \[chan receive\]`, - `main\.Grpc1460\.func2\.gowrap2 .* \[sync\.Mutex\.Lock\]`), - makeTest(testCase{name: "Grpc3017", repetitions: 50}, + `main\.Grpc1460\.func2\.gowrap2 .* \[sync\.Mutex\.Lock\]`, + ), + makeTest(testCase{name: "Grpc3017", repetitions: 20}, // grpc/3017 involves a goroutine leak that also simultaneously engages many GC assists. - // Testing runtime behaviour when pivoting between regular and goroutine leak detection modes. + FLAKYBOUNDARY, + // All the leaks are flaky. `main\.Grpc3017\.func2 .* \[chan receive\]`, `main\.Grpc3017\.func2\.1 .* \[sync\.Mutex\.Lock\]`, - `main\.\(\*lbCacheClientConn_grpc3017\)\.RemoveSubConn\.func1 .* \[sync\.Mutex\.Lock\]`), + `main\.\(\*lbCacheClientConn_grpc3017\)\.RemoveSubConn\.func1 .* \[sync\.Mutex\.Lock\]`, + ), makeTest(testCase{name: "Hugo3251", repetitions: 20}, `main\.Hugo3251\.func2 .* \[sync\.WaitGroup\.Wait\]`, `main\.Hugo3251\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, - `main\.Hugo3251\.func2\.gowrap1 .* \[sync\.RWMutex\.RLock\]`), + `main\.Hugo3251\.func2\.gowrap1 .* \[sync\.RWMutex\.RLock\]`, + ), makeTest(testCase{name: "Hugo5379"}, - `main\.\(\*Page_hugo5379\)\.initContent\.func1\.1 .* \[sync\.Mutex\.Lock\]`, `main\.\(\*Site_hugo5379\)\.renderPages\.gowrap1 .* \[sync\.Mutex\.Lock\]`, `main\.\(\*Page_hugo5379\)\.initContentPlainAndMeta(\.\(\*Page_hugo5379\)\.initContent)?\.func1\.1 .* \[sync\.Mutex\.Lock\]`, - `main\.Hugo5379\.func2 .* \[sync\.WaitGroup\.Wait\]`), + `main\.Hugo5379\.func2 .* \[sync\.WaitGroup\.Wait\]`, + FLAKYBOUNDARY, + `main\.\(\*Page_hugo5379\)\.initContent\.func1\.1 .* \[sync\.Mutex\.Lock\]`, + ), makeTest(testCase{name: "Istio16224"}, `main\.Istio16224\.func2 .* \[sync\.Mutex\.Lock\]`, `main\.Istio16224\.func2\.gowrap1 .* \[chan send\]`, + FLAKYBOUNDARY, // This is also a leak, but it is too flaky to be reliably detected. - `FLAKY`, - `main\.Istio16224\.func2\.gowrap1 .* \[chan receive\]`), + `main\.Istio16224\.func2\.gowrap1 .* \[chan receive\]`, + ), makeTest(testCase{name: "Istio17860"}, - `main\.\(\*agent_istio17860\)\.Restart\.gowrap2 .* \[chan send\]`), + `main\.\(\*agent_istio17860\)\.Restart\.gowrap2 .* \[chan send\]`, + ), makeTest(testCase{name: "Istio18454"}, `main\.\(\*Worker_istio18454\)\.Start\.func1 .* \[chan receive\]`, - `main\.\(\*Worker_istio18454\)\.Start\.func1 .* \[chan send\]`), + `main\.\(\*Worker_istio18454\)\.Start\.func1 .* \[chan send\]`, + ), // NOTE (vsaioc): // Kubernetes/1321 is excluded due to a race condition in the original program - // that may, in very rare cases, lead to nil pointer dereference crashes. - // (Reproducible even with regular GC). + // that may, in extremely rare cases, lead to nil pointer dereference crashes. + // (Reproducible even with regular GC). Only kept here for posterity. 
// // makeTest(testCase{name: "Kubernetes1321"}, // `main\.NewMux_kubernetes1321\.gowrap1 .* \[chan send\]`, // `main\.testMuxWatcherClose_kubernetes1321 .* \[sync\.Mutex\.Lock\]`), makeTest(testCase{name: "Kubernetes5316"}, - `main\.finishRequest_kubernetes5316\.func1 .* \[chan send\]`), + `main\.finishRequest_kubernetes5316\.func1 .* \[chan send\]`, + ), makeTest(testCase{name: "Kubernetes6632"}, `main\.Kubernetes6632\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, - `main\.Kubernetes6632\.func2\.gowrap2 .* \[chan send\]`), + `main\.Kubernetes6632\.func2\.gowrap2 .* \[chan send\]`, + ), makeTest(testCase{name: "Kubernetes10182"}, `main\.\(\*statusManager_kubernetes10182\)\.Start\.func1 .* \[sync\.Mutex\.Lock\]`, `main\.Kubernetes10182\.func2\.gowrap2 .* \[chan send\]`, - `main\.Kubernetes10182\.func2\.gowrap3 .* \[chan send\]`), - makeTest(testCase{name: "Kubernetes11298", repetitions: 10}, + `main\.Kubernetes10182\.func2\.gowrap3 .* \[chan send\]`, + ), + makeTest(testCase{name: "Kubernetes11298"}, + // This entire test is flaky in producing leaks. Run it only to check for crashes. + FLAKYBOUNDARY, `main\.After_kubernetes11298\.func1 .* \[chan receive\]`, `main\.After_kubernetes11298\.func1 .* \[sync\.Cond\.Wait\]`, - `main\.Kubernetes11298\.func2 .* \[chan receive\]`), + `main\.Kubernetes11298\.func2 .* \[chan receive\]`, + ), makeTest(testCase{name: "Kubernetes13135"}, - `main\.Kubernetes13135\.func2 .* \[sync\.WaitGroup\.Wait\]`), + `main\.Kubernetes13135\.func2 .* \[sync\.WaitGroup\.Wait\]`, + ), makeTest(testCase{name: "Kubernetes25331"}, - `main\.Kubernetes25331\.func2\.gowrap1 .* \[chan send\]`), + `main\.Kubernetes25331\.func2\.gowrap1 .* \[chan send\]`, + ), makeTest(testCase{name: "Kubernetes26980"}, `main\.Kubernetes26980\.func2 .* \[chan receive\]`, `main\.Kubernetes26980\.func2\.1 .* \[sync\.Mutex\.Lock\]`, - `main\.Kubernetes26980\.func2\.gowrap2 .* \[chan receive\]`), + `main\.Kubernetes26980\.func2\.gowrap2 .* \[chan receive\]`, + ), makeTest(testCase{name: "Kubernetes30872"}, `main\.\(\*DelayingDeliverer_kubernetes30872\)\.StartWithHandler\.func1 .* \[sync\.Mutex\.Lock\]`, `main\.\(\*federatedInformerImpl_kubernetes30872\)\.Start\.gowrap2 .* \[sync\.Mutex\.Lock\]`, - `main\.\(\*NamespaceController_kubernetes30872\)\.Run\.func1 .* \[sync\.Mutex\.Lock\]`), + `main\.\(\*NamespaceController_kubernetes30872\)\.Run\.func1 .* \[sync\.Mutex\.Lock\]`, + ), makeTest(testCase{name: "Kubernetes38669"}, - `main\.newCacheWatcher_kubernetes38669\.gowrap1 .* \[chan send\]`), + `main\.newCacheWatcher_kubernetes38669\.gowrap1 .* \[chan send\]`, + ), makeTest(testCase{name: "Kubernetes58107", repetitions: 5}, `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap[12] .* \[sync\.Cond\.Wait\]`, `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap[12] .* \[sync\.RWMutex\.RLock\]`, - `main\.startResourceQuotaController_kubernetes58107\.gowrap2 .* \[sync\.RWMutex\.Lock\]`), + `main\.startResourceQuotaController_kubernetes58107\.gowrap2 .* \[sync\.RWMutex\.Lock\]`, + ), makeTest(testCase{name: "Kubernetes62464"}, `main\.Kubernetes62464\.func2\.gowrap1 .* \[sync\.RWMutex\.RLock\]`, - `main\.Kubernetes62464\.func2\.gowrap2 .* \[sync\.RWMutex\.Lock\]`), + `main\.Kubernetes62464\.func2\.gowrap2 .* \[sync\.RWMutex\.Lock\]`, + ), makeTest(testCase{name: "Kubernetes70277"}, - `main\.Kubernetes70277\.func2 .* \[chan receive\]`), + `main\.Kubernetes70277\.func2 .* \[chan receive\]`, + ), makeTest(testCase{name: "Moby4395"}, - `main\.Go_moby4395\.func1 .* \[chan send\]`), - 
makeTest(testCase{name: "Moby4951", repetitions: 10}, + `main\.Go_moby4395\.func1 .* \[chan send\]`, + ), + makeTest(testCase{name: "Moby4951"}, + FLAKYBOUNDARY, `main\.Moby4951\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, - `main\.Moby4951\.func2\.gowrap2 .* \[sync\.Mutex\.Lock\]`), + `main\.Moby4951\.func2\.gowrap2 .* \[sync\.Mutex\.Lock\]`, + ), makeTest(testCase{name: "Moby7559"}, - `main\.Moby7559\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`), + `main\.Moby7559\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + ), makeTest(testCase{name: "Moby17176"}, - `main\.testDevmapperLockReleasedDeviceDeletion_moby17176\.func1 .* \[sync\.Mutex\.Lock\]`), + `main\.testDevmapperLockReleasedDeviceDeletion_moby17176\.func1 .* \[sync\.Mutex\.Lock\]`, + ), makeTest(testCase{name: "Moby21233"}, `main\.\(\*Transfer_moby21233\)\.Watch\.func1 .* \[chan send\]`, `main\.\(\*Transfer_moby21233\)\.Watch\.func1 .* \[select\]`, - `main\.testTransfer_moby21233 .* \[chan receive\]`), + `main\.testTransfer_moby21233 .* \[chan receive\]`, + ), makeTest(testCase{name: "Moby25348"}, - `main\.Moby25348\.func2\.gowrap1 .* \[sync\.WaitGroup\.Wait\]`), + `main\.Moby25348\.func2\.gowrap1 .* \[sync\.WaitGroup\.Wait\]`, + ), makeTest(testCase{name: "Moby27782"}, `main\.\(\*JSONFileLogger_moby27782\)\.ReadLogs\.gowrap1 .* \[sync\.Cond\.Wait\]`, - `main\.NewWatcher_moby27782\.gowrap1 .* \[select\]`), + `main\.NewWatcher_moby27782\.gowrap1 .* \[select\]`, + ), makeTest(testCase{name: "Moby28462"}, `main\.Moby28462\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, - `main\.Moby28462\.func2\.gowrap2 .* \[chan send\]`), + `main\.Moby28462\.func2\.gowrap2 .* \[chan send\]`, + ), makeTest(testCase{name: "Moby29733"}, `main\.Moby29733\.func2 .* \[chan receive\]`, - `main\.testActive_moby29733\.func1 .* \[sync\.Cond\.Wait\]`), + `main\.testActive_moby29733\.func1 .* \[sync\.Cond\.Wait\]`, + ), makeTest(testCase{name: "Moby30408"}, `main\.Moby30408\.func2 .* \[chan receive\]`, - `main\.testActive_moby30408\.func1 .* \[sync\.Cond\.Wait\]`), + `main\.testActive_moby30408\.func1 .* \[sync\.Cond\.Wait\]`, + ), makeTest(testCase{name: "Moby33781"}, - `main\.monitor_moby33781\.func1 .* \[chan send\]`), + `main\.monitor_moby33781\.func1 .* \[chan send\]`, + ), makeTest(testCase{name: "Moby36114"}, - `main\.Moby36114\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`), + `main\.Moby36114\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + ), makeTest(testCase{name: "Serving2137"}, `main\.\(\*Breaker_serving2137\)\.concurrentRequest\.func1 .* \[chan send\]`, `main\.\(\*Breaker_serving2137\)\.concurrentRequest\.func1 .* \[sync\.Mutex\.Lock\]`, - `main\.Serving2137\.func2 .* \[chan receive\]`), + `main\.Serving2137\.func2 .* \[chan receive\]`, + ), makeTest(testCase{name: "Syncthing4829"}, - `main\.Syncthing4829\.func2 .* \[sync\.RWMutex\.RLock\]`), + `main\.Syncthing4829\.func2 .* \[sync\.RWMutex\.RLock\]`, + ), makeTest(testCase{name: "Syncthing5795"}, `main\.\(\*rawConnection_syncthing5795\)\.Start\.func1 .* \[chan receive\]`, - `main\.Syncthing5795\.func2 .* \[chan receive\]`), + `main\.Syncthing5795\.func2 .* \[chan receive\]`, + ), } // Combine all test cases into a single list. From 3e40e335c1b3988f1cf040d2731d212362a08104 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Thu, 7 Aug 2025 11:58:48 +0200 Subject: [PATCH 19/36] Fixed TODO comment. 
--- src/runtime/mbitmap.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 05c04e4007ec22..fa6a9362bfcc55 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -1261,7 +1261,7 @@ func isMarkedOrNotInHeap(p unsafe.Pointer) bool { // In this case, it is either a pointer to a stack object or a global resource. // Treat it as reachable in memory by default, to be safe. // - // (vsaioc) TODO: we could possibly be more precise by only checking against the stacks + // TODO(vsaioc): we could be more precise by checking against the stacks // of runnable goroutines. I don't think this is necessary, based on what we've seen, but // let's keep the option open in case the runtime evolves. // This will (naively) lead to quadratic blow-up for goroutine leak detection, From f5e66f025fbf4e1a19904e8f0d557752af52963f Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Thu, 7 Aug 2025 13:00:54 +0200 Subject: [PATCH 20/36] Collect goroutine leaks via profiling. Remove debug flag. --- src/net/http/pprof/pprof.go | 34 +++++++------ src/runtime/mgc.go | 38 ++++++++------ src/runtime/mprof.go | 97 ++++++++++++++++++++++++++++++++++++ src/runtime/pprof/pprof.go | 58 ++++++++++++++++----- src/runtime/pprof/runtime.go | 6 +++ src/runtime/proc.go | 19 +++++++ src/runtime/runtime1.go | 2 - src/runtime/traceback.go | 5 +- 8 files changed, 213 insertions(+), 46 deletions(-) diff --git a/src/net/http/pprof/pprof.go b/src/net/http/pprof/pprof.go index 635d3ad9d9f132..bfe6111fdf203d 100644 --- a/src/net/http/pprof/pprof.go +++ b/src/net/http/pprof/pprof.go @@ -351,25 +351,27 @@ func collectProfile(p *pprof.Profile) (*profile.Profile, error) { } var profileSupportsDelta = map[handler]bool{ - "allocs": true, - "block": true, - "goroutine": true, - "heap": true, - "mutex": true, - "threadcreate": true, + "allocs": true, + "block": true, + "goroutine": true, + "goroutineleak": true, + "heap": true, + "mutex": true, + "threadcreate": true, } var profileDescriptions = map[string]string{ - "allocs": "A sampling of all past memory allocations", - "block": "Stack traces that led to blocking on synchronization primitives", - "cmdline": "The command line invocation of the current program", - "goroutine": "Stack traces of all current goroutines. Use debug=2 as a query parameter to export in the same format as an unrecovered panic.", - "heap": "A sampling of memory allocations of live objects. You can specify the gc GET parameter to run GC before taking the heap sample.", - "mutex": "Stack traces of holders of contended mutexes", - "profile": "CPU profile. You can specify the duration in the seconds GET parameter. After you get the profile file, use the go tool pprof command to investigate the profile.", - "symbol": "Maps given program counters to function names. Counters can be specified in a GET raw query or POST body, multiple counters are separated by '+'.", - "threadcreate": "Stack traces that led to the creation of new OS threads", - "trace": "A trace of execution of the current program. You can specify the duration in the seconds GET parameter. After you get the trace file, use the go tool trace command to investigate the trace.", + "allocs": "A sampling of all past memory allocations", + "block": "Stack traces that led to blocking on synchronization primitives", + "cmdline": "The command line invocation of the current program", + "goroutine": "Stack traces of all current goroutines. 
Use debug=2 as a query parameter to export in the same format as an unrecovered panic.", + "goroutineleak": "Stack traces of all leaked goroutines. Use debug=2 as a query parameter to export in the same format as an unrecovered panic.", + "heap": "A sampling of memory allocations of live objects. You can specify the gc GET parameter to run GC before taking the heap sample.", + "mutex": "Stack traces of holders of contended mutexes", + "profile": "CPU profile. You can specify the duration in the seconds GET parameter. After you get the profile file, use the go tool pprof command to investigate the profile.", + "symbol": "Maps given program counters to function names. Counters can be specified in a GET raw query or POST body, multiple counters are separated by '+'.", + "threadcreate": "Stack traces that led to the creation of new OS threads", + "trace": "A trace of execution of the current program. You can specify the duration in the seconds GET parameter. After you get the trace file, use the go tool trace command to investigate the trace.", } type profileEntry struct { diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 8975ebf77a9571..9821da4d040d51 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -569,6 +569,25 @@ func GC() { releasem(mp) } +//go:linkname runtime_goroutineLeakGC runtime/pprof.runtime_goroutineLeakGC +func runtime_goroutineLeakGC() { + goroutineLeakGC() +} + +// goroutineLeakGC runs a GC cycle that performs goroutine leak detection. +func goroutineLeakGC() { + // Set the pending flag to true, instructing the next GC cycle to + // perform goroutine leak detection. + work.goroutineLeakFinder.pending.Store(true) + + // Spin GC cycles until the pending flag is unset. + // This ensures that goroutineLeakGC waits for a GC cycle that + // actually performs goroutine leak detection. + for work.goroutineLeakFinder.pending.Load() { + GC() + } +} + // gcWaitOnMark blocks until GC finishes the Nth mark phase. If GC has // already completed this mark phase, it returns immediately. func gcWaitOnMark(n uint32) { @@ -714,9 +733,9 @@ func gcStart(trigger gcTrigger) { mode = gcForceMode } else if debug.gcstoptheworld == 2 { mode = gcForceBlockMode - } else if work.goroutineLeakFinder.pending.Load() || debug.gcgoroutineleaks > 0 { - // If goroutine leak detection has been enabled (via GODEBUG=gcgoroutineleaks=1), - // or via profiling, stop the world during the marking phase. + } else if work.goroutineLeakFinder.pending.Load() { + // If goroutine leak detection has been enabled via profiling, + // stop the world during the marking phase. mode = gcForceMode } @@ -795,8 +814,7 @@ func gcStart(trigger gcTrigger) { schedEnableUser(false) } - if work.goroutineLeakFinder.pending.Load() || - debug.gcgoroutineleaks > 0 { + if work.goroutineLeakFinder.pending.Load() { work.goroutineLeakFinder.enabled = true work.goroutineLeakFinder.pending.Store(false) gcUntrackSyncObjects() @@ -1199,16 +1217,6 @@ func findGoleaks() bool { for i := work.nMaybeRunnableStackRoots; i < work.nStackRoots; i++ { gp := work.stackRoots[i] casgstatus(gp, _Gwaiting, _Gleaked) - fn := findfunc(gp.startpc) - if fn.valid() { - print("goroutine leak! goroutine ", gp.goid, ": ", funcname(fn), " Stack size: ", gp.stack.hi-gp.stack.lo, " bytes ", - "[", waitReasonStrings[gp.waitreason], "]\n") - } else { - print("goroutine leak! 
goroutine ", gp.goid, ": !unnamed goroutine!", " Stack size: ", gp.stack.hi-gp.stack.lo, " bytes ", - "[", waitReasonStrings[gp.waitreason], "]\n") - } - traceback(gp.sched.pc, gp.sched.sp, gp.sched.lr, gp) - println() } // Put the remaining roots as ready for marking and drain them. work.markrootJobs.Add(int32(work.nStackRoots - work.nMaybeRunnableStackRoots)) diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go index b2ff257f65eca2..7969c11211580d 100644 --- a/src/runtime/mprof.go +++ b/src/runtime/mprof.go @@ -1259,6 +1259,20 @@ func goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.P return goroutineProfileWithLabelsConcurrent(p, labels) } +//go:linkname pprof_goroutineLeakProfileWithLabels +func pprof_goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { + return goroutineLeakProfileWithLabelsConcurrent(p, labels) +} + +// labels may be nil. If labels is non-nil, it must have the same length as p. +func goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { + if labels != nil && len(labels) != len(p) { + labels = nil + } + + return goroutineLeakProfileWithLabelsConcurrent(p, labels) +} + var goroutineProfile = struct { sema uint32 active bool @@ -1302,6 +1316,89 @@ func (p *goroutineProfileStateHolder) CompareAndSwap(old, new goroutineProfileSt return (*atomic.Uint32)(p).CompareAndSwap(uint32(old), uint32(new)) } +func goroutineLeakProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { + if len(p) == 0 { + // An empty slice is obviously too small. Return a rough + // allocation estimate without bothering to STW. As long as + // this is close, then we'll only need to STW once (on the next + // call). + return int(gleakcount()), false + } + + // Use the same semaphore as goroutineProfileWithLabelsConcurrent, + // because ultimately we still use goroutine profiles. + semacquire(&goroutineProfile.sema) + + // Unlike in goroutineProfileWithLabelsConcurrent, we don't save the current + // goroutine stack, because it is obviously not a leaked goroutine. + + pcbuf := makeProfStack() // see saveg() for explanation + stw := stopTheWorld(stwGoroutineProfile) + // Using gleakcount while the world is stopped should give us a consistent view + // of the number of leaked goroutines. + n = int(gleakcount()) + + if n > len(p) { + // There's not enough space in p to store the whole profile, so (per the + // contract of runtime.GoroutineProfile) we're not allowed to write to p + // at all and must return n, false. + startTheWorld(stw) + semrelease(&goroutineProfile.sema) + return n, false + } + + // Prepare for all other goroutines to enter the profile. Every goroutine struct in the allgs list + // has its goroutineProfiled field cleared. Any goroutine created from this point on (while + // goroutineProfile.active is set) will start with its goroutineProfiled + // field set to goroutineProfileSatisfied. + goroutineProfile.active = true + goroutineProfile.records = p + goroutineProfile.labels = labels + startTheWorld(stw) + + // Visit each leaked goroutine that existed as of the startTheWorld call above. 
+ forEachGRace(func(gp1 *g) { + if readgstatus(gp1) == _Gleaked { + tryRecordGoroutineProfile(gp1, pcbuf, Gosched) + } + }) + + stw = stopTheWorld(stwGoroutineProfileCleanup) + endOffset := goroutineProfile.offset.Swap(0) + goroutineProfile.active = false + goroutineProfile.records = nil + goroutineProfile.labels = nil + startTheWorld(stw) + + // Restore the invariant that every goroutine struct in allgs has its + // goroutineProfiled field cleared. + forEachGRace(func(gp1 *g) { + gp1.goroutineProfiled.Store(goroutineProfileAbsent) + }) + + if raceenabled { + raceacquire(unsafe.Pointer(&labelSync)) + } + + if n != int(endOffset) { + // It's a big surprise that the number of goroutines changed while we + // were collecting the profile. But probably better to return a + // truncated profile than to crash the whole process. + // + // For instance, needm moves a goroutine out of the _Gdead state and so + // might be able to change the goroutine count without interacting with + // the scheduler. For code like that, the race windows are small and the + // combination of features is uncommon, so it's hard to be (and remain) + // sure we've caught them all. + // + // FIXME(vsaioc): I kept this in because goroutineProfileWithLabelsConcurrent + // also uses it, but... is this dead code? + } + + semrelease(&goroutineProfile.sema) + return n, true +} + func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { if len(p) == 0 { // An empty slice is obviously too small. Return a rough diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go index 55563009b3a2c1..28923181bb619f 100644 --- a/src/runtime/pprof/pprof.go +++ b/src/runtime/pprof/pprof.go @@ -105,12 +105,13 @@ import ( // // Each Profile has a unique name. A few profiles are predefined: // -// goroutine - stack traces of all current goroutines -// heap - a sampling of memory allocations of live objects -// allocs - a sampling of all past memory allocations -// threadcreate - stack traces that led to the creation of new OS threads -// block - stack traces that led to blocking on synchronization primitives -// mutex - stack traces of holders of contended mutexes +// goroutine - stack traces of all current goroutines +// goroutineleak - stack traces of all leaked goroutines +// allocs - a sampling of all past memory allocations +// heap - a sampling of memory allocations of live objects +// threadcreate - stack traces that led to the creation of new OS threads +// block - stack traces that led to blocking on synchronization primitives +// mutex - stack traces of holders of contended mutexes // // These predefined profiles maintain themselves and panic on an explicit // [Profile.Add] or [Profile.Remove] method call. @@ -169,6 +170,7 @@ import ( // holds a lock for 1s while 5 other goroutines are waiting for the entire // second to acquire the lock, its unlock call stack will report 5s of // contention. + type Profile struct { name string mu sync.Mutex @@ -189,6 +191,12 @@ var goroutineProfile = &Profile{ write: writeGoroutine, } +var goroutineLeakProfile = &Profile{ + name: "goroutineleak", + count: countGoroutineLeak, + write: writeGoroutineLeak, +} + var threadcreateProfile = &Profile{ name: "threadcreate", count: countThreadCreate, @@ -224,12 +232,13 @@ func lockProfiles() { if profiles.m == nil { // Initial built-in profiles. 
profiles.m = map[string]*Profile{ - "goroutine": goroutineProfile, - "threadcreate": threadcreateProfile, - "heap": heapProfile, - "allocs": allocsProfile, - "block": blockProfile, - "mutex": mutexProfile, + "goroutine": goroutineProfile, + "goroutineleak": goroutineLeakProfile, + "threadcreate": threadcreateProfile, + "heap": heapProfile, + "allocs": allocsProfile, + "block": blockProfile, + "mutex": mutexProfile, } } } @@ -739,6 +748,11 @@ func countGoroutine() int { return runtime.NumGoroutine() } +// countGoroutineLeak returns the number of leaked goroutines. +func countGoroutineLeak() int { + return int(runtime_gleakcount()) +} + // writeGoroutine writes the current runtime GoroutineProfile to w. func writeGoroutine(w io.Writer, debug int) error { if debug >= 2 { @@ -747,6 +761,23 @@ func writeGoroutine(w io.Writer, debug int) error { return writeRuntimeProfile(w, debug, "goroutine", pprof_goroutineProfileWithLabels) } +// writeGoroutineLeak first invokes a GC cycle that performs goroutine leak detection. +// It then writes the goroutine profile, filtering for leaked goroutines. +func writeGoroutineLeak(w io.Writer, debug int) error { + // Run the GC with leak detection first so that leaked goroutines + // may transition to the leaked state. + runtime_goroutineLeakGC() + + // If the debug flag is set sufficiently high, just defer to writing goroutine stacks + // like in a regular goroutine profile. Include non-leaked goroutines, too. + if debug >= 2 { + return writeGoroutineStacks(w) + } + + // Otherwise, write the goroutine leak profile. + return writeRuntimeProfile(w, debug, "goroutineleak", pprof_goroutineLeakProfileWithLabels) +} + func writeGoroutineStacks(w io.Writer) error { // We don't know how big the buffer needs to be to collect // all the goroutines. Start with 1 MB and try a few times, doubling each time. @@ -969,6 +1000,9 @@ func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile fu //go:linkname pprof_goroutineProfileWithLabels runtime.pprof_goroutineProfileWithLabels func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) +//go:linkname pprof_goroutineLeakProfileWithLabels runtime.pprof_goroutineLeakProfileWithLabels +func pprof_goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) + //go:linkname pprof_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond func pprof_cyclesPerSecond() int64 diff --git a/src/runtime/pprof/runtime.go b/src/runtime/pprof/runtime.go index 8d37c7d3add146..ec09700d811efa 100644 --- a/src/runtime/pprof/runtime.go +++ b/src/runtime/pprof/runtime.go @@ -29,6 +29,12 @@ func runtime_setProfLabel(labels unsafe.Pointer) // runtime_getProfLabel is defined in runtime/proflabel.go. func runtime_getProfLabel() unsafe.Pointer +// runtime_gleakcount is defined in runtime/proc.go. +func runtime_gleakcount() int32 + +// runtime_goroutineLeakGC is defined in runtime/mgc.go. +func runtime_goroutineLeakGC() + // SetGoroutineLabels sets the current goroutine's labels to match ctx. // A new goroutine inherits the labels of the goroutine that created it. // This is a lower-level API than [Do], which should be used instead when possible. 
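Because the new profile is registered alongside the existing built-ins, it can be collected with the standard tooling. A hypothetical usage sketch (not part of the patch; the server address is invented), assuming the net/http/pprof and runtime/pprof changes above:

package main

import (
	"log"
	"net/http"
	_ "net/http/pprof" // registers /debug/pprof/, which now lists "goroutineleak"
	"os"
	"runtime/pprof"
)

func main() {
	// Served over HTTP: GET /debug/pprof/goroutineleak runs a goroutine leak
	// detection GC cycle and returns a profile of the leaked goroutines.
	// With debug=2 it instead returns panic-style stacks of all goroutines,
	// with leaked ones marked "(leaked)" in their headers.
	go func() {
		log.Println(http.ListenAndServe("localhost:6060", nil))
	}()

	// Or programmatically, through runtime/pprof:
	if p := pprof.Lookup("goroutineleak"); p != nil {
		p.WriteTo(os.Stderr, 1)
	}
}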
diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 2be179dbfad794..eff1edae7c53fc 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -5553,6 +5553,25 @@ func gcount() int32 { return n } +//go:linkname runtime_gleakcount runtime/pprof.runtime_gleakcount +func runtime_gleakcount() int32 { + return gleakcount() +} + +// gleakcount returns the number of leaked goroutines currently reported by +// the runtime. Protected by allglock. +func gleakcount() int32 { + n := int32(0) + lock(&allglock) + for _, g := range allgs { + if readgstatus(g) == _Gleaked { + n++ + } + } + unlock(&allglock) + return n +} + func mcount() int32 { return int32(sched.mnext - sched.nmfreed) } diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go index 0ebef6d6e2adb1..424745d2357dc9 100644 --- a/src/runtime/runtime1.go +++ b/src/runtime/runtime1.go @@ -316,7 +316,6 @@ var debug struct { dontfreezetheworld int32 efence int32 gccheckmark int32 - gcgoroutineleaks int32 gcpacertrace int32 gcshrinkstackoff int32 gcstoptheworld int32 @@ -382,7 +381,6 @@ var dbgvars = []*dbgVar{ {name: "efence", value: &debug.efence}, {name: "gccheckmark", value: &debug.gccheckmark}, {name: "gcpacertrace", value: &debug.gcpacertrace}, - {name: "gcgoroutineleaks", value: &debug.gcgoroutineleaks}, {name: "gcshrinkstackoff", value: &debug.gcshrinkstackoff}, {name: "gcstoptheworld", value: &debug.gcstoptheworld}, {name: "gctrace", value: &debug.gctrace}, diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index e8fef35da7d104..9e70821891494e 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -1227,7 +1227,7 @@ func goroutineheader(gp *g) { } // Override. - if gpstatus == _Gwaiting && gp.waitreason != waitReasonZero { + if (gpstatus == _Gwaiting || gpstatus == _Gleaked) && gp.waitreason != waitReasonZero { status = gp.waitreason.String() } @@ -1246,6 +1246,9 @@ func goroutineheader(gp *g) { } } print(" [", status) + if gpstatus == _Gleaked { + print(" (leaked)") + } if isScan { print(" (scan)") } From 0df23546b6e0bb4827de84251d7c2894fe746ad4 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Thu, 7 Aug 2025 13:01:32 +0200 Subject: [PATCH 21/36] Goroutine leak tests updated to collect goroutine leak profiles. 
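For context on the test updates below: instead of relying on the GC printing leaks directly (removed in the mgc.go change above), the testdata programs now report leaks through the new profile and the harness parses the resulting stacks. A hypothetical sketch of that reporting step (invented names; the real logic lives in the goroutineleakgc.go and gc_test.go changes below):

package main

import (
	"os"
	"runtime/pprof"
	"time"
)

// reportLeaks stands in for the step a testdata program performs after
// spawning its leaky workload.
func reportLeaks() {
	// Let the goroutines reach their blocking points first.
	time.Sleep(100 * time.Millisecond)
	// Writing the profile first runs a leak-detection GC cycle; with debug=2
	// the output is the panic-style dump whose "goroutine N [wait reason (leaked)]"
	// headers match the format documented by extractLeaks in gc_test.go.
	if p := pprof.Lookup("goroutineleak"); p != nil {
		p.WriteTo(os.Stdout, 2)
	}
}

func main() { reportLeaks() }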
--- src/runtime/gc_test.go | 670 +++++++++--------- .../testgoroutineleakgc/cockroach10214.go | 5 +- .../testgoroutineleakgc/cockroach1055.go | 6 +- .../testgoroutineleakgc/cockroach10790.go | 6 +- .../testgoroutineleakgc/cockroach13197.go | 6 +- .../testgoroutineleakgc/cockroach13755.go | 6 +- .../testgoroutineleakgc/cockroach1462.go | 5 +- .../testgoroutineleakgc/cockroach16167.go | 5 +- .../testgoroutineleakgc/cockroach18101.go | 6 +- .../testgoroutineleakgc/cockroach2448.go | 5 +- .../testgoroutineleakgc/cockroach24808.go | 6 +- .../testgoroutineleakgc/cockroach25456.go | 6 +- .../testgoroutineleakgc/cockroach35073.go | 6 +- .../testgoroutineleakgc/cockroach35931.go | 6 +- .../testgoroutineleakgc/cockroach3710.go | 5 +- .../testgoroutineleakgc/cockroach584.go | 6 +- .../testgoroutineleakgc/cockroach6181.go | 9 +- .../testgoroutineleakgc/cockroach7504.go | 5 +- .../testgoroutineleakgc/cockroach9935.go | 6 +- .../testdata/testgoroutineleakgc/etcd10492.go | 38 +- .../testdata/testgoroutineleakgc/etcd5509.go | 9 +- .../testdata/testgoroutineleakgc/etcd6708.go | 22 +- .../testdata/testgoroutineleakgc/etcd6857.go | 6 +- .../testdata/testgoroutineleakgc/etcd6873.go | 6 +- .../testdata/testgoroutineleakgc/etcd7492.go | 5 +- .../testdata/testgoroutineleakgc/etcd7902.go | 6 +- .../testgoroutineleakgc/goroutineleakgc.go | 47 +- .../testdata/testgoroutineleakgc/grpc1275.go | 6 +- .../testdata/testgoroutineleakgc/grpc1424.go | 6 +- .../testdata/testgoroutineleakgc/grpc1460.go | 5 +- .../testdata/testgoroutineleakgc/grpc3017.go | 4 + .../testdata/testgoroutineleakgc/grpc660.go | 6 +- .../testdata/testgoroutineleakgc/grpc795.go | 6 +- .../testdata/testgoroutineleakgc/grpc862.go | 6 +- .../testdata/testgoroutineleakgc/hugo3251.go | 5 +- .../testgoroutineleakgc/istio16224.go | 6 +- .../testgoroutineleakgc/istio17860.go | 6 +- .../testgoroutineleakgc/istio18454.go | 6 +- .../testgoroutineleakgc/kubernetes10182.go | 5 +- .../testgoroutineleakgc/kubernetes11298.go | 6 +- .../testgoroutineleakgc/kubernetes13135.go | 6 +- .../testgoroutineleakgc/kubernetes1321.go | 5 +- .../testgoroutineleakgc/kubernetes25331.go | 6 +- .../testgoroutineleakgc/kubernetes26980.go | 5 +- .../testgoroutineleakgc/kubernetes30872.go | 5 +- .../testgoroutineleakgc/kubernetes38669.go | 6 +- .../testgoroutineleakgc/kubernetes5316.go | 6 +- .../testgoroutineleakgc/kubernetes58107.go | 5 +- .../testgoroutineleakgc/kubernetes62464.go | 5 +- .../testgoroutineleakgc/kubernetes6632.go | 6 +- .../testgoroutineleakgc/kubernetes70277.go | 6 +- .../testdata/testgoroutineleakgc/moby17176.go | 6 +- .../testdata/testgoroutineleakgc/moby21233.go | 6 +- .../testdata/testgoroutineleakgc/moby25348.go | 26 +- .../testdata/testgoroutineleakgc/moby27782.go | 5 +- .../testdata/testgoroutineleakgc/moby28462.go | 5 +- .../testdata/testgoroutineleakgc/moby29733.go | 28 +- .../testdata/testgoroutineleakgc/moby30408.go | 6 +- .../testdata/testgoroutineleakgc/moby33781.go | 6 +- .../testdata/testgoroutineleakgc/moby36114.go | 6 +- .../testdata/testgoroutineleakgc/moby4395.go | 49 -- .../testdata/testgoroutineleakgc/moby4951.go | 24 +- .../testdata/testgoroutineleakgc/moby7559.go | 7 +- .../testdata/testgoroutineleakgc/patterns.go | 21 +- .../testgoroutineleakgc/serving2137.go | 5 +- .../testgoroutineleakgc/syncthing4829.go | 6 +- .../testgoroutineleakgc/syncthing5795.go | 6 +- 67 files changed, 686 insertions(+), 574 deletions(-) mode change 100644 => 100755 src/runtime/gc_test.go delete mode 100644 src/runtime/testdata/testgoroutineleakgc/moby4395.go diff 
--git a/src/runtime/gc_test.go b/src/runtime/gc_test.go old mode 100644 new mode 100755 index dbb69d2f676504..395afff3a6e226 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -18,7 +18,6 @@ import ( "runtime" "runtime/debug" "slices" - "strconv" "strings" "sync" "sync/atomic" @@ -1109,7 +1108,6 @@ func TestGoroutineLeakGC(t *testing.T) { // Repetitions are used to amortize flakiness in some tests. type testCase struct { name string - repetitions int expectedLeaks map[*regexp.Regexp]bool // flakyLeaks are goroutine leaks that are too flaky to be reliably detected. @@ -1118,85 +1116,143 @@ func TestGoroutineLeakGC(t *testing.T) { flakyLeaks map[*regexp.Regexp]struct{} } - // FLAKYBOUNDARY is a special string that delineates flaky leaks from expected leaks - // when using makeTest (below) to construct test cases. - const FLAKYBOUNDARY = "$flaky-leaks$" - - // makeTest is a short-hand for creating test cases. + // makeAnyTest is a short-hand for creating test cases. // Each of the leaks in the list is identified by a regular expression. - // - // If a leak is FLAKYBOUNDARY, it notifies makeTest that any remaining - // leak patterns should be added to the flakyLeaks map. - makeTest := func( - cfg testCase, + // If a leak is flaky, it is added to the flakyLeaks map. + makeAnyTest := func( + name string, + flaky bool, leaks ...string) testCase { tc := testCase{ - name: cfg.name, + name: name, expectedLeaks: make(map[*regexp.Regexp]bool, len(leaks)), flakyLeaks: make(map[*regexp.Regexp]struct{}, len(leaks)), } - // Default to 1 repetition if not specified. - // One extra rep for configured tests is irrelevant. - tc.repetitions = cfg.repetitions | 1 - const ( - EXPECTED int = iota - FLAKY - ) - - mode := EXPECTED for _, leak := range leaks { - if leak == FLAKYBOUNDARY { - mode = FLAKY - continue - } - - switch mode { - case EXPECTED: + if !flaky { tc.expectedLeaks[regexp.MustCompile(leak)] = false - case FLAKY: + } else { tc.flakyLeaks[regexp.MustCompile(leak)] = struct{}{} } } + return tc } + // makeTest is a short-hand for creating non-flaky test cases. + makeTest := func(name string, leaks ...string) testCase { + return makeAnyTest(name, false, leaks...) + } + + // makeFlakyTest is a short-hand for creating flaky test cases. + makeFlakyTest := func(name string, leaks ...string) testCase { + return makeAnyTest(name, true, leaks...) + } + + goroutineHeader := regexp.MustCompile(`goroutine \d+ \[`) + + // extractLeaks takes the output of a test and splits it into a + // list of strings denoting goroutine leaks. + // + // If the input is: + // + // goroutine 1 [wait reason (leaked)]: + // main.leaked() + // ./testgoroutineleakgc/foo.go:37 +0x100 + // created by main.main() + // ./testgoroutineleakgc/main.go:10 +0x20 + // + // goroutine 2 [wait reason (leaked)]: + // main.leaked2() + // ./testgoroutineleakgc/foo.go:37 +0x100 + // created by main.main() + // ./testgoroutineleakgc/main.go:10 +0x20 + // + // The output is (as a list of strings): + // + // leaked() [wait reason] + // leaked2() [wait reason] + extractLeaks := func(output string) []string { + stacks := strings.Split(output, "\n\ngoroutine") + var leaks []string + for _, stack := range stacks { + lines := strings.Split(stack, "\n") + if len(lines) < 5 { + // Expecting at least the following lines (where n=len(lines)-1): + // + // [0] goroutine n [wait reason (leaked)] + // ... + // [n-3] bottom.leak.frame(...) 
+ // [n-2] ./bottom/leak/frame/source.go:line + // [n-1] created by go.instruction() + // [n] ./go/instruction/source.go:line + continue + } + + if !strings.Contains(lines[0], "(leaked)") { + // Ignore non-leaked goroutines. + continue + } + + // Get the wait reason from the goroutine header. + header := lines[0] + waitReason := goroutineHeader.ReplaceAllString(header, "[") + waitReason = strings.ReplaceAll(waitReason, " (leaked)", "") + + // Get the function name from the stack trace (should be two lines above `created by`). + var funcName string + for i := len(lines) - 1; i >= 0; i-- { + if strings.Contains(lines[i], "created by") { + funcName = strings.TrimPrefix(lines[i-2], "main.") + break + } + } + if funcName == "" { + t.Fatalf("failed to extract function name from stack trace: %s", lines) + } + + leaks = append(leaks, funcName+" "+waitReason) + } + return leaks + } + // Micro tests involve very simple leaks for each type of concurrency primitive operation. microTests := []testCase{ - makeTest(testCase{name: "NilRecv"}, `\[chan receive \(nil chan\)\]`), - makeTest(testCase{name: "NilSend"}, `\[chan send \(nil chan\)\]`), - makeTest(testCase{name: "SelectNoCases"}, `\[select \(no cases\)\]`), - makeTest(testCase{name: "ChanRecv"}, `\[chan receive\]`), - makeTest(testCase{name: "ChanSend"}, `\[chan send\]`), - makeTest(testCase{name: "Select"}, `\[select\]`), - makeTest(testCase{name: "WaitGroup"}, `\[sync\.WaitGroup\.Wait\]`), - makeTest(testCase{name: "MutexStack"}, `\[sync\.Mutex\.Lock\]`), - makeTest(testCase{name: "MutexHeap"}, `\[sync\.Mutex\.Lock\]`), - makeTest(testCase{name: "Cond"}, `\[sync\.Cond\.Wait\]`), - makeTest(testCase{name: "RWMutexRLock"}, `\[sync\.RWMutex\.RLock\]`), - makeTest(testCase{name: "RWMutexLock"}, `\[sync\.(RW)?Mutex\.Lock\]`), - makeTest(testCase{name: "Mixed"}, `\[sync\.WaitGroup\.Wait\]`, `\[chan send\]`), - makeTest(testCase{name: "NoLeakGlobal"}), + makeTest("NilRecv", `\[chan receive \(nil chan\)\]`), + makeTest("NilSend", `\[chan send \(nil chan\)\]`), + makeTest("SelectNoCases", `\[select \(no cases\)\]`), + makeTest("ChanRecv", `\[chan receive\]`), + makeTest("ChanSend", `\[chan send\]`), + makeTest("Select", `\[select\]`), + makeTest("WaitGroup", `\[sync\.WaitGroup\.Wait\]`), + makeTest("MutexStack", `\[sync\.Mutex\.Lock\]`), + makeTest("MutexHeap", `\[sync\.Mutex\.Lock\]`), + makeTest("Cond", `\[sync\.Cond\.Wait\]`), + makeTest("RWMutexRLock", `\[sync\.RWMutex\.RLock\]`), + makeTest("RWMutexLock", `\[sync\.(RW)?Mutex\.Lock\]`), + makeTest("Mixed", `\[sync\.WaitGroup\.Wait\]`, `\[chan send\]`), + makeTest("NoLeakGlobal"), } // Common goroutine leak patterns. 
// Extracted from "Unveiling and Vanquishing Goroutine Leaks in Enterprise Microservices: A Dynamic Analysis Approach" // doi:10.1109/CGO57630.2024.10444835 patternTestCases := []testCase{ - makeTest(testCase{name: "NoCloseRange"}, - `main\.NoCloseRange\.gowrap1 .* \[chan send\]`, - `main\.noCloseRange\.func1 .* \[chan receive\]`), - makeTest(testCase{name: "MethodContractViolation"}, - `main\.worker\.Start\.func1 .* \[select\]`), - makeTest(testCase{name: "DoubleSend"}, - `main\.DoubleSend\.func3 .* \[chan send\]`), - makeTest(testCase{name: "EarlyReturn"}, - `main\.earlyReturn\.func1 .* \[chan send\]`), - makeTest(testCase{name: "NCastLeak"}, - `main\.nCastLeak\.func1 .* \[chan send\]`, - `main\.NCastLeak\.func2 .* \[chan receive\]`), - makeTest(testCase{name: "Timeout"}, - `main\.timeout\.func1 .* \[chan send\]`), + makeTest("NoCloseRange", + `noCloseRange\(.* \[chan send\]`, + `noCloseRange\.func1\(.* \[chan receive\]`), + makeTest("MethodContractViolation", + `worker\.Start\.func1.* \[select\]`), + makeTest("DoubleSend", + `DoubleSend\.func3.* \[chan send\]`), + makeTest("EarlyReturn", + `earlyReturn\.func1.* \[chan send\]`), + makeTest("NCastLeak", + `nCastLeak\.func1.* \[chan send\]`, + `NCastLeak\.func2.* \[chan receive\]`), + makeTest("Timeout", + `timeout\.func1.* \[chan send\]`), } // GoKer tests from "GoBench: A Benchmark Suite of Real-World Go Concurrency Bugs". @@ -1204,274 +1260,246 @@ func TestGoroutineLeakGC(t *testing.T) { // doi:10.1109/CGO51591.2021.9370317. // // This list is curated for tests that are not excessively flaky. + // Some tests are also excluded because they are redundant. gokerTestCases := []testCase{ - makeTest(testCase{name: "Cockroach584"}, - `main\.Cockroach584\.func2\.1 .* \[sync\.Mutex\.Lock\]`, + makeFlakyTest("Cockroach584", + `Cockroach584\.func2\.1\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest(testCase{name: "Cockroach1055"}, - `main\.Cockroach1055\.func2 .* \[chan receive\]`, - `main\.Cockroach1055\.func2\.2 .* \[sync\.WaitGroup\.Wait\]`, - `main\.Cockroach1055\.func2\.1 .* \[chan receive\]`, - FLAKYBOUNDARY, - `main\.Cockroach1055\.func2\.1 .* \[sync\.Mutex\.Lock\]`, + makeFlakyTest("Cockroach1055", + `Cockroach1055\.func2\(.* \[chan receive\]`, + `Cockroach1055\.func2\.2\(.* \[sync\.WaitGroup\.Wait\]`, + `Cockroach1055\.func2\.1\(.* \[chan receive\]`, + `Cockroach1055\.func2\.1\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest(testCase{name: "Cockroach1462"}, - `main\.\(\*Stopper_cockroach1462\)\.RunWorker\.func1 .* \[chan send\]`, - `main\.Cockroach1462\.func2 .* \[sync\.WaitGroup\.Wait\]`, + makeFlakyTest("Cockroach1462", + `\(\*Stopper_cockroach1462\)\.RunWorker\.func1\(.* \[chan send\]`, + `Cockroach1462\.func2\(.* \[sync\.WaitGroup\.Wait\]`, ), - makeTest(testCase{name: "Cockroach2448"}, - `main\.Cockroach2448\.func2\.gowrap1 .* \[select\]`, - `main\.Cockroach2448\.func2\.gowrap2 .* \[select\]`, + makeFlakyTest("Cockroach2448", + `\(\*Store_cockroach2448\)\.processRaft\(.* \[select\]`, + `\(\*state_cockroach2448\)\.start\(.* \[select\]`, ), - makeTest(testCase{name: "Cockroach3710"}, - `main\.Cockroach3710\.func2\.gowrap1 .* \[sync\.RWMutex\.RLock\]`, - `main\.\(\*Store_cockroach3710\)\.processRaft\.func1 .* \[sync\.RWMutex\.Lock\]`, + makeFlakyTest("Cockroach3710", + `\(\*Store_cockroach3710\)\.ForceRaftLogScanAndProcess\(.* \[sync\.RWMutex\.RLock\]`, + `\(\*Store_cockroach3710\)\.processRaft\.func1\(.* \[sync\.RWMutex\.Lock\]`, ), - makeTest(testCase{name: "Cockroach6181", repetitions: 20}, - `main\.testRangeCacheCoalescedRequests_cockroach6181 .* 
\[sync\.WaitGroup\.Wait\]`, - `main\.testRangeCacheCoalescedRequests_cockroach6181\.func1\.1 .* \[sync\.(RW)?Mutex\.Lock\]`, - `main\.testRangeCacheCoalescedRequests_cockroach6181\.func1\.1 .* \[sync\.RWMutex\.RLock\]`, + makeFlakyTest("Cockroach6181", + `testRangeCacheCoalescedRequests_cockroach6181\(.* \[sync\.WaitGroup\.Wait\]`, + `testRangeCacheCoalescedRequests_cockroach6181\.func1\.1\(.* \[sync\.(RW)?Mutex\.Lock\]`, + `testRangeCacheCoalescedRequests_cockroach6181\.func1\.1\(.* \[sync\.RWMutex\.RLock\]`, ), - makeTest(testCase{name: "Cockroach7504", repetitions: 100}, - `main\.Cockroach7504\.func2\.1 .* \[sync\.Mutex\.Lock\]`, - `main\.Cockroach7504\.func2\.2 .* \[sync\.Mutex\.Lock\]`, + makeFlakyTest("Cockroach7504", + `Cockroach7504\.func2\.1.* \[sync\.Mutex\.Lock\]`, + `Cockroach7504\.func2\.2.* \[sync\.Mutex\.Lock\]`, ), - makeTest(testCase{name: "Cockroach9935"}, - `main\.Cockroach9935\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + makeFlakyTest("Cockroach9935", + `\(\*loggingT_cockroach9935\)\.outputLogEntry\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest(testCase{name: "Cockroach10214"}, - `main\.Cockroach10214\.func2\.1 .* \[sync\.Mutex\.Lock\]`, - `main\.Cockroach10214\.func2\.2 .* \[sync\.Mutex\.Lock\]`, + makeFlakyTest("Cockroach10214", + `Cockroach10214\.func2\.1\(.* \[sync\.Mutex\.Lock\]`, + `Cockroach10214\.func2\.2\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest(testCase{name: "Cockroach10790"}, - `main\.\(\*Replica_cockroach10790\)\.beginCmds\.func1 .* \[chan receive\]`, + makeTest("Cockroach10790", + `\(\*Replica_cockroach10790\)\.beginCmds\.func1\(.* \[chan receive\]`, ), - makeTest(testCase{name: "Cockroach13197"}, - `main\.\(\*DB_cockroach13197\)\.begin\.gowrap1 .* \[chan receive\]`, + makeTest("Cockroach13197", + `\(\*Tx_cockroach13197\)\.awaitDone\(.* \[chan receive\]`, ), - makeTest(testCase{name: "Cockroach13755"}, - `main\.\(\*Rows_cockroach13755\)\.initContextClose\.gowrap1 .* \[chan receive\]`, + makeTest("Cockroach13755", + `\(\*Rows_cockroach13755\)\.awaitDone\(.* \[chan receive\]`, ), - makeTest(testCase{name: "Cockroach16167"}, - `main\.Cockroach16167\.func2 .* \[sync\.RWMutex\.RLock\]`, - `main\.Cockroach16167\.func2\.gowrap1 .* \[sync\.RWMutex\.Lock\]`, + makeFlakyTest("Cockroach16167", + `Cockroach16167\.func2\(.* \[sync\.RWMutex\.RLock\]`, + `\(\*Executor_cockroach16167\)\.Start\(.* \[sync\.RWMutex\.Lock\]`, ), - makeTest(testCase{name: "Cockroach10790"}, - `main\.\(\*Replica_cockroach10790\)\.beginCmds\.func1 .* \[chan receive\]`, + makeFlakyTest("Cockroach18101", + `restore_cockroach18101\.func1\(.* \[chan send\]`, ), - makeTest(testCase{name: "Cockroach13197"}, - `main\.\(\*DB_cockroach13197\)\.begin\.gowrap1 .* \[chan receive\]`, + makeTest("Cockroach24808", + `Cockroach24808\.func2\(.* \[chan send\]`, ), - makeTest(testCase{name: "Cockroach13755"}, - `main\.\(\*Rows_cockroach13755\)\.initContextClose\.gowrap1 .* \[chan receive\]`, + makeTest("Cockroach25456", + `Cockroach25456\.func2\(.* \[chan receive\]`, ), - makeTest(testCase{name: "Cockroach16167"}, - `main\.Cockroach16167\.func2 .* \[sync\.RWMutex\.RLock\]`, - `main\.Cockroach16167\.func2\.gowrap1 .* \[sync\.RWMutex\.Lock\]`, + makeTest("Cockroach35073", + `Cockroach35073\.func2.1\(.* \[chan send\]`, + `Cockroach35073\.func2\(.* \[chan send\]`, ), - makeTest(testCase{name: "Cockroach18101"}, - `main\.restore_cockroach18101\.func1 .* \[chan send\]`, + makeTest("Cockroach35931", + `Cockroach35931\.func2\(.* \[chan send\]`, ), - makeTest(testCase{name: "Cockroach24808"}, - `main\.Cockroach24808\.func2 .* 
\[chan send\]`, + makeFlakyTest("Etcd5509", + `Etcd5509\.func2\(.* \[sync\.RWMutex\.Lock\]`, ), - makeTest(testCase{name: "Cockroach25456"}, - `main\.Cockroach25456\.func2 .* \[chan receive\]`, + makeFlakyTest("Etcd6708", + `Etcd6708\.func2\(.* \[sync\.RWMutex\.RLock\]`, ), - makeTest(testCase{name: "Cockroach35073"}, - `main\.Cockroach35073\.func2.1 .* \[chan send\]`, - `main\.Cockroach35073\.func2 .* \[chan send\]`, + makeFlakyTest("Etcd6857", + `\(\*node_etcd6857\)\.Status\(.* \[chan send\]`, ), - makeTest(testCase{name: "Cockroach35931"}, - `main\.Cockroach35931\.func2 .* \[chan send\]`, + makeFlakyTest("Etcd6873", + `\(\*watchBroadcasts_etcd6873\)\.stop\(.* \[chan receive\]`, + `newWatchBroadcasts_etcd6873\.func1\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest(testCase{name: "Etcd5509"}, - `main\.Etcd5509\.func2 .* \[sync\.RWMutex\.Lock\]`, + makeFlakyTest("Etcd7492", + `Etcd7492\.func2\(.* \[sync\.WaitGroup\.Wait\]`, + `Etcd7492\.func2\.1\(.* \[chan send\]`, + `\(\*simpleTokenTTLKeeper_etcd7492\)\.run\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest(testCase{name: "Etcd6857"}, - `main\.Etcd6857\.func2\.gowrap2 .* \[chan send\]`, + makeTest("Etcd7902", + `doRounds_etcd7902\.func1\(.* \[chan receive\]`, + `doRounds_etcd7902\.func1\(.* \[sync\.Mutex\.Lock\]`, + `runElectionFunc_etcd7902\(.* \[sync\.WaitGroup\.Wait\]`, ), - makeTest(testCase{name: "Etcd6873"}, - `main\.Etcd6873\.func2\.gowrap1 .* \[chan receive\]`, - `main\.newWatchBroadcasts_etcd6873\.func1 .* \[sync\.Mutex\.Lock\]`, + makeTest("Etcd10492", + `Etcd10492\.func2\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest(testCase{name: "Etcd7492"}, - `main\.Etcd7492\.func2 .* \[sync\.WaitGroup\.Wait\]`, - `main\.Etcd7492\.func2\.1 .* \[chan send\]`, - `main\.NewSimpleTokenTTLKeeper_etcd7492\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + makeTest("Grpc660", + `\(\*benchmarkClient_grpc660\)\.doCloseLoopUnary\.func1\(.* \[chan send\]`, ), - makeTest(testCase{name: "Etcd7902"}, - `main\.doRounds_etcd7902\.gowrap1 .* \[chan receive\]`, - `main\.doRounds_etcd7902\.gowrap1 .* \[sync\.Mutex\.Lock\]`, - `main\.runElectionFunc_etcd7902 .* \[sync\.WaitGroup\.Wait\]`, + makeFlakyTest("Grpc795", + `\(\*Server_grpc795\)\.Serve\(.* \[sync\.Mutex\.Lock\]`, + `testServerGracefulStopIdempotent_grpc795\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest(testCase{name: "Etcd10492"}, - `main\.Etcd10492\.func2 .* \[sync\.Mutex\.Lock\]`, + makeFlakyTest("Grpc862", + `DialContext_grpc862\.func2\(.* \[chan receive\]`), + makeTest("Grpc1275", + `testInflightStreamClosing_grpc1275\.func1\(.* \[chan receive\]`), + makeTest("Grpc1424", + `DialContext_grpc1424\.func1\(.* \[chan receive\]`), + makeFlakyTest("Grpc1460", + `\(\*http2Client_grpc1460\)\.keepalive\(.* \[chan receive\]`, + `\(\*http2Client_grpc1460\)\.NewStream\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest(testCase{name: "Grpc660"}, - `main\.\(\*benchmarkClient_grpc660\)\.doCloseLoopUnary\.func1 .* \[chan send\]`, - ), - makeTest(testCase{name: "Grpc795"}, - `main\.\(\*test_grpc795\)\.startServer\.gowrap1 .* \[sync\.Mutex\.Lock\]`, - `main\.testServerGracefulStopIdempotent_grpc795 .* \[sync\.Mutex\.Lock\]`, - ), - makeTest(testCase{name: "Grpc862"}, - `main\.DialContext_grpc862\.func2 .* \[chan receive\]`), - makeTest(testCase{name: "Grpc1275"}, - `main\.testInflightStreamClosing_grpc1275\.func1 .* \[chan receive\]`), - makeTest(testCase{name: "Grpc1424"}, - `main\.DialContext_grpc1424\.func1 .* \[chan receive\]`), - makeTest(testCase{name: "Grpc1460"}, - `main\.Grpc1460\.func2\.gowrap1 .* \[chan receive\]`, - `main\.Grpc1460\.func2\.gowrap2 .* 
\[sync\.Mutex\.Lock\]`, - ), - makeTest(testCase{name: "Grpc3017", repetitions: 20}, + makeFlakyTest("Grpc3017", // grpc/3017 involves a goroutine leak that also simultaneously engages many GC assists. - FLAKYBOUNDARY, - // All the leaks are flaky. - `main\.Grpc3017\.func2 .* \[chan receive\]`, - `main\.Grpc3017\.func2\.1 .* \[sync\.Mutex\.Lock\]`, - `main\.\(\*lbCacheClientConn_grpc3017\)\.RemoveSubConn\.func1 .* \[sync\.Mutex\.Lock\]`, + `Grpc3017\.func2\(.* \[chan receive\]`, + `Grpc3017\.func2\.1\(.* \[sync\.Mutex\.Lock\]`, + `\(\*lbCacheClientConn_grpc3017\)\.RemoveSubConn\.func1\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest(testCase{name: "Hugo3251", repetitions: 20}, - `main\.Hugo3251\.func2 .* \[sync\.WaitGroup\.Wait\]`, - `main\.Hugo3251\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, - `main\.Hugo3251\.func2\.gowrap1 .* \[sync\.RWMutex\.RLock\]`, + makeFlakyTest("Hugo3251", + `Hugo3251\.func2\(.* \[sync\.WaitGroup\.Wait\]`, + `Hugo3251\.func2\.1\(.* \[sync\.Mutex\.Lock\]`, + `Hugo3251\.func2\.1\(.* \[sync\.RWMutex\.RLock\]`, ), - makeTest(testCase{name: "Hugo5379"}, - `main\.\(\*Site_hugo5379\)\.renderPages\.gowrap1 .* \[sync\.Mutex\.Lock\]`, - `main\.\(\*Page_hugo5379\)\.initContentPlainAndMeta(\.\(\*Page_hugo5379\)\.initContent)?\.func1\.1 .* \[sync\.Mutex\.Lock\]`, - `main\.Hugo5379\.func2 .* \[sync\.WaitGroup\.Wait\]`, - FLAKYBOUNDARY, - `main\.\(\*Page_hugo5379\)\.initContent\.func1\.1 .* \[sync\.Mutex\.Lock\]`, + makeFlakyTest("Istio16224", + `Istio16224\.func2\(.* \[sync\.Mutex\.Lock\]`, + `\(\*controller_istio16224\)\.Run\(.* \[chan send\]`, + `\(\*controller_istio16224\)\.Run\(.* \[chan receive\]`, ), - makeTest(testCase{name: "Istio16224"}, - `main\.Istio16224\.func2 .* \[sync\.Mutex\.Lock\]`, - `main\.Istio16224\.func2\.gowrap1 .* \[chan send\]`, - FLAKYBOUNDARY, - // This is also a leak, but it is too flaky to be reliably detected. - `main\.Istio16224\.func2\.gowrap1 .* \[chan receive\]`, + makeFlakyTest("Istio17860", + `\(\*agent_istio17860\)\.runWait\(.* \[chan send\]`, ), - makeTest(testCase{name: "Istio17860"}, - `main\.\(\*agent_istio17860\)\.Restart\.gowrap2 .* \[chan send\]`, + makeFlakyTest("Istio18454", + `\(\*Worker_istio18454\)\.Start\.func1\(.* \[chan receive\]`, + `\(\*Worker_istio18454\)\.Start\.func1\(.* \[chan send\]`, ), - makeTest(testCase{name: "Istio18454"}, - `main\.\(\*Worker_istio18454\)\.Start\.func1 .* \[chan receive\]`, - `main\.\(\*Worker_istio18454\)\.Start\.func1 .* \[chan send\]`, - ), - // NOTE (vsaioc): + // NOTE(vsaioc): // Kubernetes/1321 is excluded due to a race condition in the original program // that may, in extremely rare cases, lead to nil pointer dereference crashes. // (Reproducible even with regular GC). Only kept here for posterity. 
// // makeTest(testCase{name: "Kubernetes1321"}, - // `main\.NewMux_kubernetes1321\.gowrap1 .* \[chan send\]`, - // `main\.testMuxWatcherClose_kubernetes1321 .* \[sync\.Mutex\.Lock\]`), - makeTest(testCase{name: "Kubernetes5316"}, - `main\.finishRequest_kubernetes5316\.func1 .* \[chan send\]`, + // `NewMux_kubernetes1321\.gowrap1\(.* \[chan send\]`, + // `testMuxWatcherClose_kubernetes1321\(.* \[sync\.Mutex\.Lock\]`), + makeFlakyTest("Kubernetes5316", + `finishRequest_kubernetes5316\.func1\(.* \[chan send\]`, ), - makeTest(testCase{name: "Kubernetes6632"}, - `main\.Kubernetes6632\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, - `main\.Kubernetes6632\.func2\.gowrap2 .* \[chan send\]`, + makeFlakyTest("Kubernetes6632", + `\(\*idleAwareFramer_kubernetes6632\)\.monitor\(.* \[sync\.Mutex\.Lock\]`, + `\(\*idleAwareFramer_kubernetes6632\)\.WriteFrame\(.* \[chan send\]`, ), - makeTest(testCase{name: "Kubernetes10182"}, - `main\.\(\*statusManager_kubernetes10182\)\.Start\.func1 .* \[sync\.Mutex\.Lock\]`, - `main\.Kubernetes10182\.func2\.gowrap2 .* \[chan send\]`, - `main\.Kubernetes10182\.func2\.gowrap3 .* \[chan send\]`, + makeFlakyTest("Kubernetes10182", + `\(\*statusManager_kubernetes10182\)\.Start\.func1\(.* \[sync\.Mutex\.Lock\]`, + `\(\*statusManager_kubernetes10182\)\.SetPodStatus\(.* \[chan send\]`, ), - makeTest(testCase{name: "Kubernetes11298"}, + makeFlakyTest("Kubernetes11298", // This entire test is flaky in producing leaks. Run it only to check for crashes. - FLAKYBOUNDARY, - `main\.After_kubernetes11298\.func1 .* \[chan receive\]`, - `main\.After_kubernetes11298\.func1 .* \[sync\.Cond\.Wait\]`, - `main\.Kubernetes11298\.func2 .* \[chan receive\]`, - ), - makeTest(testCase{name: "Kubernetes13135"}, - `main\.Kubernetes13135\.func2 .* \[sync\.WaitGroup\.Wait\]`, + `After_kubernetes11298\.func1\(.* \[chan receive\]`, + `After_kubernetes11298\.func1\(.* \[sync\.Cond\.Wait\]`, + `Kubernetes11298\.func2\(.* \[chan receive\]`, ), - makeTest(testCase{name: "Kubernetes25331"}, - `main\.Kubernetes25331\.func2\.gowrap1 .* \[chan send\]`, + makeFlakyTest("Kubernetes13135", + `Kubernetes13135\.func2\(.* \[sync\.WaitGroup\.Wait\]`, ), - makeTest(testCase{name: "Kubernetes26980"}, - `main\.Kubernetes26980\.func2 .* \[chan receive\]`, - `main\.Kubernetes26980\.func2\.1 .* \[sync\.Mutex\.Lock\]`, - `main\.Kubernetes26980\.func2\.gowrap2 .* \[chan receive\]`, + makeTest("Kubernetes25331", + `\(\*watchChan_kubernetes25331\)\.run\(.* \[chan send\]`, ), - makeTest(testCase{name: "Kubernetes30872"}, - `main\.\(\*DelayingDeliverer_kubernetes30872\)\.StartWithHandler\.func1 .* \[sync\.Mutex\.Lock\]`, - `main\.\(\*federatedInformerImpl_kubernetes30872\)\.Start\.gowrap2 .* \[sync\.Mutex\.Lock\]`, - `main\.\(\*NamespaceController_kubernetes30872\)\.Run\.func1 .* \[sync\.Mutex\.Lock\]`, + makeFlakyTest("Kubernetes26980", + `Kubernetes26980\.func2\(.* \[chan receive\]`, + `Kubernetes26980\.func2\.1\(.* \[sync\.Mutex\.Lock\]`, + `\(\*processorListener_kubernetes26980\)\.pop\(.* \[chan receive\]`, ), - makeTest(testCase{name: "Kubernetes38669"}, - `main\.newCacheWatcher_kubernetes38669\.gowrap1 .* \[chan send\]`, + makeFlakyTest("Kubernetes30872", + `\(\*DelayingDeliverer_kubernetes30872\)\.StartWithHandler\.func1\(.* \[sync\.Mutex\.Lock\]`, + `\(\*Controller_kubernetes30872\)\.Run\(.* \[sync\.Mutex\.Lock\]`, + `\(\*NamespaceController_kubernetes30872\)\.Run\.func1\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest(testCase{name: "Kubernetes58107", repetitions: 5}, - 
`main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap[12] .* \[sync\.Cond\.Wait\]`, - `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap[12] .* \[sync\.RWMutex\.RLock\]`, - `main\.startResourceQuotaController_kubernetes58107\.gowrap2 .* \[sync\.RWMutex\.Lock\]`, + makeTest("Kubernetes38669", + `\(\*cacheWatcher_kubernetes38669\)\.process\(.* \[chan send\]`, ), - makeTest(testCase{name: "Kubernetes62464"}, - `main\.Kubernetes62464\.func2\.gowrap1 .* \[sync\.RWMutex\.RLock\]`, - `main\.Kubernetes62464\.func2\.gowrap2 .* \[sync\.RWMutex\.Lock\]`, + makeFlakyTest("Kubernetes58107", + `\(\*ResourceQuotaController_kubernetes58107\)\.worker\(.* \[sync\.Cond\.Wait\]`, + `\(\*ResourceQuotaController_kubernetes58107\)\.worker\(.* \[sync\.RWMutex\.RLock\]`, + `\(\*ResourceQuotaController_kubernetes58107\)\.Sync\(.* \[sync\.RWMutex\.Lock\]`, ), - makeTest(testCase{name: "Kubernetes70277"}, - `main\.Kubernetes70277\.func2 .* \[chan receive\]`, + makeFlakyTest("Kubernetes62464", + `\(\*manager_kubernetes62464\)\.reconcileState\(.* \[sync\.RWMutex\.RLock\]`, + `\(\*staticPolicy_kubernetes62464\)\.RemoveContainer\(.* \[sync\.(RW)?Mutex\.Lock\]`, ), - makeTest(testCase{name: "Moby4395"}, - `main\.Go_moby4395\.func1 .* \[chan send\]`, + makeFlakyTest("Kubernetes70277", + `Kubernetes70277\.func2\(.* \[chan receive\]`, ), - makeTest(testCase{name: "Moby4951"}, - FLAKYBOUNDARY, - `main\.Moby4951\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, - `main\.Moby4951\.func2\.gowrap2 .* \[sync\.Mutex\.Lock\]`, + makeFlakyTest("Moby4951", + `\(\*DeviceSet_moby4951\)\.DeleteDevice\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest(testCase{name: "Moby7559"}, - `main\.Moby7559\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + makeTest("Moby7559", + `\(\*UDPProxy_moby7559\)\.Run\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest(testCase{name: "Moby17176"}, - `main\.testDevmapperLockReleasedDeviceDeletion_moby17176\.func1 .* \[sync\.Mutex\.Lock\]`, + makeTest("Moby17176", + `testDevmapperLockReleasedDeviceDeletion_moby17176\.func1\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest(testCase{name: "Moby21233"}, - `main\.\(\*Transfer_moby21233\)\.Watch\.func1 .* \[chan send\]`, - `main\.\(\*Transfer_moby21233\)\.Watch\.func1 .* \[select\]`, - `main\.testTransfer_moby21233 .* \[chan receive\]`, + makeTest("Moby21233", + `\(\*Transfer_moby21233\)\.Watch\.func1\(.* \[chan send\]`, + `\(\*Transfer_moby21233\)\.Watch\.func1\(.* \[select\]`, + `testTransfer_moby21233\(.* \[chan receive\]`, ), - makeTest(testCase{name: "Moby25348"}, - `main\.Moby25348\.func2\.gowrap1 .* \[sync\.WaitGroup\.Wait\]`, + makeTest("Moby25348", + `\(\*Manager_moby25348\)\.init\(.* \[sync\.WaitGroup\.Wait\]`, ), - makeTest(testCase{name: "Moby27782"}, - `main\.\(\*JSONFileLogger_moby27782\)\.ReadLogs\.gowrap1 .* \[sync\.Cond\.Wait\]`, - `main\.NewWatcher_moby27782\.gowrap1 .* \[select\]`, + makeFlakyTest("Moby27782", + `\(\*JSONFileLogger_moby27782\)\.readLogs\(.* \[sync\.Cond\.Wait\]`, + `\(\*Watcher_moby27782\)\.readEvents\(.* \[select\]`, ), - makeTest(testCase{name: "Moby28462"}, - `main\.Moby28462\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, - `main\.Moby28462\.func2\.gowrap2 .* \[chan send\]`, + makeFlakyTest("Moby28462", + `monitor_moby28462\(.* \[sync\.Mutex\.Lock\]`, + `\(\*Daemon_moby28462\)\.StateChanged\(.* \[chan send\]`, ), - makeTest(testCase{name: "Moby29733"}, - `main\.Moby29733\.func2 .* \[chan receive\]`, - `main\.testActive_moby29733\.func1 .* \[sync\.Cond\.Wait\]`, + makeTest("Moby29733", + `Moby29733\.func2\(.* \[chan receive\]`, + 
`testActive_moby29733\.func1\(.* \[sync\.Cond\.Wait\]`, ), - makeTest(testCase{name: "Moby30408"}, - `main\.Moby30408\.func2 .* \[chan receive\]`, - `main\.testActive_moby30408\.func1 .* \[sync\.Cond\.Wait\]`, + makeTest("Moby30408", + `Moby30408\.func2\(.* \[chan receive\]`, + `testActive_moby30408\.func1\(.* \[sync\.Cond\.Wait\]`, ), - makeTest(testCase{name: "Moby33781"}, - `main\.monitor_moby33781\.func1 .* \[chan send\]`, + makeFlakyTest("Moby33781", + `monitor_moby33781\.func1\(.* \[chan send\]`, ), - makeTest(testCase{name: "Moby36114"}, - `main\.Moby36114\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + makeTest("Moby36114", + `\(\*serviceVM_moby36114\)\.hotAddVHDsAtStart\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest(testCase{name: "Serving2137"}, - `main\.\(\*Breaker_serving2137\)\.concurrentRequest\.func1 .* \[chan send\]`, - `main\.\(\*Breaker_serving2137\)\.concurrentRequest\.func1 .* \[sync\.Mutex\.Lock\]`, - `main\.Serving2137\.func2 .* \[chan receive\]`, + makeFlakyTest("Serving2137", + `\(\*Breaker_serving2137\)\.concurrentRequest\.func1\(.* \[chan send\]`, + `\(\*Breaker_serving2137\)\.concurrentRequest\.func1\(.* \[sync\.Mutex\.Lock\]`, + `Serving2137\.func2\(.* \[chan receive\]`, ), - makeTest(testCase{name: "Syncthing4829"}, - `main\.Syncthing4829\.func2 .* \[sync\.RWMutex\.RLock\]`, + makeFlakyTest("Syncthing4829", + `Syncthing4829\.func2\(.* \[sync\.RWMutex\.RLock\]`, ), - makeTest(testCase{name: "Syncthing5795"}, - `main\.\(\*rawConnection_syncthing5795\)\.Start\.func1 .* \[chan receive\]`, - `main\.Syncthing5795\.func2 .* \[chan receive\]`, + makeTest("Syncthing5795", + `\(\*rawConnection_syncthing5795\)\.Start\.func1.* \[chan receive\]`, + `Syncthing5795\.func2.* \[chan receive\]`, ), } @@ -1493,92 +1521,64 @@ func TestGoroutineLeakGC(t *testing.T) { // Run tests in parallel. t.Parallel() - // Default to 1 repetition if not specified. - // One extra rep for tests with a specified number of repetitions - // is irrelevant. - repetitions := tcase.repetitions | 1 - - // Output trace. Aggregated across all repetitions. - var output string - // Output and trace are protected by separate mutexes to reduce contention. - var outputMu sync.Mutex - var traceMu sync.RWMutex - // Wait group coordinates across all repetitions. - var wg sync.WaitGroup - - wg.Add(repetitions) - for i := 0; i < repetitions; i++ { - go func() { - defer wg.Done() - - // FIXME: Use GODEBUG flag only temporarily until we can use pprof/goroutineleaks. - repOutput := runBuiltTestProg(t, exe, tcase.name, "GODEBUG=gctrace=1,gcgoroutineleaks=1") - - // If the test case was not expected to produce leaks, but some were reported, - // stop the test immediately. Zero tolerance policy for false positives. - if len(tcase.expectedLeaks)+len(tcase.flakyLeaks) == 0 && strings.Contains(repOutput, "goroutine leak!") { - t.Errorf("output:\n%s\n\ngoroutines leaks detected in case with no leaks", repOutput) - } + // Run program and get output trace. + output := runBuiltTestProg(t, exe, tcase.name, "GODEBUG=asyncpreemptoff=1") + if len(output) == 0 { + t.Fatalf("Test produced no output. Is the goroutine leak profile collected?") + } - // Zero tolerance policy for fatal exceptions or panics. - if failStates.MatchString(repOutput) { - t.Errorf("output:\n%s\n\nunexpected fatal exception or panic", repOutput) - } + // Zero tolerance policy for fatal exceptions or panics. 
+ if failStates.MatchString(output) { + t.Errorf("output:\n%s\n\nunexpected fatal exception or panic", output) + } - // Parse the output line by line and look for the `goroutine leak!` message. - LINES: - for _, line := range strings.Split(repOutput, "\n") { - // We are not interested in anything else. - if !strings.Contains(line, "goroutine leak!") { - continue - } + // Extract all the goroutine leaks + foundLeaks := extractLeaks(output) - // Check if the leak is expected. - // If it is, check whether it has been encountered before. - var foundNew bool - var leakPattern *regexp.Regexp - traceMu.RLock() - for expectedLeak, ok := range tcase.expectedLeaks { - if expectedLeak.MatchString(line) { - if !ok { - foundNew = true - } - - leakPattern = expectedLeak - break - } - } - traceMu.RUnlock() + // If the test case was not expected to produce leaks, but some were reported, + // stop the test immediately. Zero tolerance policy for false positives. + if len(tcase.expectedLeaks)+len(tcase.flakyLeaks) == 0 && len(foundLeaks) > 0 { + t.Errorf("output:\n%s\n\ngoroutines leaks detected in case with no leaks", output) + } - if foundNew { - // Only bother writing if we found a new leak. - traceMu.Lock() - tcase.expectedLeaks[leakPattern] = true - traceMu.Unlock() + // Parse every leak and check if it is expected (maybe as a flaky leak). + LEAKS: + for _, leak := range foundLeaks { + // Check if the leak is expected. + // If it is, check whether it has been encountered before. + var foundNew bool + var leakPattern *regexp.Regexp + + for expectedLeak, ok := range tcase.expectedLeaks { + if expectedLeak.MatchString(leak) { + if !ok { + foundNew = true } - if leakPattern == nil { - // We are dealing with a leak not marked as expected. - // Check if it is a flaky leak. - for flakyLeak := range tcase.flakyLeaks { - if flakyLeak.MatchString(line) { - // The leak is flaky. Carry on to the next line. - continue LINES - } - } - - t.Errorf("output:\n%s\n\nunexpected goroutine leak: %s", repOutput, line) + leakPattern = expectedLeak + break + } + } + + if foundNew { + // Only bother writing if we found a new leak. + tcase.expectedLeaks[leakPattern] = true + } + + if leakPattern == nil { + // We are dealing with a leak not marked as expected. + // Check if it is a flaky leak. + for flakyLeak := range tcase.flakyLeaks { + if flakyLeak.MatchString(leak) { + // The leak is flaky. Carry on to the next line. + continue LEAKS } } - outputMu.Lock() - output += "\nRepetition " + strconv.Itoa(i) + ":\n" + repOutput + "\n--------------------------\n" - outputMu.Unlock() - }() + t.Errorf("output:\n%s\n\nunexpected goroutine leak: %s", output, leak) + } } - // Coordinate across all repetitions. 
- wg.Wait() missingLeakStrs := make([]string, 0, len(tcase.expectedLeaks)) for expectedLeak, found := range tcase.expectedLeaks { if !found { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach10214.go b/src/runtime/testdata/testgoroutineleakgc/cockroach10214.go index cc06c4b77541ca..313c98e226dc93 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach10214.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach10214.go @@ -11,7 +11,9 @@ package main import ( + "os" "runtime" + "runtime/pprof" "sync" "time" "unsafe" @@ -98,9 +100,10 @@ func (r *Replica_cockroach10214) maybeCoalesceHeartbeat() bool { } func Cockroach10214() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 1000; i++ { go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach1055.go b/src/runtime/testdata/testgoroutineleakgc/cockroach1055.go index f479a29dc36855..ac7bc93821caf3 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach1055.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach1055.go @@ -1,7 +1,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "sync/atomic" "time" @@ -70,9 +71,10 @@ func NewStopper_cockroach1055() *Stopper_cockroach1055 { } func Cockroach1055() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(1 * time.Second) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i <= 1000; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach10790.go b/src/runtime/testdata/testgoroutineleakgc/cockroach10790.go index 57c0d0c81bb4d4..d31ffbc3195f95 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach10790.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach10790.go @@ -14,7 +14,8 @@ package main import ( "context" - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -107,9 +108,10 @@ func NewReplica_cockroach10790() *Replica_cockroach10790 { /// func Cockroach10790() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go b/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go index 1745df4dbc66a0..9496fcbc7c688a 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go @@ -11,7 +11,8 @@ package main import ( "context" - "runtime" + "os" + "runtime/pprof" "time" ) @@ -61,9 +62,10 @@ func (tx *Tx_cockroach13197) close() { /// -----------G2 leak------------- func Cockroach13197() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() db := &DB_cockroach13197{} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go b/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go index 690a5586f2df32..1435d214eaca0d 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go @@ -13,7 +13,8 @@ package main import ( "context" - "runtime" + "os" + "runtime/pprof" "time" ) @@ -48,9 +49,10 @@ func (rs *Rows_cockroach13755) close(err error) { /// ---------------G2 leak----------------- func Cockroach13755() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() rs 
:= &Rows_cockroach13755{} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach1462.go b/src/runtime/testdata/testgoroutineleakgc/cockroach1462.go index b2365a8b95efdc..30e2a8a11ee93f 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach1462.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach1462.go @@ -1,7 +1,9 @@ package main import ( + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -143,9 +145,10 @@ func processEventsUntil_cockroach1462(ch <-chan interceptMessage_cockroach1462, } func Cockroach1462() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(2000 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i <= 1000; i++ { go func() { // G1 diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach16167.go b/src/runtime/testdata/testgoroutineleakgc/cockroach16167.go index 7860b74b437850..52e5ec692a4b78 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach16167.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach16167.go @@ -18,7 +18,9 @@ package main import ( + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -103,9 +105,10 @@ func NewExectorAndSession_cockroach16167() (*Executor_cockroach16167, *Session_c /// ----------------------G1,G2 deadlock-------------------- func Cockroach16167() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach18101.go b/src/runtime/testdata/testgoroutineleakgc/cockroach18101.go index e85007a675d1e4..716d6c2b12524f 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach18101.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach18101.go @@ -15,7 +15,8 @@ package main import ( "context" - "runtime" + "os" + "runtime/pprof" "time" ) @@ -61,9 +62,10 @@ func splitAndScatter_cockroach18101(ctx context.Context, readyForImportCh chan b /// func Cockroach18101() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { ctx, cancel := context.WithCancel(context.Background()) diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach2448.go b/src/runtime/testdata/testgoroutineleakgc/cockroach2448.go index 7ab60bd11b2a60..aec73f5dfac5d0 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach2448.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach2448.go @@ -1,7 +1,9 @@ package main import ( + "os" "runtime" + "runtime/pprof" "time" ) @@ -102,9 +104,10 @@ func NewStoreAndState_cockroach2448() (*Store_cockroach2448, *state_cockroach244 } func Cockroach2448() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(time.Second) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 1000; i++ { go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go b/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go index b16d4db25dd1a7..f11fe94fa4fb00 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go @@ -2,7 +2,8 @@ package main import ( "context" - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -64,9 +65,10 @@ func (c *Compactor_cockroach24808) Start(ctx context.Context, stopper *Stopper_c } func Cockroach24808() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + 
prof.WriteTo(os.Stdout, 2) }() go func() { // deadlocks: 1 diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go b/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go index 961c2fe7c065f3..765b1a7517728b 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go @@ -1,7 +1,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "time" ) @@ -74,9 +75,10 @@ func (tc *testContext_cockroach25456) StartWithStoreConfig(stopper *Stopper_cock } func Cockroach25456() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() go func() { // deadlocks: 1 diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go b/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go index 9ca074f83f279a..7a0dd6b8f0350c 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go @@ -1,7 +1,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "sync/atomic" "time" @@ -87,9 +88,10 @@ func (m *outbox_cockroach35073) run(wg *sync.WaitGroup) { } func Cockroach35073() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() go func() { // deadlocks: 1 diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go b/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go index 587c1dfc58f4bc..863033cdfad533 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go @@ -1,7 +1,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -90,9 +91,10 @@ func makeFlowRegistry_cockroach35931() *flowRegistry_cockroach35931 { } func Cockroach35931() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() go func() { // deadlocks: 1 diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach3710.go b/src/runtime/testdata/testgoroutineleakgc/cockroach3710.go index c1fe8abec02678..4c5c215b0ff854 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach3710.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach3710.go @@ -12,7 +12,9 @@ package main import ( + "os" "runtime" + "runtime/pprof" "sync" "time" "unsafe" @@ -117,9 +119,10 @@ func NewStore_cockroach3710() *Store_cockroach3710 { /// ----------------------G1,G2 deadlock--------------------- func Cockroach3710() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 10000; i++ { go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach584.go b/src/runtime/testdata/testgoroutineleakgc/cockroach584.go index 89602804ca9727..a49f89b162a1ac 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach584.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach584.go @@ -1,7 +1,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -38,9 +39,10 @@ func (g *gossip_cockroach584) manage() { } func Cockroach584() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach6181.go 
b/src/runtime/testdata/testgoroutineleakgc/cockroach6181.go index 20c440678e76a3..9a6b9354f5e144 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach6181.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach6181.go @@ -8,8 +8,10 @@ package main import ( - "fmt" + "io" + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -33,7 +35,7 @@ type rangeDescriptorCache_cockroach6181 struct { func (rdc *rangeDescriptorCache_cockroach6181) LookupRangeDescriptor() { rdc.rangeCacheMu.RLock() runtime.Gosched() - fmt.Println("lookup range descriptor:", rdc) + io.Discard.Write([]byte(rdc.String())) rdc.rangeCacheMu.RUnlock() rdc.rangeCacheMu.Lock() rdc.rangeCacheMu.Unlock() @@ -90,9 +92,10 @@ func testRangeCacheCoalescedRequests_cockroach6181() { /// -------------------------------------G2,G3,... deadlock-------------------------------------- func Cockroach6181() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { go testRangeCacheCoalescedRequests_cockroach6181() // G1 diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach7504.go b/src/runtime/testdata/testgoroutineleakgc/cockroach7504.go index 9fd36f243b9ef9..1de6987b673b85 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach7504.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach7504.go @@ -10,7 +10,9 @@ package main import ( + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -139,9 +141,10 @@ func NewLeaseSet_cockroach7504(n int) *LeaseSet_cockroach7504 { } func Cockroach7504() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach9935.go b/src/runtime/testdata/testgoroutineleakgc/cockroach9935.go index e0d1a44a2b4ef9..8e1914b0689056 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach9935.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach9935.go @@ -12,7 +12,8 @@ package main import ( "errors" "math/rand" - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -43,9 +44,10 @@ func (l *loggingT_cockroach9935) exit(err error) { defer l.mu.Unlock() } func Cockroach9935() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd10492.go b/src/runtime/testdata/testgoroutineleakgc/etcd10492.go index ed9165f751ed3c..f46d23f08be1b8 100644 --- a/src/runtime/testdata/testgoroutineleakgc/etcd10492.go +++ b/src/runtime/testdata/testgoroutineleakgc/etcd10492.go @@ -2,7 +2,8 @@ package main import ( "context" - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -20,7 +21,7 @@ type lessor_etcd10492 struct { } func (le *lessor_etcd10492) Checkpoint() { - le.mu.Lock() + le.mu.Lock() // Lock acquired twice here defer le.mu.Unlock() } @@ -40,26 +41,25 @@ func (le *lessor_etcd10492) Renew() { le.cp(context.Background()) } } + func Etcd10492() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() - for i := 0; i < 100; i++ { - go func() { - // deadlocks: x > 0 - - le := &lessor_etcd10492{ - checkpointInterval: 0, - } - fakerCheckerpointer_etcd10492 := func(ctx context.Context) { - le.Checkpoint() - } - le.SetCheckpointer(fakerCheckerpointer_etcd10492) - 
le.mu.Lock() - le.mu.Unlock() - le.Renew() - }() - } + go func() { // G1 + // deadlocks: 1 + le := &lessor_etcd10492{ + checkpointInterval: 0, + } + fakerCheckerpointer_etcd10492 := func(ctx context.Context) { + le.Checkpoint() + } + le.SetCheckpointer(fakerCheckerpointer_etcd10492) + le.mu.Lock() + le.mu.Unlock() + le.Renew() + }() } diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd5509.go b/src/runtime/testdata/testgoroutineleakgc/etcd5509.go index 3b8e8ed1cafcaa..ab7c6d24dca126 100644 --- a/src/runtime/testdata/testgoroutineleakgc/etcd5509.go +++ b/src/runtime/testdata/testgoroutineleakgc/etcd5509.go @@ -2,8 +2,10 @@ package main import ( "context" - "fmt" + "io" + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -89,9 +91,10 @@ func NewKV_etcd5509(c *Client_etcd5509) KV { } func Etcd5509() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 10; i++ { @@ -107,7 +110,7 @@ func Etcd5509() { defer close(donec) err := kv.Get(context.TODO()) if err != nil && err != ErrConnClosed_etcd5509 { - fmt.Println("Expect ErrConnClosed") + io.Discard.Write([]byte("Expect ErrConnClosed")) } }() diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd6708.go b/src/runtime/testdata/testgoroutineleakgc/etcd6708.go index 40690395e5589d..e45d13aa3e2bb2 100644 --- a/src/runtime/testdata/testgoroutineleakgc/etcd6708.go +++ b/src/runtime/testdata/testgoroutineleakgc/etcd6708.go @@ -2,7 +2,8 @@ package main import ( "context" - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -77,18 +78,17 @@ func NewMembersAPI_etcd6708(c Client_etcd6708) MembersAPI_etcd6708 { } func Etcd6708() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() - for i := 0; i < 100; i++ { - go func() { - // deadlocks: x > 0 - hc := &httpClusterClient_etcd6708{ - selectionMode: EndpointSelectionPrioritizeLeader_etcd6708, - } - hc.Sync(context.Background()) - }() - } + go func() { + // deadlocks: 1 + hc := &httpClusterClient_etcd6708{ + selectionMode: EndpointSelectionPrioritizeLeader_etcd6708, + } + hc.Sync(context.Background()) + }() } diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd6857.go b/src/runtime/testdata/testgoroutineleakgc/etcd6857.go index 7411a3216143d0..8c3af3ef192eaf 100644 --- a/src/runtime/testdata/testgoroutineleakgc/etcd6857.go +++ b/src/runtime/testdata/testgoroutineleakgc/etcd6857.go @@ -8,7 +8,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "time" ) @@ -75,9 +76,10 @@ func NewNode_etcd6857() *node_etcd6857 { /// func Etcd6857() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i <= 100; i++ { go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd6873.go b/src/runtime/testdata/testgoroutineleakgc/etcd6873.go index 848c56d801492a..a668219d40aea4 100644 --- a/src/runtime/testdata/testgoroutineleakgc/etcd6873.go +++ b/src/runtime/testdata/testgoroutineleakgc/etcd6873.go @@ -8,7 +8,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -76,9 +77,10 @@ func (wbs *watchBroadcasts_etcd6873) update(wb *watchBroadcast_etcd6873) { /// func Etcd6873() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { diff --git 
a/src/runtime/testdata/testgoroutineleakgc/etcd7492.go b/src/runtime/testdata/testgoroutineleakgc/etcd7492.go index feb497f9b4512a..fbda74c48359f7 100644 --- a/src/runtime/testdata/testgoroutineleakgc/etcd7492.go +++ b/src/runtime/testdata/testgoroutineleakgc/etcd7492.go @@ -8,7 +8,9 @@ package main import ( + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -154,9 +156,10 @@ func setupAuthStore_etcd7492() (store *authStore_etcd7492, teardownfunc func()) /// func Etcd7492() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd7902.go b/src/runtime/testdata/testgoroutineleakgc/etcd7902.go index 72f713ed44cc7b..4e8bbb2eb04df2 100644 --- a/src/runtime/testdata/testgoroutineleakgc/etcd7902.go +++ b/src/runtime/testdata/testgoroutineleakgc/etcd7902.go @@ -18,7 +18,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -103,9 +104,10 @@ func doRounds_etcd7902(rcs []roundClient_etcd7902, rounds int) { /// func Etcd7902() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { go runElectionFunc_etcd7902() // G1 diff --git a/src/runtime/testdata/testgoroutineleakgc/goroutineleakgc.go b/src/runtime/testdata/testgoroutineleakgc/goroutineleakgc.go index e7daebcb14f74c..7a3e087f5091cf 100644 --- a/src/runtime/testdata/testgoroutineleakgc/goroutineleakgc.go +++ b/src/runtime/testdata/testgoroutineleakgc/goroutineleakgc.go @@ -1,7 +1,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -27,53 +28,59 @@ func init() { } func NilRecv() { + prof := pprof.Lookup("goroutineleak") go func() { var c chan int <-c panic("should not be reached") }() time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) } func NilSend() { + prof := pprof.Lookup("goroutineleak") go func() { var c chan int c <- 0 panic("should not be reached") }() time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) } func ChanRecv() { + prof := pprof.Lookup("goroutineleak") go func() { <-make(chan int) panic("should not be reached") }() time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) } func SelectNoCases() { + prof := pprof.Lookup("goroutineleak") go func() { select {} panic("should not be reached") }() time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) } func ChanSend() { + prof := pprof.Lookup("goroutineleak") go func() { make(chan int) <- 0 panic("should not be reached") }() time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) } func Select() { + prof := pprof.Lookup("goroutineleak") go func() { select { case make(chan int) <- 0: @@ -82,10 +89,11 @@ func Select() { panic("should not be reached") }() time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) } func WaitGroup() { + prof := pprof.Lookup("goroutineleak") go func() { var wg sync.WaitGroup wg.Add(1) @@ -93,10 +101,11 @@ func WaitGroup() { panic("should not be reached") }() time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) } func MutexStack() { + prof := pprof.Lookup("goroutineleak") for i := 0; i < 1000; i++ { go func() { var mu sync.Mutex @@ -106,11 +115,11 @@ func MutexStack() { }() } time.Sleep(10 * time.Millisecond) - runtime.GC() - time.Sleep(10 * 
time.Millisecond) + prof.WriteTo(os.Stdout, 2) } func MutexHeap() { + prof := pprof.Lookup("goroutineleak") for i := 0; i < 1000; i++ { go func() { mu := &sync.Mutex{} @@ -122,11 +131,11 @@ func MutexHeap() { }() } time.Sleep(10 * time.Millisecond) - runtime.GC() - time.Sleep(10 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) } func RWMutexRLock() { + prof := pprof.Lookup("goroutineleak") go func() { mu := &sync.RWMutex{} mu.Lock() @@ -134,10 +143,11 @@ func RWMutexRLock() { panic("should not be reached") }() time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) } func RWMutexLock() { + prof := pprof.Lookup("goroutineleak") go func() { mu := &sync.RWMutex{} mu.Lock() @@ -145,10 +155,11 @@ func RWMutexLock() { panic("should not be reached") }() time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) } func Cond() { + prof := pprof.Lookup("goroutineleak") go func() { cond := sync.NewCond(&sync.Mutex{}) cond.L.Lock() @@ -156,10 +167,11 @@ func Cond() { panic("should not be reached") }() time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) } func Mixed() { + prof := pprof.Lookup("goroutineleak") go func() { ch := make(chan int) wg := sync.WaitGroup{} @@ -174,16 +186,17 @@ func Mixed() { panic("should not be reached") }() time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) } var ch = make(chan int) // No leak should be reported by this test func NoLeakGlobal() { + prof := pprof.Lookup("goroutineleak") go func() { <-ch }() time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) } diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc1275.go b/src/runtime/testdata/testgoroutineleakgc/grpc1275.go index f9a876cb9d0e61..155b4ef6b16244 100644 --- a/src/runtime/testdata/testgoroutineleakgc/grpc1275.go +++ b/src/runtime/testdata/testgoroutineleakgc/grpc1275.go @@ -19,7 +19,8 @@ package main import ( "io" - "runtime" + "os" + "runtime/pprof" "time" ) @@ -106,9 +107,10 @@ func testInflightStreamClosing_grpc1275() { /// func Grpc1275() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() go func() { testInflightStreamClosing_grpc1275() // G1 diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc1424.go b/src/runtime/testdata/testgoroutineleakgc/grpc1424.go index 262335b1d1a1d7..0ec8c8f8e7e41d 100644 --- a/src/runtime/testdata/testgoroutineleakgc/grpc1424.go +++ b/src/runtime/testdata/testgoroutineleakgc/grpc1424.go @@ -10,7 +10,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -104,9 +105,10 @@ func DialContext_grpc1424() { /// func Grpc1424() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() go DialContext_grpc1424() // G1 } diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc1460.go b/src/runtime/testdata/testgoroutineleakgc/grpc1460.go index 44e761c7f37e26..d55db220064baa 100644 --- a/src/runtime/testdata/testgoroutineleakgc/grpc1460.go +++ b/src/runtime/testdata/testgoroutineleakgc/grpc1460.go @@ -15,7 +15,9 @@ package main import ( + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -67,9 +69,10 @@ func (t *http2Client_grpc1460) NewStream() { /// func Grpc1460() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 1000; i++ { diff --git 
a/src/runtime/testdata/testgoroutineleakgc/grpc3017.go b/src/runtime/testdata/testgoroutineleakgc/grpc3017.go index 47cea8068cd781..ba0c7f4b7480cb 100644 --- a/src/runtime/testdata/testgoroutineleakgc/grpc3017.go +++ b/src/runtime/testdata/testgoroutineleakgc/grpc3017.go @@ -1,7 +1,9 @@ package main import ( + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -88,8 +90,10 @@ func (ccc *lbCacheClientConn_grpc3017) RemoveSubConn(sc SubConn_grpc3017) { } func Grpc3017() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc660.go b/src/runtime/testdata/testgoroutineleakgc/grpc660.go index 08a1ee502ca47e..d9023eb8abff0d 100644 --- a/src/runtime/testdata/testgoroutineleakgc/grpc660.go +++ b/src/runtime/testdata/testgoroutineleakgc/grpc660.go @@ -11,7 +11,8 @@ package main import ( "math/rand" - "runtime" + "os" + "runtime/pprof" "time" ) @@ -53,9 +54,10 @@ func (bc *benchmarkClient_grpc660) doCloseLoopUnary() { /// func Grpc660() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(1 * time.Second) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() go func() { bc := &benchmarkClient_grpc660{ diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc795.go b/src/runtime/testdata/testgoroutineleakgc/grpc795.go index c97f9c96c67d71..1208f4ff875248 100644 --- a/src/runtime/testdata/testgoroutineleakgc/grpc795.go +++ b/src/runtime/testdata/testgoroutineleakgc/grpc795.go @@ -1,7 +1,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -60,9 +61,10 @@ func testServerGracefulStopIdempotent_grpc795() { } func Grpc795() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { go testServerGracefulStopIdempotent_grpc795() diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc862.go b/src/runtime/testdata/testgoroutineleakgc/grpc862.go index 15f787cf6e53d9..679fb052daf1f2 100644 --- a/src/runtime/testdata/testgoroutineleakgc/grpc862.go +++ b/src/runtime/testdata/testgoroutineleakgc/grpc862.go @@ -13,7 +13,8 @@ package main import ( "context" - "runtime" + "os" + "runtime/pprof" "time" ) @@ -97,9 +98,10 @@ func DialContext_grpc862(ctx context.Context) (conn *ClientConn_grpc862) { /// func Grpc862() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() go func() { ctx, cancel := context.WithCancel(context.Background()) diff --git a/src/runtime/testdata/testgoroutineleakgc/hugo3251.go b/src/runtime/testdata/testgoroutineleakgc/hugo3251.go index 1c1f598c4578cc..de19aa10069480 100644 --- a/src/runtime/testdata/testgoroutineleakgc/hugo3251.go +++ b/src/runtime/testdata/testgoroutineleakgc/hugo3251.go @@ -2,7 +2,9 @@ package main import ( "fmt" + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -43,9 +45,10 @@ func resGetRemote_hugo3251(remoteURLLock *remoteLock_hugo3251, url string) error } func Hugo3251() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(time.Second) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 10; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/istio16224.go b/src/runtime/testdata/testgoroutineleakgc/istio16224.go index f3d4cfd1b1f6a4..20658186d68022 100644 --- a/src/runtime/testdata/testgoroutineleakgc/istio16224.go +++ 
b/src/runtime/testdata/testgoroutineleakgc/istio16224.go @@ -1,7 +1,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -92,9 +93,10 @@ func NewBufferedMonitor_istio16224() Monitor_istio16224 { } func Istio16224() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/istio17860.go b/src/runtime/testdata/testgoroutineleakgc/istio17860.go index a8aba84cd3f15f..7163603ff831ff 100644 --- a/src/runtime/testdata/testgoroutineleakgc/istio17860.go +++ b/src/runtime/testdata/testgoroutineleakgc/istio17860.go @@ -2,7 +2,8 @@ package main import ( "context" - "runtime" + "os" + "runtime/pprof" "sync" "time" @@ -113,9 +114,10 @@ func NewAgent_istio17860(proxy Proxy_istio17860) Agent_istio17860 { } func Istio17860() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/istio18454.go b/src/runtime/testdata/testgoroutineleakgc/istio18454.go index a795e7e8f0ff9a..680ff85d61152d 100644 --- a/src/runtime/testdata/testgoroutineleakgc/istio18454.go +++ b/src/runtime/testdata/testgoroutineleakgc/istio18454.go @@ -2,7 +2,8 @@ package main import ( "context" - "runtime" + "os" + "runtime/pprof" "sync" "time" @@ -122,9 +123,10 @@ func NewWorker_istio18454() *Worker_istio18454 { } func Istio18454() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes10182.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes10182.go index eed829866640fd..d83cb60c289dc2 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes10182.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes10182.go @@ -12,7 +12,9 @@ package main import ( + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -77,9 +79,10 @@ func NewStatusManager_kubernetes10182() *statusManager_kubernetes10182 { // -----------------------------------G1,G3 leak------------------------------------- func Kubernetes10182() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 1000; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes11298.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes11298.go index 056495a6597b35..20e80e436647bc 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes11298.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes11298.go @@ -1,7 +1,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -94,9 +95,10 @@ func Notify_kubernetes11298(abort <-chan struct{}) { Until_kubernetes11298(func() { n.serviceLoop(finished) }, 0, abort) } func Kubernetes11298() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 1000; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go index d0f445a0583fe2..8cfaa700eba779 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go @@ -8,7 +8,8 
@@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -166,9 +167,10 @@ func NewCacher_kubernetes13135() *Cacher_kubernetes13135 { /// func Kubernetes13135() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() StopChannel_kubernetes13135 = make(chan struct{}) diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes1321.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes1321.go index 4ca72c65ddb045..c5b3e2f27378e6 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes1321.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes1321.go @@ -19,7 +19,9 @@ package main import ( + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -111,9 +113,10 @@ func testMuxWatcherClose_kubernetes1321() { /// func Kubernetes1321() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 1000; i++ { go testMuxWatcherClose_kubernetes1321() // G1 diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go index e393ce9d5e0a40..bce3b155fa969c 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go @@ -15,7 +15,8 @@ package main import ( "context" "errors" - "runtime" + "os" + "runtime/pprof" "time" ) @@ -68,9 +69,10 @@ func NewWatchChan_kubernetes25331() *watchChan_kubernetes25331 { /// func Kubernetes25331() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() go func() { wc := NewWatchChan_kubernetes25331() diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes26980.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes26980.go index cf1b79cd57d8e5..1ad2c6c28f63f4 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes26980.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes26980.go @@ -1,7 +1,9 @@ package main import ( + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -53,9 +55,10 @@ func newProcessListener_kubernetes26980() *processorListener_kubernetes26980 { return ret } func Kubernetes26980() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 3000; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes30872.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes30872.go index 4b306e40794337..b3661168d78b58 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes30872.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes30872.go @@ -1,7 +1,9 @@ package main import ( + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -203,9 +205,10 @@ func NewNamespaceController_kubernetes30872() *NamespaceController_kubernetes308 } func Kubernetes30872() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go index b204230c29a3a5..3d9c8f203de157 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go @@ -1,7 +1,8 @@ package main import ( - "runtime" + 
"os" + "runtime/pprof" "sync" "time" ) @@ -63,9 +64,10 @@ func newCacheWatcher_kubernetes38669(chanSize int, initEvents []watchCacheEvent_ } func Kubernetes38669() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(1 * time.Second) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() go func() { initEvents := []watchCacheEvent_kubernetes38669{1, 2} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go index 540988ba774c66..cd7c42eb4a1d6f 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go @@ -15,7 +15,8 @@ package main import ( "errors" "math/rand" - "runtime" + "os" + "runtime/pprof" "time" ) @@ -52,9 +53,10 @@ func finishRequest_kubernetes5316(timeout time.Duration, fn func() error) { /// func Kubernetes5316() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() go func() { fn := func() error { diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes58107.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes58107.go index 9e328fc5330e2f..7b4fed76f5be26 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes58107.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes58107.go @@ -27,7 +27,9 @@ package main import ( + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -110,9 +112,10 @@ func startResourceQuotaController_kubernetes58107() { } func Kubernetes58107() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(1000 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 1000; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes62464.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes62464.go index 48bcb7efe6fe95..90669b0e545d5c 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes62464.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes62464.go @@ -14,6 +14,8 @@ package main import ( "math/rand" + "os" + "runtime/pprof" "runtime" "sync" "time" @@ -102,9 +104,10 @@ func NewPolicyAndManager_kubernetes62464() (*staticPolicy_kubernetes62464, *mana /// func Kubernetes62464() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 1000; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes6632.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes6632.go index 26fc0f3db03596..f3222c46d436e1 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes6632.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes6632.go @@ -13,7 +13,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -78,9 +79,10 @@ func NewIdleAwareFramer_kubernetes6632() *idleAwareFramer_kubernetes6632 { /// func Kubernetes6632() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes70277.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes70277.go index 234f4145695a8c..472c65149e25b4 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes70277.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes70277.go @@ -1,7 +1,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "time" ) @@ -66,9 +67,10 @@ 
func poller_kubernetes70277(interval, timeout time.Duration) WaitFunc_kubernetes } func Kubernetes70277() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 1000; i++ { go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/moby17176.go b/src/runtime/testdata/testgoroutineleakgc/moby17176.go index e3cc10a2276da6..484b255a2520d6 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby17176.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby17176.go @@ -20,7 +20,8 @@ package main import ( "errors" - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -63,9 +64,10 @@ func testDevmapperLockReleasedDeviceDeletion_moby17176() { } } func Moby17176() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/moby21233.go b/src/runtime/testdata/testgoroutineleakgc/moby21233.go index 837bd6cb344328..60dcbdeaa29fda 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby21233.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby21233.go @@ -17,7 +17,8 @@ package main import ( "math/rand" - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -140,9 +141,10 @@ func testTransfer_moby21233() { // G1 } func Moby21233() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { go testTransfer_moby21233() // G1 diff --git a/src/runtime/testdata/testgoroutineleakgc/moby25348.go b/src/runtime/testdata/testgoroutineleakgc/moby25348.go index 4370779e9885c2..3a7efda93d74b2 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby25348.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby25348.go @@ -13,7 +13,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -40,19 +41,18 @@ func (pm *Manager_moby25348) init() { } func Moby25348() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) + }() + go func() { + p1 := &plugin_moby25348{} + p2 := &plugin_moby25348{} + pm := &Manager_moby25348{ + plugins: []*plugin_moby25348{p1, p2}, + } + // deadlocks: 1 + go pm.init() }() - for i := 0; i < 100; i++ { - go func() { - p1 := &plugin_moby25348{} - p2 := &plugin_moby25348{} - pm := &Manager_moby25348{ - plugins: []*plugin_moby25348{p1, p2}, - } - // deadlocks: 100 - go pm.init() - }() - } } diff --git a/src/runtime/testdata/testgoroutineleakgc/moby27782.go b/src/runtime/testdata/testgoroutineleakgc/moby27782.go index 36c715c33ed8de..51212e77c1728a 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby27782.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby27782.go @@ -9,7 +9,9 @@ package main import ( "errors" + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -226,9 +228,10 @@ func (l *JSONFileLogger_moby27782) Close() { } func Moby27782() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 10000; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/moby28462.go b/src/runtime/testdata/testgoroutineleakgc/moby28462.go index af7bc8a4722db3..933d9c78fd853f 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby28462.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby28462.go @@ -14,7 +14,9 @@ 
package main import ( + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -122,9 +124,10 @@ func NewDaemonAndContainer_moby28462() (*Daemon_moby28462, *Container_moby28462) /// func Moby28462() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 10000; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/moby29733.go b/src/runtime/testdata/testgoroutineleakgc/moby29733.go index 65807d6ee7f1ac..10830acd4d9d82 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby29733.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby29733.go @@ -1,7 +1,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -52,23 +53,22 @@ func testActive_moby29733(p *Plugin_moby29733) { } func Moby29733() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() - for i := 0; i < 1; i++ { - go func() { - // deadlocks: x > 0 - storage := plugins_moby29733{plugins: make(map[int]*Plugin_moby29733)} - handlers := extpointHandlers_moby29733{extpointHandlers: make(map[int]struct{})} + go func() { + // deadlocks: x > 0 + storage := plugins_moby29733{plugins: make(map[int]*Plugin_moby29733)} + handlers := extpointHandlers_moby29733{extpointHandlers: make(map[int]struct{})} - p := &Plugin_moby29733{activateWait: sync.NewCond(&sync.Mutex{})} - storage.plugins[0] = p + p := &Plugin_moby29733{activateWait: sync.NewCond(&sync.Mutex{})} + storage.plugins[0] = p - testActive_moby29733(p) - Handle_moby29733(storage, handlers) - testActive_moby29733(p) - }() - } + testActive_moby29733(p) + Handle_moby29733(storage, handlers) + testActive_moby29733(p) + }() } diff --git a/src/runtime/testdata/testgoroutineleakgc/moby30408.go b/src/runtime/testdata/testgoroutineleakgc/moby30408.go index 2ca03184260954..c38731aac73022 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby30408.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby30408.go @@ -2,7 +2,8 @@ package main import ( "errors" - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -45,9 +46,10 @@ func testActive_moby30408(p *Plugin_moby30408) { } func Moby30408() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/moby33781.go b/src/runtime/testdata/testgoroutineleakgc/moby33781.go index 115313fa3c9acb..7c820b0ebd9d88 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby33781.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby33781.go @@ -15,7 +15,8 @@ package main import ( "context" - "runtime" + "os" + "runtime/pprof" "time" ) @@ -67,9 +68,10 @@ func monitor_moby33781(stop chan bool) { /// func Moby33781() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { go func(i int) { diff --git a/src/runtime/testdata/testgoroutineleakgc/moby36114.go b/src/runtime/testdata/testgoroutineleakgc/moby36114.go index 1f4c3ba8533dbd..c3155b9135969b 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby36114.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby36114.go @@ -12,7 +12,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -37,9 +38,10 @@ func (svm *serviceVM_moby36114) hotRemoveVHDsAtStart() { } func Moby36114() { + prof := 
pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/moby4395.go b/src/runtime/testdata/testgoroutineleakgc/moby4395.go deleted file mode 100644 index f77cc883025608..00000000000000 --- a/src/runtime/testdata/testgoroutineleakgc/moby4395.go +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Project: moby - * Issue or PR : https://github.com/moby/moby/pull/4395 - * Buggy version: 6d6ec5e0051ad081be3d71e20b39a25c711b4bc3 - * fix commit-id: d3a6ee1e55a53ee54b91ffb6c53ba674768cf9de - * Flaky: 100/100 - * Description: - * The anonyous goroutine could be waiting on sending to - * the channel which might never be drained. - */ - -package main - -import ( - "errors" - "runtime" - "time" -) - -func init() { - register("Moby4395", Moby4395) -} - -func Go_moby4395(f func() error) chan error { - ch := make(chan error) - go func() { - // deadlocks: 1 - ch <- f() // G2 - }() - return ch -} - -/// -/// G1 G2 -/// Go() -/// return ch -/// ch <- f() -/// ----------G2 leak------------- -/// - -func Moby4395() { - defer func() { - time.Sleep(100 * time.Millisecond) - runtime.GC() - }() - Go_moby4395(func() error { // G1 - return errors.New("") - }) -} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby4951.go b/src/runtime/testdata/testgoroutineleakgc/moby4951.go index 91d18fd73e8ecd..8d0abbca1359b0 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby4951.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby4951.go @@ -13,7 +13,9 @@ package main import ( + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -35,6 +37,7 @@ func (devices *DeviceSet_moby4951) DeleteDevice(hash string) { info := devices.lookupDevice(hash) info.lock.Lock() + runtime.Gosched() defer info.lock.Unlock() devices.deleteDevice(info) @@ -84,19 +87,18 @@ func NewDeviceSet_moby4951() *DeviceSet_moby4951 { } func Moby4951() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() - for i := 0; i < 100; i++ { - go func() { - ds := NewDeviceSet_moby4951() - /// Delete devices by the same info - // deadlocks: x > 0 - go ds.DeleteDevice("info1") - // deadlocks: x > 0 - go ds.DeleteDevice("info1") - }() - } + go func() { + ds := NewDeviceSet_moby4951() + /// Delete devices by the same info + // deadlocks: x > 0 + go ds.DeleteDevice("info1") + // deadlocks: x > 0 + go ds.DeleteDevice("info1") + }() } diff --git a/src/runtime/testdata/testgoroutineleakgc/moby7559.go b/src/runtime/testdata/testgoroutineleakgc/moby7559.go index d9c94b2c1f4d06..d0e3579e72115b 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby7559.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby7559.go @@ -9,7 +9,8 @@ package main import ( "net" - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -36,10 +37,12 @@ func (proxy *UDPProxy_moby7559) Run() { } proxy.connTrackLock.Unlock() } + func Moby7559() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/patterns.go b/src/runtime/testdata/testgoroutineleakgc/patterns.go index 0849216683349f..41377c5656edc5 100644 --- a/src/runtime/testdata/testgoroutineleakgc/patterns.go +++ b/src/runtime/testdata/testgoroutineleakgc/patterns.go @@ -3,7 +3,9 @@ package main import ( "context" "fmt" + "os" "runtime" + 
"runtime/pprof" "time" ) @@ -43,9 +45,10 @@ func noCloseRange(list []any, workers int) { } func NoCloseRange() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() go noCloseRange([]any{1, 2, 3}, 0) @@ -110,9 +113,10 @@ func workerLifecycle(items []any) { } func MethodContractViolation() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() workerLifecycle(make([]any, 10)) @@ -132,10 +136,11 @@ func doubleSend(ch chan any, err error) { } func DoubleSend() { + prof := pprof.Lookup("goroutineleak") ch := make(chan any) defer func() { time.Sleep(1000 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() go func() { @@ -184,9 +189,10 @@ func earlyReturn(err error) { } func EarlyReturn() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() go earlyReturn(nil) @@ -214,9 +220,10 @@ func nCastLeak(items []any) { } func NCastLeak() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() go func() { @@ -238,7 +245,6 @@ func timeout(ctx context.Context) { ch <- struct{}{} }() - runtime.Gosched() select { case <-ch: // Receive message // Sender is released @@ -248,9 +254,10 @@ func timeout(ctx context.Context) { } func Timeout() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(10 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() ctx, cancel := context.WithCancel(context.Background()) diff --git a/src/runtime/testdata/testgoroutineleakgc/serving2137.go b/src/runtime/testdata/testgoroutineleakgc/serving2137.go index b655585926b268..45ebb3c6e64519 100644 --- a/src/runtime/testdata/testgoroutineleakgc/serving2137.go +++ b/src/runtime/testdata/testgoroutineleakgc/serving2137.go @@ -1,7 +1,9 @@ package main import ( + "os" "runtime" + "runtime/pprof" "sync" "time" ) @@ -100,9 +102,10 @@ func unlockAll_serving2137(requests []request_serving2137) { } func Serving2137() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 1000; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go b/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go index 16eb2800da3b6f..fba60b32335520 100644 --- a/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go +++ b/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go @@ -1,7 +1,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -67,9 +68,10 @@ func NewService_syncthing4829() *Service_syncthing4829 { } func Syncthing4829() { + prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() for i := 0; i < 100; i++ { diff --git a/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go b/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go index 5ff25ca268a09b..beca0fddd64ae3 100644 --- a/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go +++ b/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go @@ -1,7 +1,8 @@ package main import ( - "runtime" + "os" + "runtime/pprof" "sync" "time" ) @@ -103,9 +104,10 @@ func NewConnection_syncthing5795(receiver Model_syncthing5795) Connection_syncth } func Syncthing5795() { + prof := 
pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) - runtime.GC() + prof.WriteTo(os.Stdout, 2) }() go func() { // deadlocks: 1 From 294ab4820d2d77b168685649a03573f3c5bdc544 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Thu, 7 Aug 2025 15:44:51 +0200 Subject: [PATCH 22/36] Fixed incorrect leak in kubernetes/13135 --- .../testgoroutineleakgc/kubernetes13135.go | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go index 8cfaa700eba779..19325ea3624886 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go @@ -121,7 +121,7 @@ func (w *WatchCache_kubernetes13135) Replace(obj interface{}) { } } -func NewCacher_kubernetes13135() *Cacher_kubernetes13135 { +func NewCacher_kubernetes13135(stopCh <-chan struct{}) *Cacher_kubernetes13135 { watchCache := &WatchCache_kubernetes13135{} cacher := &Cacher_kubernetes13135{ initialized: sync.WaitGroup{}, @@ -134,7 +134,6 @@ func NewCacher_kubernetes13135() *Cacher_kubernetes13135 { cacher.Unlock() }) watchCache.SetOnEvent(cacher.processEvent) - stopCh := StopChannel_kubernetes13135 go Util_kubernetes13135(func() { cacher.startCaching(stopCh) }, 0, stopCh) // G2 cacher.initialized.Wait() return cacher @@ -166,6 +165,19 @@ func NewCacher_kubernetes13135() *Cacher_kubernetes13135 { ///--------------------------------G2,G3 deadlock------------------------------------- /// +/// +/// G1 G2 G3 +/// NewCacher() +/// watchCache.SetOnReplace() +/// watchCache.SetOnEvent() +/// watchCache.initialized.Wait() +/// Util(...) +/// +/// cacher.startCaching() +/// c.Lock() +/// c.reflector.ListAndWatch() +/// r.syncWith() +///--------------------------------G1 deadlocks------------------------------------- func Kubernetes13135() { prof := pprof.Lookup("goroutineleak") defer func() { @@ -176,10 +188,15 @@ func Kubernetes13135() { StopChannel_kubernetes13135 = make(chan struct{}) for i := 0; i < 50; i++ { go func() { + // Should create a local channel. Using a single global channel + // concurrently will cause a deadlock which does not actually exist + // in the original microbenchmark. + StopChannel_kubernetes13135 := make(chan struct{}) + // deadlocks: x > 0 - c := NewCacher_kubernetes13135() // G1 + c := NewCacher_kubernetes13135(StopChannel_kubernetes13135) // G1 go c.watchCache.Add(nil) // G3 + go close(StopChannel_kubernetes13135) }() } - go close(StopChannel_kubernetes13135) } From a93c302c02e11528cf8181bc4ff65cbc909ee90f Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Thu, 7 Aug 2025 17:37:22 +0200 Subject: [PATCH 23/36] Updated a few flaky entries. Unclogged output printing. --- src/runtime/gc_test.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 395afff3a6e226..0f6c7cd9aab0c5 100755 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -1414,13 +1414,13 @@ func TestGoroutineLeakGC(t *testing.T) { `\(\*statusManager_kubernetes10182\)\.SetPodStatus\(.* \[chan send\]`, ), makeFlakyTest("Kubernetes11298", - // This entire test is flaky in producing leaks. Run it only to check for crashes. 
`After_kubernetes11298\.func1\(.* \[chan receive\]`, `After_kubernetes11298\.func1\(.* \[sync\.Cond\.Wait\]`, `Kubernetes11298\.func2\(.* \[chan receive\]`, ), makeFlakyTest("Kubernetes13135", - `Kubernetes13135\.func2\(.* \[sync\.WaitGroup\.Wait\]`, + `Util_kubernetes13135\(.* \[sync\.Mutex\.Lock\]`, + `\(\*WatchCache_kubernetes13135\)\.Add\(.* \[sync\.Mutex\.Lock\]`, ), makeTest("Kubernetes25331", `\(\*watchChan_kubernetes25331\)\.run\(.* \[chan send\]`, @@ -1459,7 +1459,7 @@ func TestGoroutineLeakGC(t *testing.T) { makeTest("Moby17176", `testDevmapperLockReleasedDeviceDeletion_moby17176\.func1\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest("Moby21233", + makeFlakyTest("Moby21233", `\(\*Transfer_moby21233\)\.Watch\.func1\(.* \[chan send\]`, `\(\*Transfer_moby21233\)\.Watch\.func1\(.* \[select\]`, `testTransfer_moby21233\(.* \[chan receive\]`, @@ -1541,6 +1541,8 @@ func TestGoroutineLeakGC(t *testing.T) { t.Errorf("output:\n%s\n\ngoroutines leaks detected in case with no leaks", output) } + unexpectedLeaks := make([]string, 0, len(foundLeaks)) + // Parse every leak and check if it is expected (maybe as a flaky leak). LEAKS: for _, leak := range foundLeaks { @@ -1575,7 +1577,7 @@ func TestGoroutineLeakGC(t *testing.T) { } } - t.Errorf("output:\n%s\n\nunexpected goroutine leak: %s", output, leak) + unexpectedLeaks = append(unexpectedLeaks, leak) } } @@ -1586,8 +1588,15 @@ func TestGoroutineLeakGC(t *testing.T) { } } + var errors []error + if len(unexpectedLeaks) > 0 { + errors = append(errors, fmt.Errorf("unexpected goroutine leaks:\n%s", strings.Join(unexpectedLeaks, "\n"))) + } if len(missingLeakStrs) > 0 { - t.Fatalf("output:\n%s\n\nnot enough goroutines leaks detected. Missing:\n%s", output, strings.Join(missingLeakStrs, ", ")) + errors = append(errors, fmt.Errorf("missing expected leaks:\n%s", strings.Join(missingLeakStrs, ", "))) + } + if len(errors) > 0 { + t.Fatalf("Failed with the following errors:\n%s\n\noutput:\n%s", errors, output) } }) } From 9711f7f32122bbd8d95671d5fbce44446ace907f Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Fri, 8 Aug 2025 13:42:49 +0200 Subject: [PATCH 24/36] Unset executable flag and abort early if test produced a fatal exception or panic. --- src/runtime/gc_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) mode change 100755 => 100644 src/runtime/gc_test.go diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go old mode 100755 new mode 100644 index 0f6c7cd9aab0c5..75be3c45a3692a --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -1529,7 +1529,8 @@ func TestGoroutineLeakGC(t *testing.T) { // Zero tolerance policy for fatal exceptions or panics. if failStates.MatchString(output) { - t.Errorf("output:\n%s\n\nunexpected fatal exception or panic", output) + t.Errorf("unexpected fatal exception or panic!\noutput:\n%s\n\n", output) + return } // Extract all the goroutine leaks From 8987acc1ab3e63825fbb4afc90a1956661d10b7d Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Fri, 8 Aug 2025 14:53:17 +0200 Subject: [PATCH 25/36] Fixed a printing bug for GC cycles. Running without STW GC. Tightened next mark root loop syntax (and switched to non-loop variant for regular GC). 
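
For context on the mark-root change in this patch: with goroutine leak detection enabled the set of mark-root jobs is presumably extended between mark phases (as more goroutines are found to be possibly runnable), so the claim counter must not run past the published job count — an unconditional fetch-add would leave markrootNext inflated, and jobs appended afterwards could be skipped. The sketch below is illustrative only; the package and helper name are hypothetical, and the actual change is gcNextMarkRoot in the diff that follows.

    // Sketch only: claim the next job with a CAS loop bounded by the
    // currently published job count, so the counter never overshoots
    // and jobs published later are still handed out exactly once.
    package sketch

    import "sync/atomic"

    // nextJob returns the next unclaimed job index, or false if every
    // currently published job has already been claimed.
    func nextJob(next, jobs *atomic.Uint32) (uint32, bool) {
    	for n := next.Load(); n < jobs.Load(); n = next.Load() {
    		if next.CompareAndSwap(n, n+1) {
    			return n, true
    		}
    	}
    	return 0, false
    }
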
--- src/runtime/mgc.go | 13 ++++--------- src/runtime/mgcmark.go | 26 ++++++++++---------------- 2 files changed, 14 insertions(+), 25 deletions(-) diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 9821da4d040d51..ec94cb2543e098 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -733,10 +733,6 @@ func gcStart(trigger gcTrigger) { mode = gcForceMode } else if debug.gcstoptheworld == 2 { mode = gcForceBlockMode - } else if work.goroutineLeakFinder.pending.Load() { - // If goroutine leak detection has been enabled via profiling, - // stop the world during the marking phase. - mode = gcForceMode } // Ok, we're doing it! Stop everybody else @@ -936,9 +932,6 @@ func gcMarkDone() { // Ensure only one thread is running the ragged barrier at a // time. semacquire(&work.markDoneSema) - if work.goroutineLeakFinder.enabled { - findMaybeRunnableGoroutines() - } top: // Re-check transition condition under transition lock. @@ -1381,9 +1374,11 @@ func gcMarkTermination(stw worldStop) { gcRestoreSyncObjects() } + var goroutineLeakDetectionDone bool systemstack(func() { // Pull the GC out of goroutine leak detection mode. work.goroutineLeakFinder.enabled = false + goroutineLeakDetectionDone = work.goroutineLeakFinder.done work.goroutineLeakFinder.done = false // The memstats updated above must be updated with the world @@ -1460,8 +1455,8 @@ func gcMarkTermination(stw worldStop) { print("gc ", memstats.numgc, " @", string(itoaDiv(sbuf[:], uint64(work.tSweepTerm-runtimeInitTime)/1e6, 3)), "s ", util, "%") - if work.goroutineLeakFinder.done { - print(" (goroutine leak finder GC)") + if goroutineLeakDetectionDone { + print(" (checking for goroutine leaks)") } print(": ") prev := work.tSweepTerm diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index f9ceaaf0f9f950..419f6cacde93fd 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -1199,24 +1199,18 @@ func gcDrainMarkWorkerFractional(gcw *gcWork) { // is valid, and false if there are no more root jobs to be claimed, // i.e. work.markrootNext >= work.markrootJobs. func gcNextMarkRoot() (uint32, bool) { - var success bool - next, jobs := work.markrootNext.Load(), work.markrootJobs.Load() + if !work.goroutineLeakFinder.enabled { + // If not running goroutine leak detection, behave as the GC previously did. + job := work.markrootNext.Add(1) - 1 + return job, job < work.markrootJobs.Load() + } - if next < jobs { - // still work available at the moment - for !success { - success = work.markrootNext.CompareAndSwap(next, next+1) + // Otherwise, use a CAS loop to increment markrootNext. + for next, jobs := work.markrootNext.Load(), work.markrootJobs.Load(); next < jobs; next = work.markrootNext.Load() { + // There is still work available at the moment. + if work.markrootNext.CompareAndSwap(next, next+1) { // We manage to snatch a root job. Return the root index. - if success { - return next, true - } - - // Get the latest value of markrootNext. - next = work.markrootNext.Load() - // We are out of markroot jobs. - if next >= jobs { - break - } + return next, true } } return 0, false From e700358647592e95f3e1cb04f4d13d240b3a1ab2 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Fri, 8 Aug 2025 15:07:13 +0200 Subject: [PATCH 26/36] Demoted etcd/7902 to flaky. 
--- src/runtime/gc_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 75be3c45a3692a..ace8acfcda3f33 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -1346,7 +1346,7 @@ func TestGoroutineLeakGC(t *testing.T) { `Etcd7492\.func2\.1\(.* \[chan send\]`, `\(\*simpleTokenTTLKeeper_etcd7492\)\.run\(.* \[sync\.Mutex\.Lock\]`, ), - makeTest("Etcd7902", + makeFlakyTest("Etcd7902", `doRounds_etcd7902\.func1\(.* \[chan receive\]`, `doRounds_etcd7902\.func1\(.* \[sync\.Mutex\.Lock\]`, `runElectionFunc_etcd7902\(.* \[sync\.WaitGroup\.Wait\]`, From 0546f69a0488f9d91db1a781ae6c3433e88188d3 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Fri, 8 Aug 2025 15:37:29 +0200 Subject: [PATCH 27/36] Removed one goroutine in pattern leaks. --- src/runtime/testdata/testgoroutineleakgc/patterns.go | 1 - 1 file changed, 1 deletion(-) diff --git a/src/runtime/testdata/testgoroutineleakgc/patterns.go b/src/runtime/testdata/testgoroutineleakgc/patterns.go index 41377c5656edc5..488b46f2e308a2 100644 --- a/src/runtime/testdata/testgoroutineleakgc/patterns.go +++ b/src/runtime/testdata/testgoroutineleakgc/patterns.go @@ -195,7 +195,6 @@ func EarlyReturn() { prof.WriteTo(os.Stdout, 2) }() - go earlyReturn(nil) go earlyReturn(fmt.Errorf("error")) } From c9cd1ddd6095f0c572d2e5ec02ad4fc56be1cd31 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Fri, 8 Aug 2025 16:04:53 +0200 Subject: [PATCH 28/36] Serialize simple tests for less scheduling pressure. --- src/runtime/gc_test.go | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index ace8acfcda3f33..0885cc7eba70ef 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -1108,6 +1108,7 @@ func TestGoroutineLeakGC(t *testing.T) { // Repetitions are used to amortize flakiness in some tests. type testCase struct { name string + simple bool expectedLeaks map[*regexp.Regexp]bool // flakyLeaks are goroutine leaks that are too flaky to be reliably detected. @@ -1235,6 +1236,13 @@ func TestGoroutineLeakGC(t *testing.T) { makeTest("NoLeakGlobal"), } + // Set all micro tests to simple so that they are executed serially. + // This reduces scheduling pressure on the test runner, and improves + // reliability. + for i := range microTests { + microTests[i].simple = true + } + // Common goroutine leak patterns. // Extracted from "Unveiling and Vanquishing Goroutine Leaks in Enterprise Microservices: A Dynamic Analysis Approach" // doi:10.1109/CGO57630.2024.10444835 @@ -1255,6 +1263,13 @@ func TestGoroutineLeakGC(t *testing.T) { `timeout\.func1.* \[chan send\]`), } + // Set all pattern tests to simple so that they are executed serially. + // This reduces scheduling pressure on the test runner, and improves + // reliability. + for i := range patternTestCases { + patternTestCases[i].simple = true + } + // GoKer tests from "GoBench: A Benchmark Suite of Real-World Go Concurrency Bugs". // White paper found at https://lujie.ac.cn/files/papers/GoBench.pdf // doi:10.1109/CGO51591.2021.9370317. @@ -1518,8 +1533,11 @@ func TestGoroutineLeakGC(t *testing.T) { for _, tcase := range testCases { t.Run(tcase.name, func(t *testing.T) { - // Run tests in parallel. - t.Parallel() + if !tcase.simple { + // Run complex tests in parallel. Do this because such tests + // are flaky and we do not necessarily care about their output. + t.Parallel() + } // Run program and get output trace. 
output := runBuiltTestProg(t, exe, tcase.name, "GODEBUG=asyncpreemptoff=1") From 25da27483bfed6562ef74c4b84cb2badd3d4e0d5 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Mon, 11 Aug 2025 10:46:40 +0200 Subject: [PATCH 29/36] Maybe runnable checks if goroutines are in a waiting state. Prevents a bug where runnable goroutines are not treated as maybe runnable. --- src/runtime/mgc.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index ec94cb2543e098..72dde1704675fb 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1093,6 +1093,12 @@ top: // as the GC marking phase progresses. It returns false for leaked goroutines, or for // goroutines which are not yet computed as possibly runnable by the GC. func (gp *g) isMaybeRunnable() bool { + // Check whether the goroutine is actually in a waiting state first. + if readgstatus(gp) != _Gwaiting { + // If the goroutine is not waiting, then clearly it is maybe runnable. + return true + } + switch gp.waitreason { case waitReasonSelectNoCases, waitReasonChanSendNilChan, From a7feaf0f0be0373d2176ba19a45f75c7c53b9715 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Mon, 11 Aug 2025 14:15:36 +0200 Subject: [PATCH 30/36] Improved control over scheduler for micro tests. --- src/runtime/gc_test.go | 19 +++++++--- .../testgoroutineleakgc/goroutineleakgc.go | 35 +++++++++++-------- .../testdata/testgoroutineleakgc/patterns.go | 34 ++++++++++++------ 3 files changed, 58 insertions(+), 30 deletions(-) diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 0885cc7eba70ef..9d25966b2b9bdf 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -1260,6 +1260,9 @@ func TestGoroutineLeakGC(t *testing.T) { `nCastLeak\.func1.* \[chan send\]`, `NCastLeak\.func2.* \[chan receive\]`), makeTest("Timeout", + // (vsaioc): Timeout is *theoretically* flaky, but the + // pseudo-random choice for select case branches makes it + // practically impossible for it to fail. `timeout\.func1.* \[chan send\]`), } @@ -1533,14 +1536,20 @@ func TestGoroutineLeakGC(t *testing.T) { for _, tcase := range testCases { t.Run(tcase.name, func(t *testing.T) { - if !tcase.simple { - // Run complex tests in parallel. Do this because such tests - // are flaky and we do not necessarily care about their output. - t.Parallel() + t.Parallel() + + cmdEnv := []string{ + "GODEBUG=asyncpreemptoff=1", + } + + if tcase.simple { + // If the test is simple, set GOMAXPROCS=1 in order to better + // control the behavior of the scheduler. + cmdEnv = append(cmdEnv, "GOMAXPROCS=1") } // Run program and get output trace. - output := runBuiltTestProg(t, exe, tcase.name, "GODEBUG=asyncpreemptoff=1") + output := runBuiltTestProg(t, exe, tcase.name, cmdEnv...) if len(output) == 0 { t.Fatalf("Test produced no output. Is the goroutine leak profile collected?") } diff --git a/src/runtime/testdata/testgoroutineleakgc/goroutineleakgc.go b/src/runtime/testdata/testgoroutineleakgc/goroutineleakgc.go index 7a3e087f5091cf..0412b9cbdca5b5 100644 --- a/src/runtime/testdata/testgoroutineleakgc/goroutineleakgc.go +++ b/src/runtime/testdata/testgoroutineleakgc/goroutineleakgc.go @@ -2,13 +2,18 @@ package main import ( "os" + "runtime" "runtime/pprof" "sync" - "time" ) // This is a set of micro-tests with obvious goroutine leaks that // ensures goroutine leak detection works. +// +// Tests in this file are not flaky iff. run with GOMAXPROCS=1. +// The main goroutine forcefully yields via `runtime.Gosched()` before +// running the profiler. 
This moves them to the back of the run queue, +// allowing the leaky goroutines to be scheduled beforehand and get stuck. func init() { register("NilRecv", NilRecv) @@ -34,7 +39,7 @@ func NilRecv() { <-c panic("should not be reached") }() - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) } @@ -45,7 +50,7 @@ func NilSend() { c <- 0 panic("should not be reached") }() - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) } @@ -55,7 +60,7 @@ func ChanRecv() { <-make(chan int) panic("should not be reached") }() - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) } @@ -65,7 +70,7 @@ func SelectNoCases() { select {} panic("should not be reached") }() - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) } @@ -75,7 +80,7 @@ func ChanSend() { make(chan int) <- 0 panic("should not be reached") }() - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) } @@ -88,7 +93,7 @@ func Select() { } panic("should not be reached") }() - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) } @@ -100,7 +105,7 @@ func WaitGroup() { wg.Wait() panic("should not be reached") }() - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) } @@ -114,7 +119,7 @@ func MutexStack() { panic("should not be reached") }() } - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) } @@ -130,7 +135,7 @@ func MutexHeap() { }() }() } - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) } @@ -142,7 +147,7 @@ func RWMutexRLock() { mu.RLock() panic("should not be reached") }() - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) } @@ -154,7 +159,7 @@ func RWMutexLock() { mu.Lock() panic("should not be reached") }() - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) } @@ -166,7 +171,7 @@ func Cond() { cond.Wait() panic("should not be reached") }() - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) } @@ -185,7 +190,7 @@ func Mixed() { <-ch panic("should not be reached") }() - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) } @@ -197,6 +202,6 @@ func NoLeakGlobal() { go func() { <-ch }() - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) } diff --git a/src/runtime/testdata/testgoroutineleakgc/patterns.go b/src/runtime/testdata/testgoroutineleakgc/patterns.go index 488b46f2e308a2..295e5f18520b73 100644 --- a/src/runtime/testdata/testgoroutineleakgc/patterns.go +++ b/src/runtime/testdata/testgoroutineleakgc/patterns.go @@ -9,6 +9,15 @@ import ( "time" ) +// Common goroutine leak patterns. Extracted from: +// "Unveiling and Vanquishing Goroutine Leaks in Enterprise Microservices: A Dynamic Analysis Approach" +// doi:10.1109/CGO57630.2024.10444835 +// +// Tests in this file are not flaky iff. the test is run with GOMAXPROCS=1. +// The main goroutine forcefully yields via `runtime.Gosched()` before +// running the profiler. This moves them to the back of the run queue, +// allowing the leaky goroutines to be scheduled beforehand and get stuck. 
+ func init() { register("NoCloseRange", NoCloseRange) register("MethodContractViolation", MethodContractViolation) @@ -115,7 +124,7 @@ func workerLifecycle(items []any) { func MethodContractViolation() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() @@ -139,7 +148,7 @@ func DoubleSend() { prof := pprof.Lookup("goroutineleak") ch := make(chan any) defer func() { - time.Sleep(1000 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() @@ -191,7 +200,7 @@ func earlyReturn(err error) { func EarlyReturn() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() @@ -206,7 +215,7 @@ func nCastLeak(items []any) { // Iterate over every item for range items { go func() { - // deadlocks: 99 + // deadlocks: 4 // Process item and send result to channel ch <- struct{}{} @@ -221,7 +230,11 @@ func nCastLeak(items []any) { func NCastLeak() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(100 * time.Millisecond) + for i := 0; i < 10; i++ { + // Yield enough times to allow all the leaky goroutines to + // reach the execution point. + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() @@ -231,11 +244,12 @@ func NCastLeak() { }() go func() { - nCastLeak(make([]any, 100)) + nCastLeak(make([]any, 5)) }() } -// A context is provided to short-circuit evaluation. +// A context is provided to short-circuit evaluation, leading +// the sender goroutine to leak. func timeout(ctx context.Context) { ch := make(chan any) @@ -246,16 +260,16 @@ func timeout(ctx context.Context) { select { case <-ch: // Receive message - // Sender is released + // Sender is released case <-ctx.Done(): // Context was cancelled or timed out - // Sender is stuck + // Sender is leaked } } func Timeout() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() From 93fd4d1b957578761686e79b052f00abce9bfe68 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Mon, 11 Aug 2025 14:43:39 +0200 Subject: [PATCH 31/36] Removed STW from goroutine profile. --- src/runtime/mprof.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go index 7969c11211580d..1e0f563a2930bf 100644 --- a/src/runtime/mprof.go +++ b/src/runtime/mprof.go @@ -1333,7 +1333,6 @@ func goroutineLeakProfileWithLabelsConcurrent(p []profilerecord.StackRecord, lab // goroutine stack, because it is obviously not a leaked goroutine. pcbuf := makeProfStack() // see saveg() for explanation - stw := stopTheWorld(stwGoroutineProfile) // Using gleakcount while the world is stopped should give us a consistent view // of the number of leaked goroutines. n = int(gleakcount()) @@ -1342,7 +1341,6 @@ func goroutineLeakProfileWithLabelsConcurrent(p []profilerecord.StackRecord, lab // There's not enough space in p to store the whole profile, so (per the // contract of runtime.GoroutineProfile) we're not allowed to write to p // at all and must return n, false. - startTheWorld(stw) semrelease(&goroutineProfile.sema) return n, false } @@ -1354,7 +1352,6 @@ func goroutineLeakProfileWithLabelsConcurrent(p []profilerecord.StackRecord, lab goroutineProfile.active = true goroutineProfile.records = p goroutineProfile.labels = labels - startTheWorld(stw) // Visit each leaked goroutine that existed as of the startTheWorld call above. 
forEachGRace(func(gp1 *g) { @@ -1363,12 +1360,10 @@ func goroutineLeakProfileWithLabelsConcurrent(p []profilerecord.StackRecord, lab } }) - stw = stopTheWorld(stwGoroutineProfileCleanup) endOffset := goroutineProfile.offset.Swap(0) goroutineProfile.active = false goroutineProfile.records = nil goroutineProfile.labels = nil - startTheWorld(stw) // Restore the invariant that every goroutine struct in allgs has its // goroutineProfiled field cleared. From 40802752e4955753ecd7f9f84fca0c6ea9400c31 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Mon, 11 Aug 2025 14:48:04 +0200 Subject: [PATCH 32/36] Tightened control over runtime schedule for small tests with GOMAXPROCS. Expanded test set. --- src/runtime/gc_test.go | 88 ++++++++++++++----- .../{patterns.go => commonpatterns.go} | 0 .../{goroutineleakgc.go => simple.go} | 0 .../testgoroutineleakgc/stresstests.go | 44 ++++++++++ 4 files changed, 110 insertions(+), 22 deletions(-) rename src/runtime/testdata/testgoroutineleakgc/{patterns.go => commonpatterns.go} (100%) rename src/runtime/testdata/testgoroutineleakgc/{goroutineleakgc.go => simple.go} (100%) create mode 100644 src/runtime/testdata/testgoroutineleakgc/stresstests.go diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 9d25966b2b9bdf..f4c780abbc9c07 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -1220,19 +1220,46 @@ func TestGoroutineLeakGC(t *testing.T) { // Micro tests involve very simple leaks for each type of concurrency primitive operation. microTests := []testCase{ - makeTest("NilRecv", `\[chan receive \(nil chan\)\]`), - makeTest("NilSend", `\[chan send \(nil chan\)\]`), - makeTest("SelectNoCases", `\[select \(no cases\)\]`), - makeTest("ChanRecv", `\[chan receive\]`), - makeTest("ChanSend", `\[chan send\]`), - makeTest("Select", `\[select\]`), - makeTest("WaitGroup", `\[sync\.WaitGroup\.Wait\]`), - makeTest("MutexStack", `\[sync\.Mutex\.Lock\]`), - makeTest("MutexHeap", `\[sync\.Mutex\.Lock\]`), - makeTest("Cond", `\[sync\.Cond\.Wait\]`), - makeTest("RWMutexRLock", `\[sync\.RWMutex\.RLock\]`), - makeTest("RWMutexLock", `\[sync\.(RW)?Mutex\.Lock\]`), - makeTest("Mixed", `\[sync\.WaitGroup\.Wait\]`, `\[chan send\]`), + makeTest("NilRecv", + `NilRecv\.func1\(.* \[chan receive \(nil chan\)\]`, + ), + makeTest("NilSend", + `NilSend\.func1\(.* \[chan send \(nil chan\)\]`, + ), + makeTest("SelectNoCases", + `SelectNoCases\.func1\(.* \[select \(no cases\)\]`, + ), + makeTest("ChanRecv", + `ChanRecv\.func1\(.* \[chan receive\]`, + ), + makeTest("ChanSend", + `ChanSend\.func1\(.* \[chan send\]`, + ), + makeTest("Select", + `Select\.func1\(.* \[select\]`, + ), + makeTest("WaitGroup", + `WaitGroup\.func1\(.* \[sync\.WaitGroup\.Wait\]`, + ), + makeTest("MutexStack", + `MutexStack\.func1\(.* \[sync\.Mutex\.Lock\]`, + ), + makeTest("MutexHeap", + `MutexHeap\.func1.1\(.* \[sync\.Mutex\.Lock\]`, + ), + makeTest("Cond", + `Cond\.func1\(.* \[sync\.Cond\.Wait\]`, + ), + makeTest("RWMutexRLock", + `RWMutexRLock\.func1\(.* \[sync\.RWMutex\.RLock\]`, + ), + makeTest("RWMutexLock", + `RWMutexLock\.func1\(.* \[sync\.(RW)?Mutex\.Lock\]`, + ), + makeTest("Mixed", + `Mixed\.func1\(.* \[sync\.WaitGroup\.Wait\]`, + `Mixed\.func1.1\(.* \[chan send\]`, + ), makeTest("NoLeakGlobal"), } @@ -1243,27 +1270,42 @@ func TestGoroutineLeakGC(t *testing.T) { microTests[i].simple = true } + // Stress tests are flaky and we do not strictly care about their output. + // They are only intended to stress the goroutine leak detector and profiling + // infrastructure in interesting ways. 
+ stressTestCases := []testCase{ + makeFlakyTest("SpawnGC", + `spawnGC.func1\(.* \[chan receive\]`, + ), + } + // Common goroutine leak patterns. // Extracted from "Unveiling and Vanquishing Goroutine Leaks in Enterprise Microservices: A Dynamic Analysis Approach" // doi:10.1109/CGO57630.2024.10444835 patternTestCases := []testCase{ makeTest("NoCloseRange", `noCloseRange\(.* \[chan send\]`, - `noCloseRange\.func1\(.* \[chan receive\]`), + `noCloseRange\.func1\(.* \[chan receive\]`, + ), makeTest("MethodContractViolation", - `worker\.Start\.func1.* \[select\]`), + `worker\.Start\.func1\(.* \[select\]`, + ), makeTest("DoubleSend", - `DoubleSend\.func3.* \[chan send\]`), + `DoubleSend\.func3\(.* \[chan send\]`, + ), makeTest("EarlyReturn", - `earlyReturn\.func1.* \[chan send\]`), + `earlyReturn\.func1\(.* \[chan send\]`, + ), makeTest("NCastLeak", - `nCastLeak\.func1.* \[chan send\]`, - `NCastLeak\.func2.* \[chan receive\]`), + `nCastLeak\.func1\(.* \[chan send\]`, + `NCastLeak\.func2\(.* \[chan receive\]`, + ), makeTest("Timeout", // (vsaioc): Timeout is *theoretically* flaky, but the // pseudo-random choice for select case branches makes it // practically impossible for it to fail. - `timeout\.func1.* \[chan send\]`), + `timeout\.func1\(.* \[chan send\]`, + ), } // Set all pattern tests to simple so that they are executed serially. @@ -1522,7 +1564,8 @@ func TestGoroutineLeakGC(t *testing.T) { } // Combine all test cases into a single list. - testCases := append(microTests, patternTestCases...) + testCases := append(microTests, stressTestCases...) + testCases = append(testCases, patternTestCases...) testCases = append(testCases, gokerTestCases...) // Test cases must not panic or cause fatal exceptions. @@ -1537,9 +1580,10 @@ func TestGoroutineLeakGC(t *testing.T) { for _, tcase := range testCases { t.Run(tcase.name, func(t *testing.T) { t.Parallel() - + cmdEnv := []string{ "GODEBUG=asyncpreemptoff=1", + "GOEXPERIMENT=greenteagc", } if tcase.simple { diff --git a/src/runtime/testdata/testgoroutineleakgc/patterns.go b/src/runtime/testdata/testgoroutineleakgc/commonpatterns.go similarity index 100% rename from src/runtime/testdata/testgoroutineleakgc/patterns.go rename to src/runtime/testdata/testgoroutineleakgc/commonpatterns.go diff --git a/src/runtime/testdata/testgoroutineleakgc/goroutineleakgc.go b/src/runtime/testdata/testgoroutineleakgc/simple.go similarity index 100% rename from src/runtime/testdata/testgoroutineleakgc/goroutineleakgc.go rename to src/runtime/testdata/testgoroutineleakgc/simple.go diff --git a/src/runtime/testdata/testgoroutineleakgc/stresstests.go b/src/runtime/testdata/testgoroutineleakgc/stresstests.go new file mode 100644 index 00000000000000..397963b26120e1 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/stresstests.go @@ -0,0 +1,44 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" +) + +const spawnGCMaxDepth = 5 + +func init() { + register("Spawn", SpawnGC) +} + +func spawnGC(i int) { + prof := pprof.Lookup("goroutineleak") + if i == 0 { + return + } + wg := &sync.WaitGroup{} + wg.Add(i + 1) + go func() { + wg.Done() + // deadlocks: x > 0 + <-make(chan int) + }() + for j := 0; j < i; j++ { + go func() { + wg.Done() + spawnGC(i - 1) + }() + } + wg.Wait() + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) +} + +// SpawnGC spawns a tree of goroutine leaks and calls the goroutine leak profiler +// for each node in the tree. It is supposed to stress the goroutine leak profiler +// under a heavily concurrent workload. 
+func SpawnGC() { + spawnGC(spawnGCMaxDepth) +} From a7c42754522b379774d926a3490158e7f3ae4973 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Mon, 11 Aug 2025 17:09:15 +0200 Subject: [PATCH 33/36] Forced more microbenchmarks into deterministic behavior. --- src/runtime/gc_test.go | 40 +++++++----------- .../testgoroutineleakgc/cockroach13197.go | 4 +- .../testgoroutineleakgc/cockroach13755.go | 4 +- .../testgoroutineleakgc/cockroach24808.go | 4 +- .../testgoroutineleakgc/cockroach25456.go | 4 +- .../testgoroutineleakgc/cockroach35073.go | 4 +- .../testgoroutineleakgc/cockroach35931.go | 4 +- .../testgoroutineleakgc/cockroach584.go | 22 ++++------ .../testdata/testgoroutineleakgc/etcd10492.go | 3 +- .../testdata/testgoroutineleakgc/etcd5509.go | 42 +++++++++---------- .../testdata/testgoroutineleakgc/etcd6708.go | 4 +- .../testdata/testgoroutineleakgc/grpc660.go | 4 +- .../testdata/testgoroutineleakgc/grpc862.go | 3 +- .../testgoroutineleakgc/kubernetes13135.go | 1 - .../testgoroutineleakgc/kubernetes25331.go | 4 +- .../testgoroutineleakgc/kubernetes38669.go | 4 +- .../testgoroutineleakgc/kubernetes5316.go | 2 + .../testdata/testgoroutineleakgc/moby25348.go | 4 +- .../testdata/testgoroutineleakgc/moby29733.go | 4 +- .../testdata/testgoroutineleakgc/moby30408.go | 20 ++++----- .../testdata/testgoroutineleakgc/moby7559.go | 8 +--- .../testgoroutineleakgc/syncthing4829.go | 20 ++++----- .../testgoroutineleakgc/syncthing5795.go | 4 +- 23 files changed, 96 insertions(+), 117 deletions(-) diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index f4c780abbc9c07..1e78970b4968ba 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -1143,7 +1143,9 @@ func TestGoroutineLeakGC(t *testing.T) { // makeTest is a short-hand for creating non-flaky test cases. makeTest := func(name string, leaks ...string) testCase { - return makeAnyTest(name, false, leaks...) + tcase := makeAnyTest(name, false, leaks...) + tcase.simple = true + return tcase } // makeFlakyTest is a short-hand for creating flaky test cases. @@ -1263,13 +1265,6 @@ func TestGoroutineLeakGC(t *testing.T) { makeTest("NoLeakGlobal"), } - // Set all micro tests to simple so that they are executed serially. - // This reduces scheduling pressure on the test runner, and improves - // reliability. - for i := range microTests { - microTests[i].simple = true - } - // Stress tests are flaky and we do not strictly care about their output. // They are only intended to stress the goroutine leak detector and profiling // infrastructure in interesting ways. @@ -1308,22 +1303,17 @@ func TestGoroutineLeakGC(t *testing.T) { ), } - // Set all pattern tests to simple so that they are executed serially. - // This reduces scheduling pressure on the test runner, and improves - // reliability. - for i := range patternTestCases { - patternTestCases[i].simple = true - } - // GoKer tests from "GoBench: A Benchmark Suite of Real-World Go Concurrency Bugs". // White paper found at https://lujie.ac.cn/files/papers/GoBench.pdf // doi:10.1109/CGO51591.2021.9370317. // // This list is curated for tests that are not excessively flaky. // Some tests are also excluded because they are redundant. + // + // TODO(vsaioc): Some of these might be removable (their patterns may overlap). 
gokerTestCases := []testCase{ - makeFlakyTest("Cockroach584", - `Cockroach584\.func2\.1\(.* \[sync\.Mutex\.Lock\]`, + makeTest("Cockroach584", + `Cockroach584\.func2\(.* \[sync\.Mutex\.Lock\]`, ), makeFlakyTest("Cockroach1055", `Cockroach1055\.func2\(.* \[chan receive\]`, @@ -1388,10 +1378,10 @@ func TestGoroutineLeakGC(t *testing.T) { makeTest("Cockroach35931", `Cockroach35931\.func2\(.* \[chan send\]`, ), - makeFlakyTest("Etcd5509", + makeTest("Etcd5509", `Etcd5509\.func2\(.* \[sync\.RWMutex\.Lock\]`, ), - makeFlakyTest("Etcd6708", + makeTest("Etcd6708", `Etcd6708\.func2\(.* \[sync\.RWMutex\.RLock\]`, ), makeFlakyTest("Etcd6857", @@ -1421,7 +1411,7 @@ func TestGoroutineLeakGC(t *testing.T) { `\(\*Server_grpc795\)\.Serve\(.* \[sync\.Mutex\.Lock\]`, `testServerGracefulStopIdempotent_grpc795\(.* \[sync\.Mutex\.Lock\]`, ), - makeFlakyTest("Grpc862", + makeTest("Grpc862", `DialContext_grpc862\.func2\(.* \[chan receive\]`), makeTest("Grpc1275", `testInflightStreamClosing_grpc1275\.func1\(.* \[chan receive\]`), @@ -1462,7 +1452,7 @@ func TestGoroutineLeakGC(t *testing.T) { // makeTest(testCase{name: "Kubernetes1321"}, // `NewMux_kubernetes1321\.gowrap1\(.* \[chan send\]`, // `testMuxWatcherClose_kubernetes1321\(.* \[sync\.Mutex\.Lock\]`), - makeFlakyTest("Kubernetes5316", + makeTest("Kubernetes5316", `finishRequest_kubernetes5316\.func1\(.* \[chan send\]`, ), makeFlakyTest("Kubernetes6632", @@ -1546,7 +1536,7 @@ func TestGoroutineLeakGC(t *testing.T) { makeFlakyTest("Moby33781", `monitor_moby33781\.func1\(.* \[chan send\]`, ), - makeTest("Moby36114", + makeFlakyTest("Moby36114", `\(\*serviceVM_moby36114\)\.hotAddVHDsAtStart\(.* \[sync\.Mutex\.Lock\]`, ), makeFlakyTest("Serving2137", @@ -1554,7 +1544,7 @@ func TestGoroutineLeakGC(t *testing.T) { `\(\*Breaker_serving2137\)\.concurrentRequest\.func1\(.* \[sync\.Mutex\.Lock\]`, `Serving2137\.func2\(.* \[chan receive\]`, ), - makeFlakyTest("Syncthing4829", + makeTest("Syncthing4829", `Syncthing4829\.func2\(.* \[sync\.RWMutex\.RLock\]`, ), makeTest("Syncthing5795", @@ -1662,10 +1652,10 @@ func TestGoroutineLeakGC(t *testing.T) { var errors []error if len(unexpectedLeaks) > 0 { - errors = append(errors, fmt.Errorf("unexpected goroutine leaks:\n%s", strings.Join(unexpectedLeaks, "\n"))) + errors = append(errors, fmt.Errorf("unexpected goroutine leaks:\n%s\n", strings.Join(unexpectedLeaks, "\n"))) } if len(missingLeakStrs) > 0 { - errors = append(errors, fmt.Errorf("missing expected leaks:\n%s", strings.Join(missingLeakStrs, ", "))) + errors = append(errors, fmt.Errorf("missing expected leaks:\n%s\n", strings.Join(missingLeakStrs, ", "))) } if len(errors) > 0 { t.Fatalf("Failed with the following errors:\n%s\n\noutput:\n%s", errors, output) diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go b/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go index 9496fcbc7c688a..195c911218e85a 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go @@ -12,8 +12,8 @@ package main import ( "context" "os" + "runtime" "runtime/pprof" - "time" ) func init() { @@ -64,7 +64,7 @@ func (tx *Tx_cockroach13197) close() { func Cockroach13197() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(100 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go b/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go index 1435d214eaca0d..063446efe02782 100644 --- 
a/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go @@ -14,8 +14,8 @@ package main import ( "context" "os" + "runtime" "runtime/pprof" - "time" ) func init() { @@ -51,7 +51,7 @@ func (rs *Rows_cockroach13755) close(err error) { func Cockroach13755() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(100 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go b/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go index f11fe94fa4fb00..9621a91e52b5bb 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go @@ -3,9 +3,9 @@ package main import ( "context" "os" + "runtime" "runtime/pprof" "sync" - "time" ) func init() { @@ -67,7 +67,7 @@ func (c *Compactor_cockroach24808) Start(ctx context.Context, stopper *Stopper_c func Cockroach24808() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(100 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go b/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go index 765b1a7517728b..dfda17c1b9172e 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go @@ -2,8 +2,8 @@ package main import ( "os" + "runtime" "runtime/pprof" - "time" ) func init() { @@ -77,7 +77,7 @@ func (tc *testContext_cockroach25456) StartWithStoreConfig(stopper *Stopper_cock func Cockroach25456() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(100 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go b/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go index 7a0dd6b8f0350c..0ffc6609f9aa30 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go @@ -2,10 +2,10 @@ package main import ( "os" + "runtime" "runtime/pprof" "sync" "sync/atomic" - "time" ) func init() { @@ -90,7 +90,7 @@ func (m *outbox_cockroach35073) run(wg *sync.WaitGroup) { func Cockroach35073() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(100 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go b/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go index 863033cdfad533..edbcdc3720d373 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go @@ -2,9 +2,9 @@ package main import ( "os" + "runtime" "runtime/pprof" "sync" - "time" ) func init() { @@ -93,7 +93,7 @@ func makeFlowRegistry_cockroach35931() *flowRegistry_cockroach35931 { func Cockroach35931() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(100 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach584.go b/src/runtime/testdata/testgoroutineleakgc/cockroach584.go index a49f89b162a1ac..cfb9902592791d 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach584.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach584.go @@ -2,9 +2,9 @@ package main import ( "os" + "runtime" 
"runtime/pprof" "sync" - "time" ) func init() { @@ -41,20 +41,16 @@ func (g *gossip_cockroach584) manage() { func Cockroach584() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() - for i := 0; i < 100; i++ { - go func() { - g := &gossip_cockroach584{ - closed: true, - } - go func() { - // deadlocks: x > 0 - g.bootstrap() - g.manage() - }() - }() + g := &gossip_cockroach584{ + closed: true, } + go func() { + // deadlocks: 1 + g.bootstrap() + g.manage() + }() } diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd10492.go b/src/runtime/testdata/testgoroutineleakgc/etcd10492.go index f46d23f08be1b8..bbeb24290829d4 100644 --- a/src/runtime/testdata/testgoroutineleakgc/etcd10492.go +++ b/src/runtime/testdata/testgoroutineleakgc/etcd10492.go @@ -3,6 +3,7 @@ package main import ( "context" "os" + "runtime" "runtime/pprof" "sync" "time" @@ -45,7 +46,7 @@ func (le *lessor_etcd10492) Renew() { func Etcd10492() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(10 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd5509.go b/src/runtime/testdata/testgoroutineleakgc/etcd5509.go index ab7c6d24dca126..7c87fd64e7bb42 100644 --- a/src/runtime/testdata/testgoroutineleakgc/etcd5509.go +++ b/src/runtime/testdata/testgoroutineleakgc/etcd5509.go @@ -7,7 +7,6 @@ import ( "runtime" "runtime/pprof" "sync" - "time" ) func init() { @@ -93,31 +92,30 @@ func NewKV_etcd5509(c *Client_etcd5509) KV { func Etcd5509() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(100 * time.Millisecond) + runtime.Gosched() + runtime.Gosched() // Yield twice. prof.WriteTo(os.Stdout, 2) }() - for i := 0; i < 10; i++ { + go func() { + // deadlocks: 1 + ctx, _ := context.WithCancel(context.TODO()) + cli := &Client_etcd5509{ + ctx: ctx, + } + kv := NewKV_etcd5509(cli) + donec := make(chan struct{}) go func() { - // deadlocks: x > 0 - ctx, _ := context.WithCancel(context.TODO()) - cli := &Client_etcd5509{ - ctx: ctx, + defer close(donec) + err := kv.Get(context.TODO()) + if err != nil && err != ErrConnClosed_etcd5509 { + io.Discard.Write([]byte("Expect ErrConnClosed")) } - kv := NewKV_etcd5509(cli) - donec := make(chan struct{}) - go func() { - defer close(donec) - err := kv.Get(context.TODO()) - if err != nil && err != ErrConnClosed_etcd5509 { - io.Discard.Write([]byte("Expect ErrConnClosed")) - } - }() - - runtime.Gosched() - cli.Close() - - <-donec }() - } + + runtime.Gosched() + cli.Close() + + <-donec + }() } diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd6708.go b/src/runtime/testdata/testgoroutineleakgc/etcd6708.go index e45d13aa3e2bb2..16186fe9fc0a2c 100644 --- a/src/runtime/testdata/testgoroutineleakgc/etcd6708.go +++ b/src/runtime/testdata/testgoroutineleakgc/etcd6708.go @@ -3,9 +3,9 @@ package main import ( "context" "os" + "runtime" "runtime/pprof" "sync" - "time" ) func init() { @@ -80,7 +80,7 @@ func NewMembersAPI_etcd6708(c Client_etcd6708) MembersAPI_etcd6708 { func Etcd6708() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(100 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc660.go b/src/runtime/testdata/testgoroutineleakgc/grpc660.go index d9023eb8abff0d..9f458904ee6479 100644 --- a/src/runtime/testdata/testgoroutineleakgc/grpc660.go +++ b/src/runtime/testdata/testgoroutineleakgc/grpc660.go @@ -12,8 +12,8 @@ 
package main import ( "math/rand" "os" + "runtime" "runtime/pprof" - "time" ) func init() { @@ -56,7 +56,7 @@ func (bc *benchmarkClient_grpc660) doCloseLoopUnary() { func Grpc660() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(1 * time.Second) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc862.go b/src/runtime/testdata/testgoroutineleakgc/grpc862.go index 679fb052daf1f2..69c79e2f6d8fba 100644 --- a/src/runtime/testdata/testgoroutineleakgc/grpc862.go +++ b/src/runtime/testdata/testgoroutineleakgc/grpc862.go @@ -14,6 +14,7 @@ package main import ( "context" "os" + "runtime" "runtime/pprof" "time" ) @@ -100,7 +101,7 @@ func DialContext_grpc862(ctx context.Context) (conn *ClientConn_grpc862) { func Grpc862() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(100 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go index 19325ea3624886..a50a4c80ff8951 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go @@ -172,7 +172,6 @@ func NewCacher_kubernetes13135(stopCh <-chan struct{}) *Cacher_kubernetes13135 { /// watchCache.SetOnEvent() /// watchCache.initialized.Wait() /// Util(...) -/// /// cacher.startCaching() /// c.Lock() /// c.reflector.ListAndWatch() diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go index bce3b155fa969c..56d9d127af1dbb 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go @@ -16,8 +16,8 @@ import ( "context" "errors" "os" + "runtime" "runtime/pprof" - "time" ) func init() { @@ -71,7 +71,7 @@ func NewWatchChan_kubernetes25331() *watchChan_kubernetes25331 { func Kubernetes25331() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(100 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go index 3d9c8f203de157..1e132d9221bca6 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go @@ -2,9 +2,9 @@ package main import ( "os" + "runtime" "runtime/pprof" "sync" - "time" ) func init() { @@ -66,7 +66,7 @@ func newCacheWatcher_kubernetes38669(chanSize int, initEvents []watchCacheEvent_ func Kubernetes38669() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(1 * time.Second) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go index cd7c42eb4a1d6f..832d59ca504c6f 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go @@ -16,6 +16,7 @@ import ( "errors" "math/rand" "os" + "runtime" "runtime/pprof" "time" ) @@ -56,6 +57,7 @@ func Kubernetes5316() { prof := pprof.Lookup("goroutineleak") defer func() { time.Sleep(100 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/moby25348.go 
b/src/runtime/testdata/testgoroutineleakgc/moby25348.go index 3a7efda93d74b2..4ee34b1f08fec8 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby25348.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby25348.go @@ -14,9 +14,9 @@ package main import ( "os" + "runtime" "runtime/pprof" "sync" - "time" ) func init() { @@ -43,7 +43,7 @@ func (pm *Manager_moby25348) init() { func Moby25348() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(100 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/moby29733.go b/src/runtime/testdata/testgoroutineleakgc/moby29733.go index 10830acd4d9d82..4348cce5964187 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby29733.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby29733.go @@ -2,9 +2,9 @@ package main import ( "os" + "runtime" "runtime/pprof" "sync" - "time" ) func init() { @@ -55,7 +55,7 @@ func testActive_moby29733(p *Plugin_moby29733) { func Moby29733() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(100 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() diff --git a/src/runtime/testdata/testgoroutineleakgc/moby30408.go b/src/runtime/testdata/testgoroutineleakgc/moby30408.go index c38731aac73022..1c39fcd1b47988 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby30408.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby30408.go @@ -3,9 +3,9 @@ package main import ( "errors" "os" + "runtime" "runtime/pprof" "sync" - "time" ) func init() { @@ -38,7 +38,7 @@ func (p *Plugin_moby30408) activated() bool { func testActive_moby30408(p *Plugin_moby30408) { done := make(chan struct{}) go func() { - // deadlocks: 100 + // deadlocks: 1 p.waitActive() close(done) }() @@ -48,17 +48,15 @@ func testActive_moby30408(p *Plugin_moby30408) { func Moby30408() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(100 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() - for i := 0; i < 100; i++ { - go func() { - // deadlocks: 100 - p := &Plugin_moby30408{activateWait: sync.NewCond(&sync.Mutex{})} - p.activateErr = errors.New("some junk happened") + go func() { + // deadlocks: 1 + p := &Plugin_moby30408{activateWait: sync.NewCond(&sync.Mutex{})} + p.activateErr = errors.New("some junk happened") - testActive_moby30408(p) - }() - } + testActive_moby30408(p) + }() } diff --git a/src/runtime/testdata/testgoroutineleakgc/moby7559.go b/src/runtime/testdata/testgoroutineleakgc/moby7559.go index d0e3579e72115b..8b7ba9cae564bf 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby7559.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby7559.go @@ -45,11 +45,7 @@ func Moby7559() { prof.WriteTo(os.Stdout, 2) }() - for i := 0; i < 100; i++ { - go func() { - proxy := &UDPProxy_moby7559{} - // deadlocks: x > 0 - go proxy.Run() - }() + for i := 0; i < 20; i++ { + go (&UDPProxy_moby7559{}).Run() } } diff --git a/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go b/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go index fba60b32335520..89ac2c42d5a104 100644 --- a/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go +++ b/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go @@ -2,9 +2,9 @@ package main import ( "os" + "runtime" "runtime/pprof" "sync" - "time" ) func init() { @@ -70,18 +70,16 @@ func NewService_syncthing4829() *Service_syncthing4829 { func Syncthing4829() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(100 * 
time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() - for i := 0; i < 100; i++ { - go func() { - // deadlocks: x > 0 - natSvc := NewService_syncthing4829() - m := natSvc.NewMapping() - m.extAddresses["test"] = 0 + go func() { + // deadlocks: 1 + natSvc := NewService_syncthing4829() + m := natSvc.NewMapping() + m.extAddresses["test"] = 0 - natSvc.RemoveMapping(m) - }() - } + natSvc.RemoveMapping(m) + }() } diff --git a/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go b/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go index beca0fddd64ae3..25f9cc420837bf 100644 --- a/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go +++ b/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go @@ -2,9 +2,9 @@ package main import ( "os" + "runtime" "runtime/pprof" "sync" - "time" ) func init() { @@ -106,7 +106,7 @@ func NewConnection_syncthing5795(receiver Model_syncthing5795) Connection_syncth func Syncthing5795() { prof := pprof.Lookup("goroutineleak") defer func() { - time.Sleep(100 * time.Millisecond) + runtime.Gosched() prof.WriteTo(os.Stdout, 2) }() go func() { From 6d18877ae2fcf0f7c2cf0e9ebc290ca120cc64ef Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Tue, 12 Aug 2025 11:31:19 +0200 Subject: [PATCH 34/36] Increased number of yields for examples guaranteed to lead to goroutine leaks. --- .../testgoroutineleakgc/cockroach13197.go | 5 +- .../testgoroutineleakgc/cockroach13755.go | 5 +- .../testgoroutineleakgc/cockroach24808.go | 5 +- .../testgoroutineleakgc/cockroach25456.go | 5 +- .../testgoroutineleakgc/cockroach35073.go | 5 +- .../testgoroutineleakgc/cockroach35931.go | 5 +- .../testgoroutineleakgc/cockroach584.go | 5 +- .../testgoroutineleakgc/commonpatterns.go | 2 +- .../testdata/testgoroutineleakgc/etcd10492.go | 5 +- .../testdata/testgoroutineleakgc/etcd5509.go | 6 +- .../testdata/testgoroutineleakgc/etcd6708.go | 5 +- .../testdata/testgoroutineleakgc/grpc660.go | 5 +- .../testdata/testgoroutineleakgc/grpc862.go | 5 +- .../testdata/testgoroutineleakgc/hugo3251.go | 2 +- .../testgoroutineleakgc/kubernetes25331.go | 5 +- .../testgoroutineleakgc/kubernetes38669.go | 5 +- .../testgoroutineleakgc/kubernetes5316.go | 7 +- .../testdata/testgoroutineleakgc/main.go | 4 ++ .../testdata/testgoroutineleakgc/moby25348.go | 5 +- .../testdata/testgoroutineleakgc/moby29733.go | 5 +- .../testdata/testgoroutineleakgc/moby30408.go | 5 +- .../testdata/testgoroutineleakgc/simple.go | 70 +++++++++++++++---- .../testgoroutineleakgc/syncthing4829.go | 5 +- .../testgoroutineleakgc/syncthing5795.go | 5 +- 24 files changed, 144 insertions(+), 37 deletions(-) diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go b/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go index 195c911218e85a..342eb05b0765d5 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go @@ -64,7 +64,10 @@ func (tx *Tx_cockroach13197) close() { func Cockroach13197() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. 
+ for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go b/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go index 063446efe02782..101736c20789b7 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go @@ -51,7 +51,10 @@ func (rs *Rows_cockroach13755) close(err error) { func Cockroach13755() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go b/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go index 9621a91e52b5bb..f8997d1e99fe43 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go @@ -67,7 +67,10 @@ func (c *Compactor_cockroach24808) Start(ctx context.Context, stopper *Stopper_c func Cockroach24808() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go b/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go index dfda17c1b9172e..ad06301d0b7c27 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go @@ -77,7 +77,10 @@ func (tc *testContext_cockroach25456) StartWithStoreConfig(stopper *Stopper_cock func Cockroach25456() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go b/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go index 0ffc6609f9aa30..532ed0f507b963 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go @@ -90,7 +90,10 @@ func (m *outbox_cockroach35073) run(wg *sync.WaitGroup) { func Cockroach35073() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go b/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go index edbcdc3720d373..b06c5610eb7e0a 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go @@ -93,7 +93,10 @@ func makeFlowRegistry_cockroach35931() *flowRegistry_cockroach35931 { func Cockroach35931() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. 
+ for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach584.go b/src/runtime/testdata/testgoroutineleakgc/cockroach584.go index cfb9902592791d..470489b8c544b8 100644 --- a/src/runtime/testdata/testgoroutineleakgc/cockroach584.go +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach584.go @@ -41,7 +41,10 @@ func (g *gossip_cockroach584) manage() { func Cockroach584() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + for i := 0; i < yieldCount; i++ { + // Yield several times to allow the child goroutine to run. + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() diff --git a/src/runtime/testdata/testgoroutineleakgc/commonpatterns.go b/src/runtime/testdata/testgoroutineleakgc/commonpatterns.go index 295e5f18520b73..5a2d3e022c933a 100644 --- a/src/runtime/testdata/testgoroutineleakgc/commonpatterns.go +++ b/src/runtime/testdata/testgoroutineleakgc/commonpatterns.go @@ -230,7 +230,7 @@ func nCastLeak(items []any) { func NCastLeak() { prof := pprof.Lookup("goroutineleak") defer func() { - for i := 0; i < 10; i++ { + for i := 0; i < yieldCount; i++ { // Yield enough times to allow all the leaky goroutines to // reach the execution point. runtime.Gosched() diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd10492.go b/src/runtime/testdata/testgoroutineleakgc/etcd10492.go index bbeb24290829d4..826e029835b003 100644 --- a/src/runtime/testdata/testgoroutineleakgc/etcd10492.go +++ b/src/runtime/testdata/testgoroutineleakgc/etcd10492.go @@ -46,7 +46,10 @@ func (le *lessor_etcd10492) Renew() { func Etcd10492() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd5509.go b/src/runtime/testdata/testgoroutineleakgc/etcd5509.go index 7c87fd64e7bb42..fc7342c9350a22 100644 --- a/src/runtime/testdata/testgoroutineleakgc/etcd5509.go +++ b/src/runtime/testdata/testgoroutineleakgc/etcd5509.go @@ -92,8 +92,10 @@ func NewKV_etcd5509(c *Client_etcd5509) KV { func Etcd5509() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() - runtime.Gosched() // Yield twice. + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd6708.go b/src/runtime/testdata/testgoroutineleakgc/etcd6708.go index 16186fe9fc0a2c..8450d692c10e36 100644 --- a/src/runtime/testdata/testgoroutineleakgc/etcd6708.go +++ b/src/runtime/testdata/testgoroutineleakgc/etcd6708.go @@ -80,7 +80,10 @@ func NewMembersAPI_etcd6708(c Client_etcd6708) MembersAPI_etcd6708 { func Etcd6708() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. 
+ for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc660.go b/src/runtime/testdata/testgoroutineleakgc/grpc660.go index 9f458904ee6479..ea41d0d5e82e5a 100644 --- a/src/runtime/testdata/testgoroutineleakgc/grpc660.go +++ b/src/runtime/testdata/testgoroutineleakgc/grpc660.go @@ -56,7 +56,10 @@ func (bc *benchmarkClient_grpc660) doCloseLoopUnary() { func Grpc660() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc862.go b/src/runtime/testdata/testgoroutineleakgc/grpc862.go index 69c79e2f6d8fba..2c63d4c40a9255 100644 --- a/src/runtime/testdata/testgoroutineleakgc/grpc862.go +++ b/src/runtime/testdata/testgoroutineleakgc/grpc862.go @@ -101,7 +101,10 @@ func DialContext_grpc862(ctx context.Context) (conn *ClientConn_grpc862) { func Grpc862() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/hugo3251.go b/src/runtime/testdata/testgoroutineleakgc/hugo3251.go index de19aa10069480..f3b333b76dc5dd 100644 --- a/src/runtime/testdata/testgoroutineleakgc/hugo3251.go +++ b/src/runtime/testdata/testgoroutineleakgc/hugo3251.go @@ -51,7 +51,7 @@ func Hugo3251() { prof.WriteTo(os.Stdout, 2) }() - for i := 0; i < 10; i++ { + for i := 0; i < 11; i++ { go func() { // G1 // deadlocks: x > 0 url := "http://Foo.Bar/foo_Bar-Foo" diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go index 56d9d127af1dbb..25895f9f199f4c 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go @@ -71,7 +71,10 @@ func NewWatchChan_kubernetes25331() *watchChan_kubernetes25331 { func Kubernetes25331() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go index 1e132d9221bca6..9d00bb0da67658 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go @@ -66,7 +66,10 @@ func newCacheWatcher_kubernetes38669(chanSize int, initEvents []watchCacheEvent_ func Kubernetes38669() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. 
+ for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go index 832d59ca504c6f..3b34fd11a30ff3 100644 --- a/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go @@ -56,8 +56,13 @@ func finishRequest_kubernetes5316(timeout time.Duration, fn func() error) { func Kubernetes5316() { prof := pprof.Lookup("goroutineleak") defer func() { + // Wait a bit because the child goroutine relies on timed operations. time.Sleep(100 * time.Millisecond) - runtime.Gosched() + + // Yield several times to allow the child goroutine to run + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/main.go b/src/runtime/testdata/testgoroutineleakgc/main.go index ae491a2a978043..921fb98fd29e36 100644 --- a/src/runtime/testdata/testgoroutineleakgc/main.go +++ b/src/runtime/testdata/testgoroutineleakgc/main.go @@ -6,6 +6,10 @@ package main import "os" +// The number of times the main (profiling) goroutine should yield +// in order to allow the leaking goroutines to get stuck. +const yieldCount = 10 + var cmds = map[string]func(){} func register(name string, f func()) { diff --git a/src/runtime/testdata/testgoroutineleakgc/moby25348.go b/src/runtime/testdata/testgoroutineleakgc/moby25348.go index 4ee34b1f08fec8..5b6f9392275200 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby25348.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby25348.go @@ -43,7 +43,10 @@ func (pm *Manager_moby25348) init() { func Moby25348() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() go func() { diff --git a/src/runtime/testdata/testgoroutineleakgc/moby29733.go b/src/runtime/testdata/testgoroutineleakgc/moby29733.go index 4348cce5964187..c8b0b72c216c08 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby29733.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby29733.go @@ -55,7 +55,10 @@ func testActive_moby29733(p *Plugin_moby29733) { func Moby29733() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() diff --git a/src/runtime/testdata/testgoroutineleakgc/moby30408.go b/src/runtime/testdata/testgoroutineleakgc/moby30408.go index 1c39fcd1b47988..a60bf603bd0c7c 100644 --- a/src/runtime/testdata/testgoroutineleakgc/moby30408.go +++ b/src/runtime/testdata/testgoroutineleakgc/moby30408.go @@ -48,7 +48,10 @@ func testActive_moby30408(p *Plugin_moby30408) { func Moby30408() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. 
+ for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() diff --git a/src/runtime/testdata/testgoroutineleakgc/simple.go b/src/runtime/testdata/testgoroutineleakgc/simple.go index 0412b9cbdca5b5..73378787060875 100644 --- a/src/runtime/testdata/testgoroutineleakgc/simple.go +++ b/src/runtime/testdata/testgoroutineleakgc/simple.go @@ -39,7 +39,10 @@ func NilRecv() { <-c panic("should not be reached") }() - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) } @@ -50,7 +53,10 @@ func NilSend() { c <- 0 panic("should not be reached") }() - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) } @@ -60,7 +66,10 @@ func ChanRecv() { <-make(chan int) panic("should not be reached") }() - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) } @@ -70,7 +79,10 @@ func SelectNoCases() { select {} panic("should not be reached") }() - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) } @@ -80,7 +92,10 @@ func ChanSend() { make(chan int) <- 0 panic("should not be reached") }() - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) } @@ -93,7 +108,10 @@ func Select() { } panic("should not be reached") }() - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) } @@ -105,7 +123,10 @@ func WaitGroup() { wg.Wait() panic("should not be reached") }() - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) } @@ -119,7 +140,10 @@ func MutexStack() { panic("should not be reached") }() } - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) } @@ -135,7 +159,10 @@ func MutexHeap() { }() }() } - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) } @@ -147,7 +174,10 @@ func RWMutexRLock() { mu.RLock() panic("should not be reached") }() - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) } @@ -159,7 +189,10 @@ func RWMutexLock() { mu.Lock() panic("should not be reached") }() - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) } @@ -171,7 +204,10 @@ func Cond() { cond.Wait() panic("should not be reached") }() - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) } @@ -190,7 +226,10 @@ func Mixed() { <-ch panic("should not be reached") }() - runtime.Gosched() + // Yield several times to allow the child goroutine to run. 
+ for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) } @@ -202,6 +241,9 @@ func NoLeakGlobal() { go func() { <-ch }() - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) } diff --git a/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go b/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go index 89ac2c42d5a104..603243fedfc2b8 100644 --- a/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go +++ b/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go @@ -70,7 +70,10 @@ func NewService_syncthing4829() *Service_syncthing4829 { func Syncthing4829() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() diff --git a/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go b/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go index 25f9cc420837bf..8b932187b525ea 100644 --- a/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go +++ b/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go @@ -106,7 +106,10 @@ func NewConnection_syncthing5795(receiver Model_syncthing5795) Connection_syncth func Syncthing5795() { prof := pprof.Lookup("goroutineleak") defer func() { - runtime.Gosched() + // Yield several times to allow the child goroutine to run. + for i := 0; i < yieldCount; i++ { + runtime.Gosched() + } prof.WriteTo(os.Stdout, 2) }() go func() { From 1987a1ad801070a65d017033f9699d2a2d6b796d Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Wed, 13 Aug 2025 10:38:58 +0200 Subject: [PATCH 35/36] Demoted cockroach/584 to flaky. --- src/runtime/gc_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 1e78970b4968ba..09a784b9e90566 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -1312,7 +1312,7 @@ func TestGoroutineLeakGC(t *testing.T) { // // TODO(vsaioc): Some of these might be removable (their patterns may overlap). gokerTestCases := []testCase{ - makeTest("Cockroach584", + makeFlakyTest("Cockroach584", `Cockroach584\.func2\(.* \[sync\.Mutex\.Lock\]`, ), makeFlakyTest("Cockroach1055", From ac385329d9cebfb3d67a36bccd1017ed3b35c37b Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Thu, 14 Aug 2025 15:43:30 +0200 Subject: [PATCH 36/36] Added a pathological case where the GC is slow. Fixed SpawnGC test case registration. --- src/runtime/gc_test.go | 1 + .../testgoroutineleakgc/stresstests.go | 46 ++++++++++++++++++- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 09a784b9e90566..f28f51b6707a34 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -1272,6 +1272,7 @@ func TestGoroutineLeakGC(t *testing.T) { makeFlakyTest("SpawnGC", `spawnGC.func1\(.* \[chan receive\]`, ), + makeTest("DaisyChain"), } // Common goroutine leak patterns. 
diff --git a/src/runtime/testdata/testgoroutineleakgc/stresstests.go b/src/runtime/testdata/testgoroutineleakgc/stresstests.go
index 397963b26120e1..b2fc732507aa77 100644
--- a/src/runtime/testdata/testgoroutineleakgc/stresstests.go
+++ b/src/runtime/testdata/testgoroutineleakgc/stresstests.go
@@ -1,16 +1,19 @@
 package main
 
 import (
+	"io"
 	"os"
 	"runtime"
 	"runtime/pprof"
 	"sync"
+	"time"
 )
 
 const spawnGCMaxDepth = 5
 
 func init() {
-	register("Spawn", SpawnGC)
+	register("SpawnGC", SpawnGC)
+	register("DaisyChain", DaisyChain)
 }
 
 func spawnGC(i int) {
@@ -33,7 +36,16 @@ func spawnGC(i int) {
 	}
 	wg.Wait()
 	runtime.Gosched()
-	prof.WriteTo(os.Stdout, 2)
+	if i == spawnGCMaxDepth {
+		prof.WriteTo(os.Stdout, 2)
+	} else {
+		// We want to concurrently trigger the profile in order to concurrently run
+		// the GC, but we don't want to stream all the profiles to standard output.
+		//
+		// Only output the profile for the root call to spawnGC, and otherwise stream
+		// the profile outputs to /dev/null to avoid jumbling.
+		prof.WriteTo(io.Discard, 2)
+	}
 }
 
 // SpawnGC spawns a tree of goroutine leaks and calls the goroutine leak profiler
@@ -42,3 +54,33 @@ func spawnGC(i int) {
 func SpawnGC() {
 	spawnGC(spawnGCMaxDepth)
 }
+
+// DaisyChain spawns a daisy-chain of runnable goroutines.
+//
+// Each goroutine in the chain creates a new channel and goroutine.
+//
+// This illustrates a pathological worst case for the goroutine leak GC complexity,
+// as opposed to the regular GC, which is not negatively affected by this pattern.
+func DaisyChain() {
+	prof := pprof.Lookup("goroutineleak")
+	defer func() {
+		time.Sleep(time.Second)
+		prof.WriteTo(os.Stdout, 2)
+	}()
+	var chain func(i int, ch chan struct{})
+	chain = func(i int, ch chan struct{}) {
+		if i <= 0 {
+			go func() {
+				time.Sleep(time.Hour)
+				ch <- struct{}{}
+			}()
+			return
+		}
+		ch2 := make(chan struct{})
+		go chain(i-1, ch2)
+		<-ch2
+		ch <- struct{}{}
+	}
+	// The channel buffer avoids deadlocks.
+	go chain(1000, make(chan struct{}, 1))
+}
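Note on the test idiom: the testdata programs in this series all converge on the
same shape for making goroutine leak profiles deterministic: start the leaky
goroutine, yield a bounded number of times instead of sleeping, then write the
"goroutineleak" profile. A distilled, standalone sketch of that idiom is below.
It assumes a runtime built with this series, which registers the "goroutineleak"
pprof profile; on a stock toolchain pprof.Lookup("goroutineleak") returns nil,
hence the nil guard.

	package main

	import (
		"os"
		"runtime"
		"runtime/pprof"
	)

	// yieldCount mirrors the shared constant in testdata/testgoroutineleakgc/main.go:
	// the number of times the profiling goroutine yields so the leaky goroutine
	// can reach its blocking operation before the profile is written.
	const yieldCount = 10

	func main() {
		// Only registered by a runtime built with this patch series.
		prof := pprof.Lookup("goroutineleak")

		go func() {
			// Leaks: nothing ever sends on this channel.
			<-make(chan int)
		}()

		// Yield deterministically instead of sleeping so the child goroutine
		// reaches its blocking receive before the profile is taken.
		for i := 0; i < yieldCount; i++ {
			runtime.Gosched()
		}

		if prof != nil {
			prof.WriteTo(os.Stdout, 2)
		}
	}

Yielding a fixed number of times keeps the small tests deterministic under a
constrained scheduler, which is why the later patches replace the timing-dependent
time.Sleep calls with runtime.Gosched loops.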