Commit eee85fc5 authored by Austin Clements

runtime: snapshot heap profile during mark termination

Currently we snapshot the heap profile just *after* mark termination
starts the world because it's a relatively expensive operation.
However, this means any alloc or free events that happen between
starting the world and snapshotting the heap profile can be accounted
to the wrong cycle. In the worst case, a free can be accounted to the
cycle before the alloc; if the heap is small, this can temporarily
result in a negative "in use" count in the profile.
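
To make the skew concrete, here is a toy illustration, not runtime
code (the slice names are made up), of how a free that is published
one cycle ahead of its matching alloc yields a negative in-use count
when the profile is read in between:

package main

import "fmt"

func main() {
	// Per-cycle profile counts for one bucket. The 64-byte alloc is
	// recorded in cycle 2, but its free was misfiled into cycle 1.
	allocBytes := []int64{0, 0, 64}
	freeBytes := []int64{0, 64, 0}

	// Read the profile after only cycles 0 and 1 have been published.
	var inUse int64
	for c := 0; c < 2; c++ {
		inUse += allocBytes[c] - freeBytes[c]
	}
	fmt.Println(inUse) // -64: a temporarily negative "in use" count
}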

Fix this without making STW more expensive by using a global heap
profile cycle counter. This lets us split up the operation into two
parts: 1) a super-cheap snapshot operation that simply increments the
global cycle counter during STW, and 2) a more expensive cleanup
operation we can do after starting the world that frees up a slot in
all buckets for use by the next heap profile cycle.
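
As a rough sketch of this scheme (the names cycleStats, nextCycle,
flush, and so on below are illustrative, not the runtime's own
identifiers; the real implementation is the mprof.go change in the
diff), each bucket keeps a three-slot ring of per-cycle counts indexed
by a global cycle counter. Advancing the cycle during STW is a single
increment, and the flush that folds the finished cycle into the
published totals runs afterwards, concurrently with the program:

package heapprofsketch

import "sync"

// cycleStats records the profile events of a single heap profile
// cycle for one bucket (cf. memRecordCycle in the diff).
type cycleStats struct {
	allocs, frees         int64
	allocBytes, freeBytes int64
}

// bucket holds the published totals plus a three-slot ring of future
// cycles, mirroring the memRecord.active/future split.
type bucket struct {
	active cycleStats
	future [3]cycleStats // slots for cycles C, C+1, and C+2
}

var (
	mu      sync.Mutex // stands in for proflock
	cycle   uint32     // global heap profile cycle counter
	buckets []*bucket
)

// nextCycle is the super-cheap STW step: just bump the global counter
// so later events are filed under the new cycle.
func nextCycle() {
	mu.Lock()
	cycle++
	mu.Unlock()
}

// flush is the more expensive step, run after the world restarts: fold
// the newly published cycle C into the totals and clear its slot so
// the ring can reuse it two cycles from now.
func flush() {
	mu.Lock()
	c := cycle
	for _, b := range buckets {
		slot := &b.future[c%3]
		b.active.allocs += slot.allocs
		b.active.frees += slot.frees
		b.active.allocBytes += slot.allocBytes
		b.active.freeBytes += slot.freeBytes
		*slot = cycleStats{}
	}
	mu.Unlock()
}

// recordMalloc files a malloc two cycles ahead; recordSweepFree files
// a GC (sweep) free one cycle ahead, so a cycle is only published once
// all of its frees have been observed.
func recordMalloc(b *bucket, size int64) {
	mu.Lock()
	slot := &b.future[(cycle+2)%3]
	slot.allocs++
	slot.allocBytes += size
	mu.Unlock()
}

func recordSweepFree(b *bucket, size int64) {
	mu.Lock()
	slot := &b.future[(cycle+1)%3]
	slot.frees++
	slot.freeBytes += size
	mu.Unlock()
}

The real runtime additionally wraps the counter explicitly (see
mProfCycleWrap in the diff) and uses mProf.flushed to avoid flushing
the same cycle twice; both details are omitted here for brevity.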

Fixes #19311.

Change-Id: I6bdafabf111c48b3d26fe2d91267f7bef0bd4270
Reviewed-on: https://go-review.googlesource.com/37714
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
parent 3ebe7d7d
@@ -1302,18 +1302,22 @@ func gcMarkTermination() {
sweep.nbgsweep = 0
sweep.npausesweep = 0
// If gcSweep didn't do it, finish the current heap profiling
// cycle and start a new heap profiling cycle. We do this
// before starting the world so events don't leak into the
// wrong cycle.
needProfCycle := _ConcurrentSweep && work.mode != gcForceBlockMode
if needProfCycle {
mProf_NextCycle()
}
systemstack(startTheWorldWithSema)
// Update heap profile stats if gcSweep didn't do it. This is
// relatively expensive, so we don't want to do it while the
// world is stopped, but it needs to happen ASAP after
// starting the world to prevent too many allocations from the
// next cycle leaking in. It must happen before releasing
// worldsema since there are applications that do a
// runtime.GC() to update the heap profile and then
// immediately collect the profile.
if _ConcurrentSweep && work.mode != gcForceBlockMode {
mProf_GC()
// Flush the heap profile so we can start a new cycle next GC.
// This is relatively expensive, so we don't do it with the
// world stopped.
if needProfCycle {
mProf_Flush()
}
// Free stack spans. This must be done between GC cycles.
@@ -1759,9 +1763,12 @@ func gcSweep(mode gcMode) {
for sweepone() != ^uintptr(0) {
sweep.npausesweep++
}
// Do an additional mProf_GC, because all 'free' events are now real as well.
mProf_GC()
mProf_GC()
// All "free" events are now real, so flush everything
// into the published profile.
mProf_NextCycle()
mProf_Flush()
mProf_NextCycle()
mProf_Flush()
return
}
@@ -77,36 +77,43 @@ type memRecord struct {
//
// alloc → ▲ ← free
// ┠┅┅┅┅┅┅┅┅┅┅┅P
// r_a → p_a → allocs
// p_f → frees
// C+2 → C+1 → C
//
// alloc → ▲ ← free
// ┠┅┅┅┅┅┅┅┅┅┅┅P
// r_a → p_a → alloc
// p_f → frees
// C+2 → C+1 → C
//
// Since we can't publish a consistent snapshot until all of
// the sweep frees are accounted for, we wait until the next
// mark termination ("MT" above) to publish the previous mark
// termination's snapshot ("P" above). To do this, information
// is delayed through "recent" and "prev" stages ("r_*" and
// "p_*" above). Specifically:
// termination's snapshot ("P" above). To do this, allocation
// and free events are accounted to *future* heap profile
// cycles ("C+n" above) and we only publish a cycle once all
// of the events from that cycle must be done. Specifically:
//
// Mallocs are accounted in recent stats.
// Explicit frees are accounted in recent stats.
// GC frees are accounted in prev stats.
// After GC prev stats are added to final stats and
// recent stats are moved into prev stats.
// Mallocs are accounted to cycle C+2.
// Explicit frees are accounted to cycle C+2.
// GC frees (done during sweeping) are accounted to cycle C+1.
//
// After mark termination, we increment the global heap
// profile cycle counter and accumulate the stats from cycle C
// into the active profile.
// active is the currently published profile. A profiling
// cycle can be accumulated into active once its complete.
active memRecordCycle
// changes between next-to-last GC and last GC
prev memRecordCycle
// changes since last GC
recent memRecordCycle
// future records the profile events we're counting for cycles
// that have not yet been published. This is ring buffer
// indexed by the global heap profile cycle C and stores
// cycles C, C+1, and C+2. Unlike active, these counts are
// only for a single cycle; they are not cumulative across
// cycles.
//
// We store cycle C here because there's a window between when
// C becomes the active cycle and when we've flushed it to
// active.
future [3]memRecordCycle
}
// memRecordCycle
@@ -136,8 +143,21 @@ var (
xbuckets *bucket // mutex profile buckets
buckhash *[179999]*bucket
bucketmem uintptr
mProf struct {
// All fields in mProf are protected by proflock.
// cycle is the global heap profile cycle. This wraps
// at mProfCycleWrap.
cycle uint32
// flushed indicates that future[cycle] in all buckets
// has been flushed to the active profile.
flushed bool
}
)
const mProfCycleWrap = uint32(len(memRecord{}.future)) * (2 << 24)
// newBucket allocates a bucket with the given type and number of stack entries.
func newBucket(typ bucketType, nstk int) *bucket {
size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(uintptr(0))
@@ -248,32 +268,64 @@ func eqslice(x, y []uintptr) bool {
return true
}
func mprof_GC() {
for b := mbuckets; b != nil; b = b.allnext {
mp := b.mp()
mp.active.add(&mp.prev)
mp.prev = mp.recent
mp.recent = memRecordCycle{}
}
// mProf_NextCycle publishes the next heap profile cycle and creates a
// fresh heap profile cycle. This operation is fast and can be done
// during STW. The caller must call mProf_Flush before calling
// mProf_NextCycle again.
//
// This is called by mark termination during STW so allocations and
// frees after the world is started again count towards a new heap
// profiling cycle.
func mProf_NextCycle() {
lock(&proflock)
// We explicitly wrap mProf.cycle rather than depending on
// uint wraparound because the memRecord.future ring does not
// itself wrap at a power of two.
mProf.cycle = (mProf.cycle + 1) % mProfCycleWrap
mProf.flushed = false
unlock(&proflock)
}
// Record that a gc just happened: all the 'recent' statistics are now real.
func mProf_GC() {
// mProf_Flush flushes the events from the current heap profiling
// cycle into the active profile. After this it is safe to start a new
// heap profiling cycle with mProf_NextCycle.
//
// This is called by GC after mark termination starts the world. In
// contrast with mProf_NextCycle, this is somewhat expensive, but safe
// to do concurrently.
func mProf_Flush() {
lock(&proflock)
mprof_GC()
if !mProf.flushed {
mProf_FlushLocked()
mProf.flushed = true
}
unlock(&proflock)
}
func mProf_FlushLocked() {
c := mProf.cycle
for b := mbuckets; b != nil; b = b.allnext {
mp := b.mp()
// Flush cycle C into the published profile and clear
// it for reuse.
mpc := &mp.future[c%uint32(len(mp.future))]
mp.active.add(mpc)
*mpc = memRecordCycle{}
}
}
// Called by malloc to record a profiled block.
func mProf_Malloc(p unsafe.Pointer, size uintptr) {
var stk [maxStack]uintptr
nstk := callers(4, stk[:])
lock(&proflock)
b := stkbucket(memProfile, size, stk[:nstk], true)
c := mProf.cycle
mp := b.mp()
mp.recent.allocs++
mp.recent.alloc_bytes += size
mpc := &mp.future[(c+2)%uint32(len(mp.future))]
mpc.allocs++
mpc.alloc_bytes += size
unlock(&proflock)
// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
@@ -288,9 +340,11 @@ func mProf_Malloc(p unsafe.Pointer, size uintptr) {
// Called when freeing a profiled block.
func mProf_Free(b *bucket, size uintptr) {
lock(&proflock)
c := mProf.cycle
mp := b.mp()
mp.prev.frees++
mp.prev.free_bytes += size
mpc := &mp.future[(c+1)%uint32(len(mp.future))]
mpc.frees++
mpc.free_bytes += size
unlock(&proflock)
}
@@ -467,6 +521,10 @@ func (r *MemProfileRecord) Stack() []uintptr {
// of calling MemProfile directly.
func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
lock(&proflock)
// If we're between mProf_NextCycle and mProf_Flush, take care
// of flushing to the active profile so we only have to look
// at the active profile below.
mProf_FlushLocked()
clear := true
for b := mbuckets; b != nil; b = b.allnext {
mp := b.mp()
@@ -481,12 +539,14 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
// Absolutely no data, suggesting that a garbage collection
// has not yet happened. In order to allow profiling when
// garbage collection is disabled from the beginning of execution,
// accumulate stats as if a GC just happened, and recount buckets.
mprof_GC()
mprof_GC()
// accumulate all of the cycles, and recount buckets.
n = 0
for b := mbuckets; b != nil; b = b.allnext {
mp := b.mp()
for c := range mp.future {
mp.active.add(&mp.future[c])
mp.future[c] = memRecordCycle{}
}
if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
n++
}
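
For context, the usage pattern mentioned in the old gcMarkTermination
comment, calling runtime.GC() to bring the heap profile up to date and
then collecting it right away, looks roughly like the following
sketch. It uses only the standard runtime and runtime/pprof packages;
the output filename is arbitrary.

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
)

func main() {
	f, err := os.Create("heap.pprof")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// Force a full GC so recently freed memory is reflected in the
	// heap profile, then write the profile immediately. Publishing
	// the profile cycle before worldsema is released is what keeps
	// this immediate read consistent.
	runtime.GC()
	if err := pprof.WriteHeapProfile(f); err != nil {
		panic(err)
	}
}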