Commit a2cd2bd5 authored by Michael Anthony Knyszek, committed by Michael Knyszek

runtime: add per-p page allocation cache

This change adds a per-p free page cache which the page allocator may
allocate out of without a lock. The change also introduces a completely
lockless page allocator fast path.

Although the cache contains at most 64 pages (and usually less), the
vast majority (85%+) of page allocations are exactly 1 page in size.

Updates #35112.

Change-Id: I170bf0a9375873e7e3230845eb1df7e5cf741b78
Reviewed-on: https://go-review.googlesource.com/c/go/+/195701
Run-TryBot: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Austin Clements <austin@google.com>
parent 81640ea3
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
package runtime package runtime
import ( import (
"math/bits"
"runtime/internal/atomic" "runtime/internal/atomic"
"runtime/internal/sys" "runtime/internal/sys"
"unsafe" "unsafe"
...@@ -358,6 +359,10 @@ func ReadMemStatsSlow() (base, slow MemStats) { ...@@ -358,6 +359,10 @@ func ReadMemStatsSlow() (base, slow MemStats) {
pg := mheap_.pages.chunks[i].scavenged.popcntRange(0, pallocChunkPages) pg := mheap_.pages.chunks[i].scavenged.popcntRange(0, pallocChunkPages)
slow.HeapReleased += uint64(pg) * pageSize slow.HeapReleased += uint64(pg) * pageSize
} }
for _, p := range allp {
pg := bits.OnesCount64(p.pcache.scav)
slow.HeapReleased += uint64(pg) * pageSize
}
// Unused space in the current arena also counts as released space. // Unused space in the current arena also counts as released space.
slow.HeapReleased += uint64(mheap_.curArena.end - mheap_.curArena.base) slow.HeapReleased += uint64(mheap_.curArena.end - mheap_.curArena.base)
...@@ -879,3 +884,20 @@ func CheckScavengedBitsCleared(mismatches []BitsMismatch) (n int, ok bool) { ...@@ -879,3 +884,20 @@ func CheckScavengedBitsCleared(mismatches []BitsMismatch) (n int, ok bool) {
}) })
return return
} }
// PageCachePagesLeaked returns the number of pages still recorded in the
// page caches of destroyed Ps. The world is stopped for the duration of
// the scan.
func PageCachePagesLeaked() (leaked uintptr) {
	stopTheWorld("PageCachePagesLeaked")

	// Destroyed Ps are found in the spare capacity of allp, past its
	// current length. Any bit still set in such a P's cache bitmap is
	// counted as a page that was never flushed.
	for _, pp := range allp[len(allp):cap(allp)] {
		if pp == nil {
			// Slots beyond len(allp) are not guaranteed to hold a P.
			continue
		}
		leaked += uintptr(bits.OnesCount64(pp.pcache.cache))
	}

	startTheWorld()
	return leaked
}
...@@ -168,6 +168,14 @@ func TestTinyAlloc(t *testing.T) { ...@@ -168,6 +168,14 @@ func TestTinyAlloc(t *testing.T) {
} }
} }
// TestPageCacheLeak verifies that PageCachePagesLeaked reports zero
// leaked pages after the number of Ps has been reduced to one.
func TestPageCacheLeak(t *testing.T) {
	// Drop to a single P for the duration of the test and put the
	// previous setting back on exit.
	prev := GOMAXPROCS(1)
	defer GOMAXPROCS(prev)

	if n := PageCachePagesLeaked(); n != 0 {
		t.Fatalf("found %d leaked pages in page caches", n)
	}
}
func TestPhysicalMemoryUtilization(t *testing.T) { func TestPhysicalMemoryUtilization(t *testing.T) {
got := runTestProg(t, "testprog", "GCPhys") got := runTestProg(t, "testprog", "GCPhys")
want := "OK\n" want := "OK\n"
......
...@@ -1073,28 +1073,60 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS ...@@ -1073,28 +1073,60 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS
gp := getg() gp := getg()
base, scav := uintptr(0), uintptr(0) base, scav := uintptr(0), uintptr(0)
// Try to allocate a cached span. // If the allocation is small enough, try the page cache!
pp := gp.m.p.ptr()
if pp != nil && npages < pageCachePages/4 {
c := &pp.pcache
// If the cache is empty, refill it.
if c.empty() {
lock(&h.lock)
*c = h.pages.allocToCache()
unlock(&h.lock)
}
// Try to allocate from the cache.
base, scav = c.alloc(npages)
if base != 0 {
s = h.tryAllocMSpan() s = h.tryAllocMSpan()
// We failed to do what we need to do without the lock. if s != nil && gcBlackenEnabled == 0 && (manual || spanclass.sizeclass() != 0) {
goto HaveSpan
}
// We're either running during GC, failed to acquire an mspan,
// or the allocation is for a large object. This means we
// have to lock the heap and do a bunch of extra work,
// so go down the HaveBaseLocked path.
//
// We must do this during GC to avoid skew with heap_scan
// since we flush mcache stats whenever we lock.
//
// TODO(mknyszek): It would be nice to not have to
// lock the heap if it's a large allocation, but
// it's fine for now. The critical section here is
// short and large object allocations are relatively
// infrequent.
}
}
// For one reason or another, we couldn't get the
// whole job done without the heap lock.
lock(&h.lock) lock(&h.lock)
if base == 0 {
// Try to acquire a base address. // Try to acquire a base address.
base, scav = h.pages.alloc(npages) base, scav = h.pages.alloc(npages)
if base != 0 { if base == 0 {
goto HaveBase
}
if !h.grow(npages) { if !h.grow(npages) {
unlock(&h.lock) unlock(&h.lock)
return nil return nil
} }
base, scav = h.pages.alloc(npages) base, scav = h.pages.alloc(npages)
if base != 0 { if base == 0 {
goto HaveBase
}
throw("grew heap, but no adequate free space found") throw("grew heap, but no adequate free space found")
}
HaveBase: }
}
if s == nil { if s == nil {
// We failed to get an mspan earlier, so grab // We failed to get an mspan earlier, so grab
// one now that we have the heap lock. // one now that we have the heap lock.
...@@ -1124,7 +1156,9 @@ HaveBase: ...@@ -1124,7 +1156,9 @@ HaveBase:
} }
unlock(&h.lock) unlock(&h.lock)
// Initialize the span. HaveSpan:
// At this point, both s != nil and base != 0, and the heap
// lock is no longer held. Initialize the span.
s.init(base, npages) s.init(base, npages)
if h.allocNeedsZero(base, npages) { if h.allocNeedsZero(base, npages) {
s.needzero = 1 s.needzero = 1
......
...@@ -4088,6 +4088,7 @@ func (pp *p) destroy() { ...@@ -4088,6 +4088,7 @@ func (pp *p) destroy() {
mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i])) mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i]))
} }
pp.mspancache.len = 0 pp.mspancache.len = 0
pp.pcache.flush(&mheap_.pages)
}) })
freemcache(pp.mcache) freemcache(pp.mcache)
pp.mcache = nil pp.mcache = nil
......
...@@ -555,6 +555,7 @@ type p struct { ...@@ -555,6 +555,7 @@ type p struct {
sysmontick sysmontick // last tick observed by sysmon sysmontick sysmontick // last tick observed by sysmon
m muintptr // back-link to associated m (nil if idle) m muintptr // back-link to associated m (nil if idle)
mcache *mcache mcache *mcache
pcache pageCache
raceprocctx uintptr raceprocctx uintptr
deferpool [5][]*_defer // pool of available defer structs of different sizes (see panic.go) deferpool [5][]*_defer // pool of available defer structs of different sizes (see panic.go)
...@@ -611,7 +612,7 @@ type p struct { ...@@ -611,7 +612,7 @@ type p struct {
palloc persistentAlloc // per-P to avoid mutex palloc persistentAlloc // per-P to avoid mutex
// _ uint32 // Alignment for atomic fields below _ uint32 // Alignment for atomic fields below
// Per-P GC state // Per-P GC state
gcAssistTime int64 // Nanoseconds in assistAlloc gcAssistTime int64 // Nanoseconds in assistAlloc
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment