Commit 8fc6ed4c authored by Dmitriy Vyukov's avatar Dmitriy Vyukov

sync: less agressive local caching in Pool

Currently Pool can cache up to 15 elements per P, and these elements are not accesible to other Ps.
If a Pool caches large objects, say 2MB, and GOMAXPROCS is set to a large value, say 32,
then the Pool can waste up to 960MB.
The new caching policy caches at most 1 per-P element, the rest is shared between Ps.

Get/Put performance is unchanged. Nested Get/Put performance is 57% worse.
However, overall scalability of nested Get/Put is significantly improved,
so the new policy starts winning under contention.

benchmark                     old ns/op     new ns/op     delta
BenchmarkPool                 27.4          26.7          -2.55%
BenchmarkPool-4               6.63          6.59          -0.60%
BenchmarkPool-16              1.98          1.87          -5.56%
BenchmarkPool-64              1.93          1.86          -3.63%
BenchmarkPoolOverlflow        3970          6235          +57.05%
BenchmarkPoolOverlflow-4      10935         1668          -84.75%
BenchmarkPoolOverlflow-16     13419         520           -96.12%
BenchmarkPoolOverlflow-64     10295         380           -96.31%

LGTM=rsc
R=rsc
CC=golang-codereviews, khr
https://golang.org/cl/86020043
parent 1e1506a2
...@@ -91,42 +91,24 @@ enum { ...@@ -91,42 +91,24 @@ enum {
// Initialized from $GOGC. GOGC=off means no gc. // Initialized from $GOGC. GOGC=off means no gc.
static int32 gcpercent = GcpercentUnknown; static int32 gcpercent = GcpercentUnknown;
static struct static FuncVal* poolcleanup;
{
Lock;
void* head;
} pools;
void void
sync·runtime_registerPool(void **p) sync·runtime_registerPoolCleanup(FuncVal *f)
{ {
runtime·lock(&pools); poolcleanup = f;
p[0] = pools.head;
pools.head = p;
runtime·unlock(&pools);
} }
static void static void
clearpools(void) clearpools(void)
{ {
void **pool, **next;
P *p, **pp; P *p, **pp;
MCache *c; MCache *c;
uintptr off;
int32 i; int32 i;
// clear sync.Pool's // clear sync.Pool's
for(pool = pools.head; pool != nil; pool = next) { if(poolcleanup != nil)
next = pool[0]; reflect·call(poolcleanup, nil, 0, 0);
pool[0] = nil; // next
pool[1] = nil; // local
pool[2] = nil; // localSize
off = (uintptr)pool[3] / sizeof(void*);
pool[off+0] = nil; // global slice
pool[off+1] = nil;
pool[off+2] = nil;
}
pools.head = nil;
for(pp=runtime·allp; p=*pp; pp++) { for(pp=runtime·allp; p=*pp; pp++) {
// clear tinyalloc pool // clear tinyalloc pool
......
...@@ -10,12 +10,6 @@ import ( ...@@ -10,12 +10,6 @@ import (
"unsafe" "unsafe"
) )
const (
cacheLineSize = 128
poolLocalSize = 2 * cacheLineSize
poolLocalCap = poolLocalSize/unsafe.Sizeof(*(*interface{})(nil)) - 1
)
// A Pool is a set of temporary objects that may be individually saved and // A Pool is a set of temporary objects that may be individually saved and
// retrieved. // retrieved.
// //
...@@ -46,36 +40,21 @@ const ( ...@@ -46,36 +40,21 @@ const (
// free list. // free list.
// //
type Pool struct { type Pool struct {
// The following fields are known to runtime. local unsafe.Pointer // local fixed-size per-P pool, actual type is [P]poolLocal
next *Pool // for use by runtime localSize uintptr // size of the local array
local *poolLocal // local fixed-size per-P pool, actually an array
localSize uintptr // size of the local array
globalOffset uintptr // offset of global
// The rest is not known to runtime.
// New optionally specifies a function to generate // New optionally specifies a function to generate
// a value when Get would otherwise return nil. // a value when Get would otherwise return nil.
// It may not be changed concurrently with calls to Get. // It may not be changed concurrently with calls to Get.
New func() interface{} New func() interface{}
pad [cacheLineSize]byte
// Read-mostly date above this point, mutable data follows.
mu Mutex
global []interface{} // global fallback pool
} }
// Local per-P Pool appendix. // Local per-P Pool appendix.
type poolLocal struct { type poolLocal struct {
tail int private interface{} // Can be used only by the respective P.
unused int shared []interface{} // Can be used by any P.
buf [poolLocalCap]interface{} Mutex // Protects shared.
} pad [128]byte // Prevents false sharing.
func init() {
var v poolLocal
if unsafe.Sizeof(v) != poolLocalSize {
panic("sync: incorrect pool size")
}
} }
// Put adds x to the pool. // Put adds x to the pool.
...@@ -90,14 +69,17 @@ func (p *Pool) Put(x interface{}) { ...@@ -90,14 +69,17 @@ func (p *Pool) Put(x interface{}) {
return return
} }
l := p.pin() l := p.pin()
t := l.tail if l.private == nil {
if t < int(poolLocalCap) { l.private = x
l.buf[t] = x x = nil
l.tail = t + 1 }
runtime_procUnpin() runtime_procUnpin()
if x == nil {
return return
} }
p.putSlow(l, x) l.Lock()
l.shared = append(l.shared, x)
l.Unlock()
} }
// Get selects an arbitrary item from the Pool, removes it from the // Get selects an arbitrary item from the Pool, removes it from the
...@@ -116,69 +98,49 @@ func (p *Pool) Get() interface{} { ...@@ -116,69 +98,49 @@ func (p *Pool) Get() interface{} {
return nil return nil
} }
l := p.pin() l := p.pin()
t := l.tail x := l.private
if t > 0 { l.private = nil
t -= 1 runtime_procUnpin()
x := l.buf[t] if x != nil {
l.tail = t
runtime_procUnpin()
return x return x
} }
return p.getSlow() l.Lock()
} last := len(l.shared) - 1
if last >= 0 {
func (p *Pool) putSlow(l *poolLocal, x interface{}) { x = l.shared[last]
// Grab half of items from local pool and put to global pool. l.shared = l.shared[:last]
// Can not lock the mutex while pinned.
const N = int(poolLocalCap/2 + 1)
var buf [N]interface{}
buf[0] = x
for i := 1; i < N; i++ {
l.tail--
buf[i] = l.buf[l.tail]
} }
runtime_procUnpin() l.Unlock()
if x != nil {
p.mu.Lock() return x
p.global = append(p.global, buf[:]...) }
p.mu.Unlock() return p.getSlow()
} }
func (p *Pool) getSlow() (x interface{}) { func (p *Pool) getSlow() (x interface{}) {
// Grab a batch of items from global pool and put to local pool. // See the comment in pin regarding ordering of the loads.
// Can not lock the mutex while pinned. size := atomic.LoadUintptr(&p.localSize) // load-acquire
runtime_procUnpin() local := p.local // load-consume
p.mu.Lock() // Try to steal one element from other procs.
pid := runtime_procPin() pid := runtime_procPin()
s := p.localSize runtime_procUnpin()
l := p.local for i := 0; i < int(size); i++ {
if uintptr(pid) < s { l := indexLocal(local, (pid+i+1)%int(size))
l = indexLocal(l, pid) l.Lock()
// Get the item to return. last := len(l.shared) - 1
last := len(p.global) - 1
if last >= 0 { if last >= 0 {
x = p.global[last] x = l.shared[last]
p.global = p.global[:last] l.shared = l.shared[:last]
} l.Unlock()
// Try to refill local pool, we may have been rescheduled to another P. break
if last > 0 && l.tail == 0 {
n := int(poolLocalCap / 2)
gl := len(p.global)
if n > gl {
n = gl
}
copy(l.buf[:], p.global[gl-n:])
p.global = p.global[:gl-n]
l.tail = n
} }
l.Unlock()
} }
runtime_procUnpin()
p.mu.Unlock()
if x == nil && p.New != nil { if x == nil && p.New != nil {
x = p.New() x = p.New()
} }
return return x
} }
// pin pins the current goroutine to P, disables preemption and returns poolLocal pool for the P. // pin pins the current goroutine to P, disables preemption and returns poolLocal pool for the P.
...@@ -199,32 +161,63 @@ func (p *Pool) pin() *poolLocal { ...@@ -199,32 +161,63 @@ func (p *Pool) pin() *poolLocal {
func (p *Pool) pinSlow() *poolLocal { func (p *Pool) pinSlow() *poolLocal {
// Retry under the mutex. // Retry under the mutex.
// Can not lock the mutex while pinned.
runtime_procUnpin() runtime_procUnpin()
p.mu.Lock() allPoolsMu.Lock()
defer p.mu.Unlock() defer allPoolsMu.Unlock()
pid := runtime_procPin() pid := runtime_procPin()
// poolCleanup won't be called while we are pinned.
s := p.localSize s := p.localSize
l := p.local l := p.local
if uintptr(pid) < s { if uintptr(pid) < s {
return indexLocal(l, pid) return indexLocal(l, pid)
} }
if p.local == nil { if p.local == nil {
p.globalOffset = unsafe.Offsetof(p.global) allPools = append(allPools, p)
runtime_registerPool(p)
} }
// If GOMAXPROCS changes between GCs, we re-allocate the array and lose the old one. // If GOMAXPROCS changes between GCs, we re-allocate the array and lose the old one.
size := runtime.GOMAXPROCS(0) size := runtime.GOMAXPROCS(0)
local := make([]poolLocal, size) local := make([]poolLocal, size)
atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(&p.local)), unsafe.Pointer(&local[0])) // store-release atomic.StorePointer((*unsafe.Pointer)(&p.local), unsafe.Pointer(&local[0])) // store-release
atomic.StoreUintptr(&p.localSize, uintptr(size)) // store-release atomic.StoreUintptr(&p.localSize, uintptr(size)) // store-release
return &local[pid] return &local[pid]
} }
func indexLocal(l *poolLocal, i int) *poolLocal { func poolCleanup() {
return (*poolLocal)(unsafe.Pointer(uintptr(unsafe.Pointer(l)) + unsafe.Sizeof(*l)*uintptr(i))) // uh... // This function is called with the world stopped, at the beginning of a garbage collection.
// It must not allocate and probably should not call any runtime functions.
// Defensively zero out everything, 2 reasons:
// 1. To prevent false retention of whole Pools.
// 2. If GC happens while a goroutine works with l.shared in Put/Get,
// it will retain whole Pool. So next cycle memory consumption would be doubled.
for i, p := range allPools {
allPools[i] = nil
for i := 0; i < int(p.localSize); i++ {
l := indexLocal(p.local, i)
l.private = nil
for j := range l.shared {
l.shared[j] = nil
}
l.shared = nil
}
}
allPools = []*Pool{}
}
var (
allPoolsMu Mutex
allPools []*Pool
)
func init() {
runtime_registerPoolCleanup(poolCleanup)
}
func indexLocal(l unsafe.Pointer, i int) *poolLocal {
return &(*[1000000]poolLocal)(l)[i]
} }
// Implemented in runtime. // Implemented in runtime.
func runtime_registerPool(*Pool) func runtime_registerPoolCleanup(cleanup func())
func runtime_procPin() int func runtime_procPin() int
func runtime_procUnpin() func runtime_procUnpin()
...@@ -25,12 +25,12 @@ func TestPool(t *testing.T) { ...@@ -25,12 +25,12 @@ func TestPool(t *testing.T) {
} }
p.Put("a") p.Put("a")
p.Put("b") p.Put("b")
if g := p.Get(); g != "b" {
t.Fatalf("got %#v; want b", g)
}
if g := p.Get(); g != "a" { if g := p.Get(); g != "a" {
t.Fatalf("got %#v; want a", g) t.Fatalf("got %#v; want a", g)
} }
if g := p.Get(); g != "b" {
t.Fatalf("got %#v; want b", g)
}
if g := p.Get(); g != nil { if g := p.Get(); g != nil {
t.Fatalf("got %#v; want nil", g) t.Fatalf("got %#v; want nil", g)
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment