Commit 3c561434 authored by Changkun Ou, committed by Keith Randall

runtime: one lock per order

This CL implements one lock per order of the stackpool. It improves performance when mutators grow their stacks deeply; see the benchmark below:

```
name                old time/op  new time/op  delta
StackGrowth-8       3.60ns ± 5%  3.59ns ± 1%    ~     (p=0.794 n=10+9)
StackGrowthDeep-8    370ns ± 1%   335ns ± 1%  -9.47%  (p=0.000 n=9+9)
StackCopyPtr-8      72.6ms ± 0%  71.6ms ± 1%  -1.31%  (p=0.000 n=9+9)
StackCopy-8         53.5ms ± 0%  53.2ms ± 1%  -0.54%  (p=0.006 n=8+9)
StackCopyNoCache-8   100ms ± 0%    99ms ± 0%  -0.70%  (p=0.000 n=8+8)
```
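The diff below replaces the single global `stackpoolmu` with one mutex per stack order, each padded out to its own cache line so contended locks do not false-share. As a rough, self-contained sketch of that pattern only (not the runtime's actual code: it uses `sync.Mutex` instead of the runtime `mutex`, and invented names such as `numOrders`, `poolItem`, and `allocFrom`):

```
// Simplified illustration of per-order, cache-line-padded locks; not runtime code.
package main

import (
	"fmt"
	"sync"
	"unsafe"
)

const (
	numOrders     = 4  // illustrative; the runtime uses _NumStackOrders
	cacheLineSize = 64 // illustrative; the runtime uses cpu.CacheLinePadSize
)

// poolItem pairs a lock with the free list it guards, mirroring stackpoolItem.
type poolItem struct {
	mu   sync.Mutex
	free []uintptr // stand-in for the runtime's mSpanList
}

// pool gives each order its own lock, padded so that two orders never share
// a cache line.
var pool [numOrders]struct {
	item poolItem
	_    [cacheLineSize - unsafe.Sizeof(poolItem{})%cacheLineSize]byte
}

// allocFrom locks only the order it touches, so stack growth at one size
// class no longer serializes against allocations at every other size class.
func allocFrom(order int) (uintptr, bool) {
	p := &pool[order].item
	p.mu.Lock()
	defer p.mu.Unlock()
	if n := len(p.free); n > 0 {
		x := p.free[n-1]
		p.free = p.free[:n-1]
		return x, true
	}
	return 0, false // caller would allocate a fresh span here
}

func main() {
	pool[1].item.free = append(pool[1].item.free, 0x1000)
	x, ok := allocFrom(1)
	fmt.Printf("got %#x (ok=%v); padded item size: %d bytes\n",
		x, ok, unsafe.Sizeof(pool[0]))
}
```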

Change-Id: I1170d3fd9e6ff8516e25f669d0aaf1861311420f
GitHub-Last-Rev: 13b820cddd8008079c98d01ac22dd123a46a6603
GitHub-Pull-Request: golang/go#33399
Reviewed-on: https://go-review.googlesource.com/c/go/+/188478
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
parent 88ca80b3
@@ -5,6 +5,7 @@
package runtime
import (
"internal/cpu"
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
@@ -137,9 +138,16 @@ const (
// Stacks are assigned an order according to size.
// order = log_2(size/FixedStack)
// There is a free list for each order.
// TODO: one lock per order?
var stackpool [_NumStackOrders]mSpanList
var stackpoolmu mutex
var stackpool [_NumStackOrders]struct {
item stackpoolItem
_ [cpu.CacheLinePadSize - unsafe.Sizeof(stackpoolItem{})%cpu.CacheLinePadSize]byte
}
//go:notinheap
type stackpoolItem struct {
mu mutex
span mSpanList
}
// Global pool of large stack spans.
var stackLarge struct {
@@ -152,7 +160,7 @@ func stackinit() {
throw("cache size must be a multiple of page size")
}
for i := range stackpool {
stackpool[i].init()
stackpool[i].item.span.init()
}
for i := range stackLarge.free {
stackLarge.free[i].init()
@@ -170,9 +178,9 @@ func stacklog2(n uintptr) int {
}
// Allocates a stack from the free pool. Must be called with
// stackpoolmu held.
// stackpool[order].item.mu held.
func stackpoolalloc(order uint8) gclinkptr {
list := &stackpool[order]
list := &stackpool[order].item.span
s := list.first
if s == nil {
// no free stacks. Allocate another span worth.
@@ -208,7 +216,7 @@ func stackpoolalloc(order uint8) gclinkptr {
return x
}
// Adds stack x to the free pool. Must be called with stackpoolmu held.
// Adds stack x to the free pool. Must be called with stackpool[order].item.mu held.
func stackpoolfree(x gclinkptr, order uint8) {
s := spanOfUnchecked(uintptr(x))
if s.state != mSpanManual {
@@ -216,7 +224,7 @@ func stackpoolfree(x gclinkptr, order uint8) {
}
if s.manualFreeList.ptr() == nil {
// s will now have a free stack
stackpool[order].insert(s)
stackpool[order].item.span.insert(s)
}
x.ptr().next = s.manualFreeList
s.manualFreeList = x
@@ -237,7 +245,7 @@ func stackpoolfree(x gclinkptr, order uint8) {
// pointer into a free span.
//
// By not freeing, we prevent step #4 until GC is done.
stackpool[order].remove(s)
stackpool[order].item.span.remove(s)
s.manualFreeList = 0
osStackFree(s)
mheap_.freeManual(s, &memstats.stacks_inuse)
@@ -257,14 +265,14 @@ func stackcacherefill(c *mcache, order uint8) {
// Grab half of the allowed capacity (to prevent thrashing).
var list gclinkptr
var size uintptr
lock(&stackpoolmu)
lock(&stackpool[order].item.mu)
for size < _StackCacheSize/2 {
x := stackpoolalloc(order)
x.ptr().next = list
list = x
size += _FixedStack << order
}
unlock(&stackpoolmu)
unlock(&stackpool[order].item.mu)
c.stackcache[order].list = list
c.stackcache[order].size = size
}
@@ -276,14 +284,14 @@ func stackcacherelease(c *mcache, order uint8) {
}
x := c.stackcache[order].list
size := c.stackcache[order].size
lock(&stackpoolmu)
lock(&stackpool[order].item.mu)
for size > _StackCacheSize/2 {
y := x.ptr().next
stackpoolfree(x, order)
x = y
size -= _FixedStack << order
}
unlock(&stackpoolmu)
unlock(&stackpool[order].item.mu)
c.stackcache[order].list = x
c.stackcache[order].size = size
}
@@ -293,8 +301,8 @@ func stackcache_clear(c *mcache) {
if stackDebug >= 1 {
print("stackcache clear\n")
}
lock(&stackpoolmu)
for order := uint8(0); order < _NumStackOrders; order++ {
lock(&stackpool[order].item.mu)
x := c.stackcache[order].list
for x.ptr() != nil {
y := x.ptr().next
@@ -303,8 +311,8 @@ func stackcache_clear(c *mcache) {
}
c.stackcache[order].list = 0
c.stackcache[order].size = 0
unlock(&stackpool[order].item.mu)
}
unlock(&stackpoolmu)
}
// stackalloc allocates an n byte stack.
@@ -355,9 +363,9 @@ func stackalloc(n uint32) stack {
// procresize. Just get a stack from the global pool.
// Also don't touch stackcache during gc
// as it's flushed concurrently.
lock(&stackpoolmu)
lock(&stackpool[order].item.mu)
x = stackpoolalloc(order)
unlock(&stackpoolmu)
unlock(&stackpool[order].item.mu)
} else {
x = c.stackcache[order].list
if x.ptr() == nil {
@@ -446,9 +454,9 @@ func stackfree(stk stack) {
x := gclinkptr(v)
c := gp.m.mcache
if stackNoCache != 0 || c == nil || gp.m.preemptoff != "" {
lock(&stackpoolmu)
lock(&stackpool[order].item.mu)
stackpoolfree(x, order)
unlock(&stackpoolmu)
unlock(&stackpool[order].item.mu)
} else {
if c.stackcache[order].size >= _StackCacheSize {
stackcacherelease(c, order)
@@ -1134,11 +1142,11 @@ func shrinkstack(gp *g) {
// freeStackSpans frees unused stack spans at the end of GC.
func freeStackSpans() {
lock(&stackpoolmu)
// Scan stack pools for empty stack spans.
for order := range stackpool {
list := &stackpool[order]
lock(&stackpool[order].item.mu)
list := &stackpool[order].item.span
for s := list.first; s != nil; {
next := s.next
if s.allocCount == 0 {
@@ -1149,10 +1157,9 @@ func freeStackSpans() {
}
s = next
}
unlock(&stackpool[order].item.mu)
}
unlock(&stackpoolmu)
// Free large stack spans.
lock(&stackLarge.lock)
for i := range stackLarge.free {
......
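The padding expression in the new stackpool declaration, `cpu.CacheLinePadSize - unsafe.Sizeof(stackpoolItem{})%cpu.CacheLinePadSize`, rounds each array element up to a whole number of cache lines so neighbouring orders never land on the same line. A small standalone sketch of that arithmetic, using made-up item sizes rather than the runtime's real ones:

```
package main

import "fmt"

// padTo mirrors the padding rule used by the stackpool declaration:
// pad = line - size%line, so size+pad is a multiple of the cache-line size.
// An already-aligned size gets one extra full line, which the runtime's
// expression also produces and which is harmless.
func padTo(size, line uintptr) uintptr { return line - size%line }

func main() {
	const cacheLine = 64 // a common line size; the runtime uses cpu.CacheLinePadSize
	for _, size := range []uintptr{24, 40, 64} { // illustrative item sizes only
		pad := padTo(size, cacheLine)
		fmt.Printf("item %2d B + pad %2d B = %3d B (multiple of %d)\n",
			size, pad, size+pad, cacheLine)
	}
}
```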