Commit 2ef88f7f authored by Austin Clements

runtime: lock-free fast path for mark bits allocation

Currently we acquire a global lock for every newMarkBits call. This is
unfortunate since every span sweep operation calls newMarkBits.

However, most allocations are simply linear allocations from the
current arena. Take advantage of this to add a lock-free fast path for
allocating from the current arena. With this change, the global lock
only protects the lists of arenas, not the free offset in the current
arena.

Change-Id: I6cf6182af8492c8bfc21276114c77275fe3d7826
Reviewed-on: https://go-review.googlesource.com/34595
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
parent 6c4a8d19
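
For readers outside the runtime, here is a minimal, stand-alone sketch of the same pattern using sync/atomic: a lock-free bump-allocation fast path over the current arena, with a mutex taken only to install a fresh arena. The names (arena, bumpAlloc, arenaSize) are invented for illustration; the runtime itself uses runtime/internal/atomic, notinheap memory, and its own mutex rather than the sync package.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
	"unsafe"
)

const arenaSize = 1 << 16 // illustrative arena size, not the runtime's

// arena is a fixed-size block carved up by a monotonically increasing
// free offset.
type arena struct {
	free uintptr // next free byte; read/written atomically
	bits [arenaSize]byte
}

// tryAlloc reserves n bytes from a, or returns nil if a does not have
// enough room. Safe to call concurrently: the atomic add both reserves
// the bytes and detects overflow.
func (a *arena) tryAlloc(n uintptr) *byte {
	if a == nil || atomic.LoadUintptr(&a.free)+n > uintptr(len(a.bits)) {
		return nil
	}
	end := atomic.AddUintptr(&a.free, n)
	if end > uintptr(len(a.bits)) {
		return nil // lost a race near the end of the arena
	}
	return &a.bits[end-n]
}

// bumpAlloc holds the current arena. Only the slow path takes the lock.
type bumpAlloc struct {
	mu   sync.Mutex
	head unsafe.Pointer // *arena; read atomically, written under mu
}

func (b *bumpAlloc) alloc(n uintptr) *byte {
	// Fast path: no lock, just an atomic load and an atomic add.
	if p := (*arena)(atomic.LoadPointer(&b.head)).tryAlloc(n); p != nil {
		return p
	}
	// Slow path: the head arena is (nearly) full; replace it under the lock.
	b.mu.Lock()
	defer b.mu.Unlock()
	// Retry: another goroutine may have installed a fresh arena already.
	if p := (*arena)(atomic.LoadPointer(&b.head)).tryAlloc(n); p != nil {
		return p
	}
	fresh := new(arena)
	p := fresh.tryAlloc(n) // cannot race: fresh is not published yet
	atomic.StorePointer(&b.head, unsafe.Pointer(fresh))
	return p
}

func main() {
	b := &bumpAlloc{head: unsafe.Pointer(new(arena))}
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := 0; j < 1000; j++ {
				if b.alloc(64) == nil {
					panic("alloc failed")
				}
			}
		}()
	}
	wg.Wait()
	fmt.Println("all allocations succeeded")
}

As in the commit, a caller that loses the race near the end of an arena simply falls back to the locked slow path, so the mutex only protects replacement of the arena, not the free offset itself.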
@@ -1331,7 +1331,7 @@ type gcBitsHeader struct {
 //go:notinheap
 type gcBits struct {
 	// gcBitsHeader // side step recursive type bug (issue 14620) by including fields by hand.
-	free uintptr // free is the index into bits of the next free byte; protected by gcBitsArenas.lock
+	free uintptr // free is the index into bits of the next free byte; read/write atomically
 	next *gcBits
 	bits [gcBitsChunkBytes - gcBitsHeaderBytes]uint8
 }
@@ -1339,28 +1339,45 @@ type gcBits struct {
 var gcBitsArenas struct {
 	lock mutex
 	free *gcBits
-	next *gcBits
+	next *gcBits // Read atomically. Write atomically under lock.
 	current *gcBits
 	previous *gcBits
 }
 
 // tryAlloc allocates from b or returns nil if b does not have enough room.
-// The caller must hold gcBitsArenas.lock.
+// This is safe to call concurrently.
 func (b *gcBits) tryAlloc(bytes uintptr) *uint8 {
-	if b == nil || b.free+bytes > uintptr(len(b.bits)) {
+	if b == nil || atomic.Loaduintptr(&b.free)+bytes > uintptr(len(b.bits)) {
 		return nil
 	}
-	p := &b.bits[b.free]
-	b.free += bytes
-	return p
+	// Try to allocate from this block.
+	end := atomic.Xadduintptr(&b.free, bytes)
+	if end > uintptr(len(b.bits)) {
+		return nil
+	}
+	// There was enough room.
+	start := end - bytes
+	return &b.bits[start]
 }
 
 // newMarkBits returns a pointer to 8 byte aligned bytes
 // to be used for a span's mark bits.
 func newMarkBits(nelems uintptr) *uint8 {
-	lock(&gcBitsArenas.lock)
 	blocksNeeded := uintptr((nelems + 63) / 64)
 	bytesNeeded := blocksNeeded * 8
+
+	// Try directly allocating from the current head arena.
+	head := (*gcBits)(atomic.Loadp(unsafe.Pointer(&gcBitsArenas.next)))
+	if p := head.tryAlloc(bytesNeeded); p != nil {
+		return p
+	}
+
+	// There's not enough room in the head arena. We may need to
+	// allocate a new arena.
+	lock(&gcBitsArenas.lock)
+	// Try the head arena again, since it may have changed. Now
+	// that we hold the lock, the list head can't change, but its
+	// free position still can.
 	if p := gcBitsArenas.next.tryAlloc(bytesNeeded); p != nil {
 		unlock(&gcBitsArenas.lock)
 		return p
@@ -1380,7 +1397,8 @@ func newMarkBits(nelems uintptr) *uint8 {
 		return p
 	}
 
-	// Allocate from the fresh arena.
+	// Allocate from the fresh arena. We haven't linked it in yet, so
+	// this cannot race and is guaranteed to succeed.
 	p := fresh.tryAlloc(bytesNeeded)
 	if p == nil {
 		throw("markBits overflow")
@@ -1388,7 +1406,7 @@ func newMarkBits(nelems uintptr) *uint8 {
 
 	// Add the fresh arena to the "next" list.
 	fresh.next = gcBitsArenas.next
-	gcBitsArenas.next = fresh
+	atomic.StorepNoWB(unsafe.Pointer(&gcBitsArenas.next), unsafe.Pointer(fresh))
 	unlock(&gcBitsArenas.lock)
 	return p
@@ -1434,7 +1452,7 @@ func nextMarkBitArenaEpoch() {
 	}
 	gcBitsArenas.previous = gcBitsArenas.current
 	gcBitsArenas.current = gcBitsArenas.next
-	gcBitsArenas.next = nil // newMarkBits calls newArena when needed
+	atomic.StorepNoWB(unsafe.Pointer(&gcBitsArenas.next), nil) // newMarkBits calls newArena when needed
 	unlock(&gcBitsArenas.lock)
 }
...