Commit 50048a4e authored by Ian Lance Taylor's avatar Ian Lance Taylor

runtime: add as many extra M's as needed

When a non-Go thread calls into Go, the runtime needs an M to run the Go
code. The runtime keeps a list of extra M's available. When the last
extra M is allocated, the needextram field is set to tell it to allocate
a new extra M as soon as it is running in Go. This ensures that an extra
M will always be available for the next thread.

However, if many threads need an extra M at the same time, this
serializes them all. One thread will get an extra M with the needextram
field set. All the other threads will see that there is no M available
and will go to sleep. The one thread that succeeded will create a new
extra M. One lucky thread will get it. All the other threads will see
that there is no M available and will go to sleep. The effect is
thundering herd, as all the threads looking for an extra M go through
the process one by one. This seems to have a particularly bad effect on
the FreeBSD scheduler for some reason.

With this change, we track the number of threads waiting for an M, and
create all of them as soon as one thread gets through. This still means
that all the threads will fight for the lock to pick up the next M. But
at least each thread that gets the lock will succeed, instead of going
to sleep only to fight again.

This smooths out the performance greatly on FreeBSD, reducing the
average wall time of `testprogcgo CgoCallbackGC` by 74%.  On GNU/Linux
the average wall time goes down by 9%.

Fixes #13926
Fixes #16396

Change-Id: I6dc42a4156085a7ed4e5334c60b39db8f8ef8fea
Reviewed-on: https://go-review.googlesource.com/25047
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarDmitry Vyukov <dvyukov@google.com>
parent 883e128f
......@@ -80,6 +80,7 @@
package runtime
import (
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
......@@ -176,7 +177,7 @@ func cgocallbackg(ctxt uintptr) {
func cgocallbackg1(ctxt uintptr) {
gp := getg()
if gp.m.needextram {
if gp.m.needextram || atomic.Load(&extraMWaiters) > 0 {
gp.m.needextram = false
systemstack(newextram)
}
......
......@@ -1389,10 +1389,27 @@ func needm(x byte) {
var earlycgocallback = []byte("fatal error: cgo callback before cgo call\n")
// newextram allocates an m and puts it on the extra list.
// newextram allocates m's and puts them on the extra list.
// It is called with a working local m, so that it can do things
// like call schedlock and allocate.
func newextram() {
c := atomic.Xchg(&extraMWaiters, 0)
if c > 0 {
for i := uint32(0); i < c; i++ {
oneNewExtraM()
}
} else {
// Make sure there is at least one extra M.
mp := lockextra(true)
unlockextra(mp)
if mp == nil {
oneNewExtraM()
}
}
}
// oneNewExtraM allocates an m and puts it on the extra list.
func oneNewExtraM() {
// Create extra goroutine locked to extra m.
// The goroutine is the context in which the cgo callback will run.
// The sched.pc will never be returned to, but setting it to
......@@ -1485,6 +1502,7 @@ func getm() uintptr {
}
var extram uintptr
var extraMWaiters uint32
// lockextra locks the extra list and returns the list head.
// The caller must unlock the list by storing a new list head
......@@ -1495,6 +1513,7 @@ var extram uintptr
func lockextra(nilokay bool) *m {
const locked = 1
incr := false
for {
old := atomic.Loaduintptr(&extram)
if old == locked {
......@@ -1503,6 +1522,13 @@ func lockextra(nilokay bool) *m {
continue
}
if old == 0 && !nilokay {
if !incr {
// Add 1 to the number of threads
// waiting for an M.
// This is cleared by newextram.
atomic.Xadd(&extraMWaiters, 1)
incr = true
}
usleep(1)
continue
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment