Commit a0fc3060 authored by Austin Clements's avatar Austin Clements

runtime: eliminate runqvictims and a copy from runqsteal

Currently, runqsteal steals Gs from another P into an intermediate
buffer and then copies those Gs into the current P's run queue. This
intermediate buffer itself was moved from the stack to the P in commit
c4fe5031 to eliminate the cost of zeroing it on every steal.

This commit follows up c4fe5031 by stealing directly into the current
P's run queue, which eliminates the copy and the need for the
intermediate buffer. The update to the tail pointer is only committed
once the entire steal operation has succeeded, so the semantics of
stealing do not change.

Change-Id: Icdd7a0eb82668980bf42c0154b51eef6419fdd51
Reviewed-on: https://go-review.googlesource.com/9998Reviewed-by: default avatarRuss Cox <rsc@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
parent ab4e7988
......@@ -3460,10 +3460,11 @@ func runqget(_p_ *p) (gp *g, inheritTime bool) {
}
}
// Grabs a batch of goroutines from local runnable queue.
// batch array must be of size len(p->runq)/2. Returns number of grabbed goroutines.
// Grabs a batch of goroutines from _p_'s runnable queue into batch.
// Batch is a ring buffer starting at batchHead.
// Returns number of grabbed goroutines.
// Can be executed by any P.
func runqgrab(_p_ *p, batch []*g, stealRunNextG bool) uint32 {
func runqgrab(_p_ *p, batch *[256]*g, batchHead uint32, stealRunNextG bool) uint32 {
for {
h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers
t := atomicload(&_p_.runqtail) // load-acquire, synchronize with the producer
......@@ -3484,7 +3485,7 @@ func runqgrab(_p_ *p, batch []*g, stealRunNextG bool) uint32 {
if !_p_.runnext.cas(next, 0) {
continue
}
batch[0] = next.ptr()
batch[batchHead%uint32(len(batch))] = next.ptr()
return 1
}
}
......@@ -3494,7 +3495,8 @@ func runqgrab(_p_ *p, batch []*g, stealRunNextG bool) uint32 {
continue
}
for i := uint32(0); i < n; i++ {
batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))]
g := _p_.runq[(h+i)%uint32(len(_p_.runq))]
batch[(batchHead+i)%uint32(len(batch))] = g
}
if cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
return n
......@@ -3506,23 +3508,20 @@ func runqgrab(_p_ *p, batch []*g, stealRunNextG bool) uint32 {
// and put onto local runnable queue of p.
// Returns one of the stolen elements (or nil if failed).
func runqsteal(_p_, p2 *p, stealRunNextG bool) *g {
n := runqgrab(p2, _p_.runqvictims[:], stealRunNextG)
t := _p_.runqtail
n := runqgrab(p2, &_p_.runq, t, stealRunNextG)
if n == 0 {
return nil
}
n--
gp := _p_.runqvictims[n]
gp := _p_.runq[(t+n)%uint32(len(_p_.runq))]
if n == 0 {
return gp
}
h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers
t := _p_.runqtail
if t-h+n >= uint32(len(_p_.runq)) {
throw("runqsteal: runq overflow")
}
for i := uint32(0); i < n; i++ {
_p_.runq[(t+i)%uint32(len(_p_.runq))] = _p_.runqvictims[i]
}
atomicstore(&_p_.runqtail, t+n) // store-release, makes the item available for consumption
return gp
}
......
......@@ -356,7 +356,6 @@ type p struct {
runqhead uint32
runqtail uint32
runq [256]*g
runqvictims [128]*g // Used to stage victims from another p's runq
// runnext, if non-nil, is a runnable G that was ready'd by
// the current G and should be run next instead of what's in
// runq if there's time remaining in the running G's time
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment