Commit a0fc3060 authored by Austin Clements

runtime: eliminate runqvictims and a copy from runqsteal

Currently, runqsteal steals Gs from another P into an intermediate
buffer and then copies those Gs into the current P's run queue. This
intermediate buffer itself was moved from the stack to the P in commit
c4fe5031 to eliminate the cost of zeroing it on every steal.

This commit follows up c4fe5031 by stealing directly into the current
P's run queue, which eliminates the copy and the need for the
intermediate buffer. The update to the tail pointer is only committed
once the entire steal operation has succeeded, so the semantics of
stealing do not change.

Change-Id: Icdd7a0eb82668980bf42c0154b51eef6419fdd51
Reviewed-on: https://go-review.googlesource.com/9998
Reviewed-by: Russ Cox <rsc@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
parent ab4e7988
...@@ -3460,10 +3460,11 @@ func runqget(_p_ *p) (gp *g, inheritTime bool) { ...@@ -3460,10 +3460,11 @@ func runqget(_p_ *p) (gp *g, inheritTime bool) {
} }
} }
// Grabs a batch of goroutines from local runnable queue. // Grabs a batch of goroutines from _p_'s runnable queue into batch.
// batch array must be of size len(p->runq)/2. Returns number of grabbed goroutines. // Batch is a ring buffer starting at batchHead.
// Returns number of grabbed goroutines.
// Can be executed by any P. // Can be executed by any P.
func runqgrab(_p_ *p, batch []*g, stealRunNextG bool) uint32 { func runqgrab(_p_ *p, batch *[256]*g, batchHead uint32, stealRunNextG bool) uint32 {
for { for {
h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers
t := atomicload(&_p_.runqtail) // load-acquire, synchronize with the producer t := atomicload(&_p_.runqtail) // load-acquire, synchronize with the producer
...@@ -3484,7 +3485,7 @@ func runqgrab(_p_ *p, batch []*g, stealRunNextG bool) uint32 { ...@@ -3484,7 +3485,7 @@ func runqgrab(_p_ *p, batch []*g, stealRunNextG bool) uint32 {
if !_p_.runnext.cas(next, 0) { if !_p_.runnext.cas(next, 0) {
continue continue
} }
batch[0] = next.ptr() batch[batchHead%uint32(len(batch))] = next.ptr()
return 1 return 1
} }
} }
...@@ -3494,7 +3495,8 @@ func runqgrab(_p_ *p, batch []*g, stealRunNextG bool) uint32 { ...@@ -3494,7 +3495,8 @@ func runqgrab(_p_ *p, batch []*g, stealRunNextG bool) uint32 {
continue continue
} }
for i := uint32(0); i < n; i++ { for i := uint32(0); i < n; i++ {
batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))] g := _p_.runq[(h+i)%uint32(len(_p_.runq))]
batch[(batchHead+i)%uint32(len(batch))] = g
} }
if cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume if cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
return n return n
...@@ -3506,23 +3508,20 @@ func runqgrab(_p_ *p, batch []*g, stealRunNextG bool) uint32 { ...@@ -3506,23 +3508,20 @@ func runqgrab(_p_ *p, batch []*g, stealRunNextG bool) uint32 {
// and put onto local runnable queue of p. // and put onto local runnable queue of p.
// Returns one of the stolen elements (or nil if failed). // Returns one of the stolen elements (or nil if failed).
func runqsteal(_p_, p2 *p, stealRunNextG bool) *g { func runqsteal(_p_, p2 *p, stealRunNextG bool) *g {
n := runqgrab(p2, _p_.runqvictims[:], stealRunNextG) t := _p_.runqtail
n := runqgrab(p2, &_p_.runq, t, stealRunNextG)
if n == 0 { if n == 0 {
return nil return nil
} }
n-- n--
gp := _p_.runqvictims[n] gp := _p_.runq[(t+n)%uint32(len(_p_.runq))]
if n == 0 { if n == 0 {
return gp return gp
} }
h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers
t := _p_.runqtail
if t-h+n >= uint32(len(_p_.runq)) { if t-h+n >= uint32(len(_p_.runq)) {
throw("runqsteal: runq overflow") throw("runqsteal: runq overflow")
} }
for i := uint32(0); i < n; i++ {
_p_.runq[(t+i)%uint32(len(_p_.runq))] = _p_.runqvictims[i]
}
atomicstore(&_p_.runqtail, t+n) // store-release, makes the item available for consumption atomicstore(&_p_.runqtail, t+n) // store-release, makes the item available for consumption
return gp return gp
} }
......
...@@ -353,10 +353,9 @@ type p struct { ...@@ -353,10 +353,9 @@ type p struct {
goidcacheend uint64 goidcacheend uint64
// Queue of runnable goroutines. Accessed without lock. // Queue of runnable goroutines. Accessed without lock.
runqhead uint32 runqhead uint32
runqtail uint32 runqtail uint32
runq [256]*g runq [256]*g
runqvictims [128]*g // Used to stage victims from another p's runq
// runnext, if non-nil, is a runnable G that was ready'd by // runnext, if non-nil, is a runnable G that was ready'd by
// the current G and should be run next instead of what's in // the current G and should be run next instead of what's in
// runq if there's time remaining in the running G's time // runq if there's time remaining in the running G's time
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.