Commit 36a432f2 authored by Austin Clements's avatar Austin Clements

runtime: make copystack/sudog synchronization more explicit

When we copy a stack of a goroutine blocked in a channel operation, we
have to be very careful because other goroutines may be writing to
that goroutine's stack. To handle this, stack copying acquires the
locks for the channels a goroutine is waiting on.

One complication is that stack growth may happen while a goroutine
holds these locks, in which case stack copying must *not* acquire
these locks because that would self-deadlock.

Currently, stack growth never acquires these locks because stack
growth only happens when a goroutine is running, which means it's
either not blocking on a channel or it's holding the channel locks
already. Stack shrinking always acquires these locks because shrinking
happens asynchronously, so the goroutine is never running, so there
are either no locks or they've been released by the goroutine.

However, we're about to change when stack shrinking can happen, which
is going to break the current rules. Rather than find a new way to
derive whether to acquire these locks or not, this CL simply adds a
flag to the g struct that indicates that stack copying should acquire
channel locks. This flag is set while the goroutine is blocked on a
channel op.

For #10958, #24543.

Change-Id: Ia2ac8831b1bfda98d39bb30285e144c4f7eaf9ab
Reviewed-on: https://go-review.googlesource.com/c/go/+/172982
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarMichael Knyszek <mknyszek@google.com>
parent 8c586157
......@@ -249,7 +249,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
gp.waiting = mysg
gp.param = nil
c.sendq.enqueue(mysg)
goparkunlock(&c.lock, waitReasonChanSend, traceEvGoBlockSend, 3)
gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanSend, traceEvGoBlockSend, 2)
// Ensure the value being sent is kept alive until the
// receiver copies it out. The sudog has a pointer to the
// stack object, but sudogs aren't considered as roots of the
......@@ -261,6 +261,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
throw("G waiting list is corrupted")
}
gp.waiting = nil
gp.activeStackChans = false
if gp.param == nil {
if c.closed == 0 {
throw("chansend: spurious wakeup")
......@@ -559,13 +560,14 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool)
mysg.c = c
gp.param = nil
c.recvq.enqueue(mysg)
goparkunlock(&c.lock, waitReasonChanReceive, traceEvGoBlockRecv, 3)
gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanReceive, traceEvGoBlockRecv, 2)
// someone woke us up
if mysg != gp.waiting {
throw("G waiting list is corrupted")
}
gp.waiting = nil
gp.activeStackChans = false
if mysg.releasetime > 0 {
blockevent(mysg.releasetime-t0, 2)
}
......@@ -632,6 +634,14 @@ func recv(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) {
goready(gp, skip+1)
}
func chanparkcommit(gp *g, chanLock unsafe.Pointer) bool {
// There are unlocked sudogs that point into gp's stack. Stack
// copying must lock the channels of those sudogs.
gp.activeStackChans = true
unlock((*mutex)(chanLock))
return true
}
// compiler implements
//
// select {
......
......@@ -423,6 +423,12 @@ type g struct {
paniconfault bool // panic (instead of crash) on unexpected fault address
gcscandone bool // g has scanned stack; protected by _Gscan bit in status
throwsplit bool // must not split stack
// activeStackChans indicates that there are unlocked channels
// pointing into this goroutine's stack. If true, stack
// copying needs to acquire channel locks to protect these
// areas of the stack.
activeStackChans bool
raceignore int8 // ignore race detection events
sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
sysexitticks int64 // cputicks when syscall has returned (for tracing)
......
......@@ -75,6 +75,9 @@ func selunlock(scases []scase, lockorder []uint16) {
}
func selparkcommit(gp *g, _ unsafe.Pointer) bool {
// There are unlocked sudogs that point into gp's stack. Stack
// copying must lock the channels of those sudogs.
gp.activeStackChans = true
// This must not access gp's stack (see gopark). In
// particular, it must not access the *hselect. That's okay,
// because by the time this is called, gp.waiting has all
......@@ -311,6 +314,7 @@ loop:
// wait for someone to wake us up
gp.param = nil
gopark(selparkcommit, nil, waitReasonSelect, traceEvGoBlockSelect, 1)
gp.activeStackChans = false
sellock(scases, lockorder)
......
......@@ -21,7 +21,7 @@ func TestSizeof(t *testing.T) {
_32bit uintptr // size on 32bit platforms
_64bit uintptr // size on 64bit platforms
}{
{runtime.G{}, 212, 368}, // g, but exported for testing
{runtime.G{}, 216, 376}, // g, but exported for testing
}
for _, tt := range tests {
......
......@@ -786,10 +786,6 @@ func syncadjustsudogs(gp *g, used uintptr, adjinfo *adjustinfo) uintptr {
}
// Lock channels to prevent concurrent send/receive.
// It's important that we *only* do this for async
// copystack; otherwise, gp may be in the middle of
// putting itself on wait queues and this would
// self-deadlock.
var lastc *hchan
for sg := gp.waiting; sg != nil; sg = sg.waitlink {
if sg.c != lastc {
......@@ -826,12 +822,7 @@ func syncadjustsudogs(gp *g, used uintptr, adjinfo *adjustinfo) uintptr {
// Copies gp's stack to a new stack of a different size.
// Caller must have changed gp status to Gcopystack.
//
// If sync is true, this is a self-triggered stack growth and, in
// particular, no other G may be writing to gp's stack (e.g., via a
// channel operation). If sync is false, copystack protects against
// concurrent channel operations.
func copystack(gp *g, newsize uintptr, sync bool) {
func copystack(gp *g, newsize uintptr) {
if gp.syscallsp != 0 {
throw("stack growth not allowed in system call")
}
......@@ -857,15 +848,16 @@ func copystack(gp *g, newsize uintptr, sync bool) {
// Adjust sudogs, synchronizing with channel ops if necessary.
ncopy := used
if sync {
if !gp.activeStackChans {
adjustsudogs(gp, &adjinfo)
} else {
// sudogs can point in to the stack. During concurrent
// shrinking, these areas may be written to. Find the
// highest such pointer so we can handle everything
// there and below carefully. (This shouldn't be far
// from the bottom of the stack, so there's little
// cost in handling everything below it carefully.)
// sudogs may be pointing in to the stack and gp has
// released channel locks, so other goroutines could
// be writing to gp's stack. Find the highest such
// pointer so we can handle everything there and below
// carefully. (This shouldn't be far from the bottom
// of the stack, so there's little cost in handling
// everything below it carefully.)
adjinfo.sghi = findsghi(gp, old)
// Synchronize with channel ops and copy the part of
......@@ -1040,7 +1032,7 @@ func newstack() {
// The concurrent GC will not scan the stack while we are doing the copy since
// the gp is in a Gcopystack status.
copystack(gp, newsize, true)
copystack(gp, newsize)
if stackDebug >= 1 {
print("stack grow done\n")
}
......@@ -1120,7 +1112,7 @@ func shrinkstack(gp *g) {
print("shrinking stack ", oldsize, "->", newsize, "\n")
}
copystack(gp, newsize, false)
copystack(gp, newsize)
}
// freeStackSpans frees unused stack spans at the end of GC.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment