Commit b89b4623 authored by Austin Clements's avatar Austin Clements

runtime: support preemption on windows/{386,amd64}

This implements preemptM on Windows using SuspendThead and
ResumeThread.

Unlike on POSIX platforms, preemptM on Windows happens synchronously.
This means we need a make a few other tweaks to suspendG:

1. We need to CAS the G back to _Grunning before doing the preemptM,
   or there's a good chance we'll just catch the G spinning on its
   status in the runtime, which won't be preemptible.

2. We need to rate-limit preemptM attempts. Otherwise, if the first
   attempt catches the G at a non-preemptible point, the busy loop in
   suspendG may hammer it so hard that it never makes it past that
   non-preemptible point.

Updates #10958, #24543.

Change-Id: Ie53b098811096f7e45d864afd292dc9e999ce226
Reviewed-on: https://go-review.googlesource.com/c/go/+/204340
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
parent 6fd467ee
......@@ -6,6 +6,7 @@ package runtime
import (
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
......@@ -32,6 +33,7 @@ const (
//go:cgo_import_dynamic runtime._GetSystemDirectoryA GetSystemDirectoryA%2 "kernel32.dll"
//go:cgo_import_dynamic runtime._GetSystemInfo GetSystemInfo%1 "kernel32.dll"
//go:cgo_import_dynamic runtime._GetThreadContext GetThreadContext%2 "kernel32.dll"
//go:cgo_import_dynamic runtime._SetThreadContext SetThreadContext%2 "kernel32.dll"
//go:cgo_import_dynamic runtime._LoadLibraryW LoadLibraryW%1 "kernel32.dll"
//go:cgo_import_dynamic runtime._LoadLibraryA LoadLibraryA%1 "kernel32.dll"
//go:cgo_import_dynamic runtime._PostQueuedCompletionStatus PostQueuedCompletionStatus%4 "kernel32.dll"
......@@ -79,6 +81,7 @@ var (
_GetSystemInfo,
_GetSystemTimeAsFileTime,
_GetThreadContext,
_SetThreadContext,
_LoadLibraryW,
_LoadLibraryA,
_PostQueuedCompletionStatus,
......@@ -539,6 +542,11 @@ var exiting uint32
//go:nosplit
func exit(code int32) {
// Disallow thread suspension for preemption. Otherwise,
// ExitProcess and SuspendThread can race: SuspendThread
// queues a suspension request for this thread, ExitProcess
// kills the suspending thread, and then this thread suspends.
lock(&suspendLock)
atomic.Store(&exiting, 1)
stdcall1(_ExitProcess, uintptr(code))
}
......@@ -1003,19 +1011,22 @@ func profilem(mp *m, thread uintptr) {
r.contextflags = _CONTEXT_CONTROL
stdcall2(_GetThreadContext, thread, uintptr(unsafe.Pointer(r)))
var gp *g
gp := gFromTLS(mp)
sigprof(r.ip(), r.sp(), r.lr(), gp, mp)
}
func gFromTLS(mp *m) *g {
switch GOARCH {
default:
panic("unsupported architecture")
case "arm":
tls := &mp.tls[0]
gp = **((***g)(unsafe.Pointer(tls)))
return **((***g)(unsafe.Pointer(tls)))
case "386", "amd64":
tls := &mp.tls[0]
gp = *((**g)(unsafe.Pointer(tls)))
return *((**g)(unsafe.Pointer(tls)))
}
sigprof(r.ip(), r.sp(), r.lr(), gp, mp)
throw("unsupported architecture")
return nil
}
func profileloop1(param uintptr) uint32 {
......@@ -1081,10 +1092,95 @@ func setThreadCPUProfiler(hz int32) {
atomic.Store((*uint32)(unsafe.Pointer(&getg().m.profilehz)), uint32(hz))
}
const preemptMSupported = false
const preemptMSupported = GOARCH != "arm"
// suspendLock protects simultaneous SuspendThread operations from
// suspending each other.
var suspendLock mutex
func preemptM(mp *m) {
// Not currently supported.
//
// TODO: Use SuspendThread/GetThreadContext/ResumeThread
if GOARCH == "arm" {
// TODO: Implement call injection
return
}
if mp == getg().m {
throw("self-preempt")
}
// Acquire our own handle to mp's thread.
lock(&mp.threadLock)
if mp.thread == 0 {
// The M hasn't been minit'd yet (or was just unminit'd).
unlock(&mp.threadLock)
atomic.Xadd(&mp.preemptGen, 1)
return
}
var thread uintptr
stdcall7(_DuplicateHandle, currentProcess, mp.thread, currentProcess, uintptr(unsafe.Pointer(&thread)), 0, 0, _DUPLICATE_SAME_ACCESS)
unlock(&mp.threadLock)
// Prepare thread context buffer.
var c *context
cbuf := make([]byte, unsafe.Sizeof(*c)+15)
// Align Context to 16 bytes.
c = (*context)(unsafe.Pointer((uintptr(unsafe.Pointer(&cbuf[15]))) &^ 15))
c.contextflags = _CONTEXT_CONTROL
// Serialize thread suspension. SuspendThread is asynchronous,
// so it's otherwise possible for two threads to suspend each
// other and deadlock. We must hold this lock until after
// GetThreadContext, since that blocks until the thread is
// actually suspended.
lock(&suspendLock)
// Suspend the thread.
if int32(stdcall1(_SuspendThread, thread)) == -1 {
unlock(&suspendLock)
stdcall1(_CloseHandle, thread)
// The thread no longer exists. This shouldn't be
// possible, but just acknowledge the request.
atomic.Xadd(&mp.preemptGen, 1)
return
}
// We have to be very careful between this point and once
// we've shown mp is at an async safe-point. This is like a
// signal handler in the sense that mp could have been doing
// anything when we stopped it, including holding arbitrary
// locks.
// We have to get the thread context before inspecting the M
// because SuspendThread only requests a suspend.
// GetThreadContext actually blocks until it's suspended.
stdcall2(_GetThreadContext, thread, uintptr(unsafe.Pointer(c)))
unlock(&suspendLock)
// Does it want a preemption and is it safe to preempt?
gp := gFromTLS(mp)
if wantAsyncPreempt(gp) && isAsyncSafePoint(gp, c.ip(), c.sp(), c.lr()) {
// Inject call to asyncPreempt
targetPC := funcPC(asyncPreempt)
switch GOARCH {
default:
throw("unsupported architecture")
case "386", "amd64":
// Make it look like the thread called targetPC.
pc := c.ip()
sp := c.sp()
sp -= sys.PtrSize
*(*uintptr)(unsafe.Pointer(sp)) = pc
c.set_sp(sp)
c.set_ip(targetPC)
}
stdcall2(_SetThreadContext, thread, uintptr(unsafe.Pointer(c)))
}
// Acknowledge the preemption.
atomic.Xadd(&mp.preemptGen, 1)
stdcall1(_ResumeThread, thread)
stdcall1(_CloseHandle, thread)
}
......@@ -118,6 +118,7 @@ func suspendG(gp *g) suspendGState {
stopped := false
var asyncM *m
var asyncGen uint32
var nextPreemptM int64
for i := 0; ; i++ {
switch s := readgstatus(gp); s {
default:
......@@ -205,14 +206,32 @@ func suspendG(gp *g) suspendGState {
gp.preempt = true
gp.stackguard0 = stackPreempt
// Send asynchronous preemption.
asyncM = gp.m
asyncGen = atomic.Load(&asyncM.preemptGen)
if preemptMSupported && debug.asyncpreemptoff == 0 {
preemptM(asyncM)
}
// Prepare for asynchronous preemption.
asyncM2 := gp.m
asyncGen2 := atomic.Load(&asyncM2.preemptGen)
needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
asyncM = asyncM2
asyncGen = asyncGen2
casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
// Send asynchronous preemption. We do this
// after CASing the G back to _Grunning
// because preemptM may be synchronous and we
// don't want to catch the G just spinning on
// its status.
if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
// Rate limit preemptM calls. This is
// particularly important on Windows
// where preemptM is actually
// synchronous and the spin loop here
// can lead to live-lock.
now := nanotime()
if now >= nextPreemptM {
nextPreemptM = now + yieldDelay/2
preemptM(asyncM)
}
}
}
// TODO: Don't busy wait. This loop should really only
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment