runtime: don't clear lockedExt on locked M when G exits

When a locked M has its G exit without calling UnlockOSThread, then lockedExt on it was getting cleared. Unfortunately, this meant that during P handoff, if a new M was started, it might get forked (on most OSs besides Windows) from the locked M, which could have kernel state attached to it. To solve this, just don't clear lockedExt. At the point where the locked M has its G exit, it will also exit in accordance with the LockOSThread API. So, we can safely assume that it's lockedExt state will no longer be used. For the case of the main thread where it just gets wedged instead of exiting, it's probably better for it to keep the locked marker since it more accurately represents its state. Fixed #28979. Change-Id: I7d3d71dd65bcb873e9758086d2cbcb9a06429b0f Reviewed-on: https://go-review.googlesource.com/c/153078 Run-TryBot: Michael Knyszek <mknyszek@google.com> Reviewed-by: Austin Clements <austin@google.com>

runtime: don't clear lockedExt on locked M when G exits
When a locked M has its G exit without calling UnlockOSThread, then lockedExt on it was getting cleared. Unfortunately, this meant that during P handoff, if a new M was started, it might get forked (on most OSs besides Windows) from the locked M, which could have kernel state attached to it. To solve this, just don't clear lockedExt. At the point where the locked M has its G exit, it will also exit in accordance with the LockOSThread API. So, we can safely assume that it's lockedExt state will no longer be used. For the case of the main thread where it just gets wedged instead of exiting, it's probably better for it to keep the locked marker since it more accurately represents its state. Fixed #28979. Change-Id: I7d3d71dd65bcb873e9758086d2cbcb9a06429b0f Reviewed-on: https://go-review.googlesource.com/c/153078 Run-TryBot: Michael Knyszek <mknyszek@google.com> Reviewed-by: Austin Clements <austin@google.com>
d0f8a751 · Michael Anthony Knyszek · Michael Knyszek · 6fcab648 · d0f8a751 · d0f8a751
Commit d0f8a751 authored Dec 07, 2018 by Michael Anthony Knyszek Committed by Michael Knyszek Dec 19, 2018
5 changed files
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -2703,7 +2703,6 @@ func goexit0(gp *g) {
 		print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n")
 		throw("internal lockOSThread error")
 	}
-	_g_.m.lockedExt = 0
 	gfput(_g_.m.p.ptr(), gp)
 	if locked {
 		// The goroutine may have locked this thread because
@@ -2714,6 +2713,10 @@ func goexit0(gp *g) {
 		// the thread.
 		if GOOS != "plan9" { // See golang.org/issue/22227.
 			gogo(&_g_.m.g0.sched)
+		} else {
+			// Clear lockedExt on plan9 since we may end up re-using
+			// this thread.
+			_g_.m.lockedExt = 0
 		}
 	}
 	schedule()

--- a/src/runtime/proc_test.go
+++ b/src/runtime/proc_test.go
@@ -885,6 +885,12 @@ func TestLockOSThreadNesting(t *testing.T) {

 func TestLockOSThreadExit(t *testing.T) {
 	testLockOSThreadExit(t, "testprog")
+
+	want := "OK\n"
+	output := runTestProg(t, "testprog", "LockOSThreadAvoidsStatePropagation", "GOMAXPROCS=1")
+	if output != want {
+		t.Errorf("want %s, got %s\n", want, output)
+	}
 }

 func testLockOSThreadExit(t *testing.T, prog string) {

--- a/src/runtime/testdata/testprog/lockosthread.go
+++ b/src/runtime/testdata/testprog/lockosthread.go
@@ -24,6 +24,12 @@ func init() {
 		runtime.LockOSThread()
 	})
 	register("LockOSThreadAlt", LockOSThreadAlt)
+
+	registerInit("LockOSThreadAvoidsStatePropagation", func() {
+		// Lock the OS thread now so main runs on the main thread.
+		runtime.LockOSThread()
+	})
+	register("LockOSThreadAvoidsStatePropagation", LockOSThreadAvoidsStatePropagation)
 }

 func LockOSThreadMain() {
@@ -92,3 +98,96 @@ func LockOSThreadAlt() {
 ok:
 	println("OK")
 }
+
+func LockOSThreadAvoidsStatePropagation() {
+	// This test is similar to LockOSThreadAlt in that it will detect if a thread
+	// which should have died is still running. However, rather than do this with
+	// thread IDs, it does this by unsharing state on that thread. This way, it
+	// also detects whether new threads were cloned from the dead thread, and not
+	// from a clean thread. Cloning from a locked thread is undesirable since
+	// cloned threads will inherit potentially unwanted OS state.
+	//
+	// unshareFs, getcwd, and chdir("/tmp") are only guaranteed to work on
+	// Linux, so on other platforms this just checks that the runtime doesn't
+	// do anything terrible.
+	//
+	// This is running locked to the main OS thread.
+
+	// GOMAXPROCS=1 makes this fail much more reliably if a tainted thread is
+	// cloned from.
+	if runtime.GOMAXPROCS(-1) != 1 {
+		println("requires GOMAXPROCS=1")
+		os.Exit(1)
+	}
+
+	if err := chdir("/"); err != nil {
+		println("failed to chdir:", err.Error())
+		os.Exit(1)
+	}
+	// On systems other than Linux, cwd == "".
+	cwd, err := getcwd()
+	if err != nil {
+		println("failed to get cwd:", err.Error())
+		os.Exit(1)
+	}
+	if cwd != "" && cwd != "/" {
+		println("unexpected cwd", cwd, " wanted /")
+		os.Exit(1)
+	}
+
+	ready := make(chan bool, 1)
+	go func() {
+		// This goroutine must be running on a new thread.
+		runtime.LockOSThread()
+
+		// Unshare details about the FS, like the CWD, with
+		// the rest of the process on this thread.
+		// On systems other than Linux, this is a no-op.
+		if err := unshareFs(); err != nil {
+			println("failed to unshare fs:", err.Error())
+			os.Exit(1)
+		}
+		// Chdir to somewhere else on this thread.
+		// On systems other than Linux, this is a no-op.
+		if err := chdir("/tmp"); err != nil {
+			println("failed to chdir:", err.Error())
+			os.Exit(1)
+		}
+
+		// The state on this thread is now considered "tainted", but it
+		// should no longer be observable in any other context.
+
+		ready <- true
+		// Exit with the thread locked.
+	}()
+	<-ready
+
+	// Spawn yet another goroutine and lock it. Since GOMAXPROCS=1, if
+	// for some reason state from the (hopefully dead) locked thread above
+	// propagated into a newly created thread (via clone), or that thread
+	// is actually being re-used, then we should get scheduled on such a
+	// thread with high likelihood.
+	done := make(chan bool)
+	go func() {
+		runtime.LockOSThread()
+
+		// Get the CWD and check if this is the same as the main thread's
+		// CWD. Every thread should share the same CWD.
+		// On systems other than Linux, wd == "".
+		wd, err := getcwd()
+		if err != nil {
+			println("failed to get cwd:", err.Error())
+			os.Exit(1)
+		}
+		if wd != cwd {
+			println("bad state from old thread propagated after it should have died")
+			os.Exit(1)
+		}
+		<-done
+
+		runtime.UnlockOSThread()
+	}()
+	done <- true
+	runtime.UnlockOSThread()
+	println("OK")
+}
--- a/src/runtime/testdata/testprog/gettid.go
+++ b/src/runtime/testdata/testprog/gettid.go
@@ -27,3 +27,28 @@ func tidExists(tid int) (exists, supported bool) {
 	state := bytes.Fields(stat)[2]
 	return !(len(state) == 1 && state[0] == 'Z'), true
 }
+
+func getcwd() (string, error) {
+	if !syscall.ImplementsGetwd {
+		return "", nil
+	}
+	// Use the syscall to get the current working directory.
+	// This is imperative for checking for OS thread state
+	// after an unshare since os.Getwd might just check the
+	// environment, or use some other mechanism.
+	var buf [4096]byte
+	n, err := syscall.Getcwd(buf[:])
+	if err != nil {
+		return "", err
+	}
+	// Subtract one for null terminator.
+	return string(buf[:n-1]), nil
+}
+
+func unshareFs() error {
+	return syscall.Unshare(syscall.CLONE_FS)
+}
+
+func chdir(path string) error {
+	return syscall.Chdir(path)
+}
--- a/src/runtime/testdata/testprog/gettid_none.go
+++ b/src/runtime/testdata/testprog/gettid_none.go
@@ -13,3 +13,15 @@ func gettid() int {
 func tidExists(tid int) (exists, supported bool) {
 	return false, false
 }
+
+func getcwd() (string, error) {
+	return "", nil
+}
+
+func unshareFs() error {
+	return nil
+}
+
+func chdir(path string) error {
+	return nil
+}