Commit 631d6a33 authored by Russ Cox

runtime: implement atomicand8 atomically

We're skating on thin ice, and things are finally starting to melt around here.
(I want to avoid the debugging session that will happen when someone
uses atomicand8 expecting it to be atomic with respect to other operations.)

Change-Id: I254f1582be4eb1f2d7fbba05335a91c6bf0c7f02
Reviewed-on: https://go-review.googlesource.com/7861
Reviewed-by: Minux Ma <minux@golang.org>
parent b93fa309
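
The non-x86 ports below all use the same trick: widen the byte operation to its aligned 4-byte word, mask the other three bytes with 0xFF, and retry with CAS until the update lands. Here is a minimal user-level sketch of that technique against sync/atomic (ours, not the runtime's code; the and8 helper name is hypothetical, and it assumes a little-endian machine, as the commit's arm fallback does):

	package main

	import (
		"fmt"
		"sync/atomic"
		"unsafe"
	)

	// and8 atomically ANDs v into the byte at addr by widening to the
	// containing 32-bit word and retrying with CAS until no concurrent
	// writer intervenes (little endian assumed).
	func and8(addr *uint8, v uint8) {
		uaddr := uintptr(unsafe.Pointer(addr))
		word := (*uint32)(unsafe.Pointer(uaddr &^ 3)) // aligned word holding addr
		shift := (uaddr & 3) * 8                      // bit offset of the byte
		mask := uint32(v)<<shift | ^(uint32(0xFF) << shift)
		for {
			old := atomic.LoadUint32(word)
			if atomic.CompareAndSwapUint32(word, old, old&mask) {
				return
			}
		}
	}

	func main() {
		var backing uint32 // guarantees 4-byte alignment
		bytes := (*[4]uint8)(unsafe.Pointer(&backing))
		bytes[1] = 0xF0
		and8(&bytes[1], 0x3C)
		fmt.Printf("%#x\n", bytes[1]) // 0x30; the other three bytes are untouched
	}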
@@ -608,6 +608,14 @@ TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
 	ORB	BX, (AX)
 	RET
+
+// void runtime·atomicand8(byte volatile*, byte);
+TEXT runtime·atomicand8(SB), NOSPLIT, $0-5
+	MOVL	ptr+0(FP), AX
+	MOVB	val+4(FP), BX
+	LOCK
+	ANDB	BX, (AX)
+	RET
 
 // void jmpdefer(fn, sp);
 // called from deferreturn.
 // 1. pop the caller

@@ -588,6 +588,14 @@ TEXT runtime·atomicor8(SB), NOSPLIT, $0-9
 	ORB	BX, (AX)
 	RET
+
+// void runtime·atomicand8(byte volatile*, byte);
+TEXT runtime·atomicand8(SB), NOSPLIT, $0-9
+	MOVQ	ptr+0(FP), AX
+	MOVB	val+8(FP), BX
+	LOCK
+	ANDB	BX, (AX)
+	RET
 
 // void jmpdefer(fn, sp);
 // called from deferreturn.
 // 1. pop the caller

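On 386 and amd64 no retry loop is needed: the LOCK prefix makes the ANDB read-modify-write a single atomic operation, matching the existing LOCK ORB in atomicor8. The remaining ports below emulate the byte-wide AND with a word-wide atomic loop: load-reserved/store-conditional on ppc64, and a 32-bit CAS in Go on arm and arm64.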
@@ -609,12 +609,42 @@ TEXT runtime·atomicor8(SB), NOSPLIT, $0-9
 	// Shift val for aligned ptr. R4 = val << R6
 	SLD	R6, R4, R4
-atomicor8_again:
+again:
 	SYNC
 	LWAR	(R5), R6
 	OR	R4, R6
 	STWCCC	R6, (R5)
-	BNE	atomicor8_again
+	BNE	again
 	SYNC
 	ISYNC
 	RETURN
+
+// void runtime·atomicand8(byte volatile*, byte);
+TEXT runtime·atomicand8(SB), NOSPLIT, $0-9
+	MOVD	ptr+0(FP), R3
+	MOVBZ	val+8(FP), R4
+	// Align ptr down to 4 bytes so we can use 32-bit load/store.
+	// R5 = (R3 << 0) & ~3
+	RLDCR	$0, R3, $~3, R5
+	// Compute val shift.
+#ifdef GOARCH_ppc64
+	// Big endian. ptr = ptr ^ 3
+	XOR	$3, R3
+#endif
+	// R6 = ((ptr & 3) * 8) = (ptr << 3) & (3*8)
+	RLDC	$3, R3, $(3*8), R6
+	// Shift val for aligned ptr. R4 = val << R6 | ^(0xFF << R6)
+	MOVD	$0xFF, R7
+	SLD	R6, R4
+	SLD	R6, R7
+	XOR	$-1, R7
+	OR	R7, R4
+again:
+	SYNC
+	LWAR	(R5), R6
+	AND	R4, R6
+	STWCCC	R6, (R5)
+	BNE	again
+	SYNC
+	ISYNC
+	RETURN

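The only subtle step above is the shift computation: on big-endian ppc64 the byte at address a lives at bit offset ((a ^ 3) & 3) * 8 within its aligned word, which is what the XOR $3 implements. A quick sketch to check the arithmetic (ours, not part of the commit):

	package main

	import "fmt"

	// shiftFor returns the bit offset of the byte at address a within its
	// 4-byte-aligned word: (a&3)*8 on little endian; big endian first flips
	// the low two address bits, matching the XOR $3 in the assembly above.
	func shiftFor(a uintptr, bigEndian bool) uintptr {
		if bigEndian {
			a ^= 3
		}
		return (a & 3) * 8
	}

	func main() {
		for off := uintptr(0); off < 4; off++ {
			fmt.Printf("byte %d: shift LE=%2d BE=%2d\n",
				off, shiftFor(off, false), shiftFor(off, true))
		}
		// byte 0: shift LE= 0 BE=24, byte 1: shift LE= 8 BE=16, and so on:
		// the big-endian word stores byte 0 in its high-order bits, so the
		// mask val<<shift | ^(0xFF<<shift) still selects exactly the
		// addressed byte.
	}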
@@ -57,9 +57,14 @@ func xchguintptr(ptr *uintptr, new uintptr) uintptr
 //go:noescape
 func atomicload64(ptr *uint64) uint64
 
+//go:noescape
+func atomicand8(ptr *uint8, val uint8)
+
 //go:noescape
 func atomicor8(ptr *uint8, val uint8)
 
+// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+
 //go:noescape
 func cas64(ptr *uint64, old, new uint64) bool

@@ -48,9 +48,14 @@ func xchgp1(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer
 //go:noescape
 func xchguintptr(ptr *uintptr, new uintptr) uintptr
 
+//go:noescape
+func atomicand8(ptr *uint8, val uint8)
+
 //go:noescape
 func atomicor8(ptr *uint8, val uint8)
 
+// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+
 //go:noescape
 func cas64(ptr *uint64, old, new uint64) bool

@@ -153,3 +153,19 @@ func atomicor8(addr *uint8, v uint8) {
 		}
 	}
 }
+
+//go:nosplit
+func atomicand8(addr *uint8, v uint8) {
+	// Align down to 4 bytes and use 32-bit CAS.
+	uaddr := uintptr(unsafe.Pointer(addr))
+	addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
+	mask := uint32(0xFF) << ((uaddr & 3) * 8) // little endian
+	word := uint32(v) << ((uaddr & 3) * 8)    // little endian
+	word |= ^mask
+	for {
+		old := *addr32
+		if cas(addr32, old, old&word) {
+			return
+		}
+	}
+}
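
Note that the unsynchronized read old := *addr32 in these fallbacks is safe: if another writer modifies the word between the load and the cas, the cas fails and the loop retries with a fresh value, so no update can be lost. And because the mask keeps every byte other than the target at 0xFF, the AND leaves neighbouring bytes of the shared word untouched.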
@@ -48,6 +48,23 @@ func atomicor8(addr *uint8, v uint8) {
 	}
 }
 
+//go:nosplit
+func atomicand8(addr *uint8, v uint8) {
+	// TODO(dfc) implement this in asm.
+	// Align down to 4 bytes and use 32-bit CAS.
+	uaddr := uintptr(unsafe.Pointer(addr))
+	addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
+	word := uint32(v) << ((uaddr & 3) * 8)    // little endian
+	mask := uint32(0xFF) << ((uaddr & 3) * 8) // little endian
+	word |= ^mask
+	for {
+		old := *addr32
+		if cas(addr32, old, old&word) {
+			return
+		}
+	}
+}
+
 //go:noescape
 func cas64(ptr *uint64, old, new uint64) bool

@@ -35,9 +35,14 @@ func atomicload64(ptr *uint64) uint64
 //go:noescape
 func atomicloadp(ptr unsafe.Pointer) unsafe.Pointer
 
+//go:noescape
+func atomicand8(ptr *uint8, val uint8)
+
 //go:noescape
 func atomicor8(ptr *uint8, val uint8)
 
+// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+
 //go:noescape
 func cas64(ptr *uint64, old, new uint64) bool

@@ -142,17 +142,6 @@ func have_cgo_allocate() bool {
 	return &weak_cgo_allocate != nil
 }
 
-// Slow for now as we serialize this, since this is on a debug path
-// speed is not critical at this point.
-var andlock mutex
-
-//go:nowritebarrier
-func atomicand8(src *byte, val byte) {
-	lock(&andlock)
-	*src &= val
-	unlock(&andlock)
-}
-
 var gcdatamask bitvector
 var gcbssmask bitvector
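
The deleted fallback above is the heart of the fix: atomicor8 never took andlock, so an OR landing between the locked load and store of *src &= val was silently overwritten. A small stress sketch of that interleaving (ours, using a 32-bit word and sync/atomic in place of the runtime internals; it may take a few runs on a multicore machine to observe a loss):

	package main

	import (
		"fmt"
		"sync"
		"sync/atomic"
	)

	var (
		word uint32
		mu   sync.Mutex // plays the role of the deleted andlock
	)

	// lockedAnd mimics the removed fallback: a load and a store under a
	// private mutex that other atomics do not take.
	func lockedAnd(v uint32) {
		mu.Lock()
		old := atomic.LoadUint32(&word)
		// An OR landing here is overwritten by the store below.
		atomic.StoreUint32(&word, old&v)
		mu.Unlock()
	}

	func main() {
		lost := 0
		for i := 0; i < 100000; i++ {
			atomic.StoreUint32(&word, 0)
			var wg sync.WaitGroup
			wg.Add(2)
			go func() { defer wg.Done(); lockedAnd(0xFF) }() // identity AND
			go func() {
				defer wg.Done()
				for { // CAS-based OR, like atomicor8
					old := atomic.LoadUint32(&word)
					if atomic.CompareAndSwapUint32(&word, old, old|1) {
						return
					}
				}
			}()
			wg.Wait()
			if atomic.LoadUint32(&word)&1 == 0 {
				lost++ // the OR's bit vanished, though the AND kept all bits
			}
		}
		fmt.Printf("lost OR updates: %d of 100000\n", lost)
	}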