Commit ae7d5f84 authored by Austin Clements

runtime: buffered write barrier for ppc64

Updates #22460.

Change-Id: I6040c4024111c80361c81eb7eec5071ec9efb4f9
Reviewed-on: https://go-review.googlesource.com/92702
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
parent 313a4b2b
......@@ -408,7 +408,7 @@ func Main(archInit func(*Arch)) {
}
switch objabi.GOARCH {
case "amd64", "amd64p32", "386", "arm", "arm64", "mips64", "mips64le", "mips", "mipsle":
case "amd64", "amd64p32", "386", "arm", "arm64", "ppc64", "ppc64le", "mips64", "mips64le", "mips", "mipsle":
default:
// Other architectures don't support the buffered
// write barrier yet.
......
......@@ -1086,6 +1086,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
q.To.Reg = ppc64.REG_R2
}
case ssa.OpPPC64LoweredWB:
p := s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = v.Aux.(*obj.LSym)
case ssa.OpPPC64LoweredNilCheck:
// Issue a load which will fault if arg is nil.
p := s.Prog(ppc64.AMOVBZ)
......
......@@ -651,6 +651,9 @@
(IsSliceInBounds idx len) -> (LessEqual (CMPU idx len))
(NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
// Write barrier.
(WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)
// Optimizations
// Note that PPC "logical" immediates come in 0:15 and 16:31 unsigned immediate forms,
// so ORconst, XORconst easily expand into a pair.
......
......@@ -474,6 +474,11 @@ func init() {
{name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
// It preserves R0 through R15, g, and its arguments R20 and R21,
// but may clobber anything else, including R31 (REGTMP).
{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("R20"), buildReg("R21")}, clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R20 R21 g")) | buildReg("R31")}, clobberFlags: true, aux: "Sym", symEffect: "None"},
// (InvertFlags (CMP a b)) == (CMP b a)
// So if we want (LessThan (CMP a b)) but we can't do that because a is a constant,
// then we do (LessThan (InvertFlags (CMP b a))) instead.
......
......@@ -1492,6 +1492,7 @@ const (
OpPPC64LoweredAtomicCas32
OpPPC64LoweredAtomicAnd8
OpPPC64LoweredAtomicOr8
OpPPC64LoweredWB
OpPPC64InvertFlags
OpPPC64FlagEQ
OpPPC64FlagLT
......@@ -19243,6 +19244,20 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "LoweredWB",
auxType: auxSym,
argLen: 3,
clobberFlags: true,
symEffect: SymNone,
reg: regInfo{
inputs: []inputInfo{
{0, 1048576}, // R20
{1, 2097152}, // R21
},
clobbers: 576460746931503104, // R16 R17 R18 R19 R22 R23 R24 R25 R26 R27 R28 R29 R31 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
},
{
name: "InvertFlags",
argLen: 1,
......
......@@ -623,6 +623,8 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpTrunc64to32_0(v)
case OpTrunc64to8:
return rewriteValuePPC64_OpTrunc64to8_0(v)
case OpWB:
return rewriteValuePPC64_OpWB_0(v)
case OpXor16:
return rewriteValuePPC64_OpXor16_0(v)
case OpXor32:
......@@ -43015,6 +43017,24 @@ func rewriteValuePPC64_OpTrunc64to8_0(v *Value) bool {
return true
}
}
// rewriteValuePPC64_OpWB_0 rewrites a generic WB value into the PPC64
// LoweredWB op in place. The aux value and the three arguments
// (destptr, srcptr, mem) are captured before v is reset and then
// re-attached in the same order. The rule has no condition, so the
// function always reports true.
func rewriteValuePPC64_OpWB_0(v *Value) bool {
	// match: (WB {fn} destptr srcptr mem)
	// cond:
	// result: (LoweredWB {fn} destptr srcptr mem)
	aux := v.Aux
	// Touch the last argument first so a too-short Args slice panics
	// here, before any individual argument is read.
	_ = v.Args[2]
	dst, src, memArg := v.Args[0], v.Args[1], v.Args[2]

	v.reset(OpPPC64LoweredWB)
	v.Aux = aux
	v.AddArg(dst)
	v.AddArg(src)
	v.AddArg(memArg)
	return true
}
func rewriteValuePPC64_OpXor16_0(v *Value) bool {
// match: (Xor16 x y)
// cond:
......
......@@ -10,3 +10,5 @@ runtime/asm_ppc64x.s: [GOARCH] addmoduledata: function addmoduledata missing Go
runtime/duff_ppc64x.s: [GOARCH] duffzero: function duffzero missing Go declaration
runtime/tls_ppc64x.s: [GOARCH] save_g: function save_g missing Go declaration
runtime/tls_ppc64x.s: [GOARCH] load_g: function load_g missing Go declaration
runtime/asm_ARCHSUFF.s: [GOARCH] gcWriteBarrier: function gcWriteBarrier missing Go declaration
......@@ -1495,3 +1495,74 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1
MOVW $1, R3
MOVB R3, ret+0(FP)
RET
// gcWriteBarrier performs a heap pointer write and informs the GC.
//
// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
// - R20 is the destination of the write
// - R21 is the value being written at R20.
// It clobbers condition codes.
// It does not clobber R0 through R15,
// but may clobber any other register, *including* R31.
// R20 and R21 are not modified on the fast path and are reloaded from
// the frame after a flush, so they still hold the arguments on return.
//
// Outline:
//   1. Reserve a 16-byte entry in the current P's write barrier buffer
//      by advancing wbBuf.next.
//   2. Store the new value (R21) and the slot's current contents (*R20)
//      into that entry.
//   3. If the advanced next equals wbBuf.end, spill registers and call
//      wbBufFlush, then restore them.
//   4. Perform the actual store of R21 to (R20) and return.
//
// The 112-byte frame holds the fourteen 8-byte spill slots used on the
// flush path: R20, R21, R3-R12, R14 and R15 at FIXED_FRAME+0 .. +104.
TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$112
// The standard prologue clobbers R31.
// We use R16 and R17 as scratch registers.
// R16 = g.m, then R16 = g.m.p (the current P).
MOVD g_m(g), R16
MOVD m_p(R16), R16
// R17 = p.wbBuf.next, the address of the next free entry.
MOVD (p_wbBuf+wbBuf_next)(R16), R17
// Increment wbBuf.next position.
// Each entry is 16 bytes: the value being written, then the old slot contents.
ADD $16, R17
MOVD R17, (p_wbBuf+wbBuf_next)(R16)
// R16 is reused: it now holds p.wbBuf.end instead of the P pointer.
MOVD (p_wbBuf+wbBuf_end)(R16), R16
// Compare the advanced next against end; the result is consumed by the BEQ below.
CMP R16, R17
// Record the write.
// R17 already points past the reserved entry, hence the negative offsets.
MOVD R21, -16(R17) // Record value
MOVD (R20), R16 // TODO: This turns bad writes into bad reads.
MOVD R16, -8(R17) // Record *slot
// Is the buffer full? (flags set in CMP above)
// The entry has already been written at this point, so the flush path
// only needs to call wbBufFlush and come back.
BEQ flush
ret:
// Do the write.
MOVD R21, (R20)
RET
flush:
// Save registers R0 through R15 since these were not saved by the caller.
// We don't save all registers on ppc64 because it takes too much space.
MOVD R20, (FIXED_FRAME+0)(R1) // Also first argument to wbBufFlush
MOVD R21, (FIXED_FRAME+8)(R1) // Also second argument to wbBufFlush
// R0 is always 0, so no need to spill.
// R1 is SP.
// R2 is SB.
MOVD R3, (FIXED_FRAME+16)(R1)
MOVD R4, (FIXED_FRAME+24)(R1)
MOVD R5, (FIXED_FRAME+32)(R1)
MOVD R6, (FIXED_FRAME+40)(R1)
MOVD R7, (FIXED_FRAME+48)(R1)
MOVD R8, (FIXED_FRAME+56)(R1)
MOVD R9, (FIXED_FRAME+64)(R1)
MOVD R10, (FIXED_FRAME+72)(R1)
MOVD R11, (FIXED_FRAME+80)(R1)
MOVD R12, (FIXED_FRAME+88)(R1)
// R13 is REGTLS
MOVD R14, (FIXED_FRAME+96)(R1)
MOVD R15, (FIXED_FRAME+104)(R1)
// This takes arguments R20 and R21.
// They were stored in the first two frame slots above, where they
// double as the outgoing arguments.
CALL runtime·wbBufFlush(SB)
// Reload everything spilled above, in the same slot order, so the
// caller sees R3-R12, R14, R15, R20 and R21 unchanged.
MOVD (FIXED_FRAME+0)(R1), R20
MOVD (FIXED_FRAME+8)(R1), R21
MOVD (FIXED_FRAME+16)(R1), R3
MOVD (FIXED_FRAME+24)(R1), R4
MOVD (FIXED_FRAME+32)(R1), R5
MOVD (FIXED_FRAME+40)(R1), R6
MOVD (FIXED_FRAME+48)(R1), R7
MOVD (FIXED_FRAME+56)(R1), R8
MOVD (FIXED_FRAME+64)(R1), R9
MOVD (FIXED_FRAME+72)(R1), R10
MOVD (FIXED_FRAME+80)(R1), R11
MOVD (FIXED_FRAME+88)(R1), R12
MOVD (FIXED_FRAME+96)(R1), R14
MOVD (FIXED_FRAME+104)(R1), R15
// Rejoin the fast path to perform the pointer store and return.
JMP ret
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment