Commit 517a44d5 authored by Michael Munday

cmd/compile: intrinsify atomic operations on s390x

Implements the following intrinsics on s390x:
 - AtomicAdd{32,64}
 - AtomicCompareAndSwap{32,64}
 - AtomicExchange{32,64}
 - AtomicLoad{32,64,Ptr}
 - AtomicStore{32,64,PtrNoWB}

I haven't added rules for And8 or Or8 yet.
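
For illustration (not part of the change): with these intrinsics, runtime code
like the sketch below compiles to the instructions named in the comments
instead of calls to the assembly stubs. The function and variable names are
made up for the example, and runtime/internal/atomic is only importable from
inside the runtime, so treat this as a sketch rather than a standalone program.

    import "runtime/internal/atomic"

    func sketch(p *uint64) uint64 {
        atomic.Xadd64(p, 1)     // LAAG, plus an ADD to recover the new value
        atomic.Xchg64(p, 42)    // CSG retry loop
        atomic.Cas64(p, 42, 7)  // CSG, condition code converted to a bool
        v := atomic.Load64(p)   // MOVD, kept ordered as an atomic load
        atomic.Store64(p, v+1)  // MOVD
        return v
    }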

Change-Id: I647af023a8e513718e90e98a60191e7af6167314
Reviewed-on: https://go-review.googlesource.com/31614
Run-TryBot: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
parent 2113c9ad
@@ -2566,63 +2566,63 @@ func intrinsicInit() {
v := s.newValue2(ssa.OpAtomicLoad32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
return s.newValue1(ssa.OpSelect0, Types[TUINT32], v)
-}, sys.AMD64, sys.ARM64),
+}, sys.AMD64, sys.ARM64, sys.S390X),
intrinsicKey{"runtime/internal/atomic", "Load64"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
v := s.newValue2(ssa.OpAtomicLoad64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
return s.newValue1(ssa.OpSelect0, Types[TUINT64], v)
-}, sys.AMD64, sys.ARM64),
+}, sys.AMD64, sys.ARM64, sys.S390X),
intrinsicKey{"runtime/internal/atomic", "Loadp"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
v := s.newValue2(ssa.OpAtomicLoadPtr, ssa.MakeTuple(ptrto(Types[TUINT8]), ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
return s.newValue1(ssa.OpSelect0, ptrto(Types[TUINT8]), v)
-}, sys.AMD64, sys.ARM64),
+}, sys.AMD64, sys.ARM64, sys.S390X),
intrinsicKey{"runtime/internal/atomic", "Store"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicStore32, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
return nil
-}, sys.AMD64, sys.ARM64),
+}, sys.AMD64, sys.ARM64, sys.S390X),
intrinsicKey{"runtime/internal/atomic", "Store64"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicStore64, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
return nil
-}, sys.AMD64, sys.ARM64),
+}, sys.AMD64, sys.ARM64, sys.S390X),
intrinsicKey{"runtime/internal/atomic", "StorepNoWB"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicStorePtrNoWB, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
return nil
-}, sys.AMD64, sys.ARM64),
+}, sys.AMD64, sys.ARM64, sys.S390X),
intrinsicKey{"runtime/internal/atomic", "Xchg"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
v := s.newValue3(ssa.OpAtomicExchange32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
return s.newValue1(ssa.OpSelect0, Types[TUINT32], v)
-}, sys.AMD64, sys.ARM64),
+}, sys.AMD64, sys.ARM64, sys.S390X),
intrinsicKey{"runtime/internal/atomic", "Xchg64"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
v := s.newValue3(ssa.OpAtomicExchange64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
return s.newValue1(ssa.OpSelect0, Types[TUINT64], v)
-}, sys.AMD64, sys.ARM64),
+}, sys.AMD64, sys.ARM64, sys.S390X),
intrinsicKey{"runtime/internal/atomic", "Xadd"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
v := s.newValue3(ssa.OpAtomicAdd32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
return s.newValue1(ssa.OpSelect0, Types[TUINT32], v)
-}, sys.AMD64, sys.ARM64),
+}, sys.AMD64, sys.ARM64, sys.S390X),
intrinsicKey{"runtime/internal/atomic", "Xadd64"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
v := s.newValue3(ssa.OpAtomicAdd64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
return s.newValue1(ssa.OpSelect0, Types[TUINT64], v)
-}, sys.AMD64, sys.ARM64),
+}, sys.AMD64, sys.ARM64, sys.S390X),
intrinsicKey{"runtime/internal/atomic", "Cas"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
v := s.newValue4(ssa.OpAtomicCompareAndSwap32, ssa.MakeTuple(Types[TBOOL], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.intrinsicArg(n, 2), s.mem())
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
return s.newValue1(ssa.OpSelect0, Types[TBOOL], v)
-}, sys.AMD64, sys.ARM64),
+}, sys.AMD64, sys.ARM64, sys.S390X),
intrinsicKey{"runtime/internal/atomic", "Cas64"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
v := s.newValue4(ssa.OpAtomicCompareAndSwap64, ssa.MakeTuple(Types[TBOOL], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.intrinsicArg(n, 2), s.mem())
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
return s.newValue1(ssa.OpSelect0, Types[TBOOL], v)
-}, sys.AMD64, sys.ARM64),
+}, sys.AMD64, sys.ARM64, sys.S390X),
intrinsicKey{"runtime/internal/atomic", "And8"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd8, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
@@ -158,6 +158,12 @@ var progtable = [s390x.ALAST & obj.AMask]gc.ProgInfo{
s390x.ACMPUBGT & obj.AMask: {Flags: gc.Cjmp},
s390x.ACMPUBLE & obj.AMask: {Flags: gc.Cjmp},
// Atomic
s390x.ACS & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.LeftWrite | gc.RegRead | gc.RightRead | gc.RightWrite},
s390x.ACSG & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.LeftWrite | gc.RegRead | gc.RightRead | gc.RightWrite},
s390x.ALAA & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightRead | gc.RightWrite},
s390x.ALAAG & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightRead | gc.RightWrite},
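// Note on the flags (added for clarity): SizeL/SizeQ mark the 32-bit and
// 64-bit variants. CS/CSG both read and may overwrite their compare register
// (LeftRead|LeftWrite) and the memory operand (RightRead|RightWrite), while
// LAA/LAAG read the addend and both read and write the memory operand.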
// Macros
s390x.ACLEAR & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightAddr | gc.RightWrite},
@@ -546,6 +546,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = v.Reg()
case ssa.OpSP, ssa.OpSB:
// nothing to do
case ssa.OpSelect0, ssa.OpSelect1:
// nothing to do
case ssa.OpVarDef:
gc.Gvardef(v.Aux.(*gc.Node))
case ssa.OpVarKill:
@@ -558,6 +560,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
case ssa.OpS390XFlagEQ, ssa.OpS390XFlagLT, ssa.OpS390XFlagGT:
v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
case ssa.OpS390XAddTupleFirst32, ssa.OpS390XAddTupleFirst64:
v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
case ssa.OpS390XLoweredNilCheck:
// Issue a load which will fault if the input is nil.
p := gc.Prog(s390x.AMOVBZ)
@@ -686,6 +690,93 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
clear.To.Type = obj.TYPE_MEM
clear.To.Reg = v.Args[0].Reg()
}
case ssa.OpS390XMOVWZatomicload, ssa.OpS390XMOVDatomicload:
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[0].Reg()
gc.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg0()
case ssa.OpS390XMOVWatomicstore, ssa.OpS390XMOVDatomicstore:
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[1].Reg()
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
gc.AddAux(&p.To, v)
case ssa.OpS390XLAA, ssa.OpS390XLAAG:
p := gc.Prog(v.Op.Asm())
p.Reg = v.Reg0()
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[1].Reg()
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
gc.AddAux(&p.To, v)
case ssa.OpS390XLoweredAtomicCas32, ssa.OpS390XLoweredAtomicCas64:
// Convert the flags output of CS{,G} into a bool.
// CS{,G} arg1, arg2, arg0
// MOVD $0, ret
// BNE 2(PC)
// MOVD $1, ret
// NOP (so the BNE has somewhere to land)
// CS{,G} arg1, arg2, arg0
cs := gc.Prog(v.Op.Asm())
cs.From.Type = obj.TYPE_REG
cs.From.Reg = v.Args[1].Reg() // old
cs.Reg = v.Args[2].Reg() // new
cs.To.Type = obj.TYPE_MEM
cs.To.Reg = v.Args[0].Reg()
gc.AddAux(&cs.To, v)
// MOVD $0, ret
movd := gc.Prog(s390x.AMOVD)
movd.From.Type = obj.TYPE_CONST
movd.From.Offset = 0
movd.To.Type = obj.TYPE_REG
movd.To.Reg = v.Reg0()
// BNE 2(PC)
bne := gc.Prog(s390x.ABNE)
bne.To.Type = obj.TYPE_BRANCH
// MOVD $1, ret
movd = gc.Prog(s390x.AMOVD)
movd.From.Type = obj.TYPE_CONST
movd.From.Offset = 1
movd.To.Type = obj.TYPE_REG
movd.To.Reg = v.Reg0()
// NOP (so the BNE has somewhere to land)
nop := gc.Prog(obj.ANOP)
gc.Patch(bne, nop)
case ssa.OpS390XLoweredAtomicExchange32, ssa.OpS390XLoweredAtomicExchange64:
// Loop until the CS{,G} succeeds.
// MOV{WZ,D} arg0, ret
// cs: CS{,G} ret, arg1, arg0
// BNE cs
// MOV{WZ,D} arg0, ret
load := gc.Prog(loadByType(v.Type.FieldType(0)))
load.From.Type = obj.TYPE_MEM
load.From.Reg = v.Args[0].Reg()
load.To.Type = obj.TYPE_REG
load.To.Reg = v.Reg0()
gc.AddAux(&load.From, v)
// CS{,G} ret, arg1, arg0
cs := gc.Prog(v.Op.Asm())
cs.From.Type = obj.TYPE_REG
cs.From.Reg = v.Reg0() // old
cs.Reg = v.Args[1].Reg() // new
cs.To.Type = obj.TYPE_MEM
cs.To.Reg = v.Args[0].Reg()
gc.AddAux(&cs.To, v)
// BNE cs
bne := gc.Prog(s390x.ABNE)
bne.To.Type = obj.TYPE_BRANCH
gc.Patch(bne, cs)
default:
v.Fatalf("genValue not implemented: %s", v.LongString())
}
@@ -110,6 +110,32 @@
(Sqrt x) -> (FSQRT x)
// Atomic loads.
(AtomicLoad32 ptr mem) -> (MOVWZatomicload ptr mem)
(AtomicLoad64 ptr mem) -> (MOVDatomicload ptr mem)
(AtomicLoadPtr ptr mem) -> (MOVDatomicload ptr mem)
// Atomic stores.
(AtomicStore32 ptr val mem) -> (MOVWatomicstore ptr val mem)
(AtomicStore64 ptr val mem) -> (MOVDatomicstore ptr val mem)
(AtomicStorePtrNoWB ptr val mem) -> (MOVDatomicstore ptr val mem)
// Atomic adds.
(AtomicAdd32 ptr val mem) -> (AddTupleFirst32 (LAA ptr val mem) val)
(AtomicAdd64 ptr val mem) -> (AddTupleFirst64 (LAAG ptr val mem) val)
(Select0 <t> (AddTupleFirst32 tuple val)) -> (ADDW val (Select0 <t> tuple))
(Select1 (AddTupleFirst32 tuple _ )) -> (Select1 tuple)
(Select0 <t> (AddTupleFirst64 tuple val)) -> (ADD val (Select0 <t> tuple))
(Select1 (AddTupleFirst64 tuple _ )) -> (Select1 tuple)
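// LAA/LAAG return the old value at the address, while the Xadd intrinsics
// must return the new value, so Select0 of an AddTupleFirst* re-adds val to
// the loaded value and Select1 simply passes the memory result through.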
// Atomic exchanges.
(AtomicExchange32 ptr val mem) -> (LoweredAtomicExchange32 ptr val mem)
(AtomicExchange64 ptr val mem) -> (LoweredAtomicExchange64 ptr val mem)
// Atomic compare and swap.
(AtomicCompareAndSwap32 ptr old new_ mem) -> (LoweredAtomicCas32 ptr old new_ mem)
(AtomicCompareAndSwap64 ptr old new_ mem) -> (LoweredAtomicCas64 ptr old new_ mem)
// Lowering extension
// Note: we always extend to 64 bits even though some ops don't need that many result bits.
(SignExt8to16 x) -> (MOVBreg x)
@@ -135,6 +135,7 @@ func init() {
gpstoreconst = regInfo{inputs: []regMask{ptrspsb, 0}}
gpstoreidx = regInfo{inputs: []regMask{ptrsp, ptrsp, gpsp, 0}}
gpstorebr = regInfo{inputs: []regMask{ptrsp, gpsp, 0}}
gpstorelaa = regInfo{inputs: []regMask{ptrspsb, gpsp, 0}, outputs: gponly}
gpmvc = regInfo{inputs: []regMask{ptrsp, ptrsp, 0}}
@@ -152,6 +153,15 @@ func init() {
fpstore = regInfo{inputs: []regMask{ptrspsb, fp, 0}}
fpstoreidx = regInfo{inputs: []regMask{ptrsp, ptrsp, fp, 0}}
// LoweredAtomicCas may overwrite arg1, so force it to R0 for now.
cas = regInfo{inputs: []regMask{ptrsp, r0, gpsp, 0}, outputs: []regMask{gp, 0}, clobbers: r0}
// LoweredAtomicExchange overwrites the output before executing
// CS{,G}, so the output register must not be the same as the
// input register. For now we just force the output register to
// R0.
exchange = regInfo{inputs: []regMask{ptrsp, gpsp &^ r0, 0}, outputs: []regMask{r0, 0}}
)
var S390Xops = []opData{
@@ -408,6 +418,54 @@ func init() {
{name: "FlagLT"}, // <
{name: "FlagGT"}, // >
// Atomic loads. These are just normal loads but return <value,memory> tuples
// so they can be properly ordered with other loads.
// load from arg0+auxint+aux. arg1=mem.
{name: "MOVWZatomicload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", faultOnNilArg0: true},
{name: "MOVDatomicload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", faultOnNilArg0: true},
// Atomic stores. These are just normal stores.
// store arg1 to arg0+auxint+aux. arg2=mem.
{name: "MOVWatomicstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true},
{name: "MOVDatomicstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true},
// Atomic adds.
// *(arg0+auxint+aux) += arg1. arg2=mem.
// Returns a tuple of <old contents of *(arg0+auxint+aux), memory>.
{name: "LAA", argLength: 3, reg: gpstorelaa, asm: "LAA", typ: "(UInt32,Mem)", aux: "SymOff", faultOnNilArg0: true},
{name: "LAAG", argLength: 3, reg: gpstorelaa, asm: "LAAG", typ: "(UInt64,Mem)", aux: "SymOff", faultOnNilArg0: true},
{name: "AddTupleFirst32", argLength: 2}, // arg0=tuple <x,y>. Returns <x+arg1,y>.
{name: "AddTupleFirst64", argLength: 2}, // arg0=tuple <x,y>. Returns <x+arg1,y>.
// Compare and swap.
// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory.
// if *(arg0+auxint+aux) == arg1 {
// *(arg0+auxint+aux) = arg2
// return (true, memory)
// } else {
// return (false, memory)
// }
// Note that these instructions also return the old value in arg1, but we ignore it.
// TODO: have these return flags instead of bool. The current system generates:
// CS ...
// MOVD $0, ret
// BNE 2(PC)
// MOVD $1, ret
// CMPW ret, $0
// BNE ...
// instead of just
// CS ...
// BEQ ...
// but we can't do that because memory-using ops can't generate flags yet
// (flagalloc wants to move flag-generating instructions around).
{name: "LoweredAtomicCas32", argLength: 4, reg: cas, asm: "CS", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true},
{name: "LoweredAtomicCas64", argLength: 4, reg: cas, asm: "CSG", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true},
// Lowered atomic swaps, emulated using compare-and-swap.
// store arg1 to arg0+auxint+aux, arg2=mem.
{name: "LoweredAtomicExchange32", argLength: 3, reg: exchange, asm: "CS", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true},
{name: "LoweredAtomicExchange64", argLength: 3, reg: exchange, asm: "CSG", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true},
// find leftmost one
{
name: "FLOGR",
@@ -1417,6 +1417,18 @@ const (
OpS390XFlagEQ
OpS390XFlagLT
OpS390XFlagGT
OpS390XMOVWZatomicload
OpS390XMOVDatomicload
OpS390XMOVWatomicstore
OpS390XMOVDatomicstore
OpS390XLAA
OpS390XLAAG
OpS390XAddTupleFirst32
OpS390XAddTupleFirst64
OpS390XLoweredAtomicCas32
OpS390XLoweredAtomicCas64
OpS390XLoweredAtomicExchange32
OpS390XLoweredAtomicExchange64
OpS390XFLOGR
OpS390XSTMG2
OpS390XSTMG3
@@ -17933,6 +17945,182 @@ var opcodeTable = [...]opInfo{
argLen: 0,
reg: regInfo{},
},
{
name: "MOVWZatomicload",
auxType: auxSymOff,
argLen: 2,
faultOnNilArg0: true,
asm: s390x.AMOVWZ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "MOVDatomicload",
auxType: auxSymOff,
argLen: 2,
faultOnNilArg0: true,
asm: s390x.AMOVD,
reg: regInfo{
inputs: []inputInfo{
{0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "MOVWatomicstore",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
asm: s390x.AMOVW,
reg: regInfo{
inputs: []inputInfo{
{0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB
{1, 54271}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
},
},
{
name: "MOVDatomicstore",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
asm: s390x.AMOVD,
reg: regInfo{
inputs: []inputInfo{
{0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB
{1, 54271}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
},
},
{
name: "LAA",
auxType: auxSymOff,
argLen: 3,
faultOnNilArg0: true,
asm: s390x.ALAA,
reg: regInfo{
inputs: []inputInfo{
{0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB
{1, 54271}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "LAAG",
auxType: auxSymOff,
argLen: 3,
faultOnNilArg0: true,
asm: s390x.ALAAG,
reg: regInfo{
inputs: []inputInfo{
{0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB
{1, 54271}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "AddTupleFirst32",
argLen: 2,
reg: regInfo{},
},
{
name: "AddTupleFirst64",
argLen: 2,
reg: regInfo{},
},
{
name: "LoweredAtomicCas32",
auxType: auxSymOff,
argLen: 4,
clobberFlags: true,
faultOnNilArg0: true,
asm: s390x.ACS,
reg: regInfo{
inputs: []inputInfo{
{1, 1}, // R0
{0, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
{2, 54271}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
clobbers: 1, // R0
outputs: []outputInfo{
{1, 0},
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "LoweredAtomicCas64",
auxType: auxSymOff,
argLen: 4,
clobberFlags: true,
faultOnNilArg0: true,
asm: s390x.ACSG,
reg: regInfo{
inputs: []inputInfo{
{1, 1}, // R0
{0, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
{2, 54271}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
clobbers: 1, // R0
outputs: []outputInfo{
{1, 0},
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "LoweredAtomicExchange32",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
asm: s390x.ACS,
reg: regInfo{
inputs: []inputInfo{
{0, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{1, 0},
{0, 1}, // R0
},
},
},
{
name: "LoweredAtomicExchange64",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
asm: s390x.ACSG,
reg: regInfo{
inputs: []inputInfo{
{0, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{1, 0},
{0, 1}, // R0
},
},
},
{
name: "FLOGR",
argLen: 1,
@@ -34,6 +34,30 @@ func rewriteValueS390X(v *Value, config *Config) bool {
return rewriteValueS390X_OpAnd8(v, config)
case OpAndB:
return rewriteValueS390X_OpAndB(v, config)
case OpAtomicAdd32:
return rewriteValueS390X_OpAtomicAdd32(v, config)
case OpAtomicAdd64:
return rewriteValueS390X_OpAtomicAdd64(v, config)
case OpAtomicCompareAndSwap32:
return rewriteValueS390X_OpAtomicCompareAndSwap32(v, config)
case OpAtomicCompareAndSwap64:
return rewriteValueS390X_OpAtomicCompareAndSwap64(v, config)
case OpAtomicExchange32:
return rewriteValueS390X_OpAtomicExchange32(v, config)
case OpAtomicExchange64:
return rewriteValueS390X_OpAtomicExchange64(v, config)
case OpAtomicLoad32:
return rewriteValueS390X_OpAtomicLoad32(v, config)
case OpAtomicLoad64:
return rewriteValueS390X_OpAtomicLoad64(v, config)
case OpAtomicLoadPtr:
return rewriteValueS390X_OpAtomicLoadPtr(v, config)
case OpAtomicStore32:
return rewriteValueS390X_OpAtomicStore32(v, config)
case OpAtomicStore64:
return rewriteValueS390X_OpAtomicStore64(v, config)
case OpAtomicStorePtrNoWB:
return rewriteValueS390X_OpAtomicStorePtrNoWB(v, config)
case OpAvg64u:
return rewriteValueS390X_OpAvg64u(v, config)
case OpBswap32:
@@ -612,6 +636,10 @@ func rewriteValueS390X(v *Value, config *Config) bool {
return rewriteValueS390X_OpS390XXORWconst(v, config)
case OpS390XXORconst:
return rewriteValueS390X_OpS390XXORconst(v, config)
case OpSelect0:
return rewriteValueS390X_OpSelect0(v, config)
case OpSelect1:
return rewriteValueS390X_OpSelect1(v, config)
case OpSignExt16to32:
return rewriteValueS390X_OpSignExt16to32(v, config)
case OpSignExt16to64:
@@ -876,6 +904,214 @@ func rewriteValueS390X_OpAndB(v *Value, config *Config) bool {
return true
}
}
func rewriteValueS390X_OpAtomicAdd32(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (AtomicAdd32 ptr val mem)
// cond:
// result: (AddTupleFirst32 (LAA ptr val mem) val)
for {
ptr := v.Args[0]
val := v.Args[1]
mem := v.Args[2]
v.reset(OpS390XAddTupleFirst32)
v0 := b.NewValue0(v.Line, OpS390XLAA, MakeTuple(config.fe.TypeUInt32(), TypeMem))
v0.AddArg(ptr)
v0.AddArg(val)
v0.AddArg(mem)
v.AddArg(v0)
v.AddArg(val)
return true
}
}
func rewriteValueS390X_OpAtomicAdd64(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (AtomicAdd64 ptr val mem)
// cond:
// result: (AddTupleFirst64 (LAAG ptr val mem) val)
for {
ptr := v.Args[0]
val := v.Args[1]
mem := v.Args[2]
v.reset(OpS390XAddTupleFirst64)
v0 := b.NewValue0(v.Line, OpS390XLAAG, MakeTuple(config.fe.TypeUInt64(), TypeMem))
v0.AddArg(ptr)
v0.AddArg(val)
v0.AddArg(mem)
v.AddArg(v0)
v.AddArg(val)
return true
}
}
func rewriteValueS390X_OpAtomicCompareAndSwap32(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (AtomicCompareAndSwap32 ptr old new_ mem)
// cond:
// result: (LoweredAtomicCas32 ptr old new_ mem)
for {
ptr := v.Args[0]
old := v.Args[1]
new_ := v.Args[2]
mem := v.Args[3]
v.reset(OpS390XLoweredAtomicCas32)
v.AddArg(ptr)
v.AddArg(old)
v.AddArg(new_)
v.AddArg(mem)
return true
}
}
func rewriteValueS390X_OpAtomicCompareAndSwap64(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (AtomicCompareAndSwap64 ptr old new_ mem)
// cond:
// result: (LoweredAtomicCas64 ptr old new_ mem)
for {
ptr := v.Args[0]
old := v.Args[1]
new_ := v.Args[2]
mem := v.Args[3]
v.reset(OpS390XLoweredAtomicCas64)
v.AddArg(ptr)
v.AddArg(old)
v.AddArg(new_)
v.AddArg(mem)
return true
}
}
func rewriteValueS390X_OpAtomicExchange32(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (AtomicExchange32 ptr val mem)
// cond:
// result: (LoweredAtomicExchange32 ptr val mem)
for {
ptr := v.Args[0]
val := v.Args[1]
mem := v.Args[2]
v.reset(OpS390XLoweredAtomicExchange32)
v.AddArg(ptr)
v.AddArg(val)
v.AddArg(mem)
return true
}
}
func rewriteValueS390X_OpAtomicExchange64(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (AtomicExchange64 ptr val mem)
// cond:
// result: (LoweredAtomicExchange64 ptr val mem)
for {
ptr := v.Args[0]
val := v.Args[1]
mem := v.Args[2]
v.reset(OpS390XLoweredAtomicExchange64)
v.AddArg(ptr)
v.AddArg(val)
v.AddArg(mem)
return true
}
}
func rewriteValueS390X_OpAtomicLoad32(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (AtomicLoad32 ptr mem)
// cond:
// result: (MOVWZatomicload ptr mem)
for {
ptr := v.Args[0]
mem := v.Args[1]
v.reset(OpS390XMOVWZatomicload)
v.AddArg(ptr)
v.AddArg(mem)
return true
}
}
func rewriteValueS390X_OpAtomicLoad64(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (AtomicLoad64 ptr mem)
// cond:
// result: (MOVDatomicload ptr mem)
for {
ptr := v.Args[0]
mem := v.Args[1]
v.reset(OpS390XMOVDatomicload)
v.AddArg(ptr)
v.AddArg(mem)
return true
}
}
func rewriteValueS390X_OpAtomicLoadPtr(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (AtomicLoadPtr ptr mem)
// cond:
// result: (MOVDatomicload ptr mem)
for {
ptr := v.Args[0]
mem := v.Args[1]
v.reset(OpS390XMOVDatomicload)
v.AddArg(ptr)
v.AddArg(mem)
return true
}
}
func rewriteValueS390X_OpAtomicStore32(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (AtomicStore32 ptr val mem)
// cond:
// result: (MOVWatomicstore ptr val mem)
for {
ptr := v.Args[0]
val := v.Args[1]
mem := v.Args[2]
v.reset(OpS390XMOVWatomicstore)
v.AddArg(ptr)
v.AddArg(val)
v.AddArg(mem)
return true
}
}
func rewriteValueS390X_OpAtomicStore64(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (AtomicStore64 ptr val mem)
// cond:
// result: (MOVDatomicstore ptr val mem)
for {
ptr := v.Args[0]
val := v.Args[1]
mem := v.Args[2]
v.reset(OpS390XMOVDatomicstore)
v.AddArg(ptr)
v.AddArg(val)
v.AddArg(mem)
return true
}
}
func rewriteValueS390X_OpAtomicStorePtrNoWB(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (AtomicStorePtrNoWB ptr val mem)
// cond:
// result: (MOVDatomicstore ptr val mem)
for {
ptr := v.Args[0]
val := v.Args[1]
mem := v.Args[2]
v.reset(OpS390XMOVDatomicstore)
v.AddArg(ptr)
v.AddArg(val)
v.AddArg(mem)
return true
}
}
func rewriteValueS390X_OpAvg64u(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -16764,6 +17000,78 @@ func rewriteValueS390X_OpS390XXORconst(v *Value, config *Config) bool {
}
return false
}
func rewriteValueS390X_OpSelect0(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (Select0 <t> (AddTupleFirst32 tuple val))
// cond:
// result: (ADDW val (Select0 <t> tuple))
for {
t := v.Type
v_0 := v.Args[0]
if v_0.Op != OpS390XAddTupleFirst32 {
break
}
tuple := v_0.Args[0]
val := v_0.Args[1]
v.reset(OpS390XADDW)
v.AddArg(val)
v0 := b.NewValue0(v.Line, OpSelect0, t)
v0.AddArg(tuple)
v.AddArg(v0)
return true
}
// match: (Select0 <t> (AddTupleFirst64 tuple val))
// cond:
// result: (ADD val (Select0 <t> tuple))
for {
t := v.Type
v_0 := v.Args[0]
if v_0.Op != OpS390XAddTupleFirst64 {
break
}
tuple := v_0.Args[0]
val := v_0.Args[1]
v.reset(OpS390XADD)
v.AddArg(val)
v0 := b.NewValue0(v.Line, OpSelect0, t)
v0.AddArg(tuple)
v.AddArg(v0)
return true
}
return false
}
func rewriteValueS390X_OpSelect1(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (Select1 (AddTupleFirst32 tuple _ ))
// cond:
// result: (Select1 tuple)
for {
v_0 := v.Args[0]
if v_0.Op != OpS390XAddTupleFirst32 {
break
}
tuple := v_0.Args[0]
v.reset(OpSelect1)
v.AddArg(tuple)
return true
}
// match: (Select1 (AddTupleFirst64 tuple _ ))
// cond:
// result: (Select1 tuple)
for {
v_0 := v.Args[0]
if v_0.Op != OpS390XAddTupleFirst64 {
break
}
tuple := v_0.Args[0]
v.reset(OpSelect1)
v.AddArg(tuple)
return true
}
return false
}
func rewriteValueS390X_OpSignExt16to32(v *Value, config *Config) bool {
b := v.Block
_ = b