Commit 320ddcf8 authored by Keith Randall

cmd/compile: inline atomics from runtime/internal/atomic on amd64

Inline atomic reads and writes on amd64.  There's no reason
to pay the overhead of a call for these.

To keep atomic loads from being reordered, we make them
return a <value,memory> tuple.
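
For illustration (not part of this CL), here is a minimal sketch of the
kind of call that is now inlined, written in the style of the benchmark
added at the end of this CL; the test name and body are made up, and it
lives in package atomic_test because runtime/internal/atomic is only
importable from inside the runtime:

	package atomic_test

	import (
		"runtime/internal/atomic"
		"testing"
	)

	func TestInlinedLoadStore(t *testing.T) {
		var x uint64
		atomic.Store64(&x, 42)       // lowered to XCHGQ, producing an <old contents, memory> tuple
		if atomic.Load64(&x) != 42 { // lowered to MOVQatomicload, producing a <value, memory> tuple
			t.Fatal("unexpected value")
		}
	}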

Change the meaning of resultInArg0 for tuple-generating ops
to mean the first part of the result tuple, not the second.
This means we can always put the store part of the tuple last,
matching how arguments are laid out.  This requires reordering
the outputs of add32carry and sub32carry and their descendants
in various architectures.
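
Concretely, the carry regInfos flip from outputs {0, gp} to {gp, 0}
(see the gen/AMD64Ops.go and gen/ARMOps.go hunks below). A standalone
sketch of the convention, using stand-in types rather than the
compiler's real declarations:

	package main

	import "fmt"

	// Stand-in types; the real regMask/regInfo live in cmd/compile/internal/ssa/gen.
	type regMask uint64

	type regInfo struct {
		inputs  []regMask
		outputs []regMask
	}

	const gp regMask = 0xffff // hypothetical mask of allocatable general-purpose registers

	func main() {
		// Old convention: the real result of a tuple-generating op was the last
		// output, so resultInArg0 tied inputs[0] to outputs[len(outputs)-1].
		oldCarry := regInfo{inputs: []regMask{gp}, outputs: []regMask{0, gp}}
		// New convention: the real result is outputs[0] and the register-less
		// part (flags or memory) comes last, so resultInArg0 ties inputs[0] to outputs[0].
		newCarry := regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}}
		fmt.Println(oldCarry, newCarry)
	}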

benchmark                    old ns/op     new ns/op     delta
BenchmarkAtomicLoad64-8      2.09          0.26          -87.56%
BenchmarkAtomicStore64-8     7.54          5.72          -24.14%

TBD (in a different CL): Cas, Or8, ...

Change-Id: I713ea88e7da3026c44ea5bdb56ed094b20bc5207
Reviewed-on: https://go-review.googlesource.com/27641
Reviewed-by: Cherry Zhang <cherryyz@google.com>
parent 71ab9fa3
......@@ -935,7 +935,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore,
ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVOload,
ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore:
ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore,
ssa.OpAMD64MOVQatomicload, ssa.OpAMD64MOVLatomicload:
if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
if gc.Debug_checknil != 0 && int(v.Line) > 1 {
gc.Warnl(v.Line, "removed nil check")
......@@ -951,7 +952,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
return
}
}
if w.Type.IsMemory() {
if w.Type.IsMemory() || w.Type.IsTuple() && w.Type.FieldType(1).IsMemory() {
if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive {
// these ops are OK
mem = w
......@@ -976,6 +977,24 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
gc.Warnl(v.Line, "generated nil check")
}
case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = gc.SSARegNum(v.Args[0])
gc.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = gc.SSARegNum0(v)
case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
r := gc.SSARegNum0(v)
if r != gc.SSARegNum(v.Args[0]) {
v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
}
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = r
p.To.Type = obj.TYPE_MEM
p.To.Reg = gc.SSARegNum(v.Args[1])
gc.AddAux(&p.To, v)
default:
v.Unimplementedf("genValue not implemented: %s", v.LongString())
}
......
......@@ -283,7 +283,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = r
case ssa.OpARMADDS,
ssa.OpARMSUBS:
r := gc.SSARegNum1(v)
r := gc.SSARegNum0(v)
r1 := gc.SSARegNum(v.Args[0])
r2 := gc.SSARegNum(v.Args[1])
p := gc.Prog(v.Op.Asm())
......@@ -356,7 +356,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Offset = v.AuxInt
p.Reg = gc.SSARegNum(v.Args[0])
p.To.Type = obj.TYPE_REG
p.To.Reg = gc.SSARegNum1(v)
p.To.Reg = gc.SSARegNum0(v)
case ssa.OpARMSRRconst:
genshift(arm.AMOVW, 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_RR, v.AuxInt)
case ssa.OpARMADDshiftLL,
......@@ -373,7 +373,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARMADDSshiftLL,
ssa.OpARMSUBSshiftLL,
ssa.OpARMRSBSshiftLL:
p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_LL, v.AuxInt)
p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum0(v), arm.SHIFT_LL, v.AuxInt)
p.Scond = arm.C_SBIT
case ssa.OpARMADDshiftRL,
ssa.OpARMADCshiftRL,
......@@ -389,7 +389,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARMADDSshiftRL,
ssa.OpARMSUBSshiftRL,
ssa.OpARMRSBSshiftRL:
p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_LR, v.AuxInt)
p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum0(v), arm.SHIFT_LR, v.AuxInt)
p.Scond = arm.C_SBIT
case ssa.OpARMADDshiftRA,
ssa.OpARMADCshiftRA,
......@@ -405,7 +405,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARMADDSshiftRA,
ssa.OpARMSUBSshiftRA,
ssa.OpARMRSBSshiftRA:
p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_AR, v.AuxInt)
p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum0(v), arm.SHIFT_AR, v.AuxInt)
p.Scond = arm.C_SBIT
case ssa.OpARMMVNshiftLL:
genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_LL, v.AuxInt)
......@@ -433,7 +433,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARMADDSshiftLLreg,
ssa.OpARMSUBSshiftLLreg,
ssa.OpARMRSBSshiftLLreg:
p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_LL)
p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum0(v), arm.SHIFT_LL)
p.Scond = arm.C_SBIT
case ssa.OpARMADDshiftRLreg,
ssa.OpARMADCshiftRLreg,
......@@ -449,7 +449,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARMADDSshiftRLreg,
ssa.OpARMSUBSshiftRLreg,
ssa.OpARMRSBSshiftRLreg:
p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_LR)
p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum0(v), arm.SHIFT_LR)
p.Scond = arm.C_SBIT
case ssa.OpARMADDshiftRAreg,
ssa.OpARMADCshiftRAreg,
......@@ -465,7 +465,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARMADDSshiftRAreg,
ssa.OpARMSUBSshiftRAreg,
ssa.OpARMRSBSshiftRAreg:
p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_AR)
p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum0(v), arm.SHIFT_AR)
p.Scond = arm.C_SBIT
case ssa.OpARMHMUL,
ssa.OpARMHMULU:
......
......@@ -477,7 +477,7 @@ func inlnode(n *Node) *Node {
if Debug['m'] > 3 {
fmt.Printf("%v:call to func %v\n", n.Line(), Nconv(n.Left, FmtSign))
}
if n.Left.Func != nil && n.Left.Func.Inl.Len() != 0 && !isIntrinsicCall1(n) { // normal case
if n.Left.Func != nil && n.Left.Func.Inl.Len() != 0 && !isIntrinsicCall(n) { // normal case
n = mkinlcall(n, n.Left, n.Isddd)
} else if n.isMethodCalledAsFunction() && n.Left.Sym.Def != nil {
n = mkinlcall(n, n.Left.Sym.Def, n.Isddd)
......
......@@ -571,7 +571,14 @@ func (s *state) stmt(n *Node) {
case OEMPTY, ODCLCONST, ODCLTYPE, OFALL:
// Expression statements
case OCALLFUNC, OCALLMETH, OCALLINTER:
case OCALLFUNC:
if isIntrinsicCall(n) {
s.intrinsicCall(n)
return
}
fallthrough
case OCALLMETH, OCALLINTER:
s.call(n, callNormal)
if n.Op == OCALLFUNC && n.Left.Op == ONAME && n.Left.Class == PFUNC &&
(compiling_runtime && n.Left.Sym.Name == "throw" ||
......@@ -2107,8 +2114,8 @@ func (s *state) expr(n *Node) *ssa.Value {
return s.newValue2(ssa.OpStringMake, n.Type, p, l)
case OCALLFUNC:
if isIntrinsicCall1(n) {
return s.intrinsicCall1(n)
if isIntrinsicCall(n) {
return s.intrinsicCall(n)
}
fallthrough
......@@ -2516,12 +2523,12 @@ const (
callGo
)
// isSSAIntrinsic1 returns true if n is a call to a recognized 1-arg intrinsic
// isSSAIntrinsic returns true if n is a call to a recognized intrinsic
// that can be handled by the SSA backend.
// SSA uses this, but so does the front end to see if it should not
// inline a function because it is a candidate for intrinsic
// substitution.
func isSSAIntrinsic1(s *Sym) bool {
func isSSAIntrinsic(s *Sym) bool {
// The test below is not quite accurate -- in the event that
// a function is disabled on a per-function basis, for example
// because of hash-keyed binary failure search, SSA might be
......@@ -2541,38 +2548,74 @@ func isSSAIntrinsic1(s *Sym) bool {
return true
}
}
if s != nil && s.Pkg != nil && s.Pkg.Path == "runtime/internal/atomic" {
switch s.Name {
case "Load", "Load64", "Loadint64", "Loadp", "Loaduint", "Loaduintptr":
return true
case "Store", "Store64", "StorepNoWB", "Storeuintptr":
return true
}
}
return false
}
func isIntrinsicCall1(n *Node) bool {
func isIntrinsicCall(n *Node) bool {
if n == nil || n.Left == nil {
return false
}
return isSSAIntrinsic1(n.Left.Sym)
return isSSAIntrinsic(n.Left.Sym)
}
// intrinsicFirstArg extracts arg from n.List and evaluates it.
func (s *state) intrinsicFirstArg(n *Node) *ssa.Value {
x := n.List.First()
// intrinsicArg extracts the ith arg from n.List and returns its value.
func (s *state) intrinsicArg(n *Node, i int) *ssa.Value {
x := n.List.Slice()[i]
if x.Op == OAS {
x = x.Right
}
return s.expr(x)
}
func (s *state) intrinsicFirstArg(n *Node) *ssa.Value {
return s.intrinsicArg(n, 0)
}
// intrinsicCall1 converts a call to a recognized 1-arg intrinsic
// into the intrinsic
func (s *state) intrinsicCall1(n *Node) *ssa.Value {
// intrinsicCall converts a call to a recognized intrinsic function into the intrinsic SSA operation.
func (s *state) intrinsicCall(n *Node) (ret *ssa.Value) {
var result *ssa.Value
switch n.Left.Sym.Name {
case "Ctz64":
name := n.Left.Sym.Name
switch {
case name == "Ctz64":
result = s.newValue1(ssa.OpCtz64, Types[TUINT64], s.intrinsicFirstArg(n))
case "Ctz32":
ret = result
case name == "Ctz32":
result = s.newValue1(ssa.OpCtz32, Types[TUINT32], s.intrinsicFirstArg(n))
case "Bswap64":
ret = result
case name == "Bswap64":
result = s.newValue1(ssa.OpBswap64, Types[TUINT64], s.intrinsicFirstArg(n))
case "Bswap32":
ret = result
case name == "Bswap32":
result = s.newValue1(ssa.OpBswap32, Types[TUINT32], s.intrinsicFirstArg(n))
ret = result
case name == "Load" || name == "Loaduint" && s.config.IntSize == 4 || name == "Loaduintptr" && s.config.PtrSize == 4:
result = s.newValue2(ssa.OpAtomicLoad32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, result)
ret = s.newValue1(ssa.OpSelect0, Types[TUINT32], result)
case name == "Load64" || name == "Loadint64" || name == "Loaduint" && s.config.IntSize == 8 || name == "Loaduintptr" && s.config.PtrSize == 8:
result = s.newValue2(ssa.OpAtomicLoad64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, result)
ret = s.newValue1(ssa.OpSelect0, Types[TUINT64], result)
case name == "Loadp":
result = s.newValue2(ssa.OpAtomicLoadPtr, ssa.MakeTuple(Ptrto(Types[TUINT8]), ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, result)
ret = s.newValue1(ssa.OpSelect0, Ptrto(Types[TUINT8]), result)
case name == "Store" || name == "Storeuintptr" && s.config.PtrSize == 4:
result = s.newValue3(ssa.OpAtomicStore32, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
s.vars[&memVar] = result
case name == "Store64" || name == "Storeuintptr" && s.config.PtrSize == 8:
result = s.newValue3(ssa.OpAtomicStore64, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
s.vars[&memVar] = result
case name == "StorepNoWB":
result = s.newValue3(ssa.OpAtomicStorePtrNoWB, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
s.vars[&memVar] = result
}
if result == nil {
Fatalf("Unknown special call: %v", n.Left.Sym)
......@@ -2580,7 +2623,7 @@ func (s *state) intrinsicCall1(n *Node) *ssa.Value {
if ssa.IntrinsicsDebug > 0 {
Warnl(n.Lineno, "intrinsic substitution for %v with %s", n.Left.Sym.Name, result.LongString())
}
return result
return
}
// Calls the function n using the specified call type.
......
......@@ -29,6 +29,10 @@ func dse(f *Func) {
}
if v.Type.IsMemory() {
stores = append(stores, v)
if v.Op == OpSelect1 {
// Use the args of the tuple-generating op.
v = v.Args[0]
}
for _, a := range v.Args {
if a.Block == b && a.Type.IsMemory() {
storeUse.add(a.ID)
......
......@@ -106,8 +106,8 @@ func init() {
gp11sp = regInfo{inputs: []regMask{gpsp}, outputs: gponly}
gp11sb = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{0, gp}}
gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{0, gp}}
gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}}
gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}}
gp1carry1 = regInfo{inputs: []regMask{gp}, outputs: gponly}
gp2carry1 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
......
......@@ -464,6 +464,19 @@
(If cond yes no) -> (NE (TESTB cond cond) yes no)
// Atomic loads. Other than preserving their ordering with respect to other loads, nothing special here.
(AtomicLoad32 ptr mem) -> (MOVLatomicload ptr mem)
(AtomicLoad64 ptr mem) -> (MOVQatomicload ptr mem)
(AtomicLoadPtr ptr mem) && config.PtrSize == 8 -> (MOVQatomicload ptr mem)
(AtomicLoadPtr ptr mem) && config.PtrSize == 4 -> (MOVLatomicload ptr mem)
// Atomic stores. We use XCHG to prevent the hardware reordering a subsequent load.
// TODO: most runtime uses of atomic stores don't need that property. Use normal stores for those?
(AtomicStore32 ptr val mem) -> (Select1 (XCHGL <MakeTuple(config.Frontend().TypeUInt32(),TypeMem)> val ptr mem))
(AtomicStore64 ptr val mem) -> (Select1 (XCHGQ <MakeTuple(config.Frontend().TypeUInt64(),TypeMem)> val ptr mem))
(AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 8 -> (Select1 (XCHGQ <MakeTuple(config.Frontend().TypeBytePtr(),TypeMem)> val ptr mem))
(AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 4 -> (Select1 (XCHGL <MakeTuple(config.Frontend().TypeBytePtr(),TypeMem)> val ptr mem))
// ***************************
// Above: lowering rules
// Below: optimizations
......@@ -1626,3 +1639,23 @@
(MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
(MOVBstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
(MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
// Merge ADDQconst and LEAQ into atomic loads.
(MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
(MOVQatomicload [off1+off2] {sym} ptr mem)
(MOVLatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
(MOVLatomicload [off1+off2] {sym} ptr mem)
(MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVQatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVLatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVLatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
// Merge ADDQconst and LEAQ into atomic stores.
(XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
(XCHGQ [off1+off2] {sym} val ptr mem)
(XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB ->
(XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
(XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
(XCHGL [off1+off2] {sym} val ptr mem)
(XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB ->
(XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
......@@ -134,6 +134,7 @@ func init() {
gpstoreconst = regInfo{inputs: []regMask{gpspsb, 0}}
gpstoreidx = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}}
gpstoreconstidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
gpstorexchg = regInfo{inputs: []regMask{gp, gp, 0}, outputs: []regMask{gp}}
fp01 = regInfo{inputs: nil, outputs: fponly}
fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
......@@ -509,6 +510,20 @@ func init() {
{name: "FlagLT_UGT"}, // signed < and unsigned >
{name: "FlagGT_UGT"}, // signed > and unsigned <
{name: "FlagGT_ULT"}, // signed > and unsigned >
// Atomic loads. These are just normal loads but return <value,memory> tuples
// so they can be properly ordered with other loads.
// load from arg0+auxint+aux. arg1=mem.
{name: "MOVLatomicload", argLength: 2, reg: gpload, asm: "MOVL", aux: "SymOff"},
{name: "MOVQatomicload", argLength: 2, reg: gpload, asm: "MOVQ", aux: "SymOff"},
// Atomic stores. We use XCHG to get the right memory ordering semantics.
// These ops return a tuple of <old memory contents, memory>. The old contents are
// ignored for now but they are allocated to a register so that the argument register
// is properly clobbered (together with resultInArg0).
// store arg0 to arg1+auxint+aux, arg2=mem.
// Note: arg0 and arg1 are backwards compared to MOVLstore (to facilitate resultInArg0)!
{name: "XCHGL", argLength: 3, reg: gpstorexchg, asm: "XCHGL", aux: "SymOff", resultInArg0: true},
{name: "XCHGQ", argLength: 3, reg: gpstorexchg, asm: "XCHGQ", aux: "SymOff", resultInArg0: true},
}
var AMD64blocks = []blockData{
......
......@@ -99,17 +99,17 @@ func init() {
var (
gp01 = regInfo{inputs: nil, outputs: []regMask{gp}}
gp11 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{0, gp}}
gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp, 0}}
gp11sp = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
gp1flags = regInfo{inputs: []regMask{gpg}}
gp1flags1 = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{0, gp}}
gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, 0}}
gp2flags = regInfo{inputs: []regMask{gpg, gpg}}
gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
gp31 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{0, gp}}
gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp, 0}}
gp3flags = regInfo{inputs: []regMask{gp, gp, gp}}
gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
......
......@@ -39,16 +39,16 @@
(Add32withcarry <config.fe.TypeInt32()>
(Int64Hi x)
(Int64Hi y)
(Select0 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))
(Select1 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
(Select1 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))
(Select0 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
(Sub64 x y) ->
(Int64Make
(Sub32withcarry <config.fe.TypeInt32()>
(Int64Hi x)
(Int64Hi y)
(Select0 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))
(Select1 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
(Select1 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))
(Select0 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
(Mul64 x y) ->
(Int64Make
......
......@@ -417,10 +417,10 @@ var genericOps = []opData{
{name: "Int64Hi", argLength: 1, typ: "UInt32"}, // high 32-bit of arg0
{name: "Int64Lo", argLength: 1, typ: "UInt32"}, // low 32-bit of arg0
{name: "Add32carry", argLength: 2, commutative: true, typ: "(Flags,UInt32)"}, // arg0 + arg1, returns (carry, value)
{name: "Add32carry", argLength: 2, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1, returns (value, carry)
{name: "Add32withcarry", argLength: 3, commutative: true}, // arg0 + arg1 + arg2, arg2=carry (0 or 1)
{name: "Sub32carry", argLength: 2, typ: "(Flags,UInt32)"}, // arg0 - arg1, returns (carry, value)
{name: "Sub32carry", argLength: 2, typ: "(UInt32,Flags)"}, // arg0 - arg1, returns (value, carry)
{name: "Sub32withcarry", argLength: 3}, // arg0 - arg1 - arg2, arg2=carry (0 or 1)
{name: "Mul32uhilo", argLength: 2, typ: "(UInt32,UInt32)"}, // arg0 * arg1, returns (hi, lo)
......@@ -440,6 +440,17 @@ var genericOps = []opData{
// pseudo-ops for breaking Tuple
{name: "Select0", argLength: 1}, // the first component of a tuple
{name: "Select1", argLength: 1}, // the second component of a tuple
// Atomic operations used for semantically inlining runtime/internal/atomic.
// Atomic loads return a new memory so that the loads are properly ordered
// with respect to other loads and stores.
// TODO: use for sync/atomic at some point.
{name: "AtomicLoad32", argLength: 2, typ: "(UInt32,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory.
{name: "AtomicLoad64", argLength: 2, typ: "(UInt64,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory.
{name: "AtomicLoadPtr", argLength: 2, typ: "(BytePtr,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory.
{name: "AtomicStore32", argLength: 3, typ: "Mem"}, // Store arg1 to arg0. arg2=memory. Returns memory.
{name: "AtomicStore64", argLength: 3, typ: "Mem"}, // Store arg1 to arg0. arg2=memory. Returns memory.
{name: "AtomicStorePtrNoWB", argLength: 3, typ: "Mem"}, // Store arg1 to arg0. arg2=memory. Returns memory.
}
// kind control successors implicit exit
......
......@@ -43,7 +43,7 @@ type opData struct {
rematerializeable bool
argLength int32 // number of arguments, if -1, then this operation has a variable number of arguments
commutative bool // this operation is commutative on its first 2 arguments (e.g. addition)
resultInArg0 bool // last output of v and v.Args[0] must be allocated to the same register
resultInArg0 bool // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register
clobberFlags bool // this op clobbers flags register
}
......@@ -161,11 +161,11 @@ func genOp() {
}
if v.resultInArg0 {
fmt.Fprintln(w, "resultInArg0: true,")
if v.reg.inputs[0] != v.reg.outputs[len(v.reg.outputs)-1] {
log.Fatalf("input[0] and last output register must be equal for %s", v.name)
if v.reg.inputs[0] != v.reg.outputs[0] {
log.Fatalf("input[0] and output[0] must use the same registers for %s", v.name)
}
if v.commutative && v.reg.inputs[1] != v.reg.outputs[len(v.reg.outputs)-1] {
log.Fatalf("input[1] and last output register must be equal for %s", v.name)
if v.commutative && v.reg.inputs[1] != v.reg.outputs[0] {
log.Fatalf("input[1] and output[0] must use the same registers for %s", v.name)
}
}
if v.clobberFlags {
......
......@@ -1204,7 +1204,7 @@ func (s *regAllocState) regalloc(f *Func) {
if mask == 0 {
continue
}
if opcodeTable[v.Op].resultInArg0 && out.idx == len(regspec.outputs)-1 {
if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
if !opcodeTable[v.Op].commutative {
// Output must use the same register as input 0.
r := register(s.f.getHome(args[0].ID).(*Register).Num)
......
......@@ -126,7 +126,7 @@ func rewriteValuedec64_OpAdd64(v *Value, config *Config) bool {
_ = b
// match: (Add64 x y)
// cond:
// result: (Int64Make (Add32withcarry <config.fe.TypeInt32()> (Int64Hi x) (Int64Hi y) (Select0 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y)))) (Select1 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
// result: (Int64Make (Add32withcarry <config.fe.TypeInt32()> (Int64Hi x) (Int64Hi y) (Select1 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y)))) (Select0 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
for {
x := v.Args[0]
y := v.Args[1]
......@@ -138,8 +138,8 @@ func rewriteValuedec64_OpAdd64(v *Value, config *Config) bool {
v2 := b.NewValue0(v.Line, OpInt64Hi, config.fe.TypeUInt32())
v2.AddArg(y)
v0.AddArg(v2)
v3 := b.NewValue0(v.Line, OpSelect0, TypeFlags)
v4 := b.NewValue0(v.Line, OpAdd32carry, MakeTuple(TypeFlags, config.fe.TypeUInt32()))
v3 := b.NewValue0(v.Line, OpSelect1, TypeFlags)
v4 := b.NewValue0(v.Line, OpAdd32carry, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
v5 := b.NewValue0(v.Line, OpInt64Lo, config.fe.TypeUInt32())
v5.AddArg(x)
v4.AddArg(v5)
......@@ -149,8 +149,8 @@ func rewriteValuedec64_OpAdd64(v *Value, config *Config) bool {
v3.AddArg(v4)
v0.AddArg(v3)
v.AddArg(v0)
v7 := b.NewValue0(v.Line, OpSelect1, config.fe.TypeUInt32())
v8 := b.NewValue0(v.Line, OpAdd32carry, MakeTuple(TypeFlags, config.fe.TypeUInt32()))
v7 := b.NewValue0(v.Line, OpSelect0, config.fe.TypeUInt32())
v8 := b.NewValue0(v.Line, OpAdd32carry, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
v9 := b.NewValue0(v.Line, OpInt64Lo, config.fe.TypeUInt32())
v9.AddArg(x)
v8.AddArg(v9)
......@@ -2361,7 +2361,7 @@ func rewriteValuedec64_OpSub64(v *Value, config *Config) bool {
_ = b
// match: (Sub64 x y)
// cond:
// result: (Int64Make (Sub32withcarry <config.fe.TypeInt32()> (Int64Hi x) (Int64Hi y) (Select0 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y)))) (Select1 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
// result: (Int64Make (Sub32withcarry <config.fe.TypeInt32()> (Int64Hi x) (Int64Hi y) (Select1 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y)))) (Select0 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
for {
x := v.Args[0]
y := v.Args[1]
......@@ -2373,8 +2373,8 @@ func rewriteValuedec64_OpSub64(v *Value, config *Config) bool {
v2 := b.NewValue0(v.Line, OpInt64Hi, config.fe.TypeUInt32())
v2.AddArg(y)
v0.AddArg(v2)
v3 := b.NewValue0(v.Line, OpSelect0, TypeFlags)
v4 := b.NewValue0(v.Line, OpSub32carry, MakeTuple(TypeFlags, config.fe.TypeUInt32()))
v3 := b.NewValue0(v.Line, OpSelect1, TypeFlags)
v4 := b.NewValue0(v.Line, OpSub32carry, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
v5 := b.NewValue0(v.Line, OpInt64Lo, config.fe.TypeUInt32())
v5.AddArg(x)
v4.AddArg(v5)
......@@ -2384,8 +2384,8 @@ func rewriteValuedec64_OpSub64(v *Value, config *Config) bool {
v3.AddArg(v4)
v0.AddArg(v3)
v.AddArg(v0)
v7 := b.NewValue0(v.Line, OpSelect1, config.fe.TypeUInt32())
v8 := b.NewValue0(v.Line, OpSub32carry, MakeTuple(TypeFlags, config.fe.TypeUInt32()))
v7 := b.NewValue0(v.Line, OpSelect0, config.fe.TypeUInt32())
v8 := b.NewValue0(v.Line, OpSub32carry, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
v9 := b.NewValue0(v.Line, OpInt64Lo, config.fe.TypeUInt32())
v9.AddArg(x)
v8.AddArg(v9)
......
......@@ -33,7 +33,7 @@ type Type interface {
PtrTo() Type // given T, return *T
NumFields() int // # of fields of a struct
FieldType(i int) Type // type of ith field of the struct
FieldType(i int) Type // type of ith field of the struct or ith part of a tuple
FieldOff(i int) int64 // offset of ith field of the struct
FieldName(i int) string // name of ith field of the struct
......@@ -84,31 +84,41 @@ func (t *CompilerType) NumElem() int64 { panic("not implemented") }
type TupleType struct {
first Type
second Type
// Any tuple with a memory type must put that memory type second.
}
func (t *TupleType) Size() int64 { panic("not implemented") }
func (t *TupleType) Alignment() int64 { panic("not implemented") }
func (t *TupleType) IsBoolean() bool { return false }
func (t *TupleType) IsInteger() bool { return false }
func (t *TupleType) IsSigned() bool { return false }
func (t *TupleType) IsFloat() bool { return false }
func (t *TupleType) IsComplex() bool { return false }
func (t *TupleType) IsPtrShaped() bool { return false }
func (t *TupleType) IsString() bool { return false }
func (t *TupleType) IsSlice() bool { return false }
func (t *TupleType) IsArray() bool { return false }
func (t *TupleType) IsStruct() bool { return false }
func (t *TupleType) IsInterface() bool { return false }
func (t *TupleType) IsMemory() bool { return false }
func (t *TupleType) IsFlags() bool { return false }
func (t *TupleType) IsVoid() bool { return false }
func (t *TupleType) IsTuple() bool { return true }
func (t *TupleType) String() string { return t.first.String() + "," + t.second.String() }
func (t *TupleType) SimpleString() string { return "Tuple" }
func (t *TupleType) ElemType() Type { panic("not implemented") }
func (t *TupleType) PtrTo() Type { panic("not implemented") }
func (t *TupleType) NumFields() int { panic("not implemented") }
func (t *TupleType) FieldType(i int) Type { panic("not implemented") }
func (t *TupleType) Size() int64 { panic("not implemented") }
func (t *TupleType) Alignment() int64 { panic("not implemented") }
func (t *TupleType) IsBoolean() bool { return false }
func (t *TupleType) IsInteger() bool { return false }
func (t *TupleType) IsSigned() bool { return false }
func (t *TupleType) IsFloat() bool { return false }
func (t *TupleType) IsComplex() bool { return false }
func (t *TupleType) IsPtrShaped() bool { return false }
func (t *TupleType) IsString() bool { return false }
func (t *TupleType) IsSlice() bool { return false }
func (t *TupleType) IsArray() bool { return false }
func (t *TupleType) IsStruct() bool { return false }
func (t *TupleType) IsInterface() bool { return false }
func (t *TupleType) IsMemory() bool { return false }
func (t *TupleType) IsFlags() bool { return false }
func (t *TupleType) IsVoid() bool { return false }
func (t *TupleType) IsTuple() bool { return true }
func (t *TupleType) String() string { return t.first.String() + "," + t.second.String() }
func (t *TupleType) SimpleString() string { return "Tuple" }
func (t *TupleType) ElemType() Type { panic("not implemented") }
func (t *TupleType) PtrTo() Type { panic("not implemented") }
func (t *TupleType) NumFields() int { panic("not implemented") }
func (t *TupleType) FieldType(i int) Type {
switch i {
case 0:
return t.first
case 1:
return t.second
default:
panic("bad tuple index")
}
}
func (t *TupleType) FieldOff(i int) int64 { panic("not implemented") }
func (t *TupleType) FieldName(i int) string { panic("not implemented") }
func (t *TupleType) NumElem() int64 { panic("not implemented") }
......
......@@ -196,17 +196,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
// output 0 is the low 32 bits, output 1 is carry/borrow.
r := gc.SSARegNum1(v)
r := gc.SSARegNum0(v)
if r != gc.SSARegNum(v.Args[0]) {
v.Fatalf("input[0] and output[1] not in same register %s", v.LongString())
v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
}
opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1]))
case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
// output 0 is the low 32 bits, output 1 is carry/borrow.
r := gc.SSARegNum1(v)
r := gc.SSARegNum0(v)
if r != gc.SSARegNum(v.Args[0]) {
v.Fatalf("input[0] and output[1] not in same register %s", v.LongString())
v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
}
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
......
......@@ -2,6 +2,9 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Note: some of these functions are semantically inlined
// by the compiler (in src/cmd/compile/internal/gc/ssa.go).
#include "textflag.h"
// bool Cas(int32 *val, int32 old, int32 new)
......
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package atomic_test
import (
"runtime/internal/atomic"
"testing"
)
var sink interface{}
func BenchmarkAtomicLoad64(b *testing.B) {
var x uint64
sink = &x
for i := 0; i < b.N; i++ {
_ = atomic.Load64(&x)
}
}
func BenchmarkAtomicStore64(b *testing.B) {
var x uint64
sink = &x
for i := 0; i < b.N; i++ {
atomic.Store64(&x, 0)
}
}